├── .coveragerc ├── .editorconfig ├── .github ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── ci_checks.yml │ ├── release.yml │ └── wheels_build.yml ├── .gitignore ├── .readthedocs.yaml ├── .vscode └── settings.json ├── AUTHORS.rst ├── CONTRIBUTING.rst ├── HISTORY.rst ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.rst ├── docs ├── Makefile ├── _static │ └── css │ │ └── custom.css ├── authors.rst ├── conf.py ├── contributing.rst ├── first_steps.rst ├── genindex.rst ├── glossary.rst ├── history.rst ├── index.rst ├── make.bat ├── readme.rst ├── reference │ ├── classification.rst │ ├── clustering.rst │ ├── data.rst │ ├── embedding.rst │ ├── gnn.rst │ ├── hierarchy.rst │ ├── linalg.rst │ ├── linkpred.rst │ ├── path.rst │ ├── ranking.rst │ ├── regression.rst │ ├── topology.rst │ ├── utils.rst │ └── visualization.rst ├── tutorials │ ├── classification │ │ ├── diffusion.ipynb │ │ ├── index.rst │ │ ├── knn.ipynb │ │ ├── metrics.ipynb │ │ ├── pagerank.ipynb │ │ └── propagation.ipynb │ ├── clustering │ │ ├── index.rst │ │ ├── kcenters.ipynb │ │ ├── leiden.ipynb │ │ ├── louvain.ipynb │ │ └── propagation.ipynb │ ├── data │ │ ├── adult-income.csv │ │ ├── index.rst │ │ ├── iris.p │ │ ├── load_collection.ipynb │ │ ├── load_data.ipynb │ │ ├── miserables.graphml │ │ ├── miserables.tsv │ │ ├── models.ipynb │ │ ├── movie_actor.tsv │ │ ├── mygraph │ │ │ └── names.npy │ │ ├── painters.graphml │ │ ├── painters.tsv │ │ ├── save.ipynb │ │ └── toy_graphs.ipynb │ ├── embedding │ │ ├── forceatlas.ipynb │ │ ├── gsvd.ipynb │ │ ├── index.rst │ │ ├── louvain_embedding.ipynb │ │ ├── pca.ipynb │ │ ├── random_projection.ipynb │ │ ├── spectral.ipynb │ │ ├── spring.ipynb │ │ └── svd.ipynb │ ├── gnn │ │ ├── gnn_classifier.ipynb │ │ └── index.rst │ ├── hierarchy │ │ ├── index.rst │ │ ├── louvain_iteration.ipynb │ │ ├── louvain_recursion.ipynb │ │ └── paris.ipynb │ ├── linkpred │ │ ├── index.rst │ │ └── nn.ipynb │ ├── overview │ │ ├── get_started.ipynb │ │ ├── index.rst │ │ ├── miserables.tsv │ │ └── movie_actor.tsv │ ├── path │ │ ├── distance.ipynb │ │ ├── index.rst │ │ └── shortest_path.ipynb │ ├── ranking │ │ ├── index.rst │ │ ├── katz.ipynb │ │ └── pagerank.ipynb │ ├── regression │ │ ├── diffusion.ipynb │ │ ├── dirichlet.ipynb │ │ └── index.rst │ ├── topology │ │ ├── cliques.ipynb │ │ ├── connected_components.ipynb │ │ ├── core_decomposition.ipynb │ │ ├── cycles.ipynb │ │ ├── index.rst │ │ └── isomorphism.ipynb │ └── visualization │ │ ├── dendrogram_karate_club.svg │ │ ├── dendrograms.ipynb │ │ ├── graphs.ipynb │ │ ├── index.rst │ │ ├── karate_club.svg │ │ ├── paths.ipynb │ │ └── pie_charts.ipynb └── use_cases │ ├── atp.csv │ ├── miserables-en.txt │ ├── recommendation.ipynb │ ├── sport.ipynb │ ├── text.ipynb │ ├── votes.ipynb │ └── wikipedia.ipynb ├── images ├── logo_sknetwork.png ├── logo_sknetwork.xml └── logo_sknetwork_long.png ├── make.bat ├── miserables.tsv ├── movie_actor.tsv ├── pyproject.toml ├── requirements_dev.txt ├── setup.cfg ├── setup.py └── sknetwork ├── __init__.py ├── base.py ├── classification ├── __init__.py ├── base.py ├── base_rank.py ├── diffusion.py ├── knn.py ├── metrics.py ├── pagerank.py ├── propagation.py ├── tests │ ├── __init__.py │ ├── test_API.py │ ├── test_diffusion.py │ ├── test_knn.py │ ├── test_metrics.py │ ├── test_pagerank.py │ └── test_propagation.py └── vote.pyx ├── clustering ├── __init__.py ├── base.py ├── kcenters.py ├── leiden.py ├── leiden_core.pyx ├── louvain.py ├── louvain_core.pyx ├── metrics.py ├── postprocess.py ├── propagation_clustering.py └── 
tests │ ├── __init__.py │ ├── test_API.py │ ├── test_kcenters.py │ ├── test_leiden.py │ ├── test_louvain.py │ ├── test_metrics.py │ └── test_postprocess.py ├── data ├── __init__.py ├── base.py ├── load.py ├── models.py ├── parse.py ├── test_graphs.py ├── tests │ ├── __init__.py │ ├── test_API.py │ ├── test_base.py │ ├── test_load.py │ ├── test_models.py │ ├── test_parse.py │ ├── test_test_graphs.py │ └── test_toy_graphs.py ├── timeout.py └── toy_graphs.py ├── embedding ├── __init__.py ├── base.py ├── force_atlas.py ├── louvain_embedding.py ├── random_projection.py ├── spectral.py ├── spring.py ├── svd.py └── tests │ ├── __init__.py │ ├── test_API.py │ ├── test_force_atlas.py │ ├── test_louvain_embedding.py │ ├── test_random_projection.py │ ├── test_spectral.py │ ├── test_spring.py │ └── test_svd.py ├── gnn ├── __init__.py ├── activation.py ├── base.py ├── base_activation.py ├── base_layer.py ├── gnn_classifier.py ├── layer.py ├── loss.py ├── neighbor_sampler.py ├── optimizer.py ├── tests │ ├── __init__.py │ ├── test_activation.py │ ├── test_base.py │ ├── test_base_layer.py │ ├── test_gnn_classifier.py │ ├── test_layers.py │ ├── test_loss.py │ ├── test_neigh_sampler.py │ ├── test_optimizer.py │ └── test_utils.py └── utils.py ├── hierarchy ├── __init__.py ├── base.py ├── louvain_hierarchy.py ├── metrics.py ├── paris.pyx ├── postprocess.py └── tests │ ├── __init__.py │ ├── test_API.py │ ├── test_algos.py │ ├── test_metrics.py │ └── test_postprocess.py ├── linalg ├── __init__.py ├── basics.py ├── diteration.pyx ├── eig_solver.py ├── laplacian.py ├── normalizer.py ├── operators.py ├── polynome.py ├── ppr_solver.py ├── push.pyx ├── sparse_lowrank.py ├── svd_solver.py └── tests │ ├── __init__.py │ ├── test_eig.py │ ├── test_laplacian.py │ ├── test_normalization.py │ ├── test_operators.py │ ├── test_polynome.py │ ├── test_ppr.py │ ├── test_sparse_lowrank.py │ └── test_svd.py ├── linkpred ├── __init__.py ├── base.py ├── nn.py └── tests │ ├── __init__.py │ └── test_nn.py ├── log.py ├── path ├── __init__.py ├── dag.py ├── distances.py ├── search.py ├── shortest_path.py └── tests │ ├── __init__.py │ ├── test_dag.py │ ├── test_distances.py │ ├── test_search.py │ └── test_shortest_path.py ├── ranking ├── __init__.py ├── base.py ├── betweenness.pyx ├── closeness.py ├── hits.py ├── katz.py ├── pagerank.py ├── postprocess.py └── tests │ ├── __init__.py │ ├── test_API.py │ ├── test_betweenness.py │ ├── test_closeness.py │ ├── test_hits.py │ ├── test_pagerank.py │ └── test_postprocess.py ├── regression ├── __init__.py ├── base.py ├── diffusion.py └── tests │ ├── __init__.py │ ├── test_API.py │ └── test_diffusion.py ├── sknetwork.py ├── test_base.py ├── test_log.py ├── topology ├── __init__.py ├── cliques.pyx ├── core.pyx ├── cycles.py ├── minheap.pxd ├── minheap.pyx ├── structure.py ├── tests │ ├── __init__.py │ ├── test_cliques.py │ ├── test_core.py │ ├── test_cycles.py │ ├── test_structure.py │ ├── test_triangles.py │ └── test_wl.py ├── triangles.pyx ├── weisfeiler_lehman.py └── weisfeiler_lehman_core.pyx ├── utils ├── __init__.py ├── check.py ├── format.py ├── membership.py ├── neighbors.py ├── tests │ ├── __init__.py │ ├── test_check.py │ ├── test_format.py │ ├── test_membership.py │ ├── test_neighbors.py │ ├── test_tfidf.py │ └── test_values.py ├── tfidf.py └── values.py └── visualization ├── __init__.py ├── colors.py ├── dendrograms.py ├── graphs.py └── tests ├── __init__.py ├── test_dendrograms.py └── test_graphs.py /.coveragerc: 
-------------------------------------------------------------------------------- 1 | [run] 2 | plugins = Cython.Coverage 3 | omit = venv/* 4 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | charset = utf-8 11 | end_of_line = lf 12 | 13 | [*.bat] 14 | indent_style = tab 15 | end_of_line = crlf 16 | 17 | [LICENSE] 18 | insert_final_newline = false 19 | 20 | [Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | * scikit-network version: 2 | * Python version: 3 | * Operating System: 4 | 5 | ### Description 6 | 7 | Describe what you were trying to get done. 8 | Tell us what happened, what went wrong, and what you expected to happen. 9 | 10 | ### What I Did 11 | 12 | ``` 13 | Paste the command(s) you ran and the output. 14 | If there was a crash, please include the traceback here. 15 | ``` 16 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Modifications: 2 | (new algorithms, added tests or documentation) 3 | * Item 1 4 | * Item 2 5 | 6 | ### Impacted submodules: 7 | (e.g. `sknetwork.topology`) 8 | * Submodule 1 9 | * Submodule 2 10 | 11 | ### This pull request: 12 | (your PR must match these criteria before review) 13 | - [ ] **targets the `develop` branch** 14 | - [ ] **does not decrease code coverage** 15 | - [ ] **passes the tests** 16 | - [ ] **is documented** and **the documentation build passes** 17 | - [ ] **has PEP8-compliant code** and **explicit variable naming** 18 | - [ ] **has a tutorial** (facultative but recommended) 19 | 20 | *Any doubts about some technicalities? 
Do not hesitate to look at the [dedicated Wiki](https://github.com/sknetwork-team/scikit-network/wiki/Contributing-guide).* 21 | -------------------------------------------------------------------------------- /.github/workflows/ci_checks.yml: -------------------------------------------------------------------------------- 1 | # Taken from https://github.com/mclegrand/scikit-network/blob/master/.github/workflows/python-package.yml 2 | 3 | name: CI Checks 4 | 5 | on: [push, pull_request] 6 | 7 | jobs: 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | env: 12 | WITH_CYTHON_PROFILE: ON 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | - name: Set up Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v5 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | python -m pip install flake8 28 | python -m pip install -r requirements_dev.txt 29 | python setup.py develop 30 | - name: Test with pytest 31 | run: | 32 | py.test --doctest-modules --cov-report=xml --cov=sknetwork 33 | - name: Codecov 34 | uses: codecov/codecov-action@v4 35 | env: 36 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 37 | 38 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | # Taken from https://cibuildwheel.readthedocs.io/en/stable/setup/ 2 | 3 | name: Release 4 | 5 | on: 6 | push: 7 | tags: 8 | - "v*" 9 | 10 | jobs: 11 | build_wheels: 12 | name: Build wheels on ${{ matrix.os }} 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | matrix: 16 | os: [ubuntu-latest, windows-latest, macos-latest, ubuntu-24.04-arm] 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - name: Build wheels 22 | uses: pypa/cibuildwheel@v2.23.3 23 | env: 24 | CIBW_PLATFORM: auto 25 | CIBW_ARCHS: auto64 26 | CIBW_SKIP: cp*-musllinux* 27 | CIBW_ARCHS_MACOS: x86_64 arm64 28 | CIBW_BUILD_VERBOSITY: 3 29 | CIBW_BEFORE_BUILD: "pip install -r requirements_dev.txt && pip install ." 30 | CIBW_BEFORE_BUILD_MACOS: "pip install -r requirements_dev.txt && pip install ." 31 | CIBW_BUILD: cp39-* cp310-* cp311-* cp312-* cp313-* 32 | 33 | - uses: actions/upload-artifact@v4 34 | with: 35 | path: wheelhouse 36 | name: dist-${{ matrix.os }} 37 | 38 | build_sdist: 39 | name: Build source distribution 40 | runs-on: ubuntu-latest 41 | steps: 42 | - uses: actions/checkout@v4 43 | 44 | - uses: actions/setup-python@v5 45 | name: Install Python 46 | with: 47 | python-version: '3.9' 48 | 49 | - name: Build sdist 50 | run: | 51 | python -m pip install . 52 | python -m pip install -r requirements_dev.txt . 
53 | python -m build 54 | 55 | - uses: actions/upload-artifact@v4 56 | with: 57 | path: dist/*.tar.gz 58 | 59 | upload_pypi: 60 | needs: [ build_wheels, build_sdist ] 61 | runs-on: ubuntu-latest 62 | environment: 63 | name: pypi 64 | url: https://pypi.org/p/scikit-network 65 | permissions: 66 | id-token: write 67 | 68 | steps: 69 | - uses: actions/download-artifact@v4 70 | with: 71 | path: dist/ 72 | merge-multiple: true 73 | - uses: pypa/gh-action-pypi-publish@release/v1 74 | -------------------------------------------------------------------------------- /.github/workflows/wheels_build.yml: -------------------------------------------------------------------------------- 1 | # Taken from https://cibuildwheel.readthedocs.io/en/stable/setup/ 2 | 3 | name: Wheels build 4 | 5 | on: [pull_request, workflow_dispatch] 6 | 7 | jobs: 8 | build_wheels: 9 | name: Build wheels on ${{ matrix.os }} 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | matrix: 13 | os: [ubuntu-latest, windows-latest, macos-latest, ubuntu-24.04-arm] 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - name: Build wheels 19 | uses: pypa/cibuildwheel@v2.23.3 20 | env: 21 | CIBW_PLATFORM: auto 22 | CIBW_ARCHS: auto64 23 | CIBW_SKIP: cp*-musllinux* 24 | CIBW_ARCHS_MACOS: x86_64 arm64 25 | CIBW_BUILD_VERBOSITY: 3 26 | CIBW_BEFORE_BUILD: "pip install -r requirements_dev.txt && pip install ." 27 | CIBW_BEFORE_BUILD_MACOS: "pip install -r requirements_dev.txt && pip install ." 28 | CIBW_BUILD: cp39-* cp310-* cp311-* cp312-* cp313-* 29 | 30 | - uses: actions/upload-artifact@v4 31 | with: 32 | name: dist-${{ matrix.os }}-${{ matrix.python-version }} 33 | path: wheelhouse 34 | 35 | build_sdist: 36 | name: Build source distribution 37 | runs-on: ubuntu-latest 38 | steps: 39 | - uses: actions/checkout@v4 40 | 41 | - uses: actions/setup-python@v5 42 | name: Install Python 43 | with: 44 | python-version: '3.9' 45 | 46 | - name: Build sdist 47 | run: | 48 | python -m pip install . 49 | python -m pip install -r requirements_dev.txt . 50 | python -m build 51 | 52 | - uses: actions/upload-artifact@v4 53 | with: 54 | path: dist/*.tar.gz 55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.c 8 | *.cpp 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # dotenv 86 | .env 87 | 88 | # virtualenv 89 | .Senv 90 | venv/ 91 | ENV/ 92 | venv-3.8/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | 107 | # Pycharm 108 | .idea 109 | 110 | # Travis 111 | .travis.yml 112 | 113 | # Pypi 114 | /dist 115 | 116 | 117 | \.DS_Store 118 | 119 | *.npy 120 | *.npz 121 | *.p 122 | *.ipynb 123 | 124 | #allow notebooks and pickled objects for tutorials 125 | !/docs/tutorials/*.ipynb 126 | !/docs/tutorials/*/*.ipynb 127 | !/docs/tutorials/*/*.p 128 | 129 | #html in src 130 | sknetwork/*/*.html 131 | 132 | docs/tutorials/data/mygraph/ 133 | 134 | karate_club.svg 135 | /venv39/ 136 | /my_dataset/ 137 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read-the-Docs configuration file 3 | version: 2 4 | 5 | build: 6 | os: "ubuntu-20.04" 7 | tools: 8 | python: "3.9" 9 | 10 | sphinx: 11 | configuration: docs/conf.py 12 | 13 | python: 14 | install: 15 | - requirements: ./requirements_dev.txt 16 | - method: pip 17 | path: . 18 | - method: setuptools 19 | path: . 20 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.testing.unittestArgs": [ 3 | "-v", 4 | "-s", 5 | "./sknetwork", 6 | "-p", 7 | "test_*.py" 8 | ], 9 | "python.testing.pytestEnabled": false, 10 | "python.testing.unittestEnabled": true 11 | } -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Credits 3 | ======= 4 | 5 | The project started with the Master internship of Bertrand Charpentier and 6 | the PhD theses of Nathan de Lara and Quentin Lutz, under the supervision of Thomas Bonald at Télécom Paris, 7 | Institut Polytechnique de Paris. 
8 | 9 | Development Lead 10 | ---------------- 11 | 12 | * Thomas Bonald 13 | * Simon Delarue 14 | * Marc Jeanmougin 15 | 16 | Former lead 17 | ----------- 18 | 19 | * Quentin Lutz 20 | * Nathan de Lara 21 | 22 | Contributors 23 | ------------ 24 | 25 | * Bertrand Charpentier 26 | * Maximilien Danisch 27 | * François Durand 28 | * Alexandre Hollocou 29 | * Fabien Mathieu 30 | * Yohann Robert 31 | * Julien Simonnet 32 | * Alexis Barreaux 33 | * Rémi Jaylet 34 | * Victor Manach 35 | * Pierre Pébereau 36 | * Armand Boschin 37 | * Tiphaine Viard 38 | * Flávio Juvenal 39 | * Wenzhuo Zhao 40 | * Henry Carscadden 41 | * Yiwen Peng 42 | * Ahmed Zaiou 43 | * Laurène David 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | 3 | BSD License 4 | 5 | Copyright (c) 2018, Scikit-network Developers 6 | Bertrand Charpentier 7 | Thomas Bonald 8 | All rights reserved. 9 | 10 | Redistribution and use in source and binary forms, with or without modification, 11 | are permitted provided that the following conditions are met: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions and the following disclaimer. 15 | 16 | * Redistributions in binary form must reproduce the above copyright notice, this 17 | list of conditions and the following disclaimer in the documentation and/or 18 | other materials provided with the distribution. 19 | 20 | * Neither the name of the copyright holder nor the names of its 21 | contributors may be used to endorse or promote products derived from this 22 | software without specific prior written permission. 23 | 24 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 25 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 26 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 28 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 30 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 31 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 32 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 33 | OF THE POSSIBILITY OF SUCH DAMAGE. 
34 | 35 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHORS.rst 2 | include CONTRIBUTING.rst 3 | include HISTORY.rst 4 | include LICENSE 5 | include README.rst 6 | 7 | recursive-include sknetwork *.pyx *.pxd 8 | recursive-include tests * 9 | recursive-exclude * __pycache__ 10 | recursive-exclude * *.py[co] 11 | 12 | recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif 13 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help 2 | .DEFAULT_GOAL := help 3 | 4 | define BROWSER_PYSCRIPT 5 | import os, webbrowser, sys 6 | 7 | try: 8 | from urllib import pathname2url 9 | except: 10 | from urllib.request import pathname2url 11 | 12 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 13 | endef 14 | export BROWSER_PYSCRIPT 15 | 16 | define PRINT_HELP_PYSCRIPT 17 | import re, sys 18 | 19 | for line in sys.stdin: 20 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 21 | if match: 22 | target, help = match.groups() 23 | print("%-20s %s" % (target, help)) 24 | endef 25 | export PRINT_HELP_PYSCRIPT 26 | 27 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 28 | 29 | help: 30 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 31 | 32 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 33 | 34 | clean-build: ## remove build artifacts 35 | rm -fr build/ 36 | rm -fr dist/ 37 | rm -fr .eggs/ 38 | find . -name '*.egg-info' -exec rm -fr {} + 39 | find . -name '*.egg' -exec rm -f {} + 40 | 41 | clean-pyc: ## remove Python file artifacts 42 | find . -name '*.pyc' -exec rm -f {} + 43 | find . -name '*.pyo' -exec rm -f {} + 44 | find . -name '*~' -exec rm -f {} + 45 | find . -name '__pycache__' -exec rm -fr {} + 46 | 47 | clean-test: ## remove test and coverage artifacts 48 | rm -fr .tox/ 49 | rm -f .coverage 50 | rm -fr htmlcov/ 51 | rm -fr .pytest_cache 52 | 53 | lint: ## check style with flake8 54 | flake8 sknetwork tests 55 | 56 | test: ## run tests quickly with the default Python 57 | py.test 58 | 59 | test-all: ## run tests on every Python version with tox 60 | tox 61 | 62 | coverage: ## check code coverage quickly with the default Python 63 | coverage run --source sknetwork -m pytest 64 | coverage report -m 65 | coverage html 66 | $(BROWSER) htmlcov/index.html 67 | 68 | docs: ## generate Sphinx HTML documentation, including API docs 69 | rm -f docs/sknetwork.rst 70 | rm -f docs/modules.rst 71 | sphinx-apidoc -o docs/ sknetwork 72 | $(MAKE) -C docs clean 73 | $(MAKE) -C docs html 74 | $(BROWSER) docs/_build/html/index.html 75 | 76 | servedocs: docs ## compile the docs watching for changes 77 | watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . 
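# Note: the `servedocs` target above relies on the `watchmedo` command from the
# `watchdog` package, which is assumed to be installed separately (it is not
# listed in requirements_dev.txt).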
78 | 79 | release: dist ## package and upload a release 80 | twine upload dist/* 81 | 82 | dist: clean ## builds source and wheel package 83 | python setup.py sdist 84 | python setup.py bdist_wheel 85 | ls -l dist 86 | 87 | install: clean ## install the package to the active Python's site-packages 88 | python setup.py install 89 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = sknetwork 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | html { 2 | background-color: #e6e6e6; 3 | } 4 | 5 | .wy-nav-content { 6 | max-width: none; 7 | } 8 | 9 | body.wy-body-for-nav { 10 | line-height: 1.5em; 11 | color: #333; 12 | } 13 | 14 | div.wy-nav-side { 15 | background-color: #333; 16 | } 17 | 18 | div.wy-side-nav-search { 19 | background-color: #777777; 20 | } 21 | 22 | a { 23 | color: #007fff 24 | } 25 | 26 | div.wy-menu.wy-menu-vertical>p { 27 | color: #ff8000 /* section titles */ 28 | } 29 | 30 | .wy-nav-top { 31 | background-color: #777777; 32 | } 33 | 34 | .wy-side-nav-search>a:hover, .wy-side-nav-search .wy-dropdown>a:hover { 35 | background: None; /*background for logo when hovered*/ 36 | } 37 | 38 | .wy-side-nav-search>div.version { 39 | color: white; 40 | } 41 | 42 | .wy-side-nav-search input[type=text] { 43 | border-color: #d9d9d9; 44 | } 45 | -------------------------------------------------------------------------------- /docs/authors.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../AUTHORS.rst 2 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/first_steps.rst: -------------------------------------------------------------------------------- 1 | .. _getting_started: 2 | 3 | Overview 4 | -------- 5 | 6 | Scikit-network is an open-source python package for machine learning on graphs. 7 | 8 | Each graph is represented by its adjacency matrix in the sparse CSR format of ``scipy``. 9 | 10 | An overview of the package is presented in this :ref:`notebook`. 11 | 12 | Installation 13 | ------------ 14 | 15 | To install scikit-network, run this command in your terminal: 16 | 17 | .. code-block:: console 18 | 19 | $ pip install scikit-network 20 | 21 | If you don't have `pip`_ installed, this `Python installation guide`_ can guide 22 | you through the process. 23 | 24 | Alternately, you can download the sources from `Github`_ and run: 25 | 26 | .. 
code-block:: console 27 | 28 | $ cd 29 | $ python setup.py develop 30 | 31 | 32 | .. _pip: https://pip.pypa.io 33 | .. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/ 34 | .. _Github: https://github.com/sknetwork-team/scikit-network 35 | 36 | Import 37 | ------ 38 | 39 | Import scikit-network in Python: 40 | 41 | .. code-block:: python 42 | 43 | import sknetwork as skn 44 | 45 | Usage 46 | ----- 47 | 48 | A graph is represented by its :term:`adjacency` matrix (square matrix). When the graph is bipartite, it can be represented by its :term:`biadjacency` matrix (rectangular matrix). 49 | Check our :ref:`tutorial` for various ways of loading a graph 50 | (from a list of edges, a dataframe or a CSV file, for instance). 51 | 52 | Each algorithm is represented as an object with a ``fit`` method. 53 | 54 | Here is an example to cluster the `Karate club graph`_ with the `Louvain algorithm`_: 55 | 56 | .. code-block:: python 57 | 58 | from sknetwork.data import karate_club 59 | from sknetwork.clustering import Louvain 60 | 61 | adjacency = karate_club() 62 | algorithm = Louvain() 63 | algorithm.fit(adjacency) 64 | 65 | 66 | More details are provided in this :ref:`tutorial`. 67 | 68 | .. _Karate club graph: https://en.wikipedia.org/wiki/Zachary%27s_karate_club 69 | .. _Louvain algorithm: https://en.wikipedia.org/wiki/Louvain_method 70 | -------------------------------------------------------------------------------- /docs/genindex.rst: -------------------------------------------------------------------------------- 1 | Index 2 | ===== 3 | -------------------------------------------------------------------------------- /docs/glossary.rst: -------------------------------------------------------------------------------- 1 | .. _glossary: 2 | 3 | Glossary 4 | ******** 5 | 6 | .. glossary:: 7 | 8 | adjacency 9 | Square matrix whose entries indicate edges between nodes of a graph, usually denoted by :math:`A`. 10 | 11 | biadjacency 12 | Rectangular matrix whose entries indicate edges between nodes of a bipartite graph, 13 | usually denoted by :math:`B`. 14 | 15 | embedding 16 | Mapping of the nodes of a graph to points in a vector space. 17 | -------------------------------------------------------------------------------- /docs/history.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../HISTORY.rst 2 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to scikit-network's documentation! 2 | ========================================== 3 | 4 | .. include:: readme.rst 5 | 6 | .. toctree:: 7 | :maxdepth: 1 8 | :caption: Getting started 9 | 10 | first_steps 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | :caption: User manual 15 | 16 | reference/data 17 | reference/topology 18 | reference/path 19 | reference/clustering 20 | reference/classification 21 | reference/gnn 22 | reference/regression 23 | reference/hierarchy 24 | reference/embedding 25 | reference/ranking 26 | reference/linkpred 27 | reference/linalg 28 | reference/utils 29 | reference/visualization 30 | 31 | .. 
toctree:: 32 | :maxdepth: 2 33 | :caption: Tutorials 34 | 35 | tutorials/overview/index 36 | tutorials/data/index 37 | tutorials/topology/index 38 | tutorials/path/index 39 | tutorials/clustering/index 40 | tutorials/classification/index 41 | tutorials/gnn/index 42 | tutorials/regression/index 43 | tutorials/hierarchy/index 44 | tutorials/embedding/index 45 | tutorials/ranking/index 46 | tutorials/linkpred/index 47 | tutorials/visualization/index 48 | 49 | .. toctree:: 50 | :maxdepth: 1 51 | :caption: Examples 52 | 53 | use_cases/text 54 | use_cases/wikipedia 55 | use_cases/recommendation 56 | use_cases/votes 57 | use_cases/sport 58 | 59 | .. toctree:: 60 | :maxdepth: 1 61 | :caption: About 62 | 63 | authors 64 | history 65 | contributing 66 | genindex 67 | glossary 68 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=python -msphinx 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=sknetwork 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The Sphinx module was not found. Make sure you have Sphinx installed, 20 | echo.then set the SPHINXBUILD environment variable to point to the full 21 | echo.path of the 'sphinx-build' executable. Alternatively you may add the 22 | echo.Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/readme.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | -------------------------------------------------------------------------------- /docs/reference/classification.rst: -------------------------------------------------------------------------------- 1 | .. _classification: 2 | 3 | Classification 4 | ************** 5 | 6 | Node classification algorithms. 7 | 8 | The attribute ``labels_`` gives the label of each node of the graph. 9 | 10 | Diffusion 11 | --------- 12 | .. autoclass:: sknetwork.classification.DiffusionClassifier 13 | 14 | Nearest neighbors 15 | ----------------- 16 | .. autoclass:: sknetwork.classification.NNClassifier 17 | 18 | Propagation 19 | ----------- 20 | .. autoclass:: sknetwork.classification.Propagation 21 | 22 | PageRank 23 | -------- 24 | .. autoclass:: sknetwork.classification.PageRankClassifier 25 | 26 | Metrics 27 | ------- 28 | .. autofunction:: sknetwork.classification.get_accuracy_score 29 | 30 | .. autofunction:: sknetwork.classification.get_f1_score 31 | 32 | .. autofunction:: sknetwork.classification.get_f1_scores 33 | 34 | .. autofunction:: sknetwork.classification.get_average_f1_score 35 | 36 | .. autofunction:: sknetwork.classification.get_confusion_matrix 37 | 38 | -------------------------------------------------------------------------------- /docs/reference/data.rst: -------------------------------------------------------------------------------- 1 | .. _data: 2 | 3 | Data 4 | #### 5 | 6 | Tools for loading and saving graphs. 
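As a minimal sketch of the typical workflow (the node names below are purely illustrative), a graph can be built from a list of edges and handled through its sparse adjacency matrix:

.. code-block:: python

    from sknetwork.data import from_edge_list

    edge_list = [('Alice', 'Bob'), ('Bob', 'Carol'), ('Carol', 'Alice')]
    graph = from_edge_list(edge_list)
    adjacency = graph.adjacency  # sparse CSR matrix
    names = graph.names          # array of node names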
7 | 8 | Edge list 9 | ********* 10 | 11 | .. autofunction:: sknetwork.data.from_edge_list 12 | 13 | Adjacency list 14 | ************** 15 | 16 | .. autofunction:: sknetwork.data.from_adjacency_list 17 | 18 | Files 19 | ***** 20 | 21 | Check the :ref:`tutorial` for importing graphs from dataframes. 22 | 23 | .. autofunction:: sknetwork.data.from_csv 24 | 25 | .. autofunction:: sknetwork.data.from_graphml 26 | 27 | Datasets 28 | ******** 29 | 30 | .. autofunction:: sknetwork.data.load_netset 31 | 32 | .. autofunction:: sknetwork.data.load_konect 33 | 34 | You can also find some datasets on NetRep_. 35 | 36 | .. _NetRep: http://networkrepository.com/ 37 | 38 | Toy graphs 39 | ********** 40 | 41 | .. autofunction:: sknetwork.data.house 42 | 43 | .. autofunction:: sknetwork.data.bow_tie 44 | 45 | .. autofunction:: sknetwork.data.karate_club 46 | 47 | .. autofunction:: sknetwork.data.miserables 48 | 49 | .. autofunction:: sknetwork.data.painters 50 | 51 | .. autofunction:: sknetwork.data.star_wars 52 | 53 | .. autofunction:: sknetwork.data.movie_actor 54 | 55 | .. autofunction:: sknetwork.data.art_philo_science 56 | 57 | Models 58 | ****** 59 | 60 | .. autofunction:: sknetwork.data.linear_graph 61 | 62 | .. autofunction:: sknetwork.data.linear_digraph 63 | 64 | .. autofunction:: sknetwork.data.cyclic_graph 65 | 66 | .. autofunction:: sknetwork.data.cyclic_digraph 67 | 68 | .. autofunction:: sknetwork.data.grid 69 | 70 | .. autofunction:: sknetwork.data.erdos_renyi 71 | 72 | .. autofunction:: sknetwork.data.block_model 73 | 74 | .. autofunction:: sknetwork.data.albert_barabasi 75 | 76 | .. autofunction:: sknetwork.data.watts_strogatz 77 | 78 | Save 79 | **** 80 | 81 | .. autofunction:: sknetwork.data.save 82 | 83 | .. autofunction:: sknetwork.data.load 84 | -------------------------------------------------------------------------------- /docs/reference/embedding.rst: -------------------------------------------------------------------------------- 1 | .. _embedding: 2 | 3 | Embedding 4 | ********* 5 | 6 | Graph embedding algorithms. 7 | 8 | The attribute ``embedding_`` assigns a vector to each node of the graph. 9 | 10 | Spectral 11 | -------- 12 | 13 | .. autoclass:: sknetwork.embedding.Spectral 14 | 15 | SVD 16 | --- 17 | 18 | .. autoclass:: sknetwork.embedding.SVD 19 | 20 | GSVD 21 | ---- 22 | 23 | .. autoclass:: sknetwork.embedding.GSVD 24 | 25 | PCA 26 | --- 27 | 28 | .. autoclass:: sknetwork.embedding.PCA 29 | 30 | Random Projection 31 | ----------------- 32 | 33 | .. autoclass:: sknetwork.embedding.RandomProjection 34 | 35 | Louvain 36 | ------- 37 | 38 | .. autoclass:: sknetwork.embedding.LouvainEmbedding 39 | 40 | 41 | Force Atlas 42 | ----------- 43 | 44 | .. autoclass:: sknetwork.embedding.ForceAtlas 45 | 46 | Spring 47 | ------ 48 | 49 | .. autoclass:: sknetwork.embedding.Spring 50 | -------------------------------------------------------------------------------- /docs/reference/gnn.rst: -------------------------------------------------------------------------------- 1 | .. _gnn: 2 | 3 | GNN 4 | *** 5 | 6 | Graph Neural Network. 7 | 8 | Classifier 9 | ---------- 10 | 11 | The attribute ``labels_`` assigns a label to each node of the graph. 12 | 13 | .. autoclass:: sknetwork.gnn.GNNClassifier 14 | 15 | Convolution layers 16 | ------------------ 17 | 18 | .. autoclass:: sknetwork.gnn.Convolution 19 | 20 | Activation functions 21 | -------------------- 22 | 23 | .. autoclass:: sknetwork.gnn.BaseActivation 24 | .. autoclass:: sknetwork.gnn.ReLu 25 | .. 
autoclass:: sknetwork.gnn.Sigmoid 26 | .. autoclass:: sknetwork.gnn.Softmax 27 | 28 | Loss functions 29 | -------------- 30 | 31 | .. autoclass:: sknetwork.gnn.BaseLoss 32 | .. autoclass:: sknetwork.gnn.CrossEntropy 33 | .. autoclass:: sknetwork.gnn.BinaryCrossEntropy 34 | 35 | Optimizers 36 | ---------- 37 | 38 | .. autoclass:: sknetwork.gnn.BaseOptimizer 39 | .. autoclass:: sknetwork.gnn.ADAM 40 | .. autoclass:: sknetwork.gnn.GD 41 | -------------------------------------------------------------------------------- /docs/reference/hierarchy.rst: -------------------------------------------------------------------------------- 1 | .. _hierarchy: 2 | 3 | Hierarchy 4 | ********* 5 | 6 | Hierarchical clustering algorithms. 7 | 8 | The attribute ``dendrogram_`` gives the dendrogram. 9 | 10 | A dendrogram is an array of size :math:`(n-1) \times 4` representing the successive merges of nodes. 11 | Each row gives the two merged nodes, their distance and the size of the resulting cluster. 12 | Any new node resulting from a merge takes the first available index 13 | (e.g., the first merge corresponds to node :math:`n`). 14 | 15 | Paris 16 | ----- 17 | .. autoclass:: sknetwork.hierarchy.Paris 18 | 19 | Louvain 20 | ------- 21 | .. autoclass:: sknetwork.hierarchy.LouvainHierarchy 22 | 23 | .. autoclass:: sknetwork.hierarchy.LouvainIteration 24 | 25 | Metrics 26 | ------- 27 | .. autofunction:: sknetwork.hierarchy.dasgupta_cost 28 | 29 | .. autofunction:: sknetwork.hierarchy.dasgupta_score 30 | 31 | .. autofunction:: sknetwork.hierarchy.tree_sampling_divergence 32 | 33 | Cuts 34 | ---- 35 | .. autofunction:: sknetwork.hierarchy.cut_straight 36 | 37 | .. autofunction:: sknetwork.hierarchy.cut_balanced 38 | 39 | Dendrograms 40 | ----------- 41 | .. autofunction:: sknetwork.hierarchy.aggregate_dendrogram 42 | 43 | .. autofunction:: sknetwork.hierarchy.reorder_dendrogram 44 | -------------------------------------------------------------------------------- /docs/reference/linalg.rst: -------------------------------------------------------------------------------- 1 | .. _linalg: 2 | 3 | Linear algebra 4 | ************** 5 | 6 | Tools of linear algebra. 7 | 8 | Normalization 9 | ------------- 10 | 11 | .. autofunction:: sknetwork.linalg.normalize 12 | 13 | .. autofunction:: sknetwork.linalg.diagonal_pseudo_inverse 14 | 15 | Sparse + Low Rank 16 | ----------------- 17 | 18 | .. autoclass:: sknetwork.linalg.SparseLR 19 | 20 | Solvers 21 | ------- 22 | 23 | .. autoclass:: sknetwork.linalg.LanczosEig 24 | 25 | .. _lanczossvd: 26 | .. autoclass:: sknetwork.linalg.LanczosSVD 27 | -------------------------------------------------------------------------------- /docs/reference/linkpred.rst: -------------------------------------------------------------------------------- 1 | .. _linkpred: 2 | 3 | Link prediction 4 | *************** 5 | 6 | Link prediction algorithms. 7 | 8 | The attribute ``links_`` gives the predicted links of each node as a sparse matrix. 9 | 10 | 11 | Nearest neighbors 12 | ----------------- 13 | 14 | .. autoclass:: sknetwork.linkpred.NNLinker 15 | 16 | -------------------------------------------------------------------------------- /docs/reference/path.rst: -------------------------------------------------------------------------------- 1 | .. _path: 2 | 3 | Path 4 | **** 5 | 6 | Distances 7 | --------- 8 | 9 | .. autofunction:: sknetwork.path.get_distances 10 | 11 | 12 | Shortest paths 13 | -------------- 14 | 15 | .. 
autofunction:: sknetwork.path.get_shortest_path 16 | 17 | 18 | Search 19 | ------ 20 | 21 | .. autofunction:: sknetwork.path.breadth_first_search 22 | -------------------------------------------------------------------------------- /docs/reference/ranking.rst: -------------------------------------------------------------------------------- 1 | .. _ranking: 2 | 3 | Ranking 4 | ******* 5 | 6 | Node ranking algorithms. 7 | 8 | The attribute ``scores_`` assigns a score of importance to each node of the graph. 9 | 10 | PageRank 11 | -------- 12 | .. autoclass:: sknetwork.ranking.PageRank 13 | 14 | Katz 15 | ---- 16 | .. autoclass:: sknetwork.ranking.Katz 17 | 18 | HITS 19 | ---- 20 | .. autoclass:: sknetwork.ranking.HITS 21 | 22 | Betweenness centrality 23 | ---------------------- 24 | .. autoclass:: sknetwork.ranking.Betweenness 25 | 26 | Closeness centrality 27 | -------------------- 28 | .. autoclass:: sknetwork.ranking.Closeness 29 | 30 | Post-processing 31 | --------------- 32 | .. autofunction:: sknetwork.ranking.top_k 33 | -------------------------------------------------------------------------------- /docs/reference/regression.rst: -------------------------------------------------------------------------------- 1 | .. _regression: 2 | 3 | Regression 4 | ********** 5 | 6 | Regression algorithms. 7 | 8 | The attribute ``values_`` assigns a value to each node of the graph. 9 | 10 | Diffusion 11 | --------- 12 | .. autoclass:: sknetwork.regression.Diffusion 13 | 14 | Dirichlet 15 | --------- 16 | .. autoclass:: sknetwork.regression.Dirichlet 17 | 18 | -------------------------------------------------------------------------------- /docs/reference/topology.rst: -------------------------------------------------------------------------------- 1 | .. _topology: 2 | 3 | Topology 4 | ******** 5 | 6 | Functions related to graph topology. 7 | 8 | Connectivity 9 | ------------ 10 | 11 | .. autofunction:: sknetwork.topology.get_connected_components 12 | 13 | .. autofunction:: sknetwork.topology.is_connected 14 | 15 | .. autofunction:: sknetwork.topology.get_largest_connected_component 16 | 17 | Structure 18 | --------- 19 | 20 | .. autofunction:: sknetwork.topology.is_bipartite 21 | 22 | Cycles 23 | ------ 24 | 25 | .. autofunction:: sknetwork.topology.is_acyclic 26 | 27 | .. autofunction:: sknetwork.topology.get_cycles 28 | 29 | .. autofunction:: sknetwork.topology.break_cycles 30 | 31 | Core decomposition 32 | ------------------ 33 | 34 | .. autoclass:: sknetwork.topology.get_core_decomposition 35 | 36 | 37 | Triangles 38 | --------- 39 | 40 | .. autoclass:: sknetwork.topology.count_triangles 41 | 42 | .. autoclass:: sknetwork.topology.get_clustering_coefficient 43 | 44 | 45 | Cliques 46 | ------- 47 | 48 | .. autoclass:: sknetwork.topology.count_cliques 49 | 50 | 51 | Isomorphism 52 | ----------- 53 | 54 | .. autoclass:: sknetwork.topology.color_weisfeiler_lehman 55 | 56 | .. autofunction:: sknetwork.topology.are_isomorphic 57 | -------------------------------------------------------------------------------- /docs/reference/utils.rst: -------------------------------------------------------------------------------- 1 | .. _utils: 2 | 3 | Utils 4 | ***** 5 | 6 | Various tools for graph analysis. 7 | 8 | 9 | Convert graphs 10 | -------------- 11 | 12 | .. autofunction:: sknetwork.utils.directed2undirected 13 | 14 | .. autofunction:: sknetwork.utils.bipartite2undirected 15 | 16 | .. autofunction:: sknetwork.utils.bipartite2directed 17 | 18 | 19 | Neighborhood 20 | ------------ 21 | 22 | .. 
autofunction:: sknetwork.utils.get_degrees 23 | 24 | .. autofunction:: sknetwork.utils.get_weights 25 | 26 | .. autofunction:: sknetwork.utils.get_neighbors 27 | 28 | 29 | Membership matrix 30 | ----------------- 31 | 32 | .. autofunction:: sknetwork.utils.get_membership 33 | 34 | .. autofunction:: sknetwork.utils.from_membership 35 | 36 | TF-IDF 37 | ------ 38 | 39 | .. autofunction:: sknetwork.utils.get_tfidf 40 | 41 | -------------------------------------------------------------------------------- /docs/reference/visualization.rst: -------------------------------------------------------------------------------- 1 | .. _visualization: 2 | 3 | Visualization 4 | ************* 5 | 6 | Visualization tools. 7 | 8 | Graphs 9 | ------ 10 | 11 | .. autofunction:: sknetwork.visualization.graphs.visualize_graph 12 | 13 | .. autofunction:: sknetwork.visualization.graphs.visualize_bigraph 14 | 15 | Dendrograms 16 | ----------- 17 | 18 | .. autofunction:: sknetwork.visualization.dendrograms.visualize_dendrogram 19 | -------------------------------------------------------------------------------- /docs/tutorials/classification/index.rst: -------------------------------------------------------------------------------- 1 | Classification 2 | ************** 3 | 4 | 5 | .. toctree:: 6 | 7 | pagerank 8 | diffusion 9 | propagation 10 | knn 11 | metrics 12 | -------------------------------------------------------------------------------- /docs/tutorials/clustering/index.rst: -------------------------------------------------------------------------------- 1 | .. _ClusteringTag: 2 | 3 | Clustering 4 | ********** 5 | 6 | 7 | .. toctree:: 8 | 9 | louvain 10 | leiden 11 | kcenters 12 | propagation 13 | -------------------------------------------------------------------------------- /docs/tutorials/data/index.rst: -------------------------------------------------------------------------------- 1 | .. _DataTag: 2 | 3 | Data 4 | **** 5 | 6 | .. 
toctree:: 7 | 8 | load_data 9 | load_collection 10 | models 11 | toy_graphs 12 | save 13 | -------------------------------------------------------------------------------- /docs/tutorials/data/iris.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sknetwork-team/scikit-network/07101d78b76510d7ebf7b625f314cf786ef426c5/docs/tutorials/data/iris.p -------------------------------------------------------------------------------- /docs/tutorials/data/movie_actor.tsv: -------------------------------------------------------------------------------- 1 | Inception Leonardo DiCaprio 2 | Inception Marion Cotillard 3 | Inception Joseph Gordon Lewitt 4 | The Dark Knight Rises Marion Cotillard 5 | The Dark Knight Rises Joseph Gordon Lewitt 6 | The Dark Knight Rises Christian Bale 7 | The Big Short Christian Bale 8 | The Big Short Ryan Gosling 9 | The Big Short Brad Pitt 10 | The Big Short Steve Carell 11 | Drive Ryan Gosling 12 | Drive Carey Mulligan 13 | The Great Gatsby Leonardo DiCaprio 14 | The Great Gatsby Carey Mulligan 15 | La La Land Ryan Gosling 16 | La La Land Emma Stone 17 | Crazy Stupid Love Ryan Gosling 18 | Crazy Stupid Love Emma Stone 19 | Crazy Stupid Love Steve Carell 20 | Vice Christian Bale 21 | Vice Steve Carell 22 | The Grand Budapest Hotel Lea Seydoux 23 | The Grand Budapest Hotel Ralph Fiennes 24 | The Grand Budapest Hotel Jude Law 25 | The Grand Budapest Hotel Willem Dafoe 26 | The Grand Budapest Hotel Owen Wilson 27 | Aviator Leonardo DiCaprio 28 | Aviator Jude Law 29 | Aviator Willem Dafoe 30 | 007 Spectre Lea Seydoux 31 | 007 Spectre Ralph Fiennes 32 | Inglourious Basterds Brad Pitt 33 | Inglourious Basterds Lea Seydoux 34 | Inglourious Basterds Christophe Waltz 35 | Midnight In Paris Marion Cotillard 36 | Midnight In Paris Lea Seydoux 37 | Midnight In Paris Owen Wilson 38 | Murder on the Orient Express Willem Dafoe 39 | Murder on the Orient Express Johnny Depp 40 | Fantastic Beasts 2 Jude Law 41 | Fantastic Beasts 2 Johnny Depp 42 | -------------------------------------------------------------------------------- /docs/tutorials/data/mygraph/names.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sknetwork-team/scikit-network/07101d78b76510d7ebf7b625f314cf786ef426c5/docs/tutorials/data/mygraph/names.npy -------------------------------------------------------------------------------- /docs/tutorials/data/painters.tsv: -------------------------------------------------------------------------------- 1 | Pablo Picasso Edouard Manet 2 | Pablo Picasso Henri Matisse 3 | Claude Monet Edouard Manet 4 | Claude Monet Pierre-Auguste Renoir 5 | Michelangelo Leonardo da Vinci 6 | Edouard Manet Pablo Picasso 7 | Edouard Manet Claude Monet 8 | Edouard Manet Edgar Degas 9 | Edouard Manet Paul Cezanne 10 | Edouard Manet Pierre-Auguste Renoir 11 | Peter Paul Rubens Michelangelo 12 | Peter Paul Rubens Rembrandt 13 | Peter Paul Rubens Leonardo da Vinci 14 | Rembrandt Michelangelo 15 | Rembrandt Peter Paul Rubens 16 | Rembrandt Vincent van Gogh 17 | Rembrandt Leonardo da Vinci 18 | Gustav Klimt Pablo Picasso 19 | Gustav Klimt Egon Schiele 20 | Edgar Degas Claude Monet 21 | Edgar Degas Michelangelo 22 | Edgar Degas Edouard Manet 23 | Edgar Degas Vincent van Gogh 24 | Edgar Degas Paul Cezanne 25 | Edgar Degas Pierre-Auguste Renoir 26 | Vincent van Gogh Pablo Picasso 27 | Vincent van Gogh Claude Monet 28 | Vincent van Gogh Peter Paul Rubens 29 | Vincent van Gogh 
Rembrandt 30 | Vincent van Gogh Edgar Degas 31 | Vincent van Gogh Henri Matisse 32 | Vincent van Gogh Paul Cezanne 33 | Leonardo da Vinci Michelangelo 34 | Leonardo da Vinci Peter Paul Rubens 35 | Henri Matisse Pablo Picasso 36 | Henri Matisse Edouard Manet 37 | Henri Matisse Vincent van Gogh 38 | Henri Matisse Paul Cezanne 39 | Henri Matisse Pierre-Auguste Renoir 40 | Paul Cezanne Pablo Picasso 41 | Paul Cezanne Claude Monet 42 | Paul Cezanne Edouard Manet 43 | Paul Cezanne Henri Matisse 44 | Paul Cezanne Pierre-Auguste Renoir 45 | Pierre-Auguste Renoir Claude Monet 46 | Pierre-Auguste Renoir Edouard Manet 47 | Pierre-Auguste Renoir Peter Paul Rubens 48 | Pierre-Auguste Renoir Edgar Degas 49 | Egon Schiele Gustav Klimt 50 | Egon Schiele Vincent van Gogh 51 | -------------------------------------------------------------------------------- /docs/tutorials/embedding/index.rst: -------------------------------------------------------------------------------- 1 | Embedding 2 | ********* 3 | 4 | 5 | .. toctree:: 6 | 7 | spectral 8 | svd 9 | gsvd 10 | pca 11 | random_projection 12 | louvain_embedding 13 | spring 14 | forceatlas 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /docs/tutorials/gnn/index.rst: -------------------------------------------------------------------------------- 1 | GNN 2 | ************** 3 | 4 | 5 | .. toctree:: 6 | 7 | gnn_classifier 8 | -------------------------------------------------------------------------------- /docs/tutorials/hierarchy/index.rst: -------------------------------------------------------------------------------- 1 | Hierarchy 2 | ********* 3 | 4 | 5 | .. toctree:: 6 | 7 | paris 8 | louvain_recursion 9 | louvain_iteration 10 | -------------------------------------------------------------------------------- /docs/tutorials/linkpred/index.rst: -------------------------------------------------------------------------------- 1 | Link prediction 2 | *************** 3 | 4 | 5 | .. toctree:: 6 | 7 | nn 8 | -------------------------------------------------------------------------------- /docs/tutorials/overview/index.rst: -------------------------------------------------------------------------------- 1 | .. _OverviewTag: 2 | 3 | Overview 4 | ******** 5 | 6 | .. 
toctree:: 7 | 8 | get_started 9 | -------------------------------------------------------------------------------- /docs/tutorials/overview/movie_actor.tsv: -------------------------------------------------------------------------------- 1 | Inception Leonardo DiCaprio 2 | Inception Marion Cotillard 3 | Inception Joseph Gordon Lewitt 4 | The Dark Knight Rises Marion Cotillard 5 | The Dark Knight Rises Joseph Gordon Lewitt 6 | The Dark Knight Rises Christian Bale 7 | The Big Short Christian Bale 8 | The Big Short Ryan Gosling 9 | The Big Short Brad Pitt 10 | The Big Short Steve Carell 11 | Drive Ryan Gosling 12 | Drive Carey Mulligan 13 | The Great Gatsby Leonardo DiCaprio 14 | The Great Gatsby Carey Mulligan 15 | La La Land Ryan Gosling 16 | La La Land Emma Stone 17 | Crazy Stupid Love Ryan Gosling 18 | Crazy Stupid Love Emma Stone 19 | Crazy Stupid Love Steve Carell 20 | Vice Christian Bale 21 | Vice Steve Carell 22 | The Grand Budapest Hotel Lea Seydoux 23 | The Grand Budapest Hotel Ralph Fiennes 24 | The Grand Budapest Hotel Jude Law 25 | The Grand Budapest Hotel Willem Dafoe 26 | The Grand Budapest Hotel Owen Wilson 27 | Aviator Leonardo DiCaprio 28 | Aviator Jude Law 29 | Aviator Willem Dafoe 30 | 007 Spectre Lea Seydoux 31 | 007 Spectre Ralph Fiennes 32 | Inglourious Basterds Brad Pitt 33 | Inglourious Basterds Lea Seydoux 34 | Inglourious Basterds Christophe Waltz 35 | Midnight In Paris Marion Cotillard 36 | Midnight In Paris Lea Seydoux 37 | Midnight In Paris Owen Wilson 38 | Murder on the Orient Express Willem Dafoe 39 | Murder on the Orient Express Johnny Depp 40 | Fantastic Beasts 2 Jude Law 41 | Fantastic Beasts 2 Johnny Depp 42 | -------------------------------------------------------------------------------- /docs/tutorials/path/index.rst: -------------------------------------------------------------------------------- 1 | Path 2 | **** 3 | 4 | 5 | .. toctree:: 6 | 7 | distance 8 | shortest_path 9 | -------------------------------------------------------------------------------- /docs/tutorials/ranking/index.rst: -------------------------------------------------------------------------------- 1 | Ranking 2 | ******* 3 | 4 | 5 | .. toctree:: 6 | 7 | pagerank 8 | katz 9 | -------------------------------------------------------------------------------- /docs/tutorials/regression/index.rst: -------------------------------------------------------------------------------- 1 | Regression 2 | ********** 3 | 4 | 5 | .. toctree:: 6 | 7 | diffusion 8 | dirichlet 9 | -------------------------------------------------------------------------------- /docs/tutorials/topology/index.rst: -------------------------------------------------------------------------------- 1 | Topology 2 | ******** 3 | 4 | 5 | .. toctree:: 6 | 7 | connected_components 8 | cycles 9 | core_decomposition 10 | cliques 11 | isomorphism 12 | -------------------------------------------------------------------------------- /docs/tutorials/visualization/index.rst: -------------------------------------------------------------------------------- 1 | Visualization 2 | ************* 3 | 4 | 5 | .. 
toctree:: 6 | 7 | graphs 8 | paths 9 | dendrograms 10 | pie_charts 11 | 12 | -------------------------------------------------------------------------------- /images/logo_sknetwork.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sknetwork-team/scikit-network/07101d78b76510d7ebf7b625f314cf786ef426c5/images/logo_sknetwork.png -------------------------------------------------------------------------------- /images/logo_sknetwork.xml: -------------------------------------------------------------------------------- 1 | 7Vhbb9owGP01eQQ5d3gEWph2kSoxadLeTGKCW8fOEgNhv3527IQ4pCorF6naVInGx7d85zvfRbHcWVoucphtvrEYEcsBcWm5D5bjjL1A/ErgoIAg8BSQ5DhWkH0Elvg30iDQ6BbHqDAWcsYIx5kJRoxSFHEDg3nO9uayNSPmrRlM0AmwjCA5RX/gmG8UOnLCI/4J4WRT32wHYzWTwnqxtqTYwJjtW5D7aLmznDGuntJyhojkruZF7Zu/Mtu8WI4oP2uDr3bsINlq4/SL8UNt7X6DOVpmMJLjvXCo5U43PCViZItHWGSK4zUukTh2usaEzBhhucAoo0hCjHLtxhDIPQQnVAwi8ZpIrJsWPGcvqLPr1Bht3w7lHJUtSBu3QCxFPD+IJfVsoIk+1K7Q4/3Rby7Q2KblswaEWitJc/aRTvGgGX2FXe9tdqUtWEjrK1wh8sQKzDGT1KwY5ywVLNQLJpozzjoOEBrK5GFpmchoG65ggaOhoI3PGI0eCRFhgSqOBTShSXUzGIa+wBCNjwgArj1ygtDzfeCNAnc8CqSv8qgWORjaTsfBluNOqz/Tyx64oVMdr+NU/9SpzZq2U5sEcolTnbd9ihrOL4kdQe18PgLgllTaoUllQ1uLyiaG2lQ24CVUunekEoBwPp/fkErX6ajS6aGyL9XY10g1QQ+VARE3TGO8MygNfm1ldZkKs/hAJ+KJLJZ1Lq4qIuWDogplOeWDrKwm6r3iKdH/qzuKDNLeS849qIjwC+b1ccJYdaJ5i4ArY2q0IxVpkKmJHImr4UqlNzHOGKa8ItmfWv6DVM2WM/V6SkTdutRNvimOY4KUxRGmyXeZjB8Gnpn9fHAlSXnd6PRPJNUXnM4VBBXeUlDhZYJawxSTgzpK2JYnGL7vGrn+HN2+ehBFfM/yl5ZC1ZEnwv3Qeg6vpGeRIw09u+BUz02NNruxywVdfll+/rlbPD//Zn40gd5TNoGDc/rf/x3a37cVtpm4/Lt2aP2e7quQH7Zv6xSG4K5tWz+/fQXjwzZz9rgj4Ls2c/0Ej25ZkR1wrR7v7ZN01V0r7x/XLXQpb2285DrVUw7+jRLtVOlGAkYamqswuUJEhJ0vKUFPRPR+SXlHxhHD4yewaq71HdF9/AM= 2 | -------------------------------------------------------------------------------- /images/logo_sknetwork_long.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sknetwork-team/scikit-network/07101d78b76510d7ebf7b625f314cf786ef426c5/images/logo_sknetwork_long.png -------------------------------------------------------------------------------- /make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /movie_actor.tsv: -------------------------------------------------------------------------------- 1 | Inception Leonardo DiCaprio 2 | Inception Marion Cotillard 3 | Inception Joseph Gordon Lewitt 4 | The Dark Knight Rises Marion Cotillard 5 | The Dark Knight Rises Joseph Gordon Lewitt 6 | The Dark Knight Rises Christian Bale 7 | The Big Short Christian Bale 8 | The Big Short Ryan Gosling 9 | The Big Short Brad Pitt 10 | The Big Short Steve Carell 11 | Drive Ryan Gosling 12 | Drive Carey Mulligan 13 | The Great Gatsby Leonardo DiCaprio 14 | The Great Gatsby Carey Mulligan 15 | La La Land Ryan Gosling 16 | La La Land Emma Stone 17 | Crazy Stupid Love Ryan Gosling 18 | Crazy Stupid Love Emma Stone 19 | Crazy Stupid Love Steve Carell 20 | Vice Christian Bale 21 | Vice Steve Carell 22 | The Grand Budapest Hotel Lea Seydoux 23 | The Grand Budapest Hotel Ralph Fiennes 24 | The Grand Budapest Hotel Jude Law 25 | The Grand Budapest Hotel Willem Dafoe 26 | The Grand Budapest Hotel Owen Wilson 27 | Aviator Leonardo DiCaprio 28 | Aviator Jude Law 29 | Aviator Willem Dafoe 30 | 007 Spectre Lea Seydoux 31 | 007 Spectre Ralph Fiennes 32 | Inglourious Basterds Brad Pitt 33 | Inglourious Basterds Lea Seydoux 34 | Inglourious Basterds Christophe Waltz 35 | Midnight In Paris Marion Cotillard 36 | Midnight In Paris Lea Seydoux 37 | Midnight In Paris Owen Wilson 38 | Murder on the Orient Express Willem Dafoe 39 | Murder on the Orient Express Johnny Depp 40 | Fantastic Beasts 2 Jude Law 41 | Fantastic Beasts 2 Johnny Depp 42 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "numpy", 4 | "setuptools", 5 | "scipy", 6 | "pytest-runner", 7 | "cython <= 3.0.12", 8 | ] 9 | build-backend = "setuptools.build_meta" 10 | 11 | 12 | [project] 13 | name = "scikit-network" 14 | version = "0.33.3" 15 | dependencies = ['numpy>=1.22.4', 'scipy>=1.7.3'] 16 | authors = [{name = "Scikit-network team"}] 17 | maintainers = [{name = "Thomas Bonald", email = "bonald@enst.fr"}] 18 | description = "Graph algorithms" 19 | readme = "README.rst" 20 | license = {text = "BSD License"} 21 | keywords = ["sknetwork"] 22 | classifiers=[ 23 | 'Development Status :: 3 - Alpha', 24 | 'Intended Audience :: Developers', 25 | 'Intended Audience :: Information Technology', 26 | 'Intended Audience :: Education', 27 | 'Intended Audience :: Science/Research', 28 | 'License :: OSI Approved :: BSD License', 29 | 'Natural Language :: English', 30 | 'Programming Language :: Cython', 31 | 'Programming Language :: Python :: 3.9', 32 | 'Programming Language :: Python :: 3.10', 33 | 'Programming Language :: Python :: 3.11', 34 | 'Programming Language :: Python :: 3.12', 35 | 'Programming Language :: Python :: 3.13' 36 | ] 37 | requires-python = ">= 3.9" 38 | 39 | [project.optional-dependencies] 40 | test = ["pytest", "note", "pluggy>=0.7.1"] 41 | 42 | [project.urls] 43 | Repository = "https://github.com/sknetwork-team/scikit-network" 44 | Documentation = "https://scikit-network.readthedocs.io/" 45 | Changelog = 
"https://github.com/sknetwork-team/scikit-network/blob/master/HISTORY.rst" 46 | 47 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | # standard Python package manager 2 | pip>=22.0.4 3 | 4 | # build 5 | build>=1.2.2 6 | 7 | # base dependencies 8 | numpy>=1.26.4 9 | scipy>=1.7.3 10 | 11 | # tool to automatically change version number in the package upon release 12 | bumpversion>=0.6.0 13 | 14 | # build wheels for the package 15 | wheel>=0.42.0 16 | 17 | # standard package to produce coverage from test runs 18 | # coverage>=6.2 19 | 20 | # used to upload wheels and sources to PyPI 21 | twine>=3.7.1 22 | 23 | # main doc tool 24 | Sphinx>=7.2.6 25 | 26 | # base theme for the documentation website 27 | sphinx-rtd-theme>=1.0.0 28 | 29 | # pinned in order to fix this issue: https://readthedocs.org/projects/scikit-network/builds/11876754/ 30 | Pygments>=2.15.0 31 | 32 | # used to produce doc from Jupyter notebooks (aka. tutorials) 33 | nbsphinx>=0.9.3 34 | 35 | # used to run Jupyter notebooks (necessary for nbsphinx) 36 | ipython>=8.10.0 37 | jupyter_client>=7.1.0 38 | ipykernel>=6.6.1 39 | pandas>=1.3.5 40 | 41 | # tests on Linux images (upon release) 42 | nose>=1.3.7 43 | 44 | # python package tool 45 | setuptools>=69.5.1 46 | 47 | # main test packages 48 | pytest-runner>=5.3.1 49 | pytest>=6.2.5 50 | pytest-cov>=3.0.0 51 | 52 | # C++ code generator for performance speed-up 53 | cython>=3.0.8, <3.1 54 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.33.2 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | search = version='{current_version}' 8 | replace = version='{new_version}' 9 | 10 | [bumpversion:file:sknetwork/__init__.py] 11 | search = __version__ = '{current_version}' 12 | replace = __version__ = '{new_version}' 13 | 14 | [flake8] 15 | exclude = docs 16 | 17 | [aliases] 18 | test = pytest 19 | 20 | [tool:pytest] 21 | collect_ignore = ['setup.py'] 22 | -------------------------------------------------------------------------------- /sknetwork/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Top-level package for scikit-network""" 4 | 5 | __author__ = """scikit-network team""" 6 | __email__ = "thomas.bonald@telecom-paris.fr" 7 | __version__ = '0.33.0' 8 | 9 | import sknetwork.topology 10 | import sknetwork.path 11 | import sknetwork.classification 12 | import sknetwork.clustering 13 | import sknetwork.embedding 14 | import sknetwork.hierarchy 15 | import sknetwork.linalg 16 | import sknetwork.linkpred 17 | import sknetwork.ranking 18 | import sknetwork.data 19 | import sknetwork.utils 20 | import sknetwork.visualization 21 | import sknetwork.gnn 22 | -------------------------------------------------------------------------------- /sknetwork/base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created in June 2019 5 | @author: Quentin Lutz 6 | """ 7 | import inspect 8 | 9 | 10 | class Algorithm: 11 | """Base class for all algorithms. 12 | """ 13 | def get_params(self): 14 | """Get parameters as dictionary. 
15 | 16 | Returns 17 | ------- 18 | params : dict 19 | Parameters of the algorithm. 20 | """ 21 | signature = inspect.signature(self.__class__.__init__) 22 | params_exclude = ['self', 'random_state', 'verbose'] 23 | params = dict() 24 | for param in signature.parameters.values(): 25 | name = param.name 26 | if name not in params_exclude: 27 | try: 28 | value = self.__dict__[name] 29 | except KeyError: 30 | continue 31 | params[name] = value 32 | return params 33 | 34 | def set_params(self, params: dict) -> 'Algorithm': 35 | """Set parameters of the algorithm. 36 | 37 | Parameters 38 | ---------- 39 | params : dict 40 | Parameters of the algorithm. 41 | 42 | Returns 43 | ------- 44 | self : :class:`Algorithm` 45 | """ 46 | valid_params = self.get_params() 47 | if type(params) is not dict: 48 | raise ValueError('The parameters must be given as a dictionary.') 49 | for name, value in params.items(): 50 | if name not in valid_params: 51 | raise ValueError(f'Invalid parameter: {name}.') 52 | setattr(self, name, value) 53 | return self 54 | 55 | def __repr__(self): 56 | params_string = [] 57 | for name, value in self.get_params().items(): 58 | if type(value) == str: 59 | value = "'" + value + "'" 60 | else: 61 | value = str(value) 62 | params_string.append(name + '=' + value) 63 | return self.__class__.__name__ + '(' + ', '.join(params_string) + ')' 64 | 65 | def fit(self, *args, **kwargs): 66 | """Fit algorithm to data.""" 67 | raise NotImplementedError 68 | -------------------------------------------------------------------------------- /sknetwork/classification/__init__.py: -------------------------------------------------------------------------------- 1 | """classification module""" 2 | from sknetwork.classification.base import BaseClassifier 3 | from sknetwork.classification.diffusion import DiffusionClassifier 4 | from sknetwork.classification.knn import NNClassifier 5 | from sknetwork.classification.metrics import get_accuracy_score, get_confusion_matrix, get_f1_score, get_f1_scores, \ 6 | get_average_f1_score 7 | from sknetwork.classification.pagerank import PageRankClassifier 8 | from sknetwork.classification.propagation import Propagation 9 | -------------------------------------------------------------------------------- /sknetwork/classification/pagerank.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created in March 2020 5 | @author: Nathan de Lara 6 | """ 7 | from typing import Optional 8 | 9 | import numpy as np 10 | 11 | from sknetwork.classification.base_rank import RankClassifier 12 | from sknetwork.ranking.pagerank import PageRank 13 | 14 | 15 | class PageRankClassifier(RankClassifier): 16 | """Node classification by multiple personalized PageRanks. 17 | 18 | Parameters 19 | ---------- 20 | damping_factor: float 21 | Probability to continue the random walk. 22 | solver : str 23 | Which solver to use: 'piteration', 'diteration', 'bicgstab', 'lanczos'. 24 | n_iter : int 25 | Number of iterations for some solvers such as ``'piteration'`` or ``'diteration'``. 26 | tol : float 27 | Tolerance for the convergence of some solvers such as ``'bicgstab'`` or ``'lanczos'``. 28 | 29 | Attributes 30 | ---------- 31 | labels_ : np.ndarray, shape (n_labels,) 32 | Labels of nodes. 33 | probs_ : sparse.csr_matrix, shape (n_row, n_labels) 34 | Probability distribution over labels. 35 | labels_row_ : np.ndarray 36 | Labels of rows, for bipartite graphs. 
37 | labels_col_ : np.ndarray 38 | Labels of columns, for bipartite graphs. 39 | probs_row_ : sparse.csr_matrix, shape (n_row, n_labels) 40 | Probability distributions over labels of rows, for bipartite graphs. 41 | probs_col_ : sparse.csr_matrix, shape (n_col, n_labels) 42 | Probability distributions over labels of columns, for bipartite graphs. 43 | 44 | Example 45 | ------- 46 | >>> from sknetwork.classification import PageRankClassifier 47 | >>> from sknetwork.data import karate_club 48 | >>> pagerank = PageRankClassifier() 49 | >>> graph = karate_club(metadata=True) 50 | >>> adjacency = graph.adjacency 51 | >>> labels_true = graph.labels 52 | >>> labels = {0: labels_true[0], 33: labels_true[33]} 53 | >>> labels_pred = pagerank.fit_predict(adjacency, labels) 54 | >>> float(np.round(np.mean(labels_pred == labels_true), 2)) 55 | 0.97 56 | 57 | References 58 | ---------- 59 | Lin, F., & Cohen, W. W. (2010). `Semi-supervised classification of network data using very few labels. 60 | `_ 61 | In IEEE International Conference on Advances in Social Networks Analysis and Mining. 62 | """ 63 | def __init__(self, damping_factor: float = 0.85, solver: str = 'piteration', n_iter: int = 10, tol: float = 0., 64 | n_jobs: Optional[int] = None, verbose: bool = False): 65 | algorithm = PageRank(damping_factor, solver, n_iter, tol) 66 | super(PageRankClassifier, self).__init__(algorithm, n_jobs, verbose) 67 | -------------------------------------------------------------------------------- /sknetwork/classification/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """tests for classification""" 2 | -------------------------------------------------------------------------------- /sknetwork/classification/tests/test_API.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for classification API""" 4 | 5 | import unittest 6 | 7 | from sknetwork.classification import * 8 | from sknetwork.data.test_graphs import * 9 | from sknetwork.embedding import LouvainEmbedding 10 | 11 | 12 | class TestClassificationAPI(unittest.TestCase): 13 | 14 | def test_undirected(self): 15 | for adjacency in [test_graph(), test_digraph()]: 16 | n = adjacency.shape[0] 17 | seeds_array = -np.ones(n) 18 | seeds_array[:2] = np.arange(2) 19 | seeds_dict = {0: 0, 1: 1} 20 | 21 | classifiers = [PageRankClassifier(), DiffusionClassifier(), 22 | NNClassifier(embedding_method=LouvainEmbedding(), n_neighbors=1), Propagation()] 23 | 24 | for algo in classifiers: 25 | labels1 = algo.fit_predict(adjacency, seeds_array) 26 | labels2 = algo.fit_predict(adjacency, seeds_dict) 27 | self.assertTrue((labels1 == labels2).all()) 28 | self.assertEqual(labels2.shape, (n,)) 29 | membership = algo.fit_transform(adjacency, seeds_array) 30 | self.assertTupleEqual(membership.shape, (n, 2)) 31 | -------------------------------------------------------------------------------- /sknetwork/classification/tests/test_knn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for KNN""" 4 | import unittest 5 | 6 | from sknetwork.classification import NNClassifier 7 | from sknetwork.data.test_graphs import * 8 | from sknetwork.embedding import Spectral 9 | 10 | 11 | class TestKNNClassifier(unittest.TestCase): 12 | 13 | def test_classification(self): 14 | for adjacency in [test_graph(), test_digraph(), test_bigraph()]: 15 | 
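# Seed labels for nodes 0 and 1; every other node is unlabeled and must be predicted.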
labels = {0: 0, 1: 1} 16 | 17 | algo = NNClassifier(n_neighbors=1) 18 | labels_pred = algo.fit_predict(adjacency, labels) 19 | self.assertTrue(len(set(labels_pred)) == 2) 20 | 21 | algo = NNClassifier(n_neighbors=1, embedding_method=Spectral(2), normalize=False) 22 | labels_pred = algo.fit_predict(adjacency, labels) 23 | self.assertTrue(len(set(labels_pred)) == 2) 24 | -------------------------------------------------------------------------------- /sknetwork/classification/tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for classification metrics""" 4 | 5 | import unittest 6 | 7 | from sknetwork.classification.metrics import * 8 | 9 | 10 | class TestMetrics(unittest.TestCase): 11 | 12 | def setUp(self) -> None: 13 | self.labels_true = np.array([0, 1, 1, 2, 2, -1]) 14 | self.labels_pred1 = np.array([0, -1, 1, 2, 0, 0]) 15 | self.labels_pred2 = np.array([-1, -1, -1, -1, -1, 0]) 16 | 17 | def test_accuracy(self): 18 | self.assertEqual(get_accuracy_score(self.labels_true, self.labels_pred1), 0.75) 19 | with self.assertRaises(ValueError): 20 | get_accuracy_score(self.labels_true, self.labels_pred2) 21 | 22 | def test_confusion(self): 23 | confusion = get_confusion_matrix(self.labels_true, self.labels_pred1) 24 | self.assertEqual(confusion.data.sum(), 4) 25 | self.assertEqual(confusion.diagonal().sum(), 3) 26 | with self.assertRaises(ValueError): 27 | get_accuracy_score(self.labels_true, self.labels_pred2) 28 | 29 | def test_f1_score(self): 30 | f1_score = get_f1_score(np.array([0, 0, 1]), np.array([0, 1, 1])) 31 | self.assertAlmostEqual(f1_score, 0.67, 2) 32 | with self.assertRaises(ValueError): 33 | get_f1_score(self.labels_true, self.labels_pred1) 34 | 35 | def test_f1_scores(self): 36 | f1_scores = get_f1_scores(self.labels_true, self.labels_pred1) 37 | self.assertAlmostEqual(min(f1_scores), 0.67, 2) 38 | f1_scores, precisions, recalls = get_f1_scores(self.labels_true, self.labels_pred1, True) 39 | self.assertAlmostEqual(min(f1_scores), 0.67, 2) 40 | self.assertAlmostEqual(min(precisions), 0.5, 2) 41 | self.assertAlmostEqual(min(recalls), 0.5, 2) 42 | with self.assertRaises(ValueError): 43 | get_f1_scores(self.labels_true, self.labels_pred2) 44 | 45 | def test_average_f1_score(self): 46 | f1_score = get_average_f1_score(self.labels_true, self.labels_pred1) 47 | self.assertAlmostEqual(f1_score, 0.78, 2) 48 | f1_score = get_average_f1_score(self.labels_true, self.labels_pred1, average='micro') 49 | self.assertEqual(f1_score, 0.75) 50 | f1_score = get_average_f1_score(self.labels_true, self.labels_pred1, average='weighted') 51 | self.assertEqual(f1_score, 0.80) 52 | with self.assertRaises(ValueError): 53 | get_average_f1_score(self.labels_true, self.labels_pred2, 'toto') 54 | -------------------------------------------------------------------------------- /sknetwork/classification/tests/test_pagerank.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for PageRankClassifier""" 4 | 5 | import unittest 6 | 7 | from sknetwork.classification import PageRankClassifier 8 | from sknetwork.data.test_graphs import * 9 | 10 | 11 | class TestPageRankClassifier(unittest.TestCase): 12 | 13 | def test_solvers(self): 14 | adjacency = test_graph() 15 | labels = {0: 0, 1: 1} 16 | 17 | ref = PageRankClassifier(solver='piteration').fit_predict(adjacency, labels) 18 | for solver in 
['lanczos', 'bicgstab']: 19 | labels_pred = PageRankClassifier(solver=solver).fit_predict(adjacency, labels) 20 | self.assertTrue((ref == labels_pred).all()) 21 | -------------------------------------------------------------------------------- /sknetwork/classification/tests/test_propagation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for label propagation""" 4 | 5 | import unittest 6 | 7 | from sknetwork.classification import Propagation 8 | from sknetwork.data.test_graphs import * 9 | 10 | 11 | class TestLabelPropagation(unittest.TestCase): 12 | 13 | def test_algo(self): 14 | for adjacency in [test_graph(), test_digraph(), test_bigraph()]: 15 | n = adjacency.shape[0] 16 | labels = {0: 0, 1: 1} 17 | propagation = Propagation(n_iter=3, weighted=False) 18 | labels_pred = propagation.fit_predict(adjacency, labels) 19 | self.assertEqual(labels_pred.shape, (n,)) 20 | 21 | for order in ['random', 'decreasing', 'increasing']: 22 | propagation = Propagation(node_order=order) 23 | labels_pred = propagation.fit_predict(adjacency, labels) 24 | self.assertEqual(labels_pred.shape, (n,)) 25 | -------------------------------------------------------------------------------- /sknetwork/classification/vote.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c++ 2 | # cython: language_level=3 3 | """ 4 | Created in April 2020 5 | @author: Nathan de Lara 6 | """ 7 | from libcpp.set cimport set 8 | from libcpp.vector cimport vector 9 | 10 | cimport cython 11 | 12 | 13 | @cython.boundscheck(False) 14 | @cython.wraparound(False) 15 | def vote_update(int[:] indptr, int[:] indices, float[:] data, int[:] labels, int[:] index): 16 | """One pass of label updates over the graph by majority vote among neighbors.""" 17 | cdef int i 18 | cdef int ii 19 | cdef int j 20 | cdef int jj 21 | cdef int n_indices = index.shape[0] 22 | cdef int label 23 | cdef int label_neigh_size 24 | cdef float best_score 25 | 26 | cdef vector[int] labels_neigh 27 | cdef vector[float] votes_neigh, votes 28 | cdef set[int] labels_unique = () 29 | 30 | cdef int n = labels.shape[0] 31 | for i in range(n): 32 | votes.push_back(0) 33 | 34 | for ii in range(n_indices): 35 | i = index[ii] 36 | labels_neigh.clear() 37 | for j in range(indptr[i], indptr[i + 1]): 38 | jj = indices[j] 39 | labels_neigh.push_back(labels[jj]) 40 | votes_neigh.push_back(data[jj]) 41 | 42 | labels_unique.clear() 43 | label_neigh_size = labels_neigh.size() 44 | for jj in range(label_neigh_size): 45 | label = labels_neigh[jj] 46 | if label >= 0: 47 | labels_unique.insert(label) 48 | votes[label] += votes_neigh[jj] 49 | 50 | best_score = -1 51 | for label in labels_unique: 52 | if votes[label] > best_score: 53 | labels[i] = label 54 | best_score = votes[label] 55 | votes[label] = 0 56 | return labels 57 | -------------------------------------------------------------------------------- /sknetwork/clustering/__init__.py: -------------------------------------------------------------------------------- 1 | """clustering module""" 2 | from sknetwork.clustering.base import BaseClustering 3 | from sknetwork.clustering.louvain import Louvain 4 | from sknetwork.clustering.leiden import Leiden 5 | from sknetwork.clustering.propagation_clustering import PropagationClustering 6 | from sknetwork.clustering.metrics import get_modularity 7 | from sknetwork.clustering.postprocess import reindex_labels, aggregate_graph 8 | 
from sknetwork.clustering.kcenters import KCenters 9 | -------------------------------------------------------------------------------- /sknetwork/clustering/postprocess.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on July 10, 2019 5 | @author: Nathan de Lara 6 | @author: Thomas Bonald 7 | """ 8 | from typing import Optional 9 | 10 | import numpy as np 11 | from scipy import sparse 12 | 13 | from sknetwork.utils.membership import get_membership 14 | 15 | 16 | def reindex_labels(labels: np.ndarray) -> np.ndarray: 17 | """Reindex clusters in decreasing order of size. 18 | 19 | Parameters 20 | ---------- 21 | labels : 22 | Label of each node. 23 | Returns 24 | ------- 25 | new_labels : np.ndarray 26 | New label of each node. 27 | 28 | Example 29 | ------- 30 | >>> from sknetwork.clustering import reindex_labels 31 | >>> labels = np.array([0, 1, 1]) 32 | >>> reindex_labels(labels) 33 | array([1, 0, 0]) 34 | """ 35 | _, index, counts = np.unique(labels, return_inverse=True, return_counts=True) 36 | _, new_index = np.unique(np.argsort(-counts), return_index=True) 37 | return new_index[index] 38 | 39 | 40 | def aggregate_graph(input_matrix: sparse.csr_matrix, labels: Optional[np.ndarray] = None, 41 | labels_row: Optional[np.ndarray] = None, labels_col: Optional[np.ndarray] = None) \ 42 | -> sparse.csr_matrix: 43 | """Aggregate graph per label. All nodes with the same label become a single node. 44 | Negative labels are ignored (corresponding nodes are discarded). 45 | 46 | Parameters 47 | ---------- 48 | input_matrix: sparse matrix 49 | Adjacency or biadjacency matrix of the graph. 50 | labels: np.ndarray 51 | Labels of nodes. 52 | labels_row: np.ndarray 53 | Labels of rows (for bipartite graphs). Alias for labels. 54 | labels_col: np.ndarray 55 | Labels of columns (for bipartite graphs). 
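Returns
-------
aggregate_matrix : sparse.csr_matrix
    Adjacency or biadjacency matrix of the aggregate graph.

Example
-------
A minimal sketch, using the same data as the unit tests (``house`` is the 5-node toy graph):

>>> from sknetwork.data import house
>>> adjacency = house()
>>> labels = np.array([0, 0, 1, 1, 2])
>>> aggregate_graph(adjacency, labels).shape
(3, 3)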
56 | """ 57 | if labels_row is not None: 58 | membership_row = get_membership(labels_row) 59 | else: 60 | membership_row = get_membership(labels) 61 | if labels_col is not None: 62 | membership_col = get_membership(labels_col) 63 | else: 64 | membership_col = membership_row 65 | aggregate_matrix = membership_row.T.dot(input_matrix).dot(membership_col) 66 | return aggregate_matrix.tocsr() 67 | -------------------------------------------------------------------------------- /sknetwork/clustering/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """tests for clustering""" 2 | -------------------------------------------------------------------------------- /sknetwork/clustering/tests/test_API.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for clustering API""" 4 | import unittest 5 | 6 | from sknetwork.clustering import * 7 | from sknetwork.data.test_graphs import * 8 | 9 | 10 | class TestClusteringAPI(unittest.TestCase): 11 | 12 | def setUp(self): 13 | self.algos = [Louvain(return_aggregate=True), Leiden(return_aggregate=True), 14 | PropagationClustering(return_aggregate=True)] 15 | 16 | def test_regular(self): 17 | for algo in self.algos: 18 | for adjacency in [test_graph(), test_digraph(), test_disconnected_graph()]: 19 | n = adjacency.shape[0] 20 | labels = algo.fit_predict(adjacency) 21 | n_labels = len(set(labels)) 22 | self.assertEqual(labels.shape, (n,)) 23 | self.assertEqual(algo.aggregate_.shape, (n_labels, n_labels)) 24 | adjacency_bool = adjacency.astype(bool) 25 | labels = algo.fit_predict(adjacency_bool) 26 | n_labels = len(set(labels)) 27 | self.assertEqual(labels.shape, (n,)) 28 | self.assertEqual(algo.aggregate_.shape, (n_labels, n_labels)) 29 | membership = algo.fit_transform(adjacency_bool) 30 | self.assertEqual(membership.shape, (n, n_labels)) 31 | 32 | def test_bipartite(self): 33 | biadjacency = test_bigraph() 34 | n_row, n_col = biadjacency.shape 35 | for algo in self.algos: 36 | algo.fit(biadjacency) 37 | self.assertEqual(algo.labels_row_.shape, (n_row,)) 38 | self.assertEqual(algo.labels_col_.shape, (n_col,)) 39 | -------------------------------------------------------------------------------- /sknetwork/clustering/tests/test_kcenters.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for KCenters""" 4 | import unittest 5 | 6 | from sknetwork.clustering import KCenters 7 | from sknetwork.data.test_graphs import * 8 | 9 | 10 | class TestKCentersClustering(unittest.TestCase): 11 | 12 | def test_kcenters(self): 13 | # Test undirected graph 14 | n_clusters = 2 15 | adjacency = test_graph() 16 | n_row = adjacency.shape[0] 17 | kcenters = KCenters(n_clusters=n_clusters) 18 | labels = kcenters.fit_predict(adjacency) 19 | self.assertEqual(len(labels), n_row) 20 | self.assertEqual(len(set(labels)), n_clusters) 21 | 22 | # Test directed graph 23 | n_clusters = 3 24 | adjacency = test_digraph() 25 | n_row = adjacency.shape[0] 26 | kcenters = KCenters(n_clusters=n_clusters, directed=True) 27 | labels = kcenters.fit_predict(adjacency) 28 | self.assertEqual(len(labels), n_row) 29 | self.assertEqual(len(set(labels)), n_clusters) 30 | 31 | # Test bipartite graph 32 | n_clusters = 2 33 | biadjacency = test_bigraph() 34 | n_row, n_col = biadjacency.shape 35 | kcenters = KCenters(n_clusters=n_clusters) 36 | 
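# On a bipartite graph, fit() also sets labels_row_ and labels_col_ for the two node sets.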
kcenters.fit(biadjacency) 37 | labels = kcenters.labels_ 38 | self.assertEqual(len(kcenters.labels_row_), n_row) 39 | self.assertEqual(len(kcenters.labels_col_), n_col) 40 | self.assertEqual(len(set(labels)), n_clusters) 41 | 42 | def test_kcenters_error(self): 43 | # Test value errors 44 | adjacency = test_graph() 45 | biadjacency = test_bigraph() 46 | 47 | # test n_clusters error 48 | kcenters = KCenters(n_clusters=1) 49 | with self.assertRaises(ValueError): 50 | kcenters.fit(adjacency) 51 | 52 | # test n_init error 53 | kcenters = KCenters(n_clusters=2, n_init=0) 54 | with self.assertRaises(ValueError): 55 | kcenters.fit(adjacency) 56 | 57 | # test center_position error 58 | kcenters = KCenters(n_clusters=2, center_position="other") 59 | with self.assertRaises(ValueError): 60 | kcenters.fit(biadjacency) 61 | -------------------------------------------------------------------------------- /sknetwork/clustering/tests/test_leiden.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for Leiden""" 4 | import unittest 5 | 6 | from sknetwork.clustering import Leiden 7 | from sknetwork.data.test_graphs import * 8 | from sknetwork.utils import bipartite2undirected 9 | 10 | 11 | class TestLeidenClustering(unittest.TestCase): 12 | 13 | def test_disconnected(self): 14 | adjacency = test_disconnected_graph() 15 | n = adjacency.shape[0] 16 | labels = Leiden().fit_predict(adjacency) 17 | self.assertEqual(len(labels), n) 18 | 19 | def test_modularity(self): 20 | adjacency = test_graph() 21 | leiden_d = Leiden(modularity='dugue') 22 | leiden_n = Leiden(modularity='newman') 23 | labels_d = leiden_d.fit_predict(adjacency) 24 | labels_n = leiden_n.fit_predict(adjacency) 25 | self.assertTrue((labels_d == labels_n).all()) 26 | 27 | def test_bipartite(self): 28 | biadjacency = test_bigraph() 29 | adjacency = bipartite2undirected(biadjacency) 30 | leiden = Leiden(modularity='newman') 31 | labels1 = leiden.fit_predict(adjacency) 32 | leiden.fit(biadjacency) 33 | labels2 = np.concatenate((leiden.labels_row_, leiden.labels_col_)) 34 | self.assertTrue((labels1 == labels2).all()) 35 | -------------------------------------------------------------------------------- /sknetwork/clustering/tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # tests for metrics.py 3 | """"tests for clustering metrics""" 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from sknetwork.clustering import get_modularity, Louvain 9 | from sknetwork.data import star_wars, karate_club 10 | from sknetwork.data.test_graphs import test_graph 11 | 12 | 13 | class TestClusteringMetrics(unittest.TestCase): 14 | 15 | def setUp(self): 16 | """Basic graph for tests""" 17 | self.adjacency = test_graph() 18 | n = self.adjacency.shape[0] 19 | labels = np.zeros(n) 20 | labels[0] = 1 21 | self.labels = labels.astype(int) 22 | self.unique_cluster = np.zeros(n, dtype=int) 23 | 24 | def test_api(self): 25 | for metric in [get_modularity]: 26 | _, fit, div = metric(self.adjacency, self.labels, return_all=True) 27 | mod = metric(self.adjacency, self.labels, return_all=False) 28 | self.assertAlmostEqual(fit - div, mod) 29 | self.assertAlmostEqual(metric(self.adjacency, self.unique_cluster), 0.) 
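# A label vector shorter than the number of nodes must raise a ValueError.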
30 | 31 | with self.assertRaises(ValueError): 32 | metric(self.adjacency, self.labels[:3]) 33 | 34 | def test_modularity(self): 35 | adjacency = karate_club() 36 | labels = Louvain().fit_predict(adjacency) 37 | self.assertAlmostEqual(get_modularity(adjacency, labels), 0.42, 2) 38 | 39 | def test_bimodularity(self): 40 | biadjacency = star_wars() 41 | labels_row = np.array([0, 0, 1, 1]) 42 | labels_col = np.array([0, 1, 0]) 43 | self.assertAlmostEqual(get_modularity(biadjacency, labels_row, labels_col), 0.12, 2) 44 | 45 | with self.assertRaises(ValueError): 46 | get_modularity(biadjacency, labels_row) 47 | with self.assertRaises(ValueError): 48 | get_modularity(biadjacency, labels_row[:2], labels_col) 49 | with self.assertRaises(ValueError): 50 | get_modularity(biadjacency, labels_row, labels_col[:2]) 51 | -------------------------------------------------------------------------------- /sknetwork/clustering/tests/test_postprocess.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for clustering post-processing""" 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from sknetwork.data import house, star_wars 9 | from sknetwork.clustering.postprocess import reindex_labels, aggregate_graph 10 | 11 | 12 | class TestClusteringPostProcessing(unittest.TestCase): 13 | 14 | def test_reindex_clusters(self): 15 | truth = np.array([1, 1, 2, 0, 0, 0]) 16 | 17 | labels = np.array([0, 0, 1, 2, 2, 2]) 18 | output = reindex_labels(labels) 19 | self.assertTrue(np.array_equal(truth, output)) 20 | 21 | labels = np.array([0, 0, 5, 2, 2, 2]) 22 | output = reindex_labels(labels) 23 | self.assertTrue(np.array_equal(truth, output)) 24 | 25 | def test_aggregate_graph(self): 26 | adjacency = house() 27 | labels = np.array([0, 0, 1, 1, 2]) 28 | aggregate = aggregate_graph(adjacency, labels) 29 | self.assertEqual(aggregate.shape, (3, 3)) 30 | 31 | biadjacency = star_wars() 32 | labels = np.array([0, 0, 1, 2]) 33 | labels_row = np.array([0, 1, 3, -1]) 34 | labels_col = np.array([0, 0, 1]) 35 | aggregate = aggregate_graph(biadjacency, labels=labels, labels_col=labels_col) 36 | self.assertEqual(aggregate.shape, (3, 2)) 37 | self.assertEqual(aggregate.shape, (3, 2)) 38 | aggregate = aggregate_graph(biadjacency, labels_row=labels_row, labels_col=labels_col) 39 | self.assertEqual(aggregate.shape, (4, 2)) 40 | -------------------------------------------------------------------------------- /sknetwork/data/__init__.py: -------------------------------------------------------------------------------- 1 | """data module""" 2 | from sknetwork.data.base import * 3 | from sknetwork.data.load import * 4 | from sknetwork.data.models import * 5 | from sknetwork.data.parse import from_edge_list, from_adjacency_list, from_csv, from_graphml 6 | from sknetwork.data.toy_graphs import * 7 | -------------------------------------------------------------------------------- /sknetwork/data/base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created in May 2023 5 | @author: Thomas Bonald 6 | """ 7 | 8 | 9 | class Dataset(dict): 10 | """Container object for datasets. 11 | Dictionary-like object that exposes its keys as attributes. 
12 | >>> dataset = Dataset(name='dataset') 13 | >>> dataset['name'] 14 | 'dataset' 15 | >>> dataset.name 16 | 'dataset' 17 | """ 18 | def __init__(self, **kwargs): 19 | super().__init__(kwargs) 20 | 21 | def __setattr__(self, key, value): 22 | self[key] = value 23 | 24 | def __getattr__(self, key): 25 | try: 26 | return self[key] 27 | except KeyError: 28 | raise AttributeError(key) 29 | 30 | 31 | # alias for Dataset 32 | Bunch = Dataset 33 | 34 | -------------------------------------------------------------------------------- /sknetwork/data/test_graphs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Nov 29, 2018 5 | @author: Quentin Lutz 6 | @author: Nathan de Lara 7 | @author: Thomas Bonald 8 | """ 9 | 10 | import numpy as np 11 | from scipy import sparse 12 | 13 | from sknetwork.utils import directed2undirected 14 | 15 | 16 | def test_digraph(): 17 | """Simple directed graph, used for testing. 18 | 10 nodes, 12 edges 19 | """ 20 | row = np.array([0, 1, 1, 3, 4, 6, 6, 6, 6, 7, 8, 8, 9]) 21 | col = np.array([1, 4, 3, 2, 5, 4, 5, 7, 1, 9, 9, 2, 9]) 22 | data = np.array([1, 1, 2.5, 1, 2, 2, 1, 2, 2, 1.5, 2, 1, 2]) 23 | return sparse.csr_matrix((data, (row, col)), shape=(10, 10)) 24 | 25 | 26 | def test_graph(): 27 | """Simple undirected graph, used for testing. 28 | 10 nodes, 12 edges. 29 | """ 30 | return directed2undirected(test_digraph(), weighted=True) 31 | 32 | 33 | def test_bigraph(): 34 | """Simple bipartite graph, used for testing. 35 | 6 + 8 nodes, 9 edges. 36 | """ 37 | row = np.array([0, 1, 1, 2, 2, 3, 4, 5, 5]) 38 | col = np.array([1, 2, 3, 1, 0, 4, 7, 5, 6]) 39 | data = np.array([1, 2.5, 1, 2, 2, 1.5, 1, 2, 3]) 40 | return sparse.csr_matrix((data, (row, col)), shape=(6, 8)) 41 | 42 | 43 | def test_disconnected_graph(): 44 | """Simple disconnected undirected graph, used for testing. 45 | 10 nodes, 10 edges. 46 | """ 47 | row = np.array([1, 2, 3, 4, 6, 6, 6, 7, 8, 9]) 48 | col = np.array([1, 3, 2, 5, 4, 5, 7, 9, 9, 9]) 49 | data = np.array([1, 2.5, 1, 2, 2, 1, 2, 2, 1.5, 2]) 50 | adjacency = sparse.csr_matrix((data, (row, col)), shape=(10, 10)) 51 | return directed2undirected(adjacency) 52 | 53 | 54 | def test_bigraph_disconnect(): 55 | """Simple disconnected bipartite graph, used for testing. 56 | 6 + 8 nodes, 9 edges. 57 | """ 58 | row = np.array([1, 1, 1, 2, 2, 3, 5, 4, 5]) 59 | col = np.array([1, 2, 3, 1, 3, 4, 7, 7, 6]) 60 | data = np.array([1, 2.5, 1, 2, 2, 1.5, 3, 0, 1]) 61 | return sparse.csr_matrix((data, (row, col)), shape=(6, 8)) 62 | 63 | 64 | def test_graph_bool(): 65 | """Simple undirected graph with boolean entries, used for testing (10 nodes, 10 edges).""" 66 | adjacency = test_graph() 67 | adjacency.data = adjacency.data.astype(bool) 68 | return adjacency 69 | 70 | 71 | def test_clique(): 72 | """Clique graph, used for testing (10 nodes, 45 edges). 73 | """ 74 | n = 10 75 | adjacency = sparse.csr_matrix(np.ones((n, n), dtype=bool)) 76 | adjacency.setdiag(0) 77 | adjacency.eliminate_zeros() 78 | return adjacency 79 | 80 | 81 | def test_graph_empty(): 82 | """Empty graph, used for testing (10 nodes, 0 edges). 
83 | """ 84 | return sparse.csr_matrix((10, 10), dtype=bool) 85 | -------------------------------------------------------------------------------- /sknetwork/data/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """tests module""" 2 | -------------------------------------------------------------------------------- /sknetwork/data/tests/test_API.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for data API""" 4 | 5 | import tempfile 6 | import unittest 7 | import warnings 8 | 9 | from sknetwork.data.load import * 10 | from sknetwork.data.toy_graphs import * 11 | from sknetwork.data import Dataset 12 | 13 | 14 | class TestDataAPI(unittest.TestCase): 15 | 16 | def test_toy_graphs(self): 17 | toy_graphs = [karate_club, painters, bow_tie, house, miserables] 18 | for toy_graph in toy_graphs: 19 | self.assertEqual(type(toy_graph()), sparse.csr_matrix) 20 | self.assertEqual(type(toy_graph(metadata=True)), Dataset) 21 | 22 | def test_load(self): 23 | tmp_data_dir = tempfile.gettempdir() + '/stub' 24 | clear_data_home(tmp_data_dir) 25 | try: 26 | graph = load_netset('stub', tmp_data_dir) 27 | self.assertEqual(type(graph), Dataset) 28 | except URLError: # pragma: no cover 29 | warnings.warn('Could not reach NetSet. Corresponding test has not been performed.', RuntimeWarning) 30 | return 31 | -------------------------------------------------------------------------------- /sknetwork/data/tests/test_base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """tests for dataset""" 3 | 4 | import unittest 5 | 6 | from sknetwork.data.base import Dataset 7 | 8 | 9 | class TestDataset(unittest.TestCase): 10 | 11 | def test(self): 12 | dataset = Dataset(name='dataset') 13 | self.assertEqual(dataset.name, 'dataset') 14 | self.assertEqual(dataset['name'], 'dataset') 15 | -------------------------------------------------------------------------------- /sknetwork/data/tests/test_models.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # tests for toy_graphs.py 3 | """authors: 4 | Thomas Bonald 5 | Quentin Lutz """ 6 | import unittest 7 | 8 | from sknetwork.data.models import * 9 | 10 | 11 | class TestModels(unittest.TestCase): 12 | 13 | def test_shape(self): 14 | n = 10 15 | for model in [linear_graph, linear_digraph, cyclic_graph, cyclic_digraph, watts_strogatz]: 16 | adjacency = model(n) 17 | graph = model(n, metadata=True) 18 | self.assertEqual(adjacency.shape, (n, n)) 19 | self.assertEqual(graph.adjacency.shape, (n, n)) 20 | if hasattr(graph, 'position'): 21 | self.assertEqual(graph.position.shape, (n, 2)) 22 | 23 | adjacency = star(n) 24 | self.assertEqual(adjacency.shape, (n+1, n+1)) 25 | graph = star(n, metadata=True) 26 | self.assertTrue(hasattr(graph, 'position')) 27 | 28 | adjacency = erdos_renyi(n) 29 | self.assertEqual(adjacency.shape, (n, n)) 30 | adjacency = erdos_renyi(n, directed=True, self_loops=True, seed=4) 31 | self.assertEqual(adjacency.shape, (n, n)) 32 | 33 | adjacency = albert_barabasi(n, 2) 34 | self.assertEqual(adjacency.shape, (n, n)) 35 | 36 | n1, n2 = 4, 6 37 | n = n1 * n2 38 | adjacency = grid(n1, n2) 39 | self.assertEqual(adjacency.shape, (n, n)) 40 | graph = grid(n1, n2, metadata=True) 41 | self.assertEqual(graph.adjacency.shape, (n, n)) 42 | if hasattr(graph, 'position'): 43 | 
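# When a model provides node positions, they are 2D coordinates, one row per node.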
self.assertEqual(graph.position.shape, (n, 2)) 44 | 45 | def test_block_model(self): 46 | graph = block_model(np.array([4, 5, 6]), np.array([0.5, 0.3, 0.2]), 0.1, metadata=True) 47 | adjacency = graph.adjacency 48 | labels = graph.labels 49 | self.assertEqual(adjacency.shape, (15, 15)) 50 | self.assertEqual(len(labels), 15) 51 | adjacency = block_model(np.array([4, 5, 6]), np.array([0.5, 0.3, 0.2]), 0.1, directed=True, self_loops=True) 52 | self.assertEqual(adjacency.shape, (15, 15)) 53 | -------------------------------------------------------------------------------- /sknetwork/data/tests/test_test_graphs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # tests for test_graphs.py 3 | """ 4 | @author: Quentin Lutz 5 | @author: Nathan de Lara 6 | @author: Thomas Bonald 7 | """ 8 | import unittest 9 | 10 | from sknetwork.data.test_graphs import * 11 | 12 | 13 | class TestTestGraphs(unittest.TestCase): 14 | 15 | def test_undirected(self): 16 | adjacency = test_graph() 17 | self.assertEqual(adjacency.shape, (10, 10)) 18 | adjacency = test_disconnected_graph() 19 | self.assertEqual(adjacency.shape, (10, 10)) 20 | 21 | def test_directed(self): 22 | adjacency = test_digraph() 23 | self.assertEqual(adjacency.shape, (10, 10)) 24 | 25 | def test_bipartite(self): 26 | biadjacency = test_bigraph() 27 | self.assertEqual(biadjacency.shape, (6, 8)) 28 | biadjacency = test_bigraph_disconnect() 29 | self.assertEqual(biadjacency.shape, (6, 8)) 30 | -------------------------------------------------------------------------------- /sknetwork/data/tests/test_toy_graphs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # tests for toy_graphs.py 3 | """ 4 | @author: Quentin Lutz 5 | @author: Nathan de Lara 6 | @author: Thomas Bonald 7 | """ 8 | import unittest 9 | 10 | from sknetwork.data.toy_graphs import * 11 | 12 | 13 | class TestToys(unittest.TestCase): 14 | 15 | def test_undirected(self): 16 | adjacency = house() 17 | self.assertEqual(adjacency.shape, (5, 5)) 18 | 19 | dataset = house(metadata=True) 20 | self.assertEqual(dataset.position.shape, (5, 2)) 21 | 22 | adjacency = bow_tie() 23 | self.assertEqual(adjacency.shape, (5, 5)) 24 | 25 | dataset = bow_tie(metadata=True) 26 | self.assertEqual(dataset.position.shape, (5, 2)) 27 | 28 | dataset = karate_club(True) 29 | self.assertEqual(dataset.adjacency.shape, (34, 34)) 30 | self.assertEqual(len(dataset.labels), 34) 31 | 32 | dataset = miserables(True) 33 | self.assertEqual(dataset.adjacency.shape, (77, 77)) 34 | self.assertEqual(len(dataset.names), 77) 35 | 36 | def test_directed(self): 37 | adjacency = painters() 38 | self.assertEqual(adjacency.shape, (14, 14)) 39 | 40 | adjacency = art_philo_science() 41 | self.assertEqual(adjacency.shape, (30, 30)) 42 | 43 | dataset = painters(True) 44 | self.assertEqual(dataset.adjacency.shape, (14, 14)) 45 | self.assertEqual(len(dataset.names), 14) 46 | 47 | dataset = art_philo_science(True) 48 | self.assertEqual(dataset.adjacency.shape, (30, 30)) 49 | self.assertEqual(len(dataset.names), 30) 50 | 51 | def test_bipartite(self): 52 | dataset = star_wars(True) 53 | self.assertEqual(dataset.biadjacency.shape, (4, 3)) 54 | self.assertEqual(len(dataset.names), 4) 55 | self.assertEqual(len(dataset.names_col), 3) 56 | 57 | dataset = movie_actor(True) 58 | self.assertEqual(dataset.biadjacency.shape, (15, 17)) 59 | self.assertEqual(len(dataset.names), 15) 60 | 
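# For bipartite datasets, names labels the rows and names_col the columns of the biadjacency matrix.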
self.assertEqual(len(dataset.names_col), 17) 61 | 62 | dataset = hourglass(True) 63 | self.assertEqual(dataset.biadjacency.shape, (2, 2)) 64 | 65 | dataset = art_philo_science(True) 66 | self.assertEqual(dataset.biadjacency.shape, (30, 11)) 67 | self.assertEqual(len(dataset.names), 30) 68 | self.assertEqual(len(dataset.names_col), 11) 69 | -------------------------------------------------------------------------------- /sknetwork/data/timeout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import contextlib 3 | import signal 4 | import warnings 5 | 6 | 7 | class TimeOut(contextlib.ContextDecorator): 8 | """ 9 | Timeout context manager/decorator. 10 | 11 | Adapted from https://gist.github.com/TySkby/143190ad1b88c6115597c45f996b030c on 12/10/2020. 12 | 13 | Examples 14 | -------- 15 | >>> from time import sleep 16 | >>> try: 17 | ... with TimeOut(1): 18 | ... sleep(10) 19 | ... except TimeoutError: 20 | ... print("Function timed out") 21 | Function timed out 22 | """ 23 | def __init__(self, seconds: float): 24 | self.seconds = seconds 25 | 26 | def _timeout_handler(self, signum, frame): 27 | raise TimeoutError("Code timed out.") 28 | 29 | def __enter__(self): 30 | if hasattr(signal, "SIGALRM"): 31 | signal.signal(signal.SIGALRM, self._timeout_handler) 32 | signal.alarm(self.seconds) 33 | else: 34 | warnings.warn("SIGALRM is unavailable on Windows. Timeouts are not functional.") 35 | 36 | def __exit__(self, exc_type, exc_val, exc_tb): 37 | if hasattr(signal, "SIGALRM"): 38 | signal.alarm(0) 39 | -------------------------------------------------------------------------------- /sknetwork/embedding/__init__.py: -------------------------------------------------------------------------------- 1 | """embedding module""" 2 | from sknetwork.embedding.base import BaseEmbedding 3 | from sknetwork.embedding.force_atlas import ForceAtlas 4 | from sknetwork.embedding.louvain_embedding import LouvainEmbedding 5 | from sknetwork.embedding.random_projection import RandomProjection 6 | from sknetwork.embedding.spectral import Spectral 7 | from sknetwork.embedding.spring import Spring 8 | from sknetwork.embedding.svd import SVD, GSVD, PCA 9 | -------------------------------------------------------------------------------- /sknetwork/embedding/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """tests for embedding""" 2 | -------------------------------------------------------------------------------- /sknetwork/embedding/tests/test_API.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for embeddings""" 4 | 5 | import unittest 6 | 7 | from sknetwork.data.test_graphs import * 8 | from sknetwork.embedding import Spectral, SVD, GSVD, Spring 9 | 10 | 11 | class TestEmbeddings(unittest.TestCase): 12 | 13 | def setUp(self): 14 | """Algorithms by input types.""" 15 | self.methods = [Spectral(), GSVD(), SVD()] 16 | 17 | def test_undirected(self): 18 | adjacency = test_graph() 19 | n = adjacency.shape[0] 20 | 21 | method = Spring() 22 | embedding = method.fit_transform(adjacency) 23 | self.assertEqual(embedding.shape, (n, 2)) 24 | 25 | embedding = method.transform() 26 | self.assertEqual(embedding.shape, (n, 2)) 27 | 28 | def test_bipartite(self): 29 | for adjacency in [test_digraph(), test_bigraph()]: 30 | n_row, n_col = adjacency.shape 31 | 32 | for method in self.methods: 33 | 
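# After fit, each method exposes embedding_ as well as embedding_row_ and embedding_col_.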
method.fit(adjacency) 34 | 35 | self.assertEqual(method.embedding_.shape, (n_row, 2)) 36 | self.assertEqual(method.embedding_row_.shape, (n_row, 2)) 37 | self.assertEqual(method.embedding_col_.shape, (n_col, 2)) 38 | 39 | def test_disconnected(self): 40 | n = 10 41 | adjacency = np.eye(n) 42 | for method in self.methods: 43 | embedding = method.fit_transform(adjacency) 44 | self.assertEqual(embedding.shape, (n, 2)) 45 | 46 | def test_regularization(self): 47 | adjacency = test_graph() 48 | method = Spectral() 49 | self.assertEqual(method._get_regularization(-1, adjacency), 0) 50 | -------------------------------------------------------------------------------- /sknetwork/embedding/tests/test_force_atlas.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for force atlas2 embeddings""" 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from sknetwork.data.test_graphs import test_graph, test_digraph 9 | from sknetwork.embedding.force_atlas import ForceAtlas 10 | 11 | 12 | class TestEmbeddings(unittest.TestCase): 13 | 14 | def test_options(self): 15 | for adjacency in [test_graph(), test_digraph()]: 16 | n = adjacency.shape[0] 17 | 18 | force_atlas = ForceAtlas() 19 | layout = force_atlas.fit_transform(adjacency) 20 | self.assertEqual((n, 2), layout.shape) 21 | 22 | force_atlas = ForceAtlas(lin_log=True) 23 | layout = force_atlas.fit_transform(adjacency) 24 | self.assertEqual((n, 2), layout.shape) 25 | 26 | force_atlas = ForceAtlas(approx_radius=1.) 27 | layout = force_atlas.fit_transform(adjacency) 28 | self.assertEqual((n, 2), layout.shape) 29 | 30 | force_atlas.fit(adjacency, pos_init=layout, n_iter=1) 31 | 32 | def test_errors(self): 33 | adjacency = test_graph() 34 | with self.assertRaises(ValueError): 35 | ForceAtlas().fit(adjacency, pos_init=np.ones((5, 7))) 36 | -------------------------------------------------------------------------------- /sknetwork/embedding/tests/test_louvain_embedding.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for Louvain embedding""" 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from sknetwork.data.test_graphs import test_graph, test_bigraph 9 | from sknetwork.embedding import LouvainEmbedding 10 | 11 | 12 | class TestLouvainEmbedding(unittest.TestCase): 13 | 14 | def test_predict(self): 15 | adjacency = test_graph() 16 | adjacency_vector = np.zeros(10, dtype=int) 17 | adjacency_vector[:5] = 1 18 | louvain = LouvainEmbedding() 19 | louvain.fit(adjacency) 20 | self.assertEqual(louvain.embedding_.shape[0], 10) 21 | louvain.fit(adjacency, force_bipartite=True) 22 | self.assertEqual(louvain.embedding_.shape[0], 10) 23 | 24 | # bipartite 25 | biadjacency = test_bigraph() 26 | louvain.fit(biadjacency) 27 | self.assertEqual(louvain.embedding_row_.shape[0], 6) 28 | self.assertEqual(louvain.embedding_col_.shape[0], 8) 29 | 30 | for method in ['remove', 'merge', 'keep']: 31 | louvain = LouvainEmbedding(isolated_nodes=method) 32 | embedding = louvain.fit_transform(adjacency) 33 | self.assertEqual(embedding.shape[0], adjacency.shape[0]) 34 | -------------------------------------------------------------------------------- /sknetwork/embedding/tests/test_random_projection.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for random projection""" 4 | 
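# RandomProjection is checked with and without the random_walk option, on undirected, directed, disconnected and bipartite graphs.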
import unittest 5 | 6 | from sknetwork.data.test_graphs import test_graph, test_bigraph, test_digraph, test_disconnected_graph 7 | from sknetwork.embedding import RandomProjection 8 | 9 | 10 | class TestEmbeddings(unittest.TestCase): 11 | 12 | def test_random_projection(self): 13 | for algo in [RandomProjection(), RandomProjection(random_walk=True)]: 14 | adjacency = test_graph() 15 | embedding = algo.fit_transform(adjacency) 16 | self.assertEqual(embedding.shape[1], 2) 17 | embedding = algo.fit_transform(adjacency, force_bipartite=True) 18 | self.assertEqual(embedding.shape[1], 2) 19 | adjacency = test_digraph() 20 | embedding = algo.fit_transform(adjacency) 21 | self.assertEqual(embedding.shape[1], 2) 22 | adjacency = test_disconnected_graph() 23 | embedding = algo.fit_transform(adjacency) 24 | self.assertEqual(embedding.shape[1], 2) 25 | biadjacency = test_bigraph() 26 | embedding = algo.fit_transform(biadjacency) 27 | self.assertEqual(embedding.shape[1], 2) 28 | self.assertEqual(algo.embedding_col_.shape[1], 2) 29 | -------------------------------------------------------------------------------- /sknetwork/embedding/tests/test_spring.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for spring embeddings""" 4 | 5 | import unittest 6 | 7 | from sknetwork.data.test_graphs import * 8 | from sknetwork.embedding import Spring 9 | 10 | 11 | class TestEmbeddings(unittest.TestCase): 12 | 13 | def test_shape(self): 14 | for adjacency in [test_graph(), test_digraph()]: 15 | n = adjacency.shape[0] 16 | spring = Spring() 17 | layout = spring.fit_transform(adjacency) 18 | self.assertEqual((n, 2), layout.shape) 19 | 20 | spring = Spring(n_components=3) 21 | layout = spring.fit_transform(adjacency) 22 | self.assertEqual((n, 3), layout.shape) 23 | 24 | def test_pos_init(self): 25 | adjacency = test_graph() 26 | n = adjacency.shape[0] 27 | 28 | spring = Spring(strength=0.1, position_init='spectral', tol=1e3) 29 | layout = spring.fit_transform(adjacency) 30 | self.assertEqual((n, 2), layout.shape) 31 | layout = spring.fit_transform(adjacency, position_init=layout) 32 | self.assertEqual((n, 2), layout.shape) 33 | 34 | def test_approx_radius(self): 35 | adjacency = test_graph() 36 | n = adjacency.shape[0] 37 | 38 | spring = Spring(approx_radius=1.) 
39 | layout = spring.fit_transform(adjacency) 40 | self.assertEqual((n, 2), layout.shape) 41 | 42 | def test_errors(self): 43 | adjacency = test_graph() 44 | with self.assertRaises(ValueError): 45 | Spring(position_init='toto') 46 | with self.assertRaises(ValueError): 47 | Spring().fit(adjacency, position_init=np.ones((2, 2))) 48 | with self.assertRaises(TypeError): 49 | # noinspection PyTypeChecker 50 | Spring().fit(adjacency, position_init='toto') 51 | -------------------------------------------------------------------------------- /sknetwork/embedding/tests/test_svd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for svd""" 4 | 5 | import unittest 6 | 7 | import numpy as np 8 | 9 | from sknetwork.data import star_wars 10 | from sknetwork.embedding import GSVD, SVD, PCA 11 | from sknetwork.linalg import LanczosSVD 12 | 13 | 14 | class TestSVD(unittest.TestCase): 15 | 16 | def test_options(self): 17 | biadjacency = star_wars(metadata=False) 18 | n_row, n_col = biadjacency.shape 19 | min_dim = min(n_row, n_col) - 1 20 | gsvd = GSVD(n_components=5, regularization=0., solver='halko') 21 | 22 | with self.assertWarns(Warning): 23 | gsvd.fit(biadjacency) 24 | self.assertEqual(gsvd.embedding_row_.shape, (n_row, min_dim)) 25 | self.assertEqual(gsvd.embedding_col_.shape, (n_col, min_dim)) 26 | 27 | embedding = gsvd.predict(np.array([0, 1, 1])) 28 | self.assertEqual(embedding.shape, (min_dim,)) 29 | 30 | gsvd = GSVD(n_components=1, regularization=0.1, solver='lanczos') 31 | gsvd.fit(biadjacency) 32 | self.assertEqual(gsvd.embedding_row_.shape, (n_row, 1)) 33 | 34 | pca = PCA(n_components=min_dim, solver='lanczos') 35 | pca.fit(biadjacency) 36 | self.assertEqual(pca.embedding_row_.shape, (n_row, min_dim)) 37 | pca = PCA(n_components=min_dim, solver=LanczosSVD()) 38 | pca.fit(biadjacency) 39 | self.assertEqual(pca.embedding_row_.shape, (n_row, min_dim)) 40 | 41 | svd = SVD(n_components=min_dim, solver=LanczosSVD()) 42 | svd.fit(biadjacency) 43 | self.assertEqual(svd.embedding_row_.shape, (n_row, min_dim)) 44 | -------------------------------------------------------------------------------- /sknetwork/gnn/__init__.py: -------------------------------------------------------------------------------- 1 | """gnn module""" 2 | from sknetwork.gnn.base import BaseGNN 3 | from sknetwork.gnn.base_activation import BaseActivation, BaseLoss 4 | from sknetwork.gnn.base_layer import BaseLayer 5 | from sknetwork.gnn.gnn_classifier import GNNClassifier 6 | from sknetwork.gnn.layer import Convolution 7 | from sknetwork.gnn.neighbor_sampler import UniformNeighborSampler 8 | from sknetwork.gnn.activation import ReLu, Sigmoid, Softmax 9 | from sknetwork.gnn.loss import BinaryCrossEntropy, CrossEntropy 10 | from sknetwork.gnn.optimizer import BaseOptimizer, GD, ADAM 11 | -------------------------------------------------------------------------------- /sknetwork/gnn/base_activation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on April 2022 5 | @author: Simon Delarue 6 | """ 7 | import numpy as np 8 | 9 | 10 | class BaseActivation: 11 | """Base class for activation functions. 12 | 13 | Parameters 14 | ---------- 15 | name : str 16 | Name of the activation function. 
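Example
-------
A minimal sketch of the default (identity) behaviour, as exercised in the tests:

>>> activation = BaseActivation()
>>> signal = np.arange(3)
>>> bool((activation.output(signal) == signal).all())
True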
17 | """ 18 | def __init__(self, name: str = 'custom'): 19 | self.name = name 20 | 21 | @staticmethod 22 | def output(signal: np.ndarray) -> np.ndarray: 23 | """Output of the activation function. 24 | 25 | Parameters 26 | ---------- 27 | signal : np.ndarray, shape (n_samples, n_channels) 28 | Input signal. 29 | 30 | Returns 31 | ------- 32 | output : np.ndarray, shape (n_samples, n_channels) 33 | Output signal. 34 | """ 35 | output = signal 36 | return output 37 | 38 | @staticmethod 39 | def gradient(signal: np.ndarray, direction: np.ndarray) -> np.ndarray: 40 | """Gradient of the activation function. 41 | 42 | Parameters 43 | ---------- 44 | signal : np.ndarray, shape (n_samples, n_channels) 45 | Input signal. 46 | direction : np.ndarray, shape (n_samples, n_channels) 47 | Direction where the gradient is taken. 48 | 49 | Returns 50 | ------- 51 | gradient : np.ndarray, shape (n_samples, n_channels) 52 | Gradient. 53 | """ 54 | gradient = direction 55 | return gradient 56 | 57 | 58 | class BaseLoss(BaseActivation): 59 | """Base class for loss functions.""" 60 | @staticmethod 61 | def loss(signal: np.ndarray, labels: np.ndarray) -> float: 62 | """Get the loss value. 63 | 64 | Parameters 65 | ---------- 66 | signal : np.ndarray, shape (n_samples, n_channels) 67 | Input signal (before activation). 68 | labels : np.ndarray, shape (n_samples) 69 | True labels. 70 | """ 71 | return 0 72 | 73 | @staticmethod 74 | def loss_gradient(signal: np.ndarray, labels: np.ndarray) -> np.ndarray: 75 | """Gradient of the loss function. 76 | 77 | Parameters 78 | ---------- 79 | signal : np.ndarray, shape (n_samples, n_channels) 80 | Input signal. 81 | labels : np.ndarray, shape (n_samples,) 82 | True labels. 83 | 84 | Returns 85 | ------- 86 | gradient : np.ndarray, shape (n_samples, n_channels) 87 | Gradient. 88 | """ 89 | gradient = np.ones_like(signal) 90 | return gradient 91 | -------------------------------------------------------------------------------- /sknetwork/gnn/neighbor_sampler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding: utf-8 3 | """ 4 | @author: Simon Delarue 5 | """ 6 | from typing import Union 7 | 8 | import numpy as np 9 | from scipy import sparse 10 | 11 | from sknetwork.utils import get_degrees 12 | 13 | 14 | class UniformNeighborSampler: 15 | """Neighbor node sampler. 16 | 17 | Uniformly sample nodes over neighborhood. 18 | 19 | Parameters 20 | ---------- 21 | sample_size : int 22 | Size of neighborhood sampled for each node. 23 | """ 24 | def __init__(self, sample_size: int): 25 | self.sample_size = sample_size 26 | 27 | def _sample_indexes(self, size: int) -> np.ndarray: 28 | """Randomly chose indexes without replacement. 29 | 30 | Parameters 31 | ---------- 32 | size : int 33 | Highest index available. This index is used if lower than a threshold. 34 | 35 | Returns 36 | ------- 37 | Array of sampled indexes. 38 | """ 39 | return np.random.choice(size, size=min(size, self.sample_size), replace=False) 40 | 41 | def __call__(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> sparse.csr_matrix: 42 | """Apply node sampling on each node and return filtered adjacency matrix. 43 | 44 | Parameters 45 | ---------- 46 | adjacency 47 | Adjacency matrix of the graph. 48 | 49 | Returns 50 | ------- 51 | Filtered adjacency matrix using node sampling. 
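Example
-------
A minimal sketch, following the sampler tests:

>>> from sknetwork.data.test_graphs import test_graph
>>> adjacency = test_graph()
>>> sampler = UniformNeighborSampler(sample_size=2)
>>> sampled = sampler(adjacency)
>>> bool(sampled.shape == adjacency.shape)
True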
52 | """ 53 | n_row, _ = adjacency.shape 54 | sampled_adjacency = adjacency.copy() 55 | 56 | degrees = get_degrees(adjacency) 57 | neighbor_samples = list(map(self._sample_indexes, degrees)) 58 | 59 | for i, neighbors in enumerate(neighbor_samples): 60 | sampled_adjacency.data[sampled_adjacency.indptr[i]:sampled_adjacency.indptr[i + 1]] = np.zeros(degrees[i]) 61 | sampled_adjacency.data[sampled_adjacency.indptr[i]:sampled_adjacency.indptr[i + 1]][neighbors] = 1 62 | 63 | sampled_adjacency.eliminate_zeros() 64 | 65 | return sampled_adjacency 66 | -------------------------------------------------------------------------------- /sknetwork/gnn/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """tests for gnn""" 2 | -------------------------------------------------------------------------------- /sknetwork/gnn/tests/test_activation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for activation""" 4 | 5 | import unittest 6 | 7 | from sknetwork.gnn.activation import * 8 | 9 | 10 | class TestActivation(unittest.TestCase): 11 | 12 | def test_get_activation(self): 13 | self.assertTrue(isinstance(get_activation('Identity'), BaseActivation)) 14 | self.assertTrue(isinstance(get_activation('Relu'), ReLu)) 15 | self.assertTrue(isinstance(get_activation('Sigmoid'), Sigmoid)) 16 | self.assertTrue(isinstance(get_activation('Softmax'), Softmax)) 17 | with self.assertRaises(ValueError): 18 | get_activation('foo') 19 | 20 | base_act = BaseActivation() 21 | self.assertTrue(base_act == get_activation(base_act)) 22 | with self.assertRaises(TypeError): 23 | get_activation(0) 24 | 25 | def test_activation_identity(self): 26 | activation = get_activation('Identity') 27 | signal = np.arange(5) 28 | self.assertTrue((activation.output(signal) == signal).all()) 29 | direction = np.arange(5) 30 | self.assertTrue((activation.gradient(signal, direction) == direction).all()) 31 | 32 | def test_activation_relu(self): 33 | activation = get_activation('ReLu') 34 | signal = np.linspace(-2, 2, 5) 35 | self.assertTrue((activation.output(signal) == [0., 0., 0., 1., 2.]).all()) 36 | direction = np.arange(5) 37 | self.assertTrue((activation.gradient(signal, direction) == direction * (signal > 0)).all()) 38 | 39 | def test_activation_sigmoid(self): 40 | activation = get_activation('Sigmoid') 41 | signal = np.array([-np.inf, -1.5, 0, 1.5, np.inf]) 42 | self.assertTrue(np.allclose(activation.output(signal), np.array([0., 0.18242552, 0.5, 0.81757448, 1.]))) 43 | signal = np.array([[-1000, 1000, 1000]]) 44 | direction = np.arange(3) 45 | self.assertTrue(np.allclose(activation.output(signal), np.array([[0., 1., 1.]]))) 46 | self.assertTrue(np.allclose(activation.gradient(signal, direction), np.array([[0., 0., 0.]]))) 47 | 48 | def test_activation_softmax(self): 49 | activation = get_activation('Softmax') 50 | signal = np.array([[-1, 0, 3, 5]]) 51 | output = activation.output(signal) 52 | self.assertTrue(np.allclose(output, np.array([[0.0021657, 0.00588697, 0.11824302, 0.87370431]]))) 53 | signal = np.array([[-1000, 1000, 1000]]) 54 | direction = np.arange(3) 55 | self.assertTrue(np.allclose(activation.output(signal), np.array([[0., 0.5, 0.5]]))) 56 | self.assertTrue(np.allclose(activation.gradient(signal, direction), np.array([[0., -0.25, 0.25]]))) 57 | -------------------------------------------------------------------------------- /sknetwork/gnn/tests/test_base_layer.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for base layer gnn""" 4 | 5 | import unittest 6 | 7 | import numpy as np 8 | 9 | from sknetwork.data.test_graphs import test_graph 10 | from sknetwork.gnn.base_layer import BaseLayer 11 | 12 | 13 | class TestBaseLayer(unittest.TestCase): 14 | 15 | def setUp(self) -> None: 16 | """Test graph for tests.""" 17 | self.adjacency = test_graph() 18 | self.n = self.adjacency.shape[0] 19 | self.features = self.adjacency 20 | self.labels = np.array([0]*5 + [1]*5) 21 | self.base_layer = BaseLayer('Conv', len(self.labels)) 22 | 23 | def test_base_layer_init(self): 24 | with self.assertRaises(NotImplementedError): 25 | self.base_layer.forward(self.adjacency, self.features) 26 | 27 | def test_base_layer_initialize_weights(self): 28 | self.base_layer._initialize_weights(10) 29 | self.assertTrue(self.base_layer.weight.shape == (10, len(self.labels))) 30 | self.assertTrue(self.base_layer.bias.shape == (1, len(self.labels))) 31 | self.assertTrue(self.base_layer.weights_initialized) 32 | 33 | def test_base_layer_repr(self): 34 | self.assertTrue(self.base_layer.__repr__().startswith(" BaseLayer(layer_type: Conv, out_channels: 10")) 35 | sage_layer = BaseLayer(layer_type='sageconv', out_channels=len(self.labels)) 36 | self.assertTrue('sample_size' in sage_layer.__repr__()) 37 | self.assertTrue('sageconv' in sage_layer.__repr__()) 38 | -------------------------------------------------------------------------------- /sknetwork/gnn/tests/test_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for loss""" 4 | 5 | import unittest 6 | 7 | from sknetwork.gnn.loss import * 8 | 9 | 10 | class TestLoss(unittest.TestCase): 11 | 12 | def test_get_loss(self): 13 | self.assertTrue(isinstance(get_loss('CrossEntropy'), CrossEntropy)) 14 | self.assertTrue(isinstance(get_loss('BinaryCrossEntropy'), BinaryCrossEntropy)) 15 | with self.assertRaises(ValueError): 16 | get_loss('foo') 17 | 18 | base_loss = BaseLoss() 19 | self.assertTrue(base_loss == get_loss(base_loss)) 20 | with self.assertRaises(TypeError): 21 | get_loss(0) 22 | 23 | def test_ce_loss(self): 24 | cross_entropy = CrossEntropy() 25 | signal = np.array([[0, 5]]) 26 | labels = np.array([1]) 27 | self.assertAlmostEqual(cross_entropy.loss(signal, labels), 0.00671534848911828) 28 | 29 | def test_bce_loss(self): 30 | binary_cross_entropy = BinaryCrossEntropy() 31 | signal = np.array([[0, 5]]) 32 | labels = np.array([1]) 33 | self.assertAlmostEqual(binary_cross_entropy.loss(signal, labels), 0.6998625290490632) 34 | -------------------------------------------------------------------------------- /sknetwork/gnn/tests/test_neigh_sampler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for neighbor sampler""" 4 | 5 | import unittest 6 | 7 | from sknetwork.data.test_graphs import test_graph 8 | from sknetwork.gnn.neighbor_sampler import * 9 | from sknetwork.utils import get_degrees 10 | 11 | 12 | class TestNeighSampler(unittest.TestCase): 13 | 14 | def setUp(self) -> None: 15 | """Test graph for tests.""" 16 | self.adjacency = test_graph() 17 | self.n = self.adjacency.shape[0] 18 | 19 | def test_uni_node_sampler(self): 20 | uni_sampler = UniformNeighborSampler(sample_size=2) 21 | sampled_adj = uni_sampler(self.adjacency) 22 
| self.assertTrue(sampled_adj.shape == self.adjacency.shape) 23 | self.assertTrue(all(get_degrees(sampled_adj) <= 2)) 24 | -------------------------------------------------------------------------------- /sknetwork/gnn/tests/test_optimizer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for optimizer""" 4 | 5 | import unittest 6 | 7 | import numpy as np 8 | 9 | from sknetwork.data.test_graphs import test_graph 10 | from sknetwork.gnn.gnn_classifier import GNNClassifier 11 | from sknetwork.gnn.optimizer import get_optimizer 12 | 13 | 14 | class TestOptimizer(unittest.TestCase): 15 | 16 | def setUp(self) -> None: 17 | self.adjacency = test_graph() 18 | self.features = self.adjacency 19 | self.labels = np.array(4 * [0, 1] + 2 * [-1]) 20 | 21 | def test_get_optimizer(self): 22 | with self.assertRaises(ValueError): 23 | get_optimizer('foo') 24 | with self.assertRaises(TypeError): 25 | get_optimizer(GNNClassifier()) 26 | 27 | def test_optimizer(self): 28 | for optimizer in ['Adam', 'GD']: 29 | gnn = GNNClassifier([4, 2], 'Conv', ['Relu', 'Softmax'], optimizer=optimizer) 30 | _ = gnn.fit_predict(self.adjacency, self.features, self.labels, n_epochs=1) 31 | conv0_weight, conv1_weight = gnn.layers[0].weight.copy(), gnn.layers[1].weight.copy() 32 | conv0_b, conv1_b = gnn.layers[0].bias.copy(), gnn.layers[1].bias.copy() 33 | gnn.optimizer.step(gnn) 34 | # Test weight matrix 35 | self.assertTrue(gnn.layers[0].weight.shape == conv0_weight.shape) 36 | self.assertTrue(gnn.layers[1].weight.shape == conv1_weight.shape) 37 | self.assertTrue((gnn.layers[0].weight != conv0_weight).any()) 38 | self.assertTrue((gnn.layers[1].weight != conv1_weight).any()) 39 | # Test bias vector 40 | self.assertTrue(gnn.layers[0].bias.shape == conv0_b.shape) 41 | self.assertTrue(gnn.layers[1].bias.shape == conv1_b.shape) 42 | self.assertTrue((gnn.layers[0].bias != conv0_b).any()) 43 | self.assertTrue((gnn.layers[1].bias != conv1_b).any()) 44 | -------------------------------------------------------------------------------- /sknetwork/gnn/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for gnn utils""" 4 | 5 | import unittest 6 | 7 | from sknetwork.gnn.utils import * 8 | 9 | 10 | class TestUtils(unittest.TestCase): 11 | 12 | def test_check_norm(self): 13 | with self.assertRaises(ValueError): 14 | check_normalizations('foo') 15 | with self.assertRaises(ValueError): 16 | check_normalizations(['foo', 'bar']) 17 | 18 | def test_early_stopping(self): 19 | self.assertTrue(check_early_stopping(True, np.array([True, False]), 2)) 20 | self.assertFalse(check_early_stopping(True, None, 2)) 21 | self.assertFalse(check_early_stopping(True, np.array([True, False, True]), None)) 22 | self.assertFalse(check_early_stopping(True, np.array([False, False, False]), 5)) 23 | 24 | def test_get_layers(self): 25 | with self.assertRaises(ValueError): 26 | get_layers([4, 2], 'Conv', activations=['Relu', 'Sigmoid', 'Relu'], use_bias=True, normalizations='Both', 27 | self_embeddings=True, sample_sizes=5, loss=None) 28 | # Type compatibility 29 | layers = get_layers([4], 'Conv', activations=['Relu'], use_bias=[True], normalizations=['Both'], 30 | self_embeddings=[True], sample_sizes=[5], loss='Cross entropy') 31 | self.assertTrue(len(np.ravel(layers)) == 1) 32 | # Broadcasting parameters 33 | layers = get_layers([4, 2], ['Conv', 
'Conv'], activations='Relu', use_bias=True, normalizations='Both', 34 | self_embeddings=True, sample_sizes=5, loss='Cross entropy') 35 | self.assertTrue(len(layers) == 2) 36 | 37 | def test_check_loss(self): 38 | layer = get_layers([4], 'Conv', activations=['Relu'], use_bias=[True], normalizations=['Both'], 39 | self_embeddings=[True], sample_sizes=[5], loss=None) 40 | with self.assertRaises(ValueError): 41 | check_loss(layer[0]) 42 | -------------------------------------------------------------------------------- /sknetwork/hierarchy/__init__.py: -------------------------------------------------------------------------------- 1 | """hierarchy module""" 2 | from sknetwork.hierarchy.paris import Paris 3 | from sknetwork.hierarchy.base import BaseHierarchy 4 | from sknetwork.hierarchy.louvain_hierarchy import LouvainIteration, LouvainHierarchy 5 | from sknetwork.hierarchy.metrics import dasgupta_cost, dasgupta_score, tree_sampling_divergence 6 | from sknetwork.hierarchy.postprocess import cut_straight, cut_balanced, aggregate_dendrogram, reorder_dendrogram 7 | -------------------------------------------------------------------------------- /sknetwork/hierarchy/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """tests for hierarchy""" 2 | -------------------------------------------------------------------------------- /sknetwork/hierarchy/tests/test_API.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for hierarchy API""" 4 | import unittest 5 | 6 | from sknetwork.data.test_graphs import * 7 | from sknetwork.hierarchy import * 8 | 9 | 10 | class TestHierarchyAPI(unittest.TestCase): 11 | 12 | def test_undirected(self): 13 | adjacency = test_graph() 14 | n = adjacency.shape[0] 15 | 16 | for algo in [Paris(), LouvainIteration()]: 17 | dendrogram = algo.fit_predict(adjacency) 18 | self.assertTupleEqual(dendrogram.shape, (n - 1, 4)) 19 | 20 | def test_disconnected(self): 21 | adjacency = test_disconnected_graph() 22 | for algo in [Paris(), LouvainIteration()]: 23 | dendrogram = algo.fit_transform(adjacency) 24 | self.assertEqual(dendrogram.shape, (9, 4)) 25 | -------------------------------------------------------------------------------- /sknetwork/hierarchy/tests/test_algos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created in March 2020 5 | @author: Quentin Lutz 6 | @author: Thomas Bonald 7 | """ 8 | 9 | import unittest 10 | 11 | from sknetwork.data.test_graphs import * 12 | from sknetwork.hierarchy import LouvainIteration, LouvainHierarchy, Paris 13 | 14 | 15 | class TestLouvainHierarchy(unittest.TestCase): 16 | 17 | def test(self): 18 | louvain_iteration = LouvainIteration() 19 | louvain_iteration_ = LouvainIteration(resolution=2, depth=1) 20 | louvain_hierarchy = LouvainHierarchy() 21 | louvain_hierarchy_ = LouvainHierarchy(tol_aggregation=0.1) 22 | paris = Paris() 23 | paris_ = Paris(weights='uniform', reorder=False) 24 | for algo in [louvain_iteration, louvain_iteration_, louvain_hierarchy, louvain_hierarchy_, paris, paris_]: 25 | for input_matrix in [test_graph(), test_digraph(), test_bigraph()]: 26 | dendrogram = algo.fit_predict(input_matrix) 27 | self.assertEqual(dendrogram.shape, (input_matrix.shape[0] - 1, 4)) 28 | if algo.bipartite: 29 | self.assertEqual(algo.dendrogram_full_.shape, (sum(input_matrix.shape) - 1, 4)) 30 
| adjacency = test_graph() 31 | algo = Paris() 32 | dendrogram = algo.fit_predict(adjacency) 33 | dendrogram_ = algo.predict() 34 | self.assertAlmostEqual(np.linalg.norm(dendrogram - dendrogram_), 0) 35 | -------------------------------------------------------------------------------- /sknetwork/hierarchy/tests/test_postprocess.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on March 2019 5 | @author: Thomas Bonald 6 | @author: Quentin Lutz 7 | """ 8 | 9 | import unittest 10 | 11 | from sknetwork.data import karate_club 12 | from sknetwork.hierarchy import Paris, cut_straight, cut_balanced, aggregate_dendrogram 13 | 14 | 15 | # noinspection PyMissingOrEmptyDocstring 16 | class TestCuts(unittest.TestCase): 17 | 18 | def setUp(self): 19 | paris = Paris() 20 | self.adjacency = karate_club() 21 | self.dendrogram = paris.fit_transform(self.adjacency) 22 | 23 | def test_cuts(self): 24 | labels = cut_straight(self.dendrogram) 25 | self.assertEqual(len(set(labels)), 2) 26 | labels = cut_straight(self.dendrogram, n_clusters=5) 27 | self.assertEqual(len(set(labels)), 5) 28 | labels = cut_balanced(self.dendrogram, 2) 29 | self.assertEqual(len(set(labels)), 21) 30 | labels, new_dendrogram = cut_balanced(self.dendrogram, max_cluster_size=4, return_dendrogram=True) 31 | self.assertEqual(len(set(labels)), 12) 32 | self.assertTupleEqual(new_dendrogram.shape, (11, 4)) 33 | paris = Paris(reorder=False) 34 | dendrogram = paris.fit_predict(self.adjacency) 35 | labels = cut_balanced(dendrogram, 4) 36 | self.assertEqual(len(set(labels)), 12) 37 | 38 | def test_options(self): 39 | labels = cut_straight(self.dendrogram, threshold=0.5) 40 | self.assertEqual(len(set(labels)), 7) 41 | labels = cut_straight(self.dendrogram, n_clusters=3, threshold=0.5) 42 | self.assertEqual(len(set(labels)), 3) 43 | labels = cut_straight(self.dendrogram, sort_clusters=False) 44 | self.assertEqual(len(set(labels)), 2) 45 | labels = cut_balanced(self.dendrogram, max_cluster_size=2, sort_clusters=False) 46 | self.assertEqual(len(set(labels)), 21) 47 | labels = cut_balanced(self.dendrogram, max_cluster_size=10) 48 | self.assertEqual(len(set(labels)), 5) 49 | 50 | def test_aggregation(self): 51 | aggregated = aggregate_dendrogram(self.dendrogram, n_clusters=3) 52 | self.assertEqual(len(aggregated), 2) 53 | 54 | aggregated, counts = aggregate_dendrogram(self.dendrogram, n_clusters=3, return_counts=True) 55 | self.assertEqual(len(aggregated), 2) 56 | self.assertEqual(len(counts), 3) 57 | 58 | -------------------------------------------------------------------------------- /sknetwork/linalg/__init__.py: -------------------------------------------------------------------------------- 1 | """Module of linear algebra.""" 2 | from sknetwork.linalg.basics import safe_sparse_dot 3 | from sknetwork.linalg.eig_solver import EigSolver, LanczosEig 4 | from sknetwork.linalg.laplacian import get_laplacian 5 | from sknetwork.linalg.normalizer import diagonal_pseudo_inverse, get_norms, normalize 6 | from sknetwork.linalg.operators import Regularizer, Laplacian, Normalizer, CoNeighbor 7 | from sknetwork.linalg.polynome import Polynome 8 | from sknetwork.linalg.sparse_lowrank import SparseLR 9 | from sknetwork.linalg.svd_solver import SVDSolver, LanczosSVD 10 | -------------------------------------------------------------------------------- /sknetwork/linalg/basics.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Apr 2020 5 | @author: Nathan de Lara 6 | """ 7 | 8 | import numpy as np 9 | from scipy import sparse 10 | from scipy.sparse.linalg import LinearOperator 11 | 12 | 13 | def safe_sparse_dot(a, b): 14 | """Dot product with proper use of the sparse matrix format. 15 | Use BLAS instead of numpy.dot when possible to avoid unnecessary copies. 16 | 17 | Parameters 18 | ---------- 19 | a : array, sparse matrix or LinearOperator 20 | b : array, sparse matrix or LinearOperator 21 | Returns 22 | ------- 23 | dot_product : array or sparse matrix 24 | sparse if ``a`` or ``b`` is sparse. 25 | """ 26 | if type(a) == np.ndarray: 27 | return b.T.dot(a.T).T 28 | if isinstance(a, LinearOperator) and isinstance(b, LinearOperator): 29 | raise NotImplementedError 30 | if hasattr(a, 'right_sparse_dot') and type(b) == sparse.csr_matrix: 31 | if callable(a.right_sparse_dot): 32 | return a.right_sparse_dot(b) 33 | if hasattr(b, 'left_sparse_dot') and type(a) == sparse.csr_matrix: 34 | if callable(b.left_sparse_dot): 35 | return b.left_sparse_dot(a) 36 | else: 37 | return a.dot(b) 38 | -------------------------------------------------------------------------------- /sknetwork/linalg/diteration.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c++ 2 | # cython: language_level=3 3 | """ 4 | Created on Apr 2020 5 | @author: Nathan de Lara 6 | """ 7 | cimport cython 8 | from cython.parallel import prange 9 | 10 | 11 | @cython.boundscheck(False) 12 | @cython.wraparound(False) 13 | def diffusion(int[:] indptr, int[:] indices, float[:] data, float[:] scores, float[:] fluid, 14 | float damping_factor, int n_iter, float tol): 15 | """One loop of fluid diffusion.""" 16 | cdef int n = fluid.shape[0] 17 | cdef int i 18 | cdef int j 19 | cdef int j1 20 | cdef int j2 21 | cdef int jj 22 | cdef float sent 23 | cdef float tmp 24 | cdef float removed 25 | cdef float restart_prob = 1 - damping_factor 26 | cdef float residu = restart_prob 27 | 28 | for k in range(n_iter): 29 | for i in prange(n, nogil=True, schedule='guided'): 30 | sent = fluid[i] 31 | if sent > 0: 32 | scores[i] += sent 33 | fluid[i] = 0 34 | j1 = indptr[i] 35 | j2 = indptr[i+1] 36 | tmp = sent * damping_factor 37 | if j2 != j1: 38 | for jj in range(j1, j2): 39 | j = indices[jj] 40 | fluid[j] += tmp * data[jj] 41 | removed = sent * restart_prob 42 | else: 43 | removed = sent 44 | residu -= removed 45 | if residu < tol * restart_prob: 46 | return 47 | return 48 | -------------------------------------------------------------------------------- /sknetwork/linalg/laplacian.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created in July 2022 5 | @author: Thomas Bonald 6 | """ 7 | 8 | import numpy as np 9 | from scipy import sparse 10 | 11 | 12 | def get_laplacian(adjacency: sparse.csr_matrix) -> sparse.csr_matrix: 13 | """Return the Laplacian matrix of a graph.""" 14 | weights = adjacency.dot(np.ones(adjacency.shape[0])) 15 | return sparse.diags(weights) - adjacency 16 | -------------------------------------------------------------------------------- /sknetwork/linalg/normalizer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created in November 2019 
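# [Editor's note] Illustrative sketch, not part of the repository source.
# get_laplacian (laplacian.py above) builds L = D - A, so constant vectors lie
# in its null space; safe_sparse_dot (basics.py above) dispatches the product
# according to the operand types (here: dense vector times sparse matrix).
import numpy as np

from sknetwork.data.test_graphs import test_graph
from sknetwork.linalg import get_laplacian, safe_sparse_dot

adjacency = test_graph()
n = adjacency.shape[0]

laplacian = get_laplacian(adjacency)
assert np.allclose(laplacian.dot(np.ones(n)), 0)     # rows of L sum to zero

degrees = safe_sparse_dot(np.ones(n), adjacency)     # 1^T A, i.e. column sums
assert np.allclose(degrees, adjacency.T.dot(np.ones(n)))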
5 | @author: Nathan de Lara 6 | """ 7 | from typing import Union 8 | 9 | import numpy as np 10 | from scipy import sparse 11 | from scipy.sparse.linalg import LinearOperator 12 | 13 | 14 | def diagonal_pseudo_inverse(weights: np.ndarray) -> sparse.csr_matrix: 15 | """Compute :math:`\\text{diag}(w)^+`, the pseudo-inverse of the diagonal matrix 16 | with diagonal elements given by the weights :math:`w`. 17 | 18 | Parameters 19 | ---------- 20 | weights: 21 | The weights to invert. 22 | 23 | Returns 24 | ------- 25 | sparse.csr_matrix 26 | 27 | """ 28 | diag: sparse.csr_matrix = sparse.diags(weights, format='csr') 29 | diag.data = 1 / diag.data 30 | return diag 31 | 32 | 33 | def get_norms(matrix: Union[sparse.csr_matrix, np.ndarray, LinearOperator], p=1): 34 | """Get the norms of rows of a matrix. 35 | 36 | Parameters 37 | ---------- 38 | matrix : numpy array or sparse CSR matrix or LinearOperator, shape (n_rows, n_cols) 39 | Input matrix. 40 | p : 41 | Order of the norm (1 or 2). 42 | Returns 43 | ------- 44 | norms : np.array, shape (n_rows,) 45 | Vector norms 46 | """ 47 | n_row, n_col = matrix.shape 48 | if isinstance(matrix, np.ndarray): 49 | input_matrix = sparse.csr_matrix(matrix) 50 | elif isinstance(matrix, sparse.csr_matrix): 51 | input_matrix = matrix.copy() 52 | else: 53 | input_matrix = matrix 54 | if p == 1: 55 | if not isinstance(matrix, LinearOperator): 56 | input_matrix.data = np.abs(input_matrix.data) 57 | return input_matrix.dot(np.ones(n_col)) 58 | elif p == 2: 59 | if isinstance(matrix, LinearOperator): 60 | raise ValueError('Only norm 1 is available for linear operators.') 61 | input_matrix.data = input_matrix.data**2 62 | return np.sqrt(input_matrix.dot(np.ones(n_col))) 63 | else: 64 | raise ValueError('Only norms 1 and 2 are available.') 65 | 66 | 67 | def normalize(matrix: Union[sparse.csr_matrix, np.ndarray, LinearOperator], p=1): 68 | """Normalize the rows of a matrix so that all have norm 1 (or 0; null rows remain null). 69 | 70 | Parameters 71 | ---------- 72 | matrix : 73 | Input matrix. 74 | p : 75 | Order of the norm. 76 | 77 | Returns 78 | ------- 79 | normalized matrix : 80 | Normalized matrix (same format as input matrix). 81 | """ 82 | norms = get_norms(matrix, p) 83 | diag = diagonal_pseudo_inverse(norms) 84 | if hasattr(matrix, 'left_sparse_dot') and callable(matrix.left_sparse_dot): 85 | return matrix.left_sparse_dot(diag) 86 | return diag.dot(matrix) 87 | -------------------------------------------------------------------------------- /sknetwork/linalg/polynome.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created in April 2020 5 | @author: Nathan de Lara 6 | """ 7 | 8 | from typing import Union 9 | 10 | import numpy as np 11 | from scipy import sparse 12 | from scipy.sparse.linalg import LinearOperator 13 | 14 | from sknetwork.utils.check import check_format, check_square 15 | 16 | 17 | class Polynome(LinearOperator): 18 | """Polynome of a matrix as a linear operator 19 | 20 | :math:`P(A) = \\alpha_k A^k + ... + \\alpha_1 A + \\alpha_0`. 21 | 22 | Parameters 23 | ---------- 24 | matrix : 25 | Square matrix 26 | coeffs : np.ndarray 27 | Coefficients of the polynome by increasing order of power. 
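# [Editor's note] Illustrative sketch, not part of the repository source.
# get_norms and normalize (normalizer.py above) turn an adjacency matrix into
# a row-stochastic transition matrix when p=1 (each row divided by its norm).
import numpy as np
from scipy import sparse

from sknetwork.linalg import get_norms, normalize

adjacency = sparse.csr_matrix(np.array([[0., 2., 1.], [1., 0., 0.], [3., 1., 0.]]))

norms = get_norms(adjacency, p=1)        # row sums: [3., 1., 4.]
transition = normalize(adjacency, p=1)   # rows divided by their norms
assert np.allclose(get_norms(transition, p=1), 1)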
28 | 29 | Examples 30 | -------- 31 | >>> from scipy import sparse 32 | >>> from sknetwork.linalg import Polynome 33 | >>> matrix = sparse.eye(2, format='csr') 34 | >>> polynome = Polynome(matrix, np.arange(3)) 35 | >>> x = np.ones(2) 36 | >>> polynome.dot(x) 37 | array([3., 3.]) 38 | >>> polynome.T.dot(x) 39 | array([3., 3.]) 40 | 41 | Notes 42 | ----- 43 | The polynome is evaluated using the `Ruffini-Horner method 44 | `_. 45 | """ 46 | 47 | def __init__(self, matrix: Union[sparse.csr_matrix, np.ndarray], coeffs: np.ndarray): 48 | if coeffs.shape[0] == 0: 49 | raise ValueError('A polynome requires at least one coefficient.') 50 | if not isinstance(matrix, LinearOperator): 51 | matrix = check_format(matrix) 52 | check_square(matrix) 53 | shape = matrix.shape 54 | dtype = matrix.dtype 55 | super(Polynome, self).__init__(dtype=dtype, shape=shape) 56 | 57 | self.matrix = matrix 58 | self.coeffs = coeffs 59 | 60 | def __neg__(self): 61 | return Polynome(self.matrix, -self.coeffs) 62 | 63 | def __mul__(self, other): 64 | return Polynome(self.matrix, other * self.coeffs) 65 | 66 | def _matvec(self, matrix: np.ndarray): 67 | """Right dot product with a dense matrix. 68 | """ 69 | y = self.coeffs[-1] * matrix 70 | for a in self.coeffs[::-1][1:]: 71 | y = self.matrix.dot(y) + a * matrix 72 | return y 73 | 74 | def _transpose(self): 75 | """Transposed operator.""" 76 | return Polynome(self.matrix.T.tocsr(), self.coeffs) 77 | -------------------------------------------------------------------------------- /sknetwork/linalg/push.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c++ 2 | # cython: language_level=3 3 | """ 4 | Created on Mars 2021 5 | @author: Wenzhuo Zhao 6 | """ 7 | from libcpp.queue cimport queue 8 | from cython.parallel cimport prange 9 | import numpy as np 10 | cimport numpy as cnp 11 | cimport cython 12 | 13 | 14 | @cython.boundscheck(False) 15 | @cython.wraparound(False) 16 | def push_pagerank(int n, cnp.ndarray[cnp.int32_t, ndim=1] degrees, 17 | int[:] indptr, int[:] indices, 18 | int[:] rev_indptr, int[:] rev_indices, 19 | cnp.ndarray[cnp.float32_t, ndim=1] seeds, 20 | cnp.float32_t damping_factor, cnp.float32_t tol): 21 | """Push-based PageRank""" 22 | cdef cnp.ndarray[cnp.float32_t, ndim=1] residuals 23 | cdef int vertex 24 | cdef int neighbor 25 | cdef int j1 26 | cdef int j2 27 | cdef int j 28 | cdef int[:] indexes 29 | cdef int index 30 | cdef float probability 31 | cdef queue[int] worklist 32 | cdef cnp.ndarray[cnp.float32_t, ndim=1] scores 33 | cdef cnp.float32_t tmp 34 | cdef float norm 35 | 36 | residuals = np.zeros(n, dtype=np.float32) 37 | for vertex in prange(n, nogil=True): 38 | j1 = rev_indptr[vertex] 39 | j2 = rev_indptr[vertex + 1] 40 | # iterate node's in-coming neighbors 41 | for j in range(j1, j2): 42 | neighbor = rev_indices[j] 43 | residuals[vertex] += 1 / degrees[neighbor] 44 | """add the probability of seeds""" 45 | residuals[vertex] *= (1 - damping_factor) * \ 46 | damping_factor * (1 + seeds[vertex]) 47 | 48 | # node with high residual value will be processed first 49 | indexes = np.argsort(-residuals).astype(np.int32) 50 | for index in indexes: 51 | worklist.push(index) 52 | scores = np.full(n, (1 - damping_factor), dtype=np.float32) 53 | 54 | while not worklist.empty(): 55 | vertex = worklist.front() 56 | worklist.pop() 57 | # scores[v]_new 58 | scores[vertex] += residuals[vertex] 59 | # iterate node's out-coming neighbors 60 | j1 = indptr[vertex] 61 | j2 = indptr[vertex + 1] 62 | for j 
in prange(j1, j2, nogil=True): 63 | neighbor = indices[j] 64 | tmp = residuals[neighbor] 65 | residuals[neighbor] += residuals[vertex] * \ 66 | (1 - damping_factor) / degrees[vertex] 67 | if residuals[neighbor] > tol > tmp: 68 | worklist.push(neighbor) 69 | norm = np.linalg.norm(scores, 1) 70 | scores /= norm 71 | return scores 72 | -------------------------------------------------------------------------------- /sknetwork/linalg/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for linalg""" 2 | -------------------------------------------------------------------------------- /sknetwork/linalg/tests/test_eig.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for eigenvalue solver.""" 4 | 5 | import unittest 6 | 7 | import numpy as np 8 | 9 | from sknetwork.data import miserables, karate_club 10 | from sknetwork.linalg import LanczosEig, SparseLR 11 | 12 | 13 | def eigenvector_err(matrix, eigenvectors, eigenvalues): 14 | """Approximation error for eigenvectors.""" 15 | err = matrix.dot(eigenvectors) - eigenvectors * eigenvalues 16 | return np.linalg.norm(err) 17 | 18 | 19 | # noinspection DuplicatedCode 20 | class TestSolvers(unittest.TestCase): 21 | 22 | def setUp(self): 23 | """Load les Miserables and regularized version""" 24 | self.adjacency = miserables() 25 | self.random_state = np.random.RandomState(123) 26 | n = self.adjacency.shape[0] 27 | x = np.random.random(n) 28 | self.slr = SparseLR(self.adjacency, [(x, x)]) 29 | 30 | def test_lanczos(self): 31 | solver = LanczosEig('LM') 32 | solver.fit(self.adjacency, 2) 33 | self.assertEqual(len(solver.eigenvalues_), 2) 34 | self.assertAlmostEqual(eigenvector_err(self.adjacency, solver.eigenvectors_, solver.eigenvalues_), 0) 35 | 36 | solver.fit(self.slr, 2) 37 | self.assertEqual(len(solver.eigenvalues_), 2) 38 | self.assertAlmostEqual(eigenvector_err(self.slr, solver.eigenvectors_, solver.eigenvalues_), 0) 39 | 40 | adjacency = karate_club() 41 | solver = LanczosEig('SM') 42 | solver.fit(adjacency, 2) 43 | self.assertEqual(len(solver.eigenvalues_), 2) 44 | self.assertAlmostEqual(eigenvector_err(adjacency, solver.eigenvectors_, solver.eigenvalues_), 0) 45 | -------------------------------------------------------------------------------- /sknetwork/linalg/tests/test_laplacian.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for laplacian.""" 4 | 5 | import unittest 6 | 7 | import numpy as np 8 | 9 | from sknetwork.data.test_graphs import test_graph 10 | from sknetwork.linalg import get_laplacian 11 | 12 | 13 | class TestLaplacian(unittest.TestCase): 14 | 15 | def test(self): 16 | adjacency = test_graph() 17 | laplacian = get_laplacian(adjacency) 18 | self.assertEqual(np.linalg.norm(laplacian.dot(np.ones(adjacency.shape[0]))), 0) 19 | -------------------------------------------------------------------------------- /sknetwork/linalg/tests/test_normalization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created in April 2020 5 | @author: Nathan de Lara 6 | """ 7 | 8 | import unittest 9 | 10 | import numpy as np 11 | from scipy import sparse 12 | 13 | from sknetwork.linalg import normalize 14 | 15 | 16 | class TestNormalization(unittest.TestCase): 17 | 18 | def 
test_formats(self): 19 | n = 5 20 | mat1 = normalize(np.eye(n)) 21 | mat2 = normalize(sparse.eye(n)) 22 | 23 | x = np.random.randn(n) 24 | self.assertAlmostEqual(np.linalg.norm(mat1.dot(x) - x), 0) 25 | self.assertAlmostEqual(np.linalg.norm(mat2.dot(x) - x), 0) 26 | 27 | mat1 = np.random.rand(n**2).reshape((n, n)) 28 | mat2 = sparse.csr_matrix(mat1) 29 | mat1 = normalize(mat1, p=2) 30 | mat2 = normalize(mat2, p=2) 31 | self.assertAlmostEqual(np.linalg.norm(mat1.dot(x) - mat2.dot(x)), 0) 32 | 33 | with self.assertRaises(ValueError): 34 | normalize(mat1, p=3) 35 | -------------------------------------------------------------------------------- /sknetwork/linalg/tests/test_polynome.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for polynomials.""" 4 | 5 | import unittest 6 | 7 | import numpy as np 8 | from scipy import sparse 9 | 10 | from sknetwork.data.test_graphs import test_graph 11 | from sknetwork.linalg import Polynome 12 | 13 | 14 | class TestPolynome(unittest.TestCase): 15 | 16 | def test_init(self): 17 | adjacency = test_graph() 18 | with self.assertRaises(ValueError): 19 | Polynome(adjacency, np.array([])) 20 | 21 | def test_operations(self): 22 | adjacency = test_graph() 23 | n = adjacency.shape[0] 24 | polynome = Polynome(adjacency, np.arange(3)) 25 | x = np.random.randn(n) 26 | 27 | y1 = (polynome * 2).dot(x) 28 | y2 = (-polynome).dot(x) 29 | self.assertAlmostEqual(np.linalg.norm(0.5 * y1 + y2), 0) 30 | 31 | def test_dot(self): 32 | adjacency = sparse.eye(5, format='csr') 33 | polynome = Polynome(adjacency, np.arange(2)) 34 | 35 | x = np.random.randn(5, 3) 36 | y = polynome.dot(x) 37 | self.assertAlmostEqual(np.linalg.norm(x - y), 0) 38 | 39 | -------------------------------------------------------------------------------- /sknetwork/linalg/tests/test_ppr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for d-iteration""" 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from sknetwork.data import house, karate_club 9 | from sknetwork.data.parse import from_edge_list 10 | from sknetwork.data.test_graphs import * 11 | from sknetwork.linalg.operators import Regularizer 12 | from sknetwork.linalg.ppr_solver import get_pagerank 13 | from sknetwork.utils.check import is_proba_array 14 | 15 | 16 | class TestPPR(unittest.TestCase): 17 | 18 | def test_diteration(self): 19 | # test convergence by tolerance 20 | for adjacency in [house(), test_graph(), test_digraph()]: 21 | seeds = np.ones(adjacency.shape[0]) / adjacency.shape[0] 22 | pr = get_pagerank(adjacency, damping_factor=0.85, n_iter=100, tol=10, solver='diteration', seeds=seeds) 23 | self.assertTrue(is_proba_array(pr)) 24 | 25 | # test graph with some null out-degree 26 | adjacency = from_edge_list([(0, 1)]) 27 | seeds = np.ones(adjacency.shape[0]) / adjacency.shape[0] 28 | pr = get_pagerank(adjacency, damping_factor=0.85, n_iter=100, tol=10, solver='diteration', seeds=seeds) 29 | self.assertTrue(is_proba_array(pr)) 30 | 31 | # test invalid entry 32 | adjacency = Regularizer(house(), 0.1) 33 | seeds = np.ones(adjacency.shape[0]) / adjacency.shape[0] 34 | with self.assertRaises(ValueError): 35 | get_pagerank(adjacency, damping_factor=0.85, n_iter=100, tol=10, solver='diteration', seeds=seeds) 36 | 37 | def test_push(self): 38 | # test convergence by tolerance 39 | adjacency = karate_club() 40 | seeds = 
np.ones(adjacency.shape[0]) / adjacency.shape[0] 41 | pr = get_pagerank(adjacency, damping_factor=0.85, 42 | n_iter=100, tol=1e-1, solver='push', seeds=seeds) 43 | self.assertTrue(is_proba_array(pr)) 44 | 45 | def test_piteration(self): 46 | # test on SparseLR matrix 47 | adjacency = Regularizer(house(), 0.1) 48 | seeds = np.ones(adjacency.shape[0]) / adjacency.shape[0] 49 | pr = get_pagerank(adjacency, damping_factor=0.85, n_iter=100, tol=10, solver='piteration', seeds=seeds) 50 | self.assertTrue(is_proba_array(pr)) 51 | -------------------------------------------------------------------------------- /sknetwork/linalg/tests/test_sparse_lowrank.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for embeddings metrics.""" 4 | 5 | import unittest 6 | 7 | import numpy as np 8 | 9 | from sknetwork.data import house, star_wars 10 | from sknetwork.linalg.sparse_lowrank import SparseLR 11 | 12 | 13 | class TestSparseLowRank(unittest.TestCase): 14 | 15 | def setUp(self): 16 | """Simple regularized adjacency and biadjacency for tests.""" 17 | self.undirected = SparseLR(house(), [(np.ones(5), np.ones(5))]) 18 | self.bipartite = SparseLR(star_wars(), [(np.ones(4), np.ones(3))]) 19 | 20 | def test_init(self): 21 | with self.assertRaises(ValueError): 22 | SparseLR(house(), [(np.ones(5), np.ones(4))]) 23 | with self.assertRaises(ValueError): 24 | SparseLR(house(), [(np.ones(4), np.ones(5))]) 25 | 26 | def test_addition(self): 27 | addition = self.undirected + self.undirected 28 | expected = SparseLR(2 * house(), [(np.ones(5), 2 * np.ones(5))]) 29 | err = (addition.sparse_mat - expected.sparse_mat).count_nonzero() 30 | self.assertEqual(err, 0) 31 | x = np.random.rand(5) 32 | self.assertAlmostEqual(np.linalg.norm(addition.dot(x) - expected.dot(x)), 0) 33 | 34 | def test_operations(self): 35 | adjacency = self.undirected.sparse_mat 36 | slr = -self.undirected 37 | slr += adjacency 38 | slr -= adjacency 39 | slr.left_sparse_dot(adjacency) 40 | slr.right_sparse_dot(adjacency) 41 | slr.astype(float) 42 | 43 | def test_product(self): 44 | prod = self.undirected.dot(np.ones(5)) 45 | self.assertEqual(prod.shape, (5,)) 46 | prod = self.bipartite.dot(np.ones(3)) 47 | self.assertEqual(np.linalg.norm(prod - np.array([5., 4., 6., 5.])), 0.) 48 | prod = self.bipartite.dot(0.5 * np.ones(3)) 49 | self.assertEqual(np.linalg.norm(prod - np.array([2.5, 2., 3., 2.5])), 0.) 50 | prod = (2 * self.bipartite).dot(0.5 * np.ones(3)) 51 | self.assertEqual(np.linalg.norm(prod - 2 * np.array([2.5, 2., 3., 2.5])), 0.) 52 | 53 | def test_transposition(self): 54 | transposed = self.undirected.T 55 | error = (self.undirected.sparse_mat - transposed.sparse_mat).data 56 | self.assertEqual(abs(error).sum(), 0.) 
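# [Editor's note] Illustrative sketch, not part of the repository source.
# SparseLR represents "sparse + low rank" matrices without materializing the
# dense part; as in test_eig.py above, it can be fed directly to LanczosEig.
import numpy as np

from sknetwork.data import karate_club
from sknetwork.linalg import LanczosEig, SparseLR

adjacency = karate_club()
n = adjacency.shape[0]

# adjacency plus a rank-one regularization term (outer product of the two vectors)
regularized = SparseLR(adjacency, [(np.ones(n) / n, np.ones(n))])

solver = LanczosEig('LM')
solver.fit(regularized, 2)
print(solver.eigenvalues_)            # two eigenvalues of largest magnitude
print(solver.eigenvectors_.shape)     # (n, 2)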
57 | transposed = self.bipartite.T 58 | x, y = transposed.low_rank_tuples[0] 59 | self.assertTrue((x == np.ones(3)).all()) 60 | self.assertTrue((y == np.ones(4)).all()) 61 | 62 | -------------------------------------------------------------------------------- /sknetwork/linalg/tests/test_svd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for svd.""" 4 | 5 | import unittest 6 | 7 | import numpy as np 8 | 9 | from sknetwork.data import movie_actor 10 | from sknetwork.linalg import LanczosSVD, SparseLR 11 | 12 | 13 | def svd_err(matrix, u, v, sigma): 14 | """Approximation error for singular vectors.""" 15 | err = matrix.dot(v) - u * sigma 16 | return np.linalg.norm(err) 17 | 18 | 19 | # noinspection DuplicatedCode 20 | class TestSolvers(unittest.TestCase): 21 | 22 | def setUp(self): 23 | """Simple biadjacency for tests.""" 24 | self.biadjacency = movie_actor() 25 | n_row, n_col = self.biadjacency.shape 26 | self.slr = SparseLR(self.biadjacency, [(np.random.rand(n_row), np.random.rand(n_col))]) 27 | 28 | def test_lanczos(self): 29 | solver = LanczosSVD() 30 | solver.fit(self.biadjacency, 2) 31 | self.assertEqual(len(solver.singular_values_), 2) 32 | self.assertAlmostEqual(svd_err(self.biadjacency, solver.singular_vectors_left_, solver.singular_vectors_right_, 33 | solver.singular_values_), 0) 34 | 35 | solver.fit(self.slr, 2) 36 | self.assertEqual(len(solver.singular_values_), 2) 37 | self.assertAlmostEqual(svd_err(self.slr, solver.singular_vectors_left_, solver.singular_vectors_right_, 38 | solver.singular_values_), 0) 39 | -------------------------------------------------------------------------------- /sknetwork/linkpred/__init__.py: -------------------------------------------------------------------------------- 1 | """link prediction module""" 2 | from sknetwork.linkpred.nn import NNLinker 3 | -------------------------------------------------------------------------------- /sknetwork/linkpred/base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created in March 2022 5 | @author: Thomas Bonald 6 | """ 7 | from abc import ABC 8 | 9 | import numpy as np 10 | from scipy import sparse 11 | 12 | from sknetwork.base import Algorithm 13 | 14 | 15 | class BaseLinker(Algorithm, ABC): 16 | """Base class for link prediction. 17 | 18 | Attributes 19 | ---------- 20 | links_: sparse.csr_matrix 21 | Link matrix. 22 | """ 23 | 24 | def __init__(self): 25 | self.links_ = None 26 | 27 | def predict(self) -> sparse.csr_matrix: 28 | """Return the predicted links. 29 | 30 | Returns 31 | ------- 32 | links_ : sparse.csr_matrix 33 | Link matrix. 34 | """ 35 | return self.links_ 36 | 37 | def fit_predict(self, *args, **kwargs) -> sparse.csr_matrix: 38 | """Fit algorithm to data and return the links. Same parameters as the ``fit`` method. 39 | 40 | Returns 41 | ------- 42 | links_ : sparse.csr_matrix 43 | Link matrix. 
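# [Editor's note] Illustrative sketch, not part of the repository source.
# NNLinker (see test_nn.py below) predicts links between nodes that are close
# in an embedding space; n_neighbors bounds the number of candidate links per
# node and threshold is the minimal similarity kept.
from sknetwork.data.test_graphs import test_graph
from sknetwork.linkpred import NNLinker

adjacency = test_graph()

algo = NNLinker(n_neighbors=3, threshold=0.2)
links = algo.fit_predict(adjacency)      # sparse matrix of predicted links
assert links.shape == adjacency.shape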
44 | """ 45 | self.fit(*args, **kwargs) 46 | return self.links_ 47 | -------------------------------------------------------------------------------- /sknetwork/linkpred/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """tests for link prediction""" 2 | -------------------------------------------------------------------------------- /sknetwork/linkpred/tests/test_nn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for link prediction by nearest neighbors""" 4 | import unittest 5 | 6 | from sknetwork.linkpred import NNLinker 7 | from sknetwork.data.test_graphs import * 8 | from sknetwork.embedding import Spectral 9 | from sknetwork.utils import get_degrees 10 | 11 | 12 | class TestNNLinker(unittest.TestCase): 13 | 14 | def test_link_prediction(self): 15 | for input_matrix in [test_graph(), test_digraph(), test_bigraph()]: 16 | 17 | n_neighbors = 5 18 | threshold = 0.2 19 | algo = NNLinker(n_neighbors=n_neighbors, threshold=threshold) 20 | links = algo.fit_predict(input_matrix) 21 | self.assertTrue(links.shape == input_matrix.shape) 22 | self.assertTrue(np.all(get_degrees(links) <= n_neighbors)) 23 | self.assertTrue(np.all(links.data >= threshold)) 24 | 25 | algo = NNLinker(embedding_method=Spectral(2)) 26 | links = algo.fit_predict(input_matrix) 27 | self.assertTrue(links.shape == input_matrix.shape) 28 | -------------------------------------------------------------------------------- /sknetwork/log.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created in December 2019 5 | @author: Quentin Lutz 6 | """ 7 | 8 | 9 | class Log: 10 | """Log class for verbosity features""" 11 | def __init__(self, verbose: bool = False): 12 | self.verbose = verbose 13 | self.log = '' 14 | 15 | def print_log(self, *args): 16 | """Fill log with text.""" 17 | if self.verbose: 18 | print(*args) 19 | self.log += ' '.join(map(str, args)) + '\n' 20 | -------------------------------------------------------------------------------- /sknetwork/path/__init__.py: -------------------------------------------------------------------------------- 1 | """Path module""" 2 | from sknetwork.path.dag import get_dag 3 | from sknetwork.path.distances import get_distances 4 | from sknetwork.path.search import breadth_first_search 5 | from sknetwork.path.shortest_path import get_shortest_path 6 | -------------------------------------------------------------------------------- /sknetwork/path/dag.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created in May 2023 5 | @author: Thomas Bonald 6 | """ 7 | from typing import Iterable, Optional, Union 8 | 9 | import numpy as np 10 | from scipy import sparse 11 | 12 | from sknetwork.path.distances import get_distances 13 | from sknetwork.utils.check import check_format, check_square 14 | 15 | 16 | def get_dag(adjacency: sparse.csr_matrix, source: Optional[Union[int, Iterable]] = None, 17 | order: Optional[np.ndarray] = None) -> sparse.csr_matrix: 18 | """Get a Directed Acyclic Graph (DAG) from a graph. 19 | If the order is specified, keep only edges i -> j such that 0 <= order[i] < order[j]. 20 | If the source is specified, use the distances from this source node (or set of source nodes) as order. 
21 | If neither the order nor the source is specified, use the node indices as order. 22 | 23 | Parameters 24 | ---------- 25 | adjacency : 26 | Adjacency matrix of the graph. 27 | source : 28 | Source node (or set of source nodes). 29 | order : 30 | Order of nodes. Negative values ignored. 31 | 32 | Returns 33 | ------- 34 | dag : 35 | Adjacency matrix of the directed acyclic graph. 36 | """ 37 | adjacency = check_format(adjacency, allow_empty=True) 38 | check_square(adjacency) 39 | 40 | if order is None: 41 | if source is None: 42 | order = np.arange(adjacency.shape[0]) 43 | else: 44 | order = get_distances(adjacency, source) 45 | 46 | dag = adjacency.astype(bool).tocoo() 47 | for value in np.unique(order): 48 | if value < 0: 49 | dag.data[order[dag.row] == value] = 0 50 | else: 51 | dag.data[(order[dag.row] == value) & (order[dag.col] <= value)] = 0 52 | dag.eliminate_zeros() 53 | 54 | return dag.tocsr() 55 |
-------------------------------------------------------------------------------- /sknetwork/path/search.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created in May 2023 5 | """ 6 | import numpy as np 7 | from scipy import sparse 8 | 9 | from sknetwork.path.distances import get_distances 10 | 11 | 12 | def breadth_first_search(adjacency: sparse.csr_matrix, source: int): 13 | """Breadth-first ordering starting from some node. 14 | 15 | Parameters 16 | ---------- 17 | adjacency : 18 | Adjacency matrix of the graph. 19 | source : int 20 | Source node. 21 | 22 | Returns 23 | ------- 24 | index : np.ndarray 25 | Node index corresponding to the breadth-first-search from the source. 26 | The length of the vector is the number of nodes reachable from the source. 27 | """ 28 | distances = get_distances(adjacency, source) 29 | indices = np.argsort(distances) 30 | n = np.sum(distances < 0) 31 | return indices[n:] 32 |
-------------------------------------------------------------------------------- /sknetwork/path/shortest_path.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created in May 2023 5 | @author: Thomas Bonald 6 | """ 7 | from typing import Iterable, Optional, Union, Tuple 8 | 9 | import numpy as np 10 | from scipy import sparse 11 | 12 | from sknetwork.path.dag import get_dag 13 | from sknetwork.utils.format import bipartite2undirected 14 | from sknetwork.path.distances import get_distances 15 | 16 | 17 | def get_shortest_path(input_matrix: sparse.csr_matrix, source: Optional[Union[int, Iterable]] = None, 18 | source_row: Optional[Union[int, Iterable]] = None, 19 | source_col: Optional[Union[int, Iterable]] = None, force_bipartite: bool = False) \ 20 | -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]: 21 | """Get the shortest paths from a source (or a set of sources) in number of hops. 22 | 23 | Parameters 24 | ---------- 25 | input_matrix : 26 | Adjacency matrix or biadjacency matrix of the graph. 27 | source : 28 | If an integer, index of the source node. 29 | If a list, indices of source nodes (the shortest distance to one of these nodes is returned). 30 | source_row, source_col : 31 | For bipartite graphs, index of source nodes on rows and columns. 32 | The parameter source_row is an alias for source (at least one of them must be ``None``).
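# [Editor's note] Illustrative sketch, not part of the repository source.
# breadth_first_search (search.py above) returns the nodes reachable from the
# source, ordered by increasing distance; unreachable nodes are left out.
from sknetwork.data import cyclic_digraph
from sknetwork.path import breadth_first_search

adjacency = cyclic_digraph(3)
order = breadth_first_search(adjacency, 0)
print(order)    # [0 1 2]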
33 | force_bipartite : 34 | If ``True``, consider the input matrix as the biadjacency matrix of a bipartite graph. 35 | Set to ``True`` if the parameters source_row or source_col are specified. 36 | 37 | Returns 38 | ------- 39 | path : sparse.csr_matrix 40 | Adjacency matrix of the graph of the shortest paths from the source node (or the set of source nodes). 41 | If the input graph is a bipartite graph, the shape of the matrix is (n_row + n_col, n_row + n_col) with the new 42 | index corresponding to the rows then the columns of the original graph. 43 | 44 | Examples 45 | -------- 46 | >>> from sknetwork.data import cyclic_digraph 47 | >>> adjacency = cyclic_digraph(3) 48 | >>> path = get_shortest_path(adjacency, source=0) 49 | >>> path.toarray().astype(int) 50 | array([[0, 1, 0], 51 | [0, 0, 1], 52 | [0, 0, 0]]) 53 | """ 54 | distances = get_distances(input_matrix, source, source_row, source_col, force_bipartite) 55 | if type(distances) == tuple: 56 | adjacency = bipartite2undirected(input_matrix) 57 | distances = np.hstack(distances) 58 | else: 59 | adjacency = input_matrix 60 | return get_dag(adjacency, order=distances) 61 | 62 |
-------------------------------------------------------------------------------- /sknetwork/path/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """tests for path module""" 2 |
-------------------------------------------------------------------------------- /sknetwork/path/tests/test_dag.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """"tests for search.py""" 4 | 5 | import unittest 6 | 7 | import numpy as np 8 | 9 | from sknetwork.data import cyclic_digraph 10 | from sknetwork.data.test_graphs import * 11 | from sknetwork.path import get_dag 12 | 13 | 14 | class TestSearch(unittest.TestCase): 15 | 16 | def test(self): 17 | adjacency = cyclic_digraph(3) 18 | dag = get_dag(adjacency) 19 | self.assertEqual(dag.nnz, 2) 20 | 21 | adjacency = test_graph_empty() 22 | dag = get_dag(adjacency) 23 | self.assertEqual(dag.nnz, 0) 24 | 25 | adjacency = test_graph() 26 | dag = get_dag(adjacency) 27 | self.assertEqual(dag.nnz, 12) 28 | dag = get_dag(adjacency, order=np.arange(10) % 3) 29 | self.assertEqual(dag.nnz, 10) 30 | 31 | adjacency = test_disconnected_graph() 32 | dag = get_dag(adjacency, 3) 33 | self.assertEqual(dag.nnz, 1) 34 | 35 | adjacency = test_digraph() 36 | dag = get_dag(adjacency, 1) 37 | self.assertEqual(dag.nnz, 4) 38 |
-------------------------------------------------------------------------------- /sknetwork/path/tests/test_distances.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """"tests for distances.py""" 4 | import unittest 5 | 6 | from sknetwork.data.test_graphs import * 7 | from sknetwork.path.distances import get_distances 8 | 9 | 10 | class TestDistances(unittest.TestCase): 11 | 12 | def test_input(self): 13 | adjacency = test_graph() 14 | with self.assertRaises(ValueError): 15 | get_distances(adjacency) 16 | with self.assertRaises(ValueError): 17 | get_distances(adjacency, source=0, source_row=5) 18 | 19 | def test_algo(self): 20 | adjacency = test_graph() 21 | distances = get_distances(adjacency, 0) 22 | distances_ = np.array([0, 1, 3, 2, 2, 3, 2, 3, 4, 4]) 23 | self.assertTrue(all(distances == distances_)) 24 | distances = get_distances(adjacency, 0, transpose=True) 25 |
self.assertTrue(all(distances == distances_)) 26 | distances = get_distances(adjacency, [0, 5]) 27 | distances_ = np.array([0, 1, 3, 2, 1, 0, 1, 2, 4, 3]) 28 | self.assertTrue(all(distances == distances_)) 29 | 30 | adjacency = test_graph_empty() 31 | source = [0, 3] 32 | distances = get_distances(adjacency, source) 33 | distances_ = -np.ones(len(distances), dtype=int) 34 | distances_[source] = 0 35 | self.assertTrue(all(distances == distances_)) 36 | 37 | adjacency = test_digraph() 38 | distances = get_distances(adjacency, [0]) 39 | distances_ = np.array([0, 1, 3, 2, 2, 3, -1, -1, -1, -1]) 40 | self.assertTrue(all(distances == distances_)) 41 | distances = get_distances(adjacency, [0], transpose=True) 42 | self.assertTrue(sum(distances < 0) == 9) 43 | distances = get_distances(adjacency, [0, 5], transpose=True) 44 | distances_ = np.array([0, 2, -1, -1, 1, 0, 1, -1, -1, -1]) 45 | self.assertTrue(all(distances == distances_)) 46 | 47 | biadjacency = test_bigraph() 48 | distances_row, distances_col = get_distances(biadjacency, [0]) 49 | distances_row_, distances_col_ = np.array([0, -1, 2, -1, -1, -1]), np.array([3, 1, -1, -1, -1, -1, -1, -1]) 50 | self.assertTrue(all(distances_row == distances_row_)) 51 | self.assertTrue(all(distances_col == distances_col_)) 52 | 53 | adjacency = test_graph() 54 | distances_row, distances_col = get_distances(adjacency, source_col=[0]) 55 | self.assertTrue(all(distances_row % 2)) 56 | self.assertTrue(all((distances_col + 1) % 2)) 57 | 58 | biadjacency = test_bigraph() 59 | distances_row, distances_col = get_distances(biadjacency, source=0, source_col=[1, 2]) 60 | distances_row_, distances_col_ = np.array([0, 1, 1, -1, -1, -1]), np.array([2, 0, 0, 2, -1, -1, -1, -1]) 61 | self.assertTrue(all(distances_row == distances_row_)) 62 | self.assertTrue(all(distances_col == distances_col_)) 63 | -------------------------------------------------------------------------------- /sknetwork/path/tests/test_search.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """"tests for search.py""" 4 | 5 | import unittest 6 | 7 | import numpy as np 8 | 9 | from sknetwork.data import cyclic_digraph 10 | from sknetwork.data.test_graphs import * 11 | from sknetwork.path import breadth_first_search 12 | 13 | 14 | class TestSearch(unittest.TestCase): 15 | 16 | def test_bfs(self): 17 | adjacency = cyclic_digraph(3) 18 | search = breadth_first_search(adjacency, 0) 19 | search_ = np.arange(3) 20 | self.assertTrue(all(search == search_)) 21 | 22 | adjacency = test_graph_empty() 23 | search = breadth_first_search(adjacency, 0) 24 | search_ = np.array([0]) 25 | self.assertTrue(all(search == search_)) 26 | 27 | adjacency = test_graph() 28 | search = breadth_first_search(adjacency, 3) 29 | search_ = np.array([3, 1, 2, 0, 4, 6, 8, 5, 7, 9]) 30 | self.assertTrue(all(search == search_)) 31 | 32 | adjacency = test_disconnected_graph() 33 | search = breadth_first_search(adjacency, 2) 34 | search_ = np.array([2, 3]) 35 | self.assertTrue(all(search == search_)) 36 | 37 | adjacency = test_digraph() 38 | search = breadth_first_search(adjacency, 1) 39 | search_ = {1, 3, 4, 2, 5} 40 | self.assertTrue(set(list(search)) == search_) 41 | -------------------------------------------------------------------------------- /sknetwork/path/tests/test_shortest_path.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """"tests for 
shortest_path.py""" 4 | import unittest 5 | 6 | from sknetwork.data.test_graphs import * 7 | from sknetwork.path.shortest_path import get_shortest_path 8 | 9 | 10 | class TestShortestPath(unittest.TestCase): 11 | 12 | def test_path(self): 13 | adjacency = test_graph_empty() 14 | path = get_shortest_path(adjacency, 0) 15 | self.assertEqual(path.nnz, 0) 16 | 17 | adjacency = test_graph() 18 | path = get_shortest_path(adjacency, 0) 19 | self.assertEqual(path.nnz, 10) 20 | path = get_shortest_path(adjacency, [0, 4, 6]) 21 | self.assertEqual(path.nnz, 10) 22 | path = get_shortest_path(adjacency, np.arange(10)) 23 | self.assertEqual(path.nnz, 0) 24 | path = get_shortest_path(adjacency, [0, 5]) 25 | self.assertEqual(path.nnz, 9) 26 | 27 | adjacency = test_disconnected_graph() 28 | path = get_shortest_path(adjacency, 4) 29 | self.assertEqual(path.nnz, 5) 30 | 31 | adjacency = test_digraph() 32 | path = get_shortest_path(adjacency, 0) 33 | self.assertEqual(path.nnz, 5) 34 | 35 | biadjacency = test_bigraph() 36 | path = get_shortest_path(biadjacency, 0) 37 | self.assertEqual(path.nnz, 3) 38 | self.assertTrue(path.shape[0] == np.sum(biadjacency.shape)) 39 | path = get_shortest_path(biadjacency, source_col=np.arange(biadjacency.shape[1])) 40 | self.assertEqual(path.nnz, biadjacency.nnz) 41 | -------------------------------------------------------------------------------- /sknetwork/ranking/__init__.py: -------------------------------------------------------------------------------- 1 | """ranking module""" 2 | from sknetwork.ranking.base import BaseRanking 3 | from sknetwork.ranking.betweenness import Betweenness 4 | from sknetwork.ranking.closeness import Closeness 5 | from sknetwork.ranking.hits import HITS 6 | from sknetwork.ranking.katz import Katz 7 | from sknetwork.ranking.pagerank import PageRank 8 | from sknetwork.ranking.postprocess import top_k 9 | -------------------------------------------------------------------------------- /sknetwork/ranking/base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created in November 2019 5 | @author: Nathan de Lara 6 | """ 7 | from abc import ABC 8 | 9 | import numpy as np 10 | 11 | from sknetwork.base import Algorithm 12 | 13 | 14 | class BaseRanking(Algorithm, ABC): 15 | """Base class for ranking algorithms. 16 | 17 | Attributes 18 | ---------- 19 | scores_ : np.ndarray 20 | Score of each node. 21 | scores_row_: np.ndarray 22 | Scores of rows, for bipartite graphs. 23 | scores_col_: np.ndarray 24 | Scores of columns, for bipartite graphs. 25 | """ 26 | def __init__(self): 27 | self.scores_ = None 28 | 29 | def predict(self, columns: bool = False) -> np.ndarray: 30 | """Return the scores predicted by the algorithm. 31 | 32 | Parameters 33 | ---------- 34 | columns : bool 35 | If ``True``, return the prediction for columns. 36 | 37 | Returns 38 | ------- 39 | scores : np.ndarray 40 | Scores. 41 | """ 42 | if columns: 43 | return self.scores_col_ 44 | return self.scores_ 45 | 46 | def fit_predict(self, *args, **kwargs) -> np.ndarray: 47 | """Fit algorithm to data and return the scores. Same parameters as the ``fit`` method. 48 | 49 | Returns 50 | ------- 51 | scores : np.ndarray 52 | Scores. 
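# [Editor's note] Illustrative sketch, not part of the repository source.
# Ranking algorithms share the BaseRanking interface defined above: fit_predict
# returns scores_, and for bipartite graphs the scores are split into
# scores_row_ and scores_col_ (predict(columns=True) returns the latter),
# as exercised in test_API.py below.
from sknetwork.data import star_wars
from sknetwork.ranking import PageRank

biadjacency = star_wars()
pagerank = PageRank()
scores_row = pagerank.fit_predict(biadjacency)   # scores of rows
scores_col = pagerank.predict(columns=True)      # scores of columns
assert scores_row.shape == (biadjacency.shape[0],)
assert scores_col.shape == (biadjacency.shape[1],)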
53 | """ 54 | self.fit(*args, **kwargs) 55 | return self.scores_ 56 | 57 | def _split_vars(self, shape): 58 | n_row = shape[0] 59 | self.scores_row_ = self.scores_[:n_row] 60 | self.scores_col_ = self.scores_[n_row:] 61 | self.scores_ = self.scores_row_ 62 | -------------------------------------------------------------------------------- /sknetwork/ranking/katz.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on May 2020 5 | @author: Nathan de Lara 6 | """ 7 | from typing import Union 8 | 9 | import numpy as np 10 | from scipy import sparse 11 | from scipy.sparse.linalg import LinearOperator 12 | 13 | from sknetwork.linalg.polynome import Polynome 14 | from sknetwork.ranking.base import BaseRanking 15 | from sknetwork.utils.check import check_format 16 | from sknetwork.utils.format import get_adjacency 17 | 18 | 19 | class Katz(BaseRanking): 20 | """Katz centrality, defined by: 21 | 22 | :math:`\\sum_{k=1}^K\\alpha^k(A^k)^T\\mathbf{1}` 23 | 24 | where :math:`A` is the adjacency matrix, :math:`\\alpha` is the damping factor and :math:`K` is the path length. 25 | 26 | Parameters 27 | ---------- 28 | damping_factor : float 29 | Damping factor for path contributions. 30 | path_length : int 31 | Maximum length of the paths. 32 | 33 | Attributes 34 | ---------- 35 | scores_ : np.ndarray 36 | Score of each node. 37 | scores_row_: np.ndarray 38 | Scores of rows, for bipartite graphs. 39 | scores_col_: np.ndarray 40 | Scores of columns, for bipartite graphs. 41 | 42 | Examples 43 | -------- 44 | >>> from sknetwork.data.toy_graphs import house 45 | >>> adjacency = house() 46 | >>> katz = Katz() 47 | >>> scores = katz.fit_predict(adjacency) 48 | >>> np.round(scores, 2) 49 | array([6.5 , 8.25, 5.62, 5.62, 8.25]) 50 | 51 | References 52 | ---------- 53 | Katz, L. (1953). `A new status index derived from sociometric analysis 54 | `_. Psychometrika, 18(1), 39-43. 55 | """ 56 | def __init__(self, damping_factor: float = 0.5, path_length: int = 4): 57 | super(Katz, self).__init__() 58 | self.damping_factor = damping_factor 59 | self.path_length = path_length 60 | self.bipartite = None 61 | 62 | def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray, LinearOperator]) -> 'Katz': 63 | """Katz centrality. 64 | 65 | Parameters 66 | ---------- 67 | input_matrix : 68 | Adjacency matrix or biadjacency matrix of the graph. 69 | 70 | Returns 71 | ------- 72 | self: :class:`Katz` 73 | """ 74 | input_matrix = check_format(input_matrix) 75 | adjacency, self.bipartite = get_adjacency(input_matrix) 76 | n = adjacency.shape[0] 77 | coefs = self.damping_factor ** np.arange(self.path_length + 1) 78 | coefs[0] = 0. 79 | polynome = Polynome(adjacency.T.astype(bool).tocsr(), coefs) 80 | self.scores_ = polynome.dot(np.ones(n)) 81 | if self.bipartite: 82 | self._split_vars(input_matrix.shape) 83 | return self 84 | -------------------------------------------------------------------------------- /sknetwork/ranking/postprocess.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on May 2019 5 | @author: Nathan de Lara 6 | """ 7 | import numpy as np 8 | 9 | 10 | def top_k(scores: np.ndarray, k: int = 1, sort: bool = True): 11 | """Return the indices of the k elements of highest values. 12 | 13 | Parameters 14 | ---------- 15 | scores : np.ndarray 16 | Array of values. 
17 | k : int 18 | Number of elements to return. 19 | sort : bool 20 | If ``True``, sort the indices in decreasing order of value (element of highest value first). 21 | 22 | Examples 23 | -------- 24 | >>> top_k([1, 3, 2], k=2) 25 | array([1, 2]) 26 | """ 27 | scores = np.array(scores) 28 | if k >= len(scores): 29 | if sort: 30 | index = np.argsort(-scores) 31 | else: 32 | index = np.arange(len(scores)) 33 | else: 34 | index = np.argpartition(-scores, k)[:k] 35 | if sort: 36 | index = index[np.argsort(-scores[index])] 37 | return index 38 | -------------------------------------------------------------------------------- /sknetwork/ranking/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """tests for ranking""" 2 | -------------------------------------------------------------------------------- /sknetwork/ranking/tests/test_API.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for ranking API""" 4 | import unittest 5 | 6 | from sknetwork.data.test_graphs import * 7 | from sknetwork.ranking import * 8 | 9 | 10 | class TestAPI(unittest.TestCase): 11 | 12 | def test_basic(self): 13 | methods = [PageRank(), Closeness(), HITS(), Katz()] 14 | for adjacency in [test_graph(), test_digraph()]: 15 | n = adjacency.shape[0] 16 | for method in methods: 17 | score = method.fit_predict(adjacency) 18 | self.assertEqual(score.shape, (n, )) 19 | self.assertTrue(min(score) >= 0) 20 | 21 | def test_bipartite(self): 22 | biadjacency = test_bigraph() 23 | n_row, n_col = biadjacency.shape 24 | 25 | methods = [PageRank(), HITS(), Katz()] 26 | for method in methods: 27 | method.fit(biadjacency) 28 | scores_row = method.scores_row_ 29 | scores_col = method.scores_col_ 30 | 31 | self.assertEqual(scores_row.shape, (n_row,)) 32 | self.assertEqual(scores_col.shape, (n_col,)) 33 | -------------------------------------------------------------------------------- /sknetwork/ranking/tests/test_betweenness.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for betweenness.py""" 4 | 5 | import unittest 6 | import numpy as np 7 | 8 | from sknetwork.ranking.betweenness import Betweenness 9 | from sknetwork.data.test_graphs import test_graph, test_disconnected_graph 10 | from sknetwork.data.toy_graphs import bow_tie, star_wars 11 | 12 | 13 | class TestBetweenness(unittest.TestCase): 14 | 15 | def test_basic(self): 16 | adjacency = test_graph() 17 | betweenness = Betweenness() 18 | scores = betweenness.fit_predict(adjacency) 19 | self.assertEqual(len(scores), adjacency.shape[0]) 20 | 21 | def test_bowtie(self): 22 | adjacency = bow_tie() 23 | betweenness = Betweenness() 24 | scores = betweenness.fit_predict(adjacency) 25 | self.assertEqual(np.sum(scores > 0), 1) 26 | 27 | def test_disconnected(self): 28 | adjacency = test_disconnected_graph() 29 | betweenness = Betweenness() 30 | with self.assertRaises(ValueError): 31 | betweenness.fit(adjacency) 32 | 33 | def test_bipartite(self): 34 | adjacency = star_wars() 35 | betweenness = Betweenness() 36 | 37 | with self.assertRaises(ValueError): 38 | betweenness.fit_predict(adjacency) 39 | -------------------------------------------------------------------------------- /sknetwork/ranking/tests/test_closeness.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*-
coding: utf-8 -*- 3 | """tests for closeness.py""" 4 | 5 | import unittest 6 | 7 | from sknetwork.data.test_graphs import * 8 | from sknetwork.ranking.closeness import Closeness 9 | 10 | 11 | class TestCloseness(unittest.TestCase): 12 | 13 | def test_params(self): 14 | with self.assertRaises(ValueError): 15 | adjacency = test_graph() 16 | Closeness(method='toto').fit(adjacency) 17 | 18 | def test_parallel(self): 19 | adjacency = test_graph() 20 | n = adjacency.shape[0] 21 | 22 | closeness = Closeness(method='approximate') 23 | scores = closeness.fit_predict(adjacency) 24 | self.assertEqual(scores.shape, (n,)) 25 | 26 | def test_disconnected(self): 27 | adjacency = test_disconnected_graph() 28 | closeness = Closeness() 29 | with self.assertRaises(ValueError): 30 | closeness.fit(adjacency) 31 | -------------------------------------------------------------------------------- /sknetwork/ranking/tests/test_hits.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for hits.py""" 4 | 5 | import unittest 6 | 7 | from sknetwork.data.test_graphs import test_bigraph 8 | from sknetwork.ranking import HITS 9 | 10 | 11 | class TestHITS(unittest.TestCase): 12 | 13 | def test_keywords(self): 14 | biadjacency = test_bigraph() 15 | n_row, n_col = biadjacency.shape 16 | 17 | hits = HITS() 18 | hits.fit(biadjacency) 19 | self.assertEqual(hits.scores_row_.shape, (n_row,)) 20 | self.assertEqual(hits.scores_col_.shape, (n_col,)) 21 | -------------------------------------------------------------------------------- /sknetwork/ranking/tests/test_pagerank.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for pagerank.py""" 4 | 5 | import unittest 6 | 7 | import numpy as np 8 | 9 | from sknetwork.data.models import cyclic_digraph 10 | from sknetwork.data.test_graphs import test_graph, test_digraph, test_bigraph 11 | from sknetwork.ranking.pagerank import PageRank 12 | 13 | 14 | class TestPageRank(unittest.TestCase): 15 | 16 | def setUp(self) -> None: 17 | """Cycle graph for tests.""" 18 | self.n = 5 19 | self.adjacency = cyclic_digraph(self.n) 20 | self.truth = np.ones(self.n) / self.n 21 | 22 | def test_params(self): 23 | with self.assertRaises(ValueError): 24 | PageRank(damping_factor=1789) 25 | 26 | def test_solvers(self): 27 | for solver in ['piteration', 'lanczos', 'bicgstab', 'RH']: 28 | pagerank = PageRank(solver=solver) 29 | scores = pagerank.fit_predict(self.adjacency) 30 | self.assertAlmostEqual(0, np.linalg.norm(scores - self.truth)) 31 | with self.assertRaises(ValueError): 32 | PageRank(solver='toto').fit_predict(self.adjacency) 33 | 34 | def test_seeding(self): 35 | pagerank = PageRank() 36 | seeds_array = np.zeros(self.n) 37 | seeds_array[0] = 1. 38 | seeds_dict = {0: 1} 39 | 40 | scores1 = pagerank.fit_predict(self.adjacency, seeds_array) 41 | scores2 = pagerank.fit_predict(self.adjacency, seeds_dict) 42 | self.assertAlmostEqual(np.linalg.norm(scores1 - scores2), 0.) 43 | 44 | def test_input(self): 45 | pagerank = PageRank() 46 | scores = pagerank.fit_predict(self.adjacency, force_bipartite=True) 47 | self.assertEqual(len(scores), len(pagerank.scores_col_)) 48 | 49 | def test_damping(self): 50 | pagerank = PageRank(damping_factor=0.99) 51 | scores = pagerank.fit_predict(self.adjacency) 52 | self.assertAlmostEqual(np.linalg.norm(scores - self.truth), 0.)
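# On a directed cycle all nodes are equivalent, so PageRank is uniform for any
# damping factor: both extreme values tested here should recover the uniform vector self.truth.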
53 | 54 | pagerank = PageRank(damping_factor=0.01) 55 | scores = pagerank.fit_predict(self.adjacency) 56 | self.assertAlmostEqual(np.linalg.norm(scores - self.truth), 0.) 57 | 58 | def test_bigraph(self): 59 | pagerank = PageRank() 60 | for adjacency in [test_graph(), test_digraph(), test_bigraph()]: 61 | pagerank.fit(adjacency, weights_col={0: 1}) 62 | self.assertAlmostEqual(np.linalg.norm(pagerank.scores_col_ - pagerank.predict(columns=True)), 0.) 63 | -------------------------------------------------------------------------------- /sknetwork/ranking/tests/test_postprocess.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for postprocessing""" 4 | 5 | import unittest 6 | 7 | import numpy as np 8 | 9 | from sknetwork.ranking.postprocess import top_k 10 | 11 | 12 | class TestPostprocessing(unittest.TestCase): 13 | 14 | def test_top_k(self): 15 | scores = np.arange(10) 16 | index = top_k(scores, 3) 17 | self.assertTrue(set(index) == {7, 8, 9}) 18 | index = top_k(scores, 10) 19 | self.assertTrue(len(index) == 10) 20 | index = top_k(scores, 20) 21 | self.assertTrue(len(index) == 10) 22 | scores = [3, 1, 6, 2] 23 | index = top_k(scores, 2) 24 | self.assertTrue(set(index) == {0, 2}) 25 | index = top_k(scores, 2, sort=True) 26 | self.assertTrue(list(index) == [2, 0]) 27 | -------------------------------------------------------------------------------- /sknetwork/regression/__init__.py: -------------------------------------------------------------------------------- 1 | """regression module""" 2 | from sknetwork.regression.base import BaseRegressor 3 | from sknetwork.regression.diffusion import Diffusion, Dirichlet 4 | 5 | -------------------------------------------------------------------------------- /sknetwork/regression/base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on April 2022 5 | @author: Thomas Bonald 6 | """ 7 | from abc import ABC 8 | 9 | import numpy as np 10 | 11 | from sknetwork.base import Algorithm 12 | 13 | 14 | class BaseRegressor(Algorithm, ABC): 15 | """Base class for regression algorithms. 16 | 17 | Attributes 18 | ---------- 19 | values_ : np.ndarray 20 | Value of each node. 21 | values_row_: np.ndarray 22 | Values of rows, for bipartite graphs. 23 | values_col_: np.ndarray 24 | Values of columns, for bipartite graphs. 25 | """ 26 | def __init__(self): 27 | self.values_ = None 28 | 29 | def predict(self, columns: bool = False) -> np.ndarray: 30 | """Return the values predicted by the algorithm. 31 | 32 | Parameters 33 | ---------- 34 | columns : bool 35 | If ``True``, return the prediction for columns. 36 | 37 | Returns 38 | ------- 39 | values : np.ndarray 40 | Values. 41 | """ 42 | if columns: 43 | return self.values_col_ 44 | return self.values_ 45 | 46 | def fit_predict(self, *args, **kwargs) -> np.ndarray: 47 | """Fit algorithm to data and return the values. Same parameters as the ``fit`` method. 48 | 49 | Returns 50 | ------- 51 | values : np.ndarray 52 | Values. 
53 | """ 54 | self.fit(*args, **kwargs) 55 | return self.values_ 56 | 57 | def _split_vars(self, shape): 58 | n_row = shape[0] 59 | self.values_row_ = self.values_[:n_row] 60 | self.values_col_ = self.values_[n_row:] 61 | self.values_ = self.values_row_ 62 | -------------------------------------------------------------------------------- /sknetwork/regression/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """tests for regression""" 2 | -------------------------------------------------------------------------------- /sknetwork/regression/tests/test_API.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for regression API""" 4 | import unittest 5 | 6 | from sknetwork.data.test_graphs import test_bigraph, test_graph, test_digraph 7 | from sknetwork.regression import * 8 | 9 | 10 | class TestAPI(unittest.TestCase): 11 | 12 | def test_basic(self): 13 | methods = [Diffusion(), Dirichlet()] 14 | for adjacency in [test_graph(), test_digraph()]: 15 | n = adjacency.shape[0] 16 | for method in methods: 17 | score = method.fit_predict(adjacency) 18 | self.assertEqual(score.shape, (n, )) 19 | self.assertTrue(min(score) >= 0) 20 | 21 | def test_bipartite(self): 22 | biadjacency = test_bigraph() 23 | n_row, n_col = biadjacency.shape 24 | 25 | methods = [Diffusion(), Dirichlet()] 26 | for method in methods: 27 | method.fit(biadjacency) 28 | values_row = method.values_row_ 29 | values_col = method.values_col_ 30 | 31 | self.assertEqual(values_row.shape, (n_row,)) 32 | self.assertEqual(values_col.shape, (n_col,)) 33 | -------------------------------------------------------------------------------- /sknetwork/regression/tests/test_diffusion.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for diffusion.py""" 4 | 5 | import unittest 6 | 7 | from sknetwork.data.test_graphs import * 8 | from sknetwork.regression import Diffusion, Dirichlet 9 | 10 | 11 | # noinspection DuplicatedCode 12 | class TestDiffusion(unittest.TestCase): 13 | 14 | def setUp(self): 15 | self.algos = [Diffusion(), Dirichlet()] 16 | 17 | def test_predict(self): 18 | adjacency = test_graph() 19 | for algo in self.algos: 20 | values = algo.fit_predict(adjacency, {0: 0, 1: 1, 2: 0.5}) 21 | values_ = algo.predict() 22 | self.assertAlmostEqual(np.linalg.norm(values - values_), 0) 23 | 24 | def test_no_iter(self): 25 | with self.assertRaises(ValueError): 26 | Diffusion(n_iter=-1) 27 | 28 | def test_single_node_graph(self): 29 | for algo in self.algos: 30 | algo.fit(sparse.identity(1, format='csr'), {0: 1}) 31 | self.assertEqual(algo.values_, [1]) 32 | 33 | def test_range(self): 34 | for adjacency in [test_graph(), test_digraph()]: 35 | for algo in self.algos: 36 | values = algo.fit_predict(adjacency, {0: 0, 1: 1, 2: 0.5}) 37 | self.assertTrue(np.all(values <= 1) and np.all(values >= 0)) 38 | 39 | biadjacency = test_bigraph() 40 | for algo in [Diffusion(), Dirichlet()]: 41 | values = algo.fit_predict(biadjacency, values_row={0: 1}) 42 | self.assertTrue(np.all(values <= 1) and np.all(values >= 0)) 43 | values = algo.fit_predict(biadjacency, values_row={0: 0.1}, values_col={1: 2}, init=0.3) 44 | self.assertTrue(np.all(values <= 2) and np.all(values >= 0.1)) 45 | self.assertAlmostEqual(np.linalg.norm(algo.values_col_ - algo.predict(columns=True)), 0) 46 | 47 | def test_initial_state(self): 48 
| for adjacency in [test_graph(), test_digraph()]: 49 | for algo in self.algos: 50 | values = algo.fit_predict(adjacency, {0: 0, 1: 1, 2: 0.5}, 0.3) 51 | self.assertTrue(np.all(values <= 1) and np.all(values >= 0)) 52 | 53 | def test_n_iter(self): 54 | with self.assertRaises(ValueError): 55 | Dirichlet(n_iter=0) 56 | 57 | -------------------------------------------------------------------------------- /sknetwork/sknetwork.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Main module.""" 4 | -------------------------------------------------------------------------------- /sknetwork/test_base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for base.py""" 4 | import unittest 5 | 6 | from sknetwork.base import Algorithm 7 | 8 | 9 | class TestBase(unittest.TestCase): 10 | 11 | def setUp(self): 12 | class NewAlgo(Algorithm): 13 | """Docstring""" 14 | def __init__(self, param: int, name: str): 15 | self.param = param 16 | self.name = name 17 | 18 | def fit(self): 19 | """Docstring""" 20 | pass 21 | self.algo = NewAlgo(1, 'abc') 22 | 23 | def test_repr(self): 24 | self.assertEqual(repr(self.algo), "NewAlgo(param=1, name='abc')") 25 | 26 | def test_get_params(self): 27 | self.assertEqual(len(self.algo.get_params()), 2) 28 | 29 | def test_set_params(self): 30 | self.algo.set_params({'param': 3}) 31 | self.assertEqual(self.algo.param, 3) 32 | 33 | def test_fit(self): 34 | stub = Algorithm() 35 | self.assertRaises(NotImplementedError, stub.fit, None) 36 | -------------------------------------------------------------------------------- /sknetwork/test_log.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for verbose.py""" 4 | 5 | import unittest 6 | 7 | from sknetwork.log import Log 8 | 9 | 10 | class TestVerbose(unittest.TestCase): 11 | 12 | def test_prints(self): 13 | logger = Log(verbose=True) 14 | logger.print_log('Hello', 42) 15 | self.assertEqual(str(logger.log), 'Hello 42\n') 16 | -------------------------------------------------------------------------------- /sknetwork/topology/__init__.py: -------------------------------------------------------------------------------- 1 | """Module on topology.""" 2 | from sknetwork.topology.cliques import count_cliques 3 | from sknetwork.topology.core import get_core_decomposition 4 | from sknetwork.topology.triangles import count_triangles, get_clustering_coefficient 5 | from sknetwork.topology.structure import is_connected, is_bipartite, is_symmetric, get_connected_components, \ 6 | get_largest_connected_component 7 | from sknetwork.topology.cycles import is_acyclic, get_cycles, break_cycles 8 | from sknetwork.topology.weisfeiler_lehman import color_weisfeiler_lehman, are_isomorphic 9 | -------------------------------------------------------------------------------- /sknetwork/topology/core.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c++ 2 | # cython: language_level=3 3 | """ 4 | Created in June 2020 5 | @author: Julien Simonnet 6 | @author: Yohann Robert 7 | """ 8 | cimport cython 9 | 10 | from typing import Union 11 | 12 | import numpy as np 13 | cimport numpy as np 14 | from scipy import sparse 15 | 16 | from sknetwork.utils.check import check_format 17 | from sknetwork.topology.minheap cimport MinHeap 18 | 
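# Note on the implementation below: compute_core performs the classic k-core peeling.
# Nodes sit in a min-heap keyed by their current degree; the node of minimum degree is
# repeatedly removed, the running maximum of the degrees seen at removal time gives its
# core value, and the degrees of its remaining neighbors are decreased accordingly.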
19 | 20 | @cython.boundscheck(False) 21 | @cython.wraparound(False) 22 | cdef compute_core(int[:] indptr, int[:] indices): 23 | """Compute the core value of each node. 24 | 25 | Parameters 26 | ---------- 27 | indptr : 28 | CSR format index pointer array of the adjacency matrix. 29 | indices : 30 | CSR format index array of the adjacency matrix. 31 | 32 | Returns 33 | ------- 34 | labels : 35 | Core value of each node. 36 | """ 37 | cdef int n = indptr.shape[0] - 1 38 | cdef int core_value = 0 # current/max core value of the graph 39 | cdef int min_node # current node of minimum degree 40 | cdef int i, j, k 41 | cdef int[:] degrees = np.asarray(indptr)[1:] - np.asarray(indptr)[:n] 42 | cdef np.ndarray[int, ndim=1] labels = np.empty((n,), dtype=np.int32) 43 | cdef MinHeap mh = MinHeap.__new__(MinHeap, n) # minimum heap with an update system 44 | 45 | # insert all nodes in the heap 46 | for i in range(n): 47 | mh.insert_key(i, degrees) 48 | 49 | i = n - 1 50 | while not mh.empty(): 51 | min_node = mh.pop_min(degrees) 52 | core_value = max(core_value, degrees[min_node]) 53 | 54 | # decrease the degree of each neighbor of min_node 55 | for k in range(indptr[min_node], indptr[min_node+1]): 56 | j = indices[k] 57 | degrees[j] -= 1 58 | mh.decrease_key(j, degrees) # update the heap to take the new degree into account 59 | 60 | labels[min_node] = core_value 61 | i -= 1 62 | 63 | return np.asarray(labels) 64 | 65 | 66 | def get_core_decomposition(adjacency: Union[np.ndarray, sparse.csr_matrix]) -> np.ndarray: 67 | """Get the k-core decomposition of a graph. 68 | 69 | Parameters 70 | ---------- 71 | adjacency : 72 | Adjacency matrix of the graph. 73 | 74 | Returns 75 | ------- 76 | core_values : 77 | Core value of each node. 78 | 79 | Example 80 | ------- 81 | >>> from sknetwork.data import karate_club 82 | >>> adjacency = karate_club() 83 | >>> core_values = get_core_decomposition(adjacency) 84 | >>> len(core_values) 85 | 34 86 | """ 87 | adjacency = check_format(adjacency, allow_empty=True) 88 | indptr = adjacency.indptr 89 | indices = adjacency.indices 90 | return compute_core(indptr, indices) 91 | -------------------------------------------------------------------------------- /sknetwork/topology/minheap.pxd: -------------------------------------------------------------------------------- 1 | # distutils: language = c++ 2 | # cython: language_level=3 3 | """ 4 | Created in June 2020 5 | @author: Julien Simonnet 6 | @author: Yohann Robert 7 | """ 8 | from libcpp.vector cimport vector 9 | 10 | cdef class MinHeap: 11 | 12 | cdef vector[int] val, pos 13 | cdef int size 14 | 15 | cdef int pop_min(self, int[:] scores) 16 | cdef bint empty(self) 17 | cdef void swap(self, int x, int y) 18 | cdef void insert_key(self, int k, int[:] scores) 19 | cdef void decrease_key(self, int i, int[:] scores) 20 | cdef void min_heapify(self, int i, int[:] scores) 21 | -------------------------------------------------------------------------------- /sknetwork/topology/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """tests for topology""" 2 | -------------------------------------------------------------------------------- /sknetwork/topology/tests/test_cliques.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for cliques""" 4 | import unittest 5 | 6 | from scipy.special import comb 7 | 8 | from sknetwork.data.test_graphs import * 9 | from
sknetwork.topology.cliques import count_cliques 10 | 11 | 12 | class TestClique(unittest.TestCase): 13 | 14 | def test_empty(self): 15 | adjacency = test_graph_empty() 16 | self.assertEqual(count_cliques(adjacency), 0) 17 | with self.assertRaises(ValueError): 18 | count_cliques(adjacency, 1) 19 | 20 | def test_disconnected(self): 21 | adjacency = test_disconnected_graph() 22 | self.assertEqual(count_cliques(adjacency), 1) 23 | 24 | def test_cliques(self): 25 | adjacency = test_clique() 26 | n = adjacency.shape[0] 27 | self.assertEqual(count_cliques(adjacency), comb(n, 3, exact=True)) 28 | self.assertEqual(count_cliques(adjacency, 4), comb(n, 4, exact=True)) 29 | -------------------------------------------------------------------------------- /sknetwork/topology/tests/test_core.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for k-core decomposition""" 4 | import unittest 5 | 6 | from sknetwork.data.test_graphs import * 7 | from sknetwork.topology.core import get_core_decomposition 8 | 9 | 10 | class TestCoreDecomposition(unittest.TestCase): 11 | 12 | def test_empty(self): 13 | adjacency = test_graph_empty() 14 | self.assertEqual(max(get_core_decomposition(adjacency)), 0) 15 | 16 | def test_cliques(self): 17 | adjacency = test_clique() 18 | n = adjacency.shape[0] 19 | self.assertEqual(max(get_core_decomposition(adjacency)), n - 1) 20 | -------------------------------------------------------------------------------- /sknetwork/topology/tests/test_triangles.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for triangle counting""" 4 | 5 | import unittest 6 | 7 | from scipy.special import comb 8 | 9 | from sknetwork.data import karate_club 10 | from sknetwork.data.parse import from_edge_list 11 | from sknetwork.data.test_graphs import * 12 | from sknetwork.topology.triangles import count_triangles, get_clustering_coefficient 13 | 14 | 15 | class TestTriangle(unittest.TestCase): 16 | 17 | def test_empty(self): 18 | adjacency = test_graph_empty() 19 | self.assertEqual(count_triangles(adjacency), 0) 20 | 21 | def test_disconnected(self): 22 | adjacency = test_disconnected_graph() 23 | self.assertEqual(count_triangles(adjacency), 1) 24 | 25 | def test_cliques(self): 26 | adjacency = test_clique() 27 | n = adjacency.shape[0] 28 | self.assertEqual(count_triangles(adjacency), comb(n, 3, exact=True)) 29 | 30 | def test_clustering_coefficient(self): 31 | edges = [(0, 1), (1, 2), (2, 3), (3, 0), (0, 2)] 32 | adjacency = from_edge_list(edges, directed=False, matrix_only=True) 33 | self.assertEqual(0.75, get_clustering_coefficient(adjacency)) 34 | 35 | def test_options(self): 36 | adjacency = karate_club() 37 | self.assertEqual(count_triangles(adjacency, parallelize=False), 45) 38 | self.assertEqual(count_triangles(adjacency, parallelize=True), 45) 39 | -------------------------------------------------------------------------------- /sknetwork/topology/tests/test_wl.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for Weisfeiler-Lehman""" 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from sknetwork.data import house, bow_tie, linear_graph 9 | from sknetwork.data.test_graphs import * 10 | from sknetwork.topology import color_weisfeiler_lehman, are_isomorphic 11 | 12 | 13 | class 
TestWLKernel(unittest.TestCase): 14 | 15 | def test_isomorphism(self): 16 | ref = house() 17 | n = ref.shape[0] 18 | 19 | adjacency = house() 20 | reorder = list(range(n)) 21 | np.random.shuffle(reorder) 22 | adjacency = adjacency[reorder][:, reorder] 23 | self.assertTrue(are_isomorphic(ref, adjacency)) 24 | 25 | adjacency = bow_tie() 26 | self.assertFalse(are_isomorphic(ref, adjacency)) 27 | 28 | adjacency = linear_graph(n) 29 | self.assertFalse(are_isomorphic(ref, adjacency)) 30 | 31 | adjacency = linear_graph(n + 1) 32 | self.assertFalse(are_isomorphic(ref, adjacency)) 33 | 34 | 35 | class TestWLColoring(unittest.TestCase): 36 | 37 | def test_empty(self): 38 | adjacency = test_graph_empty() 39 | labels = color_weisfeiler_lehman(adjacency) 40 | self.assertTrue((labels == np.zeros(10)).all()) 41 | 42 | def test_cliques(self): 43 | adjacency = test_clique() 44 | labels = color_weisfeiler_lehman(adjacency) 45 | self.assertTrue((labels == np.zeros(10)).all()) 46 | 47 | def test_house(self): 48 | adjacency = house() 49 | labels = color_weisfeiler_lehman(adjacency) 50 | self.assertTrue((labels == np.array([0, 2, 1, 1, 2])).all()) 51 | 52 | def test_bow_tie(self): 53 | adjacency = bow_tie() 54 | labels = color_weisfeiler_lehman(adjacency) 55 | self.assertTrue((labels == np.array([1, 0, 0, 0, 0])).all()) 56 | 57 | def test_iso(self): 58 | adjacency = house() 59 | n = adjacency.indptr.shape[0] - 1 60 | reorder = list(range(n)) 61 | np.random.shuffle(reorder) 62 | adjacency2 = adjacency[reorder][:, reorder] 63 | l1 = color_weisfeiler_lehman(adjacency) 64 | l2 = color_weisfeiler_lehman(adjacency2) 65 | l1.sort() 66 | l2.sort() 67 | self.assertTrue((l1 == l2).all()) 68 | 69 | def test_early_stop(self): 70 | adjacency = house() 71 | labels = color_weisfeiler_lehman(adjacency, max_iter=1) 72 | self.assertTrue((labels == np.array([0, 1, 0, 0, 1])).all()) 73 | -------------------------------------------------------------------------------- /sknetwork/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """utils module""" 2 | from sknetwork.data import * 3 | from sknetwork.utils.check import is_symmetric 4 | from sknetwork.utils.format import * 5 | from sknetwork.utils.membership import get_membership, from_membership 6 | from sknetwork.utils.neighbors import get_neighbors, get_degrees, get_weights 7 | from sknetwork.utils.tfidf import get_tfidf 8 | -------------------------------------------------------------------------------- /sknetwork/utils/membership.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created in July 2019 5 | @author: Nathan de Lara 6 | @author: Thomas Bonald 7 | """ 8 | from typing import Optional 9 | 10 | import numpy as np 11 | from scipy import sparse 12 | 13 | from sknetwork.utils.neighbors import get_degrees 14 | 15 | 16 | def get_membership(labels: np.ndarray, dtype=bool, n_labels: Optional[int] = None) -> sparse.csr_matrix: 17 | """Build the binary matrix of the label assignments, of shape n_samples x n_labels. 18 | Negative labels are ignored. 19 | 20 | Parameters 21 | ---------- 22 | labels : 23 | Label of each node (integers). 24 | dtype : 25 | Type of the output. Boolean by default. 26 | n_labels : int 27 | Number of labels. 28 | 29 | Returns 30 | ------- 31 | membership : sparse.csr_matrix 32 | Binary matrix of label assignments. 
33 | 34 | Example 35 | ------- 36 | >>> from sknetwork.utils import get_membership 37 | >>> labels = np.array([0, 0, 1, 2]) 38 | >>> membership = get_membership(labels) 39 | >>> membership.toarray().astype(int) 40 | array([[1, 0, 0], 41 | [1, 0, 0], 42 | [0, 1, 0], 43 | [0, 0, 1]]) 44 | """ 45 | n: int = len(labels) 46 | if n_labels is None: 47 | shape = (n, max(labels)+1) 48 | else: 49 | shape = (n, n_labels) 50 | ix = (labels >= 0) 51 | data = np.ones(ix.sum()) 52 | row = np.arange(n)[ix] 53 | col = labels[ix] 54 | return sparse.csr_matrix((data, (row, col)), shape=shape, dtype=dtype) 55 | 56 | 57 | def from_membership(membership: sparse.csr_matrix) -> np.ndarray: 58 | """Get the labels from a membership matrix (n_samples x n_labels). 59 | Samples without label get -1. 60 | 61 | Parameters 62 | ---------- 63 | membership : 64 | Membership matrix. 65 | 66 | Returns 67 | ------- 68 | labels : np.ndarray 69 | Labels (columns indices of the membership matrix). 70 | Example 71 | ------- 72 | >>> from scipy import sparse 73 | >>> from sknetwork.utils import from_membership 74 | >>> membership = sparse.eye(3).tocsr() 75 | >>> labels = from_membership(membership) 76 | >>> labels 77 | array([0, 1, 2]) 78 | """ 79 | mask = get_degrees(membership) > 0 80 | labels = -np.ones(membership.shape[0], dtype=int) 81 | labels[mask] = membership.indices 82 | return labels 83 | -------------------------------------------------------------------------------- /sknetwork/utils/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """tests for utils""" 2 | -------------------------------------------------------------------------------- /sknetwork/utils/tests/test_format.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for format.py""" 4 | import unittest 5 | 6 | from sknetwork.data.test_graphs import * 7 | from sknetwork.utils.format import * 8 | 9 | 10 | class TestFormats(unittest.TestCase): 11 | 12 | def setUp(self): 13 | """Basic biadjacency for tests.""" 14 | self.biadjacency = test_bigraph() 15 | 16 | def test_directed2undirected(self): 17 | adjacency = test_digraph() 18 | ref = directed2undirected(adjacency) 19 | self.assertEqual(ref.shape, adjacency.shape) 20 | self.assertTrue(is_symmetric(ref)) 21 | 22 | adjacency = test_graph().astype(bool) 23 | n = adjacency.shape[0] 24 | diff = directed2undirected(adjacency, weighted=False) - adjacency 25 | self.assertEqual(diff.nnz, 0) 26 | 27 | slr = SparseLR(adjacency, [(np.zeros(n), np.zeros(n))]) 28 | self.assertRaises(ValueError, directed2undirected, slr, weighted=False) 29 | slr = 0.5 * directed2undirected(slr) 30 | self.assertEqual(slr.shape, (n, n)) 31 | 32 | x = np.random.randn(n) 33 | error = np.linalg.norm(slr.dot(x) - adjacency.dot(x)) 34 | self.assertAlmostEqual(error, 0) 35 | 36 | def test_bipartite2directed(self): 37 | n_row, n_col = self.biadjacency.shape 38 | n = n_row + n_col 39 | 40 | directed_graph = bipartite2directed(self.biadjacency) 41 | self.assertEqual(directed_graph.shape, (n, n)) 42 | 43 | slr = SparseLR(self.biadjacency, [(np.ones(n_row), np.ones(n_col))]) 44 | directed_graph = bipartite2directed(slr) 45 | self.assertTrue(type(directed_graph) == SparseLR) 46 | 47 | def test_bipartite2undirected(self): 48 | n_row, n_col = self.biadjacency.shape 49 | n = n_row + n_col 50 | 51 | undirected_graph = bipartite2undirected(self.biadjacency) 52 | self.assertEqual(undirected_graph.shape, (n, n)) 53 
| self.assertTrue(is_symmetric(undirected_graph)) 54 | 55 | slr = SparseLR(self.biadjacency, [(np.ones(n_row), np.ones(n_col))]) 56 | undirected_graph = bipartite2undirected(slr) 57 | self.assertTrue(type(undirected_graph) == SparseLR) 58 | 59 | def test_check(self): 60 | with self.assertRaises(ValueError): 61 | check_format(sparse.csr_matrix((3, 4)), allow_empty=False) 62 | adjacency = check_format(np.array([[0, 2], [2, 3]])) 63 | self.assertTrue(adjacency.shape == (2, 2)) 64 | -------------------------------------------------------------------------------- /sknetwork/utils/tests/test_membership.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created in July 2022 5 | @author: Thomas Bonald 6 | """ 7 | import unittest 8 | 9 | import numpy as np 10 | 11 | from sknetwork.utils.membership import get_membership, from_membership 12 | 13 | 14 | class TestMembership(unittest.TestCase): 15 | 16 | def test_membership(self): 17 | labels = np.array([0, 0, 1, 2, 1, 1]) 18 | membership = get_membership(labels) 19 | self.assertEqual(membership.nnz, 6) 20 | self.assertEqual(np.linalg.norm(labels - from_membership(membership)), 0) 21 | labels = np.array([0, 0, 1, 2, 1, -1]) 22 | membership = get_membership(labels) 23 | self.assertEqual(membership.nnz, 5) 24 | self.assertEqual(np.linalg.norm(labels - from_membership(membership)), 0) 25 | -------------------------------------------------------------------------------- /sknetwork/utils/tests/test_neighbors.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on December 2020 5 | @author: Thomas Bonald 6 | """ 7 | import unittest 8 | 9 | import numpy as np 10 | from numpy.linalg import norm 11 | 12 | from sknetwork.data import karate_club, painters 13 | from sknetwork.utils import get_neighbors, get_degrees, get_weights 14 | 15 | 16 | class TestNeighbors(unittest.TestCase): 17 | 18 | def test_graph(self): 19 | adjacency = karate_club() 20 | neighbors = get_neighbors(adjacency, 5) 21 | degrees = get_degrees(adjacency) 22 | neighbors_true = np.array([0, 6, 10, 16]) 23 | self.assertEqual(norm(neighbors - neighbors_true), 0) 24 | self.assertEqual(degrees[5], 4) 25 | 26 | def test_digraph(self): 27 | adjacency = painters() 28 | neighbors = get_neighbors(adjacency, 0) 29 | out_degrees = get_degrees(adjacency) 30 | out_weights = get_weights(adjacency) 31 | neighbors_true = np.array([3, 10]) 32 | self.assertEqual(norm(neighbors - neighbors_true), 0) 33 | self.assertEqual(out_degrees[0], 2) 34 | self.assertEqual(out_weights[0], 2) 35 | neighbors = get_neighbors(adjacency, 0, transpose=True) 36 | in_degrees = get_degrees(adjacency, transpose=True) 37 | in_weights = get_weights(adjacency, transpose=True) 38 | neighbors_true = np.array([3, 6, 8, 10, 11]) 39 | self.assertEqual(norm(neighbors - neighbors_true), 0) 40 | self.assertEqual(in_degrees[0], 5) 41 | self.assertEqual(in_weights[0], 5) 42 | -------------------------------------------------------------------------------- /sknetwork/utils/tests/test_tfidf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Tests for tfidf.py""" 4 | import unittest 5 | 6 | import numpy as np 7 | from scipy import sparse 8 | 9 | from sknetwork.utils.tfidf import get_tfidf 10 | 11 | 12 | class TestTFIDF(unittest.TestCase): 13 | 14 | def 
test_tfidf(self): 15 | count = sparse.csr_matrix(np.array([[0, 1, 2], [0, 2, 1], [0, 0, 1]])) 16 | tfidf = get_tfidf(count) 17 | self.assertEqual(count.shape, tfidf.shape) 18 | self.assertEqual(tfidf.nnz, 2) 19 | -------------------------------------------------------------------------------- /sknetwork/utils/tests/test_values.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for values.py""" 4 | 5 | import unittest 6 | 7 | import numpy as np 8 | 9 | from sknetwork.utils.values import get_values, stack_values, values2prob 10 | 11 | 12 | class TestValues(unittest.TestCase): 13 | 14 | def test_get_values(self): 15 | n = 10 16 | labels_array = -np.ones(n) 17 | labels_array[:2] = np.arange(2) 18 | labels_dict = {0: 0, 1: 1} 19 | labels_array = get_values((n,), labels_array) 20 | labels_ = get_values((n,), labels_dict) 21 | self.assertTrue(np.allclose(labels_array, labels_)) 22 | with self.assertRaises(ValueError): 23 | get_values((5,), labels_array) 24 | self.assertRaises(TypeError, get_values, 'toto', 3) 25 | with self.assertWarns(Warning): 26 | labels_dict[0] = -1 27 | get_values((n,), labels_dict) 28 | 29 | def test_values2probs(self): 30 | n = 4 31 | values_array = np.array([0, 1, -1, 0]) 32 | values_dict = {0: 0, 1: 1, 3: 0} 33 | 34 | probs1 = values2prob(n, values_array) 35 | probs2 = values2prob(n, values_dict) 36 | self.assertTrue(np.allclose(probs1, probs2)) 37 | 38 | bad_input = np.array([0, 0, -1, 0]) 39 | with self.assertRaises(ValueError): 40 | values2prob(n, bad_input) 41 | 42 | def test_stack_values(self): 43 | shape = 4, 3 44 | values_row_array = np.array([0, 1, -1, 0]) 45 | values_row_dict = {0: 0, 1: 1, 3: 0} 46 | values_col_array = np.array([0, 1, -1]) 47 | values_col_dict = {0: 0, 1: 1} 48 | 49 | values1 = stack_values(shape, values_row_array, values_col_array) 50 | values2 = stack_values(shape, values_row_dict, values_col_dict) 51 | values3 = stack_values(shape, values_row_array, values_col_dict) 52 | values4 = stack_values(shape, values_row_dict, values_col_array) 53 | 54 | self.assertTrue(np.allclose(values1, values2)) 55 | self.assertTrue(np.allclose(values2, values3)) 56 | self.assertTrue(np.allclose(values3, values4)) 57 | 58 | values1 = stack_values(shape, values_row_array, None) 59 | values2 = stack_values(shape, values_row_dict, None) 60 | 61 | self.assertTrue(np.allclose(values1, values2)) 62 | 63 | values1 = stack_values(shape, None, values_col_array) 64 | values2 = stack_values(shape, None, values_col_dict) 65 | 66 | self.assertTrue(np.allclose(values1, values2)) 67 | -------------------------------------------------------------------------------- /sknetwork/utils/tfidf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created in February 2023 5 | @author: Thomas Bonald 6 | """ 7 | import numpy as np 8 | from scipy import sparse 9 | 10 | from sknetwork.linalg import normalize 11 | from sknetwork.utils import get_degrees 12 | 13 | 14 | def get_tfidf(count_matrix: sparse.csr_matrix): 15 | """Get the tf-idf from a count matrix in sparse format. 16 | 17 | Parameters 18 | ---------- 19 | count_matrix : sparse.csr_matrix 20 | Count matrix, shape (n_documents, n_words). 21 | 22 | Returns 23 | ------- 24 | tf_idf : sparse.csr_matrix 25 | tf-idf matrix, shape (n_documents, n_words). 
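Example
-------
A minimal doctest-style sketch; the count matrix and the expected values below
mirror the unit test in tests/test_tfidf.py.

>>> count = sparse.csr_matrix(np.array([[0, 1, 2], [0, 2, 1], [0, 0, 1]]))
>>> tfidf = get_tfidf(count)
>>> tfidf.shape
(3, 3)
>>> tfidf.nnz
2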
26 | 27 | References 28 | ---------- 29 | https://en.wikipedia.org/wiki/Tfidf 30 | """ 31 | n_documents, n_words = count_matrix.shape 32 | tf = normalize(count_matrix) 33 | freq = get_degrees(count_matrix > 0, transpose=True) 34 | idf = np.zeros(n_words) 35 | idf[freq > 0] = np.log(n_documents / freq[freq > 0]) 36 | tf_idf = tf.dot(sparse.diags(idf)) 37 | return tf_idf 38 | -------------------------------------------------------------------------------- /sknetwork/utils/values.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created in April 2019 5 | @author: Nathan de Lara 6 | """ 7 | import warnings 8 | from typing import Optional, Union 9 | 10 | import numpy as np 11 | 12 | 13 | def get_values(shape: tuple, values: Union[np.ndarray, list, dict], default_value: float = -1) -> np.ndarray: 14 | """Get values as array.""" 15 | n = shape[0] 16 | if isinstance(values, list): 17 | values = np.array(values) 18 | if isinstance(values, np.ndarray): 19 | if len(values) != n: 20 | raise ValueError('Dimensions mismatch between adjacency and values.') 21 | else: 22 | values = values.astype(float) 23 | elif isinstance(values, dict): 24 | keys, values_ = np.array(list(values.keys())), np.array(list(values.values())) 25 | if np.min(values_) < 0: 26 | warnings.warn(Warning("Negative values will not be taken into account.")) 27 | values = default_value * np.ones(n) 28 | values[keys] = values_ 29 | else: 30 | values = np.ones(n) 31 | return values 32 | 33 | 34 | def stack_values(shape: tuple, values_row: Optional[Union[np.ndarray, list, dict]], 35 | values_col: Optional[Union[np.ndarray, list, dict]] = None, default_value: float = -1) -> np.ndarray: 36 | """Process values for rows and columns and stack the results into a single vector.""" 37 | n_row, n_col = shape 38 | if values_row is None and values_col is None: 39 | values_row = np.ones(n_row) 40 | values_col = default_value * np.ones(n_col) 41 | elif values_row is None: 42 | values_row = default_value * np.ones(n_row) 43 | elif values_col is None: 44 | values_col = default_value * np.ones(n_col) 45 | values_row = get_values(shape, values_row, default_value) 46 | values_col = get_values((n_col,), values_col, default_value) 47 | return np.hstack((values_row, values_col)) 48 | 49 | 50 | def values2prob(n: int, values: np.ndarray = None) -> np.ndarray: 51 | """Transform seed values into probability vector. 52 | 53 | Parameters 54 | ---------- 55 | n : int 56 | Number of nodes. 57 | values : 58 | If ``None``, the uniform distribution is used. 59 | Otherwise, a non-negative, non-zero vector or a dictionary must be provided. 60 | 61 | Returns 62 | ------- 63 | probs: np.ndarray 64 | A probability vector. 
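Example
-------
A small sketch using the seed values from tests/test_values.py.

>>> values2prob(4, np.array([0, 1, -1, 0]))
array([0., 1., 0., 0.])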
65 | """ 66 | if values is None: 67 | return np.ones(n) / n 68 | else: 69 | values = get_values((n,), values) 70 | probs = np.zeros_like(values, dtype=float) 71 | ix = (values > 0) 72 | probs[ix] = values[ix] 73 | if probs.sum() > 0: 74 | return probs / probs.sum() 75 | else: 76 | raise ValueError('At least one value must be positive.') 77 | -------------------------------------------------------------------------------- /sknetwork/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | """Visualization module.""" 2 | 3 | from sknetwork.visualization.dendrograms import visualize_dendrogram, svg_dendrogram 4 | from sknetwork.visualization.graphs import visualize_graph, visualize_bigraph, svg_graph, svg_bigraph 5 | -------------------------------------------------------------------------------- /sknetwork/visualization/colors.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on April 2020 5 | @authors: 6 | Thomas Bonald 7 | """ 8 | 9 | import numpy as np 10 | 11 | # standard SVG colors 12 | STANDARD_COLORS = np.array(['blue', 'red', 'green', 'orange', 'purple', 'yellow', 'fuchsia', 'olive', 'aqua', 'brown']) 13 | 14 | # 100 RGB colors of coolwarm color map. 15 | COOLWARM_RGB = np.array([[58, 76, 192], [60, 79, 195], [64, 84, 199], [66, 88, 202], [70, 93, 207], [72, 96, 209], 16 | [76, 102, 214], [80, 107, 218], [82, 110, 220], [86, 115, 224], [88, 118, 226], [92, 123, 229], 17 | [96, 128, 232], [99, 131, 234], [103, 136, 237], [105, 139, 239], [109, 144, 241], 18 | [112, 147, 243], [116, 151, 245], [120, 155, 247], [123, 158, 248], [127, 162, 250], 19 | [130, 165, 251], [134, 169, 252], [138, 173, 253], [141, 175, 253], [145, 179, 254], 20 | [148, 181, 254], [152, 185, 254], [155, 187, 254], [159, 190, 254], [163, 193, 254], 21 | [166, 195, 253], [170, 198, 253], [172, 200, 252], [176, 203, 251], [180, 205, 250], 22 | [183, 207, 249], [187, 209, 247], [189, 210, 246], [193, 212, 244], [197, 213, 242], 23 | [199, 214, 240], [202, 216, 238], [205, 217, 236], [208, 218, 233], [210, 218, 231], 24 | [214, 219, 228], [217, 220, 224], [219, 220, 222], [222, 219, 218], [224, 218, 215], 25 | [227, 217, 211], [230, 215, 207], [231, 214, 204], [234, 211, 199], [236, 210, 196], 26 | [237, 207, 192], [239, 206, 188], [241, 203, 184], [242, 200, 179], [243, 198, 176], 27 | [244, 195, 171], [245, 193, 168], [246, 189, 164], [246, 186, 159], [246, 183, 156], 28 | [247, 179, 151], [247, 177, 148], [247, 173, 143], [246, 169, 138], [246, 166, 135], 29 | [245, 161, 130], [245, 158, 127], [244, 154, 123], [243, 150, 120], [242, 145, 115], 30 | [240, 141, 111], [239, 137, 108], [237, 132, 103], [236, 128, 100], [234, 123, 96], 31 | [231, 117, 92], [230, 114, 89], [227, 108, 84], [225, 104, 82], [222, 98, 78], 32 | [220, 94, 75], [217, 88, 71], [214, 82, 67], [211, 77, 64], [207, 70, 61], 33 | [205, 66, 58], [201, 59, 55], [197, 50, 51], [194, 45, 49], [190, 35, 45], 34 | [187, 26, 43], [182, 13, 40], [179, 3, 38]]) 35 | -------------------------------------------------------------------------------- /sknetwork/visualization/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """tests for visualization""" 2 | -------------------------------------------------------------------------------- /sknetwork/visualization/tests/test_dendrograms.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """tests for visualization of dendrograms""" 4 | 5 | import tempfile 6 | import unittest 7 | 8 | import numpy as np 9 | 10 | from sknetwork.data.toy_graphs import karate_club, painters 11 | from sknetwork.hierarchy import Paris 12 | from sknetwork.visualization.dendrograms import visualize_dendrogram, svg_dendrogram, svg_dendrogram_top 13 | 14 | 15 | # noinspection DuplicatedCode 16 | class TestVisualization(unittest.TestCase): 17 | 18 | def test_undirected(self): 19 | adjacency = karate_club() 20 | paris = Paris() 21 | dendrogram = paris.fit_transform(adjacency) 22 | image = svg_dendrogram(dendrogram) 23 | self.assertEqual(image[1:4], 'svg') 24 | n = adjacency.shape[0] 25 | image = visualize_dendrogram(dendrogram, names=np.arange(n), width=200, height=200, margin=10, margin_text=5, 26 | scale=3, n_clusters=3, color='green', colors=['red', 'blue'], font_size=14, 27 | reorder=True, rotate=True) 28 | self.assertEqual(image[1:4], 'svg') 29 | image = svg_dendrogram(dendrogram, names=np.arange(n), width=200, height=200, margin=10, margin_text=5, scale=3, 30 | n_clusters=3, color='green', colors={0: 'red', 1: 'blue'}, font_size=14, reorder=False, 31 | rotate=True) 32 | self.assertEqual(image[1:4], 'svg') 33 | svg_dendrogram_top(dendrogram, names=np.arange(n), width=200, height=200, margin=10, margin_text=5, scale=3, 34 | n_clusters=3, color='green', colors=np.array(['red', 'black', 'blue']), font_size=14, 35 | reorder=False, rotate_names=True, line_width=0.1) 36 | 37 | def test_directed(self): 38 | graph = painters(True) 39 | adjacency = graph.adjacency 40 | names = graph.names 41 | paris = Paris() 42 | dendrogram = paris.fit_transform(adjacency) 43 | image = visualize_dendrogram(dendrogram) 44 | self.assertEqual(image[1:4], 'svg') 45 | image = visualize_dendrogram(dendrogram, names=names, width=200, height=200, margin=10, margin_text=5, scale=3, 46 | n_clusters=3, color='green', font_size=14, reorder=True, rotate=True) 47 | self.assertEqual(image[1:4], 'svg') 48 | 49 | filename = tempfile.gettempdir() + '/image' 50 | _ = visualize_dendrogram(dendrogram, filename=filename) 51 | with open(filename + '.svg', 'r') as f: 52 | row = f.readline() 53 | self.assertEqual(row[1:4], 'svg') 54 | --------------------------------------------------------------------------------
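# Usage sketch (not a file of the package): assembled from the tests above, it builds a
# dendrogram of the karate club graph with Paris and renders it as SVG, exactly as
# exercised in sknetwork/visualization/tests/test_dendrograms.py.
from sknetwork.data import karate_club
from sknetwork.hierarchy import Paris
from sknetwork.visualization import visualize_dendrogram

adjacency = karate_club()
dendrogram = Paris().fit_transform(adjacency)
image = visualize_dendrogram(dendrogram, n_clusters=3)
assert image[1:4] == 'svg'  # visualize_dendrogram returns an SVG string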