├── .github └── workflows │ ├── coverage.yml │ ├── docs.yml │ ├── linter.yml │ └── unit_tests.yml ├── .gitignore ├── LICENSE ├── README.md ├── changelog.md ├── codecov.yml ├── contributing.md ├── docs ├── Makefile ├── buildDocs.sh ├── conf.py ├── make.bat └── source │ ├── _static │ ├── custom.css │ ├── full_logo.png │ └── logo.png │ ├── _templates │ ├── version.html │ └── versions.html │ ├── api_reference │ ├── index.rst │ ├── modules.rst │ ├── sknet.network_construction.dataset_constructors.rst │ ├── sknet.network_construction.general_constructors.rst │ ├── sknet.network_construction.rst │ ├── sknet.network_construction.tests.rst │ ├── sknet.network_construction.tests.test_network_construction.rst │ ├── sknet.network_construction.time_series_constructors.rst │ ├── sknet.rst │ ├── sknet.semi_supervised.modularity_label_propagation.rst │ ├── sknet.semi_supervised.rst │ ├── sknet.supervised.ease_of_access.rst │ ├── sknet.supervised.high_level_classification.rst │ ├── sknet.supervised.rst │ ├── sknet.supervised.tests.rst │ ├── sknet.supervised.tests.test_ease_of_access.rst │ ├── sknet.supervised.tests.test_high_level_classification.rst │ ├── sknet.unsupervised.rst │ ├── sknet.unsupervised.stochastic_particle_competition.rst │ ├── sknet.utils.low_level_models_handler.rst │ ├── sknet.utils.network_metrics_handler.rst │ ├── sknet.utils.network_types_handler.rst │ └── sknet.utils.rst │ ├── conf.py │ ├── development │ └── index.rst │ ├── getting_started │ ├── index.rst │ ├── installation.rst │ ├── semi_supervised_learning.rst │ ├── supervised_learning.rst │ ├── transforming_data.rst │ └── unsupervised_learning.rst │ ├── index.rst │ └── user_guide │ ├── images │ ├── ease_of_access.png │ ├── epsilon.png │ ├── k-eps.png │ └── knn.png │ └── index.rst ├── requirements.txt ├── setup.py ├── sknet ├── __init__.py ├── network_construction │ ├── __init__.py │ ├── dataset_constructors.py │ ├── general_constructors.py │ ├── tests │ │ ├── __init__.py │ │ └── 
test_network_construction.py │ └── time_series_constructors.py ├── semi_supervised │ ├── __init__.py │ ├── modularity_label_propagation.py │ └── tests │ │ └── test_modularity_label_propagation.py ├── supervised │ ├── __init__.py │ ├── ease_of_access.py │ ├── high_level_classification.py │ └── tests │ │ ├── __init__.py │ │ ├── test_ease_of_access.py │ │ └── test_high_level_classification.py ├── unsupervised │ ├── __init__.py │ ├── stochastic_particle_competition.py │ └── tests │ │ └── test_stochastic_particle_competition.py └── utils │ ├── __init__.py │ ├── low_level_models_handler.py │ ├── network_metrics_handler.py │ ├── network_types_handler.py │ └── tests │ └── test_network_types_handler.py └── templates ├── issue.md └── pull_request.md /.github/workflows/coverage.yml: -------------------------------------------------------------------------------- 1 | name: CodeCov 2 | 3 | on: [pull_request] 4 | 5 | jobs: 6 | python-codecov: 7 | runs-on: ubuntu-latest 8 | name: CodeCov 9 | steps: 10 | - name: checkout source repo 11 | uses: actions/checkout@v2 12 | with: 13 | fetch-depth: '2' 14 | 15 | - name: Generate Report 16 | run: | 17 | python -m pip install --upgrade pip 18 | pip install -r requirements.txt 19 | coverage run -m pytest -vv 20 | 21 | - name: Upload Coverage to Codecov 22 | uses: codecov/codecov-action@v1 23 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: docs_pages_workflow 2 | 3 | # execute this workflow automatically when we push to master 4 | on: 5 | push: 6 | 7 | jobs: 8 | 9 | build_docs_job: 10 | runs-on: ubuntu-latest 11 | container: debian:buster-slim 12 | 13 | steps: 14 | 15 | - name: Prereqs 16 | env: 17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 18 | run: | 19 | apt-get update 20 | apt-get install -y git 21 | git clone "https://token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" . 
22 | shell: bash 23 | 24 | - name: Execute script to build our documentation and update pages 25 | env: 26 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 27 | run: | 28 | chmod 775 docs/buildDocs.sh 29 | docs/buildDocs.sh 30 | shell: bash -------------------------------------------------------------------------------- /.github/workflows/linter.yml: -------------------------------------------------------------------------------- 1 | name: Linter 2 | 3 | on: [pull_request] 4 | 5 | jobs: 6 | python-lint: 7 | runs-on: ubuntu-latest 8 | name: Linter 9 | steps: 10 | - name: checkout source repo 11 | uses: actions/checkout@v2 12 | 13 | - name: linting 14 | uses: alexanderdamiani/pylinter@v1.1.0 15 | with: 16 | skip-mypy: true 17 | skip-isort: true -------------------------------------------------------------------------------- /.github/workflows/unit_tests.yml: -------------------------------------------------------------------------------- 1 | name: Unit Tests 2 | 3 | on: [pull_request] 4 | 5 | jobs: 6 | python-pytest: 7 | runs-on: ubuntu-latest 8 | name: unit tests 9 | steps: 10 | - name: checkout source repo 11 | uses: actions/checkout@v2 12 | 13 | - name: setup python 14 | uses: actions/setup-python@v1 15 | with: 16 | python-version: 3.7 17 | 18 | - name: install dependencies 19 | run: | 20 | python -m pip install --upgrade pip 21 | pip install -r requirements.txt 22 | - name: Test with pytest 23 | run: pytest -vv -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | 
.installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | *.ipynb 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | .vscode/settings.json 132 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Tiago Toledo Junior 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![sknet Logo](https://github.com/TNanukem/sknet/blob/develop/docs/source/_static/full_logo.png "sknet Logo") 2 | 3 | ![Codecov branch](https://img.shields.io/codecov/c/github/tnanukem/sknet/develop?token=PIQ338YNK1) 4 | 5 | The sknet project is a scikit-learn and NetworkX compatible framework for machine learning in complex networks. It provides learning algorithms for complex networks, as well as transforming methods to turn tabular data into complex networks. 6 | 7 | It started in 2021 as a project from volunteers to help to improve the development of research on the interface between complex networks and machine learning. Its main focus 8 | is to help researchers and students to develop solutions using machine learning on complex networks. 9 | 10 | ## :computer: Installation 11 | 12 | The sknet installation is available via PyPI: 13 | 14 | pip install scikit-net 15 | 16 | ## :high_brightness: Quickstart 17 | 18 | The following code snippet shows how one can transform tabular data into a complex network and then use it to create a classifier: 19 | 20 | from sklearn.model_selection import train_test_split 21 | from sklearn.metrics import accuracy_score 22 | from sklearn.datasets import load_iris 23 | from sknet.network_construction import KNNConstructor 24 | from sknet.supervised import EaseOfAccessClassifier 25 | 26 | X, y = load_iris(return_X_y = True) 27 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33) 28 | 29 | # The constructor responsible for transforming the tabular data into a complex network 30 | knn_c = KNNConstructor(k=5) 31 | 32 | classifier = EaseOfAccessClassifier() 33 | classifier.fit(X_train, y_train, constructor=knn_c) 34 | y_pred = classifier.predict(X_test) 35 | accuracy_score(y_test, y_pred) 36 | 37 | ## :pencil: Documentation 38 | 39 | We
provide extensive API documentation, as well as some user guides. The documentation is available at https://tnanukem.github.io/scikit-net/ 40 | 41 | ## Citation 42 | 43 | If you use scikit-net in your research project, please cite us using the following publication: 44 | 45 | @article{Toledo2021, 46 | doi = {10.21105/joss.03864}, 47 | url = {https://doi.org/10.21105/joss.03864}, 48 | year = {2021}, 49 | publisher = {The Open Journal}, 50 | volume = {6}, 51 | number = {68}, 52 | pages = {3864}, 53 | author = {Tiago Toledo}, 54 | title = {sknet: A Python framework for Machine Learning in Complex Networks}, 55 | journal = {Journal of Open Source Software} 56 | } 57 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [0.1.0] - xxxx-xx-xx 9 | 10 | ### Added 11 | - Modularity Label Propagation now allows a network reduction proposed by Silva et al. 
12 | - New constructor, based on single linkage clustering heuristics, was added to transform datasets into networks 13 | - New time series constructor for univariate series using the recurrence on the phase space 14 | 15 | ### Changed 16 | 17 | ### Fixed 18 | 19 | ### Removed 20 | 21 | ### Deprecated 22 | 23 | [0.0.1]: https://github.com/TNanukem/scikit-net/releases/tag/v0.0.1 -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | token: 92c22736-5259-4282-a4a6-69bfd6d9f3c7 3 | 4 | ignore: 5 | - "*/tests/*" 6 | - "*/sknet_env/*" -------------------------------------------------------------------------------- /contributing.md: -------------------------------------------------------------------------------- 1 | ## Contributing 2 | 3 | First of all, thank you for being interested in contributing to the scikit-net package. 4 | 5 | The scikit-net is an open-source package and it depends on the help 6 | and feedback of the community to keep improving. So you are mostly 7 | welcome to help us out. 8 | 9 | You can find instructions on how you can help with the development at the [documentation](https://tnanukem.github.io/scikit-net/main/development/index.html). -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/buildDocs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | apt-get update 5 | 6 | apt-get -y install python3-pip 7 | python3 -m pip install Sphinx==4.1.2 8 | apt-get -y install git rsync build-essential python3-stemmer python3-git python3-virtualenv python3-setuptools 9 | python3 -m pip install --upgrade pip 10 | python3 -m pip install -r requirements.txt 11 | python3 -m pip install --upgrade rinohtype pygments 12 | 13 | pwd 14 | ls -lah 15 | export SOURCE_DATE_EPOCH=$(git log -1 --pretty=%ct) 16 | 17 | # make a new temp dir which will be our GitHub Pages docroot 18 | docroot=`mktemp -d` 19 | 20 | export REPO_NAME="${GITHUB_REPOSITORY##*/}" 21 | 22 | ############## 23 | # BUILD DOCS # 24 | ############## 25 | 26 | make -C docs clean 27 | versions="`git for-each-ref '--format=%(refname:lstrip=-1)' refs/remotes/origin/ | grep -viE '^(HEAD|gh-pages)$'`" 28 | for current_version in ${versions}; do 29 | 30 | # make the current language available to conf.py 31 | export current_version 32 | git checkout ${current_version} 33 | 34 | echo "INFO: Building sites for ${current_version}" 35 | 36 | # skip this branch if it doesn't have our docs dir & sphinx config 37 | if [ !
-e 'docs/conf.py' ]; then 38 | echo -e "\tINFO: Couldn't find 'docs/conf.py' (skipped)" 39 | continue 40 | fi 41 | 42 | # HTML # 43 | sphinx-build -b html docs/source docs/build/html/${current_version} 44 | 45 | # PDF # 46 | sphinx-build -b rinoh docs/source docs/build/rinoh 47 | mkdir -p "${docroot}/${current_version}" 48 | cp "docs/build/rinoh/target.pdf" "${docroot}/${current_version}/helloWorld-docs__${current_version}.pdf" 49 | 50 | # EPUB # 51 | sphinx-build -b epub docs/source docs/build/epub 52 | mkdir -p "${docroot}/${current_version}" 53 | cp "docs/build/epub/target.epub" "${docroot}/${current_version}/helloWorld-docs_${current_version}.epub" 54 | 55 | # copy the static assets produced by the above build into our docroot 56 | cp -a "docs/build/html/${current_version}/." "${docroot}/${current_version}/" 57 | 58 | 59 | done 60 | 61 | # return to master branch 62 | git checkout master 63 | 64 | git config --global user.name "${GITHUB_ACTOR}" 65 | git config --global user.email "${GITHUB_ACTOR}@users.noreply.github.com" 66 | 67 | pushd "${docroot}" 68 | 69 | git init 70 | git remote add deploy "https://token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" 71 | git checkout -b gh-pages 72 | 73 | touch .nojekyll 74 | 75 | # add redirect from the docroot to our default docs language/version 76 | cat > index.html < 78 | 79 | 80 | helloWorld Docs 81 | 82 | 83 | 84 |

Please wait while you're redirected to our documentation.

85 | 86 | 87 | EOF 88 | 89 | cat > README.md <NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/_static/custom.css: -------------------------------------------------------------------------------- 1 | .btn-version { 2 | background: #2c7fb8ff; 3 | color: #ffffff; 4 | } 5 | 6 | .btn-version:hover, .btn-version:focus, .btn-version:active, .btn-version.active, .open > .dropdown-toggle.btn-version { 7 | background: #33a6cc; 8 | } 9 | 10 | .btn-version:active, .btn-version.active { 11 | background: #007299; 12 | box-shadow: none; 13 | } -------------------------------------------------------------------------------- /docs/source/_static/full_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TNanukem/scikit-net/df8534268bc04787340e64a6eac2ee4beddc3068/docs/source/_static/full_logo.png -------------------------------------------------------------------------------- /docs/source/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TNanukem/scikit-net/df8534268bc04787340e64a6eac2ee4beddc3068/docs/source/_static/logo.png -------------------------------------------------------------------------------- /docs/source/_templates/version.html: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/source/_templates/versions.html: -------------------------------------------------------------------------------- 1 | {% if READTHEDOCS or display_lower_left %} 2 | {# Add rst-badge after rst-versions for small badge style. #} 3 |
4 | 5 | Read the Docs 6 | v: {{ current_version }} 7 | 8 | 9 |
10 | {% if languages|length >= 1 %} 11 |
12 |
{{ _('Languages') }}
13 | {% for slug, url in languages %} 14 | {% if slug == current_language %} {% endif %} 15 |
{{ slug }}
16 | {% if slug == current_language %}
{% endif %} 17 | {% endfor %} 18 |
19 | {% endif %} 20 | {% if versions|length >= 1 %} 21 |
22 |
{{ _('Versions') }}
23 | {% for slug, url in versions %} 24 | {% if slug == current_version %} {% endif %} 25 |
{{ slug }}
26 | {% if slug == current_version %}
{% endif %} 27 | {% endfor %} 28 |
29 | {% endif %} 30 | {% if downloads|length >= 1 %} 31 |
32 |
{{ _('Downloads') }}
33 | {% for type, url in downloads %} 34 |
{{ type }}
35 | {% endfor %} 36 |
37 | {% endif %} 38 | {% if READTHEDOCS %} 39 |
40 |
{{ _('On Read the Docs') }}
41 |
42 | {{ _('Project Home') }} 43 |
44 |
45 | {{ _('Builds') }} 46 |
47 |
48 | {% endif %} 49 |
50 | {% trans %}Free document hosting provided by Read the Docs.{% endtrans %} 51 | 52 |
53 |
54 | {% endif %} -------------------------------------------------------------------------------- /docs/source/api_reference/index.rst: -------------------------------------------------------------------------------- 1 | .. sknet documentation master file, created by 2 | sphinx-quickstart on Fri Mar 5 05:44:33 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | API Reference 7 | ============= 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | 12 | sknet 13 | -------------------------------------------------------------------------------- /docs/source/api_reference/modules.rst: -------------------------------------------------------------------------------- 1 | sknet 2 | ===== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | sknet 8 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.network_construction.dataset_constructors.rst: -------------------------------------------------------------------------------- 1 | sknet.network\_construction.dataset\_constructors module 2 | ======================================================== 3 | 4 | .. automodule:: sknet.network_construction.dataset_constructors 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.network_construction.general_constructors.rst: -------------------------------------------------------------------------------- 1 | sknet.network\_construction.general\_constructors module 2 | ======================================================== 3 | 4 | .. 
automodule:: sknet.network_construction.general_constructors 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.network_construction.rst: -------------------------------------------------------------------------------- 1 | sknet.network\_construction package 2 | =================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 2 9 | 10 | sknet.network_construction.tests 11 | 12 | Submodules 13 | ---------- 14 | 15 | .. toctree:: 16 | :maxdepth: 2 17 | 18 | sknet.network_construction.dataset_constructors 19 | sknet.network_construction.general_constructors 20 | sknet.network_construction.time_series_constructors 21 | 22 | Module contents 23 | --------------- 24 | 25 | .. automodule:: sknet.network_construction 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.network_construction.tests.rst: -------------------------------------------------------------------------------- 1 | sknet.network\_construction.tests package 2 | ========================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | .. toctree:: 8 | :maxdepth: 2 9 | 10 | sknet.network_construction.tests.test_network_construction 11 | 12 | Module contents 13 | --------------- 14 | 15 | .. automodule:: sknet.network_construction.tests 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.network_construction.tests.test_network_construction.rst: -------------------------------------------------------------------------------- 1 | sknet.network\_construction.tests.test\_network\_construction module 2 | ==================================================================== 3 | 4 | .. 
automodule:: sknet.network_construction.tests.test_network_construction 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.network_construction.time_series_constructors.rst: -------------------------------------------------------------------------------- 1 | sknet.network\_construction.time\_series\_constructors module 2 | ============================================================= 3 | 4 | .. automodule:: sknet.network_construction.time_series_constructors 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.rst: -------------------------------------------------------------------------------- 1 | sknet package 2 | ============= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 2 9 | 10 | sknet.network_construction 11 | sknet.semi_supervised 12 | sknet.supervised 13 | sknet.unsupervised 14 | sknet.utils 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: sknet 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.semi_supervised.modularity_label_propagation.rst: -------------------------------------------------------------------------------- 1 | sknet.semi\_supervised.modularity\_label\_propagation module 2 | ============================================================ 3 | 4 | .. 
automodule:: sknet.semi_supervised.modularity_label_propagation 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.semi_supervised.rst: -------------------------------------------------------------------------------- 1 | sknet.semi\_supervised package 2 | ============================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | .. toctree:: 8 | :maxdepth: 2 9 | 10 | sknet.semi_supervised.modularity_label_propagation 11 | 12 | Module contents 13 | --------------- 14 | 15 | .. automodule:: sknet.semi_supervised 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.supervised.ease_of_access.rst: -------------------------------------------------------------------------------- 1 | sknet.supervised.ease\_of\_access module 2 | ======================================== 3 | 4 | .. automodule:: sknet.supervised.ease_of_access 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.supervised.high_level_classification.rst: -------------------------------------------------------------------------------- 1 | sknet.supervised.high\_level\_classification module 2 | =================================================== 3 | 4 | .. automodule:: sknet.supervised.high_level_classification 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.supervised.rst: -------------------------------------------------------------------------------- 1 | sknet.supervised package 2 | ======================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. 
toctree:: 8 | :maxdepth: 2 9 | 10 | sknet.supervised.tests 11 | 12 | Submodules 13 | ---------- 14 | 15 | .. toctree:: 16 | :maxdepth: 2 17 | 18 | sknet.supervised.ease_of_access 19 | sknet.supervised.high_level_classification 20 | 21 | Module contents 22 | --------------- 23 | 24 | .. automodule:: sknet.supervised 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.supervised.tests.rst: -------------------------------------------------------------------------------- 1 | sknet.supervised.tests package 2 | ============================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | .. toctree:: 8 | :maxdepth: 2 9 | 10 | sknet.supervised.tests.test_ease_of_access 11 | sknet.supervised.tests.test_high_level_classification 12 | 13 | Module contents 14 | --------------- 15 | 16 | .. automodule:: sknet.supervised.tests 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.supervised.tests.test_ease_of_access.rst: -------------------------------------------------------------------------------- 1 | sknet.supervised.tests.test\_ease\_of\_access module 2 | ==================================================== 3 | 4 | .. automodule:: sknet.supervised.tests.test_ease_of_access 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.supervised.tests.test_high_level_classification.rst: -------------------------------------------------------------------------------- 1 | sknet.supervised.tests.test\_high\_level\_classification module 2 | =============================================================== 3 | 4 | .. 
automodule:: sknet.supervised.tests.test_high_level_classification 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.unsupervised.rst: -------------------------------------------------------------------------------- 1 | sknet.unsupervised package 2 | ========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | .. toctree:: 8 | :maxdepth: 2 9 | 10 | sknet.unsupervised.stochastic_particle_competition 11 | 12 | Module contents 13 | --------------- 14 | 15 | .. automodule:: sknet.unsupervised 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.unsupervised.stochastic_particle_competition.rst: -------------------------------------------------------------------------------- 1 | sknet.unsupervised.stochastic\_particle\_competition module 2 | =========================================================== 3 | 4 | .. automodule:: sknet.unsupervised.stochastic_particle_competition 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.utils.low_level_models_handler.rst: -------------------------------------------------------------------------------- 1 | sknet.utils.low\_level\_models\_handler module 2 | ============================================== 3 | 4 | .. automodule:: sknet.utils.low_level_models_handler 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.utils.network_metrics_handler.rst: -------------------------------------------------------------------------------- 1 | sknet.utils.network\_metrics\_handler module 2 | ============================================ 3 | 4 | .. 
automodule:: sknet.utils.network_metrics_handler 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.utils.network_types_handler.rst: -------------------------------------------------------------------------------- 1 | sknet.utils.network\_types\_handler module 2 | ========================================== 3 | 4 | .. automodule:: sknet.utils.network_types_handler 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api_reference/sknet.utils.rst: -------------------------------------------------------------------------------- 1 | sknet.utils package 2 | =================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | .. toctree:: 8 | :maxdepth: 2 9 | 10 | sknet.utils.low_level_models_handler 11 | sknet.utils.network_metrics_handler 12 | sknet.utils.network_types_handler 13 | 14 | Module contents 15 | --------------- 16 | 17 | .. automodule:: sknet.utils 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath('../../')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'sknet' 21 | copyright = '2021, Tiago Toledo' 22 | author = 'Tiago Toledo' 23 | 24 | 25 | # -- General configuration --------------------------------------------------- 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be 28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 29 | # ones. 30 | extensions = [ 31 | 'sphinx.ext.autodoc', 32 | 'sphinx.ext.napoleon' 33 | ] 34 | 35 | master_doc = 'index' 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ['_templates'] 39 | 40 | # List of patterns, relative to source directory, that match files and 41 | # directories to ignore when looking for source files. 42 | # This pattern also affects html_static_path and html_extra_path. 43 | exclude_patterns = [] 44 | 45 | 46 | # -- Options for HTML output ------------------------------------------------- 47 | 48 | # The theme to use for HTML and HTML Help pages. See the documentation for 49 | # a list of builtin themes. 50 | # 51 | html_theme = 'pydata_sphinx_theme' 52 | html_logo = "_static/logo.png" 53 | 54 | html_theme_options = { 55 | "icon_links": [ 56 | { 57 | "name": "GitHub", 58 | "url": "https://github.com/TNanukem/scikit-net", 59 | "icon": "fab fa-github-square", 60 | }, 61 | { 62 | "name": "Twitter", 63 | "url": "https://twitter.com/TiagoJToledoJr", 64 | "icon": "fab fa-twitter-square", 65 | }, 66 | ], 67 | "navbar_start": ["navbar-logo"], 68 | "navbar_end": ["navbar-icon-links", "version"] 69 | } 70 | 71 | html_context = { 72 | "versions_dropdown": { 73 | "develop": "develop (latest)", 74 | "main": "main (stable)", 75 | }, 76 | } 77 | 78 | # Add any paths that contain custom static files (such as style sheets) here, 79 | # relative to this directory. 
They are copied after the builtin static files, 80 | # so a file named "default.css" will overwrite the builtin "default.css". 81 | html_static_path = ['_static'] 82 | 83 | 84 | def setup(app): 85 | app.add_css_file("custom.css") 86 | -------------------------------------------------------------------------------- /docs/source/development/index.rst: -------------------------------------------------------------------------------- 1 | .. sknet documentation master file, created by 2 | sphinx-quickstart on Fri Mar 5 05:44:33 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Development 7 | =========== 8 | 9 | This is a guide for anyone interested in helping the development of the sknet. The library is an 10 | open-source project and therefore depends on the community to keep existing, everyone is welcome 11 | to help us improve. 12 | 13 | How to contribute? 14 | ------------------ 15 | 16 | There are several ways of contributing for the sknet. Below we state those ways in ascending order 17 | of complexity. 18 | 19 | Opening an issue 20 | ^^^^^^^^^^^^^^^^ 21 | We oficially use the Issue Tracker on our github repo to hold up every new feature request and bug 22 | tracking. Therefore, you can open up an issue to: 23 | 24 | - Warn us about some bug on the library 25 | - Warn us about documentation errors 26 | - Request a new feature or change for one already implemented algorithm 27 | - Request a brand new algorithm 28 | 29 | We provide a basic template for issues on the ``templates`` folder inside our Github repo, please 30 | refer to it before opening the issue so you can provide us with all of the information we need to 31 | evaluate and (possibly) work on your issue. 
32 | 33 | Contributing to the documentation 34 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 35 | If you found some problem on the documentation, such as wrong information, typos or something that 36 | is missing or could be better explained, please, open an issue about it. After that, if you want to 37 | correct the documentation yourself, feel free to open up a Pull Request with your correction. Please 38 | remember to cite your issue on the Pull Request. 39 | 40 | Solving an issue 41 | ^^^^^^^^^^^^^^^^ 42 | If you find an issue on the repo and thinks you can solve it, we encourage you to do so. Just verify 43 | previously if no one is already assigned that issue. If this is not the case, then you can ask, on the 44 | issue comments, to be assigned it. 45 | 46 | Once you finished your implementation, open up a Pull Request on the repo. Your Pull Request will be 47 | reviewed. Be aware that several iterations of revision may be required before the code is merged. We 48 | encourage you to see the revision as a chat between two (or more) people trying to deliver the best 49 | possible product to the people using the library. 50 | 51 | Pull Requests 52 | ------------- 53 | For those interested into opening Pull Requests for new features on the code, we will briefly describe 54 | some of the things you must pay attention to. 55 | 56 | First of all, one template for Pull Requests is available on the ``templates`` folder inside the Github 57 | repo. Regarding to your code, some restrictions must be satisfied: 58 | 59 | - Every Pull Request branch must be made from and merged to the ``develop`` branch 60 | - Every new class or method must be unittested with pytest. We will not accept additions to the repo that reduces our coverage without a good reason for doing so 61 | - Every public method must have a docstring using the numpy docstring style 62 | - Every code must adhere to the PEP8. 
We suggest using flake8 to assess your style 63 | - Performance improvements must contain benchmark results 64 | - We value good-sense when documenting methods 65 | - Every change to modules with interface to users must have an entry on the documentation 66 | 67 | Our Continuous Integration pipeline will help you ensure most of those aspects. However, we strongly 68 | encourage you to run those tests on your machine before submitting the Pull Request to avoid overhead 69 | on the CI. 70 | 71 | Doubts? 72 | ------- 73 | 74 | If any doubts remain, please feel free to contact any of the developers. 75 | -------------------------------------------------------------------------------- /docs/source/getting_started/index.rst: -------------------------------------------------------------------------------- 1 | .. sknet documentation master file, created by 2 | sphinx-quickstart on Fri Mar 5 05:44:33 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Getting Started 7 | =============== 8 | 9 | How to start using the sknet for your projects. 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | installation 15 | transforming_data 16 | supervised_learning 17 | semi_supervised_learning 18 | unsupervised_learning 19 | 20 | -------------------------------------------------------------------------------- /docs/source/getting_started/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | In order to install the sknet you can use pip using the ``scikit-net`` name as follows: 5 | 6 | .. 
code-block:: console 7 | 8 | pip install scikit-net 9 | -------------------------------------------------------------------------------- /docs/source/getting_started/semi_supervised_learning.rst: -------------------------------------------------------------------------------- 1 | Semi Supervised Learning 2 | ======================== 3 | 4 | The semi-supervised algorithms try to leverage great amounts of unlabeled data 5 | with a smaller amount of labeled data. One can use a semi-supervised algorithm 6 | available on the sknet as follows. 7 | 8 | .. code-block:: python 9 | 10 | from sklearn.datasets import load_iris 11 | from sknet.network_construction import KNNConstructor 12 | from sknet.semi_supervised import ModularityLabelPropagation 13 | X, y = load_iris(return_X_y = True) 14 | knn_c = KNNConstructor(k=5, sep_comp=False) 15 | y[10:20] = np.nan 16 | y[70:80] = np.nan 17 | y[110:120] = np.nan 18 | propagator = ModularityLabelPropagation() 19 | propagator.fit(X, y, constructor=knn_c) 20 | propagator.generated_y 21 | -------------------------------------------------------------------------------- /docs/source/getting_started/supervised_learning.rst: -------------------------------------------------------------------------------- 1 | Supervised Learning 2 | =================== 3 | In a supervised learning setting, the sknet has two main focuses: 4 | - Performing a supervised learning task on a complex network 5 | - Use complex networks to improve the performance of other machine learning algorithms 6 | 7 | We will briefly show two examples of supervised learning algorithms available on sknet. 8 | 9 | Heuristic of Ease of Access 10 | --------------------------- 11 | 12 | This is a learning algorithm to be applied on complex networks and consists of verifying 13 | how new samples affect the similarity between nodes when added to the component related 14 | to a given class. 15 | 16 | The following code snippet shows how to run it using a tabular dataset: 17 | 18 | .. 
code-block:: python 19 | 20 | from sklearn.datasets import load_iris 21 | from sknet.network_construction import KNNConstructor 22 | from sknet.supervised import EaseOfAccessClassifier 23 | X, y = load_iris(return_X_y = True) 24 | X_train, X_test, y_train, y_test = train_test_split(X, y, 25 | test_size=0.33) 26 | knn_c = KNNConstructor(k=5) 27 | classifier = EaseOfAccessClassifier(t=5) 28 | classifier.fit(X_train, y_train, constructor=knn_c) 29 | ease = classifier.predict(X_test) 30 | 31 | If you want to run it on a Complex Network, then the following snippet shows how to: 32 | 33 | .. code-block:: python 34 | 35 | from sknet.supervised import EaseOfAccessClassifier 36 | 37 | classifier = EaseOfAccessClassifier(t=5) 38 | classifier.fit(G=G) 39 | ease = classifier.predict(G_test) 40 | 41 | High Level Data Classification 42 | ------------------------------ 43 | 44 | This algorithm leverages both traditional tabular Machine Learning and Complex 45 | Networks Machine Learning to generate a classifier with better accuracy. In order 46 | to use this method, you must use a tabular dataset with the desired features. 47 | 48 | This algorithm will use the low-level (traditional Machine Learning model) model to 49 | predict the class probabilities and then will do the same using a Complex Network 50 | method. Then, both of the predictions will be united generating a single probability 51 | prediction. 52 | 53 | The following snippet shows how to use it: 54 | 55 | .. 
code-block:: python 56 | 57 | from sklearn.datasets import load_iris 58 | from sknet.network_construction import KNNConstructor 59 | from sknet.supervised import HighLevelClassifier 60 | X, y = load_iris(return_X_y = True) 61 | X_train, X_test, y_train, y_test = train_test_split(X, y, 62 | test_size=0.33) 63 | knn_c = KNNConstructor(k=5) 64 | classifier = HighLevelClassifier() 65 | classifier.fit(X_train, y_train, constructor=knn_c) 66 | pred = classifier.predict(X_test) -------------------------------------------------------------------------------- /docs/source/getting_started/transforming_data.rst: -------------------------------------------------------------------------------- 1 | Transforming Data 2 | ================= 3 | 4 | The sknet provides classes to allow data transformation between different kinds. Since 5 | the implemented algorithms may require an specific data type to work, those classes 6 | allow the user to freely transform data and use any of the methods. 7 | 8 | So far, the following transformations are available: 9 | 10 | - Tabular data -> Complex networks 11 | - Time series tabular data -> Complex networks 12 | 13 | Below there is an example of how one can use one of the tabular datasets constructor 14 | to turn tabular data into a complex network. 15 | 16 | .. code-block:: python 17 | 18 | from sklearn.datasets import load_iris 19 | from sknet.network_construction import KNNEpislonRadiusConstructor 20 | X, y = load_iris(return_X_y = True) 21 | ke_c = KNNEpislonRadiusConstructor(k=3, epsilon=0.3) 22 | ke_c.fit(X, y) 23 | G = ke_c.transform() 24 | 25 | And below an example of how one can use one of the time series constructor to turn a 26 | time series into a complex network: 27 | 28 | .. 
code-block:: python 29 | 30 | from sknet.network_construction import UnivariateCorrelationConstructor 31 | r = 0.5 32 | L = 10 33 | constructor = UnivariateCorrelationConstructor(r, L) 34 | constructor.fit(X) 35 | G = constructor.transform() -------------------------------------------------------------------------------- /docs/source/getting_started/unsupervised_learning.rst: -------------------------------------------------------------------------------- 1 | Unsupervised Learning 2 | ===================== 3 | 4 | The unsupervised learning methods, when applied to complex networks, are usually 5 | called community dectection methods. Their focus is to find groups of nodes where 6 | the number of edges intra-community is way greater than the number of edges extra-community. 7 | 8 | The following code snippet shows how one can use one of the unsupervised methods of the sknet 9 | to clusterize some dataset: 10 | 11 | .. code-block:: python 12 | 13 | from sklearn.datasets import load_iris 14 | from sknet.network_construction import KNNConstructor 15 | from sknet.unsupervised import StochasticParticleCompetition 16 | X, y = load_iris(return_X_y = True) 17 | knn_c = KNNConstructor(k=5, sep_comp=False) 18 | SCP = StochasticParticleCompetition() 19 | SCP.fit(X, y, constructor=knn_c) 20 | SCP.clusters_ 21 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. sknet documentation master file, created by 2 | sphinx-quickstart on Fri Mar 5 05:44:33 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to sknet's documentation! 7 | ================================= 8 | 9 | The sknet is a scikit-learn compatible and NetworkX compatible library that implements tools for 10 | applying machine learning algorithms to complex networks. 
11 | 12 | It hopes to help researchers and students on the area to develop solutions to complex problems 13 | and further allow the development of new research on the Complex Networks area. 14 | 15 | .. toctree:: 16 | :maxdepth: 1 17 | :caption: Contents: 18 | 19 | getting_started/index 20 | user_guide/index 21 | api_reference/index 22 | development/index 23 | -------------------------------------------------------------------------------- /docs/source/user_guide/images/ease_of_access.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TNanukem/scikit-net/df8534268bc04787340e64a6eac2ee4beddc3068/docs/source/user_guide/images/ease_of_access.png -------------------------------------------------------------------------------- /docs/source/user_guide/images/epsilon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TNanukem/scikit-net/df8534268bc04787340e64a6eac2ee4beddc3068/docs/source/user_guide/images/epsilon.png -------------------------------------------------------------------------------- /docs/source/user_guide/images/k-eps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TNanukem/scikit-net/df8534268bc04787340e64a6eac2ee4beddc3068/docs/source/user_guide/images/k-eps.png -------------------------------------------------------------------------------- /docs/source/user_guide/images/knn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TNanukem/scikit-net/df8534268bc04787340e64a6eac2ee4beddc3068/docs/source/user_guide/images/knn.png -------------------------------------------------------------------------------- /docs/source/user_guide/index.rst: -------------------------------------------------------------------------------- 1 | .. 
sknet documentation master file, created by 2 | sphinx-quickstart on Fri Mar 5 05:44:33 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | User Guide 7 | ********** 8 | 9 | This section will introduce the main modules of the sknet and show some examples as well as explaining the theory 10 | behind the implemented algorithms. 11 | 12 | The sknet main structure divide the classes into two main types: auxiliary methods such as utilities and transformations and 13 | machine learning methods which are divided into supervised, unsupervised and semi supervised methods. 14 | 15 | Most of the Machine Learning methods can work both with tabular data (in form of a Pandas Dataframe or a Numpy Array) and with graph data 16 | (in form of a NetworkX complex network), exceptions will be explicit on the documentation. 17 | 18 | Transformation methods 19 | ====================== 20 | 21 | These are the backbones of the inner workings of the sknet. The transformation classes are responsible for transforming data from one 22 | type to another. To this date, the following transformations are possible: 23 | 24 | - Tabular Data -> Complex Network 25 | - Time Series -> Complex Network 26 | 27 | The Machine Learning classes are responsible for transforming data to the appropriate format for each one, however, one can always 28 | insert the already transformed data into the class. 29 | 30 | Dataset Constructors 31 | -------------------- 32 | 33 | Those are the methods responsible for transforming tabular data, from the Pandas DataFrame or the Numpy Array format into a 34 | NetworkX complex network. 35 | 36 | When dealing with Dataset Constructors, one may have the classes of the tabular data availabe (such as on a supervised method), 37 | on that case, one may set the constructor so it will generate separated components for each class. 
Some Machine Learning models 38 | will require this while others will require that no separated component is generated. Look up for the documentation of each method 39 | to be aware of the requirements for each method. 40 | 41 | KNN Constructor 42 | ^^^^^^^^^^^^^^^ 43 | 44 | The KNN Constructor uses a k-Nearest Neighbors algorithm to create edges between the instances (rows) of our tabular dataset. For that 45 | the distance between each instance of the dataset is calculated using some distance metric, like the Euclidean Distance, and then, for each 46 | instance, the k closest instances are selected and edges are created between them. 47 | 48 | Notice that this methodology does not create a symmetric network since, given node ``i``, node ``j`` could be one of the k closest points to it but 49 | the contrary may not be true. 50 | 51 | .. image:: images/knn.png 52 | :alt: KNN Constructor 53 | 54 | Also, this method does not allow for singletons to be created. If a node is too far away from the others on the generated space, it will 55 | create at least k edges with k other nodes. 56 | 57 | The main drawback of this methodology is that, for dense regions where there are too many nodes close to each other, the degree of each node 58 | will be underestimated. 59 | 60 | Epsilon-Radius Constructor 61 | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 62 | 63 | The Epsilon-Radius constructor, for each node (row) of the dataset, connects it to all nodes that are inside a circle of radius epsilon. For that 64 | the distance between each instance of the dataset is calculated using some distance metric, like the Euclidean Distance. 65 | 66 | .. image:: images/epsilon.png 67 | :alt: Epsilon Radius Constructor 68 | 69 | This methodology will create a symmetric network since that, for a node to be inside the radius of another, the contrary must also be true at all times. 
However 70 | this method allows singletons to be created since it may be that there are no nodes inside the radius, which is a big drawback of this method. 71 | 72 | KNN Epsilon-Radius Constructor 73 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 74 | 75 | To overcome the drawbacks of both, the KNN Constructor and the Epsilon-Radius constructor, the KNN Epsilon-Radius Constructor tries to sum-up the strengths 76 | of both methods. This constructor will use the Epsilon-Radius method for dense regions of the space and the K-NN method for sparse regions according to the 77 | following equation: 78 | 79 | .. math:: 80 | 81 | \left\{\begin{matrix} 82 | \epsilon\text{-radius}(v_i), & \text{if } |\epsilon\text{-radius}| > k \\ 83 | k\text{-NN}(v_i), & \text{otherwise} 84 | \end{matrix}\right. 85 | 86 | The idea behind this strategy is to add more edges on dense regions that should be more densely connected and to avoid singletons being created on sparse 87 | regions. This way, the generated network will be connected and will have a variable degree level, better representing real world networks. 88 | 89 | .. image:: images/k-eps.png 90 | :alt: KNN Epsilon-Radius Constructor 91 | 92 | Single Linkage Clustering Heuristics Constructor 93 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 94 | 95 | This constructor uses the idea of the Single Linkage heuristic for clustering to generate a network that preserves the original clustering topology 96 | of the dataset. This tries to avoid the over sparsity or over density of the generated networks from the previous constructors that are not able to 97 | guarantee the maintenance of the cluster topology. 98 | 99 | The first step is to calculate the distance between each instance of the dataset using some distance metric, like the Euclidean Distance. 
100 | With that in hands, each node is considered a cluster, then, the two closest clusters are found and the k nearest neighbors between them are connected 101 | by edges if their distance is smaller than a threshold defined by the intra-cluster dissimilarity of each one. 102 | 103 | This process merges the two clusters. Then, it repeats until we have only one cluster left, then the network is complete. 104 | 105 | This method will keep the sparsity between clusters and the density inside a cluster, which, depending on the problem at hand, can be necessary 106 | for the study of the data. 107 | 108 | More information about this method can be found in the following paper: 109 | Cupertino, T.H., Huertas, J., & Zhao, L. (2013). Data clustering using controlled consensus in complex networks. Neurocomputing, 118, 132-140. 110 | 111 | 112 | Time Series Constructors 113 | ------------------------ 114 | 115 | Those are the methods responsible for transforming time series data, univariate or multivariate, into a complex network representation. 116 | 117 | Correlation Constructor 118 | ^^^^^^^^^^^^^^^^^^^^^^^ 119 | 120 | The idea behind the Correlation Constructor is to split the time series into N segments of lenght L 121 | each one which will be a node in our complex network. Then, having those segments, one can calculate the pearson correlation coefficient 122 | between those segments, creating a correlation matrix C. 123 | 124 | Then, an user-defined parameter ``r`` defines the correlation threshold for the creation of an edge between two nodes (segments) of the 125 | network. If the correlation between them is greater than ``r``, then an edge is created. 126 | 127 | Notice that this generate an undirected graph since the correlation between two segments will always be symmetric. This module implements 128 | two variations of this method: one for univariate time series and another for multivariate times series. 
129 | 130 | More information about those methods can be found on: Yang, Y., Yang, H.: Complex network-based time series analysis. Physica A 387, 1381–1386 (2008) 131 | 132 | Recurrence Constructor 133 | ^^^^^^^^^^^^^^^^^^^^^^ 134 | 135 | The recurrence constructor uses the concept of recurrence on the phase space of the time series. Given an embedding of the time series (such as the 136 | Takens Embedding), it is said that two states are recurrent if they are similar enough. So, given two states in the phase space defined as: 137 | 138 | .. math:: 139 | x_i = (x(t), x(t + \tau), \dots , x(t + (d - 1)\tau)) 140 | 141 | Two states are recurrent if: 142 | 143 | .. math:: 144 | ||x_i - x_j|| < \epsilon 145 | 146 | Then, after the embedding was made, one can easily calculate a distance matrix between each of the states. Then, the self-loops (diagonals) are 147 | set to zero and every entry smaller than epsilon will generate an edge between the states of the series. 148 | 149 | More information about this method can be found on: Donner, R.V., Zou, Y., Donges, J.F., Marwan, N., Kurths, J.: Recurrence 150 | networks – a novel paradigm for nonlinear time series analysis. New J. Phys. 12, 033025 (2010) 151 | 152 | Supervised Methods 153 | ================== 154 | 155 | Supervised methods have one objective: given a labeled dataset, learn the data patterns to the able to predict the label (continous or discrete) 156 | of new, unseen, data samples. 157 | 158 | Heuristic of Ease of Access 159 | --------------------------- 160 | 161 | This algorithm can be used, both, as a classifier and as a regressor. Its main idea is to consider the network as a Markov Chain to, on the convergence 162 | of the chain, identify which classes (or values) have a higher probability for a given unlabeled instance. 163 | 164 | Given the network with labeled instances we have the weight matrix of the network, which can be considered as the adjacency matrix of a weighted network. 
165 | 166 | For each unlabeled instance we add it to the network and calculate the similarity (which in this case can be an Euclidean distance for example) of this 167 | new node to every other node of the network which will be put into a vector ``S``. Using those similarities, we will disturb the weights matrix, using 168 | an parameter epsilon, according to the following formula: 169 | 170 | .. math:: 171 | \hat{W} = W + \epsilon \hat{S} 172 | 173 | where: 174 | 175 | .. math:: 176 | \hat{S} = \begin{bmatrix} 177 | s_1 & \dots & s_1 \\ 178 | s_2 & \dots & s_2\\ 179 | \vdots & \vdots & \vdots\\ 180 | s_L & \dots & s_L 181 | \end{bmatrix} 182 | 183 | The image below shows the effect of adding this new node and removing if it right after. Notice that now self loops are created in the network since we 184 | are summing up a value on every weight. 185 | 186 | .. image:: images/ease_of_access.png 187 | :alt: Ease of Access network change 188 | 189 | Then, we use the weight matrix to calculate the transition probabilities and finally we compute the convergence of the Markov Chain to the limiting 190 | probabilities. At this point, every limiting probability represents a state and can be interpreted as the probability of the unlabeled example 191 | belonging to the class of that state. 192 | 193 | We then select the ``t`` biggest probabilities to define the class of or unlabeled example. In case of a classification, the mode of the top 194 | ``t`` states is considered. If we are dealing with a regression, then the average value of the ``t`` states is used. 195 | 196 | More information about this method can be found on: Cupertino, T.H., Zhao, L., Carneiro, M.G.: Network-based supervised data 197 | classification by using an heuristic of ease of access. 
Neurocomputing 149(Part A), 86–92 (2015) 198 | 199 | High Level Data Classification 200 | ------------------------------ 201 | 202 | The High Level Data Classification algorithm tries to incorporate the findings from traditional Machine Learning algorithms, such as SVMs and 203 | Random Forests, with the structural pattern recognition promoted by analyzing the metrics of a complex network. In order to do so, it receives 204 | the tabular data in a regular Machine Learning fashion and fits a low-level (traditional ML) classifier on the data. 205 | 206 | Then the dataset is transformed into a complex network with a separated component for each of its classes, using one of the available constructors. 207 | This network is what we call the training network. 208 | 209 | For each of the unlabeled examples we want to predict, two kinds of predictions will be done: 210 | 211 | - A low-level prediction where the fitted low-level model will have its ``predict`` or ``predict_proba`` method called to classify the data. 212 | - A high-level prediction where we will use the complex network to calculate a probability of the instance belonging to any of the classes 213 | 214 | Once this is done, the probability of allocation on each class is defined by the equation: 215 | 216 | .. math:: 217 | F_i^{(y)} = (1 - \rho)L_i^{(y)} + \rho H_i^{(y)} 218 | 219 | Where :math:`\rho` is a user-defined parameter, :math:`F_i^{(y)}` is the probability of :math:`i` belonging to class y, :math:`L_i^{(y)}` are the probabilities 220 | associated with the low-level classifier and :math:`H_i^{(y)}` are the probabilities associated with the high-level classifier. 221 | 222 | How the high-level classification is done 223 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 224 | 225 | In order to generate the probabilities from :math:`H_i^{(y)}`, each unlabeled example is inserted into each of the components of the network, in 226 | which case we are basically testing it on every class of our data. 
Then, several metrics are calculated on the network, before and after the 227 | insertion of this new data point. 228 | 229 | If this insertion changes those metrics too much, this is evidence that maybe it does not belong in that class. On the other hand, if the 230 | metrics remain almost constant, it means that this new example does not change the structure of the network and thus may be part of that class. 231 | 232 | The user can define which metrics will be computed and what is the weight to be used on each metric by means of the list of parameters :math:`\alpha`. 233 | Notice that :math:`\alpha` must sum-up to 1. 234 | 235 | The list of available functions can be seen on the documentation of the NetworkMetrics (link to be added). More information about this method can be 236 | found on: Silva, T.C., Zhao, L.: Network-based high level data classification. IEEE Trans. Neural Netw. Learn. Syst. 23(6), 954–970 (2012) 237 | 238 | Unsupervised Methods 239 | ==================== 240 | 241 | Unsupervised methods, usually called community detection methods on the Complex Network area, are algorithms that try to find patterns on 242 | the data so as to group up data samples. 243 | 244 | Stochastic Particle Competition 245 | ------------------------------- 246 | 247 | The Stochastic Particle Competition algorithm lends some of the concepts of the genetic algorithms optimization to find community structure 248 | on complex networks. Given a set of ``K`` initial particles, put at random on the nodes of the network, they will compete against each other 249 | for the dominance of the network nodes. It is expected that after some time this algorithm will converge to a state where each community is 250 | dominated by one of the initial ``K`` particles. 
251 | 252 | At each timestep, each particle chooses the next node to visit by combining a preferential movement matrix, where it has a greater 253 | probability of visiting previously visited nodes, and an exploration matrix, which will send this particle over to new areas in order to try 254 | to dominate them. 255 | 256 | The :math:`\lambda` parameter defines how much exploration versus exploitation each of the particles will do during the fitting process. 257 | 258 | Each time one node is visited by a particle, its dominance on the node increases. In the same way, if a rival particle visits the same node, then 259 | the dominance level will be reduced. Likewise, every time a particle visits a dominated node, it regains energy, while if it visits a 260 | node dominated by another particle, it loses energy. If a particle runs out of energy, then it is transported back to its dominance region. 261 | 262 | The minimal and maximal energy of each particle is defined by the :math:`\omega_{min}` and :math:`\omega_{max}` parameters respectively. 263 | 264 | The convergence of the system happens when the difference between the dominance levels on two sequential steps is smaller than a user-defined 265 | parameter :math:`\epsilon`. 266 | 267 | More information about this method can be found on: T. C. Silva and L. Zhao, "Stochastic Competitive Learning in Complex 268 | Networks," in IEEE Transactions on Neural Networks and Learning Systems, vol. 23, no. 3, pp. 385-398, March 2012, doi: 10.1109/TNNLS.2011.2181866. 269 | 270 | Semi Supervised Methods 271 | ======================= 272 | 273 | These are methods designed to work with large amounts of unlabeled data given a small amount of labeled data. Usually this kind of method 274 | works towards spreading labels from labeled examples to unlabeled examples. 
275 | 276 | Modularity Label Propagation 277 | ---------------------------- 278 | 279 | This algorithm is based on the greedy modularity maximization community detection algorithm. In order to use it, we need a dataset with ``L`` 280 | labeled nodes and several unlabeled nodes. At each step of this algorithm, two communities (nodes) are merged to the same class following some 281 | restrictions, trying to keep the modularity increment as large as possible. 282 | 283 | The criteria for the merge at each step are as follows: 284 | 285 | - If both nodes already have a class and are from different classes, the merge does not occur 286 | - If none of the nodes have a class, the merge does not occur 287 | - If the nodes have the same class, the merge occurs 288 | - If one of the nodes has a class and the other doesn't, the merge occurs 289 | 290 | If we weren't able to merge the pair of nodes with greatest value on the modularity increment matrix :math:`\Delta Q`, we select the next 291 | greatest value and so on until a valid merge takes place. 292 | 293 | The algorithm runs until there is no node without a class remaining. The original paper of this algorithm describes a network reduction technique to 294 | improve the algorithm's performance. In order to use it, the reduction_factor list parameter should be set during 295 | the class instantiation. 296 | 297 | This parameter will define, for each class, the percentage of the network reduction. The basic working of the method is: 298 | 299 | - Select two nodes from the same class at random 300 | - Remove the first one 301 | - Redirect the edges from the first node to the second 302 | - Repeat until the desired percentage of the nodes are removed 303 | 304 | More information about this method can be found on: Silva, Thiago & Zhao, Liang. (2012). Semi-Supervised Learning Guided 305 | by the Modularity Measure in Complex Networks. Neurocomputing. 78. 30-37. 10.1016/j.neucom.2011.04.042. 
306 | 307 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | attrs==20.3.0 2 | coverage==5.5 3 | decorator==4.4.2 4 | GitPython==3.1.20 5 | giotto-tda==0.5.1 6 | importlib-metadata==3.7.2 7 | iniconfig==1.1.1 8 | joblib==1.1.1 9 | networkx==2.5 10 | numpy==1.19.5 11 | packaging==20.9 12 | pandas==1.1.5 13 | pluggy==0.13.1 14 | py==1.10.0 15 | pydata-sphinx-theme==0.6.3 16 | pyparsing==2.4.7 17 | pytest==6.2.2 18 | python-dateutil==2.8.1 19 | pytz==2021.1 20 | scikit-learn==0.24.1 21 | scipy==1.5.4 22 | six==1.15.0 23 | threadpoolctl==2.1.0 24 | toml==0.10.2 25 | tqdm==4.59.0 26 | typing-extensions==3.7.4.3 27 | zipp==3.4.1 28 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | from setuptools import setup, find_packages 3 | 4 | HERE = pathlib.Path(__file__).parent 5 | 6 | README = (HERE / "README.md").read_text() 7 | 8 | setup( 9 | name="scikit-net", 10 | version="0.0.2", 11 | description="Machine Learning in Complex Networks", 12 | long_description=README, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/TNanukem/scikit-net", 15 | download_url='https://github.com/TNanukem/scikit-net/archive/refs/tags/v0.0.1.tar.gz', # noqa: E501 16 | keywords=['Machine Learning', 'Complex Networks'], 17 | author="Tiago Toledo Jr", 18 | author_email="tiago.nanu@gmail.com", 19 | license="MIT", 20 | classifiers=[ 21 | "License :: OSI Approved :: MIT License", 22 | "Programming Language :: Python :: 3", 23 | "Programming Language :: Python :: 3.8", 24 | ], 25 | packages=find_packages(exclude=("tests",)), 26 | include_package_data=True, 27 | install_requires=['attrs', 28 | 'decorator', 29 | 'importlib-metadata', 30 | 'iniconfig', 31 | 'giotto-tda', 32 | 'joblib', 33 | 
'networkx', 34 | 'numpy', 35 | 'packaging', 36 | 'pandas', 37 | 'pluggy', 38 | 'py', 39 | 'pyparsing', 40 | 'pytest', 41 | 'python-dateutil', 42 | 'pytz', 43 | 'scikit-learn', 44 | 'scipy', 45 | 'six', 46 | 'sklearn', 47 | 'threadpoolctl', 48 | 'toml', 49 | 'tqdm', 50 | 'typing-extensions', 51 | 'zipp'], 52 | ) 53 | -------------------------------------------------------------------------------- /sknet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TNanukem/scikit-net/df8534268bc04787340e64a6eac2ee4beddc3068/sknet/__init__.py -------------------------------------------------------------------------------- /sknet/network_construction/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .dataset_constructors import KNNConstructor 3 | from .dataset_constructors import EpsilonRadiusConstructor 4 | from .dataset_constructors import KNNEpislonRadiusConstructor 5 | from .dataset_constructors import SingleLinkageHeuristicConstructor 6 | from .time_series_constructors import UnivariateCorrelationConstructor 7 | from .time_series_constructors import MultivariateCorrelationConstructor 8 | from .time_series_constructors import UnivariateRecurrenceNetworkConstructor 9 | -------------------------------------------------------------------------------- /sknet/network_construction/dataset_constructors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import networkx as nx 4 | 5 | from abc import ABCMeta, abstractmethod 6 | from sklearn.metrics import pairwise_distances 7 | from sklearn.neighbors import KDTree, BallTree 8 | 9 | 10 | class BaseConstructor(metaclass=ABCMeta): 11 | """ 12 | This class allows to transform a dataset into a networkx 13 | complex network by using the several different transformation 14 | methods 15 | 16 | Do not use this 
abstract class, use the derived classes instead 17 | 18 | """ 19 | 20 | def __init__(self, k, epsilon, metric, leaf_size=40, sep_comp=True): 21 | self.k = k 22 | self.epsilon = epsilon 23 | self.metric = metric 24 | self.leaf_size = leaf_size 25 | self.sep_comp = sep_comp 26 | self.X_ = None 27 | self.y_ = None 28 | 29 | @abstractmethod 30 | def add_nodes(self, X, y=None): 31 | """Add nodes to an existing network inside a fitted transformer 32 | object 33 | 34 | Parameters 35 | ---------- 36 | X : {array-like, pandas dataframe} of shape (n_samples, n_features) 37 | The input data. 38 | y : {ndarray, pandas series}, shape (n_samples,) or 39 | (n_samples, n_classes), default=None 40 | The true classes. 41 | 42 | Notes 43 | ----- 44 | If y is set, then the class of each node will be inserted into 45 | the node information under the label 'class'. If sep_comp is true 46 | then each class will be a separated component of the network. 47 | 48 | If by some reason the transformer is not fitted, this will generate 49 | an error. 50 | 51 | After the new nodes are added, one should use the get_network 52 | function to retrieve the network with the new nodes. 53 | 54 | """ 55 | 56 | def fit(self, X, y=None): 57 | """Fit the constructor creating the NetworkX graph 58 | 59 | Parameters 60 | ---------- 61 | X : {array-like, pandas dataframe} of shape (n_samples, n_features) 62 | The input data. 63 | y : {ndarray, pandas series}, shape (n_samples,) or 64 | (n_samples, n_classes), default=None 65 | The true classes. 
66 | 67 | Notes 68 | ----- 69 | If y is set, then the class of each node will be inserted into 70 | the node information under the label 'class' and each class will 71 | be a separated component of the network 72 | 73 | """ 74 | 75 | self.G_ = nx.Graph() 76 | self.node_count_ = 0 77 | if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 78 | X = np.array(X) 79 | 80 | self.X_ = X 81 | self.y_ = y 82 | self.fitting = True 83 | self.add_nodes(self.X_, self.y_) 84 | self.fitting = False 85 | 86 | return self 87 | 88 | def transform(self): 89 | """Returns the networkX graph after the constructor is fitted 90 | 91 | Returns 92 | ----- 93 | G : NetworkX graph 94 | The network version of the inserted tabular data 95 | """ 96 | try: 97 | return self.G_ 98 | except AttributeError: 99 | raise Exception("Transformer is not fitted") 100 | 101 | def fit_transform(self, X, y=None): 102 | """Fit the constructor creating the NetworkX graph and returns the graph 103 | 104 | Parameters 105 | ---------- 106 | X : {array-like, pandas dataframe} of shape (n_samples, n_features) 107 | The input data. 108 | y : {ndarray, pandas series}, shape (n_samples,) or 109 | (n_samples, n_classes), default=None 110 | The predicted classes. 
111 | 112 | Returns 113 | ------- 114 | G : NetworkX graph 115 | The network version of the inserted tabular data 116 | 117 | Notes 118 | ----- 119 | If y is set, then the class of each node will be inserted 120 | into the node information under the label 'class' 121 | 122 | """ 123 | self.fit(X, y) 124 | return self.G_ 125 | 126 | def get_network(self): 127 | """Retrieves the network generated in the constructor class 128 | """ 129 | return self.G_ 130 | 131 | def set_sep_comp(self, sep_comp): 132 | self.sep_comp = sep_comp 133 | 134 | def set_params(self, **parameters): 135 | for parameter, value in parameters.items(): 136 | setattr(self, parameter, value) 137 | return self 138 | 139 | def get_params(self, deep=True): 140 | return {"k": self.k, "epsilon": self.epsilon, 141 | "metric": self.metric, "leaf_size": self.leaf_size, 142 | "sep_comp": self.sep_comp} 143 | 144 | 145 | class KNNConstructor(BaseConstructor): 146 | """ 147 | Using a k-nearest neighbors algorithm, defines an 148 | networkx complex network 149 | 150 | Parameters 151 | ---------- 152 | k : int, default=5 153 | The number of neighbors to be connected to any given node 154 | of the network. 155 | metric : str or DistanceMetric object, default='minkowski' 156 | The distance metric to use for the neighborhood tree. Refer 157 | to the DistanceMetric class documentation from sklearn for a list 158 | of available metrics 159 | leaf_size : int, default=40 160 | Number of points to switch to brute-force search of neighbors 161 | sep_comp : boolean, default=True 162 | If True and if y is not None, then each class of the dataset 163 | will be a separated component, so nodes from one class will only 164 | be connected to those of the same class. If False then this 165 | restriction is not applied. 
166 | 167 | Attributes 168 | ---------- 169 | k : int 170 | The k being used to construct the network 171 | metric : str or DistanceMetric object 172 | The distance metric being used 173 | leaf_size : int 174 | The leaf_size being used 175 | G : NetworkX graph 176 | The network version of the inserted tabular data 177 | 178 | Examples 179 | -------- 180 | >>> from sklearn.datasets import load_iris 181 | >>> from dataset_constructors import KNNConstructor 182 | >>> X, y = load_iris(return_X_y = True) 183 | >>> knn_c = KNNConstructor(k=3) 184 | >>> knn_c.fit(X, y) 185 | >>> G = knn_c.transform() 186 | >>> # print(len(G.nodes)) 187 | 150 188 | 189 | Notes 190 | ----- 191 | 192 | References 193 | ---------- 194 | Silva, Thiago & Zhao, Liang. (2016). Machine Learning in 195 | Complex Networks. 10.1007/978-3-319-17290-3. 196 | 197 | """ 198 | def __init__(self, k=5, metric='minkowski', leaf_size=40, sep_comp=True): 199 | super().__init__(k, None, metric, leaf_size, sep_comp) 200 | 201 | def add_nodes(self, X, y=None): 202 | """Add nodes to an existing network inside a fitted transformer 203 | object 204 | 205 | Parameters 206 | ---------- 207 | X : {array-like, pandas dataframe} of shape (n_samples, n_features) 208 | The input data. 209 | y : {ndarray, pandas series}, shape (n_samples,) or 210 | (n_samples, n_classes), default=None 211 | The true classes. 212 | 213 | Notes 214 | ----- 215 | If y is set, then the class of each node will be inserted into 216 | the node information under the label 'class'. If sep_comp is true 217 | then each class will be a separated component of the network. 218 | 219 | If by some reason the transformer is not fitted, this will generate 220 | an error. 221 | 222 | After the new nodes are added, one should use the get_network 223 | function to retrieve the network with the new nodes. 
224 | 225 | """ 226 | if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 227 | X = np.array(X) 228 | 229 | # Each class will be a separated component 230 | if self.y_ is None: 231 | classes = [0] 232 | else: 233 | classes = np.unique(self.y_) 234 | 235 | for class_ in classes: 236 | 237 | if self.y_ is None: 238 | nodes = [node for node in range(self.node_count_, len(X) + self.node_count_)] # noqa: E501 239 | X_ = X 240 | self.tree_ = _tree_selector(self.X_, self.leaf_size) 241 | label_ind = [i for i in range(len(X))] 242 | 243 | else: 244 | if self.sep_comp: 245 | # Verifies if someone to be added is from class 246 | X_component = np.take(X, np.where(y == class_), axis=0)[0] 247 | if len(X_component) == 0: 248 | continue 249 | 250 | # Calculating the distances for guys on the same component 251 | if self.fitting: 252 | total_y = self.y_ 253 | total_X = self.X_ 254 | else: 255 | total_y = np.append(self.y_, y) 256 | total_X = np.vstack((self.X_, X)) 257 | label_ind = np.where(total_y == class_) 258 | 259 | X_ = np.take(total_X, label_ind, axis=0)[0] 260 | nodes = [(node, {'class': class_}) for node in range(self.node_count_, len(X_component) + self.node_count_)] # noqa: E501 261 | 262 | label_ind = label_ind[0].tolist() 263 | 264 | else: 265 | X_ = X 266 | label_ind = [i for i in range(len(X))] 267 | nodes = [(node, {'class': y[node - self.node_count_]}) for node in range(self.node_count_, len(X_) + self.node_count_)] # noqa: E501 268 | 269 | self.tree_ = _tree_selector(X_, self.leaf_size) 270 | 271 | neighbors = [self.tree_.query(x.reshape(1, -1), k=self.k+1, return_distance=True) for x in X_] # noqa: E501 272 | distances_aux = [neigh[0] for neigh in neighbors] 273 | indexes_aux = [neigh[1] for neigh in neighbors] 274 | indexes = [node[0] for node in indexes_aux] 275 | distances = [node[0] for node in distances_aux] 276 | edges = [(label_ind[node[0]], label_ind[node[j]], distances[i][j]) for i, node in enumerate(indexes) for j in range(1, self.k+1)] # 
noqa: E501 277 | 278 | self.G_.add_nodes_from(nodes) 279 | self.G_.add_weighted_edges_from(edges) 280 | self.node_count_ += len(nodes) 281 | 282 | if self.sep_comp is False: 283 | break 284 | 285 | if not np.array_equal(self.X_, X): 286 | self.X_ = np.vstack((self.X_, X)) 287 | if self.y_ is not None: 288 | self.y_ = np.append(self.y_, y) 289 | 290 | 291 | class EpsilonRadiusConstructor(BaseConstructor): 292 | """ 293 | Using an epsilon-radius algorithm, defines an 294 | networkx complex network 295 | 296 | Parameters 297 | ---------- 298 | epsilon : float 299 | The radius to define which neighbors should be connected. 300 | metric : str or DistanceMetric object, default='minkowski' 301 | The distance metric to use for the neighborhood tree. Refer 302 | to the DistanceMetric class documentation from sklearn for a list 303 | of available metrics 304 | leaf_size : int, default=40 305 | Number of points to switch to brute-force search of neighbors 306 | sep_comp : boolean, default=True 307 | If True and if y is not None, then each class of the dataset 308 | will be a separated component, so nodes from one class will only 309 | be connected to those of the same class. If False then this 310 | restriction is not applied. 
311 | 312 | Attributes 313 | ---------- 314 | epsilon : float 315 | The epsilon being used to construct the network 316 | metric : str or DistanceMetric object 317 | The distance metric being used 318 | leaf_size : int 319 | The leaf_size being used 320 | G : NetworkX graph 321 | The network version of the inserted tabular data 322 | 323 | Examples 324 | -------- 325 | >>> from sklearn.datasets import load_iris 326 | >>> from dataset_constructors import EpsilonRadiusConstructor 327 | >>> X, y = load_iris(return_X_y = True) 328 | >>> eps_c = EpsilonRadiusConstructor(epsilon=3) 329 | >>> eps_c.fit(X, y) 330 | >>> G = eps_c.transform() 331 | >>> # print(len(G.nodes)) 332 | 150 333 | 334 | Notes 335 | ----- 336 | 337 | References 338 | ---------- 339 | Silva, Thiago & Zhao, Liang. (2016). Machine Learning in 340 | Complex Networks. 10.1007/978-3-319-17290-3. 341 | 342 | """ 343 | def __init__(self, epsilon=0.1, metric='minkowski', leaf_size=40, 344 | sep_comp=True): 345 | super().__init__(None, epsilon, metric, leaf_size, sep_comp) 346 | 347 | def add_nodes(self, X, y=None): 348 | """Add nodes to an existing network inside a fitted transformer 349 | object 350 | 351 | Parameters 352 | ---------- 353 | X : {array-like, pandas dataframe} of shape (n_samples, n_features) 354 | The input data. 355 | y : {ndarray, pandas series}, shape (n_samples,) or 356 | (n_samples, n_classes), default=None 357 | The true classes. 358 | 359 | Notes 360 | ----- 361 | If y is set, then the class of each node will be inserted into 362 | the node information under the label 'class'. If sep_comp is true 363 | then each class will be a separated component of the network. 364 | 365 | If by some reason the transformer is not fitted, this will generate 366 | an error. 367 | 368 | After the new nodes are added, one should use the get_network 369 | function to retrieve the network with the new nodes. 
370 | 371 | """ 372 | if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 373 | X = np.array(X) 374 | 375 | # Each class will be a separated component 376 | if self.y_ is None: 377 | classes = [0] 378 | else: 379 | classes = np.unique(self.y_) 380 | 381 | for class_ in classes: 382 | if self.y_ is None: 383 | nodes = [node for node in range(self.node_count_, len(X) + self.node_count_)] # noqa: E501 384 | X_ = X 385 | self.tree_ = _tree_selector(self.X_, self.leaf_size) 386 | label_ind = [i for i in range(len(X))] 387 | 388 | else: 389 | if self.sep_comp: 390 | # Verifies if someone to be added is from class 391 | X_component = np.take(X, np.where(y == class_), axis=0)[0] 392 | if len(X_component) == 0: 393 | continue 394 | 395 | # Calculating the distances for guys on the same component 396 | if self.fitting: 397 | total_y = self.y_ 398 | total_X = self.X_ 399 | else: 400 | total_y = np.append(self.y_, y) 401 | total_X = np.vstack((self.X_, X)) 402 | label_ind = np.where(total_y == class_) 403 | 404 | X_ = np.take(total_X, label_ind, axis=0)[0] 405 | nodes = [(node, {'class': class_}) for node in range(self.node_count_, len(X_component) + self.node_count_)] # noqa: E501 406 | 407 | label_ind = label_ind[0].tolist() 408 | 409 | else: 410 | X_ = X 411 | label_ind = [i for i in range(len(X))] 412 | nodes = [(node, {'class': y[node - self.node_count_]}) for node in range(self.node_count_, len(X_) + self.node_count_)] # noqa: E501 413 | 414 | self.tree_ = _tree_selector(X_, self.leaf_size) 415 | 416 | neighbors = [self.tree_.query_radius(x.reshape(1, -1), r=self.epsilon, return_distance=True, sort_results=True) for x in X_] # noqa: E501 417 | 418 | indexes_aux = [neigh[0] for neigh in neighbors] 419 | distances_aux = [neigh[1] for neigh in neighbors] 420 | distances = [node[0] for node in distances_aux] 421 | indexes = [node[0] for node in indexes_aux] 422 | 423 | edges = [(label_ind[node[0]], label_ind[node[j]], distances[i][j]) for i, node in 
enumerate(indexes) for j in range(1, len(node))] # noqa: E501 424 | 425 | self.G_.add_nodes_from(nodes) 426 | self.G_.add_weighted_edges_from(edges) 427 | 428 | # Removing self-loops 429 | self.G_.remove_edges_from(nx.selfloop_edges(self.G_)) 430 | self.node_count_ += len(nodes) + 1 431 | 432 | if self.sep_comp is False: 433 | break 434 | 435 | if not np.array_equal(self.X_, X): 436 | self.X_ = np.vstack((self.X_, X)) 437 | if self.y_ is not None: 438 | self.y_ = np.vstack((self.y_, y)) 439 | 440 | 441 | class KNNEpislonRadiusConstructor(BaseConstructor): 442 | """ 443 | Using a k-nearest neighbors algorithm, defines an 444 | networkx complex network 445 | 446 | Parameters 447 | ---------- 448 | k : int, default=5 449 | The number of neighbors to be connected to any given node 450 | of the network. 451 | epsilon : float, default=0.1 452 | The radius to define which neighbors should be connected. 453 | metric : str or DistanceMetric object, default='minkowski' 454 | The distance metric to use for the neighborhood tree. Refer 455 | to the DistanceMetric class documentation from sklearn for a list 456 | of available metrics 457 | leaf_size : int, default=40 458 | Number of points to switch to brute-force search of neighbors 459 | sep_comp : boolean, default=True 460 | If True and if y is not None, then each class of the dataset 461 | will be a separated component, so nodes from one class will only 462 | be connected to those of the same class. If False then this 463 | restriction is not applied. 
464 | 465 | Attributes 466 | ---------- 467 | k : int 468 | The k being used to construct the network 469 | epsilon : float 470 | The epsilon being used to construct the network 471 | metric : str or DistanceMetric object 472 | The distance metric being used 473 | leaf_size : int 474 | The leaf_size being used 475 | G : NetworkX graph 476 | The network version of the inserted tabular data 477 | 478 | Examples 479 | -------- 480 | >>> from sklearn.datasets import load_iris 481 | >>> from dataset_constructors import KNNEpislonRadiusConstructor 482 | >>> X, y = load_iris(return_X_y = True) 483 | >>> ke_c = KNNEpislonRadiusConstructor(k=3, epsilon=0.3) 484 | >>> ke_c.fit(X, y) 485 | >>> G = ke_c.transform() 486 | >>> # print(len(G.nodes)) 487 | 150 488 | 489 | Notes 490 | ----- 491 | The KNN is used for sparse regions while the Epsilon-Radius is used for 492 | dense regions. This approach hopes to overcome the limitations of the 493 | individual components, allowing for a better network construction. The 494 | equation that runs this method is defined as: 495 | 496 | ``neighbor(v_i) = epsilon-radius(v_i) if |epsilon-radius(v_i)| > 497 | k else knn(v_i)`` 498 | 499 | References 500 | ---------- 501 | Silva, T.C.; Liang Zhao (2012). Network-Based High Level Data 502 | Classification., 23(6), –. doi:10.1109/tnnls.2012.2195027 503 | Silva, Thiago & Zhao, Liang. (2016). Machine Learning in Complex Networks. 504 | 10.1007/978-3-319-17290-3. 505 | 506 | """ 507 | def __init__(self, k=5, epsilon=0.1, metric='minkowski', leaf_size=40, 508 | sep_comp=True): 509 | super().__init__(k, epsilon, metric, leaf_size, sep_comp) 510 | 511 | def add_nodes(self, X, y=None): 512 | """Add nodes to an existing network inside a fitted transformer 513 | object 514 | 515 | Parameters 516 | ---------- 517 | X : {array-like, pandas dataframe} of shape (n_samples, n_features) 518 | The input data. 
519 | y : {ndarray, pandas series}, shape (n_samples,) or 520 | (n_samples, n_classes), default=None 521 | The true classes. 522 | 523 | Notes 524 | ----- 525 | If y is set, then the class of each node will be inserted into 526 | the node information under the label 'class'. If sep_comp is true 527 | then each class will be a separated component of the network. 528 | 529 | If by some reason the transformer is not fitted, this will generate 530 | an error. 531 | 532 | After the new nodes are added, one should use the get_network 533 | function to retrieve the network with the new nodes. 534 | 535 | """ 536 | if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 537 | X = np.array(X) 538 | 539 | # Each class will be a separated component 540 | if self.y_ is None: 541 | classes = [0] 542 | else: 543 | classes = np.unique(self.y_) 544 | 545 | for class_ in classes: 546 | 547 | if self.y_ is None: 548 | nodes = [node for node in range(self.node_count_, len(X) + self.node_count_)] # noqa: E501 549 | X_ = X 550 | self.tree_ = _tree_selector(self.X_, self.leaf_size) 551 | label_ind = [i for i in range(len(X))] 552 | 553 | else: 554 | if self.sep_comp: 555 | # Verifies if someone to be added is from class 556 | X_component = np.take(X, np.where(y == class_), axis=0)[0] 557 | if len(X_component) == 0: 558 | continue 559 | 560 | # Calculating the distances for guys on the same component 561 | if self.fitting: 562 | total_y = self.y_ 563 | total_X = self.X_ 564 | else: 565 | total_y = np.append(self.y_, y) 566 | total_X = np.vstack((self.X_, X)) 567 | label_ind = np.where(total_y == class_) 568 | 569 | X_ = np.take(total_X, label_ind, axis=0)[0] 570 | nodes = [(node, {'class': class_}) for node in range(self.node_count_, len(X_component) + self.node_count_)] # noqa: E501 571 | 572 | label_ind = label_ind[0].tolist() 573 | 574 | else: 575 | X_ = X 576 | label_ind = [i for i in range(len(X))] 577 | nodes = [(node, {'class': y[node - self.node_count_]}) for node in 
range(self.node_count_, len(X_) + self.node_count_)] # noqa: E501 578 | 579 | self.tree_ = _tree_selector(X_, self.leaf_size) 580 | 581 | radius_neighbors = [self.tree_.query_radius(x.reshape(1, -1), r=self.epsilon, return_distance=True, sort_results=True) for x in X_] # noqa: E501 582 | k_neighbors = [self.tree_.query(x.reshape(1, -1), k=self.k+1, return_distance=True) for x in X_] # noqa: E501 583 | 584 | # Auxiliar lists 585 | indexes_radius_aux = [neigh[0] for neigh in radius_neighbors] 586 | distances_radius_aux = [neigh[1] for neigh in radius_neighbors] # noqa: E501 587 | distances_radius = [node[0] for node in distances_radius_aux] 588 | indexes_radius = [node[0] for node in indexes_radius_aux] 589 | 590 | distances_k_aux = [neigh[0] for neigh in k_neighbors] 591 | indexes_k_aux = [neigh[1] for neigh in k_neighbors] # noqa: E501 592 | indexes_k = [node[0] for node in indexes_k_aux] 593 | distances_k = [node[0] for node in distances_k_aux] 594 | 595 | # Nodes with neighbors inside radius greater than k 596 | greater_than_k_indices = [index for index, neighbors in enumerate(indexes_radius) if len(neighbors) - 1 > self.k] # noqa: E501 597 | 598 | final_k = [neighbors for index, neighbors in enumerate(indexes_k) if index not in greater_than_k_indices] # noqa: E501 599 | final_radius = [neighbors for index, neighbors in enumerate(indexes_radius) if index in greater_than_k_indices] # noqa: E501 600 | final_k_distances = [dist for index, dist in enumerate(distances_k) if index not in greater_than_k_indices] # noqa: E501 601 | final_radius_distances = [distance for index, distance in enumerate(distances_radius) if index in greater_than_k_indices] # noqa: E501 602 | 603 | assert len(final_k) + len(final_radius) == len(nodes) 604 | 605 | edges_radius = [(label_ind[node[0]], label_ind[node[j]], final_radius_distances[i][j]) for i, node in enumerate(final_radius) for j in range(1, len(node))] # noqa: E501 606 | edges_k = [(label_ind[node[0]], label_ind[node[j]], 
final_k_distances[i][j]) for i, node in enumerate(final_k) for j in range(1, self.k+1)] # noqa: E501 607 | 608 | self.G_ = nx.Graph() 609 | self.G_.add_nodes_from(nodes) 610 | self.G_.add_weighted_edges_from(edges_radius) 611 | self.G_.add_weighted_edges_from(edges_k) 612 | 613 | # Removing self-loops 614 | self.G_.remove_edges_from(nx.selfloop_edges(self.G_)) 615 | self.node_count_ += len(nodes) + 1 616 | 617 | if self.sep_comp is False: 618 | break 619 | 620 | if not np.array_equal(self.X_, X): 621 | self.X_ = np.vstack((self.X_, X)) 622 | if self.y_ is not None: 623 | self.y_ = np.vstack((self.y_, y)) 624 | 625 | 626 | def _tree_selector(X, leaf_size=40, metric='minkowski'): 627 | """ 628 | Selects the better tree approach for given data 629 | 630 | Parameters 631 | ---------- 632 | X : {array-like, pandas dataframe} of shape (n_samples, n_features) 633 | The input data. 634 | leaf_size : int, default=40 635 | Number of points to switch to brute-force search of neighbors 636 | metric : str or DistanceMetric object, default='minkowski' 637 | The distance metric to use for the neighborhood tree. Refer 638 | to the DistanceMetric class documentation from sklearn for a list 639 | of available metrics 640 | 641 | Returns 642 | ------- 643 | tree : {KDTree or BallTree} 644 | The best tree to be used to find neighbors given data 645 | """ 646 | 647 | # Low dimensional spaces are fit to KD-Tree 648 | if X.shape[1] < 30: 649 | return KDTree(X, leaf_size=leaf_size, metric=metric) 650 | 651 | # High dimensional spaces are fit to Ball Tree 652 | if X.shape[1] >= 30: 653 | return BallTree(X, leaf_size=leaf_size, metric=metric) 654 | 655 | 656 | class SingleLinkageHeuristicConstructor(BaseConstructor): 657 | """ 658 | Use Single Linkage Heuristics to generate a complex network from 659 | tabular data 660 | 661 | Parameters 662 | ---------- 663 | k : int, default=3 664 | The number of closests points between two grops to be considered 665 | to create an edge. 
666 | lambda_ : positive float, default=0.1 667 | Multiplying factor on the average dissimilarity on the groups to 668 | define the critical distance 669 | sep_comp : boolean, default=True 670 | If True and if y is not None, then each class of the dataset 671 | will be a separated component, so nodes from one class will only 672 | be connected to those of the same class. If False then this 673 | restriction is not applied. 674 | metric : str or DistanceMetric object, default='euclidean' 675 | The distance metric to use for the neighborhood tree. Refer 676 | to the DistanceMetric class documentation from sklearn for a list 677 | of available metrics 678 | n_jobs : int, default=None 679 | The number of parallel jobs to run for neighbors search. 680 | None means 1 unless in a joblib.parallel_backend context and -1 means 681 | using all processors. 682 | 683 | Examples 684 | -------- 685 | >>> from sklearn.datasets import load_iris 686 | >>> from dataset_constructors import SingleLinkageHeuristicConstructor 687 | >>> X, y = load_iris(return_X_y = True) 688 | >>> ch = SingleLinkageHeuristicConstructor(k=3, epsilon=0.3) 689 | >>> ch.fit(X, y) 690 | >>> G = ke_c.transform() 691 | >>> # print(len(G.nodes)) 692 | 150 693 | 694 | References 695 | ---------- 696 | Cupertino, T.H., Huertas, J., & Zhao, L. (2013). Data clustering using 697 | controlled consensus in complex networks. Neurocomputing, 118, 132-140. 
698 | 699 | """ 700 | def __init__(self, k=3, lambda_=0.1, sep_comp=False, 701 | metric='euclidean', n_jobs=None): 702 | self.k = k 703 | self.lambda_ = lambda_ 704 | self.sep_comp = sep_comp 705 | self.metric = metric 706 | self.n_jobs = n_jobs 707 | 708 | def get_params(self, deep=True): 709 | return {'k': self.k, 'lambda_': self.lambda_, 710 | 'sep_comp': self.sep_comp, 711 | 'metric': self.metric, 'n_jobs': self.n_jobs} 712 | 713 | def add_nodes(self, X, y=None): 714 | """Add nodes to an existing network inside a fitted transformer 715 | object 716 | 717 | Parameters 718 | ---------- 719 | X : {array-like, pandas dataframe} of shape (n_samples, n_features) 720 | The input data. 721 | y : {ndarray, pandas series}, shape (n_samples,) or 722 | (n_samples, n_classes), default=None 723 | The true classes. 724 | 725 | Notes 726 | ----- 727 | If y is set, then the class of each node will be inserted into 728 | the node information under the label 'class'. If sep_comp is true 729 | then each class will be a separated component of the network. 730 | 731 | If by some reason the transformer is not fitted, this will generate 732 | an error. 733 | 734 | After the new nodes are added, one should use the get_network 735 | function to retrieve the network with the new nodes. 
736 | 737 | """ 738 | if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 739 | X = np.array(X) 740 | 741 | if self.lambda_ < 0: 742 | raise Exception('lambda_ parameter should be positive') 743 | 744 | if self.fitting: 745 | self.G_ = nx.Graph() 746 | self.groups_ = np.array([i for i in range(len(X))]) 747 | else: 748 | self.groups_.extend( 749 | [i + np.max(np.unique(self.groups)) for i in range(len(X))] 750 | ) 751 | X = np.vstack((self.X_, X)) 752 | 753 | if y is None and self.sep_comp is True: 754 | raise Exception( 755 | """y parameter is required for separated construction, 756 | set sep_comp to False""" 757 | ) 758 | 759 | number_of_groups = len(self.groups_) 760 | 761 | X_dist = pairwise_distances(X, metric=self.metric, 762 | n_jobs=self.n_jobs) 763 | 764 | while number_of_groups > 1: 765 | if number_of_groups == len(X): 766 | dist = X_dist 767 | 768 | else: 769 | dist = self._generate_new_X_dist(X_dist) 770 | 771 | for i in range(dist.shape[0]): 772 | dist[i, i] = np.inf 773 | 774 | # Finds the two closest groups and get their values 775 | i, j = np.unravel_index(dist.argmin(), dist.shape) 776 | 777 | # If the two closest groups are the same, then find other pair 778 | if self.groups_[i] == self.groups_[j]: 779 | while self.groups_[i] == self.groups_[j]: 780 | dist[i][j] = np.inf 781 | i, j = np.unravel_index(dist.argmin(), dist.shape) 782 | 783 | # Finds the nodes that are on the group i and j 784 | g1 = np.where(self.groups_ == self.groups_[i])[0] 785 | g1_idx = i 786 | 787 | g2 = np.where(self.groups_ == self.groups_[j])[0] 788 | g2_idx = j 789 | 790 | # Finds the distance between all members of the group 791 | g1_dists = pairwise_distances(X[g1]) 792 | 793 | g2_dists = pairwise_distances(X[g2]) 794 | 795 | # Finds the average intra-cluster dissimilarity 796 | d1 = np.mean(g1_dists) 797 | d2 = np.mean(g2_dists) 798 | 799 | # Select the k most similar nodes between G1 and G2 800 | group_distance = pairwise_distances(X[g1], X[g2]) 801 | 
candidates = [] 802 | 803 | if group_distance.shape[0] < self.k: 804 | k = group_distance.shape[0] 805 | else: 806 | k = self.k 807 | 808 | for i in range(k): 809 | i, j = np.unravel_index( 810 | group_distance.argmin(), group_distance.shape 811 | ) 812 | candidates.append((g1[i], g2[j])) 813 | group_distance[i, j] = np.inf 814 | 815 | # Generate edges 816 | dc = self.lambda_ * max(d1, d2) 817 | 818 | for u, v in candidates: 819 | if self.sep_comp is True and y[u] != y[v]: 820 | continue 821 | if dist[u, v] <= dc: 822 | self.G_.add_edge(u, v, weight=dist[u, v]) 823 | 824 | # Merge groups 825 | self.groups_[ 826 | self.groups_ == self.groups_[g2_idx]] = self.groups_[g1_idx] 827 | 828 | # Update number of groups 829 | number_of_groups = len(np.unique(self.groups_)) 830 | 831 | def _generate_new_X_dist(self, X_dist): 832 | number_of_groups = len(self.groups_) 833 | 834 | new_X_dist = np.zeros((number_of_groups, number_of_groups)) 835 | 836 | # Find the distance between the two closest nodes for each group pair 837 | for i in np.unique(self.groups_): 838 | for j in np.unique(self.groups_): 839 | if i != j: 840 | g1_nodes = np.where(self.groups_ == i)[0] 841 | g2_nodes = np.where(self.groups_ == j)[0] 842 | 843 | new_X_dist[i, j] = np.min(X_dist[g1_nodes, :][:, g2_nodes]) 844 | 845 | return new_X_dist 846 | -------------------------------------------------------------------------------- /sknet/network_construction/general_constructors.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | 3 | from abc import ABCMeta, abstractmethod 4 | from sknet.utils import NetworkTypesHandler 5 | 6 | 7 | class GeneralConstructor(metaclass=ABCMeta): 8 | def __init__(self, net_type): 9 | self.net_type = net_type 10 | self.network_type_handler = NetworkTypesHandler() 11 | 12 | def set_params(self, **parameters): 13 | for parameter, value in parameters.items(): 14 | setattr(self, parameter, value) 15 | return self 16 | 17 | def 
get_params(self, deep=True): 18 | return {"net_type": self.net_type} 19 | 20 | @abstractmethod 21 | def fit(self, X, y=None): 22 | pass 23 | 24 | def transform(self): 25 | """Returns the networkX graph after the constructor is fitted 26 | 27 | Returns 28 | ----- 29 | G : NetworkX graph 30 | The network version of the inserted data 31 | """ 32 | try: 33 | return self.G_ 34 | except AttributeError: 35 | raise Exception("Transformer is not fitted") 36 | 37 | def fit_transform(self, X, y=None): 38 | self.fit(X) 39 | return self.G_ 40 | 41 | def get_network(self): 42 | """Retrieves the network generated in the constructor class 43 | """ 44 | return self.G_ 45 | 46 | 47 | class EdgeListConstructor(): 48 | def __init__(self, net_type='graph'): 49 | super().__init__(net_type) 50 | 51 | def fit(self, X, y=None): 52 | network_type = self.network_type_handler.get_net(self.net_type) 53 | self.G_ = nx.read_edgelist(X) 54 | 55 | self.G_ = network_type(self.G_) 56 | return self 57 | 58 | 59 | class AdjacencyListConstructor(): 60 | def __init__(self, net_type='graph'): 61 | super().__init__(net_type) 62 | 63 | def fit(self, X, y=None): 64 | network_type = self.network_type_handler.get_net(self.net_type) 65 | self.G_ = nx.read_adjlist(X) 66 | 67 | self.G_ = network_type(self.G_) 68 | return self 69 | 70 | 71 | class YAMLConstructor(): 72 | def __init__(self, net_type='graph'): 73 | super().__init__(net_type) 74 | 75 | def fit(self, X, y=None): 76 | network_type = self.network_type_handler.get_net(self.net_type) 77 | self.G_ = nx.read_yaml(X) 78 | 79 | self.G_ = network_type(self.G_) 80 | return self 81 | 82 | 83 | class PajekConstructor(): 84 | def __init__(self, path, net_type='graph'): 85 | super().__init__(net_type) 86 | 87 | def fit(self, X, y=None): 88 | network_type = self.network_type_handler.get_net(self.net_type) 89 | self.G_ = nx.read_pajek(X) 90 | 91 | self.G_ = network_type(self.G_) 92 | return self 93 | 
-------------------------------------------------------------------------------- /sknet/network_construction/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TNanukem/scikit-net/df8534268bc04787340e64a6eac2ee4beddc3068/sknet/network_construction/tests/__init__.py -------------------------------------------------------------------------------- /sknet/network_construction/tests/test_network_construction.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import pandas as pd 3 | 4 | from sknet.network_construction import dataset_constructors 5 | from sknet.network_construction import time_series_constructors 6 | 7 | 8 | @pytest.fixture 9 | def X_y_generator(): 10 | 11 | X = pd.DataFrame([ 12 | (-2.24, -1.19), 13 | (-3.17, -0.67), 14 | (1.92, 0.57), 15 | (1.6, 1.97), 16 | (3.32, 1.51), 17 | (1.12, 1.21), 18 | (-1.32, -2.39), 19 | (-2.88, -1.83), 20 | (-2.56, 4.01), 21 | (-3.36, 3.25), 22 | (-5.64, 2.57), 23 | (-4.14, 2.85), 24 | (-3.04, 2.15)]) 25 | 26 | y = pd.Series([0, 0, 1, 1, 1, 1, 0, 0, 2, 2, 2, 2, 2]) 27 | 28 | return X, y 29 | 30 | 31 | @pytest.fixture 32 | def X_time_series_generator(): 33 | 34 | X_uni = pd.DataFrame([-5, -3, 2, 4, -5, 1, 4, 6, 7, -3, 3, 2]) 35 | X_multi = pd.DataFrame([ 36 | (-5, 4, 3), 37 | (-3, 8, 4), 38 | (2, -5, 5), 39 | (3, 2, 6), 40 | (4, 3, 4), 41 | (-5, 4, 7), 42 | (1, 6, 7), 43 | (4, -2, -3), 44 | (6, 5, 8), 45 | (7, -4, 4), 46 | (-3, -4, 6), 47 | (3, 4, -6), 48 | (2, 2, 4) 49 | ]) 50 | 51 | return X_uni, X_multi 52 | 53 | 54 | def test_knn_fit(X_y_generator): 55 | 56 | knn = dataset_constructors.KNNConstructor(k=3, sep_comp=False) 57 | 58 | with pytest.raises(Exception): 59 | knn.transform() 60 | 61 | knn.fit(X_y_generator[0], X_y_generator[1]) 62 | 63 | G = knn.transform() 64 | 65 | expected_nodes = [i for i in range(13)] 66 | assert list(G.nodes) == expected_nodes 67 | 68 | expected_edges = 
[(0, 7), (0, 1), (0, 6), 69 | (1, 7), (1, 6), (2, 5), 70 | (2, 3), (2, 4), (3, 5), 71 | (3, 4), (4, 5), (6, 7), 72 | (8, 9), (8, 12), (8, 11), 73 | (9, 11), (9, 12), (9, 10), 74 | (10, 11), (10, 12), (11, 12)] 75 | assert list(G.edges) == expected_edges 76 | 77 | 78 | def test_epsilon_radius_fit(X_y_generator): 79 | 80 | eps = dataset_constructors.EpsilonRadiusConstructor(epsilon=1, 81 | sep_comp=False) 82 | 83 | with pytest.raises(Exception): 84 | eps.transform() 85 | 86 | eps.fit(X_y_generator[0], X_y_generator[1]) 87 | 88 | G = eps.transform() 89 | 90 | expected_nodes = [i for i in range(13)] 91 | assert list(G.nodes) == expected_nodes 92 | 93 | expected_edges = [(0, 7), (3, 5), (9, 11)] 94 | 95 | assert list(G.edges) == expected_edges 96 | 97 | 98 | def test_epsilon_radius_fit_true_sep_comp(X_y_generator): 99 | 100 | eps = dataset_constructors.EpsilonRadiusConstructor(epsilon=1, 101 | sep_comp=True) 102 | 103 | with pytest.raises(Exception): 104 | eps.transform() 105 | 106 | eps.fit(X_y_generator[0], X_y_generator[1]) 107 | 108 | G = eps.transform() 109 | 110 | expected_nodes = [0, 1, 2, 3, 7, 5, 6, 8, 10, 11, 12, 13, 14, 9] 111 | assert list(G.nodes) == expected_nodes 112 | 113 | expected_edges = [(0, 7), (3, 5), (11, 9)] 114 | 115 | assert list(G.edges) == expected_edges 116 | 117 | 118 | def test_clustering_heuristics_fit(X_y_generator): 119 | clustering = dataset_constructors.SingleLinkageHeuristicConstructor( 120 | sep_comp=False) 121 | 122 | with pytest.raises(Exception): 123 | clustering.transform() 124 | 125 | clustering.fit(X_y_generator[0], X_y_generator[1]) 126 | 127 | G = clustering.transform() 128 | 129 | expected_nodes = [0, 11, 2, 9, 3, 4, 5, 6, 7, 8, 10, 12] 130 | 131 | assert list(G.nodes) == expected_nodes 132 | 133 | expected_edges = [(0, 11), (0, 2), (0, 3), (0, 4), 134 | (0, 6), (0, 7), (11, 8), (11, 10), 135 | (11, 12), (2, 9), (2, 3), (2, 4), 136 | (2, 5), (9, 3), (9, 8), (9, 10), 137 | (9, 12), (3, 4), (3, 5), (4, 5), 138 | (5, 6), (5, 
8), (6, 7), (8, 10), 139 | (8, 12)] 140 | 141 | assert list(G.edges) == expected_edges 142 | 143 | 144 | def test_clustering_heuristics_fit_true_sep_comp(X_y_generator): 145 | clustering = dataset_constructors.SingleLinkageHeuristicConstructor( 146 | sep_comp=True) 147 | 148 | with pytest.raises(Exception): 149 | clustering.transform() 150 | 151 | clustering.fit(X_y_generator[0], X_y_generator[1]) 152 | 153 | G = clustering.transform() 154 | 155 | expected_nodes = [2, 3, 4, 5, 0, 6, 7, 9, 8, 11, 10, 12] 156 | assert list(G.nodes) == expected_nodes 157 | 158 | expected_edges = [(2, 3), (2, 4), (2, 5), (3, 4), (3, 5), 159 | (4, 5), (0, 6), (0, 7), (6, 7), (9, 8), 160 | (9, 10), (9, 12), (8, 11), (8, 10), 161 | (8, 12), (11, 10), (11, 12)] 162 | 163 | assert list(G.edges) == expected_edges 164 | 165 | 166 | def test_knn_epsilon_fit(X_y_generator): 167 | 168 | eps_knn = dataset_constructors.KNNEpislonRadiusConstructor( 169 | k=2, epsilon=1.5, sep_comp=False) 170 | 171 | with pytest.raises(Exception): 172 | eps_knn.transform() 173 | 174 | eps_knn.fit(X_y_generator[0], X_y_generator[1]) 175 | 176 | G = eps_knn.transform() 177 | 178 | expected_nodes = [i for i in range(13)] 179 | assert list(G.nodes) == expected_nodes 180 | 181 | expected_edges = [(0, 7), (0, 1), (0, 6), 182 | (1, 7), (2, 5), (2, 3), 183 | (2, 4), (3, 5), (3, 4), 184 | (6, 7), (8, 9), (8, 12), 185 | (9, 11), (9, 12), (9, 10), 186 | (10, 11), (11, 12)] 187 | 188 | assert list(G.edges) == expected_edges 189 | 190 | 191 | def test_knn_epsilon_fit_true_sep_comp(X_y_generator): 192 | 193 | eps_knn = dataset_constructors.KNNEpislonRadiusConstructor( 194 | k=2, epsilon=1.5, sep_comp=True) 195 | 196 | with pytest.raises(Exception): 197 | eps_knn.transform() 198 | 199 | eps_knn.fit(X_y_generator[0], X_y_generator[1]) 200 | 201 | G = eps_knn.transform() 202 | 203 | expected_nodes = [10, 11, 12, 13, 14, 9, 8] 204 | assert list(G.nodes) == expected_nodes 205 | 206 | expected_edges = [(10, 11), (10, 9), (11, 9), (11, 
12), (12, 9), 207 | (12, 8), (9, 8)] 208 | 209 | assert list(G.edges) == expected_edges 210 | 211 | 212 | def test_univariate_series_fit(X_time_series_generator): 213 | constructor = ( 214 | time_series_constructors.UnivariateCorrelationConstructor( 215 | 0.3, 4 216 | ) 217 | ) 218 | 219 | constructor.fit(X_time_series_generator[0]) 220 | G = constructor.transform() 221 | 222 | expected_nodes = [i for i in range(9)] 223 | assert list(G.nodes) == expected_nodes 224 | 225 | expected_edges = [(0, 0), (0, 4), (0, 5), (1, 1), 226 | (1, 6), (2, 2), (2, 7), (3, 3), 227 | (3, 8), (4, 4), (4, 5), (5, 5), 228 | (6, 6), (7, 7), (8, 8)] 229 | 230 | assert list(G.edges) == expected_edges 231 | 232 | G = constructor.fit_transform(X_time_series_generator[0]) 233 | assert list(G.nodes) == expected_nodes 234 | assert list(G.edges) == expected_edges 235 | 236 | 237 | def test_univariate_recurrence_fit(X_time_series_generator): 238 | constructor = ( 239 | time_series_constructors.UnivariateRecurrenceNetworkConstructor( 240 | 10 241 | ) 242 | ) 243 | 244 | constructor.fit(X_time_series_generator[0]) 245 | G = constructor.transform() 246 | 247 | expected_nodes = [i for i in range(10)] 248 | assert list(G.nodes) == expected_nodes 249 | 250 | expected_edges = [(0, 1), (0, 3), (0, 4), (0, 9), 251 | (1, 4), (1, 5), (1, 6), (1, 9), 252 | (2, 7), (2, 9), (3, 8), (4, 5), 253 | (4, 9), (5, 6), (5, 8), (5, 9), 254 | (6, 9)] 255 | 256 | assert list(G.edges) == expected_edges 257 | 258 | G = constructor.fit_transform(X_time_series_generator[0]) 259 | assert list(G.nodes) == expected_nodes 260 | assert list(G.edges) == expected_edges 261 | 262 | 263 | def test_multivariate_series_fit(X_time_series_generator): 264 | constructor = ( 265 | time_series_constructors.MultivariateCorrelationConstructor( 266 | 0.1 267 | ) 268 | ) 269 | 270 | constructor.fit(X_time_series_generator[1]) 271 | G = constructor.transform() 272 | 273 | expected_nodes = [i for i in range(3)] 274 | assert list(G.nodes) == 
expected_nodes 275 | 276 | expected_edges = [(0, 0), (1, 1), (2, 2)] 277 | 278 | assert list(G.edges) == expected_edges 279 | 280 | G = constructor.fit_transform(X_time_series_generator[1]) 281 | assert list(G.nodes) == expected_nodes 282 | assert list(G.edges) == expected_edges 283 | 284 | 285 | def test_get_set_params(): 286 | # Time series constructors 287 | constructor = ( 288 | time_series_constructors.UnivariateCorrelationConstructor() 289 | ) 290 | param_dict = {'r': 0.3, 'L': 5} 291 | constructor.set_params(**param_dict) 292 | assert param_dict == constructor.get_params() 293 | 294 | constructor = ( 295 | time_series_constructors.MultivariateCorrelationConstructor() 296 | ) 297 | param_dict = {'r': 0.3} 298 | constructor.set_params(**param_dict) 299 | assert param_dict == constructor.get_params() 300 | 301 | constructor = ( 302 | time_series_constructors.UnivariateRecurrenceNetworkConstructor() 303 | ) 304 | param_dict = {'epsilon': 0.1, 'd': 2, 'tau': 1, 305 | 'metric': 'euclidean', 'n_jobs': None} 306 | constructor.set_params(**param_dict) 307 | assert param_dict == constructor.get_params() 308 | 309 | # Dataset constructors 310 | param_dict = {"k": 3, "epsilon": None, "metric": 'minkowski', 311 | "leaf_size": 40, "sep_comp": True} 312 | constructor = dataset_constructors.KNNConstructor() 313 | constructor.set_params(**param_dict) 314 | assert param_dict == constructor.get_params() 315 | 316 | param_dict['k'] = None 317 | param_dict['epsilon'] = 0.1 318 | constructor = dataset_constructors.EpsilonRadiusConstructor() 319 | constructor.set_params(**param_dict) 320 | assert param_dict == constructor.get_params() 321 | 322 | param_dict['k'] = 2 323 | constructor = dataset_constructors.KNNEpislonRadiusConstructor() 324 | constructor.set_params(**param_dict) 325 | assert param_dict == constructor.get_params() 326 | 327 | constructor = dataset_constructors.SingleLinkageHeuristicConstructor() 328 | param_dict = {'k': 3, 'lambda_': 0.1, 329 | 'n_jobs': 2, 
'sep_comp': True, 330 | 'metric': 'euclidean'} 331 | constructor.set_params(**param_dict) 332 | assert param_dict == constructor.get_params() 333 | 334 | 335 | def test_not_fitted_raise(): 336 | with pytest.raises(Exception): 337 | dataset_constructors.KNNConstructor().transform() 338 | 339 | with pytest.raises(Exception): 340 | dataset_constructors.EpsilonRadiusConstructor().transform() 341 | 342 | with pytest.raises(Exception): 343 | dataset_constructors.KNNEpislonRadiusConstructor().transform() 344 | 345 | with pytest.raises(Exception): 346 | dataset_constructors.ClusteringHeuristicConstructor().transform() 347 | 348 | with pytest.raises(Exception): 349 | time_series_constructors.UnivariateCorrelationConstructor().transform() 350 | 351 | with pytest.raises(Exception): 352 | time_series_constructors.MultivariateCorrelationConstructor( 353 | ).transform() 354 | 355 | with pytest.raises(Exception): 356 | time_series_constructors.UnivariateRecurrenceNetworkConstructor( 357 | ).transform() 358 | -------------------------------------------------------------------------------- /sknet/network_construction/time_series_constructors.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import pandas as pd 3 | import networkx as nx 4 | import numpy as np 5 | 6 | from scipy.stats import pearsonr 7 | from abc import ABCMeta, abstractmethod 8 | from sklearn.metrics import pairwise_distances 9 | from gtda.time_series import SingleTakensEmbedding 10 | 11 | 12 | class TimeSeriesBaseConstructor(metaclass=ABCMeta): 13 | """ 14 | This class allows to transform a time series into a networkx 15 | complex network by using the several different transformation 16 | methods 17 | 18 | Do not use this abstract class, use derived classes instead 19 | """ 20 | 21 | def fit(self, X, y=None): 22 | """Fit the constructor creating the NetworkX graph 23 | 24 | Parameters 25 | ---------- 26 | X : {array-like, pandas dataframe} of shape 
(n_samples, n_features) 27 | The input data. 28 | y : ignored, used just for API convention 29 | """ 30 | if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 31 | X = np.array(X) 32 | 33 | self.X_ = None 34 | 35 | self.add_nodes(X) 36 | 37 | return self 38 | 39 | def transform(self): 40 | """Returns the networkX graph after the constructor is fitted 41 | 42 | Returns 43 | ----- 44 | G_ : NetworkX graph 45 | The network version of the inserted time series data 46 | """ 47 | try: 48 | return self.G_ 49 | except AttributeError: 50 | raise Exception("Transformer is not fitted") 51 | 52 | def get_network(self): 53 | """Retrieves the network generated in the constructor class 54 | """ 55 | return self.G_ 56 | 57 | def fit_transform(self, X, y=None): 58 | """Fit the constructor creating the NetworkX graph and returns the graph 59 | 60 | Parameters 61 | ---------- 62 | X : {array-like, pandas dataframe} of shape (n_samples, n_features) 63 | The input data. 64 | y : ignored, used just for API convention 65 | 66 | Returns 67 | ------- 68 | G_ : NetworkX graph 69 | The network version of the inserted time series data 70 | """ 71 | self.fit(X, y) 72 | return self.G_ 73 | 74 | @abstractmethod 75 | def add_nodes(self, X, y=None): 76 | """Adds a node to the graph""" 77 | 78 | def set_params(self, **parameters): 79 | for parameter, value in parameters.items(): 80 | setattr(self, parameter, value) 81 | return self 82 | 83 | 84 | class UnivariateCorrelationConstructor(TimeSeriesBaseConstructor): 85 | """ 86 | Creates a networkX complex network from a univariate time series 87 | by splitting it into segments of length L and generating the correlation 88 | between those segments 89 | 90 | Parameters 91 | ---------- 92 | r : float 93 | The minimun correlation threshold between two segments 94 | to create an edge between them on the network. 
Value must be 95 | between 0 and 1 96 | L : int 97 | The lenght of each segment to be considered on the correlations 98 | 99 | Attributes 100 | ---------- 101 | G : NetworkX graph 102 | The network version of the inserted time series data 103 | 104 | Examples 105 | -------- 106 | >>> from sknet.network_construction import UnivariateCorrelationConstructor 107 | >>> r = 0.5 108 | >>> L = 10 109 | >>> constructor = UnivariateCorrelationConstructor(r, L) 110 | >>> constructor.fit(X) 111 | >>> G_ = constructor.transform() 112 | 113 | References 114 | ---------- 115 | Silva, Thiago & Zhao, Liang. (2016). Machine Learning in 116 | Complex Networks. 10.1007/978-3-319-17290-3. 117 | 118 | Yang, Y., Yang, H.: Complex network-based time series 119 | analysis. Physica A 387, 1381–1386 (2008) 120 | 121 | """ 122 | def __init__(self, r=0.5, L=10): 123 | self.r = r 124 | self.L = L 125 | self.X_ = None 126 | 127 | def get_params(self, deep=True): 128 | return {"r": self.r, 'L': self.L} 129 | 130 | def add_nodes(self, X, y=None): 131 | """Add nodes to an existing network inside a fitted transformer 132 | object 133 | 134 | Parameters 135 | ---------- 136 | X : {array-like, pandas dataframe} of shape (n_samples, 1) 137 | The input data. 138 | y : ignored, used just for API convention 139 | """ 140 | if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 141 | X = np.array(X) 142 | 143 | if X.shape[1] != 1: 144 | warnings.warn( 145 | """More than one feature identified in the series. 
            For multivariate time series use the
            MultivariateCorrelationConstructor"""
        )

    if self.X_ is not None:
        X = np.vstack((self.X_, X))

    # Create the segments of size L
    segments = []
    for i in range(len(X)):
        segment = X[i:self.L + i]
        if len(segment) < self.L:
            continue
        segments.append(segment)
    C = np.zeros((len(segments), len(segments)))

    # Make the correlation matrix
    # Turn into list comprehension later
    for i in range(len(segments)):
        for j in range(len(segments)):
            C[i][j] = pearsonr(np.array(segments[i]).flatten(),
                               np.array(segments[j]).flatten())[0]

    # Make the D matrix (thresholded adjacency).  NOTE(review): the
    # diagonal of C is 1 (self-correlation), so every node also gets a
    # self-loop whenever r <= 1 — confirm this is intended.
    C[C < self.r] = 0
    C[C >= self.r] = 1

    self.G_ = nx.from_numpy_array(C)

    self.X_ = X


class MultivariateCorrelationConstructor(TimeSeriesBaseConstructor):
    """
    Creates a networkX complex network from a multivariate time series
    by creating edges between highly correlated series

    Parameters
    ----------
    r : float
        The minimum correlation threshold between two series
        to create an edge between them on the network. Value must be
        between 0 and 1

    Attributes
    ----------
    G_ : NetworkX graph
        The network version of the inserted time series data

    Examples
    --------
    >>> from sknet.network_construction import MultivariateCorrelationConstructor # noqa: E501
    >>> r = 0.5
    >>> constructor = MultivariateCorrelationConstructor(r)
    >>> constructor.fit(X)
    >>> G_ = constructor.transform()

    References
    ----------
    Silva, Thiago & Zhao, Liang. (2016). Machine Learning in
    Complex Networks. 10.1007/978-3-319-17290-3.

    Yang, Y., Yang, H.: Complex network-based time series
    analysis.
Physica A 387, 1381–1386 (2008) 211 | 212 | """ 213 | def __init__(self, r=0.5): 214 | self.r = r 215 | 216 | def get_params(self, deep=True): 217 | return {"r": self.r} 218 | 219 | def add_nodes(self, X, y=None): 220 | """Add nodes to an existing network inside a fitted transformer 221 | object 222 | 223 | Parameters 224 | ---------- 225 | X : {array-like, pandas dataframe} of shape (n_samples, n_features) 226 | The input data. 227 | y : ignored, used just for API convention 228 | """ 229 | if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 230 | X = np.array(X) 231 | 232 | if X.shape[1] == 1: 233 | warnings.warn( 234 | """Only one feature identified in the series. 235 | For univariate time series use the 236 | UnivariateCorrelationConstructor""" 237 | ) 238 | 239 | if self.X_ is not None: 240 | X = np.vstack((self.X_, X)) 241 | 242 | C = np.zeros((X.shape[1], X.shape[1])) 243 | 244 | # Make the correlation matrix 245 | # Turn into list comprehension later 246 | for i in range(X.shape[1]): 247 | for j in range(X.shape[1]): 248 | C[i][j] = pearsonr(X[:, i], X[:, j])[0] 249 | 250 | # Make the D matrix 251 | C[C < self.r] = 0 252 | C[C >= self.r] = 1 253 | 254 | self.G_ = nx.from_numpy_array(C) 255 | 256 | self.X_ = X 257 | 258 | 259 | class UnivariateRecurrenceNetworkConstructor(TimeSeriesBaseConstructor): 260 | """ 261 | Creates a networkX complex network from a univariate time series 262 | by creating edges between recurrent (close) states on the phase space 263 | of the series. 264 | 265 | The phase space is constructed using the Takens Embedding Theorem. 266 | 267 | Parameters 268 | ---------- 269 | epsilon : float, optional (default=0.1) 270 | The required distance between two states for them to be considered a 271 | recurrence and hence be connected 272 | d : int, optional (default=None) 273 | The dimension of the embedding to be used for the Takens embedding. 274 | If None and tau is also None, the best parameter will be 275 | automatically chosen. 
276 | tau : int, optional (default=None) 277 | The time delay to be used on the Takens Embedding. If None and 278 | tau is also None, the best parameter will be automatically chosen. 279 | metric : str, optional (default='euclidean') 280 | The distance metric to be used to calculate the distance between 281 | two points on the phase space 282 | n_jobs : int, optional (default=None) 283 | The number of parallel processes to be used when applying the 284 | Takens embedding and when calculating the distances between the 285 | states. None means 1 core will be used, -1 means use all cores. 286 | Attributes 287 | ---------- 288 | G_ : NetworkX graph 289 | The network version of the inserted time series data 290 | 291 | Examples 292 | -------- 293 | >>> from sknet.network_construction import UnivariateRecurrenceNetworkConstructor # noqa: E501 294 | >>> constructor = UnivariateRecurrenceNetworkConstructor(10) 295 | >>> constructor.fit(X) 296 | >>> G_ = constructor.transform() 297 | 298 | References 299 | ---------- 300 | Donner, R.V., Zou, Y., Donges, J.F., Marwan, N., Kurths, J.: Recurrence 301 | networks – a novel paradigm for nonlinear time series analysis. 302 | New J. Phys. 12, 033025 (2010) 303 | 304 | """ 305 | def __init__(self, epsilon=0.1, d=None, tau=None, metric='euclidean', 306 | n_jobs=None): 307 | self.epsilon = epsilon 308 | self.d = d 309 | self.tau = tau 310 | self.metric = metric 311 | self.n_jobs = n_jobs 312 | 313 | def get_params(self, deep=True): 314 | return {'epsilon': self.epsilon, 'd': self.d, 'tau': self.tau, 315 | 'metric': self.metric, 'n_jobs': self.n_jobs} 316 | 317 | def add_nodes(self, X, y=None): 318 | """Add nodes to an existing network inside a fitted transformer 319 | object 320 | 321 | Parameters 322 | ---------- 323 | X : {array-like, pandas dataframe} of shape (n_samples, n_features) 324 | The input data. 
import numpy as np
import networkx as nx

from sknet.network_construction import KNNConstructor


class ModularityLabelPropagation():
    """
    Semi-supervised method that propagates labels to instances not
    classified using the Modularity Propagation method.

    Parameters
    ----------
    reduction_factor : None or list of floats, optional (default=None)
        If not None, the aggregation algorithm proposed by Silva & Zhao will
        be applied to reduce the network and speed up the processing. The
        values on the list will be the reduction factor for each class
    random_state : int or None, optional (default=None)
        Seed used by the network reduction step. Notice that, for now, this
        seeds the *global* NumPy RNG

    Attributes
    ----------
    generated_y_ : {ndarray, pandas series}, shape (n_samples, 1)
        The label list
    generated_G_ : NetworkX Network
        The constructed network on the fit of the model

    Examples
    --------
    >>> from sklearn.datasets import load_iris
    >>> from sknet.network_construction import KNNConstructor
    >>> from sknet.semi_supervised import ModularityLabelPropagation
    >>> X, y = load_iris(return_X_y = True)
    >>> knn_c = KNNConstructor(k=5, sep_comp=False)
    >>> y[10:20] = np.nan
    >>> y[70:80] = np.nan
    >>> y[110:120] = np.nan
    >>> propagator = ModularityLabelPropagation()
    >>> propagator.fit(X, y, constructor=knn_c)
    >>> propagator.generated_y_

    References
    ----------
    Silva, Thiago & Zhao, Liang. (2012). Semi-Supervised Learning Guided
    by the Modularity Measure in Complex Networks. Neurocomputing. 78.
    30-37. 10.1016/j.neucom.2011.04.042.

    Silva, Thiago & Zhao, Liang. (2016). Machine Learning in Complex
    Networks. 10.1007/978-3-319-17290-3.

    """
    def __init__(self, reduction_factor=None, random_state=None):
        self.estimator_type = 'classifier'
        self.reduction_factor = reduction_factor
        self.random_state = random_state
        # TODO(review): seeding the *global* NumPy RNG at construction time
        # leaks into the caller's environment; a local ``np.random.Generator``
        # would be cleaner, but changing it now would break reproducibility
        # of existing results
        np.random.seed(random_state)

    def set_params(self, **parameters):
        """Sets the given parameters on the instance and returns it"""
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def get_params(self, deep=True):
        """Returns the constructor parameters of the instance"""
        return {'reduction_factor': self.reduction_factor,
                'random_state': self.random_state}

    def fit(self, X=None, y=None, G=None,
            constructor=KNNConstructor(5, sep_comp=False)):
        """Fit the propagator by using the modularity measure
        to propagate the labels to non-labeled examples

        Parameters
        ----------
        X : {array-like, pandas dataframe} of shape
            (n_samples, n_features), optional (default=None)
            The input data samples. Can be None if G is set.
        y : {ndarray, pandas series}, shape (n_samples,) or
            (n_samples, n_classes), optional (default=None)
            The target classes. Can be None if G is set. Missing labels
            should have the np.nan value
        G : NetworkX Network, optional (default=None)
            The network with missing labels to be propagated. Can be
            None if X and y are not None in which case the constructor
            will be used to generate the network. Labels must be into
            the data of each node with the 'class' key. Missing labels
            should be valued np.nan
        constructor : BaseConstructor inherited class, optional(default=
            KNNConstructor(5, sep_comp=False))
            A constructor class to transform the tabular data into a
            network. It can be set to None if a complex network is directly
            passed to the ``fit`` method. Notice that you should use
            'sep_comp' as False on the constructor.
            NOTE(review): the default is a single shared instance (mutable
            default argument); pass an explicit constructor when fitting
            several models

        Returns
        -------
        self : ModularityLabelPropagation
            The fitted instance

        Raises
        ------
        Exception
            If neither y nor G is given, if both constructor and G are
            None, if ``reduction_factor`` is malformed, or if some labels
            cannot be propagated (disconnected unlabeled component)
        """
        self.constructor = constructor
        if y is None and G is None:
            raise Exception('Both y and G are None!')

        if self.constructor is None and G is None:
            raise Exception(
                'You either have to set the constructor or the network'
            )

        if y is not None and self.constructor is not None:
            G = self.constructor.fit_transform(X, y)
        elif y is None and G is not None:
            y = np.array([node[1]['class'] for node in G.nodes(data=True)])

        if self.reduction_factor is not None:
            if not isinstance(self.reduction_factor, list):
                raise Exception('Reduction_factor must be a list or None')

            if np.max(self.reduction_factor) > 1 or np.min(self.reduction_factor) < 0:  # noqa: E501
                raise Exception('Reduction_factor must be between 0 and 1')

            if len(np.unique(y[~np.isnan(y)])) != len(self.reduction_factor):
                raise Exception('The number of reduction factors must be equal'
                                ' to the number of classes')

        missing_elements = len(y[np.isnan(y)])

        if self.reduction_factor is not None:
            # Keep the originals so the reduced labels can be mapped back
            original_G = G.copy()
            original_y = y.copy()

            G = self._reduce_graph(G, y)

            positions_dict = {i: node for i, node in enumerate(list(G.nodes()))}  # noqa: E501
            G = nx.convert_node_labels_to_integers(G)
            y = np.array([node[1]['class'] for node in G.nodes(data=True)])

        # Generate modularity matrix
        Q = self._increment_modularity_matrix(G)

        while missing_elements != 0:
            propagated = False

            while not propagated:
                # Select the pair (i, j) with the largest remaining
                # modularity increment
                i, j = np.unravel_index(Q.argmax(), Q.shape)

                if np.isneginf(Q[i][j]):
                    # Every pair has already been consumed but some labels
                    # are still missing (e.g. a component without any
                    # labeled node). The previous implementation would loop
                    # forever here
                    raise Exception(
                        'Could not propagate labels to every instance. The '
                        'network probably has a component with no labeled '
                        'node'
                    )

                Q[i][j] = -np.inf
                Q[j][i] = -np.inf

                # nan != anything is True, so pairs with at least one
                # missing label enter this branch
                if y[i] != y[j]:
                    if (~np.isnan(y[i])) and (~np.isnan(y[j])):
                        # Both endpoints already labeled (with different
                        # classes): nothing to propagate
                        continue
                    if np.isnan(y[i]) and np.isnan(y[j]):
                        # Neither endpoint is labeled yet, so there is no
                        # label to propagate through this pair. The previous
                        # implementation marked this as a successful
                        # propagation, wasting an outer-loop iteration
                        continue
                    if np.isnan(y[i]):
                        y[i] = y[j]
                        G.nodes[i]['class'] = y[i]
                        propagated = True
                    if np.isnan(y[j]):
                        y[j] = y[i]
                        G.nodes[j]['class'] = y[j]
                        propagated = True
                else:
                    continue

            missing_elements = len(y[np.isnan(y)])

        if self.reduction_factor is not None:
            # Map the labels of the reduced network back onto the original
            # nodes
            for key in positions_dict:
                original_y[positions_dict[key]] = y[key]
                original_G.nodes[positions_dict[key]]['class'] = y[key]  # noqa: E501

            y = original_y
            G = original_G

        self.generated_y_ = y
        self.generated_G_ = G

        return self

    def get_propagated_labels(self):
        """
        Return the labels list with the propagated classes

        Returns
        -------
        generated_y_ : {ndarray, pandas series}, shape (n_samples, 1)
            The label list
        """

        return self.generated_y_

    def get_propagated_network(self):
        """
        Returns the generated network with the propagated labels

        Returns
        --------
        generated_G_ : NetworkX Network
            The constructed network on the fit of the model"""

        return self.generated_G_

    def _increment_modularity_matrix(self, G):
        """Computes the pairwise modularity increment matrix.

        For adjacent nodes, ``Q[i][j] = 1/(2E) - k_i*k_j/(2E)^2`` (the
        modularity gain of putting i and j in the same community,
        Silva & Zhao 2012); zero for non-adjacent pairs.
        """
        N = len(G.nodes)
        E = len(G.edges)
        k = [val for (node, val) in G.degree()]

        # Preallocate as a float array instead of nested lists
        Q = np.zeros((N, N))

        for i in range(N):
            for j in range(N):
                # ``has_edge(j, i)`` is equivalent to the original
                # membership test on ``G.neighbors(j)`` and avoids scanning
                # the neighbors iterator
                if G.has_edge(j, i):
                    Q[i][j] = (1/(2*E)) - (k[i]*k[j])/((2*E)**2)
        return Q

    def _reduce_graph(self, G, y):
        """
        Reduce the graph using the algorithm from Silva & Zhao (2012)

        Parameters
        ----------
        G : NetworkX Network
            The network to be reduced
        y : {ndarray, pandas series}, shape (n_samples,)
            The label list

        Returns
        -------
        G : NetworkX Network
            The reduced network
        """
        G = G.copy()
        classes = np.unique(y[~np.isnan(y)])
        classes.sort()
        for idx, class_ in enumerate(classes):
            factor = self.reduction_factor[idx]

            if factor == 0:
                continue

            N = len([i for i in G.nodes(data=True) if i[1]['class'] == class_])  # noqa: E501
            N_tilda = N

            # factor == 1 would collapse the class to zero nodes; keep one
            if factor != 1:
                desired_value = round((1-factor) * N)
            else:
                desired_value = 1

            while N_tilda != desired_value:
                # Randomly select two nodes from the class
                nodes = np.random.choice(
                    [i[0] for i in G.nodes(data=True) if i[1]['class'] == class_],  # noqa: E501
                    size=2,
                    replace=False)

                # Get the edges from first node
                edges = [i for i in G.edges(nodes[0])]

                # Remove the first node from the network
                G.remove_node(nodes[0])

                # Remove self-loops
                G.remove_edges_from(nx.selfloop_edges(G))

                # Redistribute the edges from the first node to the second node
                for edge in edges:
                    # Avoid self-loops
                    if edge[0] == edge[1]:
                        continue
                    if edge[0] == nodes[0]:
                        G.add_edge(nodes[1], edge[1])
                    else:
                        G.add_edge(edge[0], nodes[1])

                N_tilda = len([i for i in G.nodes(data=True) if i[1]['class'] == class_])  # noqa: E501

        # Remove any possible remaining self-loop
        G.remove_edges_from(nx.selfloop_edges(G))
        return G
import pytest
import numpy as np

from sklearn.datasets import load_iris

from sknet.network_construction import KNNConstructor
from sknet.semi_supervised import ModularityLabelPropagation


@pytest.fixture
def X_y_generator():
    # Iris with three blocks of labels hidden (np.nan marks "unlabeled")
    X, y = load_iris(return_X_y=True)
    y = np.array(y, dtype='float32')
    y[10:40] = np.nan
    y[60:70] = np.nan
    y[110:140] = np.nan
    return X, y


@pytest.fixture
def result_generator():
    # 50 samples per class; after propagation a handful of third-class
    # samples end up labeled as class 1
    result = [0.0] * 50 + [1.0] * 50 + [2.0] * 50
    for flipped in (110, 119, 123, 126, 127, 133, 138):
        result[flipped] = 1.0
    return result


def _propagated(model):
    # Normalizes the propagated labels for comparison
    return np.array(model.get_propagated_labels(), dtype='float32')


def test_fit_y(X_y_generator, result_generator):
    X, y = X_y_generator
    model = ModularityLabelPropagation()
    model.fit(X, y, constructor=KNNConstructor(k=5, sep_comp=False))

    np.testing.assert_equal(result_generator, _propagated(model))


def test_fit_G(X_y_generator, result_generator):
    X, y = X_y_generator
    graph = KNNConstructor(k=5, sep_comp=False).fit_transform(X, y)
    model = ModularityLabelPropagation()
    model.fit(G=graph)

    np.testing.assert_equal(result_generator, _propagated(model))


def test_set_get_params():
    model = ModularityLabelPropagation()
    model.set_params(reduction_factor=None)
    assert model.get_params() == {'reduction_factor': None,
                                  'random_state': None}


def test_raise_on_fit_1(X_y_generator):
    model = ModularityLabelPropagation()
    with pytest.raises(Exception):
        model.fit(X=X_y_generator[0], y=None, G=None)


def test_raise_on_fit_2(X_y_generator):
    model = ModularityLabelPropagation()
    with pytest.raises(Exception):
        model.fit(X=X_y_generator[0], y=X_y_generator[1], G=None,
                  constructor=None)


def test_raises_on_aggregation(X_y_generator):
    # Not a list, wrong length, and out-of-range values must all raise
    bad_factors = [0.3, [0.5, 0.2], [2, 13, 9]]
    for factor in bad_factors:
        model = ModularityLabelPropagation(reduction_factor=factor)
        with pytest.raises(Exception):
            model.fit(X_y_generator[0], X_y_generator[1],
                      constructor=KNNConstructor(k=5, sep_comp=False))


def test_aggregation(X_y_generator):
    model = ModularityLabelPropagation(reduction_factor=[0.5, 0.5, 0],
                                       random_state=42)
    model.fit(X_y_generator[0], X_y_generator[1],
              constructor=KNNConstructor(k=5, sep_comp=False))

    expected_result = [0.0] * 50 + [1.0] * 50 + [2.0] * 50
    for flipped in (110, 120, 127):
        expected_result[flipped] = 1.0

    np.testing.assert_equal(expected_result, _propagated(model))
import numpy as np
import pandas as pd
import networkx as nx

from scipy.stats import mode  # noqa: F401  (kept for backward compatibility)
from abc import ABCMeta, abstractmethod
from sklearn.neighbors import DistanceMetric

from sknet.network_construction import KNNConstructor


class EaseOfAccess(metaclass=ABCMeta):
    """
    Ease of Access method to learn network patterns on data

    Parameters
    ----------
    epsilon : float, default=0.2
        The perturbance to be applied to the weights matrix after the
        insertion of the test data
    t : int, default=3
        Number of points on the convergence probabilities vector
        to classify the test data
    method : str, default='eigenvalue'
        Which method to use to compute the markov chain limiting
        probabilities. Options are 'eigenvalue' and 'power'.

    Attributes
    ----------
    constructor_ : BaseConstructor inherited class
        The transformer used to transform the tabular data into network
    epsilon : float
        The disturbance applied to the weights matrix
    t : int
        Number of points used on the convergence probabilities
    method : str
        Method used to compute the limiting probabilities of the Markov chain
    G_ : NetworkX network
        The network generated from the tabular data
    W_ : {array-like, pandas dataframe} of shape (n_samples, n_samples)
        The adjacency matrix of the network G
    X_ : {array-like, pandas dataframe} of shape (n_samples, n_features)
        The used tabular data features
    y_ : {ndarray, pandas series}, shape (n_samples,) or (n_samples, n_classes)
        The classes of each node

    Notes
    -----
    Do not use this abstract class, use derived classes instead

    References
    ----------
    Cupertino, T.H., Zhao, L., Carneiro, M.G.: Network-based supervised data
    classification by using an heuristic of ease of access. Neurocomputing
    149(Part A), 86–92 (2015)

    Silva, Thiago & Zhao, Liang. (2016). Machine Learning in Complex
    Networks. 10.1007/978-3-319-17290-3.

    """

    def __init__(self, epsilon=0.2, t=3, method='eigenvalue'):
        self.epsilon = epsilon
        self.t = t
        self.method = method

    def set_params(self, **parameters):
        """Sets the given parameters on the instance and returns it"""
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def get_params(self, deep=True):
        """Returns the constructor parameters of the instance"""
        return {'epsilon': self.epsilon, 't': self.t, 'method': self.method}

    def fit(self, X, y, G=None, constructor=KNNConstructor(5, sep_comp=True)):
        """
        Fit the model, internalizing the graph that should be used

        Parameters
        ----------
        X : {array-like, pandas dataframe} of shape (n_samples, n_features)
            The input data.
        y : {ndarray, pandas series}, shape (n_samples,) or (n_samples,
            n_classes)
            The true classes.
        G : NetworkX Graph, default=None
            If the graph was already generated, then this parameter will
            make as so the transformer is not called
        constructor : BaseConstructor inherited class, optional(default=
            KNNConstructor(5, sep_comp=True))
            A constructor class to transform the tabular data into a
            network. NOTE(review): the default is a single shared instance
            (mutable default argument); pass an explicit constructor when
            fitting several models

        Notes
        -----
        Even though the G can be passed directly, X is required so the
        distance between test classes and the other nodes on the graph
        can be calculated

        According to the paper implementation, the network should not
        have separated components for each class, if passing an already
        created network to the method, be mindful of that

        """
        self.constructor_ = constructor
        if G is None:
            # Generates the graph from X and y. The heuristic requires a
            # single connected structure, so separated components per class
            # are disabled
            self.constructor_.set_sep_comp(False)
            self.G_ = self.constructor_.fit_transform(X, y)
        else:
            self.G_ = G

        # Transforms the network into undirected
        if nx.is_directed(self.G_):
            self.G_ = self.G_.to_undirected()

        # Generates W matrix (dense adjacency/weights matrix)
        self.W_ = nx.to_numpy_array(self.G_)

        self.X_ = X
        self.y_ = y

        return self

    def predict(self, X):
        """
        Predicts the labels of the test samples from X

        Parameters
        ----------
        X : {array-like, pandas dataframe} of shape (n_samples, n_features)
            The test data to be predicted.

        Returns
        -------
        predictions : array-like of shape (n_samples)
            The predicted label for each sample
        """
        predictions = []
        dist = self._get_distance_metric()

        # Plain arrays so iteration yields samples (iterating a DataFrame
        # would yield column names) and so vectorized pairwise works
        X = np.asarray(X)
        X_ref = np.asarray(self.X_)

        for x in X:

            # Distance between the new instance and every training node in
            # a single vectorized call. The previous implementation built a
            # full 2x2 pairwise matrix per training sample just to read one
            # entry. Assumes the metric object supports ``pairwise(X, Y)``
            # like sklearn's DistanceMetric — confirm for custom metrics
            s = dist.pairwise(x.reshape(1, -1), X_ref)[0]
            L = len(s)

            # S_tilda[i][j] = s[i] for every j
            S_tilda = np.repeat(s[:, np.newaxis], L, axis=1)

            # Perturb the weight matrix with the new distances
            w_tilda = self.W_ + self.epsilon * S_tilda

            # Row-normalize into a stochastic (transition) matrix
            P = w_tilda / np.sum(w_tilda, axis=1)[:, np.newaxis]

            # Computes the convergence (limiting probabilities)
            self.P_inf = self._stationary_distribution(P, self.method)

            # Associates each class with the probabilities
            res = pd.DataFrame()
            res['prob'] = self.P_inf
            res['y'] = self.y_
            res.sort_values('prob', inplace=True, ascending=False)

            # Gets the t most accessible nodes and aggregates their labels
            self.tau_ = res.iloc[:self.t]

            predictions.append(self._aggregation_method(self.tau_))

        return predictions

    @abstractmethod
    def _aggregation_method(self, tau):
        """Defines which aggregation method to use"""

    def _get_distance_metric(self):
        # When the constructor holds a metric name, resolve it through
        # sklearn; otherwise trust the user-provided metric object
        metric = self.constructor_.metric

        if type(metric) is str:
            return DistanceMetric.get_metric(metric)

        return metric

    def _stationary_distribution(self, W, method):
        """Computes the limiting distribution of the Markov chain whose
        transition matrix is ``W``"""

        if method == 'power':
            # Fixed number of multiplications; assumes the chain mixes in
            # well under 50 steps
            return np.linalg.matrix_power(np.array(W), 50)[0]

        elif method == 'eigenvalue':
            evals, evecs = np.linalg.eig(np.array(W).T)
            # NOTE(review): if no eigenvalue is close enough to 1 this
            # indexing raises IndexError — W must be row-stochastic
            evec1 = evecs[:, np.isclose(evals, 1)]

            evec1 = evec1[:, 0]

            stationary = evec1 / evec1.sum()

            return stationary.real

        else:
            raise Exception("{} is not an available method to calculate the markov chain \
                convergence. Available methods are 'power' and \
                'eigenvalue'".format(method))


class EaseOfAccessClassifier(EaseOfAccess):
    """
    Ease of Access Classifier

    Classifier that uses the heuristic of ease of access to classify
    new instances inside a network

    Parameters
    ----------
    epsilon : float, default=0.2
        The perturbance to be applied to the weights matrix after the
        insertion of the test data
    t : int, default=3
        Number of points on the convergence probabilities vector
        to classify the test data
    method : str, default='eigenvalue'
        Which method to use to compute the markov chain limiting
        probabilities. Options are 'eigenvalue' and 'power'.

    Attributes
    ----------
    constructor_ : BaseConstructor inherited class
        The constructor used to transform the tabular data into network
    epsilon : float
        The disturbance applied to the weights matrix
    t : int
        Number of points used on the convergence probabilities
    method : str
        Method used to compute the limiting probabilities of the Markov chain
    G_ : NetworkX network
        The network generated from the tabular data
    W_ : {array-like, pandas dataframe} of shape (n_samples, n_samples)
        The adjacency matrix of the network G
    X_ : {array-like, pandas dataframe} of shape (n_samples, n_features)
        The used tabular data features
    y_ : {ndarray, pandas series}, shape (n_samples,) or (n_samples, n_classes)
        The classes of each node

    Examples
    --------
    >>> from sklearn.datasets import load_iris
    >>> from sknet.network_construction import KNNConstructor
    >>> from sknet.supervised import EaseOfAccessClassifier
    >>> X, y = load_iris(return_X_y = True)
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            test_size=0.33)
    >>> knn_c = KNNConstructor(k=5)
    >>> classifier = EaseOfAccessClassifier(t=5)
    >>> classifier.fit(X_train, y_train, constructor=knn_c)
    >>> ease = classifier.predict(X_test)
    >>> accuracy_score(y_test, ease)
    0.92

    References
    ----------
    Cupertino, T.H., Zhao, L., Carneiro, M.G.: Network-based supervised data
    classification by using an heuristic of ease of access. Neurocomputing
    149(Part A), 86–92 (2015)

    Silva, Thiago & Zhao, Liang. (2016). Machine Learning in Complex
    Networks. 10.1007/978-3-319-17290-3.

    """

    _estimator_type = 'classifier'

    def __init__(self, epsilon=0.2, t=3, method='eigenvalue'):
        super().__init__(epsilon, t, method)

    def _aggregation_method(self, tau):
        # Majority vote among the t most accessible nodes.
        # ``scipy.stats.mode(...)[0][0]`` broke when SciPy 1.11 changed the
        # return shape to scalars; ``pandas.Series.mode`` is stable and,
        # being sorted ascending, ``iloc[0]`` reproduces SciPy's
        # smallest-mode tie-breaking
        return tau['y'].mode().iloc[0]


class EaseOfAccessRegressor(EaseOfAccess):
    """
    Ease of Access Regressor

    Regressor that uses the heuristic of ease of access to predict
    the real-value of the target of new instances inside a network

    Parameters
    ----------
    epsilon : float, default=0.2
        The perturbance to be applied to the weights matrix after the
        insertion of the test data
    t : int, default=3
        Number of points on the convergence probabilities vector
        to classify the test data
    method : str, default='eigenvalue'
        Which method to use to compute the markov chain limiting
        probabilities. Options are 'eigenvalue' and 'power'.

    Attributes
    ----------
    constructor_ : BaseConstructor inherited class
        The constructor used to transform the tabular data into network
    epsilon : float
        The disturbance applied to the weights matrix
    t : int
        Number of points used on the convergence probabilities
    method : str
        Method used to compute the limiting probabilities of the Markov chain
    G_ : NetworkX network
        The network generated from the tabular data
    W_ : {array-like, pandas dataframe} of shape (n_samples, n_samples)
        The adjacency matrix of the network G
    X_ : {array-like, pandas dataframe} of shape (n_samples, n_features)
        The used tabular data features
    y_ : {ndarray, pandas series}, shape (n_samples,) or (n_samples, n_classes)
        The targets of each node

    Examples
    --------
    >>> from sklearn.datasets import load_diabetes
    >>> from sknet.network_construction import KNNConstructor
    >>> from sknet.supervised import EaseOfAccessRegressor
    >>> X, y = load_diabetes(return_X_y = True)
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            test_size=0.33)
    >>> knn_c = KNNConstructor(k=5)
    >>> reg = EaseOfAccessRegressor(t=5)
    >>> reg.fit(X_train, y_train, constructor=knn_c)
    >>> ease = reg.predict(X_test)

    References
    ----------
    Cupertino, T.H., Zhao, L., Carneiro, M.G.: Network-based supervised data
    classification by using an heuristic of ease of access. Neurocomputing
    149(Part A), 86–92 (2015)

    Silva, Thiago & Zhao, Liang. (2016). Machine Learning in Complex
    Networks. 10.1007/978-3-319-17290-3.

    """

    _estimator_type = 'regressor'

    def __init__(self, epsilon=0.2, t=3, method='eigenvalue'):
        super().__init__(epsilon, t, method)

    def _aggregation_method(self, tau):
        # Average target of the t most accessible nodes
        return tau['y'].mean()
import copy
import numpy as np
from tqdm import tqdm

from sknet.utils import NetworkMetricsHandler
from sknet.utils import LowLevelModelsHandler

from sknet.network_construction import KNNConstructor


class HighLevelClassifier():
    """
    Classifies a dataset using a high-level approach where the predictions
    from a low-level model (standard ML) and a high-level model (Complex
    Network) are combined to generate a final inference about the class of
    each data point.

    Parameters
    ----------
    low_level : str, optional(default='random_forest')
        The low-level model to be used. See available options on the
        low_level_models_handler documentation
    p : float, optional(default=0.5)
        The weight to be used on the ponderation between the
        low-level and the high-level model predictions. The formula
        is:
        ``(1 - p) * low_level + p * high_level``
        This number should be less or equal than one
    alphas : list of floats, optional(default=[0.5, 0.5])
        The weight to be used on each high-level metric for the
        classification. This list should sum up to one.
    metrics : list of str, optional(default=['clustering_coefficient',
        'assortativity'])
        Which complex networks metrics to use to generate the high-level
        prediction. See available options on the network_metrics_handler
    low_level_parameters : dict, optional(default={})
        Parameters to be set on the low-level classifier

    Attributes
    ----------
    constructor_ : BaseConstructor inherited class
        The transformer used to transform the tabular data into network
    low_level_pred_ : {ndarray, pandas series}, shape (n_samples, n_classes)
        The probability of each class from the low-level prediction
    high_level_pred_ : {ndarray, pandas series}, shape (n_samples, n_classes)
        The probability of each class from the high-level prediction
    original_constructor : BaseConstructor inherited class
        A pristine copy of the fitted constructor, kept because node
        insertions during prediction mutate the working constructor

    Examples
    --------
    >>> from sklearn.datasets import load_iris
    >>> from sknet.network_construction import KNNConstructor
    >>> from sknet.supervised import HighLevelClassifier
    >>> X, y = load_iris(return_X_y = True)
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            test_size=0.33)
    >>> knn_c = KNNConstructor(k=5)
    >>> classifier = HighLevelClassifier()
    >>> classifier.fit(X_train, y_train, constructor=knn_c)
    >>> pred = classifier.predict(X_test)

    References
    ----------
    Silva, T.C., Zhao, L.: Network-based high level data classification.
    IEEE Trans. Neural Netw. Learn. Syst. 23(6), 954–970 (2012)

    Silva, Thiago & Zhao, Liang. (2016). Machine Learning in Complex
    Networks. 10.1007/978-3-319-17290-3.

    """
    _estimator_type = 'classifier'

    def __init__(self, low_level='random_forest',
                 p=0.5, alphas=[0.5, 0.5],
                 metrics=['clustering_coefficient', 'assortativity'],
                 low_level_parameters={}):
        # NOTE(review): list/dict defaults are shared across instances;
        # they are never mutated here, but be careful when extending
        self.p = p
        self.alphas = alphas
        self.metrics = metrics
        self.low_level = low_level
        self.low_level_parameters = low_level_parameters

    def set_params(self, **parameters):
        """Sets the given parameters on the instance and returns it"""
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def get_params(self, deep=True):
        """Returns the constructor parameters of the instance.

        The bogus duplicated ``'self.metrics'`` key of the previous
        implementation was removed: it does not match any ``__init__``
        parameter and broke sklearn-style cloning.
        """
        return {'p': self.p, 'alphas': self.alphas, 'metrics': self.metrics,
                'low_level': self.low_level,
                'low_level_parameters': self.low_level_parameters}

    def fit(self, X, y, G=None, constructor=KNNConstructor(5)):
        """Fit the classifier by fitting the low-level model and
        creating the high-level classification network

        Parameters
        ----------
        X : {array-like, pandas dataframe} of shape (n_samples, n_features)
            The input data samples
        y : {ndarray, pandas series}, shape (n_samples,) or
            (n_samples, n_classes), default=None
            The true classes
        G : NetworkX Graph, default=None
            If the graph was already generated, then this parameter will
            make as so the transformer is not called. Notice that each
            component should be formed of only one class
        constructor : BaseConstructor inherited class, optional(default=
            KNNConstructor(5))
            A constructor class to transform the tabular data into a
            network. NOTE(review): the default is a single shared instance
            (mutable default argument); pass an explicit constructor when
            fitting several models

        Raises
        ------
        ValueError
            If ``p`` is greater than one or ``alphas`` does not sum to one
        """
        self.constructor_ = constructor

        # Basic configuration
        self.metrics_handler = NetworkMetricsHandler()
        self.low_level_handler = LowLevelModelsHandler()

        self.low_level_model = self.low_level_handler.get_model(
            self.low_level, self.low_level_parameters
        )
        self.metric_func = []
        self.default_values = []
        for metric in self.metrics:
            self.metric_func.append(self.metrics_handler.get_metric(metric))
            self.default_values.append(self.metrics_handler.get_default_value(
                metric)
            )

        # Validate hyperparameters explicitly. The previous ``assert``
        # vanished under ``python -O``, and the exact float-equality check
        # on alphas rejected valid values such as [0.3, 0.3, 0.4]
        if self.p > 1:
            raise ValueError('p should be less or equal than one')

        if not np.isclose(np.sum(self.alphas), 1):
            raise ValueError('Alphas should sum to one')

        # Fits the constructor to generate the network
        if G is not None:
            self.G_ = G
        else:
            # One component per class is required by the high-level method
            self.constructor_.set_sep_comp(True)
            self.G_ = self.constructor_.fit_transform(X, y)

        # Fits the low level model
        self.low_level_model.fit(X, y)

        self.X = X
        self.y = y

        return self

    def predict_proba(self, X_test):
        """Predicts the probability, for each test sample,
        that it belongs to any of the training classes

        Parameters
        ----------
        X_test : {array-like, pandas dataframe} of shape
            (n_samples, n_features)
            The input data samples

        Returns
        -------
        final_pred : ndarray of shape (n_samples, n_classes)
            Convex combination of the low-level and high-level class
            probabilities, weighted by ``p``
        """
        # Gets the low level predictions
        self.low_level_pred_ = self.low_level_model.predict_proba(X_test)

        classes = np.unique(self.y)
        self.high_level_pred_ = np.zeros((len(X_test), len(classes)))
        total_training_nodes = len(self.G_.nodes)

        # Proportion of training nodes belonging to each class
        class_proportions = np.zeros((len(classes)))
        for class_id, class_ in enumerate(classes):
            label_ind = np.where(self.y == class_)
            X_ = np.take(self.X, label_ind, axis=0)[0]
            class_proportions[class_id] = len(X_) / total_training_nodes

        # We need to keep the original constructor: node insertions below
        # mutate the working constructor's internal network
        self.original_constructor = copy.deepcopy(self.constructor_)

        for i, x in tqdm(enumerate(X_test)):
            original_G = self.original_constructor.get_network()

            delta_G = np.zeros((len(self.metric_func), len(classes)))

            # Tries to put the node into each class component and measures
            # how much each network metric varies
            for class_id, class_ in enumerate(classes):

                # Selects subset of original G
                original_G_sub = self._get_subgraph(original_G, class_)

                # Adds the node to the network on the component of the class
                singleton = False
                self.constructor_.add_nodes([x], [class_])
                new_G = self.constructor_.get_network()
                new_G_sub = self._get_subgraph(new_G, class_)

                # A node without neighbors cannot change the metrics, so
                # fall back to each metric's default value
                node = list(new_G_sub.nodes())[-1]
                if new_G_sub.adj[node] == {}:
                    singleton = True

                # Gets the variation of each metric caused by the insertion
                for idx, metric in enumerate(self.metric_func):
                    if not singleton:
                        delta_G[idx][class_id] = (
                            metric(original_G_sub) - metric(new_G_sub)
                        )
                    else:
                        delta_G[idx][class_id] = self.default_values[idx]

                # Restore the unmodified constructor for the next insertion
                self.constructor_ = copy.deepcopy(self.original_constructor)

            # Normalize each metric's variations across classes. The
            # previous implementation computed this expression but
            # discarded the result; the division is now actually applied,
            # guarding against all-zero rows
            row_sums = delta_G.sum(axis=1)[:, np.newaxis]
            delta_G = np.divide(delta_G, row_sums,
                                out=np.zeros_like(delta_G),
                                where=row_sums != 0)

            f = delta_G * class_proportions

            # Accumulate the weighted contribution of every metric. The
            # previous implementation overwrote the prediction on each
            # iteration, so only the last metric ever counted
            for k, f_ in enumerate(f):
                self.high_level_pred_[i] += self.alphas[k] * (1 - f_)

        # Normalize the high_level_pred into probabilities
        self.high_level_pred_ = (
            self.high_level_pred_ / self.high_level_pred_.sum(
                axis=1)[:, np.newaxis]
        )

        final_pred = (
            (1 - self.p) * self.low_level_pred_ +
            self.p * self.high_level_pred_
        )

        return final_pred

    def predict(self, X_test):
        """Predicts the class for each test sample

        Parameters
        ----------
        X_test : {array-like, pandas dataframe} of shape
            (n_samples, n_features)
            The input data samples

        Returns
        -------
        predictions : ndarray of shape (n_samples,)
            The predicted class label of each sample
        """
        proba = self.predict_proba(X_test)
        # ``argmax`` yields a column index; map it back to the actual class
        # labels so non-contiguous label sets are handled correctly (for
        # labels 0..n-1 this is identical to the previous behavior)
        classes = np.unique(self.y)
        return classes[np.argmax(proba, axis=1)]

    def _get_subgraph(self, G, class_):
        # View of G restricted to the nodes whose 'class' attribute matches
        nodes = (node for node, data in G.nodes(data=True)
                 if data.get('class') == class_)

        return G.subgraph(nodes)
X_train, y_train, X_test, y_test 34 | 35 | 36 | @pytest.fixture 37 | def module_generator_eigen_classifier(X_y_generator_classification): 38 | knn = KNNConstructor(k=3) 39 | classifier = EaseOfAccessClassifier(t=5) 40 | classifier.fit(X_y_generator_classification[0], 41 | X_y_generator_classification[1], constructor=knn) 42 | 43 | return classifier 44 | 45 | 46 | @pytest.fixture 47 | def module_generator_power_classifier(X_y_generator_classification): 48 | knn = KNNConstructor(k=3) 49 | classifier = EaseOfAccessClassifier(t=5, method='power') 50 | classifier.fit(X_y_generator_classification[0], 51 | X_y_generator_classification[1], constructor=knn) 52 | 53 | return classifier 54 | 55 | 56 | @pytest.fixture 57 | def module_generator_eigen_regressor(X_y_generator_regression): 58 | knn = KNNConstructor(k=3) 59 | regressor = EaseOfAccessRegressor(t=5) 60 | regressor.fit(X_y_generator_regression[0], 61 | X_y_generator_regression[1], constructor=knn) 62 | 63 | return regressor 64 | 65 | 66 | @pytest.fixture 67 | def module_generator_power_regressor(X_y_generator_regression): 68 | knn = KNNConstructor(k=3) 69 | regressor = EaseOfAccessRegressor(t=5, method='power') 70 | regressor.fit(X_y_generator_regression[0], 71 | X_y_generator_regression[1], constructor=knn) 72 | 73 | return regressor 74 | 75 | 76 | @pytest.fixture 77 | def class_generator_classifier(module_generator_eigen_classifier, 78 | module_generator_power_classifier, 79 | X_y_generator_classification): 80 | 81 | pred_eig = module_generator_eigen_classifier.predict( 82 | X_y_generator_classification[2] 83 | ) 84 | pred_power = module_generator_power_classifier.predict( 85 | X_y_generator_classification[2] 86 | ) 87 | 88 | return (module_generator_eigen_classifier, 89 | module_generator_power_classifier, pred_eig, pred_power) 90 | 91 | 92 | def test__stationary_distribution_classifier(class_generator_classifier): 93 | np.testing.assert_almost_equal(class_generator_classifier[0].P_inf, 94 | 
class_generator_classifier[1].P_inf) 95 | 96 | pd.testing.assert_frame_equal(class_generator_classifier[0].tau_, 97 | class_generator_classifier[1].tau_) 98 | 99 | 100 | @pytest.fixture 101 | def class_generator_regressor(module_generator_eigen_regressor, 102 | module_generator_power_regressor, 103 | X_y_generator_regression): 104 | 105 | pred_eig = module_generator_eigen_regressor.predict( 106 | X_y_generator_regression[2] 107 | ) 108 | pred_power = module_generator_power_regressor.predict( 109 | X_y_generator_regression[2] 110 | ) 111 | 112 | return (module_generator_eigen_regressor, 113 | module_generator_power_regressor, pred_eig, pred_power) 114 | 115 | 116 | def test__stationary_distribution_regressor(class_generator_regressor): 117 | np.testing.assert_almost_equal(class_generator_regressor[0].P_inf, 118 | class_generator_regressor[1].P_inf) 119 | 120 | pd.testing.assert_frame_equal(class_generator_regressor[0].tau_, 121 | class_generator_regressor[1].tau_) 122 | 123 | 124 | def test_predictions_classifier(class_generator_classifier): 125 | 126 | expected = [1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 127 | 2, 1, 1, 2, 0, 1, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 128 | 1, 0, 0, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0, 1, 2, 2, 129 | 1, 2] 130 | 131 | eigen_pred = class_generator_classifier[2] 132 | power_pred = class_generator_classifier[3] 133 | 134 | assert eigen_pred == power_pred == expected 135 | 136 | 137 | def test_predictions_regressor(class_generator_regressor): 138 | 139 | expected = [21.28, 18.0, 17.26, 20.74, 17.2, 15.62, 20.98, 140 | 17.759999999999998, 19.52, 23.479999999999997, 141 | 20.080000000000002, 19.64, 18.76, 15.62, 18.22, 142 | 22.779999999999998, 18.560000000000002, 19.52, 143 | 21.619999999999997, 17.759999999999998, 15.440000000000001, 144 | 16.04, 15.440000000000001, 16.66, 16.860000000000003, 18.0, 145 | 16.259999999999998, 17.619999999999997, 25.080000000000002, 146 | 15.5, 14.62, 18.6, 21.660000000000004, 17.619999999999997, 147 | 20.1, 18.0, 
19.860000000000003, 17.5, 21.24, 17.5, 148 | 17.619999999999997, 25.22, 25.340000000000003, 149 | 19.639999999999997, 14.66, 19.619999999999997, 150 | 16.9, 16.04, 22.52, 17.6] 151 | 152 | eigen_pred = class_generator_regressor[2] 153 | power_pred = class_generator_regressor[3] 154 | 155 | assert eigen_pred == power_pred == expected 156 | 157 | 158 | def test_raise_on_predict(X_y_generator_classification): 159 | 160 | knn = KNNConstructor(k=3) 161 | classifier = EaseOfAccessClassifier(t=5, method='something') 162 | 163 | with pytest.raises(Exception): 164 | classifier.fit(X_y_generator_classification[0], 165 | X_y_generator_classification[1], constructor=knn) 166 | classifier.predict(X_y_generator_classification[2]) 167 | 168 | 169 | def test_set_get_params(module_generator_eigen_classifier): 170 | classifier = module_generator_eigen_classifier 171 | classifier.set_params(t=5) 172 | assert classifier.get_params()['t'] == 5 173 | -------------------------------------------------------------------------------- /sknet/supervised/tests/test_high_level_classification.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from sklearn.datasets import load_iris 5 | from sklearn.model_selection import train_test_split 6 | 7 | from sknet.network_construction import KNNConstructor 8 | from sknet.supervised import HighLevelClassifier 9 | 10 | 11 | @pytest.fixture 12 | def X_y_generator(): 13 | 14 | X, y = load_iris(return_X_y=True) 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, 16 | test_size=0.33, 17 | random_state=42) 18 | 19 | return X_train, y_train, X_test, y_test 20 | 21 | 22 | @pytest.fixture 23 | def module_generator(X_y_generator): 24 | knn = KNNConstructor(k=3) 25 | classifier = HighLevelClassifier( 26 | 'random_forest', 0.5, [0.5, 0.5], 27 | ['clustering_coefficient', 'assortativity'] 28 | ) 29 | classifier.fit(X_y_generator[0], X_y_generator[1], constructor=knn) 30 | 31 
| return classifier 32 | 33 | 34 | def test_predict(module_generator, X_y_generator): 35 | 36 | expected = [1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 37 | 1, 2, 1, 1, 2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 0, 38 | 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0, 0, 39 | 1, 1, 2, 1, 2] 40 | pred = module_generator.predict(X_y_generator[2]) 41 | np.testing.assert_equal(expected, pred) 42 | 43 | 44 | def test_set_get_param(module_generator): 45 | module_generator.set_params(p=0.2) 46 | assert module_generator.get_params()['p'] == 0.2 47 | 48 | 49 | def test_alpha_raise_on_fit(module_generator): 50 | module_generator.set_params(alpha=[0.7, 0.9]) 51 | with pytest.raises(Exception): 52 | module_generator.fit(X_y_generator[0], X_y_generator[1]) 53 | -------------------------------------------------------------------------------- /sknet/unsupervised/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .stochastic_particle_competition import StochasticParticleCompetition -------------------------------------------------------------------------------- /sknet/unsupervised/stochastic_particle_competition.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import networkx as nx 3 | 4 | from sknet.network_construction import KNNConstructor 5 | 6 | 7 | class StochasticParticleCompetition(): 8 | """ 9 | Non supervised method that uses a stochastic particle competition to 10 | group the data into K clusters. 11 | 12 | This class still has major performance issues, taking too long to 13 | converge. 
Further optimization shall happen, be advised when using 14 | 15 | Parameters 16 | ---------- 17 | K : int, optional(default=3) 18 | The number of particles to compete which will be the number of 19 | resulting clusters 20 | lambda_ : float, optional(default=0.5) 21 | The probability of a particle choosing the preferential movement 22 | (exploitation) against the random movement (exploration) 23 | delta : int, optional(default=10) 24 | The amount of energy gained at each step for each particle 25 | omega_max : float, optional(default=10) 26 | The maximum amount of energy that a particle can have at any given time 27 | omega_min : float, optional(default=1) 28 | The minimum amount of energy before a particle is exhausted 29 | epsilon : float, optional(default=0.01) 30 | The minimum difference between the dominance matrix variation 31 | before finishing the competition. 32 | n_iter : int, optional(default=500) 33 | The maximum number of steps before finishing the competition. 34 | The process will stop when either the convergence happens given epsilon 35 | or the maximum number of steps is reached 36 | 37 | Attributes 38 | ---------- 39 | clusters_ : {ndarray, pandas series}, shape (n_samples, 1) 40 | The cluster of each sample 41 | 42 | Examples 43 | -------- 44 | >>> from sklearn.datasets import load_iris 45 | >>> from sknet.network_construction import KNNConstructor 46 | >>> from sknet.unsupervised import StochasticParticleCompetition 47 | >>> X, y = load_iris(return_X_y = True) 48 | >>> knn_c = KNNConstructor(k=5, sep_comp=False) 49 | >>> SCP = StochasticParticleCompetition() 50 | >>> SCP.fit(X, y, constructor=knn_c) 51 | >>> SCP.clusters_ 52 | array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 53 | 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 54 | 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 55 | 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 56 | 1., 1., 1., 1., 1., 1., 
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 57 | 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 58 | 2., 2., 2., 2., 2., 2., 2., 2., 1., 2., 2., 2., 2., 2., 2., 2., 2., 59 | 1., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 60 | 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.]) 61 | 62 | References 63 | ---------- 64 | T. C. Silva and L. Zhao, "Stochastic Competitive Learning in Complex 65 | Networks," in IEEE Transactions on Neural Networks and Learning 66 | Systems, vol. 23, no. 3, pp. 385-398, March 2012, 67 | doi: 10.1109/TNNLS.2011.2181866. 68 | 69 | Silva, Thiago & Zhao, Liang. (2016). Machine Learning in Complex 70 | Networks. 10.1007/978-3-319-17290-3. 71 | 72 | """ 73 | _estimator_type = 'clusterer' 74 | 75 | def __init__(self, K=3, lambda_=0.5, delta=0.1, 76 | omega_max=10, omega_min=1, epsilon=0.01, n_iter=500, 77 | random_state=None): 78 | self.K = K 79 | self.lambda_ = lambda_ 80 | self.delta = delta 81 | self.epsilon = epsilon 82 | self.omega_max = omega_max 83 | self.omega_min = omega_min 84 | self.n_iter = n_iter 85 | self.random_state = random_state 86 | np.random.seed(self.random_state) # Arrumar 87 | 88 | def set_params(self, **parameters): 89 | for parameter, value in parameters.items(): 90 | setattr(self, parameter, value) 91 | return self 92 | 93 | def get_params(self, deep=True): 94 | return {'K': self.K, 'lambda_': self.lambda_, 'delta': self.delta, 95 | 'omega_max': self.omega_max, 'omega_min': self.omega_min, 96 | 'n_iter': self.n_iter, 'random_state': self.random_state} 97 | 98 | def fit(self, X=None, y=None, G=None, 99 | constructor=KNNConstructor(5, sep_comp=False)): 100 | """Fit the algorithms by using the particle competition 101 | to cluster the data points 102 | 103 | Parameters 104 | ---------- 105 | X : {array-like, pandas dataframe} of shape 106 | (n_samples, n_features), optional (default=None) 107 | The input data samples. Can be None if G is set. 
108 | y : {ndarray, pandas series}, shape (n_samples,) or 109 | (n_samples, n_classes), optional (default=None) 110 | The target classes. Ignored for this class, used only 111 | to keep API consistency 112 | G : NetworkX Network, optional (default=None) 113 | The network to have its communities detected. Can be 114 | None if X is not None in which case the constructor 115 | will be used to generate the network. 116 | constructor : BaseConstructor inhrerited class, optional( 117 | default=KNNConstructor(5, sep_comp=False)) 118 | A constructor class to transform the tabular data into a 119 | network. It can be set to None if a complex network is directly 120 | passed to the ``fit`` method. Notice that you should use 'sep_com' 121 | as False on the constructor. 122 | 123 | """ 124 | self.constructor = constructor 125 | if X is None and G is None: 126 | raise Exception('X or G must be defined') 127 | 128 | if X is None and G is not None: 129 | self.G = G 130 | else: 131 | self.G = self.constructor.fit_transform(X, y) 132 | 133 | A = nx.to_numpy_array(self.G) 134 | self.V = A.shape[0] 135 | 136 | P_pref = np.zeros((self.V, self.V, self.K)) 137 | 138 | P_rean = np.zeros((self.V, self.V, self.K)) 139 | 140 | P_rand = self._create_p_rand(A) 141 | 142 | # Set the initial random position of the particles 143 | node_list = np.array(list(self.G)) 144 | p = np.random.choice(node_list, self.K, False) 145 | 146 | # Calculate initial N 147 | N = self._calculate_initial_N(p) 148 | 149 | N_bar = self._calculate_initial_N_bar(N) 150 | 151 | # Calculate initial E 152 | initial_energy = self.omega_min + ( 153 | (self.omega_max - self.omega_min) / self.K 154 | ) 155 | E = np.array([initial_energy] * self.K) 156 | 157 | # Calculate initial S 158 | S = np.zeros(self.K) 159 | 160 | P_tran = np.zeros((self.V, self.V, self.K)) 161 | 162 | convergence = False 163 | t = 0 164 | while not convergence and t < self.n_iter: 165 | 166 | # Updates the movement matrices 167 | P_pref = 
self._calculate_P_pref(A, N_bar) 168 | 169 | P_rean = self._calculate_P_rean(N_bar) 170 | 171 | P_tran = self._calculate_P_tran(P_rand, 172 | P_pref, P_rean, S, -1) 173 | 174 | p = self._choose_next_vertices(P_tran, p) 175 | 176 | N = self._update_N(p, N) 177 | old_N_Bar = N_bar.copy() 178 | N_bar = self._update_N_bar(N) 179 | E = self._update_E(E, N_bar, p) 180 | S = self._update_S(E) 181 | 182 | # Update time and verify convergence 183 | t += 1 184 | convergence = self._verify_convergence(N_bar, old_N_Bar) 185 | 186 | self.clusters_ = np.argmax(N_bar, axis=1) 187 | 188 | return self 189 | 190 | def predict(self, X=None, G=None): 191 | """ 192 | Returns the clusters after the model was fitted. 193 | 194 | Parameters 195 | ---------- 196 | 197 | X : {array-like, pandas dataframe} of shape 198 | (n_samples, n_features), optional (default=None) 199 | Ignored on this method 200 | G : NetworkX Network, optional (default=None) 201 | Ignored on this method 202 | """ 203 | return self.clusters_ 204 | 205 | def fit_predict(self, X=None, y=None, G=None): 206 | """Fit the algorithms by using the particle competition 207 | to cluster the data points 208 | 209 | Parameters 210 | ---------- 211 | X : {array-like, pandas dataframe} of shape 212 | (n_samples, n_features), optional (default=None) 213 | The input data samples. Can be None if G is set. 214 | y : {ndarray, pandas series}, shape (n_samples,) or 215 | (n_samples, n_classes), optional (default=None) 216 | The target classes. Ignored for this class, used only 217 | to keep API consistency 218 | G : NetworkX Network, optional (default=None) 219 | The network to have its communities detected. Can be 220 | None if X is not None in which case the constructor 221 | will be used to generate the network. 
222 | 223 | Returns 224 | ------- 225 | clusters_ : {array-like} of shape (n_samples) 226 | The cluster of each data point 227 | 228 | """ 229 | self.fit(X, y, G) 230 | return self.predict() 231 | 232 | def _verify_convergence(self, N_bar, old_N_bar): 233 | diff = np.sum(np.abs(N_bar - old_N_bar)) 234 | return diff < self.epsilon 235 | 236 | def _create_p_rand(self, A): 237 | P_rand = A / A.sum(axis=1, keepdims=True) 238 | return P_rand 239 | 240 | def _calculate_initial_N(self, p): 241 | N = np.ones((self.V, self.K)) 242 | for k, i in enumerate(p): 243 | N[int(i)][k] = 2 244 | return N 245 | 246 | def _calculate_initial_N_bar(self, N): 247 | N_bar = N/N.sum(axis=1, keepdims=True) 248 | return N_bar 249 | 250 | def _calculate_P_pref(self, A, N_bar): 251 | aux = np.zeros((self.V, self.V, self.K)) 252 | 253 | num = [[[A[i, j] * N_bar[j, k] for k in range(self.K) 254 | ] for j in range(self.V)] for i in range(self.V)] 255 | den = [[[np.sum([ 256 | A[i, l_]*N_bar[l_, k] for l_ in range(self.V) 257 | ]) for k in range(self.K)] for j in range(self.V) 258 | ] for i in range(self.V)] 259 | aux[:, :, :] = np.divide(np.array(num), np.array(den)) 260 | 261 | return aux 262 | 263 | def _calculate_P_rean(self, N_bar): 264 | aux = np.zeros((self.V, self.V, self.K)) 265 | 266 | for k in range(self.K): 267 | den = np.sum( 268 | [np.argmax(N_bar[u, :]) == k for u in range(self.V)] 269 | ) 270 | for j in range(self.V): 271 | num = 0 272 | if np.argmax(N_bar[j, :]) == k: 273 | num = 1 274 | 275 | aux[:, j, k] = [num/den for i in range(self.V)] 276 | 277 | return aux 278 | 279 | def _calculate_P_tran(self, P_rand, P_pref, P_rean, S, t): 280 | aux = np.zeros((self.V, self.V, self.K)) 281 | for k in range(self.K): 282 | 283 | non_exhausted = ( 284 | 1 - S[k]) * ( 285 | self.lambda_ * P_pref[:, :, k] + ( 286 | 1 - self.lambda_) * P_rand 287 | ) 288 | 289 | exhausted = S[k] * P_rean[:, :, k] 290 | aux[:, :, k] = non_exhausted + exhausted 291 | 292 | return aux 293 | 294 | def 
_choose_next_vertices(self, P_tran, p): 295 | aux = np.zeros(self.K) 296 | for k in range(self.K): 297 | aux[k] = np.random.choice( 298 | [i for i in range(self.V)], 299 | p=P_tran[int(p[k]), :, k] 300 | ) 301 | 302 | return aux 303 | 304 | def _update_N(self, p, N): 305 | aux = N.copy() 306 | for k, i in enumerate(p): 307 | aux[int(i), k] = aux[int(i), k] + 1 308 | 309 | return aux 310 | 311 | def _update_N_bar(self, N): 312 | N_bar_updated = self._calculate_initial_N_bar(N) 313 | 314 | return N_bar_updated 315 | 316 | def _update_E(self, E, N_bar, p): 317 | aux = np.zeros(self.K) 318 | for k in range(self.K): 319 | if self._is_owner(k, p, N_bar): 320 | aux[k] = min(E[k] + self.delta, self.omega_max) 321 | else: 322 | aux[k] = max(E[k] - self.delta, self.omega_min) 323 | 324 | return aux 325 | 326 | def _update_S(self, E): 327 | aux = np.zeros(self.K) 328 | for k in range(self.K): 329 | if E[k] == self.omega_min: 330 | aux[k] = 1 331 | else: 332 | aux[k] = 0 333 | 334 | return aux 335 | 336 | def _is_owner(self, k, p, N_bar): 337 | if np.argmax(N_bar[int(p[k]), :]) == k: 338 | return True 339 | else: 340 | return False 341 | -------------------------------------------------------------------------------- /sknet/unsupervised/tests/test_stochastic_particle_competition.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from sklearn.datasets import load_iris 5 | 6 | from sknet.network_construction import KNNConstructor 7 | from sknet.unsupervised import StochasticParticleCompetition 8 | 9 | 10 | @pytest.fixture 11 | def X_y_generator(): 12 | 13 | X, y = load_iris(return_X_y=True) 14 | y = np.array(y, dtype='float32') 15 | 16 | return X, y 17 | 18 | 19 | @pytest.fixture 20 | def result_generator(): 21 | result = [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 22 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23 | 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 24 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 27 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 28 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 29 | 30 | return result 31 | 32 | 33 | def test_fit_X(X_y_generator, result_generator): 34 | knn_c = KNNConstructor(k=5, sep_comp=False) 35 | SPC = StochasticParticleCompetition(random_state=42, n_iter=3) 36 | SPC.fit(X_y_generator[0], constructor=knn_c) 37 | 38 | np.testing.assert_equal(result_generator, 39 | np.array(SPC.clusters_, 40 | dtype='float32') 41 | ) 42 | 43 | 44 | def test_fit_G(X_y_generator, result_generator): 45 | knn_c = KNNConstructor(k=5, sep_comp=False) 46 | G = knn_c.fit_transform(X_y_generator[0], X_y_generator[1]) 47 | SPC = StochasticParticleCompetition(random_state=42, n_iter=3) 48 | SPC.fit(G=G) 49 | 50 | np.testing.assert_equal(result_generator, 51 | np.array(SPC.clusters_, 52 | dtype='float32') 53 | ) 54 | -------------------------------------------------------------------------------- /sknet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .network_metrics_handler import NetworkMetricsHandler 3 | from .low_level_models_handler import LowLevelModelsHandler 4 | from .network_types_handler import NetworkTypesHandler -------------------------------------------------------------------------------- /sknet/utils/low_level_models_handler.py: -------------------------------------------------------------------------------- 1 | from sklearn.svm import SVC, SVR 2 | from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor 3 | from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor 4 | 5 | 6 | class LowLevelModelsHandler(): 7 | """""" 8 | 9 | def __init__(self): 10 | self.mapper = self._generate_models_mapper() 11 | 12 | def 
_generate_models_mapper(self): 13 | mapper = { 14 | 'random_forest': [RandomForestClassifier, RandomForestRegressor], 15 | 'svm': [SVC, SVR], 16 | 'knn': [KNeighborsClassifier, KNeighborsRegressor], 17 | } 18 | return mapper 19 | 20 | def get_model(self, model, parameters, type_='classification'): 21 | index = False 22 | if type_ == 'classification': 23 | index = 0 24 | elif type_ == 'regression': 25 | index = 1 26 | 27 | low_level_model = self.mapper[model][index] 28 | low_level_model.set_params(parameters) 29 | 30 | return low_level_model() 31 | -------------------------------------------------------------------------------- /sknet/utils/network_metrics_handler.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | 3 | 4 | class NetworkMetricsHandler(): 5 | """""" 6 | 7 | def __init__(self): 8 | self.mapper = self._generate_metrics_mapper() 9 | self.default_values = self._default_values_mapper() 10 | 11 | def _generate_metrics_mapper(self): 12 | mapper = { 13 | 'assortativity': nx.degree_assortativity_coefficient, 14 | 'clustering_coefficient': nx.average_clustering, 15 | 'average_degree': nx.average_degree_connectivity, 16 | 'transitivity': nx.transitivity, 17 | 'connectivity': nx.average_node_connectivity, 18 | } 19 | return mapper 20 | 21 | def _default_values_mapper(self): 22 | mapper = { 23 | 'assortativity': 2, 24 | 'clustering_coefficient': 1, 25 | 'average_degree': 0, 26 | 'transitivity': 0, 27 | 'connectivity': 0, 28 | } 29 | return mapper 30 | 31 | def get_metric(self, metric): 32 | return self.mapper[metric] 33 | 34 | def get_default_value(self, metric): 35 | return self.default_values[metric] 36 | -------------------------------------------------------------------------------- /sknet/utils/network_types_handler.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | 3 | 4 | class NetworkTypesHandler(): 5 | """""" 6 | 7 | def 
__init__(self): 8 | self.mapper = self._generate_types_mapper() 9 | 10 | def _generate_types_mapper(self): 11 | mapper = { 12 | 'graph': nx.Graph, 13 | 'digraph': nx.DiGraph, 14 | 'multi_graph': nx.MultiGraph, 15 | 'multi_digraph': nx.MultiDiGraph, 16 | } 17 | return mapper 18 | 19 | def get_net(self, metric): 20 | return self.mapper[metric] 21 | -------------------------------------------------------------------------------- /sknet/utils/tests/test_network_types_handler.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | 3 | from sknet.utils import NetworkTypesHandler 4 | 5 | 6 | def test_handler_mapper(): 7 | handler = NetworkTypesHandler() 8 | assert handler.mapper == {'graph': nx.Graph, 9 | 'digraph': nx.DiGraph, 10 | 'multi_graph': nx.MultiGraph, 11 | 'multi_digraph': nx.MultiDiGraph} 12 | 13 | 14 | def test_get_net(): 15 | handler = NetworkTypesHandler() 16 | assert handler.get_net('graph') == nx.Graph 17 | assert handler.get_net('digraph') == nx.DiGraph 18 | assert handler.get_net('multi_graph') == nx.MultiGraph 19 | assert handler.get_net('multi_digraph') == nx.MultiDiGraph 20 | -------------------------------------------------------------------------------- /templates/issue.md: -------------------------------------------------------------------------------- 1 | ## Issue type 2 | [Bug Fix, Performance Improvement, Documentation, New Feature/Algorithm, Code Design Improvement] 3 | 4 | ## Issue description 5 | Brief description of the issue 6 | 7 | ## Replicable code 8 | If it is a bug, minimal replicable code 9 | 10 | ## Traceback and Versioning 11 | In case it is a bug, post full version of dependencies and possible tracebacks 12 | 13 | ## Possible solutions and/or where to start 14 | If you already have something in mind, give here an initial roadmap for the solution 15 | 16 | ## References and other comments 17 | If you have some references or other comments, use this section 
-------------------------------------------------------------------------------- /templates/pull_request.md: -------------------------------------------------------------------------------- 1 | # Title 2 | The title of your Pull Request 3 | 4 | ## Related Issue 5 | Please link to the issue related to your PR 6 | 7 | ## Description of the problem and solution 8 | Briefly describe the problem stated on the issue and extensively describe your 9 | implemented solution 10 | 11 | ## Benchmarks 12 | If you PR is aimed at improving performance, link useful benchmarks so the reviewers 13 | can assure an improvement was made 14 | 15 | ## Dependencies 16 | If any new dependency is added, or any dependency is change, state it here 17 | 18 | ## Breaking stuff 19 | In case your change breaks anything on the main repo, state it here 20 | 21 | ## Code example 22 | Add a code snippet so we can use your change and test it 23 | 24 | ## Other comments 25 | In case you have any other comment about this PR, use this section --------------------------------------------------------------------------------