├── .github
    └── workflows
    │   ├── coverage.yml
    │   ├── docs.yml
    │   ├── linter.yml
    │   └── unit_tests.yml
├── .gitignore
├── LICENSE
├── README.md
├── changelog.md
├── codecov.yml
├── contributing.md
├── docs
    ├── Makefile
    ├── buildDocs.sh
    ├── conf.py
    ├── make.bat
    └── source
    │   ├── _static
    │       ├── custom.css
    │       ├── full_logo.png
    │       └── logo.png
    │   ├── _templates
    │       ├── version.html
    │       └── versions.html
    │   ├── api_reference
    │       ├── index.rst
    │       ├── modules.rst
    │       ├── sknet.network_construction.dataset_constructors.rst
    │       ├── sknet.network_construction.general_constructors.rst
    │       ├── sknet.network_construction.rst
    │       ├── sknet.network_construction.tests.rst
    │       ├── sknet.network_construction.tests.test_network_construction.rst
    │       ├── sknet.network_construction.time_series_constructors.rst
    │       ├── sknet.rst
    │       ├── sknet.semi_supervised.modularity_label_propagation.rst
    │       ├── sknet.semi_supervised.rst
    │       ├── sknet.supervised.ease_of_access.rst
    │       ├── sknet.supervised.high_level_classification.rst
    │       ├── sknet.supervised.rst
    │       ├── sknet.supervised.tests.rst
    │       ├── sknet.supervised.tests.test_ease_of_access.rst
    │       ├── sknet.supervised.tests.test_high_level_classification.rst
    │       ├── sknet.unsupervised.rst
    │       ├── sknet.unsupervised.stochastic_particle_competition.rst
    │       ├── sknet.utils.low_level_models_handler.rst
    │       ├── sknet.utils.network_metrics_handler.rst
    │       ├── sknet.utils.network_types_handler.rst
    │       └── sknet.utils.rst
    │   ├── conf.py
    │   ├── development
    │       └── index.rst
    │   ├── getting_started
    │       ├── index.rst
    │       ├── installation.rst
    │       ├── semi_supervised_learning.rst
    │       ├── supervised_learning.rst
    │       ├── transforming_data.rst
    │       └── unsupervised_learning.rst
    │   ├── index.rst
    │   └── user_guide
    │       ├── images
    │           ├── ease_of_access.png
    │           ├── epsilon.png
    │           ├── k-eps.png
    │           └── knn.png
    │       └── index.rst
├── requirements.txt
├── setup.py
├── sknet
    ├── __init__.py
    ├── network_construction
    │   ├── __init__.py
    │   ├── dataset_constructors.py
    │   ├── general_constructors.py
    │   ├── tests
    │   │   ├── __init__.py
    │   │   └── test_network_construction.py
    │   └── time_series_constructors.py
    ├── semi_supervised
    │   ├── __init__.py
    │   ├── modularity_label_propagation.py
    │   └── tests
    │   │   └── test_modularity_label_propagation.py
    ├── supervised
    │   ├── __init__.py
    │   ├── ease_of_access.py
    │   ├── high_level_classification.py
    │   └── tests
    │   │   ├── __init__.py
    │   │   ├── test_ease_of_access.py
    │   │   └── test_high_level_classification.py
    ├── unsupervised
    │   ├── __init__.py
    │   ├── stochastic_particle_competition.py
    │   └── tests
    │   │   └── test_stochastic_particle_competition.py
    └── utils
    │   ├── __init__.py
    │   ├── low_level_models_handler.py
    │   ├── network_metrics_handler.py
    │   ├── network_types_handler.py
    │   └── tests
    │       └── test_network_types_handler.py
└── templates
    ├── issue.md
    └── pull_request.md


/.github/workflows/coverage.yml:
--------------------------------------------------------------------------------
 1 | name: CodeCov
 2 | 
 3 | on: [pull_request]
 4 | 
 5 | jobs:
 6 |   python-codecov:
 7 |     runs-on: ubuntu-latest
 8 |     name: CodeCov
 9 |     steps:
10 |     - name: checkout source repo
11 |       uses: actions/checkout@v2
12 |       with:
13 |         fetch-depth: ‘2’
14 | 
15 |     - name: Generate Report
16 |       run: |
17 |         python -m pip install --upgrade pip
18 |         pip install -r requirements.txt
19 |         coverage run -m pytest -vv
20 | 
21 |     - name: Upload Coverage to Codecov
22 |       uses: codecov/codecov-action@v1
23 | 


--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
 1 | name: docs_pages_workflow
 2 |  
 3 | # execute this workflow automatically when a we push to master
 4 | on:
 5 |   push:
 6 |  
 7 | jobs:
 8 |  
 9 |   build_docs_job:
10 |     runs-on: ubuntu-latest
11 |     container: debian:buster-slim
12 |  
13 |     steps:
14 |  
15 |     - name: Prereqs
16 |       env:
17 |         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
18 |       run: |
19 |         apt-get update
20 |         apt-get install -y git
21 |         git clone "https://token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" .
22 |       shell: bash
23 |  
24 |     - name: Execute script to build our documentation and update pages
25 |       env:
26 |         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
27 |       run: |
28 |         chmod 775 docs/buildDocs.sh
29 |         docs/buildDocs.sh
30 |       shell: bash


--------------------------------------------------------------------------------
/.github/workflows/linter.yml:
--------------------------------------------------------------------------------
 1 | name: Linter
 2 | 
 3 | on: [pull_request]
 4 | 
 5 | jobs:
 6 |   python-lint:
 7 |     runs-on: ubuntu-latest
 8 |     name: Linter
 9 |     steps:
10 |     - name: checkout source repo
11 |       uses: actions/checkout@v2
12 | 
13 |     - name: linting
14 |       uses: alexanderdamiani/pylinter@v1.1.0
15 |       with:
16 |           skip-mypy: true
17 |           skip-isort: true


--------------------------------------------------------------------------------
/.github/workflows/unit_tests.yml:
--------------------------------------------------------------------------------
 1 | name: Unit Tests
 2 | 
 3 | on: [pull_request]
 4 | 
 5 | jobs:
 6 |   python-pytest:
 7 |     runs-on: ubuntu-latest
 8 |     name: unit tests
 9 |     steps:
10 |     - name: checkout source repo
11 |       uses: actions/checkout@v2
12 |     
13 |     - name: setup python
14 |       uses: actions/setup-python@v1
15 |       with:
16 |         python-version: 3.7
17 | 
18 |     - name: install dependencies
19 |       run: |
20 |           python -m pip install --upgrade pip
21 |           pip install -r requirements.txt
22 |     - name: Test with pytest
23 |       run: pytest -vv


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | *.ipynb
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | .python-version
 87 | 
 88 | # pipenv
 89 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 90 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 91 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 92 | #   install all needed dependencies.
 93 | #Pipfile.lock
 94 | 
 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 96 | __pypackages__/
 97 | 
 98 | # Celery stuff
 99 | celerybeat-schedule
100 | celerybeat.pid
101 | 
102 | # SageMath parsed files
103 | *.sage.py
104 | 
105 | # Environments
106 | .env
107 | .venv
108 | env/
109 | venv/
110 | ENV/
111 | env.bak/
112 | venv.bak/
113 | 
114 | # Spyder project settings
115 | .spyderproject
116 | .spyproject
117 | 
118 | # Rope project settings
119 | .ropeproject
120 | 
121 | # mkdocs documentation
122 | /site
123 | 
124 | # mypy
125 | .mypy_cache/
126 | .dmypy.json
127 | dmypy.json
128 | 
129 | # Pyre type checker
130 | .pyre/
131 | .vscode/settings.json
132 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Tiago Toledo Junior
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ![sknet Logo](https://github.com/TNanukem/sknet/blob/develop/docs/source/_static/full_logo.png "sknet Logo")
 2 | 
 3 | ![Codecov branch](https://img.shields.io/codecov/c/github/tnanukem/sknet/develop?token=PIQ338YNK1)
 4 | 
 5 | The sknet project is a scikit-learn and NetworkX compatible framework for machine learning in complex networks. It provides learning algorithms for complex networks, as well as transforming methods to turn tabular data into complex networks.
 6 | 
 7 | It started in 2021 as a project from volunteers to help to improve the development of research on the interface between complex networks and machine learning. It main focus
 8 | is to help researchers and students to develop solutions using machine learning on complex networks.
 9 | 
10 | ## :computer: Installation
11 | 
12 | The sknet installation is available via PiPy:
13 | 
14 |     pip install scikit-net
15 | 
16 | ## :high_brightness: Quickstart
17 | 
18 | The following code snippet shows how one can transform tabular data into a complex network and then use it to create a classifier:
19 | 
20 |     from sklearn.model_selection import train_test_split
21 |     from sklean.metrics import accuracy_score
22 |     from sklearn.datasets import load_iris
23 |     from sknet.network_construction import KNNConstructor
24 |     from sknet.supervised import EaseOfAccessClassifier
25 | 
26 |     X, y = load_iris(return_X_y = True)
27 |     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
28 | 
29 |     # The constructor responsible for transforming the tabular data into a complex network
30 |     knn_c = KNNConstructor(k=5)
31 | 
32 |     classifier = EaseOfAccessClassifier()
33 |     classifier.fit(X_train, y_train, constructor=knn_c)
34 |     y_pred = classifier.predict(X_test)
35 |     accuracy_score(y_test, y_pred)
36 | 
37 | ## :pencil: Documentation
38 | 
39 | We provide an extensive API documentation as well with some user guides. The documentation is available on https://tnanukem.github.io/scikit-net/
40 | 
41 | ## Citation
42 | 
43 | If you used the scikit-net on your research project, please cite us using the following publication:
44 | 
45 |     @article{Toledo2021,
46 |     doi = {10.21105/joss.03864},
47 |     url = {https://doi.org/10.21105/joss.03864},
48 |     year = {2021},
49 |     publisher = {The Open Journal},
50 |     volume = {6},
51 |     number = {68},
52 |     pages = {3864},
53 |     author = {Tiago Toledo},
54 |     title = {sknet: A Python framework for Machine Learning in Complex Networks},
55 |     journal = {Journal of Open Source Software}
56 |     }
57 | 


--------------------------------------------------------------------------------
/changelog.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | 
 3 | All notable changes to this project will be documented in this file.
 4 | 
 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 7 | 
 8 | ## [0.1.0] - xxxx-xx-xx
 9 | 
10 | ### Added
11 | - Modularity Label Propagation now allows a network reduction proposed by Silva et al.
12 | - New constructor, based on single linkage clustering heuristics, was added to transform datasets into networks
13 | - New time series constructor for univariate series using the recurrence on the phase space
14 | 
15 | ### Changed
16 | 
17 | ### Fixed
18 | 
19 | ### Removed
20 | 
21 | ### Deprecated
22 | 
23 | [0.0.1]: https://github.com/TNanukem/scikit-net/releases/tag/v0.0.1


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | codecov:
2 |   token: 92c22736-5259-4282-a4a6-69bfd6d9f3c7
3 | 
4 | ignore:
5 |   - "*/tests/*"
6 |   - "*/sknet_env/*"


--------------------------------------------------------------------------------
/contributing.md:
--------------------------------------------------------------------------------
1 | ## Contributing
2 | 
3 | First of all, thank you for being interested in contributing to the scikit-net package.
4 | 
5 | The scikit-net is an open-source package and it depends on the help
6 | and feedback of the community to keep improving. So you are mostly
7 | welcome to help us out.
8 | 
9 | You can find instructions on how you can help with the development at the [documentation](https://tnanukem.github.io/scikit-net/main/development/index.html).


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/buildDocs.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | set -x
  3 | 
  4 | apt-get update
  5 | 
  6 | apt-get -y install python3-pip
  7 | python3 -m pip install Sphinx==4.1.2
  8 | apt-get -y install git rsync build-essential python3-stemmer python3-gitpython3-virtualenv python3-setuptools
  9 | python3 -m pip install --upgrade pip
 10 | python3 -m pip install -r requirements.txt
 11 | python3 -m pip install --upgrade rinohtype pygments
 12 | 
 13 | pwd
 14 | ls -lah
 15 | export SOURCE_DATE_EPOCH=$(git log -1 --pretty=%ct)
 16 | 
 17 | # make a new temp dir which will be our GitHub Pages docroot
 18 | docroot=`mktemp -d`
 19 |  
 20 | export REPO_NAME="${GITHUB_REPOSITORY##*/}"
 21 |   
 22 | ##############
 23 | # BUILD DOCS #
 24 | ##############
 25 | 
 26 | make -C docs clean
 27 | versions="`git for-each-ref '--format=%(refname:lstrip=-1)' refs/remotes/origin/ | grep -viE '^(HEAD|gh-pages)$'`"
 28 | for current_version in ${versions}; do
 29 |   
 30 |    # make the current language available to conf.py
 31 |    export current_version
 32 |    git checkout ${current_version}
 33 |   
 34 |    echo "INFO: Building sites for ${current_version}"
 35 |   
 36 |    # skip this branch if it doesn't have our docs dir & sphinx config
 37 |    if [ ! -e 'docs/conf.py' ]; then
 38 |       echo -e "\tINFO: Couldn't find 'docs/conf.py' (skipped)"
 39 |       continue
 40 |    fi
 41 | 
 42 |     # HTML #
 43 |     sphinx-build -b html docs/source docs/build/html/${current_version}
 44 | 
 45 |     # PDF #
 46 |     sphinx-build -b rinoh docs/source docs/build/rinoh
 47 |     mkdir -p "${docroot}/${current_version}"
 48 |     cp "docs/build/rinoh/target.pdf" "${docroot}/${current_version}/helloWorld-docs__${current_version}.pdf"
 49 | 
 50 |     # EPUB #
 51 |     sphinx-build -b epub docs/source docs/build/epub
 52 |     mkdir -p "${docroot}/${current_version}"
 53 |     cp "docs/build/epub/target.epub" "${docroot}/${current_version}/helloWorld-docs_${current_version}.epub"
 54 | 
 55 |     # copy the static assets produced by the above build into our docroot
 56 |     cp -a "docs/build/html/${current_version}/." "${docroot}/${current_version}/"
 57 | 
 58 |   
 59 | done
 60 |  
 61 |  # return to master branch
 62 | git checkout master
 63 | 
 64 | git config --global user.name "${GITHUB_ACTOR}"
 65 | git config --global user.email "${GITHUB_ACTOR}@users.noreply.github.com"
 66 |  
 67 | pushd "${docroot}"
 68 | 
 69 | git init
 70 | git remote add deploy "https://token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git"
 71 | git checkout -b gh-pages
 72 |  
 73 | touch .nojekyll
 74 |  
 75 |  # add redirect from the docroot to our default docs language/version
 76 | cat > index.html <<EOF
 77 | <!DOCTYPE html>
 78 | <html>
 79 |    <head>
 80 |       <title>helloWorld Docs</title>
 81 |       <meta http-equiv = "refresh" content="0; url='/${REPO_NAME}/main/'" />
 82 |    </head>
 83 |    <body>
 84 |       <p>Please wait while you're redirected to our <a href="/${REPO_NAME}/main/">documentation</a>.</p>
 85 |    </body>
 86 | </html>
 87 | EOF
 88 | 
 89 | cat > README.md <<EOF
 90 | # GitHub Pages Cache
 91 |  
 92 | Nothing to see here. The contents of this branch are essentially a cache that's not intended to be viewed on github.com.
 93 |  
 94 |  
 95 | If you're looking to update our documentation, check the relevant development branch's 'docs/' dir.
 96 | EOF
 97 | 
 98 | git add .
 99 |  
100 | # commit all the new files
101 | msg="Updating Docs for commit ${GITHUB_SHA} made on `date -d"@${SOURCE_DATE_EPOCH}" --iso-8601=seconds` from ${GITHUB_REF} by ${GITHUB_ACTOR}"
102 | git commit -am "${msg}"
103 |  
104 | # overwrite the contents of the gh-pages branch on our github.com repo
105 | git push deploy gh-pages --force
106 |  
107 | popd # return to main repo sandbox root
108 |  
109 | # exit cleanly
110 | exit 0


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # This file only contains a selection of the most common options. For a full
 4 | # list see the documentation:
 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 6 | 
 7 | # -- Path setup --------------------------------------------------------------
 8 | 
 9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 | sys.path.insert(0, os.path.abspath('../../'))
16 | 
17 | 
18 | # -- Project information -----------------------------------------------------
19 | 
20 | project = 'sknet'
21 | copyright = '2021, Tiago Toledo'
22 | author = 'Tiago Toledo'
23 | 
24 | 
25 | # -- General configuration ---------------------------------------------------
26 | 
27 | # Add any Sphinx extension module names here, as strings. They can be
28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
29 | # ones.
30 | extensions = [
31 |     'sphinx.ext.autodoc',
32 |     'sphinx.ext.napoleon'
33 | ]
34 | 
35 | master_doc = 'index'
36 | 
37 | # Add any paths that contain templates here, relative to this directory.
38 | templates_path = ['_templates']
39 | 
40 | # List of patterns, relative to source directory, that match files and
41 | # directories to ignore when looking for source files.
42 | # This pattern also affects html_static_path and html_extra_path.
43 | exclude_patterns = []
44 | 
45 | 
46 | # -- Options for HTML output -------------------------------------------------
47 | 
48 | # The theme to use for HTML and HTML Help pages.  See the documentation for
49 | # a list of builtin themes.
50 | #
51 | html_theme = 'pydata_sphinx_theme'
52 | html_logo = "_static/logo.png"
53 | 
54 | html_theme_options = {
55 |     "icon_links": [
56 |         {
57 |             "name": "GitHub",
58 |             "url": "https://github.com/TNanukem/scikit-net",
59 |             "icon": "fab fa-github-square",
60 |         },
61 |         {
62 |             "name": "Twitter",
63 |             "url": "https://twitter.com/TiagoJToledoJr",
64 |             "icon": "fab fa-twitter-square",
65 |         },
66 |     ],
67 |     "navbar_start": ["navbar-logo"],
68 |     "navbar_end": ["navbar-icon-links", "version"]
69 | }
70 | 
71 | html_context = {
72 |     "versions_dropdown": {
73 |         "develop": "develop (latest)",
74 |         "main": "main (stable)",
75 |     },
76 | }
77 | 
78 | # Add any paths that contain custom static files (such as style sheets) here,
79 | # relative to this directory. They are copied after the builtin static files,
80 | # so a file named "default.css" will overwrite the builtin "default.css".
81 | html_static_path = ['_static']
82 | 
83 | 
84 | def setup(app):
85 |     app.add_css_file("custom.css")
86 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/docs/source/_static/custom.css:
--------------------------------------------------------------------------------
 1 | .btn-version {
 2 |     background: #2c7fb8ff;
 3 |     color: #ffffff;
 4 | }
 5 | 
 6 | .btn-version:hover, .btn-version:focus, .btn-version:active, .btn-version.active, .open > .dropdown-toggle.btn-version {
 7 |     background: #33a6cc;
 8 | }
 9 | 
10 | .btn-version:active, .btn-version.active {
11 |     background: #007299;
12 |     box-shadow: none;
13 | }


--------------------------------------------------------------------------------
/docs/source/_static/full_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TNanukem/scikit-net/df8534268bc04787340e64a6eac2ee4beddc3068/docs/source/_static/full_logo.png


--------------------------------------------------------------------------------
/docs/source/_static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TNanukem/scikit-net/df8534268bc04787340e64a6eac2ee4beddc3068/docs/source/_static/logo.png


--------------------------------------------------------------------------------
/docs/source/_templates/version.html:
--------------------------------------------------------------------------------
 1 | <ul class="navbar-nav">
 2 |     <li class="mr-2 dropdown">
 3 |         <button type="button" class="btn btn-version btn-sm navbar-btn dropdown-toggle" id="dLabelMore" data-toggle="dropdown">
 4 |             v{{ release }}
 5 |             <span class="caret"></span>
 6 |         </button>
 7 |         <ul class="dropdown-menu" aria-labelledby="dLabelMore">
 8 |             {%- for ver, txt in versions_dropdown.items() %}
 9 |             <li><a href="https://tnanukem.github.io/scikit-net/{{ ver }}/">{{ txt }}</a></li>
10 |             {%- endfor %}
11 |         </ul>
12 |     </li>
13 | </ul>


--------------------------------------------------------------------------------
/docs/source/_templates/versions.html:
--------------------------------------------------------------------------------
 1 | {% if READTHEDOCS or display_lower_left %}
 2 | {# Add rst-badge after rst-versions for small badge style. #}
 3 |   <div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions">
 4 |     <span class="rst-current-version" data-toggle="rst-current-version">
 5 |       <span class="fa fa-book"> Read the Docs</span>
 6 |       v: {{ current_version }}
 7 |       <span class="fa fa-caret-down"></span>
 8 |     </span>
 9 |     <div class="rst-other-versions">
10 |       {% if languages|length >= 1 %}
11 |       <dl>
12 |         <dt>{{ _('Languages') }}</dt>
13 |         {% for slug, url in languages %}
14 |           {% if slug == current_language %} <strong> {% endif %}
15 |           <dd><a href="{{ url }}">{{ slug }}</a></dd>
16 |           {% if slug == current_language %} </strong> {% endif %}
17 |         {% endfor %}
18 |       </dl>
19 |       {% endif %}
20 |       {% if versions|length >= 1 %}
21 |       <dl>
22 |         <dt>{{ _('Versions') }}</dt>
23 |         {% for slug, url in versions %}
24 |           {% if slug == current_version %} <strong> {% endif %}
25 |           <dd><a href="{{ url }}">{{ slug }}</a></dd>
26 |           {% if slug == current_version %} </strong> {% endif %}
27 |         {% endfor %}
28 |       </dl>
29 |       {% endif %}
30 |       {% if downloads|length >= 1 %}
31 |       <dl>
32 |         <dt>{{ _('Downloads') }}</dt>
33 |         {% for type, url in downloads %}
34 |           <dd><a href="{{ url }}">{{ type }}</a></dd>
35 |         {% endfor %}
36 |       </dl>
37 |       {% endif %}
38 |       {% if READTHEDOCS %}
39 |       <dl>
40 |         <dt>{{ _('On Read the Docs') }}</dt>
41 |           <dd>
42 |             <a href="//{{ PRODUCTION_DOMAIN }}/projects/{{ slug }}/?fromdocs={{ slug }}">{{ _('Project Home') }}</a>
43 |           </dd>
44 |           <dd>
45 |             <a href="//{{ PRODUCTION_DOMAIN }}/builds/{{ slug }}/?fromdocs={{ slug }}">{{ _('Builds') }}</a>
46 |           </dd>
47 |       </dl>
48 |       {% endif %}
49 |       <hr/>
50 |       {% trans %}Free document hosting provided by <a href="http://www.readthedocs.org">Read the Docs</a>.{% endtrans %}
51 |  
52 |     </div>
53 |   </div>
54 | {% endif %}


--------------------------------------------------------------------------------
/docs/source/api_reference/index.rst:
--------------------------------------------------------------------------------
 1 | .. sknet documentation master file, created by
 2 |    sphinx-quickstart on Fri Mar  5 05:44:33 2021.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | API Reference
 7 | =============
 8 | 
 9 | .. toctree::
10 |    :maxdepth: 2
11 | 
12 |    sknet
13 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/modules.rst:
--------------------------------------------------------------------------------
1 | sknet
2 | =====
3 | 
4 | .. toctree::
5 |    :maxdepth: 4
6 | 
7 |    sknet
8 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.network_construction.dataset_constructors.rst:
--------------------------------------------------------------------------------
1 | sknet.network\_construction.dataset\_constructors module
2 | ========================================================
3 | 
4 | .. automodule:: sknet.network_construction.dataset_constructors
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.network_construction.general_constructors.rst:
--------------------------------------------------------------------------------
1 | sknet.network\_construction.general\_constructors module
2 | ========================================================
3 | 
4 | .. automodule:: sknet.network_construction.general_constructors
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.network_construction.rst:
--------------------------------------------------------------------------------
 1 | sknet.network\_construction package
 2 | ===================================
 3 | 
 4 | Subpackages
 5 | -----------
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 2
 9 | 
10 |    sknet.network_construction.tests
11 | 
12 | Submodules
13 | ----------
14 | 
15 | .. toctree::
16 |    :maxdepth: 2
17 | 
18 |    sknet.network_construction.dataset_constructors
19 |    sknet.network_construction.general_constructors
20 |    sknet.network_construction.time_series_constructors
21 | 
22 | Module contents
23 | ---------------
24 | 
25 | .. automodule:: sknet.network_construction
26 |    :members:
27 |    :undoc-members:
28 |    :show-inheritance:
29 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.network_construction.tests.rst:
--------------------------------------------------------------------------------
 1 | sknet.network\_construction.tests package
 2 | =========================================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 2
 9 | 
10 |    sknet.network_construction.tests.test_network_construction
11 | 
12 | Module contents
13 | ---------------
14 | 
15 | .. automodule:: sknet.network_construction.tests
16 |    :members:
17 |    :undoc-members:
18 |    :show-inheritance:
19 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.network_construction.tests.test_network_construction.rst:
--------------------------------------------------------------------------------
1 | sknet.network\_construction.tests.test\_network\_construction module
2 | ====================================================================
3 | 
4 | .. automodule:: sknet.network_construction.tests.test_network_construction
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.network_construction.time_series_constructors.rst:
--------------------------------------------------------------------------------
1 | sknet.network\_construction.time\_series\_constructors module
2 | =============================================================
3 | 
4 | .. automodule:: sknet.network_construction.time_series_constructors
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.rst:
--------------------------------------------------------------------------------
 1 | sknet package
 2 | =============
 3 | 
 4 | Subpackages
 5 | -----------
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 2
 9 | 
10 |    sknet.network_construction
11 |    sknet.semi_supervised
12 |    sknet.supervised
13 |    sknet.unsupervised
14 |    sknet.utils
15 | 
16 | Module contents
17 | ---------------
18 | 
19 | .. automodule:: sknet
20 |    :members:
21 |    :undoc-members:
22 |    :show-inheritance:
23 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.semi_supervised.modularity_label_propagation.rst:
--------------------------------------------------------------------------------
1 | sknet.semi\_supervised.modularity\_label\_propagation module
2 | ============================================================
3 | 
4 | .. automodule:: sknet.semi_supervised.modularity_label_propagation
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.semi_supervised.rst:
--------------------------------------------------------------------------------
 1 | sknet.semi\_supervised package
 2 | ==============================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 2
 9 | 
10 |    sknet.semi_supervised.modularity_label_propagation
11 | 
12 | Module contents
13 | ---------------
14 | 
15 | .. automodule:: sknet.semi_supervised
16 |    :members:
17 |    :undoc-members:
18 |    :show-inheritance:
19 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.supervised.ease_of_access.rst:
--------------------------------------------------------------------------------
1 | sknet.supervised.ease\_of\_access module
2 | ========================================
3 | 
4 | .. automodule:: sknet.supervised.ease_of_access
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.supervised.high_level_classification.rst:
--------------------------------------------------------------------------------
1 | sknet.supervised.high\_level\_classification module
2 | ===================================================
3 | 
4 | .. automodule:: sknet.supervised.high_level_classification
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.supervised.rst:
--------------------------------------------------------------------------------
 1 | sknet.supervised package
 2 | ========================
 3 | 
 4 | Subpackages
 5 | -----------
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 2
 9 | 
10 |    sknet.supervised.tests
11 | 
12 | Submodules
13 | ----------
14 | 
15 | .. toctree::
16 |    :maxdepth: 2
17 | 
18 |    sknet.supervised.ease_of_access
19 |    sknet.supervised.high_level_classification
20 | 
21 | Module contents
22 | ---------------
23 | 
24 | .. automodule:: sknet.supervised
25 |    :members:
26 |    :undoc-members:
27 |    :show-inheritance:
28 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.supervised.tests.rst:
--------------------------------------------------------------------------------
 1 | sknet.supervised.tests package
 2 | ==============================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 2
 9 | 
10 |    sknet.supervised.tests.test_ease_of_access
11 |    sknet.supervised.tests.test_high_level_classification
12 | 
13 | Module contents
14 | ---------------
15 | 
16 | .. automodule:: sknet.supervised.tests
17 |    :members:
18 |    :undoc-members:
19 |    :show-inheritance:
20 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.supervised.tests.test_ease_of_access.rst:
--------------------------------------------------------------------------------
1 | sknet.supervised.tests.test\_ease\_of\_access module
2 | ====================================================
3 | 
4 | .. automodule:: sknet.supervised.tests.test_ease_of_access
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.supervised.tests.test_high_level_classification.rst:
--------------------------------------------------------------------------------
1 | sknet.supervised.tests.test\_high\_level\_classification module
2 | ===============================================================
3 | 
4 | .. automodule:: sknet.supervised.tests.test_high_level_classification
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.unsupervised.rst:
--------------------------------------------------------------------------------
 1 | sknet.unsupervised package
 2 | ==========================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 2
 9 | 
10 |    sknet.unsupervised.stochastic_particle_competition
11 | 
12 | Module contents
13 | ---------------
14 | 
15 | .. automodule:: sknet.unsupervised
16 |    :members:
17 |    :undoc-members:
18 |    :show-inheritance:
19 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.unsupervised.stochastic_particle_competition.rst:
--------------------------------------------------------------------------------
1 | sknet.unsupervised.stochastic\_particle\_competition module
2 | ===========================================================
3 | 
4 | .. automodule:: sknet.unsupervised.stochastic_particle_competition
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.utils.low_level_models_handler.rst:
--------------------------------------------------------------------------------
1 | sknet.utils.low\_level\_models\_handler module
2 | ==============================================
3 | 
4 | .. automodule:: sknet.utils.low_level_models_handler
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.utils.network_metrics_handler.rst:
--------------------------------------------------------------------------------
1 | sknet.utils.network\_metrics\_handler module
2 | ============================================
3 | 
4 | .. automodule:: sknet.utils.network_metrics_handler
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.utils.network_types_handler.rst:
--------------------------------------------------------------------------------
1 | sknet.utils.network\_types\_handler module
2 | ==========================================
3 | 
4 | .. automodule:: sknet.utils.network_types_handler
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/api_reference/sknet.utils.rst:
--------------------------------------------------------------------------------
 1 | sknet.utils package
 2 | ===================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 2
 9 | 
10 |    sknet.utils.low_level_models_handler
11 |    sknet.utils.network_metrics_handler
12 |    sknet.utils.network_types_handler
13 | 
14 | Module contents
15 | ---------------
16 | 
17 | .. automodule:: sknet.utils
18 |    :members:
19 |    :undoc-members:
20 |    :show-inheritance:
21 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # This file only contains a selection of the most common options. For a full
 4 | # list see the documentation:
 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 6 | 
 7 | # -- Path setup --------------------------------------------------------------
 8 | 
 9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 | sys.path.insert(0, os.path.abspath('../../'))
16 | 
17 | 
18 | # -- Project information -----------------------------------------------------
19 | 
20 | project = 'sknet'
21 | copyright = '2021, Tiago Toledo'
22 | author = 'Tiago Toledo'
23 | 
24 | 
25 | # -- General configuration ---------------------------------------------------
26 | 
27 | # Add any Sphinx extension module names here, as strings. They can be
28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
29 | # ones.
30 | extensions = [
31 |     'sphinx.ext.autodoc',
32 |     'sphinx.ext.napoleon'
33 | ]
34 | 
35 | master_doc = 'index'
36 | 
37 | # Add any paths that contain templates here, relative to this directory.
38 | templates_path = ['_templates']
39 | 
40 | # List of patterns, relative to source directory, that match files and
41 | # directories to ignore when looking for source files.
42 | # This pattern also affects html_static_path and html_extra_path.
43 | exclude_patterns = []
44 | 
45 | 
46 | # -- Options for HTML output -------------------------------------------------
47 | 
48 | # The theme to use for HTML and HTML Help pages.  See the documentation for
49 | # a list of builtin themes.
50 | #
51 | html_theme = 'pydata_sphinx_theme'
52 | html_logo = "_static/logo.png"
53 | 
54 | html_theme_options = {
55 |     "icon_links": [
56 |         {
57 |             "name": "GitHub",
58 |             "url": "https://github.com/TNanukem/scikit-net",
59 |             "icon": "fab fa-github-square",
60 |         },
61 |         {
62 |             "name": "Twitter",
63 |             "url": "https://twitter.com/TiagoJToledoJr",
64 |             "icon": "fab fa-twitter-square",
65 |         },
66 |     ],
67 |     "navbar_start": ["navbar-logo"],
68 |     "navbar_end": ["navbar-icon-links", "version"]
69 | }
70 | 
71 | html_context = {
72 |     "versions_dropdown": {
73 |         "develop": "develop (latest)",
74 |         "main": "main (stable)",
75 |     },
76 | }
77 | 
78 | # Add any paths that contain custom static files (such as style sheets) here,
79 | # relative to this directory. They are copied after the builtin static files,
80 | # so a file named "default.css" will overwrite the builtin "default.css".
81 | html_static_path = ['_static']
82 | 
83 | 
84 | def setup(app):
85 |     app.add_css_file("custom.css")
86 | 


--------------------------------------------------------------------------------
/docs/source/development/index.rst:
--------------------------------------------------------------------------------
 1 | .. sknet documentation master file, created by
 2 |    sphinx-quickstart on Fri Mar  5 05:44:33 2021.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Development
 7 | ===========
 8 | 
 9 | This is a guide for anyone interested in helping the development of the sknet. The library is an
10 | open-source project and therefore depends on the community to keep existing, everyone is welcome
11 | to help us improve.
12 | 
13 | How to contribute?
14 | ------------------
15 | 
16 | There are several ways of contributing for the sknet. Below we state those ways in ascending order
17 | of complexity.
18 | 
19 | Opening an issue
20 | ^^^^^^^^^^^^^^^^
21 | We oficially use the Issue Tracker on our github repo to hold up every new feature request and bug
22 | tracking. Therefore, you can open up an issue to:
23 | 
24 | - Warn us about some bug on the library
25 | - Warn us about documentation errors
26 | - Request a new feature or change for one already implemented algorithm
27 | - Request a brand new algorithm
28 | 
29 | We provide a basic template for issues on the ``templates`` folder inside our Github repo, please
30 | refer to it before opening the issue so you can provide us with all of the information we need to
31 | evaluate and (possibly) work on your issue.
32 | 
33 | Contributing to the documentation
34 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
35 | If you found some problem on the documentation, such as wrong information, typos or something that
36 | is missing or could be better explained, please, open an issue about it. After that, if you want to
37 | correct the documentation yourself, feel free to open up a Pull Request with your correction. Please
38 | remember to cite your issue on the Pull Request.
39 | 
40 | Solving an issue
41 | ^^^^^^^^^^^^^^^^
42 | If you find an issue on the repo and thinks you can solve it, we encourage you to do so. Just verify
43 | previously if no one is already assigned that issue. If this is not the case, then you can ask, on the
44 | issue comments, to be assigned it.
45 | 
46 | Once you finished your implementation, open up a Pull Request on the repo. Your Pull Request will be
47 | reviewed. Be aware that several iterations of revision may be required before the code is merged. We
48 | encourage you to see the revision as a chat between two (or more) people trying to deliver the best
49 | possible product to the people using the library.
50 | 
51 | Pull Requests
52 | -------------
53 | For those interested into opening Pull Requests for new features on the code, we will briefly describe
54 | some of the things you must pay attention to.
55 | 
56 | First of all, one template for Pull Requests is available on the ``templates`` folder inside the Github
57 | repo. Regarding to your code, some restrictions must be satisfied:
58 | 
59 | - Every Pull Request branch must be made from and merged to the ``develop`` branch
60 | - Every new class or method must be unittested with pytest. We will not accept additions to the repo that reduces our coverage without a good reason for doing so
61 | - Every public method must have a docstring using the numpy docstring style
62 | - Every code must adhere to the PEP8. We suggest using flake8 to assess your style
63 | - Performance improvements must contain benchmarks results
64 | - We value good-sense when documenting methods
65 | - Every change to modules with interface to users must have an entry on the documentation
66 | 
67 | Our Continous Integration pipeline will help you ensure most of those aspects. However, we strongly
68 | encourage you to run those tests on your machine before submitting the Pull Request to avoid overhead
69 | on the CI.
70 | 
71 | Doubts?
72 | -------
73 | 
74 | If any doubt remain, please fell free to contact any of the developers.
75 | 


--------------------------------------------------------------------------------
/docs/source/getting_started/index.rst:
--------------------------------------------------------------------------------
 1 | .. sknet documentation master file, created by
 2 |    sphinx-quickstart on Fri Mar  5 05:44:33 2021.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Getting Started
 7 | ===============
 8 | 
 9 | How to start using the sknet for your projects.
10 | 
11 | .. toctree::
12 |    :maxdepth: 2
13 | 
14 |    installation
15 |    transforming_data
16 |    supervised_learning
17 |    semi_supervised_learning
18 |    unsupervised_learning
19 | 
20 | 


--------------------------------------------------------------------------------
/docs/source/getting_started/installation.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ============
3 | 
4 | In order to install the sknet you can use pip using the ``scikit-net`` name as follows:
5 | 
6 | .. code-block:: console
7 | 
8 |     pip install scikit-net
9 | 


--------------------------------------------------------------------------------
/docs/source/getting_started/semi_supervised_learning.rst:
--------------------------------------------------------------------------------
 1 | Semi Supervised Learning
 2 | ========================
 3 | 
 4 | The semi-supervised algorithms try to leverage great amounts of unlabeled data
 5 | with a smaller amount of labeled data. One can use a semi-supervised algorithm
 6 | available on the sknet as follows.
 7 | 
 8 | .. code-block:: python
 9 | 
10 |     from sklearn.datasets import load_iris
11 |     from sknet.network_construction import KNNConstructor
12 |     from sknet.semi_supervised import ModularityLabelPropagation
13 |     X, y = load_iris(return_X_y = True)
14 |     knn_c = KNNConstructor(k=5, sep_comp=False)
15 |     y[10:20] = np.nan
16 |     y[70:80] = np.nan
17 |     y[110:120] = np.nan
18 |     propagator = ModularityLabelPropagation()
19 |     propagator.fit(X, y, constructor=knn_c)
20 |     propagator.generated_y
21 | 


--------------------------------------------------------------------------------
/docs/source/getting_started/supervised_learning.rst:
--------------------------------------------------------------------------------
 1 | Supervised Learning
 2 | ===================
 3 | On a supervised learning setting, the sknet has two main focuses:
 4 | - Performing a supervised learning task on a complex network
 5 | - Use complex networks to improve the performance of other machine learning algorithms
 6 | 
 7 | We will briefly show two examples of supervised learning algorithms available on sknet.
 8 | 
 9 | Heuristic of Ease of Access
10 | ---------------------------
11 | 
12 | This is a learning algorithm to be applied on complex networks and consists on verifying
13 | how new samples affects the similarity between nodes when added to the component related
14 | to a given class.
15 | 
16 | The following code snippet shows how to run it using a tabular dataset:
17 | 
18 | .. code-block:: python
19 | 
20 |     from sklearn.datasets import load_iris
21 |     from sknet.network_construction import KNNConstructor
22 |     from sknet.supervised import EaseOfAccessClassifier
23 |     X, y = load_iris(return_X_y = True)
24 |     X_train, X_test, y_train, y_test = train_test_split(X, y,
25 |                                                         test_size=0.33)
26 |     knn_c = KNNConstructor(k=5)
27 |     classifier = EaseOfAccessClassifier(t=5)
28 |     classifier.fit(X_train, y_train, constructor=knn_c)
29 |     ease = classifier.predict(X_test)
30 | 
31 | If you want to run it on a Complex Network, then the following snippet shows how to:
32 | 
33 | .. code-block:: python
34 | 
35 |     from sknet.supervised import EaseOfAccessClassifier
36 | 
37 |     classifier = EaseOfAccessClassifier(t=5)
38 |     classifier.fit(G=G)
39 |     ease = classifier.predict(G_test)
40 | 
41 | High Leval Data Classification
42 | ------------------------------
43 | 
44 | This algorithm leverages both, traditional tabular Machine Learning and Complex
45 | Networks Machine Learning to generate a classifier with better accuracy. In order
46 | to use this method, you must use a tabular dataset with the desired features. 
47 | 
48 | This algorithm will use the low-level (traditional Machine Learning model) model to
49 | predict the classes probabilities and then will do the same using a Complex Network
50 | method. Then, both of the predictions will be united generating a single probability
51 | prediction.
52 | 
53 | The following snippet shows how to use it:
54 | 
55 | .. code-block:: python
56 | 
57 |     from sklearn.datasets import load_iris
58 |     from sknet.network_construction import KNNConstructor
59 |     from sknet.supervised import HighLevelClassifier
60 |     X, y = load_iris(return_X_y = True)
61 |     X_train, X_test, y_train, y_test = train_test_split(X, y,
62 |                                                         test_size=0.33)
63 |     knn_c = KNNConstructor(k=5)
64 |     classifier = HighLevelClassifier()
65 |     classifier.fit(X_train, y_train, constructor=knn_c)
66 |     pred = classifier.predict(X_test)


--------------------------------------------------------------------------------
/docs/source/getting_started/transforming_data.rst:
--------------------------------------------------------------------------------
 1 | Transforming Data
 2 | =================
 3 | 
 4 | The sknet provides classes to allow data transformation between different kinds. Since
 5 | the implemented algorithms may require an specific data type to work, those classes
 6 | allow the user to freely transform data and use any of the methods.
 7 | 
 8 | So far, the following transformations are available:
 9 | 
10 | - Tabular data -> Complex networks
11 | - Time series tabular data -> Complex networks
12 | 
13 | Below there is an example of how one can use one of the tabular datasets constructor
14 | to turn tabular data into a complex network.
15 | 
16 | .. code-block:: python
17 | 
18 |     from sklearn.datasets import load_iris
19 |     from sknet.network_construction import KNNEpislonRadiusConstructor
20 |     X, y = load_iris(return_X_y = True)
21 |     ke_c = KNNEpislonRadiusConstructor(k=3, epsilon=0.3)
22 |     ke_c.fit(X, y)
23 |     G = ke_c.transform()
24 | 
25 | And below an example of how one can use one of the time series constructor to turn a
26 | time series into a complex network:
27 | 
28 | .. code-block:: python
29 | 
30 |     from sknet.network_construction import UnivariateCorrelationConstructor
31 |     r = 0.5
32 |     L = 10
33 |     constructor = UnivariateCorrelationConstructor(r, L)
34 |     constructor.fit(X)
35 |     G = constructor.transform()


--------------------------------------------------------------------------------
/docs/source/getting_started/unsupervised_learning.rst:
--------------------------------------------------------------------------------
 1 | Unsupervised Learning
 2 | =====================
 3 | 
 4 | The unsupervised learning methods, when applied to complex networks, are usually
 5 | called community dectection methods. Their focus is to find groups of nodes where
 6 | the number of edges intra-community is way greater than the number of edges extra-community.
 7 | 
 8 | The following code snippet shows how one can use one of the unsupervised methods of the sknet
 9 | to clusterize some dataset:
10 | 
11 | .. code-block:: python
12 | 
13 |     from sklearn.datasets import load_iris
14 |     from sknet.network_construction import KNNConstructor
15 |     from sknet.unsupervised import StochasticParticleCompetition
16 |     X, y = load_iris(return_X_y = True)
17 |     knn_c = KNNConstructor(k=5, sep_comp=False)
18 |     SCP = StochasticParticleCompetition()
19 |     SCP.fit(X, y, constructor=knn_c)
20 |     SCP.clusters_
21 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. sknet documentation master file, created by
 2 |    sphinx-quickstart on Fri Mar  5 05:44:33 2021.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to sknet's documentation!
 7 | =================================
 8 | 
 9 | The sknet is a scikit-learn compatible and NetworkX compatible library that implements tools for
10 | applying machine learning algorithms to complex networks.
11 | 
12 | It hopes to help researchers and students on the area to develop solutions to complex problems
13 | and further allow the development of new research on the Complex Networks area.
14 | 
15 | .. toctree::
16 |    :maxdepth: 1
17 |    :caption: Contents:
18 | 
19 |    getting_started/index
20 |    user_guide/index
21 |    api_reference/index
22 |    development/index
23 | 


--------------------------------------------------------------------------------
/docs/source/user_guide/images/ease_of_access.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TNanukem/scikit-net/df8534268bc04787340e64a6eac2ee4beddc3068/docs/source/user_guide/images/ease_of_access.png


--------------------------------------------------------------------------------
/docs/source/user_guide/images/epsilon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TNanukem/scikit-net/df8534268bc04787340e64a6eac2ee4beddc3068/docs/source/user_guide/images/epsilon.png


--------------------------------------------------------------------------------
/docs/source/user_guide/images/k-eps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TNanukem/scikit-net/df8534268bc04787340e64a6eac2ee4beddc3068/docs/source/user_guide/images/k-eps.png


--------------------------------------------------------------------------------
/docs/source/user_guide/images/knn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TNanukem/scikit-net/df8534268bc04787340e64a6eac2ee4beddc3068/docs/source/user_guide/images/knn.png


--------------------------------------------------------------------------------
/docs/source/user_guide/index.rst:
--------------------------------------------------------------------------------
  1 | .. sknet documentation master file, created by
  2 |    sphinx-quickstart on Fri Mar  5 05:44:33 2021.
  3 |    You can adapt this file completely to your liking, but it should at least
  4 |    contain the root `toctree` directive.
  5 | 
  6 | User Guide
  7 | **********
  8 | 
  9 | This section will introduce the main modules of the sknet and show some examples as well as explaining the theory
 10 | behind the implemented algorithms.
 11 | 
 12 | The sknet main structure divide the classes into two main types: auxiliary methods such as utilities and transformations and
 13 | machine learning methods which are divided into supervised, unsupervised and semi supervised methods.
 14 | 
 15 | Most of the Machine Learning methods can work both with tabular data (in form of a Pandas Dataframe or a Numpy Array) and with graph data
 16 | (in form of a NetworkX complex network), exceptions will be explicit on the documentation.
 17 | 
 18 | Transformation methods
 19 | ======================
 20 | 
 21 | These are the backbones of the inner workings of the sknet. The transformation classes are responsible for transforming data from one
 22 | type to another. To this date, the following transformations are possible:
 23 | 
 24 | - Tabular Data -> Complex Network
 25 | - Time Series -> Complex Network
 26 | 
 27 | The Machine Learning classes are responsible for transforming data to the appropriate format for each one, however, one can always
 28 | insert the already transformed data into the class.
 29 | 
 30 | Dataset Constructors
 31 | --------------------
 32 | 
 33 | Those are the methods responsible for transforming tabular data, from the Pandas DataFrame or the Numpy Array format into a
 34 | NetworkX complex network.
 35 | 
 36 | When dealing with Dataset Constructors, one may have the classes of the tabular data availabe (such as on a supervised method),
 37 | on that case, one may set the constructor so it will generate separated components for each class. Some Machine Learning models
 38 | will require this while others will require that no separated component is generated. Look up for the documentation of each method
 39 | to be aware of the requirements for each method.
 40 | 
 41 | KNN Constructor
 42 | ^^^^^^^^^^^^^^^
 43 | 
 44 | The KNN Constructor uses a k-Nearest Neighbors algorithm to create edges between the instances (rows) of our tabular dataset. For that
 45 | the distance between each instance of the dataset is calculated using some distance metric, like the Euclidean Distance, and then, for each
 46 | instance, the k closest instances are selected and edges are created between them.
 47 | 
 48 | Notice that this methodology does not create a symmetric network since, given node ``i``, node ``j`` could be one of the k closest points to it but
 49 | the contrary may not be true.
 50 | 
 51 | .. image:: images/knn.png
 52 |    :alt: KNN Constructor
 53 | 
 54 | Also, this method does not allow for singletons to be created. If a node is too far away from the others on the generated space, it will
 55 | create at least k edges with k other nodes.
 56 | 
 57 | The main drawback of this methodology is that, for dense regions where there are too many nodes close to each other, the degree of each node
 58 | will be underestimated.
 59 | 
 60 | Epsilon-Radius Constructor
 61 | ^^^^^^^^^^^^^^^^^^^^^^^^^^
 62 | 
 63 | The Epsilon-Radius constructor, for each node (row) of the dataset, connects it to all nodes that are inside a circle of radius epsilon.  For that
 64 | the distance between each instance of the dataset is calculated using some distance metric, like the Euclidean Distance.
 65 | 
 66 | .. image:: images/epsilon.png
 67 |    :alt: Epsilon Radius Constructor
 68 | 
 69 | This methodology will create a symmetric network since that, for a node to be inside the radius of another, the contrary must also be true at all times. However
 70 | this method allows singletons to be created since it may be that there are no nodes inside the radius, which is a big drawback of this method.
 71 | 
 72 | KNN Epsilon-Radius Constructor
 73 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 74 | 
 75 | To overcome the drawbacks of both, the KNN Constructor and the Epsilon-Radius constructor, the KNN Epsilon-Radius Constructor tries to sum-up the strenghts
 76 | of both methods. This constructor will use the Epsilon-Radius method for dense regions of the space and the K-NN method for sparse regions according to the
 77 | following equation:
 78 | 
 79 | .. math::
 80 | 
 81 |    \left\{\begin{matrix}
 82 |       \epsilon\text{-radius}(v_i), & \text{if} |\epsilon\text{-radius}| > k \\ 
 83 |       k\text{-NN}(v_i), & \text{otherwise} 
 84 |    \end{matrix}\right.
 85 | 
 86 | The idea behind this strategy is to add more edges on dense regions that should be more densely connected and to avoid singletons being created on sparse
 87 | regions. This way, the generated network will be connected and will have a variable degree level, better representing real world networks.
 88 | 
 89 | .. image:: images/k-eps.png
 90 |    :alt: KNN Epsilon-Radius Constructor
 91 | 
 92 | Single Linkage Clustering Heuristics Constructor
 93 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 94 | 
 95 | This constructor uses the idea of the Single Linkage heuristic for clustering to generate a network that preserves the original clustering topology
 96 | of the dataset. This tries to avoid the over sparsity or over density of the generated networks from the previous constructors that are not able to
 97 | guarantee the maintainance of the cluster topology.
 98 | 
 99 | The first step is to calculate the distance between each instance of the dataset is calculated using some distance metric, like the Euclidean Distance.
100 | With that in hands, each node is considered a cluster, then, the two closest clusters are found and the k nearest neighbors between them are connected
101 | by edges if their distance is smaller than a threshold defined by the intra-cluster dissimilarity of each one.
102 | 
103 | This process merges the two clusters. Then, it repeats until we have only one cluster left, then the network is complete.
104 | 
105 | This method will keep the sparsity between clusters and the density inside a cluster, which, depending on the problem at hand, can be necessary
106 | for the study of the data.
107 | 
108 | More information about this method can be found in the following paper:
109 | Cupertino, T.H., Huertas, J., & Zhao, L. (2013). Data clustering using controlled consensus in complex networks. Neurocomputing, 118, 132-140.
110 | 
111 | 
112 | Time Series Constructors
113 | ------------------------
114 | 
115 | Those are the methods responsible for transforming time series data, univariate or multivariate, into a complex network representation.
116 | 
117 | Correlation Constructor
118 | ^^^^^^^^^^^^^^^^^^^^^^^
119 | 
120 | The idea behind the Correlation Constructor is to split the time series into N segments of lenght L
121 | each one which will be a node in our complex network. Then, having those segments, one can calculate the pearson correlation coefficient
122 | between those segments, creating a correlation matrix C.
123 | 
124 | Then, an user-defined parameter ``r`` defines the correlation threshold for the creation of an edge between two nodes (segments) of the
125 | network. If the correlation between them is greater than ``r``, then an edge is created.
126 | 
127 | Notice that this generate an undirected graph since the correlation between two segments will always be symmetric. This module implements
128 | two variations of this method: one for univariate time series and another for multivariate times series.
129 | 
130 | More information about those methods can be found on: Yang, Y., Yang, H.: Complex network-based time series analysis. Physica A 387, 1381–1386 (2008)
131 | 
132 | Recurrence Constructor
133 | ^^^^^^^^^^^^^^^^^^^^^^
134 | 
135 | The recurrence constructor uses the concept of recurrence on the phase space of the time series. Given an embedding of the time series (such as the
136 | Takens Embedding), it is said that two states are recurrent if they are similar enough. So, given two states in the phase space defined as:
137 | 
138 | .. math::
139 |     x_i = (x(t), x(t + \tau), \dots , x(t + (d - 1)\tau))
140 | 
141 | Two states are recurrent if:
142 | 
143 | .. math::
144 |     ||x_i - x_j|| < \epsilon
145 | 
146 | Then, after the embedding was made, one can easily calculate a distance matrix between each of the states. Then, the self-loops (diagonals) are
147 | set to zero and every entry smaller than epsilon will generate an edge between the states of the series.
148 | 
149 | More information about this method can be found on: Donner, R.V., Zou, Y., Donges, J.F., Marwan, N., Kurths, J.: Recurrence
150 | networks – a novel paradigm for nonlinear time series analysis. New J. Phys. 12, 033025 (2010)
151 | 
152 | Supervised Methods
153 | ==================
154 | 
155 | Supervised methods have one objective: given a labeled dataset, learn the data patterns to the able to predict the label (continous or discrete)
156 | of new, unseen, data samples.
157 | 
158 | Heuristic of Ease of Access
159 | ---------------------------
160 | 
161 | This algorithm can be used, both, as a classifier and as a regressor. Its main idea is to consider the network as a Markov Chain to, on the convergence
162 | of the chain, identify which classes (or values) have a higher probability for a given unlabeled instance.
163 | 
164 | Given the network with labeled instances we have the weight matrix of the network, which can be considered as the adjacency matrix of a weighted network.
165 | 
166 | For each unlabeled instance we add it to the network and calculate the similarity (which in this case can be an Euclidean distance for example) of this
167 | new node to every other node of the network which will be put into a vector ``S``. Using those similarities, we will disturb the weights matrix, using
168 | an parameter epsilon, according to the following formula:
169 | 
170 | .. math::
171 |    \hat{W} = W + \epsilon \hat{S}
172 | 
173 | where:
174 | 
175 | .. math::
176 |    \hat{S} = \begin{bmatrix}
177 |       s_1 & \dots & s_1 \\ 
178 |       s_2 & \dots & s_2\\ 
179 |       \vdots & \vdots & \vdots\\ 
180 |       s_L & \dots & s_L
181 |       \end{bmatrix}
182 | 
183 | The image below shows the effect of adding this new node and removing if it right after. Notice that now self loops are created in the network since we
184 | are summing up a value on every weight. 
185 | 
186 | .. image:: images/ease_of_access.png
187 |    :alt: Ease of Access network change
188 | 
189 | Then, we use the weight matrix to calculate the transition probabilities and finally we compute the convergence of the Markov Chain to the limiting
190 | probabilities. At this point, every limiting probability represents a state and can be interpreted as the probability of the unlabeled example
191 | belonging to the class of that state.
192 | 
193 | We then select the ``t`` biggest probabilities to define the class of or unlabeled example. In case of a classification, the mode of the top
194 | ``t`` states is considered. If we are dealing with a regression, then the average value of the ``t`` states is used.
195 | 
196 | More information about this method can be found on: Cupertino, T.H., Zhao, L., Carneiro, M.G.: Network-based supervised data
197 | classification by using an heuristic of ease of access. Neurocomputing 149(Part A), 86–92 (2015)
198 | 
199 | High Level Data Classification
200 | ------------------------------
201 | 
202 | The High Level Data Classification algorithm tries to incorporate the findings from traditional Machine Learning algorithms, such as SVMs and
203 | Random Forests, with the structural pattern recognition promoted by analyzing the metrics of a complex network. In order to do so, it receives
204 | the tabular data in a regular Machine Learning fashion and fits a low-level (traditional ML) classifier on the data.
205 | 
206 | Then the dataset is transformed into a complex network with a separated component for each of its classes, using one of the available constructors.
207 | This network is what we call the training network.
208 | 
209 | For each of the unlabeled examples we want to predict, two kind of predictions will be done:
210 | 
211 |  - A low-level prediction where the fitted low-level model will have its ``predict`` or ``predict_proba`` method called to classify the data.
212 |  - A high-level prediction where we will use the complex network to calculate a probability of the instance belonging to any of the classes
213 | 
214 | Once this is done, the probability of allocation on each class is defined by the equation:
215 | 
216 | .. math::
217 |    F_i^{(y)} = (1 - \rho )L_i^{(y)} + \rho H_i^{(y)}
218 | 
219 | Where :math:`\rho` is a user-defined parameter, :math:`F_i^{(y)}` is the probability of :math:`i` belonging to class y, :math:`L_i^{(y)}` is the probabilities
220 | associated with the low-level classifier and :math:`H_i^{(y)` are the probabilities associated with the high-level classifier.
221 | 
222 | How the high-level classification is done
223 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
224 | 
225 | In order to generate the probabilities from :math:`H_i^{(y)}`, each unlabeled example is inserted into each of the components of the network, in 
226 | which case we are basically testing it on every class of our data. Then, several metrics are calculated on the network, before and after the
227 | insertion of this new data point. 
228 | 
229 | If this insertion changes those metrics too much, this is an evidence that maybe it does not belong in that class. On the other hand, if the
230 | metrics remain almost constant, it means that this new example does not change the structure of the network and thus may be part of that class.
231 | 
232 | The user can define which metrics will be computed and what is the weight to be used on each metric by means of the list of parameters :math:`\alpha`).
233 | Notice that :math:`\alpha` must sum-up to 1.
234 | 
235 | The list of available functions can be seem on the documentation of the NetworkMetrics (colocar link aqui). More information about this method can be
236 | found on: Silva, T.C., Zhao, L.: Network-based high level data classification. IEEE Trans. Neural Netw. Learn. Syst. 23(6), 954–970 (2012)
237 | 
238 | Unsupervised Methods
239 | ====================
240 | 
241 | Unsupervised methods, usually called community detection methods on the Complex Network area, are algorithms that try to find patterns on
242 | the data so to group up data samples.
243 | 
244 | Stochastic Particle Competition
245 | -------------------------------
246 | 
247 | The Stochastic Particle Competition algorithm lends some of the concepts of the genetic algorithms optimization to find community structure
248 | on complex networks. Given a set of ``K`` initial particles, put at random on the nodes of the network, they will compete against each other
249 | for the dominance of the network nodes. It is expected that after some time this algorithm will converge to a state where each community is
250 | dominated by one of the initial ``K`` particles.
251 | 
252 | At each timestep, each particle chooses the the next node to visit by combinating a preferential movement matrix, where it has a greater
253 | probability of visiting previously visited nodes, and a exploration matrix, which will send this particle over to new areas in order to try
254 | to dominate them. 
255 | 
256 | The :math:`\lambda` parameter is responsible to define how much exploration versus exploitation each of the particles will do during the fitting process.
257 | 
258 | Each time one node is visited by a particle, its dominance on the node increases. The same way, if a rival particle visits the same node, then
259 | the dominance level will be reduced. On the same way, every time a particle visits a dominated node, it regains energy, while if it visits a
260 | node dominated by other particle, it loses energy. If a particle runs out of energy, then it is transported back to its dominance region.
261 | 
262 | The minimal and maximal energy of each particle is defined by the :math:`\omega_{min}` and :math:`\omega_{max}` parameters respectively.
263 | 
264 | The convergence of the system happens when the difference between the dominance levels on two sequential steps is smaller than a user-defined
265 | parameter :math:`\epsilon`.
266 | 
267 | More information about this method can be found on: T. C. Silva and L. Zhao, "Stochastic Competitive Learning in Complex
268 | Networks," in IEEE Transactions on Neural Networks and Learning Systems, vol. 23, no. 3, pp. 385-398, March 2012, doi: 10.1109/TNNLS.2011.2181866.
269 | 
270 | Semi Supervised Methods
271 | =======================
272 | 
273 | These are methods designed to work with large amounts of unlabeled data given a small amount of labeled data. Usually this kind of method
274 | works towards spreading labels from labeled examples to unlabeled examples.
275 | 
276 | Modularity Label Propagation
277 | ----------------------------
278 | 
279 | This algorithm is based on the greedy modularity maximization community detection algorithm. In order to use it, with need a dataset with ``L``
280 | labeled nodes and several unlabeled nodes. At each step of this algorithm, two communities (nodes) are merged to the same class following some
281 | restrictions, trying to keep the modularity increment the maximum as possible.
282 | 
283 | The criteria for the merge at each step is as follows:
284 | 
285 | - If both nodes already have a class and are from different classes, the merge does not occour
286 | - If none of the nodes have a class, the merge does not occour
287 | - If the nodes have the same class, the merge occours
288 | - If one of the nodes have a class and the other doesn't, the merge occours
289 | 
290 | If we weren't able to merge the pair of nodes with greatest value on the modularity increment matrix :math:`\Delta Q`, we select the next
291 | greatest value and so on until a valid merge takes place.
292 | 
293 | The algorithm runs until there is no node without a class remaining. The original paper of this algorithm states a network reduction technique to
294 | improve the algorithms performance. In order to use it, the reduction_factor list parameter should be set during
295 | the class instantiation.
296 | 
297 | This parameter will define, for each class, the percentage of the network reduction. The basic working of the method is:
298 | 
299 | - Select two nodes from the same class at random
300 | - Remove the first one
301 | - Redirects the edges from the first node to the second
302 | - Repeat until the desired percentage of the nodes are removed
303 | 
304 | More information about this method can be found on: Silva, Thiago & Zhao, Liang. (2012). Semi-Supervised Learning Guided
305 | by the Modularity Measure in Complex Networks. Neurocomputing. 78. 30-37. 10.1016/j.neucom.2011.04.042.
306 | 
307 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | attrs==20.3.0
 2 | coverage==5.5
 3 | decorator==4.4.2
 4 | GitPython==3.1.20
 5 | giotto-tda==0.5.1
 6 | importlib-metadata==3.7.2
 7 | iniconfig==1.1.1
 8 | joblib==1.1.1
 9 | networkx==2.5
10 | numpy==1.19.5
11 | packaging==20.9
12 | pandas==1.1.5
13 | pluggy==0.13.1
14 | py==1.10.0
15 | pydata-sphinx-theme==0.6.3
16 | pyparsing==2.4.7
17 | pytest==6.2.2
18 | python-dateutil==2.8.1
19 | pytz==2021.1
20 | scikit-learn==0.24.1
21 | scipy==1.5.4
22 | six==1.15.0
23 | threadpoolctl==2.1.0
24 | toml==0.10.2
25 | tqdm==4.59.0
26 | typing-extensions==3.7.4.3
27 | zipp==3.4.1
28 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import pathlib
 2 | from setuptools import setup, find_packages
 3 | 
 4 | HERE = pathlib.Path(__file__).parent
 5 | 
 6 | README = (HERE / "README.md").read_text()
 7 | 
 8 | setup(
 9 |     name="scikit-net",
10 |     version="0.0.2",
11 |     description="Machine Learning in Complex Networks",
12 |     long_description=README,
13 |     long_description_content_type="text/markdown",
14 |     url="https://github.com/TNanukem/scikit-net",
15 |     download_url='https://github.com/TNanukem/scikit-net/archive/refs/tags/v0.0.1.tar.gz',  # noqa: E501
16 |     keywords=['Machine Learning', 'Complex Networks'],
17 |     author="Tiago Toledo Jr",
18 |     author_email="tiago.nanu@gmail.com",
19 |     license="MIT",
20 |     classifiers=[
21 |         "License :: OSI Approved :: MIT License",
22 |         "Programming Language :: Python :: 3",
23 |         "Programming Language :: Python :: 3.8",
24 |     ],
25 |     packages=find_packages(exclude=("tests",)),
26 |     include_package_data=True,
27 |     install_requires=['attrs',
28 |                       'decorator',
29 |                       'importlib-metadata',
30 |                       'iniconfig'
31 |                       'giotto-tda',
32 |                       'joblib',
33 |                       'networkx',
34 |                       'numpy',
35 |                       'packaging',
36 |                       'pandas',
37 |                       'pluggy',
38 |                       'py',
39 |                       'pyparsing',
40 |                       'pytest',
41 |                       'python-dateutil',
42 |                       'pytz',
43 |                       'scikit-learn',
44 |                       'scipy',
45 |                       'six',
46 |                       'sklearn',
47 |                       'threadpoolctl',
48 |                       'toml',
49 |                       'tqdm',
50 |                       'typing-extensions',
51 |                       'zipp'],
52 | )
53 | 


--------------------------------------------------------------------------------
/sknet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TNanukem/scikit-net/df8534268bc04787340e64a6eac2ee4beddc3068/sknet/__init__.py


--------------------------------------------------------------------------------
/sknet/network_construction/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .dataset_constructors import KNNConstructor
3 | from .dataset_constructors import EpsilonRadiusConstructor
4 | from .dataset_constructors import KNNEpislonRadiusConstructor
5 | from .dataset_constructors import SingleLinkageHeuristicConstructor
6 | from .time_series_constructors import UnivariateCorrelationConstructor
7 | from .time_series_constructors import MultivariateCorrelationConstructor
8 | from .time_series_constructors import UnivariateRecurrenceNetworkConstructor
9 | 


--------------------------------------------------------------------------------
/sknet/network_construction/dataset_constructors.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | import networkx as nx
  4 | 
  5 | from abc import ABCMeta, abstractmethod
  6 | from sklearn.metrics import pairwise_distances
  7 | from sklearn.neighbors import KDTree, BallTree
  8 | 
  9 | 
 10 | class BaseConstructor(metaclass=ABCMeta):
 11 |     """
 12 |     This class allows to transform a dataset into a networkx
 13 |     complex network by using the several different transformation
 14 |     methods
 15 | 
 16 |     Do not use this abstract class, use the derived classes instead
 17 | 
 18 |     """
 19 | 
 20 |     def __init__(self, k, epsilon, metric, leaf_size=40, sep_comp=True):
 21 |         self.k = k
 22 |         self.epsilon = epsilon
 23 |         self.metric = metric
 24 |         self.leaf_size = leaf_size
 25 |         self.sep_comp = sep_comp
 26 |         self.X_ = None
 27 |         self.y_ = None
 28 | 
 29 |     @abstractmethod
 30 |     def add_nodes(self, X, y=None):
 31 |         """Add nodes to an existing network inside a fitted transformer
 32 |         object
 33 | 
 34 |         Parameters
 35 |         ----------
 36 |         X : {array-like, pandas dataframe} of shape (n_samples, n_features)
 37 |             The input data.
 38 |         y : {ndarray, pandas series}, shape (n_samples,) or
 39 |         (n_samples, n_classes), default=None
 40 |             The true classes.
 41 | 
 42 |         Notes
 43 |         -----
 44 |         If y is set, then the class of each node will be inserted into
 45 |         the node information under the label 'class'. If sep_comp is true
 46 |         then each class will be a separated component of the network.
 47 | 
 48 |         If by some reason the transformer is not fitted, this will generate
 49 |         an error.
 50 | 
 51 |         After the new nodes are added, one should use the get_network
 52 |         function to retrieve the network with the new nodes.
 53 | 
 54 |         """
 55 | 
 56 |     def fit(self, X, y=None):
 57 |         """Fit the constructor creating the NetworkX graph
 58 | 
 59 |         Parameters
 60 |         ----------
 61 |         X : {array-like, pandas dataframe} of shape (n_samples, n_features)
 62 |             The input data.
 63 |         y : {ndarray, pandas series}, shape (n_samples,) or
 64 |         (n_samples, n_classes), default=None
 65 |             The true classes.
 66 | 
 67 |         Notes
 68 |         -----
 69 |         If y is set, then the class of each node will be inserted into
 70 |         the node information under the label 'class' and each class will
 71 |         be a separated component of the network
 72 | 
 73 |         """
 74 | 
 75 |         self.G_ = nx.Graph()
 76 |         self.node_count_ = 0
 77 |         if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
 78 |             X = np.array(X)
 79 | 
 80 |         self.X_ = X
 81 |         self.y_ = y
 82 |         self.fitting = True
 83 |         self.add_nodes(self.X_, self.y_)
 84 |         self.fitting = False
 85 | 
 86 |         return self
 87 | 
 88 |     def transform(self):
 89 |         """Returns the networkX graph after the constructor is fitted
 90 | 
 91 |         Returns
 92 |         -----
 93 |         G : NetworkX graph
 94 |             The network version of the inserted tabular data
 95 |         """
 96 |         try:
 97 |             return self.G_
 98 |         except AttributeError:
 99 |             raise Exception("Transformer is not fitted")
100 | 
101 |     def fit_transform(self, X, y=None):
102 |         """Fit the constructor creating the NetworkX graph and returns the graph
103 | 
104 |         Parameters
105 |         ----------
106 |         X : {array-like, pandas dataframe} of shape (n_samples, n_features)
107 |             The input data.
108 |         y : {ndarray, pandas series}, shape (n_samples,) or
109 |         (n_samples, n_classes), default=None
110 |             The predicted classes.
111 | 
112 |         Returns
113 |         -------
114 |         G : NetworkX graph
115 |             The network version of the inserted tabular data
116 | 
117 |         Notes
118 |         -----
119 |         If y is set, then the class of each node will be inserted
120 |         into the node information under the label 'class'
121 | 
122 |         """
123 |         self.fit(X, y)
124 |         return self.G_
125 | 
126 |     def get_network(self):
127 |         """Retrieves the network generated in the constructor class
128 |         """
129 |         return self.G_
130 | 
131 |     def set_sep_comp(self, sep_comp):
132 |         self.sep_comp = sep_comp
133 | 
134 |     def set_params(self, **parameters):
135 |         for parameter, value in parameters.items():
136 |             setattr(self, parameter, value)
137 |         return self
138 | 
139 |     def get_params(self, deep=True):
140 |         return {"k": self.k, "epsilon": self.epsilon,
141 |                 "metric": self.metric, "leaf_size": self.leaf_size,
142 |                 "sep_comp": self.sep_comp}
143 | 
144 | 
145 | class KNNConstructor(BaseConstructor):
146 |     """
147 |     Using a k-nearest neighbors algorithm, defines an
148 |     networkx complex network
149 | 
150 |     Parameters
151 |     ----------
152 |     k : int, default=5
153 |         The number of neighbors to be connected to any given node
154 |         of the network.
155 |     metric : str or DistanceMetric object, default='minkowski'
156 |         The distance metric to use for the neighborhood tree. Refer
157 |         to the DistanceMetric class documentation from sklearn for a list
158 |         of available metrics
159 |     leaf_size : int, default=40
160 |         Number of points to switch to brute-force search of neighbors
161 |     sep_comp : boolean, default=True
162 |         If True and if y is not None, then each class of the dataset
163 |         will be a separated component, so nodes from one class will only
164 |         be connected to those of the same class. If False then this
165 |         restriction is not applied.
166 | 
167 |     Attributes
168 |     ----------
169 |     k : int
170 |         The k being used to construct the network
171 |     metric : str or DistanceMetric object
172 |         The distance metric being used
173 |     leaf_size : int
174 |         The leaf_size being used
175 |     G : NetworkX graph
176 |         The network version of the inserted tabular data
177 | 
178 |     Examples
179 |     --------
180 |     >>> from sklearn.datasets import load_iris
181 |     >>> from dataset_constructors import KNNConstructor
182 |     >>> X, y = load_iris(return_X_y = True)
183 |     >>> knn_c = KNNConstructor(k=3)
184 |     >>> knn_c.fit(X, y)
185 |     >>> G = knn_c.transform()
186 |     >>> # print(len(G.nodes))
187 |     150
188 | 
189 |     Notes
190 |     -----
191 | 
192 |     References
193 |     ----------
194 |     Silva, Thiago & Zhao, Liang. (2016). Machine Learning in
195 |     Complex Networks. 10.1007/978-3-319-17290-3.
196 | 
197 |     """
198 |     def __init__(self, k=5, metric='minkowski', leaf_size=40, sep_comp=True):
199 |         super().__init__(k, None, metric, leaf_size, sep_comp)
200 | 
201 |     def add_nodes(self, X, y=None):
202 |         """Add nodes to an existing network inside a fitted transformer
203 |         object
204 | 
205 |         Parameters
206 |         ----------
207 |         X : {array-like, pandas dataframe} of shape (n_samples, n_features)
208 |             The input data.
209 |         y : {ndarray, pandas series}, shape (n_samples,) or
210 |         (n_samples, n_classes), default=None
211 |             The true classes.
212 | 
213 |         Notes
214 |         -----
215 |         If y is set, then the class of each node will be inserted into
216 |         the node information under the label 'class'. If sep_comp is true
217 |         then each class will be a separated component of the network.
218 | 
219 |         If by some reason the transformer is not fitted, this will generate
220 |         an error.
221 | 
222 |         After the new nodes are added, one should use the get_network
223 |         function to retrieve the network with the new nodes.
224 | 
225 |         """
226 |         if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
227 |             X = np.array(X)
228 | 
229 |         # Each class will be a separated component
230 |         if self.y_ is None:
231 |             classes = [0]
232 |         else:
233 |             classes = np.unique(self.y_)
234 | 
235 |         for class_ in classes:
236 | 
237 |             if self.y_ is None:
238 |                 nodes = [node for node in range(self.node_count_, len(X) + self.node_count_)]  # noqa: E501
239 |                 X_ = X
240 |                 self.tree_ = _tree_selector(self.X_, self.leaf_size)
241 |                 label_ind = [i for i in range(len(X))]
242 | 
243 |             else:
244 |                 if self.sep_comp:
245 |                     # Verifies if someone to be added is from class
246 |                     X_component = np.take(X, np.where(y == class_), axis=0)[0]
247 |                     if len(X_component) == 0:
248 |                         continue
249 | 
250 |                     # Calculating the distances for guys on the same component
251 |                     if self.fitting:
252 |                         total_y = self.y_
253 |                         total_X = self.X_
254 |                     else:
255 |                         total_y = np.append(self.y_, y)
256 |                         total_X = np.vstack((self.X_, X))
257 |                     label_ind = np.where(total_y == class_)
258 | 
259 |                     X_ = np.take(total_X, label_ind, axis=0)[0]
260 |                     nodes = [(node, {'class': class_}) for node in range(self.node_count_, len(X_component) + self.node_count_)]  # noqa: E501
261 | 
262 |                     label_ind = label_ind[0].tolist()
263 | 
264 |                 else:
265 |                     X_ = X
266 |                     label_ind = [i for i in range(len(X))]
267 |                     nodes = [(node, {'class': y[node - self.node_count_]}) for node in range(self.node_count_, len(X_) + self.node_count_)]  # noqa: E501
268 | 
269 |                 self.tree_ = _tree_selector(X_, self.leaf_size)
270 | 
271 |             neighbors = [self.tree_.query(x.reshape(1, -1), k=self.k+1, return_distance=True) for x in X_]  # noqa: E501
272 |             distances_aux = [neigh[0] for neigh in neighbors]
273 |             indexes_aux = [neigh[1] for neigh in neighbors]
274 |             indexes = [node[0] for node in indexes_aux]
275 |             distances = [node[0] for node in distances_aux]
276 |             edges = [(label_ind[node[0]], label_ind[node[j]], distances[i][j]) for i, node in enumerate(indexes) for j in range(1, self.k+1)]  # noqa: E501
277 | 
278 |             self.G_.add_nodes_from(nodes)
279 |             self.G_.add_weighted_edges_from(edges)
280 |             self.node_count_ += len(nodes)
281 | 
282 |             if self.sep_comp is False:
283 |                 break
284 | 
285 |         if not np.array_equal(self.X_, X):
286 |             self.X_ = np.vstack((self.X_, X))
287 |             if self.y_ is not None:
288 |                 self.y_ = np.append(self.y_, y)
289 | 
290 | 
291 | class EpsilonRadiusConstructor(BaseConstructor):
292 |     """
293 |     Using an epsilon-radius algorithm, defines an
294 |     networkx complex network
295 | 
296 |     Parameters
297 |     ----------
298 |     epsilon : float
299 |         The radius to define which neighbors should be connected.
300 |     metric : str or DistanceMetric object, default='minkowski'
301 |         The distance metric to use for the neighborhood tree. Refer
302 |         to the DistanceMetric class documentation from sklearn for a list
303 |         of available metrics
304 |     leaf_size : int, default=40
305 |         Number of points to switch to brute-force search of neighbors
306 |     sep_comp : boolean, default=True
307 |         If True and if y is not None, then each class of the dataset
308 |         will be a separated component, so nodes from one class will only
309 |         be connected to those of the same class. If False then this
310 |         restriction is not applied.
311 | 
312 |     Attributes
313 |     ----------
314 |     epsilon : float
315 |         The epsilon being used to construct the network
316 |     metric : str or DistanceMetric object
317 |         The distance metric being used
318 |     leaf_size : int
319 |         The leaf_size being used
320 |     G : NetworkX graph
321 |         The network version of the inserted tabular data
322 | 
323 |     Examples
324 |     --------
325 |     >>> from sklearn.datasets import load_iris
326 |     >>> from dataset_constructors import EpsilonRadiusConstructor
327 |     >>> X, y = load_iris(return_X_y = True)
328 |     >>> eps_c = EpsilonRadiusConstructor(epsilon=3)
329 |     >>> eps_c.fit(X, y)
330 |     >>> G = eps_c.transform()
331 |     >>> # print(len(G.nodes))
332 |     150
333 | 
334 |     Notes
335 |     -----
336 | 
337 |     References
338 |     ----------
339 |     Silva, Thiago & Zhao, Liang. (2016). Machine Learning in
340 |     Complex Networks. 10.1007/978-3-319-17290-3.
341 | 
342 |     """
343 |     def __init__(self, epsilon=0.1, metric='minkowski', leaf_size=40,
344 |                  sep_comp=True):
345 |         super().__init__(None, epsilon, metric, leaf_size, sep_comp)
346 | 
347 |     def add_nodes(self, X, y=None):
348 |         """Add nodes to an existing network inside a fitted transformer
349 |         object
350 | 
351 |         Parameters
352 |         ----------
353 |         X : {array-like, pandas dataframe} of shape (n_samples, n_features)
354 |             The input data.
355 |         y : {ndarray, pandas series}, shape (n_samples,) or
356 |         (n_samples, n_classes), default=None
357 |             The true classes.
358 | 
359 |         Notes
360 |         -----
361 |         If y is set, then the class of each node will be inserted into
362 |         the node information under the label 'class'. If sep_comp is true
363 |         then each class will be a separated component of the network.
364 | 
365 |         If by some reason the transformer is not fitted, this will generate
366 |         an error.
367 | 
368 |         After the new nodes are added, one should use the get_network
369 |         function to retrieve the network with the new nodes.
370 | 
371 |         """
372 |         if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
373 |             X = np.array(X)
374 | 
375 |         # Each class will be a separated component
376 |         if self.y_ is None:
377 |             classes = [0]
378 |         else:
379 |             classes = np.unique(self.y_)
380 | 
381 |         for class_ in classes:
382 |             if self.y_ is None:
383 |                 nodes = [node for node in range(self.node_count_, len(X) + self.node_count_)]  # noqa: E501
384 |                 X_ = X
385 |                 self.tree_ = _tree_selector(self.X_, self.leaf_size)
386 |                 label_ind = [i for i in range(len(X))]
387 | 
388 |             else:
389 |                 if self.sep_comp:
390 |                     # Verifies if someone to be added is from class
391 |                     X_component = np.take(X, np.where(y == class_), axis=0)[0]
392 |                     if len(X_component) == 0:
393 |                         continue
394 | 
395 |                     # Calculating the distances for guys on the same component
396 |                     if self.fitting:
397 |                         total_y = self.y_
398 |                         total_X = self.X_
399 |                     else:
400 |                         total_y = np.append(self.y_, y)
401 |                         total_X = np.vstack((self.X_, X))
402 |                     label_ind = np.where(total_y == class_)
403 | 
404 |                     X_ = np.take(total_X, label_ind, axis=0)[0]
405 |                     nodes = [(node, {'class': class_}) for node in range(self.node_count_, len(X_component) + self.node_count_)]  # noqa: E501
406 | 
407 |                     label_ind = label_ind[0].tolist()
408 | 
409 |                 else:
410 |                     X_ = X
411 |                     label_ind = [i for i in range(len(X))]
412 |                     nodes = [(node, {'class': y[node - self.node_count_]}) for node in range(self.node_count_, len(X_) + self.node_count_)]  # noqa: E501
413 | 
414 |                 self.tree_ = _tree_selector(X_, self.leaf_size)
415 | 
416 |             neighbors = [self.tree_.query_radius(x.reshape(1, -1), r=self.epsilon, return_distance=True, sort_results=True) for x in X_]  # noqa: E501
417 | 
418 |             indexes_aux = [neigh[0] for neigh in neighbors]
419 |             distances_aux = [neigh[1] for neigh in neighbors]
420 |             distances = [node[0] for node in distances_aux]
421 |             indexes = [node[0] for node in indexes_aux]
422 | 
423 |             edges = [(label_ind[node[0]], label_ind[node[j]], distances[i][j]) for i, node in enumerate(indexes) for j in range(1, len(node))]  # noqa: E501
424 | 
425 |             self.G_.add_nodes_from(nodes)
426 |             self.G_.add_weighted_edges_from(edges)
427 | 
428 |             # Removing self-loops
429 |             self.G_.remove_edges_from(nx.selfloop_edges(self.G_))
430 |             self.node_count_ += len(nodes) + 1
431 | 
432 |             if self.sep_comp is False:
433 |                 break
434 | 
435 |         if not np.array_equal(self.X_, X):
436 |             self.X_ = np.vstack((self.X_, X))
437 |             if self.y_ is not None:
438 |                 self.y_ = np.vstack((self.y_, y))
439 | 
440 | 
441 | class KNNEpislonRadiusConstructor(BaseConstructor):
442 |     """
443 |     Using a k-nearest neighbors algorithm, defines an
444 |     networkx complex network
445 | 
446 |     Parameters
447 |     ----------
448 |     k : int, default=5
449 |         The number of neighbors to be connected to any given node
450 |         of the network.
451 |     epsilon : float, default=0.1
452 |         The radius to define which neighbors should be connected.
453 |     metric : str or DistanceMetric object, default='minkowski'
454 |         The distance metric to use for the neighborhood tree. Refer
455 |         to the DistanceMetric class documentation from sklearn for a list
456 |         of available metrics
457 |     leaf_size : int, default=40
458 |         Number of points to switch to brute-force search of neighbors
459 |     sep_comp : boolean, default=True
460 |         If True and if y is not None, then each class of the dataset
461 |         will be a separated component, so nodes from one class will only
462 |         be connected to those of the same class. If False then this
463 |         restriction is not applied.
464 | 
465 |     Attributes
466 |     ----------
467 |     k : int
468 |         The k being used to construct the network
469 |     epsilon : float
470 |         The epsilon being used to construct the network
471 |     metric : str or DistanceMetric object
472 |         The distance metric being used
473 |     leaf_size : int
474 |         The leaf_size being used
475 |     G : NetworkX graph
476 |         The network version of the inserted tabular data
477 | 
478 |     Examples
479 |     --------
480 |     >>> from sklearn.datasets import load_iris
481 |     >>> from dataset_constructors import KNNEpislonRadiusConstructor
482 |     >>> X, y = load_iris(return_X_y = True)
483 |     >>> ke_c = KNNEpislonRadiusConstructor(k=3, epsilon=0.3)
484 |     >>> ke_c.fit(X, y)
485 |     >>> G = ke_c.transform()
486 |     >>> # print(len(G.nodes))
487 |     150
488 | 
489 |     Notes
490 |     -----
491 |     The KNN is used for sparse regions while the Epsilon-Radius is used for
492 |     dense regions. This approach hopes to overcome the limitations of the
493 |     individual components, allowing for a better network construction. The
494 |     equation that runs this method is defined as:
495 | 
496 |     ``neighbor(v_i) = epsilon-radius(v_i) if |epsilon-radius(v_i)| >
497 |     k else knn(v_i)``
498 | 
499 |     References
500 |     ----------
501 |     Silva, T.C.; Liang Zhao (2012). Network-Based High Level Data
502 |     Classification., 23(6), –. doi:10.1109/tnnls.2012.2195027
503 |     Silva, Thiago & Zhao, Liang. (2016). Machine Learning in Complex Networks.
504 |     10.1007/978-3-319-17290-3.
505 | 
506 |     """
507 |     def __init__(self, k=5, epsilon=0.1, metric='minkowski', leaf_size=40,
508 |                  sep_comp=True):
509 |         super().__init__(k, epsilon, metric, leaf_size, sep_comp)
510 | 
511 |     def add_nodes(self, X, y=None):
512 |         """Add nodes to an existing network inside a fitted transformer
513 |         object
514 | 
515 |         Parameters
516 |         ----------
517 |         X : {array-like, pandas dataframe} of shape (n_samples, n_features)
518 |             The input data.
519 |         y : {ndarray, pandas series}, shape (n_samples,) or
520 |         (n_samples, n_classes), default=None
521 |             The true classes.
522 | 
523 |         Notes
524 |         -----
525 |         If y is set, then the class of each node will be inserted into
526 |         the node information under the label 'class'. If sep_comp is true
527 |         then each class will be a separated component of the network.
528 | 
529 |         If by some reason the transformer is not fitted, this will generate
530 |         an error.
531 | 
532 |         After the new nodes are added, one should use the get_network
533 |         function to retrieve the network with the new nodes.
534 | 
535 |         """
536 |         if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
537 |             X = np.array(X)
538 | 
539 |         # Each class will be a separated component
540 |         if self.y_ is None:
541 |             classes = [0]
542 |         else:
543 |             classes = np.unique(self.y_)
544 | 
545 |         for class_ in classes:
546 | 
547 |             if self.y_ is None:
548 |                 nodes = [node for node in range(self.node_count_, len(X) + self.node_count_)]  # noqa: E501
549 |                 X_ = X
550 |                 self.tree_ = _tree_selector(self.X_, self.leaf_size)
551 |                 label_ind = [i for i in range(len(X))]
552 | 
553 |             else:
554 |                 if self.sep_comp:
555 |                     # Verifies if someone to be added is from class
556 |                     X_component = np.take(X, np.where(y == class_), axis=0)[0]
557 |                     if len(X_component) == 0:
558 |                         continue
559 | 
560 |                     # Calculating the distances for guys on the same component
561 |                     if self.fitting:
562 |                         total_y = self.y_
563 |                         total_X = self.X_
564 |                     else:
565 |                         total_y = np.append(self.y_, y)
566 |                         total_X = np.vstack((self.X_, X))
567 |                     label_ind = np.where(total_y == class_)
568 | 
569 |                     X_ = np.take(total_X, label_ind, axis=0)[0]
570 |                     nodes = [(node, {'class': class_}) for node in range(self.node_count_, len(X_component) + self.node_count_)]  # noqa: E501
571 | 
572 |                     label_ind = label_ind[0].tolist()
573 | 
574 |                 else:
575 |                     X_ = X
576 |                     label_ind = [i for i in range(len(X))]
577 |                     nodes = [(node, {'class': y[node - self.node_count_]}) for node in range(self.node_count_, len(X_) + self.node_count_)]  # noqa: E501
578 | 
579 |                 self.tree_ = _tree_selector(X_, self.leaf_size)
580 | 
581 |             radius_neighbors = [self.tree_.query_radius(x.reshape(1, -1), r=self.epsilon, return_distance=True, sort_results=True) for x in X_]  # noqa: E501
582 |             k_neighbors = [self.tree_.query(x.reshape(1, -1), k=self.k+1, return_distance=True) for x in X_]  # noqa: E501
583 | 
584 |             # Auxiliar lists
585 |             indexes_radius_aux = [neigh[0] for neigh in radius_neighbors]
586 |             distances_radius_aux = [neigh[1] for neigh in radius_neighbors]  # noqa: E501
587 |             distances_radius = [node[0] for node in distances_radius_aux]
588 |             indexes_radius = [node[0] for node in indexes_radius_aux]
589 | 
590 |             distances_k_aux = [neigh[0] for neigh in k_neighbors]
591 |             indexes_k_aux = [neigh[1] for neigh in k_neighbors]  # noqa: E501
592 |             indexes_k = [node[0] for node in indexes_k_aux]
593 |             distances_k = [node[0] for node in distances_k_aux]
594 | 
595 |             # Nodes with neighbors inside radius greater than k
596 |             greater_than_k_indices = [index for index, neighbors in enumerate(indexes_radius) if len(neighbors) - 1 > self.k]  # noqa: E501
597 | 
598 |             final_k = [neighbors for index, neighbors in enumerate(indexes_k) if index not in greater_than_k_indices]  # noqa: E501
599 |             final_radius = [neighbors for index, neighbors in enumerate(indexes_radius) if index in greater_than_k_indices]  # noqa: E501
600 |             final_k_distances = [dist for index, dist in enumerate(distances_k) if index not in greater_than_k_indices]  # noqa: E501
601 |             final_radius_distances = [distance for index, distance in enumerate(distances_radius) if index in greater_than_k_indices]  # noqa: E501
602 | 
603 |             assert len(final_k) + len(final_radius) == len(nodes)
604 | 
605 |             edges_radius = [(label_ind[node[0]], label_ind[node[j]], final_radius_distances[i][j]) for i, node in enumerate(final_radius) for j in range(1, len(node))]  # noqa: E501
606 |             edges_k = [(label_ind[node[0]], label_ind[node[j]], final_k_distances[i][j]) for i, node in enumerate(final_k) for j in range(1, self.k+1)]  # noqa: E501
607 | 
608 |             self.G_ = nx.Graph()
609 |             self.G_.add_nodes_from(nodes)
610 |             self.G_.add_weighted_edges_from(edges_radius)
611 |             self.G_.add_weighted_edges_from(edges_k)
612 | 
613 |             # Removing self-loops
614 |             self.G_.remove_edges_from(nx.selfloop_edges(self.G_))
615 |             self.node_count_ += len(nodes) + 1
616 | 
617 |             if self.sep_comp is False:
618 |                 break
619 | 
620 |         if not np.array_equal(self.X_, X):
621 |             self.X_ = np.vstack((self.X_, X))
622 |             if self.y_ is not None:
623 |                 self.y_ = np.vstack((self.y_, y))
624 | 
625 | 
626 | def _tree_selector(X, leaf_size=40, metric='minkowski'):
627 |     """
628 |     Selects the better tree approach for given data
629 | 
630 |     Parameters
631 |     ----------
632 |     X : {array-like, pandas dataframe} of shape (n_samples, n_features)
633 |         The input data.
634 |     leaf_size : int, default=40
635 |         Number of points to switch to brute-force search of neighbors
636 |     metric : str or DistanceMetric object, default='minkowski'
637 |         The distance metric to use for the neighborhood tree. Refer
638 |         to the DistanceMetric class documentation from sklearn for a list
639 |         of available metrics
640 | 
641 |     Returns
642 |     -------
643 |     tree : {KDTree or BallTree}
644 |         The best tree to be used to find neighbors given data
645 |     """
646 | 
647 |     # Low dimensional spaces are fit to KD-Tree
648 |     if X.shape[1] < 30:
649 |         return KDTree(X, leaf_size=leaf_size, metric=metric)
650 | 
651 |     # High dimensional spaces are fit to Ball Tree
652 |     if X.shape[1] >= 30:
653 |         return BallTree(X, leaf_size=leaf_size, metric=metric)
654 | 
655 | 
656 | class SingleLinkageHeuristicConstructor(BaseConstructor):
657 |     """
658 |     Use Single Linkage Heuristics to generate a complex network from
659 |     tabular data
660 | 
661 |     Parameters
662 |     ----------
663 |     k : int, default=3
664 |         The number of closests points between two grops to be considered
665 |         to create an edge.
666 |     lambda_ : positive float, default=0.1
667 |         Multiplying factor on the average dissimilarity on the groups to
668 |         define the critical distance
669 |     sep_comp : boolean, default=True
670 |         If True and if y is not None, then each class of the dataset
671 |         will be a separated component, so nodes from one class will only
672 |         be connected to those of the same class. If False then this
673 |         restriction is not applied.
674 |     metric : str or DistanceMetric object, default='euclidean'
675 |         The distance metric to use for the neighborhood tree. Refer
676 |         to the DistanceMetric class documentation from sklearn for a list
677 |         of available metrics
678 |     n_jobs : int, default=None
679 |         The number of parallel jobs to run for neighbors search.
680 |         None means 1 unless in a joblib.parallel_backend context and -1 means
681 |         using all processors.
682 | 
683 |     Examples
684 |     --------
685 |     >>> from sklearn.datasets import load_iris
686 |     >>> from dataset_constructors import SingleLinkageHeuristicConstructor
687 |     >>> X, y = load_iris(return_X_y = True)
688 |     >>> ch = SingleLinkageHeuristicConstructor(k=3, epsilon=0.3)
689 |     >>> ch.fit(X, y)
690 |     >>> G = ke_c.transform()
691 |     >>> # print(len(G.nodes))
692 |     150
693 | 
694 |     References
695 |     ----------
696 |     Cupertino, T.H., Huertas, J., & Zhao, L. (2013). Data clustering using
697 |     controlled consensus in complex networks. Neurocomputing, 118, 132-140.
698 | 
699 |     """
700 |     def __init__(self, k=3, lambda_=0.1, sep_comp=False,
701 |                  metric='euclidean', n_jobs=None):
702 |         self.k = k
703 |         self.lambda_ = lambda_
704 |         self.sep_comp = sep_comp
705 |         self.metric = metric
706 |         self.n_jobs = n_jobs
707 | 
708 |     def get_params(self, deep=True):
709 |         return {'k': self.k, 'lambda_': self.lambda_,
710 |                 'sep_comp': self.sep_comp,
711 |                 'metric': self.metric, 'n_jobs': self.n_jobs}
712 | 
713 |     def add_nodes(self, X, y=None):
714 |         """Add nodes to an existing network inside a fitted transformer
715 |         object
716 | 
717 |         Parameters
718 |         ----------
719 |         X : {array-like, pandas dataframe} of shape (n_samples, n_features)
720 |             The input data.
721 |         y : {ndarray, pandas series}, shape (n_samples,) or
722 |         (n_samples, n_classes), default=None
723 |             The true classes.
724 | 
725 |         Notes
726 |         -----
727 |         If y is set, then the class of each node will be inserted into
728 |         the node information under the label 'class'. If sep_comp is true
729 |         then each class will be a separated component of the network.
730 | 
731 |         If by some reason the transformer is not fitted, this will generate
732 |         an error.
733 | 
734 |         After the new nodes are added, one should use the get_network
735 |         function to retrieve the network with the new nodes.
736 | 
737 |         """
738 |         if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
739 |             X = np.array(X)
740 | 
741 |         if self.lambda_ < 0:
742 |             raise Exception('lambda_ parameter should be positive')
743 | 
744 |         if self.fitting:
745 |             self.G_ = nx.Graph()
746 |             self.groups_ = np.array([i for i in range(len(X))])
747 |         else:
748 |             self.groups_.extend(
749 |                 [i + np.max(np.unique(self.groups)) for i in range(len(X))]
750 |             )
751 |             X = np.vstack((self.X_, X))
752 | 
753 |         if y is None and self.sep_comp is True:
754 |             raise Exception(
755 |                 """y parameter is required for separated construction,
756 |                 set sep_comp to False"""
757 |             )
758 | 
759 |         number_of_groups = len(self.groups_)
760 | 
761 |         X_dist = pairwise_distances(X, metric=self.metric,
762 |                                     n_jobs=self.n_jobs)
763 | 
764 |         while number_of_groups > 1:
765 |             if number_of_groups == len(X):
766 |                 dist = X_dist
767 | 
768 |             else:
769 |                 dist = self._generate_new_X_dist(X_dist)
770 | 
771 |             for i in range(dist.shape[0]):
772 |                 dist[i, i] = np.inf
773 | 
774 |             # Finds the two closest groups and get their values
775 |             i, j = np.unravel_index(dist.argmin(), dist.shape)
776 | 
777 |             # If the two closest groups are the same, then find other pair
778 |             if self.groups_[i] == self.groups_[j]:
779 |                 while self.groups_[i] == self.groups_[j]:
780 |                     dist[i][j] = np.inf
781 |                     i, j = np.unravel_index(dist.argmin(), dist.shape)
782 | 
783 |             # Finds the nodes that are on the group i and j
784 |             g1 = np.where(self.groups_ == self.groups_[i])[0]
785 |             g1_idx = i
786 | 
787 |             g2 = np.where(self.groups_ == self.groups_[j])[0]
788 |             g2_idx = j
789 | 
790 |             # Finds the distance between all members of the group
791 |             g1_dists = pairwise_distances(X[g1])
792 | 
793 |             g2_dists = pairwise_distances(X[g2])
794 | 
795 |             # Finds the average intra-cluster dissimilarity
796 |             d1 = np.mean(g1_dists)
797 |             d2 = np.mean(g2_dists)
798 | 
799 |             # Select the k most similar nodes between G1 and G2
800 |             group_distance = pairwise_distances(X[g1], X[g2])
801 |             candidates = []
802 | 
803 |             if group_distance.shape[0] < self.k:
804 |                 k = group_distance.shape[0]
805 |             else:
806 |                 k = self.k
807 | 
808 |             for i in range(k):
809 |                 i, j = np.unravel_index(
810 |                     group_distance.argmin(), group_distance.shape
811 |                 )
812 |                 candidates.append((g1[i], g2[j]))
813 |                 group_distance[i, j] = np.inf
814 | 
815 |             # Generate edges
816 |             dc = self.lambda_ * max(d1, d2)
817 | 
818 |             for u, v in candidates:
819 |                 if self.sep_comp is True and y[u] != y[v]:
820 |                     continue
821 |                 if dist[u, v] <= dc:
822 |                     self.G_.add_edge(u, v, weight=dist[u, v])
823 | 
824 |             # Merge groups
825 |             self.groups_[
826 |                 self.groups_ == self.groups_[g2_idx]] = self.groups_[g1_idx]
827 | 
828 |             # Update number of groups
829 |             number_of_groups = len(np.unique(self.groups_))
830 | 
831 |     def _generate_new_X_dist(self, X_dist):
832 |         number_of_groups = len(self.groups_)
833 | 
834 |         new_X_dist = np.zeros((number_of_groups, number_of_groups))
835 | 
836 |         # Find the distance between the two closest nodes for each group pair
837 |         for i in np.unique(self.groups_):
838 |             for j in np.unique(self.groups_):
839 |                 if i != j:
840 |                     g1_nodes = np.where(self.groups_ == i)[0]
841 |                     g2_nodes = np.where(self.groups_ == j)[0]
842 | 
843 |                     new_X_dist[i, j] = np.min(X_dist[g1_nodes, :][:, g2_nodes])
844 | 
845 |         return new_X_dist
846 | 


--------------------------------------------------------------------------------
/sknet/network_construction/general_constructors.py:
--------------------------------------------------------------------------------
 1 | import networkx as nx
 2 | 
 3 | from abc import ABCMeta, abstractmethod
 4 | from sknet.utils import NetworkTypesHandler
 5 | 
 6 | 
 7 | class GeneralConstructor(metaclass=ABCMeta):
 8 |     def __init__(self, net_type):
 9 |         self.net_type = net_type
10 |         self.network_type_handler = NetworkTypesHandler()
11 | 
12 |     def set_params(self, **parameters):
13 |         for parameter, value in parameters.items():
14 |             setattr(self, parameter, value)
15 |         return self
16 | 
17 |     def get_params(self, deep=True):
18 |         return {"net_type": self.net_type}
19 | 
20 |     @abstractmethod
21 |     def fit(self, X, y=None):
22 |         pass
23 | 
24 |     def transform(self):
25 |         """Returns the networkX graph after the constructor is fitted
26 | 
27 |         Returns
28 |         -----
29 |         G : NetworkX graph
30 |             The network version of the inserted data
31 |         """
32 |         try:
33 |             return self.G_
34 |         except AttributeError:
35 |             raise Exception("Transformer is not fitted")
36 | 
37 |     def fit_transform(self, X, y=None):
38 |         self.fit(X)
39 |         return self.G_
40 | 
41 |     def get_network(self):
42 |         """Retrieves the network generated in the constructor class
43 |         """
44 |         return self.G_
45 | 
46 | 
47 | class EdgeListConstructor():
48 |     def __init__(self, net_type='graph'):
49 |         super().__init__(net_type)
50 | 
51 |     def fit(self, X, y=None):
52 |         network_type = self.network_type_handler.get_net(self.net_type)
53 |         self.G_ = nx.read_edgelist(X)
54 | 
55 |         self.G_ = network_type(self.G_)
56 |         return self
57 | 
58 | 
59 | class AdjacencyListConstructor():
60 |     def __init__(self, net_type='graph'):
61 |         super().__init__(net_type)
62 | 
63 |     def fit(self, X, y=None):
64 |         network_type = self.network_type_handler.get_net(self.net_type)
65 |         self.G_ = nx.read_adjlist(X)
66 | 
67 |         self.G_ = network_type(self.G_)
68 |         return self
69 | 
70 | 
71 | class YAMLConstructor():
72 |     def __init__(self, net_type='graph'):
73 |         super().__init__(net_type)
74 | 
75 |     def fit(self, X, y=None):
76 |         network_type = self.network_type_handler.get_net(self.net_type)
77 |         self.G_ = nx.read_yaml(X)
78 | 
79 |         self.G_ = network_type(self.G_)
80 |         return self
81 | 
82 | 
83 | class PajekConstructor():
84 |     def __init__(self, path, net_type='graph'):
85 |         super().__init__(net_type)
86 | 
87 |     def fit(self, X, y=None):
88 |         network_type = self.network_type_handler.get_net(self.net_type)
89 |         self.G_ = nx.read_pajek(X)
90 | 
91 |         self.G_ = network_type(self.G_)
92 |         return self
93 | 


--------------------------------------------------------------------------------
/sknet/network_construction/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TNanukem/scikit-net/df8534268bc04787340e64a6eac2ee4beddc3068/sknet/network_construction/tests/__init__.py


--------------------------------------------------------------------------------
/sknet/network_construction/tests/test_network_construction.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | import pandas as pd
  3 | 
  4 | from sknet.network_construction import dataset_constructors
  5 | from sknet.network_construction import time_series_constructors
  6 | 
  7 | 
  8 | @pytest.fixture
  9 | def X_y_generator():
 10 | 
 11 |     X = pd.DataFrame([
 12 |       (-2.24, -1.19),
 13 |       (-3.17, -0.67),
 14 |       (1.92, 0.57),
 15 |       (1.6, 1.97),
 16 |       (3.32, 1.51),
 17 |       (1.12, 1.21),
 18 |       (-1.32, -2.39),
 19 |       (-2.88, -1.83),
 20 |       (-2.56, 4.01),
 21 |       (-3.36, 3.25),
 22 |       (-5.64, 2.57),
 23 |       (-4.14, 2.85),
 24 |       (-3.04, 2.15)])
 25 | 
 26 |     y = pd.Series([0, 0, 1, 1, 1, 1, 0, 0, 2, 2, 2, 2, 2])
 27 | 
 28 |     return X, y
 29 | 
 30 | 
 31 | @pytest.fixture
 32 | def X_time_series_generator():
 33 | 
 34 |     X_uni = pd.DataFrame([-5, -3, 2, 4, -5, 1, 4, 6, 7, -3, 3, 2])
 35 |     X_multi = pd.DataFrame([
 36 |         (-5, 4, 3),
 37 |         (-3, 8, 4),
 38 |         (2, -5, 5),
 39 |         (3, 2, 6),
 40 |         (4, 3, 4),
 41 |         (-5, 4, 7),
 42 |         (1, 6, 7),
 43 |         (4, -2, -3),
 44 |         (6, 5, 8),
 45 |         (7, -4, 4),
 46 |         (-3, -4, 6),
 47 |         (3, 4, -6),
 48 |         (2, 2, 4)
 49 |     ])
 50 | 
 51 |     return X_uni, X_multi
 52 | 
 53 | 
 54 | def test_knn_fit(X_y_generator):
 55 | 
 56 |     knn = dataset_constructors.KNNConstructor(k=3, sep_comp=False)
 57 | 
 58 |     with pytest.raises(Exception):
 59 |         knn.transform()
 60 | 
 61 |     knn.fit(X_y_generator[0], X_y_generator[1])
 62 | 
 63 |     G = knn.transform()
 64 | 
 65 |     expected_nodes = [i for i in range(13)]
 66 |     assert list(G.nodes) == expected_nodes
 67 | 
 68 |     expected_edges = [(0, 7), (0, 1), (0, 6),
 69 |                       (1, 7), (1, 6), (2, 5),
 70 |                       (2, 3), (2, 4), (3, 5),
 71 |                       (3, 4), (4, 5), (6, 7),
 72 |                       (8, 9), (8, 12), (8, 11),
 73 |                       (9, 11), (9, 12), (9, 10),
 74 |                       (10, 11), (10, 12), (11, 12)]
 75 |     assert list(G.edges) == expected_edges
 76 | 
 77 | 
 78 | def test_epsilon_radius_fit(X_y_generator):
 79 | 
 80 |     eps = dataset_constructors.EpsilonRadiusConstructor(epsilon=1,
 81 |                                                         sep_comp=False)
 82 | 
 83 |     with pytest.raises(Exception):
 84 |         eps.transform()
 85 | 
 86 |     eps.fit(X_y_generator[0], X_y_generator[1])
 87 | 
 88 |     G = eps.transform()
 89 | 
 90 |     expected_nodes = [i for i in range(13)]
 91 |     assert list(G.nodes) == expected_nodes
 92 | 
 93 |     expected_edges = [(0, 7), (3, 5), (9, 11)]
 94 | 
 95 |     assert list(G.edges) == expected_edges
 96 | 
 97 | 
 98 | def test_epsilon_radius_fit_true_sep_comp(X_y_generator):
 99 | 
100 |     eps = dataset_constructors.EpsilonRadiusConstructor(epsilon=1,
101 |                                                         sep_comp=True)
102 | 
103 |     with pytest.raises(Exception):
104 |         eps.transform()
105 | 
106 |     eps.fit(X_y_generator[0], X_y_generator[1])
107 | 
108 |     G = eps.transform()
109 | 
110 |     expected_nodes = [0, 1, 2, 3, 7, 5, 6, 8, 10, 11, 12, 13, 14, 9]
111 |     assert list(G.nodes) == expected_nodes
112 | 
113 |     expected_edges = [(0, 7), (3, 5), (11, 9)]
114 | 
115 |     assert list(G.edges) == expected_edges
116 | 
117 | 
118 | def test_clustering_heuristics_fit(X_y_generator):
119 |     clustering = dataset_constructors.SingleLinkageHeuristicConstructor(
120 |         sep_comp=False)
121 | 
122 |     with pytest.raises(Exception):
123 |         clustering.transform()
124 | 
125 |     clustering.fit(X_y_generator[0], X_y_generator[1])
126 | 
127 |     G = clustering.transform()
128 | 
129 |     expected_nodes = [0, 11, 2, 9, 3, 4, 5, 6, 7, 8, 10, 12]
130 | 
131 |     assert list(G.nodes) == expected_nodes
132 | 
133 |     expected_edges = [(0, 11), (0, 2), (0, 3), (0, 4),
134 |                       (0, 6), (0, 7), (11, 8), (11, 10),
135 |                       (11, 12), (2, 9), (2, 3), (2, 4),
136 |                       (2, 5), (9, 3), (9, 8), (9, 10),
137 |                       (9, 12), (3, 4), (3, 5), (4, 5),
138 |                       (5, 6), (5, 8), (6, 7), (8, 10),
139 |                       (8, 12)]
140 | 
141 |     assert list(G.edges) == expected_edges
142 | 
143 | 
144 | def test_clustering_heuristics_fit_true_sep_comp(X_y_generator):
145 |     clustering = dataset_constructors.SingleLinkageHeuristicConstructor(
146 |         sep_comp=True)
147 | 
148 |     with pytest.raises(Exception):
149 |         clustering.transform()
150 | 
151 |     clustering.fit(X_y_generator[0], X_y_generator[1])
152 | 
153 |     G = clustering.transform()
154 | 
155 |     expected_nodes = [2, 3, 4, 5, 0, 6, 7, 9, 8, 11, 10, 12]
156 |     assert list(G.nodes) == expected_nodes
157 | 
158 |     expected_edges = [(2, 3), (2, 4), (2, 5), (3, 4), (3, 5),
159 |                       (4, 5), (0, 6), (0, 7), (6, 7), (9, 8),
160 |                       (9, 10), (9, 12), (8, 11), (8, 10),
161 |                       (8, 12), (11, 10), (11, 12)]
162 | 
163 |     assert list(G.edges) == expected_edges
164 | 
165 | 
166 | def test_knn_epsilon_fit(X_y_generator):
167 | 
168 |     eps_knn = dataset_constructors.KNNEpislonRadiusConstructor(
169 |       k=2, epsilon=1.5, sep_comp=False)
170 | 
171 |     with pytest.raises(Exception):
172 |         eps_knn.transform()
173 | 
174 |     eps_knn.fit(X_y_generator[0], X_y_generator[1])
175 | 
176 |     G = eps_knn.transform()
177 | 
178 |     expected_nodes = [i for i in range(13)]
179 |     assert list(G.nodes) == expected_nodes
180 | 
181 |     expected_edges = [(0, 7), (0, 1), (0, 6),
182 |                       (1, 7), (2, 5), (2, 3),
183 |                       (2, 4), (3, 5), (3, 4),
184 |                       (6, 7), (8, 9), (8, 12),
185 |                       (9, 11), (9, 12), (9, 10),
186 |                       (10, 11), (11, 12)]
187 | 
188 |     assert list(G.edges) == expected_edges
189 | 
190 | 
191 | def test_knn_epsilon_fit_true_sep_comp(X_y_generator):
192 | 
193 |     eps_knn = dataset_constructors.KNNEpislonRadiusConstructor(
194 |       k=2, epsilon=1.5, sep_comp=True)
195 | 
196 |     with pytest.raises(Exception):
197 |         eps_knn.transform()
198 | 
199 |     eps_knn.fit(X_y_generator[0], X_y_generator[1])
200 | 
201 |     G = eps_knn.transform()
202 | 
203 |     expected_nodes = [10, 11, 12, 13, 14, 9, 8]
204 |     assert list(G.nodes) == expected_nodes
205 | 
206 |     expected_edges = [(10, 11), (10, 9), (11, 9), (11, 12), (12, 9),
207 |                       (12, 8), (9, 8)]
208 | 
209 |     assert list(G.edges) == expected_edges
210 | 
211 | 
212 | def test_univariate_series_fit(X_time_series_generator):
213 |     constructor = (
214 |         time_series_constructors.UnivariateCorrelationConstructor(
215 |             0.3, 4
216 |         )
217 |     )
218 | 
219 |     constructor.fit(X_time_series_generator[0])
220 |     G = constructor.transform()
221 | 
222 |     expected_nodes = [i for i in range(9)]
223 |     assert list(G.nodes) == expected_nodes
224 | 
225 |     expected_edges = [(0, 0), (0, 4), (0, 5), (1, 1),
226 |                       (1, 6), (2, 2), (2, 7), (3, 3),
227 |                       (3, 8), (4, 4), (4, 5), (5, 5),
228 |                       (6, 6), (7, 7), (8, 8)]
229 | 
230 |     assert list(G.edges) == expected_edges
231 | 
232 |     G = constructor.fit_transform(X_time_series_generator[0])
233 |     assert list(G.nodes) == expected_nodes
234 |     assert list(G.edges) == expected_edges
235 | 
236 | 
237 | def test_univariate_recurrence_fit(X_time_series_generator):
238 |     constructor = (
239 |         time_series_constructors.UnivariateRecurrenceNetworkConstructor(
240 |             10
241 |         )
242 |     )
243 | 
244 |     constructor.fit(X_time_series_generator[0])
245 |     G = constructor.transform()
246 | 
247 |     expected_nodes = [i for i in range(10)]
248 |     assert list(G.nodes) == expected_nodes
249 | 
250 |     expected_edges = [(0, 1), (0, 3), (0, 4), (0, 9),
251 |                       (1, 4), (1, 5), (1, 6), (1, 9),
252 |                       (2, 7), (2, 9), (3, 8), (4, 5),
253 |                       (4, 9), (5, 6), (5, 8), (5, 9),
254 |                       (6, 9)]
255 | 
256 |     assert list(G.edges) == expected_edges
257 | 
258 |     G = constructor.fit_transform(X_time_series_generator[0])
259 |     assert list(G.nodes) == expected_nodes
260 |     assert list(G.edges) == expected_edges
261 | 
262 | 
263 | def test_multivariate_series_fit(X_time_series_generator):
264 |     constructor = (
265 |         time_series_constructors.MultivariateCorrelationConstructor(
266 |             0.1
267 |         )
268 |     )
269 | 
270 |     constructor.fit(X_time_series_generator[1])
271 |     G = constructor.transform()
272 | 
273 |     expected_nodes = [i for i in range(3)]
274 |     assert list(G.nodes) == expected_nodes
275 | 
276 |     expected_edges = [(0, 0), (1, 1), (2, 2)]
277 | 
278 |     assert list(G.edges) == expected_edges
279 | 
280 |     G = constructor.fit_transform(X_time_series_generator[1])
281 |     assert list(G.nodes) == expected_nodes
282 |     assert list(G.edges) == expected_edges
283 | 
284 | 
285 | def test_get_set_params():
286 |     # Time series constructors
287 |     constructor = (
288 |         time_series_constructors.UnivariateCorrelationConstructor()
289 |     )
290 |     param_dict = {'r': 0.3, 'L': 5}
291 |     constructor.set_params(**param_dict)
292 |     assert param_dict == constructor.get_params()
293 | 
294 |     constructor = (
295 |         time_series_constructors.MultivariateCorrelationConstructor()
296 |     )
297 |     param_dict = {'r': 0.3}
298 |     constructor.set_params(**param_dict)
299 |     assert param_dict == constructor.get_params()
300 | 
301 |     constructor = (
302 |         time_series_constructors.UnivariateRecurrenceNetworkConstructor()
303 |     )
304 |     param_dict = {'epsilon': 0.1, 'd': 2, 'tau': 1,
305 |                   'metric': 'euclidean', 'n_jobs': None}
306 |     constructor.set_params(**param_dict)
307 |     assert param_dict == constructor.get_params()
308 | 
309 |     # Dataset constructors
310 |     param_dict = {"k": 3, "epsilon": None, "metric": 'minkowski',
311 |                   "leaf_size": 40, "sep_comp": True}
312 |     constructor = dataset_constructors.KNNConstructor()
313 |     constructor.set_params(**param_dict)
314 |     assert param_dict == constructor.get_params()
315 | 
316 |     param_dict['k'] = None
317 |     param_dict['epsilon'] = 0.1
318 |     constructor = dataset_constructors.EpsilonRadiusConstructor()
319 |     constructor.set_params(**param_dict)
320 |     assert param_dict == constructor.get_params()
321 | 
322 |     param_dict['k'] = 2
323 |     constructor = dataset_constructors.KNNEpislonRadiusConstructor()
324 |     constructor.set_params(**param_dict)
325 |     assert param_dict == constructor.get_params()
326 | 
327 |     constructor = dataset_constructors.SingleLinkageHeuristicConstructor()
328 |     param_dict = {'k': 3, 'lambda_': 0.1,
329 |                   'n_jobs': 2, 'sep_comp': True,
330 |                   'metric': 'euclidean'}
331 |     constructor.set_params(**param_dict)
332 |     assert param_dict == constructor.get_params()
333 | 
334 | 
335 | def test_not_fitted_raise():
336 |     with pytest.raises(Exception):
337 |         dataset_constructors.KNNConstructor().transform()
338 | 
339 |     with pytest.raises(Exception):
340 |         dataset_constructors.EpsilonRadiusConstructor().transform()
341 | 
342 |     with pytest.raises(Exception):
343 |         dataset_constructors.KNNEpislonRadiusConstructor().transform()
344 | 
345 |     with pytest.raises(Exception):
346 |         dataset_constructors.ClusteringHeuristicConstructor().transform()
347 | 
348 |     with pytest.raises(Exception):
349 |         time_series_constructors.UnivariateCorrelationConstructor().transform()
350 | 
351 |     with pytest.raises(Exception):
352 |         time_series_constructors.MultivariateCorrelationConstructor(
353 |         ).transform()
354 | 
355 |     with pytest.raises(Exception):
356 |         time_series_constructors.UnivariateRecurrenceNetworkConstructor(
357 |         ).transform()
358 | 


--------------------------------------------------------------------------------
/sknet/network_construction/time_series_constructors.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | import pandas as pd
  3 | import networkx as nx
  4 | import numpy as np
  5 | 
  6 | from scipy.stats import pearsonr
  7 | from abc import ABCMeta, abstractmethod
  8 | from sklearn.metrics import pairwise_distances
  9 | from gtda.time_series import SingleTakensEmbedding
 10 | 
 11 | 
 12 | class TimeSeriesBaseConstructor(metaclass=ABCMeta):
 13 |     """
 14 |     This class allows to transform a time series into a networkx
 15 |     complex network by using the several different transformation
 16 |     methods
 17 | 
 18 |     Do not use this abstract class, use derived classes instead
 19 |     """
 20 | 
 21 |     def fit(self, X, y=None):
 22 |         """Fit the constructor creating the NetworkX graph
 23 | 
 24 |         Parameters
 25 |         ----------
 26 |         X : {array-like, pandas dataframe} of shape (n_samples, n_features)
 27 |             The input data.
 28 |         y : ignored, used just for API convention
 29 |         """
 30 |         if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
 31 |             X = np.array(X)
 32 | 
 33 |         self.X_ = None
 34 | 
 35 |         self.add_nodes(X)
 36 | 
 37 |         return self
 38 | 
 39 |     def transform(self):
 40 |         """Returns the networkX graph after the constructor is fitted
 41 | 
 42 |         Returns
 43 |         -----
 44 |         G_ : NetworkX graph
 45 |             The network version of the inserted time series data
 46 |         """
 47 |         try:
 48 |             return self.G_
 49 |         except AttributeError:
 50 |             raise Exception("Transformer is not fitted")
 51 | 
 52 |     def get_network(self):
 53 |         """Retrieves the network generated in the constructor class
 54 |         """
 55 |         return self.G_
 56 | 
 57 |     def fit_transform(self, X, y=None):
 58 |         """Fit the constructor creating the NetworkX graph and returns the graph
 59 | 
 60 |         Parameters
 61 |         ----------
 62 |         X : {array-like, pandas dataframe} of shape (n_samples, n_features)
 63 |             The input data.
 64 |         y : ignored, used just for API convention
 65 | 
 66 |         Returns
 67 |         -------
 68 |         G_ : NetworkX graph
 69 |             The network version of the inserted time series data
 70 |         """
 71 |         self.fit(X, y)
 72 |         return self.G_
 73 | 
 74 |     @abstractmethod
 75 |     def add_nodes(self, X, y=None):
 76 |         """Adds a node to the graph"""
 77 | 
 78 |     def set_params(self, **parameters):
 79 |         for parameter, value in parameters.items():
 80 |             setattr(self, parameter, value)
 81 |         return self
 82 | 
 83 | 
 84 | class UnivariateCorrelationConstructor(TimeSeriesBaseConstructor):
 85 |     """
 86 |     Creates a networkX complex network from a univariate time series
 87 |     by splitting it into segments of length L and generating the correlation
 88 |     between those segments
 89 | 
 90 |     Parameters
 91 |     ----------
 92 |     r : float
 93 |         The minimun correlation threshold between two segments
 94 |         to create an edge between them on the network. Value must be
 95 |         between 0 and 1
 96 |     L : int
 97 |         The lenght of each segment to be considered on the correlations
 98 | 
 99 |     Attributes
100 |     ----------
101 |     G : NetworkX graph
102 |         The network version of the inserted time series data
103 | 
104 |     Examples
105 |     --------
106 |     >>> from sknet.network_construction import UnivariateCorrelationConstructor
107 |     >>> r = 0.5
108 |     >>> L = 10
109 |     >>> constructor = UnivariateCorrelationConstructor(r, L)
110 |     >>> constructor.fit(X)
111 |     >>> G_ = constructor.transform()
112 | 
113 |     References
114 |     ----------
115 |     Silva, Thiago & Zhao, Liang. (2016). Machine Learning in
116 |     Complex Networks. 10.1007/978-3-319-17290-3.
117 | 
118 |     Yang, Y., Yang, H.: Complex network-based time series
119 |     analysis. Physica A 387, 1381–1386 (2008)
120 | 
121 |     """
122 |     def __init__(self, r=0.5, L=10):
123 |         self.r = r
124 |         self.L = L
125 |         self.X_ = None
126 | 
127 |     def get_params(self, deep=True):
128 |         return {"r": self.r, 'L': self.L}
129 | 
130 |     def add_nodes(self, X, y=None):
131 |         """Add nodes to an existing network inside a fitted transformer
132 |         object
133 | 
134 |         Parameters
135 |         ----------
136 |         X : {array-like, pandas dataframe} of shape (n_samples, 1)
137 |             The input data.
138 |         y : ignored, used just for API convention
139 |         """
140 |         if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
141 |             X = np.array(X)
142 | 
143 |         if X.shape[1] != 1:
144 |             warnings.warn(
145 |                 """More than one feature identified in the series.
146 |                    For multivariate time series use the
147 |                    MultivariateCorrelationConstructor"""
148 |             )
149 | 
150 |         if self.X_ is not None:
151 |             X = np.vstack((self.X_, X))
152 | 
153 |         # Create the segments of size L
154 |         segments = []
155 |         for i in range(len(X)):
156 |             segment = X[i:self.L + i]
157 |             if len(segment) < self.L:
158 |                 continue
159 |             segments.append(segment)
160 |         C = np.zeros((len(segments), len(segments)))
161 | 
162 |         # Make the correlation matrix
163 |         # Turn into list comprehension later
164 |         for i in range(len(segments)):
165 |             for j in range(len(segments)):
166 |                 C[i][j] = pearsonr(np.array(segments[i]).flatten(),
167 |                                    np.array(segments[j]).flatten())[0]
168 | 
169 |         # Make the D matrix
170 |         C[C < self.r] = 0
171 |         C[C >= self.r] = 1
172 | 
173 |         self.G_ = nx.from_numpy_array(C)
174 | 
175 |         self.X_ = X
176 | 
177 | 
178 | class MultivariateCorrelationConstructor(TimeSeriesBaseConstructor):
179 |     """
180 |     Creates a networkX complex network from a multivariate time series
181 |     by creating edges between highly correlated series
182 | 
183 |     Parameters
184 |     ----------
185 |     r : float
186 |         The minimun correlation threshold between two series
187 |         to create an edge between them on the network. Value must be
188 |         between 0 and 1
189 | 
190 |     Attributes
191 |     ----------
192 |     G_ : NetworkX graph
193 |         The network version of the inserted time series data
194 | 
195 |     Examples
196 |     --------
197 |     >>> from sknet.network_construction import MultivariateCorrelationConstructor  # noqa: E501
198 |     >>> r = 0.5
199 |     >>> L = 10
200 |     >>> constructor = MultivariateCorrelationConstructor(r, L)
201 |     >>> constructor.fit(X)
202 |     >>> G_ = constructor.transform()
203 | 
204 |     References
205 |     ----------
206 |     Silva, Thiago & Zhao, Liang. (2016). Machine Learning in
207 |     Complex Networks. 10.1007/978-3-319-17290-3.
208 | 
209 |     Yang, Y., Yang, H.: Complex network-based time series
210 |     analysis. Physica A 387, 1381–1386 (2008)
211 | 
212 |     """
213 |     def __init__(self, r=0.5):
214 |         self.r = r
215 | 
216 |     def get_params(self, deep=True):
217 |         return {"r": self.r}
218 | 
219 |     def add_nodes(self, X, y=None):
220 |         """Add nodes to an existing network inside a fitted transformer
221 |         object
222 | 
223 |         Parameters
224 |         ----------
225 |         X : {array-like, pandas dataframe} of shape (n_samples, n_features)
226 |             The input data.
227 |         y : ignored, used just for API convention
228 |         """
229 |         if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
230 |             X = np.array(X)
231 | 
232 |         if X.shape[1] == 1:
233 |             warnings.warn(
234 |                 """Only one feature identified in the series.
235 |                    For univariate time series use the
236 |                    UnivariateCorrelationConstructor"""
237 |             )
238 | 
239 |         if self.X_ is not None:
240 |             X = np.vstack((self.X_, X))
241 | 
242 |         C = np.zeros((X.shape[1], X.shape[1]))
243 | 
244 |         # Make the correlation matrix
245 |         # Turn into list comprehension later
246 |         for i in range(X.shape[1]):
247 |             for j in range(X.shape[1]):
248 |                 C[i][j] = pearsonr(X[:, i], X[:, j])[0]
249 | 
250 |         # Make the D matrix
251 |         C[C < self.r] = 0
252 |         C[C >= self.r] = 1
253 | 
254 |         self.G_ = nx.from_numpy_array(C)
255 | 
256 |         self.X_ = X
257 | 
258 | 
259 | class UnivariateRecurrenceNetworkConstructor(TimeSeriesBaseConstructor):
260 |     """
261 |     Creates a networkX complex network from a univariate time series
262 |     by creating edges between recurrent (close) states on the phase space
263 |     of the series.
264 | 
265 |     The phase space is constructed using the Takens Embedding Theorem.
266 | 
267 |     Parameters
268 |     ----------
269 |     epsilon : float, optional (default=0.1)
270 |         The required distance between two states for them to be considered a
271 |         recurrence and hence be connected
272 |     d : int, optional (default=None)
273 |         The dimension of the embedding to be used for the Takens embedding.
274 |         If None and tau is also None, the best parameter will be
275 |         automatically chosen.
276 |     tau : int, optional (default=None)
277 |         The time delay to be used on the Takens Embedding. If None and
278 |         tau is also None, the best parameter will be automatically chosen.
279 |     metric : str, optional (default='euclidean')
280 |         The distance metric to be used to calculate the distance between
281 |         two points on the phase space
282 |     n_jobs : int, optional (default=None)
283 |         The number of parallel processes to be used when applying the
284 |         Takens embedding and when calculating the distances between the
285 |         states. None means 1 core will be used, -1 means use all cores.
286 |     Attributes
287 |     ----------
288 |     G_ : NetworkX graph
289 |         The network version of the inserted time series data
290 | 
291 |     Examples
292 |     --------
293 |     >>> from sknet.network_construction import UnivariateRecurrenceNetworkConstructor  # noqa: E501
294 |     >>> constructor = UnivariateRecurrenceNetworkConstructor(10)
295 |     >>> constructor.fit(X)
296 |     >>> G_ = constructor.transform()
297 | 
298 |     References
299 |     ----------
300 |     Donner, R.V., Zou, Y., Donges, J.F., Marwan, N., Kurths, J.: Recurrence
301 |     networks – a novel paradigm for nonlinear time series analysis.
302 |     New J. Phys. 12, 033025 (2010)
303 | 
304 |     """
305 |     def __init__(self, epsilon=0.1, d=None, tau=None, metric='euclidean',
306 |                  n_jobs=None):
307 |         self.epsilon = epsilon
308 |         self.d = d
309 |         self.tau = tau
310 |         self.metric = metric
311 |         self.n_jobs = n_jobs
312 | 
313 |     def get_params(self, deep=True):
314 |         return {'epsilon': self.epsilon, 'd': self.d, 'tau': self.tau,
315 |                 'metric': self.metric, 'n_jobs': self.n_jobs}
316 | 
317 |     def add_nodes(self, X, y=None):
318 |         """Add nodes to an existing network inside a fitted transformer
319 |         object
320 | 
321 |         Parameters
322 |         ----------
323 |         X : {array-like, pandas dataframe} of shape (n_samples, n_features)
324 |             The input data.
325 |         y : ignored, used just for API convention
326 |         """
327 |         if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
328 |             X = np.array(X)
329 | 
330 |         if X.shape[1] > 1:
331 |             warnings.warn(
332 |                 """More than one feature identified in the series.
333 |                    Recurrence should not be used on multivariate series"""
334 |             )
335 | 
336 |         if self.X_ is not None:
337 |             X = np.vstack((self.X_, X))
338 | 
339 |         # Generate the state space of the time series X
340 |         if self.d is None and self.tau is None:
341 |             takens_dict = {'parameters_type': 'search'}
342 |         else:
343 |             takens_dict = {'parameters_type': 'fixed', 'dimension': self.d,
344 |                            'time_delay': self.tau}
345 | 
346 |         takens_dict['n_jobs'] = self.n_jobs
347 | 
348 |         takens = SingleTakensEmbedding(**takens_dict)
349 | 
350 |         space = takens.fit_transform(X)
351 | 
352 |         # Get distance metric
353 |         R = pairwise_distances(space, metric=self.metric, n_jobs=self.n_jobs)
354 | 
355 |         # Generate the recurrence matrix
356 |         R[R <= self.epsilon] = 1
357 |         R[R > self.epsilon] = 0
358 | 
359 |         # Remove self-loops
360 |         for i in range(R.shape[0]):
361 |             R[i, i] = 0
362 | 
363 |         self.G_ = nx.from_numpy_array(R)
364 | 
365 |         self.X_ = X
366 | 


--------------------------------------------------------------------------------
/sknet/semi_supervised/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .modularity_label_propagation import ModularityLabelPropagation
3 | 


--------------------------------------------------------------------------------
/sknet/semi_supervised/modularity_label_propagation.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import networkx as nx
  3 | 
  4 | from sknet.network_construction import KNNConstructor
  5 | 
  6 | 
  7 | class ModularityLabelPropagation():
  8 |     """
  9 |     Semi-supervised method that propagates labels to instances not
 10 |     classified using the Modularity Propagation method.
 11 | 
 12 |     Parameters
 13 |     ----------
 14 |     reduction_factor : None or list of floats, optional (default=None)
 15 |         If not None, the aggregation algorithm proposed by Silva & Zhao will be
 16 |         applied to reduce the network and speed up the processing. The values
 17 |         on the list will be the reduction factor for each class
 18 | 
 19 |     Attributes
 20 |     ----------
 21 |     generated_y_ : {ndarray, pandas series}, shape (n_samples, 1)
 22 |         The label list
 23 |     generated_G_ : NetworkX Network
 24 |         The constructed network on the fit of the model
 25 | 
 26 |     Examples
 27 |     --------
 28 |     >>> from sklearn.datasets import load_iris
 29 |     >>> from sknet.network_construction import KNNConstructor
 30 |     >>> from sknet.semi_supervised import ModularityLabelPropagation
 31 |     >>> X, y = load_iris(return_X_y = True)
 32 |     >>> knn_c = KNNConstructor(k=5, sep_comp=False)
 33 |     >>> y[10:20] = np.nan
 34 |     >>> y[70:80] = np.nan
 35 |     >>> y[110:120] = np.nan
 36 |     >>> propagator = ModularityLabelPropagation()
 37 |     >>> propagator.fit(X, y, constructor=knn_c)
 38 |     >>> propagator.generated_y_
 39 |     array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
 40 |        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
 41 |        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
 42 |        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 43 |        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 44 |        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 2.,
 45 |        2., 2., 2., 2., 2., 2., 2., 2., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
 46 |        1., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
 47 |        2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.])
 48 | 
 49 |     References
 50 |     ----------
 51 |     Silva, Thiago & Zhao, Liang. (2012). Semi-Supervised Learning Guided
 52 |     by the Modularity Measure in Complex Networks. Neurocomputing. 78.
 53 |     30-37. 10.1016/j.neucom.2011.04.042.
 54 | 
 55 |     Silva, Thiago & Zhao, Liang. (2016). Machine Learning in Complex
 56 |     Networks. 10.1007/978-3-319-17290-3.
 57 | 
 58 |     """
 59 |     def __init__(self, reduction_factor=None, random_state=None):
 60 |         self.estimator_type = 'classifier'
 61 |         self.reduction_factor = reduction_factor
 62 |         self.random_state = random_state
 63 |         np.random.seed(random_state)  # Arrumar
 64 | 
 65 |     def set_params(self, **parameters):
 66 |         for parameter, value in parameters.items():
 67 |             setattr(self, parameter, value)
 68 |         return self
 69 | 
 70 |     def get_params(self, deep=True):
 71 |         return {'reduction_factor': self.reduction_factor,
 72 |                 'random_state': self.random_state}
 73 | 
 74 |     def fit(self, X=None, y=None, G=None,
 75 |             constructor=KNNConstructor(5, sep_comp=False)):
 76 |         """Fit the propagator by using the modularity measure
 77 |         to propagate the labels to non-labeled examples
 78 | 
 79 |         Parameters
 80 |         ----------
 81 |         X : {array-like, pandas dataframe} of shape
 82 |         (n_samples, n_features), optional (default=None)
 83 |             The input data samples. Can be None if G is set.
 84 |         y : {ndarray, pandas series}, shape (n_samples,) or
 85 |         (n_samples, n_classes), optional (default=None)
 86 |             The target classes. Can be None if G is set. Missing labels
 87 |             should have the np.nan value
 88 |         G : NetworkX Network, optional (default=None)
 89 |             The network with missing labels to be propagated. Can be
 90 |             None if X and y are not None in which case the constructor
 91 |             will be used to generate the network. Labels must be into
 92 |             the data of each node with the 'class' key. Missing labels
 93 |             should be valued np.nan
 94 |         constructor : BaseConstructor inhrerited class, optional(default=
 95 |             KNNConstructor(5, sep_comp=False))
 96 |             A constructor class to transform the tabular data into a
 97 |             network. It can be set to None if a complex network is directly
 98 |             passed to the ``fit`` method. Notice that you should use 'sep_com'
 99 |             as False on the constructor.
100 | 
101 |         """
102 |         self.constructor = constructor
103 |         if y is None and G is None:
104 |             raise Exception('Both y and G are None!')
105 | 
106 |         if self.constructor is None and G is None:
107 |             raise Exception(
108 |                 'You either have to set the constructor or the network'
109 |             )
110 | 
111 |         if y is not None and self.constructor is not None:
112 |             G = self.constructor.fit_transform(X, y)
113 |         elif y is None and G is not None:
114 |             y = np.array([node[1]['class'] for node in G.nodes(data=True)])
115 | 
116 |         if self.reduction_factor is not None:
117 |             if not isinstance(self.reduction_factor, list):
118 |                 raise Exception('Reduction_factor must be a list or None')
119 | 
120 |             if np.max(self.reduction_factor) > 1 or np.min(self.reduction_factor) < 0:  # noqa: E501
121 |                 raise Exception('Reduction_factor must be between 0 and 1')
122 | 
123 |             if len(np.unique(y[~np.isnan(y)])) != len(self.reduction_factor):
124 |                 raise Exception('The number of reduction factors must be equal'
125 |                       ' to the number of classes')
126 | 
127 |         missing_elements = len(y[np.isnan(y)])
128 | 
129 |         if self.reduction_factor is not None:
130 |             original_G = G.copy()
131 |             original_y = y.copy()
132 | 
133 |             G = self._reduce_graph(G, y)
134 | 
135 |             positions_dict = {i: node for i, node in enumerate(list(G.nodes()))}  # noqa: E501
136 |             G = nx.convert_node_labels_to_integers(G)
137 |             y = np.array([node[1]['class'] for node in G.nodes(data=True)])
138 | 
139 |         # Generate modularity matrix
140 |         Q = self._increment_modularity_matrix(G)
141 | 
142 |         while missing_elements != 0:
143 |             propagated = False
144 | 
145 |             while not propagated:
146 |                 # Select the i and j of argmax
147 |                 i, j = np.unravel_index(Q.argmax(), Q.shape)
148 | 
149 |                 Q[i][j] = -np.inf
150 |                 Q[j][i] = -np.inf
151 | 
152 |                 if y[i] != y[j]:
153 |                     if (~np.isnan(y[i])) and (~np.isnan(y[j])):
154 |                         continue
155 |                     if np.isnan(y[i]):
156 |                         y[i] = y[j]
157 |                         G.nodes[i]['class'] = y[i]
158 |                         propagated = True
159 |                     if np.isnan(y[j]):
160 |                         y[j] = y[i]
161 |                         G.nodes[j]['class'] = y[j]
162 |                         propagated = True
163 |                 else:
164 |                     continue
165 | 
166 |             missing_elements = len(y[np.isnan(y)])
167 | 
168 |         if self.reduction_factor is not None:
169 |             for key in positions_dict:
170 |                 original_y[positions_dict[key]] = y[key]
171 |                 original_G.nodes[positions_dict[key]]['class'] = y[key]  # noqa: E501
172 | 
173 |             y = original_y
174 |             G = original_G
175 | 
176 |         self.generated_y_ = y
177 |         self.generated_G_ = G
178 | 
179 |         return self
180 | 
181 |     def get_propagated_labels(self):
182 |         """
183 |         Return the labels list with the propagated classes
184 | 
185 |         Returns
186 |         -------
187 |         generated_y_ : {ndarray, pandas series}, shape (n_samples, 1)
188 |             The label list
189 |         """
190 | 
191 |         return self.generated_y_
192 | 
193 |     def get_propagated_network(self):
194 |         """
195 |         Returns the generated network with the propagated labels
196 | 
197 |         Returns
198 |         --------
199 |         generated_G_ : NetworkX Network
200 |             The constructed network on the fit of the model"""
201 | 
202 |         return self.generated_G_
203 | 
204 |     def _increment_modularity_matrix(self, G):
205 |         N = len(G.nodes)
206 |         E = len(G.edges)
207 |         k = [val for (node, val) in G.degree()]
208 | 
209 |         Q = [[0 for i in range(N)] for j in range(N)]
210 | 
211 |         for i in range(N):
212 |             for j in range(N):
213 |                 if i not in G.neighbors(j):
214 |                     Q[i][j] = 0
215 |                 else:
216 |                     Q[i][j] = (1/(2*E)) - (k[i]*k[j])/((2*E)**2)
217 |         return np.array(Q)
218 | 
219 |     def _reduce_graph(self, G, y):
220 |         """
221 |         Reduce the graph using the algorithm from Silva & Zhao (2012)
222 | 
223 |         Parameters
224 |         ----------
225 |         G : NetworkX Network
226 |             The network to be reduced
227 |         y : {ndarray, pandas series}, shape (n_samples,)
228 |             The label list
229 | 
230 |         Returns
231 |         -------
232 |         G : NetworkX Network
233 |             The reduced network
234 |         """
235 |         G = G.copy()
236 |         classes = np.unique(y[~np.isnan(y)])
237 |         classes.sort()
238 |         for idx, class_ in enumerate(classes):
239 |             factor = self.reduction_factor[idx]
240 | 
241 |             if factor == 0:
242 |                 continue
243 | 
244 |             N = len([i for i in G.nodes(data=True) if i[1]['class'] == class_])  # noqa: E501
245 |             N_tilda = N
246 | 
247 |             if factor != 1:
248 |                 desired_value = round((1-factor) * N)
249 |             else:
250 |                 desired_value = 1
251 | 
252 |             while(N_tilda != desired_value):
253 |                 # Randomly select two nodes from the class
254 |                 nodes = np.random.choice(
255 |                     [i[0] for i in G.nodes(data=True) if i[1]['class'] == class_],  # noqa: E501
256 |                     size=2,
257 |                     replace=False)
258 | 
259 |                 # Get the edges from first node
260 |                 edges = [i for i in G.edges(nodes[0])]
261 | 
262 |                 # Remove the first node from the network
263 |                 G.remove_node(nodes[0])
264 | 
265 |                 # Remove self-loops
266 |                 G.remove_edges_from(nx.selfloop_edges(G))
267 | 
268 |                 # Redistribute the edges from the first node to the second node
269 |                 for edge in edges:
270 |                     # Avoid self-loops
271 |                     if edge[0] == edge[1]:
272 |                         continue
273 |                     if edge[0] == nodes[0]:
274 |                         G.add_edge(nodes[1], edge[1])
275 |                     else:
276 |                         G.add_edge(edge[0], nodes[1])
277 | 
278 |                 N_tilda = len([i for i in G.nodes(data=True) if i[1]['class'] == class_])  # noqa: E501
279 | 
280 |         # Remove any possible remaining self-loop
281 |         G.remove_edges_from(nx.selfloop_edges(G))
282 |         return G
283 | 


--------------------------------------------------------------------------------
/sknet/semi_supervised/tests/test_modularity_label_propagation.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | import numpy as np
  3 | 
  4 | from sklearn.datasets import load_iris
  5 | 
  6 | from sknet.network_construction import KNNConstructor
  7 | from sknet.semi_supervised import ModularityLabelPropagation
  8 | 
  9 | 
 10 | @pytest.fixture
 11 | def X_y_generator():
 12 | 
 13 |     X, y = load_iris(return_X_y=True)
 14 |     y = np.array(y, dtype='float32')
 15 |     y[10:40] = np.nan
 16 |     y[60:70] = np.nan
 17 |     y[110:140] = np.nan
 18 | 
 19 |     return X, y
 20 | 
 21 | 
 22 | @pytest.fixture
 23 | def result_generator():
 24 |     result = [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
 25 |               0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
 26 |               0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
 27 |               0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
 28 |               0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 29 |               1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 30 |               1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 31 |               1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 32 |               1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
 33 |               2., 2., 1., 2., 2., 2., 2., 2., 2., 2., 2., 1.,
 34 |               2., 2., 2., 1., 2., 2., 1., 1., 2., 2., 2., 2.,
 35 |               2., 1., 2., 2., 2., 2., 1., 2., 2., 2., 2., 2.,
 36 |               2., 2., 2., 2., 2., 2.]
 37 | 
 38 |     return result
 39 | 
 40 | 
 41 | def test_fit_y(X_y_generator, result_generator):
 42 |     knn_c = KNNConstructor(k=5, sep_comp=False)
 43 |     ML = ModularityLabelPropagation()
 44 |     ML.fit(X_y_generator[0], X_y_generator[1], constructor=knn_c)
 45 | 
 46 |     np.testing.assert_equal(result_generator,
 47 |                             np.array(ML.get_propagated_labels(),
 48 |                                      dtype='float32')
 49 |                             )
 50 | 
 51 | 
 52 | def test_fit_G(X_y_generator, result_generator):
 53 |     knn_c = KNNConstructor(k=5, sep_comp=False)
 54 |     G = knn_c.fit_transform(X_y_generator[0], X_y_generator[1])
 55 |     ML = ModularityLabelPropagation()
 56 |     ML.fit(G=G)
 57 | 
 58 |     np.testing.assert_equal(result_generator,
 59 |                             np.array(ML.get_propagated_labels(),
 60 |                                      dtype='float32')
 61 |                             )
 62 | 
 63 | 
 64 | def test_set_get_params():
 65 |     ML = ModularityLabelPropagation()
 66 |     ML.set_params(reduction_factor=None)
 67 |     assert ML.get_params() == {'reduction_factor': None,
 68 |                                'random_state': None}
 69 | 
 70 | 
 71 | def test_raise_on_fit_1(X_y_generator):
 72 |     ML = ModularityLabelPropagation()
 73 |     with pytest.raises(Exception):
 74 |         ML.fit(X=X_y_generator[0], y=None, G=None)
 75 | 
 76 | 
 77 | def test_raise_on_fit_2(X_y_generator):
 78 |     ML = ModularityLabelPropagation()
 79 |     with pytest.raises(Exception):
 80 |         ML.fit(X=X_y_generator[0], y=X_y_generator[1], G=None,
 81 |                constructor=None)
 82 | 
 83 | 
 84 | def test_raises_on_aggregation(X_y_generator):
 85 |     knn_c = KNNConstructor(k=5, sep_comp=False)
 86 | 
 87 |     ML = ModularityLabelPropagation(reduction_factor=0.3)
 88 |     with pytest.raises(Exception):
 89 |         ML.fit(X_y_generator[0], X_y_generator[1], constructor=knn_c)
 90 | 
 91 |     ML = ModularityLabelPropagation(reduction_factor=[0.5, 0.2])
 92 |     with pytest.raises(Exception):
 93 |         ML.fit(X_y_generator[0], X_y_generator[1], constructor=knn_c)
 94 | 
 95 |     ML = ModularityLabelPropagation(reduction_factor=[2, 13, 9])
 96 |     with pytest.raises(Exception):
 97 |         ML.fit(X_y_generator[0], X_y_generator[1], constructor=knn_c)
 98 | 
 99 | 
100 | def test_aggregation(X_y_generator):
101 |     knn_c = KNNConstructor(k=5, sep_comp=False)
102 |     ML = ModularityLabelPropagation(reduction_factor=[0.5, 0.5, 0],
103 |                                     random_state=42)
104 |     ML.fit(X_y_generator[0], X_y_generator[1], constructor=knn_c)
105 | 
106 |     print(ML.get_propagated_labels())
107 |     print(len(ML.get_propagated_labels()))
108 | 
109 |     expected_result = [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
110 |                        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
111 |                        0., 0., 0.,  0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
112 |                        0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1.,
113 |                        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
114 |                        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
115 |                        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
116 |                        1., 1., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 1., 2.,
117 |                        2., 2., 2., 2., 2., 2., 2., 1., 2., 2., 2., 2., 2., 2.,
118 |                        2., 1., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
119 |                        2., 2., 2., 2., 2., 2., 2., 2., 2., 2.]
120 |     np.testing.assert_equal(expected_result,
121 |                             np.array(ML.get_propagated_labels(),
122 |                                      dtype='float32')
123 |                             )
124 | 


--------------------------------------------------------------------------------
/sknet/supervised/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .ease_of_access import EaseOfAccessClassifier
3 | from .ease_of_access import EaseOfAccessRegressor
4 | from .high_level_classification import HighLevelClassifier
5 | 


--------------------------------------------------------------------------------
/sknet/supervised/ease_of_access.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | import networkx as nx
  4 | 
  5 | from scipy.stats import mode
  6 | from abc import ABCMeta, abstractmethod
  7 | from sklearn.neighbors import DistanceMetric
  8 | 
  9 | from sknet.network_construction import KNNConstructor
 10 | 
 11 | 
 12 | class EaseOfAccess(metaclass=ABCMeta):
 13 |     """
 14 |     Ease of Access method to learn network patterns on data
 15 | 
 16 |     Parameters
 17 |     ----------
 18 |     epsilon : float, default=0.2
 19 |         The perturbance to be applied to the weights matrix after the
 20 |         insertion of the test data
 21 |     t : int, default=3
 22 |         Number of points on the convergence probabilities vector
 23 |         to classify the test data
 24 |     method : str, default='eigenvalue'
 25 |         Which method to use to compute the markov chain limiting
 26 |         probabilties. Options are 'eigenvalue' and 'power'.
 27 | 
 28 |     Attributes
 29 |     ----------
 30 |     constructor_ : BaseConstructor inhrerited class
 31 |         The transformer used to transform the tabular data into network
 32 |     epsilon : float
 33 |         The disturbance applied to the weights matrix
 34 |     t : int
 35 |         Number of points used on the convergence probabilities
 36 |     method : str
 37 |         Method used to compute the limiting probabilities of the Markov chain
 38 |     G_ : NetworkX network
 39 |         The network generated from the tabular data
 40 |     W_ : {array-like, pandas dataframe} of shape (n_samples, n_samples)
 41 |         The adjacency matrix of the network G
 42 |     X_ : {array-like, pandas dataframe} of shape (n_samples, n_features)
 43 |         The used tabular data features
 44 |     y_ : {ndarray, pandas series}, shape (n_samples,) or (n_samples, n_classes)
 45 |         The classes of each node
 46 | 
 47 |     Notes
 48 |     -----
 49 |     Do not use this abstract class, use derived classes instead
 50 | 
 51 |     References
 52 |     ----------
 53 |     Cupertino, T.H., Zhao, L., Carneiro, M.G.: Network-based supervised data
 54 |     classification by using an heuristic of ease of access. Neurocomputing
 55 |     149(Part A), 86–92 (2015)
 56 | 
 57 |     Silva, Thiago & Zhao, Liang. (2016). Machine Learning in Complex
 58 |     Networks. 10.1007/978-3-319-17290-3.
 59 | 
 60 |     """
 61 | 
 62 |     def __init__(self, epsilon=0.2, t=3, method='eigenvalue'):
 63 |         self.epsilon = epsilon
 64 |         self.t = t
 65 |         self.method = method
 66 | 
 67 |     def set_params(self, **parameters):
 68 |         for parameter, value in parameters.items():
 69 |             setattr(self, parameter, value)
 70 |         return self
 71 | 
 72 |     def get_params(self, deep=True):
 73 |         return {'epsilon': self.epsilon, 't': self.t, 'method': self.method}
 74 | 
 75 |     def fit(self, X, y, G=None, constructor=KNNConstructor(5, sep_comp=True)):
 76 |         """
 77 |         Fit the model, internalizing the graph that should be used
 78 | 
 79 |         Parameters
 80 |         ----------
 81 |         X : {array-like, pandas dataframe} of shape (n_samples, n_features)
 82 |             The input data.
 83 |         y : {ndarray, pandas series}, shape (n_samples,) or (n_samples,
 84 |         n_classes)
 85 |             The true classes.
 86 |         G : NetworkX Graph, default=None
 87 |             If the graph was already generated, then this parameter will
 88 |             make as so the transformer is not called
 89 |         constructor : BaseConstructor inhrerited class, optional(default=
 90 |         KNNConstructor(5, sep_comp=True))
 91 |             A constructor class to transform the tabular data into a
 92 |             network
 93 | 
 94 |         Notes
 95 |         -----
 96 |         Even though the G can be passed directly, X is required so the
 97 |         distance between test classes and the other nodes on the graph
 98 |         can be calculated
 99 | 
100 |         According to the paper implementation, the network should not
101 |         have separated components for each class, if passing an already
102 |         created network to the method, be mindful of that
103 | 
104 |         """
105 |         self.constructor_ = constructor
106 |         if G is None:
107 |             # Generates the graph from X and y
108 |             self.constructor_.set_sep_comp(False)
109 |             self.G_ = self.constructor_.fit_transform(X, y)
110 |         else:
111 |             self.G_ = G
112 | 
113 |         # Transforms X into undirected
114 |         if nx.is_directed(self.G_):
115 |             self.G_ = self.G_.to_undirected()
116 | 
117 |         # Generates W matrix
118 |         self.W_ = nx.to_numpy_array(self.G_)
119 | 
120 |         self.X_ = X
121 |         self.y_ = y
122 | 
123 |         return self
124 | 
125 |     def predict(self, X):
126 |         """
127 |         Predicts the labels of the test samples from X
128 | 
129 |         Parameters
130 |         ----------
131 |         X : {array-like, pandas dataframe} of shape (n_samples, n_features)
132 |             The test data to be predicted.
133 | 
134 |         Returns
135 |         -------
136 |         predictions : array-like of shape (n_samples)
137 |             The predicted label for each sample
138 |         """
139 |         predictions = []
140 |         dist = self._get_distance_metric()
141 | 
142 |         for x in X:
143 | 
144 |             # For each instance, calculates the s vector
145 |             s = [dist.pairwise((x, x_t))[1][0] for x_t in self.X_]
146 |             L = len(s)
147 | 
148 |             # Generates the S_tilda vector
149 |             S_tilda = np.zeros((L, L))
150 | 
151 |             for i in range(L):
152 |                 S_tilda[i] = [s[i]] * L
153 | 
154 |             # Alter the weight matrix
155 |             w_tilda = self.W_ + self.epsilon * S_tilda
156 | 
157 |             # Generates probability matrix
158 |             norm_factor = np.sum(w_tilda, axis=1)
159 |             P = [x / norm_factor[i] for i, x in enumerate(w_tilda)]
160 |             P = np.array(P).reshape(w_tilda.shape)
161 | 
162 |             # Computes the convergence
163 |             self.P_inf = self._stationary_distribution(P, self.method)
164 | 
165 |             # Associates each class with the probabilities
166 |             res = pd.DataFrame()
167 |             res['prob'] = self.P_inf
168 |             res['y'] = self.y_
169 |             res.sort_values('prob', inplace=True, ascending=False)
170 | 
171 |             # Gets the t classes from P_inf and set to the majority
172 |             self.tau_ = res.iloc[:self.t]
173 | 
174 |             predictions.append(self._aggregation_method(self.tau_))
175 | 
176 |         return predictions
177 | 
178 |     @abstractmethod
179 |     def _aggregation_method(self, tau):
180 |         """Defines which aggregation method to use"""
181 | 
182 |     def _get_distance_metric(self):
183 |         metric = self.constructor_.metric
184 | 
185 |         if type(metric) is str:
186 |             return DistanceMetric.get_metric(metric)
187 | 
188 |         return metric
189 | 
190 |     def _stationary_distribution(self, W, method):
191 | 
192 |         if method == 'power':
193 |             return np.linalg.matrix_power(np.array(W), 50)[0]
194 | 
195 |         elif method == 'eigenvalue':
196 |             evals, evecs = np.linalg.eig(np.array(W).T)
197 |             evec1 = evecs[:, np.isclose(evals, 1)]
198 | 
199 |             evec1 = evec1[:, 0]
200 | 
201 |             stationary = evec1 / evec1.sum()
202 | 
203 |             return stationary.real
204 | 
205 |         else:
206 |             raise Exception("{} is not an available method to calculate the markov chain \
207 |                 convergence. Available methods are 'power' and \
208 |                 'eigenvalue'".format(method))
209 | 
210 | 
211 | class EaseOfAccessClassifier(EaseOfAccess):
212 |     """
213 |     Ease of Access Classifier
214 | 
215 |     Classifier that uses the heuristic of ease of access to classify
216 |     new instances inside a network
217 | 
218 |     Parameters
219 |     ----------
220 |     epsilon : float, default=0.2
221 |         The perturbance to be applied to the weights matrix after the
222 |         insertion of the test data
223 |     t : int, deafult=3
224 |         Number of points on the convergence probabilities vector
225 |         to classify the test data
226 |     method : str, default='eigenvalue'
227 |         Which method to use to compute the markov chain limiting
228 |         probabilties. Options are 'eigenvalue' and 'power'.
229 | 
230 |     Attributes
231 |     ----------
232 |     constructor_ : BaseConstructor inhrerited class
233 |         The constructor used to transform the tabular data into network
234 |     epsilon : float
235 |         The disturbance applied to the weights matrix
236 |     t : int
237 |         Number of points used on the convergence probabilities
238 |     method : str
239 |         Method used to compute the limiting probabilities of the Markov chain
240 |     G_ : NetworkX network
241 |         The network generated from the tabular data
242 |     W_ : {array-like, pandas dataframe} of shape (n_samples, n_samples)
243 |         The adjacency matrix of the network G
244 |     X_ : {array-like, pandas dataframe} of shape (n_samples, n_features)
245 |         The used tabular data features
246 |     y_ : {ndarray, pandas series}, shape (n_samples,) or (n_samples, n_classes)
247 |         The classes of each node
248 | 
249 |     Examples
250 |     --------
251 |     >>> from sklearn.datasets import load_iris
252 |     >>> from sknet.network_construction import KNNConstructor
253 |     >>> from sknet.supervised import EaseOfAccessClassifier
254 |     >>> X, y = load_iris(return_X_y = True)
255 |     >>> X_train, X_test, y_train, y_test = train_test_split(X, y,
256 |                                                             test_size=0.33)
257 |     >>> knn_c = KNNConstructor(k=5)
258 |     >>> classifier = EaseOfAccessClassifier(t=5)
259 |     >>> classifier.fit(X_train, y_train, constructor=knn_c)
260 |     >>> ease = classifier.predict(X_test)
261 |     >>> accuracy_score(y_test, ease)
262 |     0.92
263 | 
264 |     Notes
265 |     -----
266 | 
267 |     References
268 |     ----------
269 |     Cupertino, T.H., Zhao, L., Carneiro, M.G.: Network-based supervised data
270 |     classification by using an heuristic of ease of access. Neurocomputing
271 |     149(Part A), 86–92 (2015)
272 | 
273 |     Silva, Thiago & Zhao, Liang. (2016). Machine Learning in Complex
274 |     Networks. 10.1007/978-3-319-17290-3.
275 | 
276 |     """
277 | 
278 |     _estimator_type = 'classifier'
279 | 
280 |     def __init__(self, epsilon=0.2, t=3, method='eigenvalue'):
281 |         super().__init__(epsilon, t, method)
282 | 
283 |     def _aggregation_method(self, tau):
284 |         return mode(tau['y'])[0][0]
285 | 
286 | 
287 | class EaseOfAccessRegressor(EaseOfAccess):
288 |     """
289 |     Ease of Access Regressor
290 | 
291 |     Regressor that uses the heuristic of ease of access to classify
292 |     the real-value of the target of new instances inside a network
293 | 
294 |     Parameters
295 |     ----------
296 |     epsilon : float, default=0.2
297 |         The perturbance to be applied to the weights matrix after the
298 |         insertion of the test data
299 |     t : int, deafult=3
300 |         Number of points on the convergence probabilities vector
301 |         to classify the test data
302 |     method : str, default='eigenvalue'
303 |         Which method to use to compute the markov chain limiting
304 |         probabilties. Options are 'eigenvalue' and 'power'.
305 | 
306 |     Attributes
307 |     ----------
308 |     constructor_ : BaseConstructor inhrerited class
309 |         The constructor used to transform the tabular data into network
310 |     epsilon : float
311 |         The disturbance applied to the weights matrix
312 |     t : int
313 |         Number of points used on the convergence probabilities
314 |     method : str
315 |         Method used to compute the limiting probabilities of the Markov chain
316 |     G_ : NetworkX network
317 |         The network generated from the tabular data
318 |     W_ : {array-like, pandas dataframe} of shape (n_samples, n_samples)
319 |         The adjacency matrix of the network G
320 |     X_ : {array-like, pandas dataframe} of shape (n_samples, n_features)
321 |         The used tabular data features
322 |     y_ : {ndarray, pandas series}, shape (n_samples,) or (n_samples, n_classes)
323 |         The classes of each node
324 | 
325 |     Examples
326 |     --------
327 |     >>> from sklearn.datasets import load_boston
328 |     >>> from dataset_constructors import KNNConstructor
329 |     >>> from ease_of_access import EaseOfAccessRegressor
330 |     >>> X, y = load_boston(return_X_y = True)
331 |     >>> X_train, X_test, y_train, y_test = train_test_split(X, y,
332 |                                                             test_size=0.33)
333 |     >>> knn_c = KNNConstructor(k=5)
334 |     >>> reg = EaseOfAccessRegressor(t=5)
335 |     >>> reg.fit(X_train, y_train, constructor=knn_c)
336 |     >>> ease = reg.predict(X_test)
337 | 
338 |     Notes
339 |     -----
340 | 
341 |     References
342 |     ----------
343 |     Cupertino, T.H., Zhao, L., Carneiro, M.G.: Network-based supervised data
344 |     classification by using an heuristic of ease of access. Neurocomputing
345 |     149(Part A), 86–92 (2015)
346 | 
347 |     Silva, Thiago & Zhao, Liang. (2016). Machine Learning in Complex
348 |     Networks. 10.1007/978-3-319-17290-3.
349 | 
350 |     """
351 | 
352 |     _estimator_type = 'regressor'
353 | 
354 |     def __init__(self, epsilon=0.2, t=3, method='eigenvalue'):
355 |         super().__init__(epsilon, t, method)
356 | 
357 |     def _aggregation_method(self, tau):
358 |         return tau['y'].mean()
359 | 


--------------------------------------------------------------------------------
/sknet/supervised/high_level_classification.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | import numpy as np
  3 | from tqdm import tqdm
  4 | 
  5 | from sknet.utils import NetworkMetricsHandler
  6 | from sknet.utils import LowLevelModelsHandler
  7 | 
  8 | from sknet.network_construction import KNNConstructor
  9 | 
 10 | 
 11 | class HighLevelClassifier():
 12 |     """
 13 |     Classifies a dataset using a high-level approach where the predictions
 14 |     from a low-level model (standard ML) and a high-level model (Complex
 15 |     Network) are combined to generate a final inference about the class of
 16 |     each data point.
 17 | 
 18 |     Parameters
 19 |     ----------
 20 |     low_level : str, optional(default='random_forest')
 21 |         The low-level model to be used. See available options on the
 22 |         low_level_models_handler documentation
 23 |     p : float, optional(default=0.5)
 24 |         The weight to be used on the ponderation between the
 25 |         low-level and the high-level model predictions. The formula
 26 |         is:
 27 |         ``(1 - p) * low_level + p * high_level``
 28 |         This number should be less or equal than one
 29 |     alphas : list of floats, optional(default=[0.5, .5])
 30 |         The weight to be used on each high-level metric for the
 31 |         classification. This list should sum up to one.
 32 |     metrics: list of str, optional(default=['clustering_coefficient',
 33 |                                             'assortativity'])
 34 |         Which complex networks metrics to use to generate the high-level
 35 |         prediction. See available options on the network_metrics_handler
 36 |     low_level_parameters : dict, optional(default={})
 37 |         Parameters to be set on the low-level classifier
 38 | 
 39 |     Attributes
 40 |     ----------
 41 |     constructor_ : BaseConstructor inhrerited class
 42 |         The transformer used to transform the tabular data into network
 43 |     low_level_pred_ : {ndarray, pandas series}, shape (n_samples, n_classes)
 44 |         The probability of each class from the low-level prediction
 45 |     high_level_pred_ : {ndarray, pandas series}, shape (n_samples, n_classes)
 46 |         The probability of each class from the high-level prediction
 47 |     original_constructor_ : NetworkX Network
 48 |         The constructed network on the fit of the model
 49 | 
 50 |     Examples
 51 |     --------
 52 |     >>> from sklearn.datasets import load_iris
 53 |     >>> from sknet.network_construction import KNNConstructor
 54 |     >>> from sknet.supervised import HighLevelClassifier
 55 |     >>> X, y = load_iris(return_X_y = True)
 56 |     >>> X_train, X_test, y_train, y_test = train_test_split(X, y,
 57 |                                                             test_size=0.33)
 58 |     >>> knn_c = KNNConstructor(k=5)
 59 |     >>> classifier = HighLevelClassifier(t=5)
 60 |     >>> classifier.fit(X_train, y_train, constructor=knn_c)
 61 |     >>> pred = classifier.predict(X_test)
 62 | 
 63 |     References
 64 |     ----------
 65 |     Silva, T.C., Zhao, L.: Network-based high level data classification.
 66 |     IEEE Trans. Neural Netw. Learn. Syst. 23(6), 954–970 (2012)
 67 | 
 68 |     Silva, Thiago & Zhao, Liang. (2016). Machine Learning in Complex
 69 |     Networks. 10.1007/978-3-319-17290-3.
 70 | 
 71 |     """
 72 |     _estimator_type = 'classifier'
 73 | 
 74 |     def __init__(self, low_level='random_forest',
 75 |                  p=0.5, alphas=[0.5, 0.5],
 76 |                  metrics=['clustering_coefficient', 'assortativity'],
 77 |                  low_level_parameters={}):
 78 |         self.p = p
 79 |         self.alphas = alphas
 80 |         self.metrics = metrics
 81 |         self.low_level = low_level
 82 |         self.low_level_parameters = low_level_parameters
 83 |         self.metrics = metrics
 84 | 
 85 |     def set_params(self, **parameters):
 86 |         for parameter, value in parameters.items():
 87 |             setattr(self, parameter, value)
 88 |         return self
 89 | 
 90 |     def get_params(self, deep=True):
 91 |         return {'p': self.p, 'alphas': self.alphas, 'metrics': self.metrics,
 92 |                 'low_level': self.low_level,
 93 |                 'low_level_parameters': self.low_level_parameters,
 94 |                 'self.metrics': self.metrics}
 95 | 
 96 |     def fit(self, X, y, G=None, constructor=KNNConstructor(5)):
 97 |         """Fit the classifier by fitting the low-level model and
 98 |         creating the high-level classification network
 99 | 
100 |         Parameters
101 |         ----------
102 |         X : {array-like, pandas dataframe} of shape (n_samples, n_features)
103 |             The input data samples
104 |         y : {ndarray, pandas series}, shape (n_samples,) or
105 |         (n_samples, n_classes), default=None
106 |             The true classes
107 |         G : NetworkX Graph, default=None
108 |             If the graph was already generated, then this parameter will
109 |             make as so the transformer is not called. Notice that each class
110 |             should be formed of only one class
111 |         constructor : BaseConstructor inhrerited class, optional(default=
112 |         KNNConstructor(5))
113 |             A constructor class to transform the tabular data into a
114 |             network
115 | 
116 |         """
117 |         self.constructor_ = constructor
118 | 
119 |         # Basic configuration
120 |         self.metrics_handler = NetworkMetricsHandler()
121 |         self.low_level_handler = LowLevelModelsHandler()
122 | 
123 |         self.low_level_model = self.low_level_handler.get_model(
124 |             self.low_level, self.low_level_parameters
125 |         )
126 |         self.metric_func = []
127 |         self.default_values = []
128 |         for metric in self.metrics:
129 |             self.metric_func.append(self.metrics_handler.get_metric(metric))
130 |             self.default_values.append(self.metrics_handler.get_default_value(
131 |                 metric)
132 |             )
133 | 
134 |         assert self.p <= 1
135 | 
136 |         if np.sum(self.alphas) != 1:
137 |             raise ValueError('Alphas should sum to one')
138 | 
139 |         # Fits the constructor to generate the network
140 |         if G is not None:
141 |             self.G_ = G
142 |         else:
143 |             self.constructor_.set_sep_comp(True)
144 |             self.G_ = self.constructor_.fit_transform(X, y)
145 | 
146 |         # Fits the low level model
147 |         self.low_level_model.fit(X, y)
148 | 
149 |         self.X = X
150 |         self.y = y
151 | 
152 |         return self
153 | 
154 |     def predict_proba(self, X_test):
155 |         """Predicts the probability, for each test sample
156 |         that it belongs to any of the training classes
157 | 
158 |         Parameters
159 |         ----------
160 |         X : {array-like, pandas dataframe} of shape (n_samples, n_features)
161 |             The input data samples
162 | 
163 |         """
164 |         # Gets the low level predictions
165 |         self.low_level_pred_ = self.low_level_model.predict_proba(X_test)
166 | 
167 |         classes = np.unique(self.y)
168 |         self.high_level_pred_ = np.zeros((len(X_test), len(classes)))
169 |         total_training_nodes = len(self.G_.nodes)
170 |         class_proportions = np.zeros((len(classes)))
171 | 
172 |         # We need to keep the original constructor
173 |         self.original_constructor = copy.deepcopy(self.constructor_)
174 | 
175 |         for i, class_ in enumerate(classes):
176 |             label_ind = np.where(self.y == class_)
177 |             X_ = np.take(self.X, label_ind, axis=0)[0]
178 | 
179 |             class_proportions[i] = len(X_) / total_training_nodes
180 | 
181 |         for i, x in tqdm(enumerate(X_test)):
182 |             original_G = self.original_constructor.get_network()
183 | 
184 |             delta_G = np.zeros((len(self.metric_func), len(classes)))
185 |             # Tries to put the node into each component
186 |             for class_id, class_ in enumerate(classes):
187 | 
188 |                 # Selects subset of original G
189 |                 original_G_sub = self._get_subgraph(original_G, class_)
190 | 
191 |                 # Adds the node to the network on the component of the class
192 |                 singleton = False
193 |                 self.constructor_.add_nodes([x], [class_])
194 |                 new_G = self.constructor_.get_network()
195 |                 new_G_sub = self._get_subgraph(new_G, class_)
196 | 
197 |                 # Verifies if the added node has a neighbor
198 |                 node = list(new_G_sub.nodes())[-1]
199 | 
200 |                 if new_G_sub.adj[node] == {}:
201 |                     singleton = True
202 | 
203 |                 f = np.zeros((len(self.metric_func)))
204 | 
205 |                 # Gets the variation from the addition
206 |                 for idx, metric in enumerate(self.metric_func):
207 |                     if not singleton:
208 |                         original = metric(original_G_sub)
209 |                         new = metric(new_G_sub)
210 |                         delta_G[idx][class_id] = original - new
211 |                     else:
212 |                         delta_G[idx][class_id] = self.default_values[idx]
213 | 
214 |                 # Return the original constructor
215 |                 self.constructor_ = copy.deepcopy(self.original_constructor)
216 | 
217 |             delta_G / delta_G.sum(axis=1)[:, np.newaxis]
218 |             f = delta_G * class_proportions
219 | 
220 |             for k, f_ in enumerate(f):
221 | 
222 |                 self.high_level_pred_[i] = self.alphas[k] * (1 - f_)
223 | 
224 |         # Normalize the high_level_pred
225 |         self.high_level_pred_ = (
226 |             self.high_level_pred_ / self.high_level_pred_.sum(
227 |                 axis=1)[:, np.newaxis]
228 |             )
229 | 
230 |         final_pred = (
231 |             (1 -
232 |                 self.p) * self.low_level_pred_ + self.p * self.high_level_pred_
233 |         )
234 | 
235 |         return final_pred
236 | 
237 |     def predict(self, X_test):
238 |         """Predicts the class for each test sample
239 | 
240 |         Parameters
241 |         ----------
242 |         X : {array-like, pandas dataframe} of shape (n_samples, n_features)
243 |             The input data samples
244 | 
245 |         """
246 |         predictions = np.argmax(self.predict_proba(X_test), axis=1)
247 |         return predictions
248 | 
249 |     def _get_subgraph(self, G, class_):
250 |         nodes = (node for node, data in G.nodes(data=True)
251 |                  if data.get('class') == class_)
252 | 
253 |         return G.subgraph(nodes)
254 | 


--------------------------------------------------------------------------------
/sknet/supervised/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TNanukem/scikit-net/df8534268bc04787340e64a6eac2ee4beddc3068/sknet/supervised/tests/__init__.py


--------------------------------------------------------------------------------
/sknet/supervised/tests/test_ease_of_access.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | import numpy as np
  3 | import pandas as pd
  4 | 
  5 | from sklearn.datasets import load_iris, load_boston
  6 | from sklearn.model_selection import train_test_split
  7 | 
  8 | from sknet.network_construction import KNNConstructor
  9 | from sknet.supervised import EaseOfAccessClassifier, EaseOfAccessRegressor
 10 | 
 11 | 
 12 | @pytest.fixture
 13 | def X_y_generator_classification():
 14 | 
 15 |     X, y = load_iris(return_X_y=True)
 16 |     X_train, X_test, y_train, y_test = train_test_split(X, y,
 17 |                                                         test_size=0.33,
 18 |                                                         random_state=42)
 19 | 
 20 |     return X_train, y_train, X_test, y_test
 21 | 
 22 | 
 23 | @pytest.fixture
 24 | def X_y_generator_regression():
 25 | 
 26 |     X, y = load_boston(return_X_y=True)
 27 |     X = X[:150, :]
 28 |     y = y[:150]
 29 |     X_train, X_test, y_train, y_test = train_test_split(X, y,
 30 |                                                         test_size=0.33,
 31 |                                                         random_state=42)
 32 | 
 33 |     return X_train, y_train, X_test, y_test
 34 | 
 35 | 
 36 | @pytest.fixture
 37 | def module_generator_eigen_classifier(X_y_generator_classification):
 38 |     knn = KNNConstructor(k=3)
 39 |     classifier = EaseOfAccessClassifier(t=5)
 40 |     classifier.fit(X_y_generator_classification[0],
 41 |                    X_y_generator_classification[1], constructor=knn)
 42 | 
 43 |     return classifier
 44 | 
 45 | 
 46 | @pytest.fixture
 47 | def module_generator_power_classifier(X_y_generator_classification):
 48 |     knn = KNNConstructor(k=3)
 49 |     classifier = EaseOfAccessClassifier(t=5, method='power')
 50 |     classifier.fit(X_y_generator_classification[0],
 51 |                    X_y_generator_classification[1], constructor=knn)
 52 | 
 53 |     return classifier
 54 | 
 55 | 
 56 | @pytest.fixture
 57 | def module_generator_eigen_regressor(X_y_generator_regression):
 58 |     knn = KNNConstructor(k=3)
 59 |     regressor = EaseOfAccessRegressor(t=5)
 60 |     regressor.fit(X_y_generator_regression[0],
 61 |                   X_y_generator_regression[1], constructor=knn)
 62 | 
 63 |     return regressor
 64 | 
 65 | 
 66 | @pytest.fixture
 67 | def module_generator_power_regressor(X_y_generator_regression):
 68 |     knn = KNNConstructor(k=3)
 69 |     regressor = EaseOfAccessRegressor(t=5, method='power')
 70 |     regressor.fit(X_y_generator_regression[0],
 71 |                   X_y_generator_regression[1], constructor=knn)
 72 | 
 73 |     return regressor
 74 | 
 75 | 
 76 | @pytest.fixture
 77 | def class_generator_classifier(module_generator_eigen_classifier,
 78 |                                module_generator_power_classifier,
 79 |                                X_y_generator_classification):
 80 | 
 81 |     pred_eig = module_generator_eigen_classifier.predict(
 82 |         X_y_generator_classification[2]
 83 |     )
 84 |     pred_power = module_generator_power_classifier.predict(
 85 |         X_y_generator_classification[2]
 86 |     )
 87 | 
 88 |     return (module_generator_eigen_classifier,
 89 |             module_generator_power_classifier, pred_eig, pred_power)
 90 | 
 91 | 
 92 | def test__stationary_distribution_classifier(class_generator_classifier):
 93 |     np.testing.assert_almost_equal(class_generator_classifier[0].P_inf,
 94 |                                    class_generator_classifier[1].P_inf)
 95 | 
 96 |     pd.testing.assert_frame_equal(class_generator_classifier[0].tau_,
 97 |                                   class_generator_classifier[1].tau_)
 98 | 
 99 | 
100 | @pytest.fixture
101 | def class_generator_regressor(module_generator_eigen_regressor,
102 |                               module_generator_power_regressor,
103 |                               X_y_generator_regression):
104 | 
105 |     pred_eig = module_generator_eigen_regressor.predict(
106 |         X_y_generator_regression[2]
107 |     )
108 |     pred_power = module_generator_power_regressor.predict(
109 |         X_y_generator_regression[2]
110 |     )
111 | 
112 |     return (module_generator_eigen_regressor,
113 |             module_generator_power_regressor, pred_eig, pred_power)
114 | 
115 | 
116 | def test__stationary_distribution_regressor(class_generator_regressor):
117 |     np.testing.assert_almost_equal(class_generator_regressor[0].P_inf,
118 |                                    class_generator_regressor[1].P_inf)
119 | 
120 |     pd.testing.assert_frame_equal(class_generator_regressor[0].tau_,
121 |                                   class_generator_regressor[1].tau_)
122 | 
123 | 
124 | def test_predictions_classifier(class_generator_classifier):
125 | 
126 |     expected = [1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1,
127 |                 2, 1, 1, 2, 0, 1, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0,
128 |                 1, 0, 0, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0, 1, 2, 2,
129 |                 1, 2]
130 | 
131 |     eigen_pred = class_generator_classifier[2]
132 |     power_pred = class_generator_classifier[3]
133 | 
134 |     assert eigen_pred == power_pred == expected
135 | 
136 | 
137 | def test_predictions_regressor(class_generator_regressor):
138 | 
139 |     expected = [21.28, 18.0, 17.26, 20.74, 17.2, 15.62, 20.98,
140 |                 17.759999999999998, 19.52, 23.479999999999997,
141 |                 20.080000000000002, 19.64, 18.76, 15.62, 18.22,
142 |                 22.779999999999998, 18.560000000000002, 19.52,
143 |                 21.619999999999997, 17.759999999999998, 15.440000000000001,
144 |                 16.04, 15.440000000000001, 16.66, 16.860000000000003, 18.0,
145 |                 16.259999999999998, 17.619999999999997, 25.080000000000002,
146 |                 15.5, 14.62, 18.6, 21.660000000000004, 17.619999999999997,
147 |                 20.1, 18.0, 19.860000000000003, 17.5, 21.24, 17.5,
148 |                 17.619999999999997, 25.22, 25.340000000000003,
149 |                 19.639999999999997, 14.66, 19.619999999999997,
150 |                 16.9, 16.04, 22.52, 17.6]
151 | 
152 |     eigen_pred = class_generator_regressor[2]
153 |     power_pred = class_generator_regressor[3]
154 | 
155 |     assert eigen_pred == power_pred == expected
156 | 
157 | 
158 | def test_raise_on_predict(X_y_generator_classification):
159 | 
160 |     knn = KNNConstructor(k=3)
161 |     classifier = EaseOfAccessClassifier(t=5, method='something')
162 | 
163 |     with pytest.raises(Exception):
164 |         classifier.fit(X_y_generator_classification[0],
165 |                        X_y_generator_classification[1], constructor=knn)
166 |         classifier.predict(X_y_generator_classification[2])
167 | 
168 | 
169 | def test_set_get_params(module_generator_eigen_classifier):
170 |     classifier = module_generator_eigen_classifier
171 |     classifier.set_params(t=5)
172 |     assert classifier.get_params()['t'] == 5
173 | 


--------------------------------------------------------------------------------
/sknet/supervised/tests/test_high_level_classification.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import numpy as np
 3 | 
 4 | from sklearn.datasets import load_iris
 5 | from sklearn.model_selection import train_test_split
 6 | 
 7 | from sknet.network_construction import KNNConstructor
 8 | from sknet.supervised import HighLevelClassifier
 9 | 
10 | 
11 | @pytest.fixture
12 | def X_y_generator():
13 | 
14 |     X, y = load_iris(return_X_y=True)
15 |     X_train, X_test, y_train, y_test = train_test_split(X, y,
16 |                                                         test_size=0.33,
17 |                                                         random_state=42)
18 | 
19 |     return X_train, y_train, X_test, y_test
20 | 
21 | 
22 | @pytest.fixture
23 | def module_generator(X_y_generator):
24 |     knn = KNNConstructor(k=3)
25 |     classifier = HighLevelClassifier(
26 |         'random_forest', 0.5, [0.5, 0.5],
27 |         ['clustering_coefficient', 'assortativity']
28 |     )
29 |     classifier.fit(X_y_generator[0], X_y_generator[1], constructor=knn)
30 | 
31 |     return classifier
32 | 
33 | 
34 | def test_predict(module_generator, X_y_generator):
35 | 
36 |     expected = [1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0,
37 |                 1, 2, 1, 1, 2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 0,
38 |                 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0, 0,
39 |                 1, 1, 2, 1, 2]
40 |     pred = module_generator.predict(X_y_generator[2])
41 |     np.testing.assert_equal(expected, pred)
42 | 
43 | 
44 | def test_set_get_param(module_generator):
45 |     module_generator.set_params(p=0.2)
46 |     assert module_generator.get_params()['p'] == 0.2
47 | 
48 | 
49 | def test_alpha_raise_on_fit(module_generator):
50 |     module_generator.set_params(alpha=[0.7, 0.9])
51 |     with pytest.raises(Exception):
52 |         module_generator.fit(X_y_generator[0], X_y_generator[1])
53 | 


--------------------------------------------------------------------------------
/sknet/unsupervised/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .stochastic_particle_competition import StochasticParticleCompetition


--------------------------------------------------------------------------------
/sknet/unsupervised/stochastic_particle_competition.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import networkx as nx
  3 | 
  4 | from sknet.network_construction import KNNConstructor
  5 | 
  6 | 
  7 | class StochasticParticleCompetition():
  8 |     """
  9 |     Non supervised method that uses a stochastic particle competition to
 10 |     group the data into K clusters.
 11 | 
 12 |     This class still has major performance issues, taking too long to
 13 |     converge. Further optimization shall happen, be advised when using
 14 | 
 15 |     Parameters
 16 |     ----------
 17 |     K : int, optional(default=3)
 18 |         The number of particles to compete which will be the number of
 19 |         resulting clusters
 20 |     lambda_ : float, optional(default=0.5)
 21 |         The probability of a particle choosing the preferential movement
 22 |         (exploitation) against the random movement (exploration)
 23 |     delta : int, optional(default=10)
 24 |         The amount of energy gained at each step for each particle
 25 |     omega_max : float, optional(default=10)
 26 |         The maximum amount of energy that a particle can have at any given time
 27 |     omega_min : float, optional(default=1)
 28 |         The minimum amount of energy before a particle is exhausted
 29 |     epsilon : float, optional(default=0.01)
 30 |         The minimum difference between the dominance matrix variation
 31 |         before finishing the competition.
 32 |     n_iter : int, optional(default=500)
 33 |         The maximum number of steps before finishing the competition.
 34 |         The process will stop when either the convergence happens given epsilon
 35 |         or the maximum number of steps is reached
 36 | 
 37 |     Attributes
 38 |     ----------
 39 |     clusters_ : {ndarray, pandas series}, shape (n_samples, 1)
 40 |         The cluster of each sample
 41 | 
 42 |     Examples
 43 |     --------
 44 |     >>> from sklearn.datasets import load_iris
 45 |     >>> from sknet.network_construction import KNNConstructor
 46 |     >>> from sknet.unsupervised import StochasticParticleCompetition
 47 |     >>> X, y = load_iris(return_X_y = True)
 48 |     >>> knn_c = KNNConstructor(k=5, sep_comp=False)
 49 |     >>> SCP = StochasticParticleCompetition()
 50 |     >>> SCP.fit(X, y, constructor=knn_c)
 51 |     >>> SCP.clusters_
 52 |     array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
 53 |        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
 54 |        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
 55 |        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 56 |        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 57 |        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 2.,
 58 |        2., 2., 2., 2., 2., 2., 2., 2., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
 59 |        1., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
 60 |        2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.])
 61 | 
 62 |     References
 63 |     ----------
 64 |     T. C. Silva and L. Zhao, "Stochastic Competitive Learning in Complex
 65 |     Networks," in IEEE Transactions on Neural Networks and Learning
 66 |     Systems, vol. 23, no. 3, pp. 385-398, March 2012,
 67 |     doi: 10.1109/TNNLS.2011.2181866.
 68 | 
 69 |     Silva, Thiago & Zhao, Liang. (2016). Machine Learning in Complex
 70 |     Networks. 10.1007/978-3-319-17290-3.
 71 | 
 72 |     """
 73 |     _estimator_type = 'clusterer'
 74 | 
 75 |     def __init__(self, K=3, lambda_=0.5, delta=0.1,
 76 |                  omega_max=10, omega_min=1, epsilon=0.01, n_iter=500,
 77 |                  random_state=None):
 78 |         self.K = K
 79 |         self.lambda_ = lambda_
 80 |         self.delta = delta
 81 |         self.epsilon = epsilon
 82 |         self.omega_max = omega_max
 83 |         self.omega_min = omega_min
 84 |         self.n_iter = n_iter
 85 |         self.random_state = random_state
 86 |         np.random.seed(self.random_state)  # Arrumar
 87 | 
 88 |     def set_params(self, **parameters):
 89 |         for parameter, value in parameters.items():
 90 |             setattr(self, parameter, value)
 91 |         return self
 92 | 
 93 |     def get_params(self, deep=True):
 94 |         return {'K': self.K, 'lambda_': self.lambda_, 'delta': self.delta,
 95 |                 'omega_max': self.omega_max, 'omega_min': self.omega_min,
 96 |                 'n_iter': self.n_iter, 'random_state': self.random_state}
 97 | 
 98 |     def fit(self, X=None, y=None, G=None,
 99 |             constructor=KNNConstructor(5, sep_comp=False)):
100 |         """Fit the algorithms by using the particle competition
101 |         to cluster the data points
102 | 
103 |         Parameters
104 |         ----------
105 |         X : {array-like, pandas dataframe} of shape
106 |             (n_samples, n_features), optional (default=None)
107 |             The input data samples. Can be None if G is set.
108 |         y : {ndarray, pandas series}, shape (n_samples,) or
109 |             (n_samples, n_classes), optional (default=None)
110 |             The target classes. Ignored for this class, used only
111 |             to keep API consistency
112 |         G : NetworkX Network, optional (default=None)
113 |             The network to have its communities detected. Can be
114 |             None if X is not None in which case the constructor
115 |             will be used to generate the network.
116 |         constructor : BaseConstructor inhrerited class, optional(
117 |             default=KNNConstructor(5, sep_comp=False))
118 |             A constructor class to transform the tabular data into a
119 |             network. It can be set to None if a complex network is directly
120 |             passed to the ``fit`` method. Notice that you should use 'sep_com'
121 |             as False on the constructor.
122 | 
123 |         """
124 |         self.constructor = constructor
125 |         if X is None and G is None:
126 |             raise Exception('X or G must be defined')
127 | 
128 |         if X is None and G is not None:
129 |             self.G = G
130 |         else:
131 |             self.G = self.constructor.fit_transform(X, y)
132 | 
133 |         A = nx.to_numpy_array(self.G)
134 |         self.V = A.shape[0]
135 | 
136 |         P_pref = np.zeros((self.V, self.V, self.K))
137 | 
138 |         P_rean = np.zeros((self.V, self.V, self.K))
139 | 
140 |         P_rand = self._create_p_rand(A)
141 | 
142 |         # Set the initial random position of the particles
143 |         node_list = np.array(list(self.G))
144 |         p = np.random.choice(node_list, self.K, False)
145 | 
146 |         # Calculate initial N
147 |         N = self._calculate_initial_N(p)
148 | 
149 |         N_bar = self._calculate_initial_N_bar(N)
150 | 
151 |         # Calculate initial E
152 |         initial_energy = self.omega_min + (
153 |             (self.omega_max - self.omega_min) / self.K
154 |         )
155 |         E = np.array([initial_energy] * self.K)
156 | 
157 |         # Calculate initial S
158 |         S = np.zeros(self.K)
159 | 
160 |         P_tran = np.zeros((self.V, self.V, self.K))
161 | 
162 |         convergence = False
163 |         t = 0
164 |         while not convergence and t < self.n_iter:
165 | 
166 |             # Updates the movement matrices
167 |             P_pref = self._calculate_P_pref(A, N_bar)
168 | 
169 |             P_rean = self._calculate_P_rean(N_bar)
170 | 
171 |             P_tran = self._calculate_P_tran(P_rand,
172 |                                             P_pref, P_rean, S, -1)
173 | 
174 |             p = self._choose_next_vertices(P_tran, p)
175 | 
176 |             N = self._update_N(p, N)
177 |             old_N_Bar = N_bar.copy()
178 |             N_bar = self._update_N_bar(N)
179 |             E = self._update_E(E, N_bar, p)
180 |             S = self._update_S(E)
181 | 
182 |             # Update time and verify convergence
183 |             t += 1
184 |             convergence = self._verify_convergence(N_bar, old_N_Bar)
185 | 
186 |         self.clusters_ = np.argmax(N_bar, axis=1)
187 | 
188 |         return self
189 | 
190 |     def predict(self, X=None, G=None):
191 |         """
192 |         Returns the clusters after the model was fitted.
193 | 
194 |         Parameters
195 |         ----------
196 | 
197 |         X : {array-like, pandas dataframe} of shape
198 |             (n_samples, n_features), optional (default=None)
199 |             Ignored on this method
200 |         G : NetworkX Network, optional (default=None)
201 |             Ignored on this method
202 |         """
203 |         return self.clusters_
204 | 
205 |     def fit_predict(self, X=None, y=None, G=None):
206 |         """Fit the algorithms by using the particle competition
207 |         to cluster the data points
208 | 
209 |         Parameters
210 |         ----------
211 |         X : {array-like, pandas dataframe} of shape
212 |             (n_samples, n_features), optional (default=None)
213 |             The input data samples. Can be None if G is set.
214 |         y : {ndarray, pandas series}, shape (n_samples,) or
215 |             (n_samples, n_classes), optional (default=None)
216 |             The target classes. Ignored for this class, used only
217 |             to keep API consistency
218 |         G : NetworkX Network, optional (default=None)
219 |             The network to have its communities detected. Can be
220 |             None if X is not None in which case the constructor
221 |             will be used to generate the network.
222 | 
223 |         Returns
224 |         -------
225 |         clusters_ : {array-like} of shape (n_samples)
226 |                    The cluster of each data point
227 | 
228 |         """
229 |         self.fit(X, y, G)
230 |         return self.predict()
231 | 
232 |     def _verify_convergence(self, N_bar, old_N_bar):
233 |         diff = np.sum(np.abs(N_bar - old_N_bar))
234 |         return diff < self.epsilon
235 | 
236 |     def _create_p_rand(self, A):
237 |         P_rand = A / A.sum(axis=1, keepdims=True)
238 |         return P_rand
239 | 
240 |     def _calculate_initial_N(self, p):
241 |         N = np.ones((self.V, self.K))
242 |         for k, i in enumerate(p):
243 |             N[int(i)][k] = 2
244 |         return N
245 | 
246 |     def _calculate_initial_N_bar(self, N):
247 |         N_bar = N/N.sum(axis=1, keepdims=True)
248 |         return N_bar
249 | 
250 |     def _calculate_P_pref(self, A, N_bar):
251 |         aux = np.zeros((self.V, self.V, self.K))
252 | 
253 |         num = [[[A[i, j] * N_bar[j, k] for k in range(self.K)
254 |                  ] for j in range(self.V)] for i in range(self.V)]
255 |         den = [[[np.sum([
256 |             A[i, l_]*N_bar[l_, k] for l_ in range(self.V)
257 |             ]) for k in range(self.K)] for j in range(self.V)
258 |             ] for i in range(self.V)]
259 |         aux[:, :, :] = np.divide(np.array(num), np.array(den))
260 | 
261 |         return aux
262 | 
263 |     def _calculate_P_rean(self, N_bar):
264 |         aux = np.zeros((self.V, self.V, self.K))
265 | 
266 |         for k in range(self.K):
267 |             den = np.sum(
268 |                     [np.argmax(N_bar[u, :]) == k for u in range(self.V)]
269 |                 )
270 |             for j in range(self.V):
271 |                 num = 0
272 |                 if np.argmax(N_bar[j, :]) == k:
273 |                     num = 1
274 | 
275 |                 aux[:, j, k] = [num/den for i in range(self.V)]
276 | 
277 |         return aux
278 | 
279 |     def _calculate_P_tran(self, P_rand, P_pref, P_rean, S, t):
280 |         aux = np.zeros((self.V, self.V, self.K))
281 |         for k in range(self.K):
282 | 
283 |             non_exhausted = (
284 |                 1 - S[k]) * (
285 |                     self.lambda_ * P_pref[:, :, k] + (
286 |                         1 - self.lambda_) * P_rand
287 |                     )
288 | 
289 |             exhausted = S[k] * P_rean[:, :, k]
290 |             aux[:, :, k] = non_exhausted + exhausted
291 | 
292 |         return aux
293 | 
294 |     def _choose_next_vertices(self, P_tran, p):
295 |         aux = np.zeros(self.K)
296 |         for k in range(self.K):
297 |             aux[k] = np.random.choice(
298 |                 [i for i in range(self.V)],
299 |                 p=P_tran[int(p[k]), :, k]
300 |             )
301 | 
302 |         return aux
303 | 
304 |     def _update_N(self, p, N):
305 |         aux = N.copy()
306 |         for k, i in enumerate(p):
307 |             aux[int(i), k] = aux[int(i), k] + 1
308 | 
309 |         return aux
310 | 
311 |     def _update_N_bar(self, N):
312 |         N_bar_updated = self._calculate_initial_N_bar(N)
313 | 
314 |         return N_bar_updated
315 | 
316 |     def _update_E(self, E, N_bar, p):
317 |         aux = np.zeros(self.K)
318 |         for k in range(self.K):
319 |             if self._is_owner(k, p, N_bar):
320 |                 aux[k] = min(E[k] + self.delta, self.omega_max)
321 |             else:
322 |                 aux[k] = max(E[k] - self.delta, self.omega_min)
323 | 
324 |         return aux
325 | 
326 |     def _update_S(self, E):
327 |         aux = np.zeros(self.K)
328 |         for k in range(self.K):
329 |             if E[k] == self.omega_min:
330 |                 aux[k] = 1
331 |             else:
332 |                 aux[k] = 0
333 | 
334 |         return aux
335 | 
336 |     def _is_owner(self, k, p, N_bar):
337 |         if np.argmax(N_bar[int(p[k]), :]) == k:
338 |             return True
339 |         else:
340 |             return False
341 | 


--------------------------------------------------------------------------------
/sknet/unsupervised/tests/test_stochastic_particle_competition.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import numpy as np
 3 | 
 4 | from sklearn.datasets import load_iris
 5 | 
 6 | from sknet.network_construction import KNNConstructor
 7 | from sknet.unsupervised import StochasticParticleCompetition
 8 | 
 9 | 
10 | @pytest.fixture
11 | def X_y_generator():
12 | 
13 |     X, y = load_iris(return_X_y=True)
14 |     y = np.array(y, dtype='float32')
15 | 
16 |     return X, y
17 | 
18 | 
19 | @pytest.fixture
20 | def result_generator():
21 |     result = [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
22 |               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
23 |               0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
24 |               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
25 |               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
26 |               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0,
27 |               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0,
28 |               0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
29 | 
30 |     return result
31 | 
32 | 
33 | def test_fit_X(X_y_generator, result_generator):
34 |     knn_c = KNNConstructor(k=5, sep_comp=False)
35 |     SPC = StochasticParticleCompetition(random_state=42, n_iter=3)
36 |     SPC.fit(X_y_generator[0], constructor=knn_c)
37 | 
38 |     np.testing.assert_equal(result_generator,
39 |                             np.array(SPC.clusters_,
40 |                                      dtype='float32')
41 |                             )
42 | 
43 | 
44 | def test_fit_G(X_y_generator, result_generator):
45 |     knn_c = KNNConstructor(k=5, sep_comp=False)
46 |     G = knn_c.fit_transform(X_y_generator[0], X_y_generator[1])
47 |     SPC = StochasticParticleCompetition(random_state=42, n_iter=3)
48 |     SPC.fit(G=G)
49 | 
50 |     np.testing.assert_equal(result_generator,
51 |                             np.array(SPC.clusters_,
52 |                                      dtype='float32')
53 |                             )
54 | 


--------------------------------------------------------------------------------
/sknet/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .network_metrics_handler import NetworkMetricsHandler
3 | from .low_level_models_handler import LowLevelModelsHandler
4 | from .network_types_handler import NetworkTypesHandler


--------------------------------------------------------------------------------
/sknet/utils/low_level_models_handler.py:
--------------------------------------------------------------------------------
 1 | from sklearn.svm import SVC, SVR
 2 | from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
 3 | from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 4 | 
 5 | 
 6 | class LowLevelModelsHandler():
 7 |     """"""
 8 | 
 9 |     def __init__(self):
10 |         self.mapper = self._generate_models_mapper()
11 | 
12 |     def _generate_models_mapper(self):
13 |         mapper = {
14 |             'random_forest': [RandomForestClassifier, RandomForestRegressor],
15 |             'svm': [SVC, SVR],
16 |             'knn': [KNeighborsClassifier, KNeighborsRegressor],
17 |         }
18 |         return mapper
19 | 
20 |     def get_model(self, model, parameters, type_='classification'):
21 |         index = False
22 |         if type_ == 'classification':
23 |             index = 0
24 |         elif type_ == 'regression':
25 |             index = 1
26 | 
27 |         low_level_model = self.mapper[model][index]
28 |         low_level_model.set_params(parameters)
29 | 
30 |         return low_level_model()
31 | 


--------------------------------------------------------------------------------
/sknet/utils/network_metrics_handler.py:
--------------------------------------------------------------------------------
 1 | import networkx as nx
 2 | 
 3 | 
 4 | class NetworkMetricsHandler():
 5 |     """"""
 6 | 
 7 |     def __init__(self):
 8 |         self.mapper = self._generate_metrics_mapper()
 9 |         self.default_values = self._default_values_mapper()
10 | 
11 |     def _generate_metrics_mapper(self):
12 |         mapper = {
13 |             'assortativity': nx.degree_assortativity_coefficient,
14 |             'clustering_coefficient': nx.average_clustering,
15 |             'average_degree': nx.average_degree_connectivity,
16 |             'transitivity': nx.transitivity,
17 |             'connectivity': nx.average_node_connectivity,
18 |         }
19 |         return mapper
20 | 
21 |     def _default_values_mapper(self):
22 |         mapper = {
23 |             'assortativity': 2,
24 |             'clustering_coefficient': 1,
25 |             'average_degree': 0,
26 |             'transitivity': 0,
27 |             'connectivity': 0,
28 |         }
29 |         return mapper
30 | 
31 |     def get_metric(self, metric):
32 |         return self.mapper[metric]
33 | 
34 |     def get_default_value(self, metric):
35 |         return self.default_values[metric]
36 | 


--------------------------------------------------------------------------------
/sknet/utils/network_types_handler.py:
--------------------------------------------------------------------------------
 1 | import networkx as nx
 2 | 
 3 | 
 4 | class NetworkTypesHandler():
 5 |     """"""
 6 | 
 7 |     def __init__(self):
 8 |         self.mapper = self._generate_types_mapper()
 9 | 
10 |     def _generate_types_mapper(self):
11 |         mapper = {
12 |             'graph': nx.Graph,
13 |             'digraph': nx.DiGraph,
14 |             'multi_graph': nx.MultiGraph,
15 |             'multi_digraph': nx.MultiDiGraph,
16 |         }
17 |         return mapper
18 | 
19 |     def get_net(self, metric):
20 |         return self.mapper[metric]
21 | 


--------------------------------------------------------------------------------
/sknet/utils/tests/test_network_types_handler.py:
--------------------------------------------------------------------------------
 1 | import networkx as nx
 2 | 
 3 | from sknet.utils import NetworkTypesHandler
 4 | 
 5 | 
 6 | def test_handler_mapper():
 7 |     handler = NetworkTypesHandler()
 8 |     assert handler.mapper == {'graph': nx.Graph,
 9 |                               'digraph': nx.DiGraph,
10 |                               'multi_graph': nx.MultiGraph,
11 |                               'multi_digraph': nx.MultiDiGraph}
12 | 
13 | 
14 | def test_get_net():
15 |     handler = NetworkTypesHandler()
16 |     assert handler.get_net('graph') == nx.Graph
17 |     assert handler.get_net('digraph') == nx.DiGraph
18 |     assert handler.get_net('multi_graph') == nx.MultiGraph
19 |     assert handler.get_net('multi_digraph') == nx.MultiDiGraph
20 | 


--------------------------------------------------------------------------------
/templates/issue.md:
--------------------------------------------------------------------------------
 1 | ## Issue type
 2 | [Bug Fix, Performance Improvement, Documentation, New Feature/Algorithm, Code Design Improvement]
 3 | 
 4 | ## Issue description
 5 | Brief description of the issue
 6 | 
 7 | ## Replicable code
 8 | If it is a bug, minimal replicable code
 9 | 
10 | ## Traceback and Versioning
11 | In case it is a bug, post full version of dependencies and possible tracebacks
12 | 
13 | ## Possible solutions and/or where to start
14 | If you already have something in mind, give here an initial roadmap for the solution
15 | 
16 | ## References and other comments
17 | If you have some references or other comments, use this section


--------------------------------------------------------------------------------
/templates/pull_request.md:
--------------------------------------------------------------------------------
 1 | # Title
 2 | The title of your Pull Request
 3 | 
 4 | ## Related Issue
 5 | Please link to the issue related to your PR
 6 | 
 7 | ## Description of the problem and solution
 8 | Briefly describe the problem stated on the issue and extensively describe your
 9 | implemented solution
10 | 
11 | ## Benchmarks
12 | If you PR is aimed at improving performance, link useful benchmarks so the reviewers
13 | can assure an improvement was made
14 | 
15 | ## Dependencies
16 | If any new dependency is added, or any dependency is change, state it here
17 | 
18 | ## Breaking stuff
19 | In case your change breaks anything on the main repo, state it here
20 | 
21 | ## Code example
22 | Add a code snippet so we can use your change and test it
23 | 
24 | ## Other comments
25 | In case you have any other comment about this PR, use this section


--------------------------------------------------------------------------------