├── .circleci
    └── config.yml
├── .coveragerc
├── .gitignore
├── .pep8speaks.yml
├── .readthedocs.yml
├── CONTRIBUTING.md
├── LICENSE.txt
├── MANIFEST.in
├── README.rst
├── benchmarks
    ├── bench_ds_performance_faiss.py
    ├── bench_knn_backbone.py
    └── bench_speed_faiss.py
├── deslib
    ├── __init__.py
    ├── base.py
    ├── dcs
    │   ├── __init__.py
    │   ├── a_posteriori.py
    │   ├── a_priori.py
    │   ├── base.py
    │   ├── lca.py
    │   ├── mcb.py
    │   ├── mla.py
    │   ├── ola.py
    │   └── rank.py
    ├── des
    │   ├── __init__.py
    │   ├── base.py
    │   ├── des_clustering.py
    │   ├── des_knn.py
    │   ├── des_mi.py
    │   ├── des_p.py
    │   ├── knop.py
    │   ├── knora_e.py
    │   ├── knora_u.py
    │   ├── meta_des.py
    │   └── probabilistic
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── deskl.py
    │   │   ├── exponential.py
    │   │   ├── logarithmic.py
    │   │   ├── minimum_difference.py
    │   │   └── rrc.py
    ├── static
    │   ├── __init__.py
    │   ├── base.py
    │   ├── oracle.py
    │   ├── single_best.py
    │   ├── stacked.py
    │   └── static_selection.py
    ├── tests
    │   ├── __init__.py
    │   ├── conftest.py
    │   ├── dcs
    │   │   ├── __init__.py
    │   │   ├── test_a_posteriori.py
    │   │   ├── test_a_priori.py
    │   │   ├── test_base.py
    │   │   ├── test_lca.py
    │   │   ├── test_mcb.py
    │   │   ├── test_mla.py
    │   │   ├── test_ola.py
    │   │   └── test_rank.py
    │   ├── des
    │   │   ├── __init__.py
    │   │   ├── test_base.py
    │   │   ├── test_des_clustering.py
    │   │   ├── test_des_knn.py
    │   │   ├── test_des_mi.py
    │   │   ├── test_desp.py
    │   │   ├── test_knop.py
    │   │   ├── test_knorae.py
    │   │   ├── test_knorau.py
    │   │   ├── test_meta_des.py
    │   │   └── test_probabilistic.py
    │   ├── expected_values
    │   │   ├── des_clustering_proba_integration.npy
    │   │   ├── desknn_proba_integration.npy
    │   │   ├── desknn_probas_DFP.npy
    │   │   ├── desp_proba_DFP.npy
    │   │   ├── desp_proba_integration.npy
    │   │   ├── kne_knn_proba_integration.npy
    │   │   ├── kne_proba_DFP.npy
    │   │   ├── kne_proba_integration.npy
    │   │   ├── knop_proba_integration.npy
    │   │   ├── mcb_proba_DFP.npy
    │   │   ├── mcb_proba_integration.npy
    │   │   ├── ola_proba_DFP.npy
    │   │   └── ola_proba_integration.npy
    │   ├── static
    │   │   ├── __init__.py
    │   │   ├── test_oracle.py
    │   │   ├── test_single_best.py
    │   │   ├── test_stacked.py
    │   │   └── test_static_selection.py
    │   ├── test_base.py
    │   ├── test_des_integration.py
    │   ├── test_des_integration_multiclass.py
    │   ├── test_integration_DFP_IH.py
    │   ├── test_integration_dfp.py
    │   ├── test_metric.py
    │   └── util
    │   │   ├── __init__.py
    │   │   ├── test_aggregation.py
    │   │   ├── test_datasets.py
    │   │   ├── test_diversity.py
    │   │   ├── test_diversity_batch.py
    │   │   ├── test_faiss.py
    │   │   ├── test_fire.py
    │   │   ├── test_instance_hardness.py
    │   │   ├── test_knne.py
    │   │   └── test_prob_functions.py
    └── util
    │   ├── __init__.py
    │   ├── aggregation.py
    │   ├── datasets.py
    │   ├── dfp.py
    │   ├── diversity.py
    │   ├── diversity_batch.py
    │   ├── faiss_knn_wrapper.py
    │   ├── instance_hardness.py
    │   ├── knne.py
    │   └── prob_functions.py
├── docs
    ├── .gitignore
    ├── Makefile
    ├── _static
    │   └── .keep
    ├── api.rst
    ├── conf.py
    ├── index.rst
    ├── make.bat
    ├── modules
    │   ├── dcs
    │   │   ├── a_posteriori.rst
    │   │   ├── a_priori.rst
    │   │   ├── lca.rst
    │   │   ├── mcb.rst
    │   │   ├── mla.rst
    │   │   ├── ola.rst
    │   │   └── rank.rst
    │   ├── des
    │   │   ├── des_clustering.rst
    │   │   ├── des_p.rst
    │   │   ├── deskl.rst
    │   │   ├── desmi.rst
    │   │   ├── ds_knn.rst
    │   │   ├── exponential.rst
    │   │   ├── knop.rst
    │   │   ├── knora_e.rst
    │   │   ├── knora_u.rst
    │   │   ├── logarithmic.rst
    │   │   ├── meta_des.rst
    │   │   ├── minimum_difference.rst
    │   │   ├── probabilistic.rst
    │   │   └── rrc.rst
    │   ├── static
    │   │   ├── oracle.rst
    │   │   ├── single_best.rst
    │   │   ├── stacked.rst
    │   │   └── static_selection.rst
    │   └── util
    │   │   ├── aggregation.rst
    │   │   ├── datasets.rst
    │   │   ├── dfp.rst
    │   │   ├── diversity.rst
    │   │   ├── faiss_knn_wrapper.rst
    │   │   ├── instance_hardness.rst
    │   │   ├── knne.rst
    │   │   └── prob_functions.rst
    ├── news.rst
    ├── news
    │   ├── v0.1.rst
    │   ├── v0.2.rst
    │   ├── v0.3.5.rst
    │   └── v0.3.rst
    ├── user_guide.rst
    └── user_guide
    │   ├── development.rst
    │   ├── installation.rst
    │   ├── known_issues.rst
    │   ├── packaging.rst
    │   └── tutorial.rst
├── examples
    ├── README.txt
    ├── example_calibrating_classifiers.py
    ├── example_heterogeneous.py
    ├── plot_comparing_dynamic_static.py
    ├── plot_example_DFP.py
    ├── plot_example_P2.py
    ├── plot_influence_k_value.py
    ├── plot_random_forest.py
    ├── plot_using_instance_hardness.py
    ├── plot_xor_example.py
    └── simple_example.py
├── requirements-dev.txt
├── requirements.txt
└── setup.py


/.circleci/config.yml:
--------------------------------------------------------------------------------
 1 | # Python CircleCI 2.0 configuration file
 2 | #
 3 | # Check https://circleci.com/docs/2.0/language-python/ for more details
 4 | #
 5 | version: 2
 6 | jobs:
 7 |   python3:
 8 |     docker:
 9 |       # specify the version you desire here
10 |       - image: circleci/python:3.9
11 |       # use `-browsers` prefix for selenium tests, e.g. `3.6.1-browsers`
12 |     environment:
13 |       - USERNAME: "Menelau"
14 |       - DOC_REPO: "DESlib"
15 |       - DOC_URL: ""
16 |       - EMAIL: "rafaelmenelau@gmail.com"
17 |       - MINICONDA_PATH: ~/miniconda
18 |       - CONDA_ENV_NAME: testenv
19 |       - PYTHON_VERSION: 3
20 | 
21 |       # Specify service dependencies here if necessary
22 |       # CircleCI maintains a library of pre-built images
23 |       # documented at https://circleci.com/docs/2.0/circleci-images/
24 |       # - image: circleci/postgres:9.4
25 | 
26 |     working_directory: ~/repo
27 | 
28 |     steps:
29 |       - checkout
30 |       - run:
31 |           no_output_timeout: 30m
32 |           command: |
33 |             wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
34 |             chmod +x miniconda.sh && ./miniconda.sh -b -p ~/miniconda
35 |             export PATH="~/miniconda/bin:$PATH"
36 |             conda update --yes --quiet conda
37 |             conda create -n testenv --yes --quiet python=3.9
38 |             source activate testenv
39 |             conda install --yes pip numpy
40 |             pip install -r requirements-dev.txt
41 |             pip install .
42 |             cd docs
43 |             make html
44 |       - store_artifacts:
45 |           path: docs/_build/html
46 |           destination: docs
47 |       - store_artifacts:
48 |           path: ~/log.txt
49 |       - persist_to_workspace:
50 |           root: docs/_build/html
51 |           paths: .
52 |       - attach_workspace:
53 |           at: docs/_build/html
54 |       - run: ls -ltrh docs/_build/html
55 |     filters:
56 |       branches:
57 |         ignore: gh-pages
58 | 
59 | workflows:
60 |   version: 2
61 |   build-doc-and-deploy:
62 |     jobs:
63 |       - python3
64 | 


--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
 1 | [run]
 2 | branch = True
 3 | source = deslib
 4 | include = */deslib/*
 5 | omit =
 6 |     */setup.py
 7 |     deslib/tests/*
 8 | 
 9 | [report]
10 | exclude_lines =
11 |     if self.debug:
12 |     pragma: no cover
13 |     raise NotImplementedError
14 |     if __name__ == .__main__.:
15 | ignore_errors = True
16 | show_missing = True
17 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Sphinx documentation
 59 | docs/_build/
 60 | 
 61 | # IPython
 62 | profile_default/
 63 | ipython_config.py
 64 | 
 65 | # pyenv
 66 | #   For a library or package, you might want to ignore these files since the code is
 67 | #   intended to run in multiple environments; otherwise, check them in:
 68 | .python-version
 69 | 
 70 | # pipenv
 71 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 72 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 73 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 74 | #   install all needed dependencies.
 75 | Pipfile.lock
 76 | 
 77 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 78 | __pypackages__/
 79 | 
 80 | # SageMath parsed files
 81 | *.sage.py
 82 | 
 83 | # Environments
 84 | .env
 85 | .venv
 86 | dev_env/
 87 | 
 88 | # Spyder project settings
 89 | .spyderproject
 90 | .spyproject
 91 | 
 92 | # Rope project settings
 93 | .ropeproject
 94 | 
 95 | # mkdocs documentation
 96 | /site
 97 | 
 98 | # mypy
 99 | .mypy_cache/
100 | .dmypy.json
101 | dmypy.json
102 | 
103 | # Pyre type checker
104 | .pyre/
105 | 
106 | # pytype static type analyzer
107 | .pytype/
108 | 
109 | # Cython debug symbols
110 | cython_debug/
111 | 
112 | 
113 | ## vscode
114 | 
115 | .vscode/


--------------------------------------------------------------------------------
/.pep8speaks.yml:
--------------------------------------------------------------------------------
 1 | # File : .pep8speaks.yml
 2 | 
 3 | scanner:
 4 |     diff_only: True  # If False, the entire file touched by the Pull Request is scanned for errors. If True, only the diff is scanned.
 5 | 
 6 | no_blank_comment: True  # If True, no comment is made on PR without any errors.
 7 | descending_issues_order: False  # If True, PEP 8 issues in message will be displayed in descending order of line numbers in the file
 8 | only_mention_files_with_errors: True  # If False, a separate status comment for each file is made.
 9 | 
10 | message:
11 |     opened:
12 |         header: "Hello @{name}! Thanks for opening this PR. "
13 |         footer: "Do see the [Hitchhiker's guide to code style](https://goo.gl/hqbW4r)"
14 |     updated:
15 |         header: "Hello @{name}! Thanks for updating this PR. "
16 |         footer: ""  # Why to comment the link to the style guide everytime? :)
17 |     no_errors: "There are currently no PEP 8 issues detected in this Pull Request. Cheers! :beers: "


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yml
2 | 
3 | build:
4 |   image: latest
5 | 
6 | python:
7 |   version: 3.6
8 |   setup_py_install: true


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | Contributing to DESlib
 2 | ========================
 3 | 
 4 | You can contribute to the project in several ways:
 5 | 
 6 | - Reporting bugs
 7 | - Requesting features
 8 | - Improving the documentation
 9 | - Adding examples to use the library
10 | - Implementing new features and fixing bugs
11 | 
12 | Reporting Bugs and requesting features:
13 | ---------------------------------------
14 | 
15 | We use Github issues to track all bugs and feature requests; feel free to
16 | open an issue if you have found a bug or wish to see a new feature implemented.
17 | Before opening a new issue, please check if the issue is not being currently addressed:
18 | [Issues](https://github.com/Menelau/DESlib/issues)
19 | 
20 | For reporting bugs:
21 | 
22 | -  Include information of your working environment. This information
23 |    can be found by running the following code snippet:
24 | 
25 |    ```python
26 |    import platform; print(platform.platform())
27 |    import sys; print("Python", sys.version)
28 |    import numpy; print("NumPy", numpy.__version__)
29 |    import scipy; print("SciPy", scipy.__version__)
30 |    import sklearn; print("Scikit-Learn", sklearn.__version__)
31 |    ```
32 | 
33 | -  Include a [reproducible](https://stackoverflow.com/help/mcve) code snippet
34 |    or link to a [gist](https://gist.github.com). If an exception is raised,
35 |    please provide the traceback.
36 | 
37 | Documentation:
38 | --------------
39 | 
40 | We are glad to accept any sort of documentation: function docstrings,
41 | reStructuredText documents (like this one), tutorials, etc.
42 | reStructuredText documents live in the source code repository under the
43 | doc/ directory.
44 | 
45 | You can edit the documentation using any text editor and then generate
46 | the HTML output by typing ``make html`` from the doc/ directory.
47 | Alternatively, ``make`` can be used to quickly generate the
48 | documentation without the example gallery. The resulting HTML files will
49 | be placed in _build/html/ and are viewable in a web browser. See the
50 | README file in the doc/ directory for more information.
51 | 
52 | For building the documentation, you will need to install sphinx and sphinx_rtd_theme. This
53 | can be easily done by installing the requirements for development using the following command:
54 | 
55 | pip install -r requirements-dev.txt
56 | 
57 | Contributing with code:
58 | -----------------------
59 | 
60 | The preferred way to contribute is to fork the main repository to your account:
61 | 
62 | 1. Fork the [project repository](https://github.com/Menelau/DESlib):
63 |    click on the 'Fork' button near the top of the page. This creates
64 |    a copy of the code under your account on the GitHub server.
65 | 
66 | 2. Clone this copy to your local disk:
67 | 
68 |         $ git clone git@github.com:YourLogin/DESlib.git
69 |         $ cd DESlib
70 | 
71 | 3. Install all requirements for development:
72 | 
73 |         $ pip install -r requirements-dev.txt
74 |         $ pip install --editable .
75 | 
76 | 4. Create a branch to hold your changes:
77 | 
78 |         $ git checkout -b branch_name
79 | 
80 | Where ``branch_name`` is the new feature or bug to be fixed. Do not work directly on the ``master`` branch.
81 | 
82 | 5. Work on this copy on your computer using Git to do the version
83 |    control. To record your changes in Git, then push them to GitHub with:
84 | 
85 |         $ git push -u origin branch_name
86 | 
87 | It is important to assert your code is well covered by test routines (coverage of at least 90%), well documented and
88 | follows PEP8 guidelines.
89 | 
90 | 6. Create a 'Pull request' to send your changes for review.
91 | 
92 |    If your pull request addresses an issue, please use the title to describe
93 |    the issue and mention the issue number in the pull request description to
94 |    ensure a link is created to the original issue.
95 | 
96 | 
97 | 
98 | 
99 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright 2018 Rafael Menelau Oliveira e Cruz
 2 | 
 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 4 | 
 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 6 | 
 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 8 | 
 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
10 | 
11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12 | 
13 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include *.rst
 2 | include CONTRIBUTING.md
 3 | include LICENSE.txt
 4 | include requirements.txt
 5 | include requirements-dev.txt
 6 | 
 7 | recursive-include examples *.py
 8 | recursive-include docs *.rst conf.py *.css Makefile make.bat
 9 | 
10 | 


--------------------------------------------------------------------------------
/benchmarks/bench_ds_performance_faiss.py:
--------------------------------------------------------------------------------
 1 | import gzip
 2 | import os
 3 | import shutil
 4 | import threading
 5 | import time
 6 | import urllib.request
 7 | 
 8 | import pandas as pd
 9 | from sklearn.model_selection import train_test_split
10 | 
11 | from deslib.des.knora_e import KNORAE
12 | 
13 | 
14 | def sk_KNORAE_knn(XTrain, YTrain, k, XTest, YTest):
15 |     start = time.clock()
16 |     knorae_sk = KNORAE(k=k, knn_classifier='knn')
17 |     knorae_sk.fit(XTrain, YTrain)
18 |     score = knorae_sk.score(XTest, YTest)
19 |     print("sklearn_knn_knorae run_time: {}".format(time.clock() - start))
20 |     print("sklearn_knn_knorae score: {}".format(score))
21 | 
22 | 
23 | def faiss_KNORAE_knn(XTrain, YTrain, k, XTest, YTest):
24 |     start = time.clock()
25 |     knorae_sk = KNORAE(k=k, knn_classifier='faiss')
26 |     knorae_sk.fit(XTrain, YTrain)
27 |     score = knorae_sk.score(XTest, YTest)
28 |     print("faiss_knn_knorae run_time: {}".format(time.clock() - start))
29 |     print("faiss_knn_knorae score: {}".format(score))
30 | 
31 | 
32 | if __name__ == "__main__":
33 |     url = "https://archive.ics.uci.edu/ml/machine-learning-databases/" \
34 |           "00280/HIGGS.csv.gz"
35 |     if not os.path.exists("../../HIGGS.csv"):
36 |         print("Downloading HIGGS dataset from {}".format(url))
37 |         if not os.path.exists("../../HIGGS.gz"):
38 |             filedata = urllib.request.urlopen(url)
39 |             data2write = filedata.read()
40 |             with open('../../HIGGS.gz', 'wb') as f:
41 |                 f.write(data2write)
42 |         print("Finished downloading")
43 |         print("Extracting HIGGS.gz")
44 |         if not os.path.exists("../../HIGGS.csv"):
45 |             with gzip.open('../../HIGGS.gz', 'rb') as f:
46 |                 with open('../../HIGGS.csv', 'wb') as csv_out:
47 |                     shutil.copyfileobj(f, csv_out)
48 |         print("Extracted csv")
49 | 
50 |     df = pd.read_csv('../../HIGGS.csv', header=None)
51 |     data = df.values
52 |     X = data[:, 1:]
53 |     Y = data[:, 0]
54 | 
55 |     X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33)
56 |     num_samples_list = [1000000]
57 |     num_of_k_list = [2, 5, 7, 10]
58 |     num_of_test_inputs = [100, 1000, 10000]
59 | 
60 |     for nsamples in num_samples_list:
61 |         for n_k in num_of_k_list:
62 |             for n_t in num_of_test_inputs:
63 |                 print("running experiment: num_of_train_samples: {}, "
64 |                       "num_of_k: {}, num_of_tests: {}".format(nsamples, n_k,
65 |                                                               n_t))
66 |                 faiss_KNORAE_knn(X_train[:nsamples], Y_train[:nsamples], n_k,
67 |                                  X_test[:n_t], Y_test[:n_t])
68 |                 t = threading.Thread(target=sk_KNORAE_knn, args=(
69 |                 X_train[:nsamples], Y_train[:nsamples], n_k, X_test[:n_t],
70 |                 Y_test[:n_t]))
71 | 
72 |                 t.start()
73 |                 t.join(timeout=600)
74 |                 if t.is_alive():
75 |                     print(
76 |                         "sklearn_knn, num_of_train_samples: {}, num_of_k: {}, "
77 |                         "num_of_tests: {}, run_time: timeout".format(nsamples,
78 |                                                                      n_k,
79 |                                                                      n_t))
80 | 


--------------------------------------------------------------------------------
/benchmarks/bench_knn_backbone.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | from sklearn.datasets import make_classification
 5 | from sklearn.model_selection import train_test_split
 6 | from sklearn.neighbors import KNeighborsClassifier
 7 | 
 8 | from deslib.util.faiss_knn_wrapper import FaissKNNClassifier
 9 | 
10 | n_samples = [1000, 10000, 100000, 1000000, 10000000]
11 | rng = 42
12 | 
13 | faiss_brute = FaissKNNClassifier(n_neighbors=7,
14 |                                  algorithm='brute')
15 | faiss_voronoi = FaissKNNClassifier(n_neighbors=7,
16 |                                    algorithm='voronoi')
17 | faiss_hierarchical = FaissKNNClassifier(n_neighbors=7,
18 |                                         algorithm='hierarchical')
19 | 
20 | all_knns = [faiss_brute, faiss_voronoi, faiss_hierarchical]
21 | names = ['faiss_brute', 'faiss_voronoi', 'faiss_hierarchical']
22 | 
23 | list_fitting_time = []
24 | list_search_time = []
25 | 
26 | for n in n_samples:
27 | 
28 |     print("Number of samples: {}" .format(n))
29 |     X, y = make_classification(n_samples=n,
30 |                                n_features=20,
31 |                                random_state=rng)
32 |     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)
33 |     temp_fitting_time = []
34 |     temp_search_time = []
35 |     for name, knn in zip(names, all_knns):
36 |         start = time.clock()
37 |         knn.fit(X_train, y_train)
38 |         fitting_time = time.clock() - start
39 |         print("{} fitting time: {}" .format(name, fitting_time))
40 | 
41 |         start = time.clock()
42 |         neighbors, dists = knn.kneighbors(X_test)
43 |         search_time = time.clock() - start
44 |         print("{} neighborhood search time: {}" .format(name, search_time))
45 | 
46 |         temp_fitting_time.append(fitting_time)
47 |         temp_search_time.append(search_time)
48 | 
49 |     list_fitting_time.append(temp_fitting_time)
50 |     list_search_time.append(temp_search_time)
51 | 
52 | plt.plot(n_samples, list_search_time)
53 | plt.legend(names)
54 | plt.xlabel("Number of samples")
55 | plt.ylabel("K neighbors search time")
56 | plt.savefig('knn_backbone_benchmark.png')
57 | 


--------------------------------------------------------------------------------
/benchmarks/bench_speed_faiss.py:
--------------------------------------------------------------------------------
  1 | import gzip
  2 | import os
  3 | import shutil
  4 | import time
  5 | import urllib.request
  6 | 
  7 | import numpy as np
  8 | import pandas as pd
  9 | from sklearn.ensemble import BaggingClassifier
 10 | from sklearn.model_selection import train_test_split
 11 | 
 12 | from deslib.des.knora_e import KNORAE
 13 | 
 14 | 
 15 | def run_knorae(pool_classifiers, X_DSEL, y_DSEL, X_test, y_test, knn_type):
 16 |     knorae = KNORAE(pool_classifiers=pool_classifiers,
 17 |                     knn_classifier=knn_type)
 18 | 
 19 |     knorae.fit(X_DSEL, y_DSEL)
 20 | 
 21 |     start = time.clock()
 22 |     score = knorae.score(X_test, y_test)
 23 |     end = time.clock() - start
 24 | 
 25 |     return score, end
 26 | 
 27 | 
 28 | def fetch_HIGGS():
 29 |     url = "https://archive.ics.uci.edu/ml/machine-learning-databases/" \
 30 |           "00280/HIGGS.csv.gz"
 31 |     if not os.path.exists("../../HIGGS.csv"):
 32 | 
 33 |         print("Downloading HIGGS dataset from {}".format(url))
 34 | 
 35 |         if not os.path.exists("../../HIGGS.gz"):
 36 |             filedata = urllib.request.urlopen(url)
 37 |             data2write = filedata.read()
 38 | 
 39 |             with open('../../HIGGS.gz', 'wb') as f:
 40 |                 f.write(data2write)
 41 | 
 42 |         print("Finished downloading")
 43 |         print("Extracting HIGGS.gz")
 44 | 
 45 |         if not os.path.exists("../../HIGGS.csv"):
 46 |             with gzip.open('../../HIGGS.gz', 'rb') as f:
 47 |                 with open('../../HIGGS.csv', 'wb') as csv_out:
 48 |                     shutil.copyfileobj(f, csv_out)
 49 | 
 50 |         print("Extracted csv")
 51 |     print('Reading CSV file')
 52 |     df = pd.read_csv('../../HIGGS.csv', header=None)
 53 |     data = df.values
 54 |     X = data[:, 1:]
 55 |     y = data[:, 0]
 56 | 
 57 |     return X, y
 58 | 
 59 | 
 60 | if __name__ == "__main__":
 61 |     rng = np.random.RandomState(123456)
 62 | 
 63 |     print('Preparing dataset')
 64 |     X, y = fetch_HIGGS()
 65 |     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
 66 |                                                         random_state=rng)
 67 | 
 68 |     X_DSEL, X_train, y_DSEL, y_train = train_test_split(X_train, y_train,
 69 |                                                         test_size=0.50,
 70 |                                                         random_state=rng)
 71 |     pool_classifiers = BaggingClassifier(n_estimators=100,
 72 |                                          random_state=rng,
 73 |                                          n_jobs=-1)
 74 | 
 75 |     print('Fitting base classifiers...')
 76 |     pool_classifiers.fit(X_train, y_train)
 77 | 
 78 |     n_samples = 1000000
 79 |     num_of_test_inputs = [100, 1000, 10000]
 80 | 
 81 |     for n_t in num_of_test_inputs:
 82 |         print("running experiment: num_of_DSEL_samples: {}, "
 83 |               "num_of_tests: {}".format(y_DSEL.size, n_t))
 84 | 
 85 |         score_sklearn, time_sklearn = run_knorae(pool_classifiers,
 86 |                                                  X_DSEL[:n_samples],
 87 |                                                  y_DSEL[:n_samples],
 88 |                                                  X_test[:n_t],
 89 |                                                  y_test[:n_t],
 90 |                                                  knn_type='knn')
 91 | 
 92 |         print("sklearn_knorae score = {}, time = {}".format(score_sklearn,
 93 |                                                             time_sklearn))
 94 | 
 95 |         score_faiss, time_faiss = run_knorae(pool_classifiers,
 96 |                                              X_DSEL[:n_samples],
 97 |                                              y_DSEL[:n_samples],
 98 |                                              X_test[:n_t],
 99 |                                              y_test[:n_t],
100 |                                              knn_type='faiss')
101 | 
102 |         print("faiss_knorae score = {}, time = {}".format(score_faiss,
103 |                                                           time_faiss))
104 | 


--------------------------------------------------------------------------------
/deslib/__init__.py:
--------------------------------------------------------------------------------
 1 | """A Python library for Dynamic Ensemble Selection.
 2 | 
 3 | ``DESlib`` is a library containing the implementation of the state-of-the art
 4 | dynamic classifier and ensemble selection techniques. The library also provides
 5 | some static ensemble methods that are used as baseline comparison.
 6 | 
 7 | Subpackages
 8 | -----------
 9 | des
10 |     The implementation of several DES techniques.
11 | 
12 | dcs
13 |     The implementation of several DCS techniques.
14 | 
15 | static
16 |     The implementation of baseline ensemble methods.
17 | 
18 | util
19 |     A collection of aggregation functions and diversity measures for ensemble
20 |     of classifiers.
21 | """
22 | 
23 | # list of all modules available in the library
24 | __all__ = ['des', 'dcs', 'static', 'util', 'tests']
25 | 
26 | __version__ = '0.3.7'
27 | 


--------------------------------------------------------------------------------
/deslib/dcs/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The :mod:`deslib.dcs` provides a set of key dynamic classifier selection
 3 | algorithms (DCS).
 4 | """
 5 | 
 6 | from .a_posteriori import APosteriori
 7 | from .a_priori import APriori
 8 | from .base import BaseDCS
 9 | from .lca import LCA
10 | from .mcb import MCB
11 | from .mla import MLA
12 | from .ola import OLA
13 | from .rank import Rank
14 | 
15 | __all__ = ['BaseDCS',
16 |            'APosteriori',
17 |            'APriori',
18 |            'LCA',
19 |            'OLA',
20 |            'MLA',
21 |            'MCB',
22 |            'Rank']
23 | 


--------------------------------------------------------------------------------
/deslib/des/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The :mod:`deslib.des` provides a set of key dynamic ensemble selection
 3 | algorithms (DES).
 4 | """
 5 | 
 6 | from .base import BaseDES
 7 | from .des_clustering import DESClustering
 8 | from .des_knn import DESKNN
 9 | from .des_mi import DESMI
10 | from .des_p import DESP
11 | from .knop import KNOP
12 | from .knora_e import KNORAE
13 | from .knora_u import KNORAU
14 | from .meta_des import METADES
15 | from deslib.des.probabilistic.base import BaseProbabilistic
16 | from deslib.des.probabilistic.minimum_difference import MinimumDifference
17 | from deslib.des.probabilistic.deskl import DESKL
18 | from deslib.des.probabilistic.rrc import RRC
19 | from deslib.des.probabilistic.exponential import Exponential
20 | from deslib.des.probabilistic.logarithmic import Logarithmic
21 | 
22 | __all__ = ['BaseDES',
23 |            'METADES',
24 |            'KNORAE',
25 |            'KNORAU',
26 |            'KNOP',
27 |            'DESP',
28 |            'DESKNN',
29 |            'DESClustering',
30 |            'DESMI',
31 |            'BaseProbabilistic',
32 |            'RRC',
33 |            'DESKL',
34 |            'MinimumDifference',
35 |            'Exponential',
36 |            'Logarithmic']
37 | 


--------------------------------------------------------------------------------
/deslib/des/probabilistic/__init__.py:
--------------------------------------------------------------------------------
 1 | from .base import BaseProbabilistic
 2 | from .deskl import DESKL
 3 | from .exponential import Exponential
 4 | from .logarithmic import Logarithmic
 5 | from .minimum_difference import MinimumDifference
 6 | from .rrc import RRC
 7 | 
 8 | 
 9 | __all__ = ['BaseProbabilistic',
10 |            'DESKL',
11 |            'Exponential',
12 |            'Logarithmic',
13 |            'MinimumDifference',
14 |            'RRC']
15 | 


--------------------------------------------------------------------------------
/deslib/des/probabilistic/logarithmic.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from deslib.des.probabilistic import BaseProbabilistic
  4 | from deslib.util import log_func
  5 | 
  6 | 
  7 | class Logarithmic(BaseProbabilistic):
  8 |     """ This method estimates the competence of the classifier based on
  9 |     the logarithmic difference between the supports obtained by the
 10 |     base classifier.
 11 | 
 12 |     Parameters
 13 |     ----------
 14 |      pool_classifiers : list of classifiers (Default = None)
 15 |         The generated_pool of classifiers trained for the corresponding
 16 |         classification problem. Each base classifiers should support the method
 17 |         "predict". If None, then the pool of classifiers is a bagging
 18 |         classifier.
 19 | 
 20 |     k : int (Default = 7)
 21 |         Number of neighbors used to estimate the competence of the base
 22 |         classifiers.
 23 | 
 24 |     DFP : Boolean (Default = False)
 25 |         Determines if the dynamic frienemy pruning is applied.
 26 | 
 27 |     with_IH : Boolean (Default = False)
 28 |         Whether the hardness level of the region of competence is used to
 29 |         decide between using the DS algorithm or the KNN for classification of
 30 |         a given query sample.
 31 | 
 32 |     safe_k : int (default = None)
 33 |         The size of the indecision region.
 34 | 
 35 |     IH_rate : float (default = 0.3)
 36 |         Hardness threshold. If the hardness level of the competence region is
 37 |         lower than the IH_rate the KNN classifier is used. Otherwise, the DS
 38 |         algorithm is used for classification.
 39 | 
 40 |     mode : String (Default = "selection")
 41 |            Whether the technique will perform dynamic selection,
 42 |            dynamic weighting or an hybrid approach for classification.
 43 | 
 44 |     random_state : int, RandomState instance or None, optional (default=None)
 45 |         If int, random_state is the seed used by the random number generator;
 46 |         If RandomState instance, random_state is the random number generator;
 47 |         If None, the random number generator is the RandomState instance used
 48 |         by `np.random`.
 49 | 
 50 |     knn_classifier : {'knn', 'faiss', None} (Default = 'knn')
 51 |          The algorithm used to estimate the region of competence:
 52 | 
 53 |          - 'knn' will use :class:`KNeighborsClassifier` from sklearn
 54 | 
 55 |          - 'faiss' will use Facebook's Faiss similarity search through the
 56 |            class :class:`FaissKNNClassifier`
 57 | 
 58 |          - None, will use sklearn :class:`KNeighborsClassifier`.
 59 | 
 60 |     knn_metric : {'minkowski', 'cosine', 'mahalanobis'} (Default = 'minkowski')
 61 |         The metric used by the k-NN classifier to estimate distances.
 62 | 
 63 |         - 'minkowski' will use minkowski distance.
 64 | 
 65 |         - 'cosine' will use the cosine distance.
 66 | 
 67 |         - 'mahalanobis' will use the mahalonibis distance.
 68 | 
 69 |     DSEL_perc : float (Default = 0.5)
 70 |         Percentage of the input data used to fit DSEL.
 71 |         Note: This parameter is only used if the pool of classifier is None or
 72 |         unfitted.
 73 | 
 74 |     voting : {'hard', 'soft'}, default='hard'
 75 |             If 'hard', uses predicted class labels for majority rule voting.
 76 |             Else if 'soft', predicts the class label based on the argmax of
 77 |             the sums of the predicted probabilities, which is recommended for
 78 |             an ensemble of well-calibrated classifiers.
 79 | 
 80 |     n_jobs : int, default=-1
 81 |         The number of parallel jobs to run. None means 1 unless in
 82 |         a joblib.parallel_backend context. -1 means using all processors.
 83 |         Doesn’t affect fit method.
 84 | 
 85 |     References
 86 |     ----------
 87 |     B. Antosik, M. Kurzynski, New measures of classifier competence
 88 |     – heuristics and application to the design of
 89 |     multiple classifier systems., in: Computer recognition systems
 90 |     4., 2011, pp. 197–206.
 91 | 
 92 |     T.Woloszynski, M. Kurzynski, A measure of competence based on randomized
 93 |     reference classifier for dynamic ensemble selection, in: International
 94 |     Conference on Pattern Recognition (ICPR), 2010, pp. 4194–4197.
 95 |     """
 96 | 
 97 |     def __init__(self, pool_classifiers=None, k=None, DFP=False, with_IH=False,
 98 |                  safe_k=None, IH_rate=0.30, mode='selection',
 99 |                  random_state=None, knn_classifier='knn',
100 |                  knn_metric='minkowski', DSEL_perc=0.5, n_jobs=-1,
101 |                  voting='hard'):
102 |         super(Logarithmic, self).__init__(pool_classifiers=pool_classifiers,
103 |                                           k=k,
104 |                                           DFP=DFP,
105 |                                           with_IH=with_IH,
106 |                                           safe_k=safe_k,
107 |                                           IH_rate=IH_rate,
108 |                                           mode=mode,
109 |                                           random_state=random_state,
110 |                                           knn_classifier=knn_classifier,
111 |                                           knn_metric=knn_metric,
112 |                                           DSEL_perc=DSEL_perc,
113 |                                           n_jobs=n_jobs,
114 |                                           voting=voting)
115 | 
116 |     def source_competence(self):
117 |         """The source of competence C_src at the validation point
118 |         :math:`\\mathbf{x}_{k}` is calculated by
119 |         logarithm function in the support obtained by the base classifier.
120 | 
121 |         Returns
122 |         ----------
123 |         C_src : array of shape (n_samples, n_classifiers)
124 |             The competence source for each base classifier at each data point.
125 |         """
126 |         C_src = np.zeros((self.n_samples_, self.n_classifiers_))
127 |         for clf_index in range(self.n_classifiers_):
128 |             supports = self.dsel_scores_[:, clf_index, :]
129 |             support_correct = supports[
130 |                 np.arange(self.n_samples_), self.DSEL_target_]
131 | 
132 |             C_src[:, clf_index] = log_func(self.n_classes_, support_correct)
133 | 
134 |         return C_src
135 | 


--------------------------------------------------------------------------------
/deslib/des/probabilistic/rrc.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from deslib.des.probabilistic import BaseProbabilistic
  4 | from deslib.util import ccprmod
  5 | 
  6 | 
  7 | class RRC(BaseProbabilistic):
  8 |     """DES technique based on the Randomized Reference Classifier method
  9 |     (DES-RRC).
 10 | 
 11 |     Parameters
 12 |     ----------
 13 |      pool_classifiers : list of classifiers (Default = None)
 14 |         The generated_pool of classifiers trained for the corresponding
 15 |         classification problem. Each base classifiers should support the method
 16 |         "predict". If None, then the pool of classifiers is a bagging
 17 |         classifier.
 18 | 
 19 |     k : int (Default = 7)
 20 |         Number of neighbors used to estimate the competence of the base
 21 |         classifiers.
 22 | 
 23 |     DFP : Boolean (Default = False)
 24 |         Determines if the dynamic frienemy pruning is applied.
 25 | 
 26 |     with_IH : Boolean (Default = False)
 27 |         Whether the hardness level of the region of competence is used to
 28 |         decide between using the DS algorithm or the KNN for classification of
 29 |         a given query sample.
 30 | 
 31 |     safe_k : int (default = None)
 32 |         The size of the indecision region.
 33 | 
 34 |     IH_rate : float (default = 0.3)
 35 |         Hardness threshold. If the hardness level of the competence region is
 36 |         lower than the IH_rate the KNN classifier is used. Otherwise, the DS
 37 |         algorithm is used for classification.
 38 | 
 39 |     mode : String (Default = "selection")
 40 |            Whether the technique will perform dynamic selection,
 41 |            dynamic weighting or an hybrid approach for classification.
 42 | 
 43 |     random_state : int, RandomState instance or None, optional (default=None)
 44 |         If int, random_state is the seed used by the random number generator;
 45 |         If RandomState instance, random_state is the random number generator;
 46 |         If None, the random number generator is the RandomState instance used
 47 |         by `np.random`.
 48 | 
 49 |     knn_classifier : {'knn', 'faiss', None} (Default = 'knn')
 50 |          The algorithm used to estimate the region of competence:
 51 | 
 52 |          - 'knn' will use :class:`KNeighborsClassifier` from sklearn
 53 | 
 54 |          - 'faiss' will use Facebook's Faiss similarity search through the
 55 |            class :class:`FaissKNNClassifier`
 56 | 
 57 |          - None, will use sklearn :class:`KNeighborsClassifier`.
 58 | 
 59 |     knn_metric : {'minkowski', 'cosine', 'mahalanobis'} (Default = 'minkowski')
 60 |         The metric used by the k-NN classifier to estimate distances.
 61 | 
 62 |         - 'minkowski' will use minkowski distance.
 63 | 
 64 |         - 'cosine' will use the cosine distance.
 65 | 
 66 |         - 'mahalanobis' will use the mahalonibis distance.
 67 | 
 68 |     DSEL_perc : float (Default = 0.5)
 69 |         Percentage of the input data used to fit DSEL.
 70 |         Note: This parameter is only used if the pool of classifier is None or
 71 |         unfitted.
 72 | 
 73 |     voting : {'hard', 'soft'}, default='hard'
 74 |             If 'hard', uses predicted class labels for majority rule voting.
 75 |             Else if 'soft', predicts the class label based on the argmax of
 76 |             the sums of the predicted probabilities, which is recommended for
 77 |             an ensemble of well-calibrated classifiers.
 78 | 
 79 |     n_jobs : int, default=-1
 80 |         The number of parallel jobs to run. None means 1 unless in
 81 |         a joblib.parallel_backend context. -1 means using all processors.
 82 |         Doesn’t affect fit method.
 83 | 
 84 |     References
 85 |     ----------
 86 |     Woloszynski, Tomasz, and Marek Kurzynski. "A probabilistic model of
 87 |     classifier competence for dynamic ensemble selection." Pattern Recognition
 88 |     44.10 (2011): 2656-2668.
 89 | 
 90 |     R. M. O. Cruz, R. Sabourin, and G. D. Cavalcanti, “Dynamic classifier
 91 |     selection: Recent advances and perspectives,”
 92 |     Information Fusion, vol. 41, pp. 195 – 216, 2018.
 93 | 
 94 |     """
 95 | 
 96 |     def __init__(self, pool_classifiers=None, k=None, DFP=False, with_IH=False,
 97 |                  safe_k=None, IH_rate=0.30, mode='selection',
 98 |                  random_state=None, knn_classifier='knn',
 99 |                  knn_metric='minkowski', DSEL_perc=0.5, n_jobs=-1,
100 |                  voting='hard'):
101 | 
102 |         super(RRC, self).__init__(pool_classifiers=pool_classifiers,
103 |                                   k=k,
104 |                                   DFP=DFP,
105 |                                   with_IH=with_IH,
106 |                                   safe_k=safe_k,
107 |                                   IH_rate=IH_rate,
108 |                                   mode=mode,
109 |                                   random_state=random_state,
110 |                                   knn_classifier=knn_classifier,
111 |                                   knn_metric=knn_metric,
112 |                                   DSEL_perc=DSEL_perc,
113 |                                   n_jobs=n_jobs,
114 |                                   voting=voting)
115 | 
116 |         self.selection_threshold = None
117 | 
118 |     def source_competence(self):
119 |         """
120 |         Calculates the source of competence using the randomized reference
121 |         classifier (RRC) method.
122 | 
123 |         The source of competence C_src at the validation point
124 |         :math:`\\mathbf{x}_{k}` calculated using the probabilistic model
125 |         based on the supports obtained by the base classifier and
126 |         randomized reference classifier (RRC) model. The probabilistic
127 |         modeling of the classifier competence is calculated using
128 |         the ccprmod function.
129 | 
130 |         Returns
131 |         ----------
132 |         C_src : array of shape (n_samples, n_classifiers)
133 |             The competence source for each base classifier at each data point.
134 |         """
135 |         c_src = np.zeros((self.n_samples_, self.n_classifiers_))
136 | 
137 |         for clf_index in range(self.n_classifiers_):
138 |             # Get supports for all samples in DSEL
139 |             supports = self.dsel_scores_[:, clf_index, :]
140 |             c_src[:, clf_index] = ccprmod(supports, self.DSEL_target_)
141 | 
142 |         return c_src
143 | 


--------------------------------------------------------------------------------
/deslib/static/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The :mod:`deslib.static` provides a set of static ensemble methods which are
 3 | often used as a baseline to compare the performance of dynamic selection
 4 | algorithms.
 5 | """
 6 | 
 7 | from .oracle import Oracle
 8 | from .single_best import SingleBest
 9 | from .static_selection import StaticSelection
10 | from .stacked import StackedClassifier
11 | 
12 | __all__ = ['Oracle',
13 |            'SingleBest',
14 |            'StaticSelection',
15 |            'StackedClassifier']
16 | 


--------------------------------------------------------------------------------
/deslib/static/base.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | 
  3 | from abc import abstractmethod, ABCMeta
  4 | 
  5 | # Author: Rafael Menelau Oliveira e Cruz <rafaelmenelau@gmail.com>
  6 | #
  7 | # License: BSD 3 clause
  8 | import numpy as np
  9 | from sklearn.base import BaseEstimator, ClassifierMixin
 10 | from sklearn.ensemble import BaseEnsemble, BaggingClassifier
 11 | from sklearn.preprocessing import LabelEncoder
 12 | from sklearn.utils.validation import check_random_state
 13 | 
 14 | 
 15 | class BaseStaticEnsemble(BaseEstimator, ClassifierMixin):
 16 |     """Base class for static ensembles.
 17 | 
 18 |     All static ensemble techniques should inherit from this class.
 19 | 
 20 |     Warning: This class should not be instantiated directly, use derived
 21 |     classes instead.
 22 | 
 23 |     Parameters
 24 |     ----------
 25 |     pool_classifiers : list of classifiers (Default = None)
 26 |         The generated_pool of classifiers trained for the corresponding
 27 |         classification problem. Each base classifiers should support the method
 28 |         "predict". If None, then the pool of classifiers is a bagging
 29 |         classifier.
 30 | 
 31 |     random_state : int, RandomState instance or None, optional (default=None)
 32 |         If int, random_state is the seed used by the random number generator;
 33 |         If RandomState instance, random_state is the random number generator;
 34 |         If None, the random number generator is the RandomState instance used
 35 |         by `np.random`.
 36 | 
 37 |     n_jobs : int, default=-1
 38 |         The number of parallel jobs to run. None means 1 unless in
 39 |         a joblib.parallel_backend context. -1 means using all processors.
 40 |         Doesn’t affect fit method.
 41 | 
 42 |     References
 43 |     ----------
 44 |     Kuncheva, Ludmila I. Combining pattern classifiers: methods and algorithms.
 45 |     John Wiley & Sons, 2004.
 46 | 
 47 |     R. M. O. Cruz, R. Sabourin, and G. D. Cavalcanti, “Dynamic classifier
 48 |     selection: Recent advances and perspectives,”
 49 |     Information Fusion, vol. 41, pp. 195 – 216, 2018.
 50 | 
 51 |     """
 52 |     __metaclass__ = ABCMeta
 53 | 
 54 |     @abstractmethod
 55 |     def __init__(self, pool_classifiers=None, random_state=None, n_jobs=-1):
 56 |         self.pool_classifiers = pool_classifiers
 57 |         self.random_state = random_state
 58 |         self.n_jobs = n_jobs
 59 | 
 60 |     def fit(self, X, y):
 61 |         """Fit the model according to the given training data.
 62 | 
 63 |         Parameters
 64 |         ----------
 65 |         X : array of shape (n_samples, n_features)
 66 |             Data used to fit the model.
 67 | 
 68 |         y : array of shape (n_samples)
 69 |             class labels of each example in X.
 70 | 
 71 |         Returns
 72 |         -------
 73 |         self : object
 74 |             Returns self.
 75 |         """
 76 |         self.random_state_ = check_random_state(self.random_state)
 77 | 
 78 |         # Check if the pool of classifiers is None. If yes, use a
 79 |         # BaggingClassifier for the pool.
 80 |         if self.pool_classifiers is None:
 81 |             self.pool_classifiers_ = BaggingClassifier(
 82 |                 random_state=self.random_state_, n_jobs=self.n_jobs)
 83 |             self.pool_classifiers_.fit(X, y)
 84 | 
 85 |         else:
 86 |             self.pool_classifiers_ = self.pool_classifiers
 87 | 
 88 |         self.n_classifiers_ = len(self.pool_classifiers_)
 89 |         # allow base models with feature subspaces.
 90 |         if hasattr(self.pool_classifiers_, "estimators_features_"):
 91 |             self.estimator_features_ = \
 92 |                 np.array(self.pool_classifiers_.estimators_features_)
 93 |         else:
 94 |             indices = np.arange(X.shape[1])
 95 |             self.estimator_features_ = np.tile(indices,
 96 |                                                (self.n_classifiers_, 1))
 97 | 
 98 |         self._validate_pool()
 99 |         # dealing with label encoder
100 |         self._check_label_encoder()
101 |         self.y_enc_ = self._setup_label_encoder(y)
102 |         self.n_classes_ = self.classes_.size
103 |         self.n_features_ = X.shape[1]
104 | 
105 |         return self
106 | 
107 |     def _check_label_encoder(self):
108 |         # Check if base classifiers are not using LabelEncoder (the case for
109 |         # scikit-learn's ensembles):
110 |         if isinstance(self.pool_classifiers_, BaseEnsemble):
111 |             if np.array_equal(self.pool_classifiers_.classes_,
112 |                               self.pool_classifiers_[0].classes_):
113 |                 self.base_already_encoded_ = False
114 |             else:
115 |                 self.base_already_encoded_ = True
116 |         else:
117 |             self.base_already_encoded_ = False
118 | 
119 |     def _setup_label_encoder(self, y):
120 |         """
121 |         Setup the label encoder
122 |         """
123 |         self.enc_ = LabelEncoder()
124 |         y_ind = self.enc_.fit_transform(y)
125 |         self.classes_ = self.enc_.classes_
126 | 
127 |         return y_ind
128 | 
129 |     def _encode_base_labels(self, y):
130 |         if self.base_already_encoded_:
131 |             return y
132 |         else:
133 |             return self.enc_.transform(y)
134 | 
135 |     def _validate_pool(self):
136 |         """ Check the estimator and the n_estimator attribute, set the
137 |         `base_estimator_` attribute.
138 | 
139 |         Raises
140 |         -------
141 |         ValueError
142 |             If the pool of classifiers is empty or just a single model.
143 |         """
144 |         if self.n_classifiers_ <= 1:
145 |             raise ValueError("n_classifiers must be greater than one, "
146 |                              "got {}.".format(len(self.pool_classifiers)))
147 | 


--------------------------------------------------------------------------------
/deslib/tests/__init__.py:
--------------------------------------------------------------------------------
1 | from .conftest import *
2 | 


--------------------------------------------------------------------------------
/deslib/tests/dcs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/dcs/__init__.py


--------------------------------------------------------------------------------
/deslib/tests/dcs/test_a_posteriori.py:
--------------------------------------------------------------------------------
  1 | from unittest.mock import MagicMock
  2 | 
  3 | import numpy as np
  4 | import pytest
  5 | from sklearn.linear_model import Perceptron
  6 | from sklearn.utils.estimator_checks import check_estimator
  7 | 
  8 | from deslib.dcs.a_posteriori import APosteriori
  9 | 
 10 | 
 11 | def test_check_estimator():
 12 |     check_estimator(APosteriori(selection_method='best'))
 13 | 
 14 | 
 15 | # Should always be 1.0 since the supports for the correct class is always 1.
 16 | @pytest.mark.parametrize('index', [0, 1, 2])
 17 | def test_estimate_competence_all_ones(index, example_all_ones):
 18 |     _, y, neighbors, distances, dsel_processed, dsel_scores = example_all_ones
 19 | 
 20 |     query = np.atleast_2d([1, 1])
 21 | 
 22 |     a_posteriori_test = APosteriori()
 23 |     a_posteriori_test.n_classifiers_ = 3
 24 |     a_posteriori_test.DSEL_processed_ = dsel_processed
 25 |     a_posteriori_test.dsel_scores_ = dsel_scores
 26 |     a_posteriori_test.DSEL_target_ = y
 27 | 
 28 |     neighbors = neighbors[index, :].reshape(1, -1)
 29 |     distances = distances[index, :].reshape(1, -1)
 30 | 
 31 |     expected = [1.0, 1.0, 1.0]
 32 |     predictions = np.array([0, 1, 0])
 33 | 
 34 |     competences = a_posteriori_test.estimate_competence(neighbors,
 35 |                                                         distances,
 36 |                                                         predictions=np.array(
 37 |                                                             predictions))
 38 |     assert np.isclose(competences, expected).all()
 39 | 
 40 | 
 41 | # Testing example from kuncheva's book (combining pattern classifiers)
 42 | def test_estimate_competence_kuncheva_ex(example_kuncheva):
 43 |     query = np.atleast_2d([1, 1])
 44 | 
 45 |     a_posteriori_test = APosteriori(k=example_kuncheva['k'])
 46 |     a_posteriori_test.n_classifiers_ = 1
 47 | 
 48 |     a_posteriori_test.DSEL_processed_ = example_kuncheva['dsel_processed']
 49 |     a_posteriori_test.dsel_scores_ = example_kuncheva['dsel_scores']
 50 |     a_posteriori_test.DSEL_target_ = example_kuncheva['y_dependent']
 51 |     a_posteriori_test.n_classes_ = example_kuncheva['n_classes']
 52 | 
 53 |     neighbors = example_kuncheva['neighbors'].reshape(1, -1)
 54 |     distances = example_kuncheva['distances'].reshape(1, -1)
 55 | 
 56 |     predictions = np.array([[1]])
 57 | 
 58 |     competences = a_posteriori_test.estimate_competence(neighbors,
 59 |                                                         distances,
 60 |                                                         predictions=np.array(
 61 |                                                             predictions))
 62 |     assert np.isclose(competences, 0.95, atol=0.01)
 63 | 
 64 | 
 65 | # Testing example from kuncheva's book (combining pattern classifiers)
 66 | def test_estimate_competence_kuncheva_ex_batch(example_kuncheva):
 67 |     # considering a batch composed of 10 samples
 68 |     query = np.ones((10, 2))
 69 |     classifier = MagicMock()
 70 |     classifier.predict.return_value = [1]
 71 |     classifier.predict_proba.return_value = None
 72 | 
 73 |     a_posteriori_test = APosteriori(pool_classifiers=classifier,
 74 |                                     k=example_kuncheva['k'])
 75 | 
 76 |     a_posteriori_test.n_classifiers_ = 1
 77 |     a_posteriori_test.DSEL_processed_ = example_kuncheva['dsel_processed']
 78 |     a_posteriori_test.DSEL_target_ = example_kuncheva['y_dependent']
 79 |     a_posteriori_test.dsel_scores_ = example_kuncheva['dsel_scores']
 80 |     a_posteriori_test.n_classes_ = example_kuncheva['n_classes']
 81 | 
 82 |     # repeating the same matrix in a new axis to simulate a batch input.
 83 |     neighbors = example_kuncheva['neighbors']
 84 |     distances = example_kuncheva['distances']
 85 | 
 86 |     predictions = [1]
 87 |     competences = a_posteriori_test.estimate_competence(neighbors,
 88 |                                                         distances,
 89 |                                                         predictions=np.array(
 90 |                                                             predictions))
 91 |     assert np.allclose(competences, 0.95, atol=0.01)
 92 | 
 93 | 
 94 | # in this test case, the target of the neighbors is always different
 95 | # than the predicted. So
 96 | # the estimation of competence should always be zero
 97 | @pytest.mark.parametrize('index', [0, 1, 2])
 98 | def test_estimate_competence_diff_target(index, example_all_ones):
 99 |     _, _, neighbors, distances, dsel_processed, _ = example_all_ones
100 | 
101 |     query = np.atleast_2d([1, 1])
102 |     a_posteriori_test = APosteriori()
103 |     a_posteriori_test.n_classifiers_ = 3
104 |     a_posteriori_test.DSEL_processed_ = dsel_processed
105 |     a_posteriori_test.dsel_scores_ = np.ones((15, 3, 3))
106 |     a_posteriori_test.DSEL_target_ = np.ones(15, dtype=int) * 2
107 |     a_posteriori_test.n_classes_ = 2
108 | 
109 |     neighbors = neighbors[index, :].reshape(1, -1)
110 |     distances = distances[index, :].reshape(1, -1)
111 | 
112 |     expected = [0.0, 0.0, 0.0]
113 | 
114 |     predictions = np.array([0, 1, 0])
115 |     competences = a_posteriori_test.estimate_competence(neighbors,
116 |                                                         distances,
117 |                                                         predictions=np.array(
118 |                                                             predictions))
119 |     assert np.isclose(competences, expected).all()
120 | 
121 | 
122 | # Check if the fit method is pre-calculating the classifier scores correctly
123 | def test_fit(create_X_y, create_pool_classifiers):
124 |     X, y = create_X_y
125 |     a_posteriori_test = APosteriori(create_pool_classifiers)
126 |     a_posteriori_test.fit(X, y)
127 |     expected = np.array([[0.5, 0.5], [1.0, 0.0], [0.33, 0.67]])
128 |     expected = np.tile(expected, (15, 1, 1))
129 |     assert np.array_equal(a_posteriori_test.dsel_scores_, expected)
130 | 
131 | 
132 | # Test if the class is raising an error when the base classifiers do not
133 | # implements the predict_proba method. Should raise an exception when the
134 | # base classifier cannot estimate posterior probabilities (predict_proba)
135 | # Using Perceptron classifier as it does not implements predict_proba.
136 | def test_not_predict_proba(create_X_y):
137 |     X, y = create_X_y
138 |     clf1 = Perceptron()
139 |     clf1.fit(X, y)
140 |     with pytest.raises(ValueError):
141 |         APosteriori([clf1, clf1]).fit(X, y)
142 | 


--------------------------------------------------------------------------------
/deslib/tests/dcs/test_a_priori.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pytest
 3 | from sklearn.linear_model import Perceptron
 4 | from sklearn.utils.estimator_checks import check_estimator
 5 | 
 6 | from deslib.dcs.a_priori import APriori
 7 | 
 8 | 
 9 | def test_check_estimator():
10 |     check_estimator(APriori(selection_method='best'))
11 | 
12 | 
13 | # Should always be 1.0 since the supports for the correct class is always 1.
14 | @pytest.mark.parametrize('index, expected', [(0, [1.0, 1.0, 1.0]),
15 |                                              (1, [1.0, 1.0, 1.0]),
16 |                                              (2, [1.0, 1.0, 1.0])])
17 | def test_estimate_competence_all_ones(index, expected, example_all_ones):
18 |     X, y, neighbors, distances, dsel_processed, dsel_scores = example_all_ones
19 | 
20 |     a_priori_test = APriori()
21 | 
22 |     a_priori_test.DSEL_processed_ = dsel_processed
23 |     a_priori_test.dsel_scores_ = dsel_scores
24 |     a_priori_test.DSEL_target_ = y
25 |     a_priori_test.n_classes_ = 2
26 | 
27 |     neighbors = neighbors[index, :].reshape(1, -1)
28 |     distances = distances[index, :].reshape(1, -1)
29 | 
30 |     competences = a_priori_test.estimate_competence(neighbors, distances)
31 |     assert np.isclose(competences, expected).all()
32 | 
33 | 
34 | # Testing example from kuncheva's book (combining pattern classifiers)
35 | def test_estimate_competence_kuncheva_ex(example_kuncheva):
36 |     a_priori_test = APriori(k=example_kuncheva['k'])
37 |     test_example = example_kuncheva
38 |     a_priori_test.DSEL_processed_ = test_example['dsel_processed']
39 |     a_priori_test.dsel_scores_ = test_example['dsel_scores']
40 |     a_priori_test.DSEL_target_ = test_example['y_independent']
41 |     a_priori_test.n_classes_ = test_example['n_classes']
42 | 
43 |     neighbors = test_example['neighbors'].reshape(1, -1)
44 |     distances = test_example['distances'].reshape(1, -1)
45 | 
46 |     competences = a_priori_test.estimate_competence(neighbors, distances)
47 |     assert np.isclose(competences, 0.70, atol=0.01)
48 | 
49 | 
50 | # Test the estimate competence method receiving n samples as input
51 | def test_estimate_competence_batch(example_estimate_competence):
52 |     _, y, nn, _, dsel_processed, dsel_scores = example_estimate_competence
53 |     expected = np.array([[0.333333, 0.50000, 0.40000],
54 |                          [0.666666, 0.50000, 0.60000],
55 |                          [0.000000, 0.50000, 0.20000]])
56 | 
57 |     # Using 3 neighbors to facilitate the calculations
58 |     a_priori_test = APriori(k=3)
59 | 
60 |     a_priori_test.DSEL_processed_ = dsel_processed
61 |     a_priori_test.dsel_scores_ = dsel_scores
62 |     a_priori_test.DSEL_target_ = y
63 |     a_priori_test.n_classes_ = 2
64 | 
65 |     nn = nn[:, 0:3]
66 |     distances = np.ones((3, 3))
67 | 
68 |     competences = a_priori_test.estimate_competence(nn,
69 |                                                     distances)
70 |     assert np.allclose(competences, expected, atol=0.01)
71 | 
72 | 
73 | def test_fit(create_pool_classifiers, create_X_y):
74 |     X, y = create_X_y
75 | 
76 |     a_priori_test = APriori(create_pool_classifiers)
77 |     a_priori_test.fit(X, y)
78 |     expected = np.array([[0.5, 0.5], [1.0, 0.0], [0.33, 0.67]])
79 |     expected = np.tile(expected, (15, 1, 1))
80 |     assert np.array_equal(a_priori_test.dsel_scores_, expected)
81 | 
82 | 
83 | # Test if the class is raising an error when the base classifiers do not
84 | # implements the predict_proba method. Should raise an exception when the
85 | # base classifier cannot estimate posterior probabilities (predict_proba)
86 | # Using Perceptron classifier as it does not implements predict_proba.
87 | def test_not_predict_proba(create_X_y):
88 |     X, y = create_X_y
89 | 
90 |     clf1 = Perceptron()
91 |     clf1.fit(X, y)
92 |     with pytest.raises(ValueError):
93 |         APriori([clf1, clf1]).fit(X, y)
94 | 


--------------------------------------------------------------------------------
/deslib/tests/dcs/test_lca.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pytest
 3 | from sklearn.linear_model import Perceptron
 4 | from sklearn.utils.estimator_checks import check_estimator
 5 | 
 6 | from deslib.dcs.lca import LCA
 7 | 
 8 | 
 9 | def test_check_estimator():
10 |     check_estimator(LCA())
11 | 
12 | 
13 | def test_estimate_competence_batch(example_estimate_competence):
14 |     _, y, neighbors, distances, dsel_processed, _ = example_estimate_competence
15 | 
16 |     expected = np.array([[0.75000000,  0.66666667,  0.75000000],
17 |                          [0.80000000, 1.00000000, 0.80000000],
18 |                          [1.00000000, 0.60000000, 0.50000000]])
19 |     lca_test = LCA()
20 |     lca_test.DSEL_processed_ = dsel_processed
21 |     lca_test.DSEL_target_ = y
22 | 
23 |     query = np.ones((3, 2))
24 | 
25 |     predictions = np.array([[0, 1, 0]])
26 |     competences = lca_test.estimate_competence(neighbors,
27 |                                                distances=distances,
28 |                                                predictions=np.array(
29 |                                                    predictions))
30 | 
31 |     assert np.isclose(competences, expected).all()
32 | 
33 | 
34 | # in this test case, the target of the neighbors is always different than
35 | # the predicted class. So the estimation of competence should always be zero
36 | @pytest.mark.parametrize('index', [0, 1, 2])
37 | def test_estimate_competence_diff_target(index,
38 |                                          example_estimate_competence,
39 |                                          create_pool_classifiers):
40 |     _, y, neighbors, distances, dsel_processed, _ = example_estimate_competence
41 | 
42 |     lca_test = LCA(create_pool_classifiers)
43 |     lca_test.DSEL_processed_ = dsel_processed
44 |     lca_test.DSEL_target_ = np.ones(15, dtype=int) * 3
45 | 
46 |     neighbors = neighbors[index, :].reshape(1, -1)
47 |     distances = distances[index, :].reshape(1, -1)
48 | 
49 |     query = np.atleast_2d([1, 1])
50 |     expected = [0.0, 0.0, 0.0]
51 | 
52 |     predictions = np.array([[0, 1, 0]])
53 |     competences = lca_test.estimate_competence(neighbors,
54 |                                                distances=distances,
55 |                                                predictions=np.array(
56 |                                                    predictions))
57 | 
58 |     assert np.isclose(competences, expected).all()
59 | 
60 | 
61 | # Test if the class is raising an error when the base classifiers do not
62 | # implements the predict_proba method. In this case the test should not raise
63 | # an error since this class does not require base classifiers that
64 | # can estimate probabilities
65 | def test_predict_proba(create_X_y):
66 |     X, y = create_X_y
67 | 
68 |     clf1 = Perceptron()
69 |     clf1.fit(X, y)
70 |     LCA([clf1, clf1]).fit(X, y)
71 | 


--------------------------------------------------------------------------------
/deslib/tests/dcs/test_mcb.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pytest
  3 | from sklearn.linear_model import Perceptron
  4 | from sklearn.utils.estimator_checks import check_estimator
  5 | 
  6 | from deslib.dcs.mcb import MCB
  7 | 
  8 | # ex1 the similarity will always be 100%
  9 | bks_dsel_ex1 = np.hstack(
 10 |     (np.hstack((np.zeros((15, 1)), np.ones((15, 1)))), np.zeros((15, 1))))
 11 | 
 12 | # Change a bit to check if the filtering by similarity is working as intended.
 13 | bks_dsel_ex2 = np.hstack(
 14 |     (np.hstack((np.zeros((15, 1)), np.ones((15, 1)))), np.zeros((15, 1))))
 15 | bks_dsel_ex2[1, :] = 2
 16 | 
 17 | bks_dsel_ex3 = bks_dsel_ex1 + 1
 18 | 
 19 | 
 20 | def test_check_estimator():
 21 |     check_estimator(MCB())
 22 | 
 23 | 
 24 | @pytest.mark.parametrize('similarity_threshold', [2.0, -1.0, -0.5])
 25 | def test_similarity_threshold(similarity_threshold, create_X_y):
 26 |     X, y = create_X_y
 27 |     with pytest.raises(ValueError):
 28 |         mcb = MCB(similarity_threshold=similarity_threshold)
 29 |         mcb.fit(X, y)
 30 | 
 31 | 
 32 | @pytest.mark.parametrize('similarity_threshold', [None, 'a'])
 33 | def test_similarity_threshold_type(similarity_threshold, create_X_y):
 34 |     X, y = create_X_y
 35 |     with pytest.raises(TypeError):
 36 |         mcb = MCB(similarity_threshold=similarity_threshold)
 37 |         mcb.fit(X, y)
 38 | 
 39 | 
 40 | @pytest.mark.parametrize('index, expected', [(0, [0.66666666,
 41 |                                                   0.83333333,
 42 |                                                   0.66666666]),
 43 |                                              (1, [0.83333333,
 44 |                                                   1.0,
 45 |                                                   0.66666666])])
 46 | def test_estimate_competence2(index, expected, example_estimate_competence):
 47 | 
 48 |     _, _, neighbors, distances, dsel_processed, _ = example_estimate_competence
 49 | 
 50 |     mcb_test = MCB()
 51 |     mcb_test.n_classifiers_ = 3
 52 |     mcb_test.DSEL_processed_ = dsel_processed
 53 | 
 54 |     neighbors = neighbors[index, :].reshape(1, -1)
 55 |     distances = distances[index, :].reshape(1, -1)
 56 |     # Only changing the pre-processed BKS to see if the filter works.
 57 |     mcb_test.BKS_DSEL_ = bks_dsel_ex2
 58 | 
 59 |     predictions = np.array([[0, 1, 0]])
 60 | 
 61 |     competences = mcb_test.estimate_competence(neighbors,
 62 |                                                distances=distances,
 63 |                                                predictions=np.atleast_2d(
 64 |                                                    predictions))
 65 |     assert np.isclose(competences, expected).all()
 66 | 
 67 | 
 68 | # This third test uses an totally wrong bks matrix, so that the technique
 69 | # is obligated to use the whole it also considers batch processing
 70 | # region of competence
 71 | def test_estimate_competence_batch(example_estimate_competence):
 72 |     _, _, neighbors, distances, dsel_processed, _ = example_estimate_competence
 73 | 
 74 |     expected = np.array([[0.57142857, 0.71428571, 0.71428571],
 75 |                          [0.71428571, 0.85714286, 0.71428571],
 76 |                          [0.57142857, 0.71428571, 0.57142857]])
 77 |     mcb_test = MCB()
 78 |     mcb_test.n_classifiers_ = 3
 79 |     mcb_test.DSEL_processed_ = dsel_processed
 80 | 
 81 |     # Only changing the pre-processed BKS to see if the filter works.
 82 |     mcb_test.BKS_DSEL_ = bks_dsel_ex3
 83 | 
 84 |     predictions = np.array([0, 1, 0])
 85 | 
 86 |     competences = mcb_test.estimate_competence(neighbors,
 87 |                                                distances=distances,
 88 |                                                predictions=np.tile(predictions,
 89 |                                                                    (3, 1)))
 90 |     assert np.isclose(competences, expected).all()
 91 | 
 92 | 
 93 | # Test if the class is raising an error when the base classifiers do not
 94 | # implements the predict_proba method. # In this case the test should not
 95 | # raise an error since this class does not require base classifiers that
 96 | # can estimate probabilities
 97 | def test_predict_proba(create_X_y):
 98 |     X, y = create_X_y
 99 | 
100 |     clf1 = Perceptron()
101 |     clf1.fit(X, y)
102 |     MCB([clf1, clf1]).fit(X, y)
103 | 


--------------------------------------------------------------------------------
/deslib/tests/dcs/test_mla.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pytest
  3 | from sklearn.linear_model import Perceptron
  4 | from sklearn.utils.estimator_checks import check_estimator
  5 | 
  6 | from deslib.dcs.mla import MLA
  7 | 
  8 | 
  9 | def test_check_estimator():
 10 |     check_estimator(MLA())
 11 | 
 12 | 
 13 | # Should always be 1.0 since the supports for the correct class is always 1.
 14 | @pytest.mark.parametrize('index', [0, 1, 2])
 15 | def test_estimate_competence_all_ones(index, example_all_ones):
 16 |     _, y, neighbors, distances, dsel_processed, dsel_scores = example_all_ones
 17 | 
 18 |     mla_test = MLA()
 19 |     mla_test.n_classifiers_ = 3
 20 | 
 21 |     mla_test.DSEL_processed_ = dsel_processed
 22 |     mla_test.DSEL_scores = dsel_scores
 23 |     mla_test.DSEL_target_ = y
 24 |     mla_test.n_classes_ = 2
 25 | 
 26 |     neighbors = neighbors[index, :].reshape(1, -1)
 27 |     distances = distances[index, :].reshape(1, -1)
 28 | 
 29 |     expected = [1.0, 1.0, 1.0]
 30 | 
 31 |     predictions = np.array([[0, 1, 0]])
 32 | 
 33 |     competences = mla_test.estimate_competence(neighbors,
 34 |                                                distances=distances,
 35 |                                                predictions=predictions)
 36 | 
 37 |     assert np.isclose(competences, expected).all()
 38 | 
 39 | 
 40 | def test_estimate_competence_batch(example_estimate_competence):
 41 | 
 42 |     _, y, neighbors, _, dsel_processed, _ = example_estimate_competence
 43 | 
 44 |     expected = np.array([[0.750,  0.666,  0.750],
 45 |                          [0.800,  1.000,  0.800],
 46 |                          [1.000,  0.600,  0.500]])
 47 | 
 48 |     mla_test = MLA()
 49 |     mla_test.n_classifiers_ = 3
 50 |     mla_test.DSEL_processed_ = dsel_processed
 51 |     distances = np.ones((3, 7))
 52 | 
 53 |     mla_test.DSEL_target_ = y
 54 |     mla_test.n_classes_ = 2
 55 |     predictions = np.array([[0, 1, 0]])
 56 | 
 57 |     competences = mla_test.estimate_competence(competence_region=neighbors,
 58 |                                                distances=distances,
 59 |                                                predictions=predictions)
 60 | 
 61 |     assert np.allclose(competences, expected, atol=0.01)
 62 | 
 63 | 
 64 | # in this test case, the target of the neighbors is always different than the
 65 | # predicted. So the estimation of competence should always be zero
 66 | @pytest.mark.parametrize('index', [0, 1, 2])
 67 | def test_estimate_competence_diff_target(index, example_estimate_competence):
 68 |     _, _, neighbors, distances, dsel_processed, _ = example_estimate_competence
 69 | 
 70 |     mla_test = MLA()
 71 |     mla_test.n_classifiers_ = 3
 72 | 
 73 |     mla_test.DSEL_processed_ = dsel_processed
 74 |     mla_test.DSEL_target_ = np.ones(15, dtype=int) * 3
 75 | 
 76 |     neighbors = neighbors[index, :].reshape(1, -1)
 77 |     distances = distances[index, :].reshape(1, -1)
 78 | 
 79 |     expected = [0.0, 0.0, 0.0]
 80 | 
 81 |     predictions = np.array([[0, 1, 0]])
 82 | 
 83 |     competences = mla_test.estimate_competence(neighbors,
 84 |                                                distances=distances,
 85 |                                                predictions=predictions)
 86 | 
 87 |     assert np.isclose(competences, expected).all()
 88 | 
 89 | 
 90 | # Testing example from kuncheva's book (combining pattern classifiers)
 91 | def test_estimate_competence_kuncheva_ex(example_kuncheva):
 92 |     example_kuncheva = example_kuncheva
 93 | 
 94 |     mla_test = MLA(k=example_kuncheva['k'])
 95 |     mla_test.n_classifiers_ = 2
 96 | 
 97 |     mla_test.DSEL_processed_ = np.repeat(example_kuncheva['dsel_processed'],
 98 |                                          2,
 99 |                                          axis=1)
100 | 
101 |     mla_test.dsel_scores_ = example_kuncheva['dsel_scores']
102 |     mla_test.DSEL_target_ = example_kuncheva['y_dependent']
103 |     mla_test.n_classes_ = example_kuncheva['n_classes']
104 | 
105 |     neighbors = example_kuncheva['neighbors'].reshape(1, -1)
106 |     distances = example_kuncheva['distances'].reshape(1, -1)
107 | 
108 |     predictions = np.array([[1, 1]])
109 |     competences = mla_test.estimate_competence(neighbors,
110 |                                                distances=distances,
111 |                                                predictions=predictions)
112 | 
113 |     assert np.allclose(competences, [0.95, 0.95], atol=0.01)
114 | 
115 | 
116 | # Test if the class is raising an error when the base classifiers do not
117 | # implements the predict_proba method. In this case the test should not raise
118 | # an error since this class does not require base classifiers that
119 | # can estimate probabilities
120 | def test_predict_proba(create_X_y):
121 |     X, y = create_X_y
122 | 
123 |     clf1 = Perceptron()
124 |     clf1.fit(X, y)
125 |     MLA([clf1, clf1]).fit(X, y)
126 | 


--------------------------------------------------------------------------------
/deslib/tests/dcs/test_ola.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.linear_model import Perceptron
 3 | from sklearn.utils.estimator_checks import check_estimator
 4 | 
 5 | from deslib.dcs.ola import OLA
 6 | 
 7 | 
 8 | def test_check_estimator():
 9 |     check_estimator(OLA())
10 | 
11 | 
12 | def test_estimate_competence_batch(example_estimate_competence):
13 |     _, _, neighbors, distances, dsel_processed, _ = example_estimate_competence
14 |     expected = np.array([[0.57142857,  0.71428571,  0.71428571],
15 |                          [0.71428571,  0.85714286,  0.71428571],
16 |                          [0.57142857, 0.71428571, 0.57142857]])
17 | 
18 |     ola_test = OLA()
19 |     ola_test.DSEL_processed_ = dsel_processed
20 | 
21 |     ola_test.DFP_mask = np.ones((3, 3))
22 |     competences = ola_test.estimate_competence(neighbors,
23 |                                                distances=distances)
24 |     assert np.allclose(competences, expected)
25 | 
26 | 
27 | # Test if the class is raising an error when the base classifiers do not
28 | # implements the predict_proba method. In this case the test should not raise
29 | # an error since this class does not require base classifiers that
30 | # can estimate probabilities
31 | def test_predict_proba(create_X_y):
32 |     X, y = create_X_y
33 |     clf1 = Perceptron()
34 |     clf1.fit(X, y)
35 |     OLA([clf1, clf1]).fit(X, y)
36 | 


--------------------------------------------------------------------------------
/deslib/tests/dcs/test_rank.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.linear_model import Perceptron
 3 | from sklearn.utils.estimator_checks import check_estimator
 4 | 
 5 | from deslib.dcs.rank import Rank
 6 | 
 7 | 
 8 | def test_check_estimator():
 9 |     check_estimator(Rank())
10 | 
11 | 
12 | def test_estimate_competence_batch(example_estimate_competence):
13 |     _, _, neighbors, distances, dsel_processed, _ = example_estimate_competence
14 | 
15 |     expected = np.array([[1, 5, 0],
16 |                          [1, 1, 2],
17 |                          [0, 0, 1]])
18 |     rank_test = Rank()
19 |     rank_test.DSEL_processed_ = dsel_processed
20 |     competences = rank_test.estimate_competence(neighbors,
21 |                                                 distances=distances)
22 |     assert np.allclose(competences, expected)
23 | 
24 | 
25 | # Test if the class is raising an error when the base classifiers do not
26 | # implements the predict_proba method. In this case the test should not raise
27 | # an error since this class does not require base classifiers that
28 | # can estimate probabilities
29 | def test_predict_proba(create_X_y):
30 |     X, y = create_X_y
31 | 
32 |     clf1 = Perceptron()
33 |     clf1.fit(X, y)
34 |     Rank([clf1, clf1]).fit(X, y)
35 | 


--------------------------------------------------------------------------------
/deslib/tests/des/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/des/__init__.py


--------------------------------------------------------------------------------
/deslib/tests/des/test_des_mi.py:
--------------------------------------------------------------------------------
  1 | from unittest.mock import MagicMock
  2 | 
  3 | import numpy as np
  4 | import pytest
  5 | from sklearn.linear_model import Perceptron
  6 | from sklearn.utils.estimator_checks import check_estimator
  7 | 
  8 | from deslib.des.des_mi import DESMI
  9 | 
 10 | 
 11 | def test_check_estimator():
 12 |     check_estimator(DESMI())
 13 | 
 14 | 
 15 | # TODO: create test routine for the estimate_competence method
 16 | 
 17 | 
 18 | @pytest.mark.parametrize('alpha', [-1.0, -0.5, 0.0])
 19 | def test_check_alpha_value(alpha, create_X_y):
 20 |     X, y = create_X_y
 21 |     with pytest.raises(ValueError):
 22 |         desmi = DESMI(alpha=alpha)
 23 |         desmi.fit(X, y)
 24 | 
 25 | 
 26 | @pytest.mark.parametrize('alpha', ['a', None, 'string', 1])
 27 | def test_check_alpha_type(alpha, create_X_y):
 28 |     X, y = create_X_y
 29 |     with pytest.raises(TypeError):
 30 |         desmi = DESMI(alpha=alpha)
 31 |         desmi.fit(X, y)
 32 | 
 33 | 
 34 | @pytest.mark.parametrize('pct_accuracy', [-1.0, -0.5, 0.0, 1.01])
 35 | def test_check_pct_accuracy_value(pct_accuracy, create_X_y):
 36 |     X, y = create_X_y
 37 |     with pytest.raises(ValueError):
 38 |         desmi = DESMI(pct_accuracy=pct_accuracy)
 39 |         desmi.fit(X, y)
 40 | 
 41 | 
 42 | # Test if the class is raising an error when the base classifiers do not
 43 | # implements the predict_proba method.
 44 | # In this case the test should not raise an error since this class does not
 45 | # require base classifiers that can estimate probabilities
 46 | def test_require_proba():
 47 |     X = np.random.randn(5, 5)
 48 |     y = np.array([0, 1, 0, 0, 0])
 49 |     clf1 = Perceptron()
 50 |     clf1.fit(X, y)
 51 |     DESMI([clf1, clf1, clf1])
 52 | 
 53 | 
 54 | def test_select_single_sample():
 55 |     des_mi = DESMI(pct_accuracy=0.7)
 56 |     des_mi.N_ = 2
 57 |     competences = np.array([0.7, 0.2, 1.0])
 58 |     selected_clf = des_mi.select(competences)
 59 |     expected = np.array([0, 2])
 60 |     assert np.array_equal(np.unique(selected_clf), np.unique(expected))
 61 | 
 62 | 
 63 | def test_select_batch_samples():
 64 |     n_samples = 10
 65 |     des_mi = DESMI(pct_accuracy=0.7)
 66 |     des_mi.N_ = 2
 67 |     competences = np.tile(np.array([0.7, 0.2, 1.0]), (n_samples, 1))
 68 |     selected_clf = des_mi.select(competences)
 69 |     expected = np.tile(np.array([0, 2]), (n_samples, 1))
 70 |     assert np.array_equal(np.unique(selected_clf), np.unique(expected))
 71 | 
 72 | 
 73 | def test_classify_with_ds_batch_samples():
 74 |     n_samples = 10
 75 | 
 76 |     # simulated predictions of the pool of classifiers
 77 |     predictions = np.tile(np.array([0, 1, 0]), (n_samples, 1))
 78 | 
 79 |     desmi_test = DESMI()
 80 |     desmi_test.n_classes_ = 2
 81 |     desmi_test.estimate_competence = MagicMock(
 82 |         return_value=(np.ones((n_samples, 3))))
 83 |     desmi_test.select = MagicMock(
 84 |         return_value=np.tile(np.array([[0, 2]]), (n_samples, 1)))
 85 |     result = desmi_test.classify_with_ds(predictions)
 86 |     assert np.allclose(result, np.zeros(10))
 87 | 
 88 | 
 89 | def test_predict_proba_with_ds_soft(create_pool_classifiers):
 90 |     expected = np.array([0.61, 0.39])
 91 |     DFP_mask = np.ones((1, 6))
 92 |     predictions = np.array([[0, 1, 0, 0, 1, 0]])
 93 |     probabilities = np.array([[[0.5, 0.5], [1, 0], [0.33, 0.67],
 94 |                                [0.5, 0.5], [1, 0], [0.33, 0.67]]])
 95 |     pool_classifiers = create_pool_classifiers + create_pool_classifiers
 96 |     desmi_test = DESMI(pool_classifiers, DFP=True, voting='soft')
 97 |     desmi_test.n_classes_ = 2
 98 |     selected_indices = np.array([[0, 1, 5]])
 99 |     desmi_test.estimate_competence = MagicMock(return_value=np.ones(6))
100 |     desmi_test.select = MagicMock(return_value=selected_indices)
101 | 
102 |     predicted_proba = desmi_test.predict_proba_with_ds(predictions,
103 |                                                        probabilities,
104 |                                                        DFP_mask=DFP_mask)
105 |     assert np.isclose(predicted_proba, expected, atol=0.01).all()
106 | 
107 | 
108 | def test_predict_proba_with_ds_hard(create_pool_classifiers):
109 |     expected = np.array([0.666, 0.333])
110 |     DFP_mask = np.ones((1, 6))
111 |     predictions = np.array([[0, 1, 0, 0, 1, 0]])
112 |     probabilities = np.array([[[0.5, 0.5], [1, 0], [0.33, 0.67],
113 |                                [0.5, 0.5], [1, 0], [0.33, 0.67]]])
114 |     pool_classifiers = create_pool_classifiers + create_pool_classifiers
115 |     desmi_test = DESMI(pool_classifiers, DFP=True, voting='hard')
116 |     desmi_test.n_classes_ = 2
117 |     selected_indices = np.array([[0, 1, 5]])
118 |     desmi_test.estimate_competence = MagicMock(return_value=np.ones(6))
119 |     desmi_test.select = MagicMock(return_value=selected_indices)
120 | 
121 |     predicted_proba = desmi_test.predict_proba_with_ds(predictions,
122 |                                                        probabilities,
123 |                                                        DFP_mask=DFP_mask)
124 |     assert np.isclose(predicted_proba, expected, atol=0.01).all()
125 | 
126 | 
127 | def test_soft_voting_no_proba(create_X_y):
128 |     from sklearn.linear_model import Perceptron
129 |     X, y = create_X_y
130 |     clf = Perceptron()
131 |     clf.fit(X, y)
132 |     with pytest.raises(ValueError):
133 |         DESMI([clf, clf, clf, clf], voting='soft').fit(X, y)
134 | 
135 | 
136 | @pytest.mark.parametrize('voting', [None, 'product', 1])
137 | def test_wrong_voting_value(voting, create_X_y, create_pool_classifiers):
138 |     X, y = create_X_y
139 |     pool = create_pool_classifiers
140 |     with pytest.raises(ValueError):
141 |         DESMI(pool, voting=voting).fit(X, y)
142 | 


--------------------------------------------------------------------------------
/deslib/tests/des/test_desp.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.linear_model import Perceptron
 3 | from sklearn.utils.estimator_checks import check_estimator
 4 | 
 5 | from deslib.des.des_p import DESP
 6 | 
 7 | 
 8 | def test_check_estimator():
 9 |     check_estimator(DESP())
10 | 
11 | 
12 | # Test the estimate competence method receiving n samples as input
13 | def test_estimate_competence_batch(example_estimate_competence,
14 |                                    create_pool_classifiers):
15 |     X, y, neighbors, distances, dsel_processed, _ = example_estimate_competence
16 | 
17 |     expected = np.array([[0.57142857, 0.4285714, 0.57142857],
18 |                          [0.71428571, 0.2857142, 0.71428571],
19 |                          [0.2857142, 0.71428571, 0.2857142]])
20 | 
21 |     des_p_test = DESP(create_pool_classifiers)
22 |     des_p_test.fit(X, y)
23 |     competences = des_p_test.estimate_competence(neighbors, distances)
24 |     assert np.allclose(competences, expected, atol=0.01)
25 | 
26 | 
27 | def test_select_two_classes():
28 |     des_p_test = DESP()
29 |     des_p_test.n_classes_ = 2
30 |     expected = np.array([[True, False, True],
31 |                          [True, False, True],
32 |                          [False, True, False]])
33 | 
34 |     competences = np.array([[0.51, 0.0, 0.51],
35 |                             [0.51, 0.0, 0.51],
36 |                             [0.49, 1.0, 0.49]])
37 | 
38 |     selected = des_p_test.select(competences)
39 | 
40 |     assert np.array_equal(selected, expected)
41 | 
42 | 
43 | # In this example, since the number of classes is 3, the competence level
44 | # expected to be selected is > 0.33
45 | def test_select_three_classes():
46 |     des_p_test = DESP()
47 |     des_p_test.n_classes_ = 3
48 |     expected = np.array([[True, False, True],
49 |                          [True, False, True],
50 |                          [False, True, False]])
51 | 
52 |     competences = np.array([[0.34, 0.32, 1.0],
53 |                             [0.50, 0.30, 1.01],
54 |                             [0.25, 1.0, 0.25]])
55 | 
56 |     selected = des_p_test.select(competences)
57 | 
58 |     assert np.array_equal(selected, expected)
59 | 
60 | 
61 | def test_select_none_competent():
62 |     n_classifiers = 3
63 |     des_p_test = DESP()
64 |     des_p_test.n_classes_ = 2
65 |     competences = np.ones(n_classifiers) * 0.49
66 |     indices = des_p_test.select(competences)
67 |     expected = np.array([[True, True, True]])
68 |     assert np.array_equal(expected, indices)
69 | 
70 | 
71 | # Test if the class is raising an error when the base classifiers do not
72 | # implements the predict_proba method. In this case the test should not raise
73 | # an error since this class does not require base classifiers that
74 | # can estimate probabilities
75 | def test_predict_proba(create_X_y):
76 |     X, y = create_X_y
77 |     clf1 = Perceptron()
78 |     clf1.fit(X, y)
79 |     DESP([clf1, clf1]).fit(X, y)
80 | 


--------------------------------------------------------------------------------
/deslib/tests/des/test_knop.py:
--------------------------------------------------------------------------------
 1 | from unittest.mock import Mock
 2 | 
 3 | import numpy as np
 4 | import pytest
 5 | from sklearn.linear_model import Perceptron
 6 | from sklearn.utils.estimator_checks import check_estimator
 7 | 
 8 | from deslib.des.knop import KNOP
 9 | 
10 | 
11 | def test_check_estimator():
12 |     check_estimator(KNOP())
13 | 
14 | 
15 | # Test the estimate competence method receiving n samples as input
16 | def test_estimate_competence_batch(example_estimate_competence,
17 |                                    create_pool_classifiers):
18 |     X, y, neighbors, distances, _, _ = example_estimate_competence
19 |     query = np.ones((3, 2))
20 |     expected = np.array([[4.0, 3.0, 4.0],
21 |                          [5.0, 2.0, 5.0],
22 |                          [2.0, 5.0, 2.0]])
23 | 
24 |     knop_test = KNOP(create_pool_classifiers)
25 |     knop_test.fit(X, y)
26 |     knop_test.neighbors = neighbors
27 |     knop_test.distances = distances
28 | 
29 |     knop_test._get_similar_out_profiles = Mock(return_value=(None, neighbors))
30 |     probabilities = np.zeros((3, 6))
31 | 
32 |     competences = knop_test.estimate_competence_from_proba(query,
33 |                                                            probabilities)
34 |     assert np.allclose(competences, expected, atol=0.01)
35 | 
36 | 
37 | def test_weights_zero():
38 |     knop_test = KNOP()
39 |     competences = np.zeros((1, 3))
40 |     result = knop_test.select(competences)
41 | 
42 |     assert np.all(result)
43 | 
44 | 
45 | def test_fit(example_estimate_competence, create_pool_classifiers):
46 |     X, y = example_estimate_competence[0:2]
47 | 
48 |     knop_test = KNOP(create_pool_classifiers)
49 |     knop_test.fit(X, y)
50 |     expected_scores = np.array([[0.5, 0.5], [1.0, 0.0], [0.33, 0.67]])
51 |     expected_scores = np.tile(expected_scores, (15, 1, 1))
52 | 
53 |     assert np.array_equal(expected_scores, knop_test.dsel_scores_)
54 | 
55 |     # Assert the roc_algorithm_ is fitted to the scores (decision space)
56 |     # rather than the features (feature space)
57 |     expected_roc_data = knop_test.dsel_scores_[:, :, 0]
58 |     assert np.array_equal(knop_test.op_knn_._fit_X, expected_roc_data)
59 | 
60 | 
61 | # Test if the class is raising an error when the base classifiers do not
62 | # implements the predict_proba method. Should raise an exception when the
63 | # base classifier cannot estimate posterior probabilities (predict_proba)
64 | # Using Perceptron classifier as it does not implements predict_proba.
65 | def test_not_predict_proba(create_X_y):
66 |     X, y = create_X_y
67 | 
68 |     clf1 = Perceptron()
69 |     clf1.fit(X, y)
70 |     with pytest.raises(ValueError):
71 |         knop = KNOP([clf1, clf1])
72 |         knop.fit(X, y)
73 | 
74 | 
75 | def test_select():
76 |     knop_test = KNOP()
77 |     competences = np.ones(3)
78 |     competences[0] = 0
79 |     expected = np.atleast_2d([False, True, True])
80 |     selected = knop_test.select(competences)
81 |     assert np.array_equal(expected, selected)
82 | 


--------------------------------------------------------------------------------
/deslib/tests/des/test_knorae.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pytest
 3 | from sklearn.linear_model import Perceptron
 4 | from sklearn.utils.estimator_checks import check_estimator
 5 | 
 6 | from deslib.des.knora_e import KNORAE
 7 | 
 8 | 
 9 | def test_check_estimator():
10 |     check_estimator(KNORAE())
11 | 
12 | 
13 | def test_estimate_competence_batch(example_estimate_competence,
14 |                                    create_pool_classifiers):
15 |     X, y, neighbors, distances, _, _ = example_estimate_competence
16 | 
17 |     expected = np.array([[1.0, 0.0, 1.0],
18 |                          [2.0, 0.0, 2.0],
19 |                          [0.0, 3.0, 0.0]])
20 | 
21 |     knora_e_test = KNORAE(create_pool_classifiers)
22 |     knora_e_test.fit(X, y)
23 | 
24 |     competences = knora_e_test.estimate_competence(neighbors,
25 |                                                    distances=distances)
26 |     assert np.allclose(competences, expected)
27 | 
28 | 
29 | @pytest.mark.parametrize('index, expected', [(0, [[True, False, True]]),
30 |                                              (1, [[True, False, True]]),
31 |                                              (2, [[False, True, False]])])
32 | def test_select(index, expected, create_pool_classifiers,
33 |                 example_estimate_competence):
34 |     X, y, neighbors, distances, _, _ = example_estimate_competence
35 | 
36 |     knora_e_test = KNORAE(create_pool_classifiers)
37 |     knora_e_test.fit(X, y)
38 |     neighbors = neighbors[index, :].reshape(1, -1)
39 |     distances = distances[index, :].reshape(1, -1)
40 |     competences = knora_e_test.estimate_competence(neighbors,
41 |                                                    distances=distances)
42 |     selected = knora_e_test.select(competences)
43 | 
44 |     assert np.array_equal(selected, expected)
45 | 
46 | 
47 | # No classifier here is selected, since the always predict class 2 where there
48 | # are only samples labeled as class 0 and 1
49 | # in the region of competence
50 | def test_select_none_competent():
51 |     knora_e_test = KNORAE()
52 |     competences = np.zeros(100)
53 |     selected = knora_e_test.select(competences)
54 |     expected = np.atleast_2d([True] * 100)
55 | 
56 |     assert np.array_equal(expected, selected)
57 | 
58 | 
59 | # Test if the class is raising an error when the base classifiers do not
60 | # implements the predict_proba method. In this case the test should not raise
61 | # an error since this class does not require base classifiers that
62 | # can estimate probabilities
63 | def test_predict_proba(create_X_y):
64 |     X, y = create_X_y
65 | 
66 |     clf1 = Perceptron()
67 |     clf1.fit(X, y)
68 |     KNORAE([clf1, clf1]).fit(X, y)
69 | 


--------------------------------------------------------------------------------
/deslib/tests/des/test_knorau.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.linear_model import Perceptron
 3 | from sklearn.utils.estimator_checks import check_estimator
 4 | 
 5 | from deslib.des.knora_u import KNORAU
 6 | 
 7 | 
 8 | def test_check_estimator():
 9 |     check_estimator(KNORAU())
10 | 
11 | 
12 | # Test the estimate competence method receiving n samples as input
13 | def test_estimate_competence_batch(example_estimate_competence,
14 |                                    create_pool_classifiers):
15 | 
16 |     X, y, neighbors = example_estimate_competence[0:3]
17 | 
18 |     expected = np.array([[4.0, 3.0, 4.0],
19 |                          [5.0, 2.0, 5.0],
20 |                          [2.0, 5.0, 2.0]])
21 |     knora_u_test = KNORAU(create_pool_classifiers)
22 |     knora_u_test.fit(X, y)
23 | 
24 |     competences = knora_u_test.estimate_competence(neighbors)
25 |     assert np.allclose(competences, expected, atol=0.01)
26 | 
27 | 
28 | def test_weights_zero():
29 |     knorau_test = KNORAU()
30 |     competences = np.zeros((1, 3))
31 |     result = knorau_test.select(competences)
32 | 
33 |     assert np.all(result)
34 | 
35 | 
36 | # Test if the class is raising an error when the base classifiers do not
37 | # implements the predict_proba method. In this case the test should not raise
38 | # an error since this class does not require base classifiers that
39 | # can estimate probabilities
40 | def test_predict_proba(create_X_y):
41 |     X, y = create_X_y
42 | 
43 |     clf1 = Perceptron()
44 |     clf1.fit(X, y)
45 |     KNORAU([clf1, clf1]).fit(X, y)
46 | 
47 | 
48 | def test_select():
49 |     knorau_test = KNORAU()
50 |     competences = np.ones(3)
51 |     competences[0] = 0
52 |     expected = np.atleast_2d([False, True, True])
53 |     selected = knorau_test.select(competences)
54 |     assert np.array_equal(expected, selected)
55 | 


--------------------------------------------------------------------------------
/deslib/tests/expected_values/des_clustering_proba_integration.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/des_clustering_proba_integration.npy


--------------------------------------------------------------------------------
/deslib/tests/expected_values/desknn_proba_integration.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/desknn_proba_integration.npy


--------------------------------------------------------------------------------
/deslib/tests/expected_values/desknn_probas_DFP.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/desknn_probas_DFP.npy


--------------------------------------------------------------------------------
/deslib/tests/expected_values/desp_proba_DFP.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/desp_proba_DFP.npy


--------------------------------------------------------------------------------
/deslib/tests/expected_values/desp_proba_integration.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/desp_proba_integration.npy


--------------------------------------------------------------------------------
/deslib/tests/expected_values/kne_knn_proba_integration.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/kne_knn_proba_integration.npy


--------------------------------------------------------------------------------
/deslib/tests/expected_values/kne_proba_DFP.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/kne_proba_DFP.npy


--------------------------------------------------------------------------------
/deslib/tests/expected_values/kne_proba_integration.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/kne_proba_integration.npy


--------------------------------------------------------------------------------
/deslib/tests/expected_values/knop_proba_integration.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/knop_proba_integration.npy


--------------------------------------------------------------------------------
/deslib/tests/expected_values/mcb_proba_DFP.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/mcb_proba_DFP.npy


--------------------------------------------------------------------------------
/deslib/tests/expected_values/mcb_proba_integration.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/mcb_proba_integration.npy


--------------------------------------------------------------------------------
/deslib/tests/expected_values/ola_proba_DFP.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/ola_proba_DFP.npy


--------------------------------------------------------------------------------
/deslib/tests/expected_values/ola_proba_integration.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/ola_proba_integration.npy


--------------------------------------------------------------------------------
/deslib/tests/static/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/static/__init__.py


--------------------------------------------------------------------------------
/deslib/tests/static/test_oracle.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.datasets import make_classification
 3 | from sklearn.ensemble import RandomForestClassifier
 4 | 
 5 | from deslib.static.oracle import Oracle
 6 | 
 7 | 
 8 | def test_predict(create_X_y, create_pool_classifiers):
 9 |     X, y = create_X_y
10 | 
11 |     oracle_test = Oracle(create_pool_classifiers)
12 |     oracle_test.fit(X, y)
13 |     predicted_labels = oracle_test.predict(X, y)
14 |     assert np.equal(predicted_labels, y).all()
15 | 
16 |     assert oracle_test.score(X, y) == 1.0
17 | 
18 | 
19 | # All classifiers predicts the same label. This test only the samples
20 | # with label == 0 are correctly classified by the Oracle.
21 | # The misclassified samples are set to -1.
22 | def test_predict_all_same(create_X_y, create_pool_all_agree):
23 |     X, y = create_X_y
24 | 
25 |     expected = y
26 |     oracle_test = Oracle(create_pool_all_agree)
27 |     oracle_test.fit(X, y)
28 |     expected[expected == 1] = 0
29 |     predicted_labels = oracle_test.predict(X, y)
30 |     assert np.equal(predicted_labels, expected).all()
31 | 
32 | 
33 | def test_predict_proba_shape():
34 |     n_test_samples = 200
35 |     X, y = make_classification(n_samples=1000)
36 |     X_test, y_test = make_classification(n_samples=n_test_samples)
37 |     pool = RandomForestClassifier(max_depth=3).fit(X, y)
38 |     oracle = Oracle(pool_classifiers=pool).fit(X, y)
39 | 
40 |     proba = oracle.predict_proba(X_test, y_test)
41 |     assert proba.shape == (n_test_samples, 2)
42 | 
43 | 
44 | def test_predict_proba_right_class():
45 |     n_test_samples = 200
46 |     X, y = make_classification(n_samples=1000)
47 |     X_test, y_test = make_classification(n_samples=n_test_samples)
48 |     pool = RandomForestClassifier(max_depth=3).fit(X, y)
49 |     oracle = Oracle(pool_classifiers=pool).fit(X, y)
50 | 
51 |     preds = oracle.predict(X_test, y_test)
52 |     proba = oracle.predict_proba(X_test, y_test)
53 |     probas_max = np.argmax(proba, axis=1)
54 |     assert np.allclose(probas_max, preds)
55 | 
56 | 
57 | def test_label_encoder_base_ensemble():
58 |     from sklearn.ensemble import RandomForestClassifier
59 |     X, y = make_classification()
60 |     y[y == 1] = 2
61 |     y = y.astype(float)
62 |     pool = RandomForestClassifier().fit(X, y)
63 |     oracle = Oracle(pool)
64 |     oracle.fit(X, y)
65 |     pred = oracle.predict(X, y)
66 |     assert np.isin(oracle.classes_, pred).all()
67 | 


--------------------------------------------------------------------------------
/deslib/tests/static/test_single_best.py:
--------------------------------------------------------------------------------
  1 | from unittest.mock import MagicMock
  2 | 
  3 | import numpy as np
  4 | import pytest
  5 | from sklearn.datasets import make_classification
  6 | from sklearn.ensemble import AdaBoostClassifier
  7 | from sklearn.exceptions import NotFittedError
  8 | from sklearn.metrics import roc_auc_score
  9 | from sklearn.utils.estimator_checks import check_estimator
 10 | 
 11 | from deslib.static.single_best import SingleBest
 12 | 
 13 | 
 14 | def test_check_estimator():
 15 |     check_estimator(SingleBest())
 16 | 
 17 | 
 18 | # Testing if the fit function selects the correct classifier (the one with
 19 | # highest classification accuracy). # Note: clf[0] and clf[2] have the
 20 | # same accuracy since they always predict the same label.
 21 | def test_fit(create_X_y, create_pool_classifiers):
 22 |     X, y = create_X_y
 23 | 
 24 |     pool_classifiers = create_pool_classifiers
 25 |     single_best_test = SingleBest(pool_classifiers)
 26 |     single_best_test._estimate_performances = MagicMock(
 27 |         return_value=[1.0, 0.5, 0.99])
 28 | 
 29 |     single_best_test.fit(X, y)
 30 | 
 31 |     assert single_best_test.best_clf_index_ == 0
 32 | 
 33 | 
 34 | # The classifier with highest accuracy always predicts 0. So the expected
 35 | # prediction should always be equal zero.
 36 | def test_predict(create_X_y, create_pool_classifiers):
 37 |     X, y = create_X_y
 38 | 
 39 |     pool_classifiers = create_pool_classifiers
 40 |     single_best_test = SingleBest(pool_classifiers=pool_classifiers)
 41 |     single_best_test.fit(X, y)
 42 | 
 43 |     predicted_labels = single_best_test.predict(X)
 44 |     assert np.equal(predicted_labels, 0).all()
 45 | 
 46 | 
 47 | # The probabilities predicted must always be equals to the probabilities
 48 | # predicted by the base classifier with index 0.
 49 | def test_predict_proba(create_X_y, create_pool_classifiers):
 50 |     X, y = create_X_y
 51 | 
 52 |     pool_classifiers = create_pool_classifiers
 53 |     single_best_test = SingleBest(pool_classifiers)
 54 |     single_best_test.fit(X, y)
 55 | 
 56 |     predicted_proba = single_best_test.predict_proba(X)
 57 |     assert np.equal(predicted_proba,
 58 |                     pool_classifiers[0].predict_proba(X)).all()
 59 | 
 60 | 
 61 | def test_not_fitted():
 62 |     single_best_test = SingleBest()
 63 |     with pytest.raises(NotFittedError):
 64 |         single_best_test.predict(np.array([[1, -1]]))
 65 | 
 66 | 
 67 | # Test calling the predict_proba function with classifiers that do not
 68 | # implement the predict_proba
 69 | def test_not_predict_proba(create_X_y):
 70 |     X, y = create_X_y
 71 | 
 72 |     classifier = MagicMock()
 73 |     classifier.predict.return_value = [0]
 74 |     single_best_test = SingleBest([classifier] * 10)
 75 |     single_best_test.fit(X, y)
 76 |     with pytest.raises(ValueError):
 77 |         single_best_test.predict_proba(X)
 78 | 
 79 | 
 80 | def test_label_encoder(create_label_encoder_test):
 81 |     X, y, pool = create_label_encoder_test
 82 |     sb = SingleBest(pool).fit(X, y)
 83 |     pred = sb.predict(X)
 84 |     assert np.array_equal(pred, y)
 85 | 
 86 | 
 87 | def test_label_encoder_base_ensemble():
 88 |     from sklearn.ensemble import RandomForestClassifier
 89 |     X, y = make_classification()
 90 |     y[y == 1] = 2
 91 |     y = y.astype(float)
 92 |     pool = RandomForestClassifier().fit(X, y)
 93 |     sb = SingleBest(pool)
 94 |     sb.fit(X, y)
 95 |     pred = sb.predict(X)
 96 |     assert np.isin(sb.classes_, pred).all()
 97 | 
 98 | 
 99 | def test_different_scorer():
100 |     X, y = make_classification(n_samples=100, random_state=42)
101 |     X_val, y_val = make_classification(n_samples=25, random_state=123)
102 |     pool = AdaBoostClassifier(n_estimators=10).fit(X, y)
103 |     performances = []
104 |     for clf in pool:
105 |         preds = clf.predict_proba(X_val)
106 |         performances.append(roc_auc_score(y_val.ravel(), preds[:, -1]))
107 |     id_best = np.argmax(performances)
108 |     sb = SingleBest(pool_classifiers=pool, scoring='roc_auc')
109 |     sb.fit(X_val, y_val)
110 |     assert id_best == sb.best_clf_index_
111 | 


--------------------------------------------------------------------------------
/deslib/tests/static/test_stacked.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pytest
 3 | from sklearn.datasets import make_classification
 4 | from sklearn.linear_model import Perceptron
 5 | from sklearn.tree import DecisionTreeClassifier
 6 | from sklearn.utils.estimator_checks import check_estimator
 7 | 
 8 | from deslib.static.stacked import StackedClassifier
 9 | 
10 | 
11 | def test_check_estimator():
12 |     check_estimator(StackedClassifier())
13 | 
14 | 
15 | # Test if the class is raising an error when the base classifiers do not
16 | # implements the predict_proba method. Should raise an exception when the
17 | # base classifier cannot estimate posterior probabilities (predict_proba)
18 | # Using Perceptron classifier as it does not implements predict_proba.
19 | def test_not_predict_proba(create_X_y):
20 |     X, y = create_X_y
21 | 
22 |     clf1 = Perceptron()
23 |     clf1.fit(X, y)
24 |     with pytest.raises(ValueError):
25 |         StackedClassifier([clf1, clf1]).fit(X, y)
26 | 
27 | 
28 | # Test if the class is raising an error when the meta classifiers do not
29 | # implements the predict_proba method. Should raise an exception when the
30 | # base classifier cannot estimate posterior probabilities (predict_proba)
31 | # Using Perceptron classifier as it does not implements predict_proba.
32 | def test_not_predict_proba_meta(create_X_y, create_pool_classifiers):
33 |     X, y = create_X_y
34 | 
35 |     pool = create_pool_classifiers
36 |     with pytest.raises(ValueError):
37 |         meta_clf = StackedClassifier(pool_classifiers=pool,
38 |                                      meta_classifier=Perceptron())
39 |         meta_clf.fit(X, y)
40 |         meta_clf.predict_proba(X)
41 | 
42 | 
43 | def test_label_encoder():
44 |     y = ['one', 'one', 'one', 'zero', 'zero', 'two']
45 |     X = np.random.rand(6, 3)
46 |     pool = [DecisionTreeClassifier().fit(X, y) for _ in range(5)]
47 |     stacked = StackedClassifier(pool).fit(X, y)
48 |     pred = stacked.predict(X)
49 |     assert np.array_equal(pred, y)
50 | 
51 | 
52 | def test_label_encoder_base_ensemble():
53 |     from sklearn.ensemble import RandomForestClassifier
54 |     X, y = make_classification()
55 |     y[y == 1] = 2
56 |     y = y.astype(float)
57 |     pool = RandomForestClassifier().fit(X, y)
58 |     st = StackedClassifier(pool)
59 |     st.fit(X, y)
60 |     pred = st.predict(X)
61 |     assert np.isin(st.classes_, pred).all()
62 | 
63 | 
64 | def test_one_class_meta_dataset(create_X_y):
65 |     X, y = create_X_y
66 |     pool = [DecisionTreeClassifier().fit(X, y) for _ in range(5)]
67 |     stacked = StackedClassifier(pool)
68 |     X_meta = np.random.rand(10, 2)
69 |     y_meta = np.zeros(10, dtype=int)
70 |     with pytest.raises(ValueError):
71 |         stacked.fit(X_meta, y_meta)
72 | 
73 | 
74 | def test_passthrough_true(create_X_y):
75 |     X, y = create_X_y
76 |     pool = [DecisionTreeClassifier().fit(X, y) for _ in range(5)]
77 |     stacked = StackedClassifier(pool, passthrough=True)
78 |     stacked.fit(X, y)
79 |     assert stacked.meta_classifier_.coef_.shape == (1, 7)
80 | 
81 | 
82 | def test_passthrough_false(create_X_y):
83 |     X, y = create_X_y
84 |     pool = [DecisionTreeClassifier().fit(X, y) for _ in range(5)]
85 |     stacked = StackedClassifier(pool, passthrough=False)
86 |     stacked.fit(X, y)
87 |     assert stacked.meta_classifier_.coef_.shape == (1, 5)
88 | 
89 | 
90 | def test_single_model_pool(create_X_y):
91 |     X, y = create_X_y
92 |     pool = [DecisionTreeClassifier().fit(X, y)]
93 |     with pytest.raises(ValueError):
94 |         StackedClassifier(pool_classifiers=pool).fit(X, y)
95 | 


--------------------------------------------------------------------------------
/deslib/tests/static/test_static_selection.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pytest
  3 | from sklearn.datasets import make_classification
  4 | from sklearn.ensemble import AdaBoostClassifier
  5 | from sklearn.exceptions import NotFittedError
  6 | from sklearn.metrics import log_loss
  7 | from sklearn.utils.estimator_checks import check_estimator
  8 | 
  9 | from deslib.static.static_selection import StaticSelection
 10 | 
 11 | 
 12 | def test_check_estimator():
 13 |     check_estimator(StaticSelection())
 14 | 
 15 | 
 16 | # Testing if the fit function selects the correct classifiers.
 17 | # The 50 last classifiers should be selected.
 18 | def test_fit(example_static_selection):
 19 |     X, y, pool = example_static_selection
 20 |     static_selection_test = StaticSelection(pool, 0.5)
 21 |     static_selection_test.fit(X, y)
 22 | 
 23 |     assert static_selection_test.n_classifiers_ensemble_ == 50
 24 |     assert static_selection_test.n_classifiers_ensemble_ == len(
 25 |         static_selection_test.clf_indices_)
 26 |     assert np.array_equal(np.sort(static_selection_test.clf_indices_),
 27 |                           list(range(50, 100)))
 28 | 
 29 | 
 30 | # The classifier with highest accuracy always predicts 0. So the expected
 31 | # prediction should always be equal zero.
 32 | def test_predict(example_static_selection, create_pool_classifiers):
 33 |     X, y, _ = example_static_selection
 34 | 
 35 |     static_selection_test = StaticSelection(create_pool_classifiers*10, 0.25)
 36 |     static_selection_test.fit(X, y)
 37 | 
 38 |     predicted_labels = static_selection_test.predict(X)
 39 |     assert np.equal(predicted_labels, 0).all()
 40 | 
 41 | 
 42 | # Classifiers predicting different labels are selected
 43 | def test_predict_diff(example_static_selection):
 44 |     X, y, pool = example_static_selection
 45 | 
 46 |     static_selection_test = StaticSelection(pool, 0.75)
 47 |     static_selection_test.fit(X, y)
 48 | 
 49 |     predicted_labels = static_selection_test.predict(X)
 50 |     assert np.equal(predicted_labels, 1).all()
 51 | 
 52 | 
 53 | def test_not_fitted():
 54 |     static_selection_test = StaticSelection()
 55 |     with pytest.raises(NotFittedError):
 56 |         static_selection_test.predict(np.array([[1, -1]]))
 57 | 
 58 | 
 59 | def test_invalid_pct():
 60 |     with pytest.raises(TypeError):
 61 |         test = StaticSelection(pct_classifiers='something')
 62 |         test.fit(np.random.rand(10, 2), np.ones(10))
 63 | 
 64 | 
 65 | def test_invalid_pct2():
 66 |     with pytest.raises(ValueError):
 67 |         test = StaticSelection(pct_classifiers=1.2)
 68 |         test.fit(np.random.rand(10, 2), np.ones(10))
 69 | 
 70 | 
 71 | def test_label_encoder(create_label_encoder_test):
 72 |     X, y, pool = create_label_encoder_test
 73 |     static = StaticSelection(pool).fit(X, y)
 74 |     pred = static.predict(X)
 75 |     assert np.array_equal(pred, y)
 76 | 
 77 | 
 78 | def test_label_encoder_base_ensemble():
 79 |     from sklearn.ensemble import RandomForestClassifier
 80 |     X, y = make_classification()
 81 |     y[y == 1] = 2
 82 |     y = y.astype(float)
 83 |     pool = RandomForestClassifier().fit(X, y)
 84 |     ss = StaticSelection(pool)
 85 |     ss.fit(X, y)
 86 |     pred = ss.predict(X)
 87 |     assert np.isin(ss.classes_, pred).all()
 88 | 
 89 | 
 90 | def test_predict_proba(example_static_selection):
 91 |     X, y, pool = example_static_selection
 92 |     expected = np.tile([0.52, 0.48], (y.size, 1))
 93 |     static_selection_test = StaticSelection(pool, 0.5)
 94 |     static_selection_test.fit(X, y)
 95 |     proba = static_selection_test.predict_proba(X)
 96 |     assert np.allclose(proba, expected)
 97 | 
 98 | 
 99 | # Test if static_selection can select the best classifier according to a
100 | # metric that needs to be minimized.
101 | def test_different_scorer():
102 |     X, y = make_classification(n_samples=100, random_state=42)
103 |     X_val, y_val = make_classification(n_samples=25, random_state=123)
104 |     pool = AdaBoostClassifier(n_estimators=10).fit(X, y)
105 |     performances = []
106 |     for clf in pool:
107 |         preds = clf.predict_proba(X_val)
108 |         performances.append(log_loss(y_val.ravel(), preds[:, -1]))
109 |     id_best = np.argsort(performances)
110 |     ss = StaticSelection(pool_classifiers=pool, scoring='neg_log_loss')
111 |     ss.fit(X_val, y_val)
112 |     assert (id_best[:ss.n_classifiers_ensemble_] == ss.clf_indices_).all()
113 | 
114 | 
115 | # Test if static_selection can select the best classifier according to a
116 | # metric that needs to be minimized.
117 | def test_different_scorer():
118 |     X, y = make_classification(n_samples=100, random_state=42)
119 |     X_val, y_val = make_classification(n_samples=25, random_state=123)
120 |     pool = AdaBoostClassifier(n_estimators=10).fit(X, y)
121 |     performances = []
122 |     for clf in pool:
123 |         preds = clf.predict_proba(X_val)
124 |         performances.append(log_loss(y_val.ravel(), preds[:, -1]))
125 |     id_best = np.argsort(performances)
126 |     ss = StaticSelection(pool_classifiers=pool, scoring='neg_log_loss')
127 |     ss.fit(X_val, y_val)
128 |     assert (id_best[:ss.n_classifiers_ensemble_] == ss.clf_indices_).all()
129 | 


--------------------------------------------------------------------------------
/deslib/tests/test_des_integration_multiclass.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.datasets import make_classification
 3 | from sklearn.ensemble import AdaBoostClassifier
 4 | from sklearn.model_selection import train_test_split
 5 | from sklearn.preprocessing import StandardScaler
 6 | 
 7 | # DCS techniques
 8 | from deslib.dcs.a_priori import APriori
 9 | from deslib.dcs.mcb import MCB
10 | # DES techniques
11 | from deslib.des.des_mi import DESMI
12 | from deslib.des.des_p import DESP
13 | from deslib.des.knop import KNOP
14 | from deslib.des.meta_des import METADES
15 | 
16 | 
17 | def setup_classifiers():
18 |     rng = np.random.RandomState(123456)
19 | 
20 |     X_dsel, X_test, X_train, y_dsel, y_test, y_train = load_dataset(rng)
21 |     # Train a pool of 100 classifiers
22 |     pool_classifiers = AdaBoostClassifier(random_state=rng)
23 |     pool_classifiers.fit(X_train, y_train)
24 |     return pool_classifiers, X_dsel, y_dsel, X_test, y_test
25 | 
26 | 
27 | def load_dataset(rng):
28 |     # Generate a classification dataset
29 |     weights = [0.1, 0.2, 0.7]
30 |     X, y = make_classification(n_classes=3, n_samples=2000, n_informative=3,
31 |                                random_state=rng, weights=weights)
32 | 
33 |     # split the data into training and test data
34 |     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
35 |                                                         random_state=rng)
36 |     # Scale the variables to have 0 mean and unit variance
37 |     scalar = StandardScaler()
38 |     X_train = scalar.fit_transform(X_train)
39 |     X_test = scalar.transform(X_test)
40 |     # Split the data into training and DSEL for DS techniques
41 |     X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train,
42 |                                                         test_size=0.5,
43 |                                                         random_state=rng)
44 |     # Considering a pool composed of 10 base classifiers
45 |     # Calibrating Perceptrons to estimate probabilities
46 |     return X_dsel, X_test, X_train, y_dsel, y_test, y_train
47 | 
48 | 
49 | def test_desp():
50 |     pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()
51 | 
52 |     desp = DESP(pool_classifiers)
53 |     desp.fit(X_dsel, y_dsel)
54 |     assert np.isclose(desp.score(X_test, y_test), 0.6954545454545454)
55 | 
56 | 
57 | def test_mcb():
58 |     pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()
59 |     rng = np.random.RandomState(123456)
60 | 
61 |     mcb = MCB(pool_classifiers, random_state=rng)
62 |     mcb.fit(X_dsel, y_dsel)
63 |     assert np.isclose(mcb.score(X_test, y_test), 0.7196969696969697)
64 | 
65 | 
66 | def test_apriori():
67 |     pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()
68 |     rng = np.random.RandomState(123456)
69 | 
70 |     apriori = APriori(pool_classifiers, random_state=rng)
71 |     apriori.fit(X_dsel, y_dsel)
72 |     assert np.isclose(apriori.score(X_test, y_test), 0.6878787878787879)
73 | 
74 | 
75 | def test_meta():
76 |     pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()
77 | 
78 |     meta_des = METADES(pool_classifiers)
79 |     meta_des.fit(X_dsel, y_dsel)
80 |     assert np.isclose(meta_des.score(X_test, y_test), 0.796969696969697)
81 | 
82 | 
83 | def test_knop():
84 |     pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()
85 | 
86 |     knop = KNOP(pool_classifiers)
87 |     knop.fit(X_dsel, y_dsel)
88 |     assert np.isclose(knop.score(X_test, y_test), 0.8106060606060606)
89 | 
90 | 
91 | def test_mi():
92 |     pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()
93 | 
94 |     desmi = DESMI(pool_classifiers, alpha=0.9)
95 |     desmi.fit(X_dsel, y_dsel)
96 |     assert np.isclose(desmi.score(X_test, y_test), 0.3500000000)
97 | 


--------------------------------------------------------------------------------
/deslib/tests/test_integration_DFP_IH.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.calibration import CalibratedClassifierCV
 3 | from sklearn.datasets import make_classification
 4 | from sklearn.ensemble import BaggingClassifier
 5 | from sklearn.linear_model import Perceptron
 6 | from sklearn.model_selection import train_test_split
 7 | from sklearn.preprocessing import StandardScaler
 8 | 
 9 | # DCS techniques
10 | from deslib.dcs.a_posteriori import APosteriori
11 | from deslib.dcs.mcb import MCB
12 | from deslib.dcs.ola import OLA
13 | from deslib.des import DESClustering
14 | # DES techniques
15 | from deslib.des.des_p import DESP
16 | from deslib.des.knora_u import KNORAU
17 | 
18 | 
19 | def setup_classifiers():
20 |     rng = np.random.RandomState(654321)
21 | 
22 |     # Generate a classification dataset
23 |     X, y = make_classification(n_classes=2, n_samples=1000, weights=[0.2, 0.8],
24 |                                random_state=rng)
25 |     # split the data into training and test data
26 |     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
27 |                                                         random_state=rng)
28 | 
29 |     # Scale the variables to have 0 mean and unit variance
30 |     scalar = StandardScaler()
31 |     X_train = scalar.fit_transform(X_train)
32 |     X_test = scalar.transform(X_test)
33 | 
34 |     # Split the data into training and DSEL for DS techniques
35 |     X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train,
36 |                                                         test_size=0.5,
37 |                                                         random_state=rng)
38 |     # Considering a pool composed of 10 base classifiers
39 |     model = CalibratedClassifierCV(Perceptron(max_iter=100), cv=5)
40 | 
41 |     pool_classifiers = BaggingClassifier(model, n_estimators=100, n_jobs=-1,
42 |                                          random_state=rng)
43 |     pool_classifiers.fit(X_train, y_train)
44 |     return pool_classifiers, X_dsel, y_dsel, X_test, y_test
45 | 
46 | 
47 | def test_knorau():
48 |     pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()
49 | 
50 |     knorau = KNORAU(pool_classifiers, DFP=True, with_IH=True, IH_rate=0.1)
51 |     knorau.fit(X_dsel, y_dsel)
52 |     assert np.isclose(knorau.score(X_test, y_test), 0.9)
53 | 
54 | 
55 | def test_desp():
56 |     pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()
57 | 
58 |     desp = DESP(pool_classifiers, DFP=True, with_IH=True, IH_rate=0.1)
59 |     desp.fit(X_dsel, y_dsel)
60 |     assert np.isclose(desp.score(X_test, y_test), 0.90)
61 | 
62 | 
63 | def test_ola():
64 |     pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()
65 | 
66 |     ola = OLA(pool_classifiers, DFP=True, with_IH=True, IH_rate=0.1)
67 |     ola.fit(X_dsel, y_dsel)
68 |     assert np.isclose(ola.score(X_test, y_test), 0.9030303030303031)
69 | 
70 | 
71 | def test_mcb():
72 |     pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()
73 |     rng = np.random.RandomState(123456)
74 | 
75 |     mcb = MCB(pool_classifiers, random_state=rng, DFP=True, with_IH=True,
76 |               IH_rate=0.1)
77 |     mcb.fit(X_dsel, y_dsel)
78 |     assert np.isclose(mcb.score(X_test, y_test), 0.8878787878787879)
79 | 


--------------------------------------------------------------------------------
/deslib/tests/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/util/__init__.py


--------------------------------------------------------------------------------
/deslib/tests/util/test_aggregation.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | 
  3 | from deslib.util.aggregation import *
  4 | from ..conftest import create_base_classifier
  5 | 
  6 | 
  7 | def test_majority_voting():
  8 |     query = np.array([[1, -1], [0, 0], [3, -1]])
  9 | 
 10 |     ensemble_classifiers = [create_base_classifier(return_value=0)] * 10 + [
 11 |         create_base_classifier(return_value=1)] * 9
 12 | 
 13 |     predicted = majority_voting(ensemble_classifiers, query)
 14 |     assert predicted.all() == 0 and predicted.size == 3
 15 | 
 16 | 
 17 | def test_majority_voting_multi_class():
 18 |     query = np.array([1, -1])
 19 |     ensemble_classifiers = ([create_base_classifier(return_value=0)] * 10) + \
 20 |         [create_base_classifier(return_value=2)] * 9 + \
 21 |         [create_base_classifier(return_value=1)] * 20
 22 | 
 23 |     predicted = majority_voting(ensemble_classifiers, query)
 24 |     assert predicted.all() == 1 and predicted.size == 1
 25 | 
 26 | 
 27 | def test_weighted_majority_voting():
 28 |     query = np.array([[1, -1], [0, 0], [3, -1]])
 29 |     ensemble_classifiers = ([create_base_classifier(return_value=0)] * 10) + \
 30 |         [create_base_classifier(return_value=2)] * 9
 31 |     weights = np.array([([0.5] * 10) + ([0.8] * 9), ([0.5] * 10) + ([0.8] * 9),
 32 |                         ([0.5] * 10) + ([0.8] * 9)])
 33 |     predicted = weighted_majority_voting(ensemble_classifiers, weights, query)
 34 |     assert predicted.all() == 1 and predicted.size == 3
 35 | 
 36 | 
 37 | def test_weighted_majority_voting_single_sample():
 38 |     query = np.array([1, -1])
 39 |     clf_1 = create_base_classifier(return_value=1)
 40 |     clf_2 = create_base_classifier(return_value=1)
 41 |     clf_3 = create_base_classifier(return_value=2)
 42 |     ensemble_classifiers = [clf_2, clf_1, clf_3]
 43 |     weights = np.atleast_2d([0.2, 0.5, 1.0])
 44 |     predicted = weighted_majority_voting(ensemble_classifiers, weights, query)
 45 |     assert predicted == 2 and predicted.size == 1
 46 | 
 47 | 
 48 | def test_predict_proba(create_pool_classifiers):
 49 |     query = np.array([[1, -1]])
 50 |     ensemble_classifiers = create_pool_classifiers
 51 |     predicted_proba = predict_proba_ensemble(ensemble_classifiers, query)
 52 |     assert np.isclose(predicted_proba, [0.61, 0.39]).all()
 53 | 
 54 | 
 55 | # This experiment should raise an error since we have 3 base classifiers
 56 | # and 4 weights.
 57 | def test_wrong_weights_votes(create_pool_classifiers):
 58 |     query = np.array([[1, -1]])
 59 |     ensemble_classifiers = create_pool_classifiers
 60 |     weights = np.array([1.0, 1.0, 1.0, 1.0])
 61 |     with pytest.raises(ValueError):
 62 |         weighted_majority_voting(ensemble_classifiers, weights, query)
 63 | 
 64 | 
 65 | # -------Test routines for the ensemble combination methods-------
 66 | # These routines calculates the matrix with the supports given for
 67 | # each class for each base classifier and them Aggregates the supports
 68 | 
 69 | def test_product_combiner(create_pool_classifiers):
 70 |     query = np.array([[1, -1]])
 71 |     ensemble_classifiers = create_pool_classifiers
 72 |     expected = 0
 73 |     result = product_combiner(ensemble_classifiers, query)
 74 |     assert np.allclose(expected, result)
 75 | 
 76 | 
 77 | def test_average_combiner(create_pool_classifiers):
 78 |     query = np.array([[1, -1]])
 79 |     ensemble_classifiers = create_pool_classifiers
 80 |     expected = 0
 81 |     result = average_combiner(ensemble_classifiers, query)
 82 |     assert result == expected
 83 | 
 84 | 
 85 | def test_minimum_combiner(create_pool_classifiers):
 86 |     query = np.array([[1, -1]])
 87 |     ensemble_classifiers = create_pool_classifiers
 88 |     expected = 0
 89 |     result = minimum_combiner(ensemble_classifiers, query)
 90 |     assert np.allclose(expected, result)
 91 | 
 92 | 
 93 | def test_maximum_combiner(create_pool_classifiers):
 94 |     query = np.array([[1, -1]])
 95 |     ensemble_classifiers = create_pool_classifiers
 96 |     expected = 0
 97 |     result = maximum_combiner(ensemble_classifiers, query)
 98 |     assert np.allclose(expected, result)
 99 | 
100 | 
101 | def test_median_combiner(create_pool_classifiers):
102 |     query = np.array([[1, -1]])
103 |     ensemble_classifiers = create_pool_classifiers
104 |     expected = 0
105 |     result = median_combiner(ensemble_classifiers, query)
106 |     assert np.allclose(expected, result)
107 | 
108 | 
109 | def test_check_predictions():
110 |     predictions = example_kuncheva
111 |     with pytest.raises(ValueError):
112 |         average_rule(predictions)
113 | 
114 | 
115 | # -------Test routines for the fusion rules receiving prediction directly------
116 | # These receives the matrix with the supports given for each class and
117 | # returns the class labels (max score)
118 | 
119 | # Test example taken from Kuncheva's book: Combining pattern classifiers
120 | 
121 | 
122 | example_kuncheva = np.array(
123 |     [[0.1, 0.5, 0.4], [0.0, 0.0, 1.0], [0.4, 0.3, 0.4], [0.2, 0.7, 0.1],
124 |      [0.1, 0.8, 0.2]])
125 | example_kuncheva_batch = np.expand_dims(example_kuncheva, axis=0)
126 | example_kuncheva_batch = np.repeat(example_kuncheva_batch, 10, axis=0)
127 | 
128 | 
129 | def test_product_rule():
130 |     expected = 2
131 |     result = product_rule(example_kuncheva_batch)
132 |     assert np.allclose(expected, result)
133 | 
134 | 
135 | def test_average_rule():
136 |     expected = 1
137 |     result = average_rule(example_kuncheva_batch)
138 |     assert np.allclose(expected, result)
139 | 
140 | 
141 | def test_minimum_rule():
142 |     expected = 2
143 |     result = minimum_rule(example_kuncheva_batch)
144 |     assert np.allclose(expected, result)
145 | 
146 | 
147 | def test_maximum_rule():
148 |     expected = 2
149 |     result = maximum_rule(example_kuncheva_batch)
150 |     assert np.allclose(expected, result)
151 | 
152 | 
153 | def test_median_rule():
154 |     expected = 1
155 |     result = median_rule(example_kuncheva_batch)
156 |     assert np.allclose(expected, result)
157 | 


--------------------------------------------------------------------------------
/deslib/tests/util/test_datasets.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import numpy as np
 3 | from deslib.util.datasets import make_P2
 4 | from deslib.util.datasets import make_banana
 5 | from deslib.util.datasets import make_banana2
 6 | from deslib.util.datasets import make_circle_square
 7 | from deslib.util.datasets import make_xor
 8 | 
 9 | 
10 | def setup_class_sizes():
11 | 
12 |     size_class0 = np.random.randint(1, 1000)
13 |     size_class1 = np.random.randint(1, 1000)
14 |     return size_class0, size_class1
15 | 
16 | 
17 | def test_P2_class_distribution():
18 | 
19 |     s0, s1 = setup_class_sizes()
20 |     _, y = make_P2(size_classes=[s0, s1])
21 |     assert np.sum(y == 0) == s0 and np.sum(y == 1) == s1
22 | 
23 | 
24 | def test_banana2_class_distribution():
25 |     s0, s1 = setup_class_sizes()
26 |     _, y = make_banana2(size_classes=[s0, s1])
27 |     assert np.sum(y == 0) == s0 and np.sum(y == 1) == s1
28 | 
29 | 
30 | def test_banana_class_distribution():
31 |     s0, s1 = setup_class_sizes()
32 |     _, y = make_banana(size_classes=[s0, s1])
33 |     assert np.sum(y == 0) == s0 and np.sum(y == 1) == s1
34 | 
35 | 
36 | def test_circle_square_class_distribution():
37 |     s0, s1 = setup_class_sizes()
38 |     _, y = make_circle_square(size_classes=[s0, s1])
39 |     assert np.sum(y == 0) == s0 and np.sum(y == 1) == s1
40 | 
41 | 
42 | def test_xor_size():
43 |     n_samples = np.random.randint(100, 2000)
44 |     X, y = make_xor(n_samples)
45 |     assert y.size == n_samples
46 | 
47 | 
48 | def test_xor():
49 |     n_samples = np.random.randint(100, 2000)
50 |     X, y = make_xor(n_samples)
51 |     X_0, X_1 = X[y == 0], X[y == 1]
52 |     for x in X_0:
53 |         assert np.all(x[0] < 0.5 and x[1] < 0.5) or (x[0] > 0.5 and x[1] > 0.5)
54 | 
55 | 
56 | def test_banana_n_higher_than_one():
57 |     s0, s1 = setup_class_sizes()
58 |     na = np.random.rand() + 1
59 |     with pytest.raises(ValueError):
60 |         make_banana([s0, s1], na)
61 | 


--------------------------------------------------------------------------------
/deslib/tests/util/test_diversity_batch.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pytest
  3 | 
  4 | from deslib.util.diversity_batch import (_process_predictions,
  5 |                                          double_fault,
  6 |                                          Q_statistic,
  7 |                                          ratio_errors,
  8 |                                          agreement_measure,
  9 |                                          disagreement_measure,
 10 |                                          correlation_coefficient)
 11 | 
 12 | 
 13 | @pytest.fixture
 14 | def create_X_y():
 15 |     # ex1: The distribution of samples of a test example.
 16 |     X = np.array(
 17 |         [
 18 |             [-1, 1],
 19 |             [-0.75, 0.5],
 20 |             [-1.5, 1.5],
 21 |             [1, 1],
 22 |             [0.75, 0.5],
 23 |             [1.5, 1.5],
 24 |             [1, -1],
 25 |             [-0.5, 0.5],
 26 |             [0.5, 0.5],
 27 |             [0, -1],
 28 |             [0.75, -0.5],
 29 |             [0.0, 0.0],
 30 |             [-1, -1],
 31 |             [0, -0.5],
 32 |             [1, -1],
 33 |         ]
 34 |     )
 35 |     # Labels associated with the samples. This information is used
 36 |     # by techniques based on a posteriori information.
 37 |     y = np.array([0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0])
 38 |     return X, y
 39 | 
 40 | 
 41 | @pytest.fixture
 42 | def example_diversity(create_X_y):
 43 |     y_pred_classifier1 = np.array([0, 0, 0, 1, 0, 1, 0, 0, 0, 0])
 44 |     y_pred_classifier2 = np.tile(np.array([1, 0, 0, 1, 1, 0, 0, 0, 1, 1]),
 45 |                                  (5, 1))
 46 | 
 47 |     y_real = np.array([0, 0, 1, 0, 0, 0, 0, 1, 1, 1])
 48 | 
 49 |     y_ex1 = create_X_y[1]
 50 |     return y_pred_classifier1, y_pred_classifier2, y_real, y_ex1
 51 | 
 52 | 
 53 | @pytest.fixture
 54 | def example_diversity_ones_zeros(create_X_y):
 55 |     y = create_X_y[1]
 56 |     y_pred_ones = np.ones(15)
 57 |     y_pred_zeros = np.zeros((5, 15))
 58 |     return y, y_pred_ones, y_pred_zeros
 59 | 
 60 | 
 61 | def test_process_predictions_ones_zeros(example_diversity_ones_zeros):
 62 |     y, y_pred_ones, y_pred_zeros = example_diversity_ones_zeros
 63 |     N00, N10, N01, N11 = _process_predictions(y, y_pred_ones, y_pred_zeros)
 64 |     assert (
 65 |             (N00 == np.full((5,), 0.0)).all() and
 66 |             (N11 == np.full((5,), 0.0)).all() and
 67 |             (N01 == np.full((5,), 9.0 / 15.0)).all() and
 68 |             (N10 == np.full((5,), 6.0 / 15.0)).all()
 69 |     )
 70 | 
 71 | 
 72 | def test_double_fault_ones_zeros(example_diversity_ones_zeros):
 73 |     y, y_pred_ones, y_pred_zeros = example_diversity_ones_zeros
 74 |     df = double_fault(y, y_pred_ones, y_pred_zeros)
 75 |     assert (df == np.full((5,), 0)).all()
 76 | 
 77 | 
 78 | def test_double_fault():
 79 |     labels = np.array([0, 0, 0, 0, 1, 1, 1])
 80 |     pred1 = np.array([1, 0, 1, 0, 0, 0, 0])
 81 |     pred2 = np.tile(np.array([1, 0, 0, 0, 1, 0, 0]), (5, 1))
 82 | 
 83 |     actual = double_fault(labels, pred1, pred2)
 84 | 
 85 |     assert (
 86 |             actual == np.full((5,), 3.0 / 7)
 87 |     ).all()  # three common errors out of 7 predictions
 88 | 
 89 | 
 90 | def test_q_statistic_ones_zeros(example_diversity_ones_zeros):
 91 |     y, y_pred_ones, y_pred_zeros = example_diversity_ones_zeros
 92 |     Q = Q_statistic(y, y_pred_ones, y_pred_zeros)
 93 |     assert (Q == np.full((5,), -1.0)).all()
 94 | 
 95 | 
 96 | def test_ratio_errors_diff_classifiers(example_diversity):
 97 |     y_pred_classifier1, y_pred_classifier2, y_real, y_ex1 = example_diversity
 98 |     ratio = ratio_errors(y_real, y_pred_classifier1, y_pred_classifier2)
 99 |     assert np.isclose(ratio, 1.66, atol=0.01).all()
100 | 
101 | 
102 | def test_agreement(example_diversity):
103 |     y_pred_classifier1, y_pred_classifier2, y_real, y_ex1 = example_diversity
104 |     agreement = agreement_measure(y_real,
105 |                                   y_pred_classifier1,
106 |                                   y_pred_classifier2)
107 |     assert np.isclose(agreement, 0.5).all()
108 | 
109 | 
110 | def test_disagreement(example_diversity):
111 |     y_pred_classifier1, y_pred_classifier2, y_real, y_ex1 = example_diversity
112 |     disagreement = disagreement_measure(y_real,
113 |                                         y_pred_classifier1,
114 |                                         y_pred_classifier2)
115 |     assert np.isclose(disagreement, 0.5).all()
116 | 
117 | 
118 | def test_coefficient_correlation(example_diversity):
119 |     y_pred_classifier1, y_pred_classifier2, y_real, y_ex1 = example_diversity
120 |     coefficient = correlation_coefficient(
121 |         y_real, y_pred_classifier1, y_pred_classifier2
122 |     )
123 |     assert np.isclose(coefficient, 0.0).all()
124 | 


--------------------------------------------------------------------------------
/deslib/tests/util/test_faiss.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import numpy as np
 3 | from sklearn.neighbors import KNeighborsClassifier
 4 | from deslib.tests.test_des_integration import load_dataset
 5 | from deslib.util import faiss_knn_wrapper
 6 | 
 7 | 
 8 | @pytest.mark.skipif(not faiss_knn_wrapper.is_available(),
 9 |                     reason="requires the faiss library")
10 | def test_faiss_predict():
11 |     rng = np.random.RandomState(123456)
12 |     _, X_test, X_train, _, _, y_train = load_dataset(None, rng)
13 |     k = 7
14 |     X_train = X_train.astype(np.float32)
15 |     X_test = X_test.astype(np.float32)
16 |     f_knn_test = faiss_knn_wrapper.FaissKNNClassifier(n_neighbors=k)
17 |     f_knn_test.fit(X_train, y_train)
18 |     f_knn_preds = f_knn_test.predict(X_test)
19 | 
20 |     knn_test = KNeighborsClassifier(n_neighbors=k)
21 |     knn_test.fit(X_train, y_train)
22 |     knn_preds = knn_test.predict(X_test)
23 | 
24 |     assert ((f_knn_preds - knn_preds).sum() == 0)
25 | 


--------------------------------------------------------------------------------
/deslib/tests/util/test_fire.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from deslib.util.dfp import frienemy_pruning
 4 | from deslib.util.dfp import frienemy_pruning_preprocessed
 5 | from ..conftest import create_base_classifier
 6 | 
 7 | 
 8 | # Since no classifier crosses the region of competence,
 9 | # all of them must be selected
10 | def test_frienemy_no_classifier_crosses(example_estimate_competence):
11 |     _, y, neighbors = example_estimate_competence[0:3]
12 |     n_classifiers = 3
13 |     predictions = np.zeros((y.size, n_classifiers))
14 |     mask = frienemy_pruning_preprocessed(neighbors, y, predictions)
15 |     assert mask.all()
16 | 
17 | 
18 | # In this example, all base classifier should be considered crossing the
19 | # region of competence since they always predicts the correct label for
20 | # the samples in DSEL.
21 | def test_frienemy_all_classifiers_crosses(example_all_ones):
22 |     X, y, neighbors, _, dsel_processed, _ = example_all_ones
23 |     result = frienemy_pruning_preprocessed(neighbors, y, dsel_processed)
24 |     assert result.all()
25 | 
26 | 
27 | # Check if the batch processing is working by passing multiple
28 | # samples at the same time.
29 | def test_frienemy_not_all_classifiers_crosses(example_estimate_competence):
30 |     expected = np.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
31 |     _, y, neighbors, _, dsel_processed, _ = example_estimate_competence
32 | 
33 |     # passing three samples to compute the DFP at the same time
34 |     result = frienemy_pruning_preprocessed(neighbors[:, :3], y, dsel_processed)
35 |     assert np.array_equal(result, expected)
36 | 
37 | 
38 | # Test the case where the sample is located in a safe region
39 | # (i.e., all neighbors comes from the same class)
40 | def test_frienemy_safe_region(example_estimate_competence):
41 |     X, y, _, _, dsel_processed, _ = example_estimate_competence
42 |     neighbors = np.tile(np.array([0, 1, 2, 6, 7, 8, 14]), (10, 1))
43 | 
44 |     result = frienemy_pruning_preprocessed(neighbors, y, dsel_processed)
45 |     assert result.all()
46 | 
47 | 
48 | def test_frienemy_not_processed():
49 |     X = np.random.rand(5, 2)
50 |     y = np.array([0, 0, 0, 1, 1])
51 |     X_query = np.random.rand(1, 2)
52 |     clf1 = create_base_classifier(return_value=[0, 1, 0, 0, 1])
53 |     clf2 = create_base_classifier(return_value=[1, 1, 1, 1, 1])
54 |     clf3 = create_base_classifier(return_value=[0, 0, 0, 0, 0])
55 |     clf4 = create_base_classifier(return_value=[0, 0, 0, 1, 1])
56 |     pool = [clf1, clf2, clf3, clf4]
57 |     dfp_mask = frienemy_pruning(X_query, X, y, pool, 5)
58 |     assert np.array_equal(dfp_mask, np.array([[1, 0, 0, 1]]))
59 | 


--------------------------------------------------------------------------------
/deslib/tests/util/test_instance_hardness.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pytest
 3 | 
 4 | from deslib.util.instance_hardness import hardness_region_competence
 5 | from deslib.util.instance_hardness import kdn_score
 6 | 
 7 | 
 8 | # ------Test routines for the Instance Hardness calculation------------------
 9 | @pytest.mark.parametrize('index, expected', [(0, 0.42),
10 |                                              (1, 0.28),
11 |                                              (2, 0.28)])
12 | def test_instance_hardness_region(index,
13 |                                   expected,
14 |                                   example_estimate_competence):
15 |     y, neighbors = example_estimate_competence[1:3]
16 |     k = 7
17 |     neighbors = neighbors[index, :]
18 |     IH = hardness_region_competence(neighbors, y, k)
19 |     assert np.isclose(IH, expected, atol=0.01)
20 | 
21 | 
22 | def test_instance_hardness_region_batch(example_estimate_competence):
23 |     expected = np.array([0.42, 0.28, 0.28])
24 |     y, neighbors = example_estimate_competence[1:3]
25 | 
26 |     k = 7
27 |     IH = hardness_region_competence(neighbors, y, k)
28 |     assert np.allclose(IH, expected, atol=0.01)
29 | 
30 | 
31 | def test_instance_hardness_region_all_same(example_estimate_competence):
32 |     y = example_estimate_competence[1]
33 |     k = 7
34 |     neighbors = np.array([0, 1, 2, 6, 7, 8, 13])
35 |     IH = hardness_region_competence(neighbors, y, k)
36 |     assert IH == 0.0
37 | 
38 | 
39 | def test_kdn_score(example_estimate_competence):
40 |     X, y, neigh, dist, _, _ = example_estimate_competence
41 |     X, y = X[0:6, :], y[0:6]
42 |     score, _ = kdn_score(X, y, 3)
43 |     assert np.allclose(score, 0.3333333)
44 | 


--------------------------------------------------------------------------------
/deslib/tests/util/test_knne.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import numpy as np
 3 | from deslib.util import faiss_knn_wrapper
 4 | from deslib.util import KNNE
 5 | 
 6 | 
 7 | def setup_test(n_neighbors, knn_classifier='sklearn'):
 8 |     X = np.tile(np.arange(15).reshape(-1, 1), 3)
 9 |     y = np.array(5 * [0] + 5 * [1] + 5 * [2])
10 |     knne = KNNE(n_neighbors=n_neighbors, knn_classifier=knn_classifier)
11 |     knne.fit(X, y)
12 |     return X, y, knne
13 | 
14 | 
15 | def test_equal_classes():
16 |     X, y, knne = setup_test(n_neighbors=6)
17 |     dist, inds = knne.kneighbors()
18 |     classes = y[inds]
19 |     b = np.apply_along_axis(np.bincount, 1, classes)
20 |     assert np.equal(b, 2).all()
21 | 
22 | 
23 | def test_uneven_k():
24 |     X, y, knne = setup_test(n_neighbors=7)
25 |     test = X[0, :].reshape(1, -1)
26 |     dist, inds = knne.kneighbors(test, 7)
27 |     assert np.allclose([0, 1, 5, 6, 10, 11, 2], inds)
28 | 
29 | 
30 | def test_predict():
31 |     X, y, knne = setup_test(n_neighbors=6)
32 |     prediction = knne.predict(X)
33 |     assert np.equal(prediction, y).all()
34 | 
35 | 
36 | def test_predict_proba():
37 |     X, y, knne = setup_test(n_neighbors=6)
38 |     probas = knne.predict_proba(X)
39 |     assert np.all(probas[0:5, 0] > probas[0:5, 1])
40 |     assert np.all(probas[5:-1, 1] > probas[5:-1, 0])
41 | 
42 | 
43 | def test_labels_not_encoded():
44 |     X = np.tile(np.arange(10).reshape(-1, 1), 3)
45 |     y = np.array(5 * ['cat'] + 5 * ['dog'])
46 |     knne = KNNE(n_neighbors=6)
47 |     knne.fit(X, y)
48 |     dist, inds = knne.kneighbors()
49 |     classes = y[inds]
50 |     a = np.sum(classes == 'dog')
51 |     b = np.sum(classes == 'cat')
52 |     assert np.equal(b, a).all() and a == 30
53 | 
54 | 
55 | def test_n_neighbors_none():
56 |     X = np.tile(np.arange(10).reshape(-1, 1), 3)
57 |     y = np.array(5 * ['cat'] + 5 * ['dog'])
58 |     knne = KNNE(n_neighbors=None)
59 |     with pytest.raises(ValueError):
60 |         knne.fit(X, y)
61 | 
62 | 
63 | def test_return_indices_only():
64 |     X, y, knne = setup_test(n_neighbors=6)
65 |     inds = knne.kneighbors(X, return_distance=False)
66 |     assert inds.shape == (15, 6)
67 | 
68 | 
69 | def test_n_neighbors_less_n_classes():
70 |     with pytest.raises(ValueError):
71 |         setup_test(n_neighbors=2)
72 | 
73 | 
74 | def test_n_neighbors_not_integer():
75 |     with pytest.raises(TypeError):
76 |         setup_test(n_neighbors=5.5)
77 | 
78 | 
79 | def test_n_neighbors_not_multiple_raise_warning():
80 |     with pytest.warns(Warning):
81 |         setup_test(n_neighbors=7)
82 | 
83 | 
84 | # ------Tests using KNNE using faiss for similarity search------------------
85 | @pytest.mark.skipif(not faiss_knn_wrapper.is_available(),
86 |                     reason="requires the faiss library")
87 | def test_faiss_knne():
88 |     X, y, knne = setup_test(n_neighbors=6, knn_classifier='faiss')
89 |     y_pred = knne.predict(X)
90 |     assert np.allclose(y, y_pred)
91 | 
92 | 
93 | @pytest.mark.skipif(not faiss_knn_wrapper.is_available(),
94 |                     reason="requires the faiss library")
95 | def test_faiss_knne_inds():
96 |     X, y, knne = setup_test(n_neighbors=6, knn_classifier='faiss')
97 |     inds = knne.kneighbors(X, return_distance=False)
98 |     assert inds.shape == (15, 6)
99 | 


--------------------------------------------------------------------------------
/deslib/tests/util/test_prob_functions.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pytest
  3 | 
  4 | from deslib.util.prob_functions import (ccprmod,
  5 |                                         log_func,
  6 |                                         min_difference,
  7 |                                         softmax,
  8 |                                         exponential_func,
  9 |                                         entropy_func)
 10 | 
 11 | 
 12 | # Example from the original paper "A probabilistic model of classifier
 13 | # competence for dynamic ensemble selection"
 14 | @pytest.mark.parametrize("supports, idx_correct_label, expected",
 15 |                          [([[0.3, 0.6, 0.1], [1.0 / 3, 1.0 / 3, 1.0 / 3]],
 16 |                            [1, 0], [0.784953394056843, 0.332872292262951]),
 17 |                           ([[0.5, 0.2, 0.3], [0.5, 0.2, 0.3]], [0, 1],
 18 |                            [0.6428, 0.1194])])
 19 | def test_ccprmod_return_value(supports, idx_correct_label, expected):
 20 |     value = ccprmod(supports, idx_correct_label)
 21 |     assert np.isclose(value, expected, atol=0.001).all()
 22 | 
 23 | 
 24 | @pytest.mark.parametrize('B', [0, -1, None, 0.55])
 25 | def test_valid_ccprmod_beta(B):
 26 |     supports = [0.3, 0.6, 0.1]
 27 |     idx_correct_label = [1]
 28 | 
 29 |     with pytest.raises((ValueError, TypeError)):
 30 |         ccprmod(supports, idx_correct_label, B)
 31 | 
 32 | 
 33 | def test_ccprmod_zero_support():
 34 |     supports = [[0.0, 0.0, 1.0], [0.0, 1.0, 0.0], [0.0, 0.8, 0.2]]
 35 |     idx_correct_label = [0, 2, 0]
 36 |     assert np.isclose(ccprmod(supports, idx_correct_label), 0, atol=0.01).all()
 37 | 
 38 | 
 39 | def test_ccprmod_one_support():
 40 |     supports = [[0.0, 0.0, 1.0], [0.0, 1.0, 0.0]]
 41 |     idx_correct_label = [2, 1]
 42 |     assert np.isclose(ccprmod(supports, idx_correct_label), 1, atol=0.01).all()
 43 | 
 44 | 
 45 | def test_softmax_sum_to_one():
 46 |     test = np.random.rand(10)
 47 |     assert np.allclose(np.sum(softmax(test)), 1.0, atol=0.001)
 48 | 
 49 | 
 50 | @pytest.mark.parametrize('vector, expected',
 51 |                          [([0, 1, -0.5, 0.5],
 52 |                            [0.1674, 0.4551, 0.1015, 0.2760]),
 53 |                           ([3.20, 5, 7.856, 9.65],
 54 |                            [0.0013, 0.0081, 0.1412, 0.8493])])
 55 | def test_softmax(vector, expected):
 56 |     assert np.isclose(softmax(vector), expected, atol=0.001).all()
 57 | 
 58 | 
 59 | @pytest.mark.parametrize('supports_correct, expected',
 60 |                          [(np.array([0.5]), 0),
 61 |                           (np.array([0.0]), -1.0),
 62 |                           (np.array([1.0]), 1.0)])
 63 | def test_log_func_two_classes(supports_correct, expected):
 64 |     n_classes = 2
 65 |     result = log_func(n_classes, supports_correct)
 66 |     assert np.isclose(result, expected)
 67 | 
 68 | 
 69 | @pytest.mark.parametrize('supports_correct, expected',
 70 |                          [(np.array([0.33]), 0),
 71 |                           (np.array([0.0]), -1.0),
 72 |                           (np.array([1.0]), 1.0)])
 73 | def test_log_func_multi_class(supports_correct, expected):
 74 |     n_classes = 3
 75 |     result = log_func(n_classes, supports_correct)
 76 |     assert np.isclose(result, expected, atol=0.01)
 77 | 
 78 | 
 79 | # Example from the paper "A probabilistic model of classifier competence for
 80 | # dynamic ensemble selection"
 81 | @pytest.mark.parametrize("supports, idx_correct_label, expected",
 82 |                          [(np.array([[0.3, 0.6, 0.1], [0.33, 0.33, 0.33]]),
 83 |                           [1, 0], [0.3, 0.0]),
 84 |                           (np.array([[0.5, 0.2, 0.3], [0.5, 0.2, 0.3]]),
 85 |                           [0, 1], [0.2, -0.3])])
 86 | def test_min_difference(supports, idx_correct_label, expected):
 87 |     result = min_difference(supports, idx_correct_label)
 88 |     assert np.isclose(result, expected, atol=0.01).all()
 89 | 
 90 | 
 91 | @pytest.mark.parametrize('supports_correct, expected',
 92 |                          [(np.array([0.33]), -0.01),
 93 |                           (np.array([0.0]), -1.0),
 94 |                           (np.array([1.0]), 1.0)])
 95 | def test_exponential_func_multi_class(supports_correct, expected):
 96 |     n_classes = 3
 97 |     result = exponential_func(n_classes, supports_correct)
 98 |     assert np.isclose(result, expected, atol=0.01).all()
 99 | 
100 | 
101 | def test_exponential_func_multi_class_batch():
102 |     supports_correct = np.array([0.33, 0.0, 1.0])
103 |     expected = [-0.01, -1.0, 1.0]
104 |     n_classes = 3
105 |     result = exponential_func(n_classes, supports_correct)
106 |     assert np.allclose(result, expected, atol=0.01)
107 | 
108 | 
109 | @pytest.mark.parametrize('supports_correct, expected',
110 |                          [(np.array([0.5]), 0.00),
111 |                           (np.array([0.0]), -1.0),
112 |                           (np.array([1.0]), 1.0)])
113 | def test_exponential_func_two_class(supports_correct, expected):
114 |     n_classes = 2
115 |     result = exponential_func(n_classes, supports_correct)
116 |     assert np.isclose(result, expected, atol=0.01).all()
117 | 
118 | 
119 | def test_exponential_func():
120 |     n_classes = 2
121 |     result1 = exponential_func(n_classes, np.array([0.2]))
122 |     assert result1 < 0.0
123 | 
124 |     result2 = exponential_func(n_classes, np.array([0.8]))
125 |     assert result2 > 0.0
126 | 
127 |     assert result2 > result1
128 | 
129 |     result3 = exponential_func(n_classes, np.array([1.0]))
130 |     result4 = exponential_func(n_classes, np.array([0.9]))
131 |     assert result3 > result4 > result2 > result1
132 | 
133 | 
134 | def test_entropy_func_three_classes():
135 |     n_classes = 3
136 |     supports = np.array([[0.33, 0.33, 0.33], [1.0, 0.0, 0.0], [1.0, 0.0, 0.0]])
137 |     is_correct = np.array([0, 1, 0])
138 |     expected = [0.0, 1.0, -1.0]
139 |     result = entropy_func(n_classes, supports, is_correct)
140 |     assert np.isclose(result, expected, atol=0.01).all()
141 | 
142 | 
143 | def test_entropy_func_parameter_shape():
144 |     with pytest.raises(ValueError):
145 |         entropy_func(2, np.array([0, 1]), np.array([0]))
146 | 


--------------------------------------------------------------------------------
/deslib/util/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The :mod:`deslib.util` This module includes various utilities. They are divided
 3 | into four parts:
 4 | 
 5 | deslib.util.aggregation - Implementation of aggregation functions such as
 6 | majority voting and averaging. Such functions can be applied to any list of
 7 | classifiers.
 8 | 
 9 | deslib.util.diversity - Implementation of different measures of diversity
10 | between classifiers.
11 | 
12 | deslib.util.prob_functions - Functions to estimate the competence of a base
13 | classifier based on the
14 | probability estimates.
15 | 
16 | deslib.util.instance_hardness - Functions to measure the hardness level of a
17 | given instance
18 | 
19 | deslib.util.faiss_knn_wrapper - Wrapper for Facebook AI fast similarity search
20 | on GPU
21 | 
22 | deslib.util.datasets - Provides methods to generate synthetic data.
23 | 
24 | deslib.util.knne - Implementation of the K-Nearest Neighbors Equality
25 | technique
26 | """
27 | 
28 | from .aggregation import *
29 | from .diversity import *
30 | from .instance_hardness import *
31 | from .prob_functions import *
32 | from .datasets import *
33 | from .knne import KNNE
34 | from .faiss_knn_wrapper import FaissKNNClassifier
35 | 


--------------------------------------------------------------------------------
/deslib/util/dfp.py:
--------------------------------------------------------------------------------
  1 | """Implementation of the Dynamic Frienemy Pruning (DFP) algorithm for online
  2 | pruning of base classifiers.
  3 | 
  4 | References
  5 | ----------
  6 | Oliveira, D.V.R., Cavalcanti, G.D.C. and Sabourin, R., Online Pruning
  7 | of Base Classifiers for Dynamic Ensemble Selection,
  8 | Pattern Recognition, vol. 72, December 2017, pp 44-58.
  9 | 
 10 | Cruz, Rafael MO, Dayvid VR Oliveira, George DC Cavalcanti, and Robert Sabourin.
 11 | "FIRE-DES++: Enhanced online pruning of base classifiers for dynamic ensemble
 12 | selection." Pattern Recognition 85 (2019): 149-160.
 13 | """
 14 | 
 15 | # coding=utf-8
 16 | 
 17 | # Author: Rafael Menelau Oliveira e Cruz <rafaelmenelau@gmail.com>
 18 | #
 19 | # License: BSD 3 clause
 20 | 
 21 | 
 22 | import numpy as np
 23 | from sklearn.neighbors import KNeighborsClassifier
 24 | 
 25 | 
 26 | def frienemy_pruning(X_query, X_dsel, y_dsel, ensemble, k):
 27 |     """Implements the Online Pruning method (frienemy) which prunes base
 28 |     classifiers that do not cross the region of competence of a given instance.
 29 |     A classifier crosses the region of competence if it correctly
 30 |     classify at least one sample for each different class in the region.
 31 | 
 32 |     Parameters
 33 |     ----------
 34 |     X_query : array-like of shape (n_samples, n_features)
 35 |         Test set.
 36 |     X_dsel : array-like of shape (n_samples, n_features)
 37 |         Dynamic selection set.
 38 |     y_dsel : array-like of shape (n_samples,)
 39 |         The target values (Dynamic selection set).
 40 |     ensemble : list of shape = [n_classifiers]
 41 |         The ensemble of classifiers to be pruned.
 42 |     k : int
 43 |         Number of neighbors used to compute the regions of competence.
 44 | 
 45 |     Returns
 46 |     -------
 47 |     DFP_mask : array-like of shape = [n_samples, n_classifiers]
 48 |                Mask containing 1 for the selected base classifier and 0
 49 |                otherwise.
 50 | 
 51 |     """
 52 |     predictions = np.zeros((X_dsel.shape[0], len(ensemble)),
 53 |                            dtype=np.intp)
 54 |     for index, clf in enumerate(ensemble):
 55 |         predictions[:, index] = clf.predict(X_dsel)
 56 |     hit_miss = predictions == y_dsel[:, np.newaxis]
 57 |     competence_region = KNeighborsClassifier(n_neighbors=k).fit(X_dsel, y_dsel)
 58 |     neighbors = competence_region.kneighbors(X_query, return_distance=False)
 59 |     return frienemy_pruning_preprocessed(neighbors, y_dsel, hit_miss)
 60 | 
 61 | 
 62 | def frienemy_pruning_preprocessed(neighbors, y_val, hit_miss):
 63 |     """Implements the Online Pruning method (frienemy) which prunes base
 64 |     classifiers that do not cross the region of competence of a given instance.
 65 |     A classifier crosses the region of competence if it correctly
 66 |     classify at least one sample for each different class in the region.
 67 | 
 68 |     Notes
 69 |     -----
 70 |     This implementation assumes the regions of competence of each query example
 71 |     (neighbors) and the predictions for the dynamic selection data (hit_miss)
 72 |     were already pre-computed.
 73 | 
 74 |     Parameters
 75 |     ----------
 76 |     neighbors : array-like of shape (n_samples, n_neighbors)
 77 |         Indices of the k nearest neighbors.
 78 |     y_val : array-like of shape (n_samples,)
 79 |         The target values (class labels).
 80 |     hit_miss : array-like of shape (n_samples, n_classifiers)
 81 |         Matrix containing 1 when the base classifier made the correct
 82 |         prediction, 0 otherwise.
 83 | 
 84 |     Returns
 85 |     -------
 86 |     DFP_mask : array-like of shape = [n_samples, n_classifiers]
 87 |                Mask containing 1 for the selected base classifier and 0
 88 |                otherwise.
 89 |     """
 90 |     if neighbors.ndim < 2:
 91 |         neighbors = neighbors.reshape(1, -1)
 92 | 
 93 |     n_samples = neighbors.shape[0]
 94 |     n_classifiers = hit_miss.shape[1]
 95 |     dfp_mask = np.zeros((n_samples, n_classifiers))
 96 | 
 97 |     # TODO: vectorize this code?
 98 |     for sample_idx in range(n_samples):
 99 |         curr_neighbors = neighbors[sample_idx]
100 |         neighbors_y = y_val[curr_neighbors]
101 |         if len(set(neighbors_y)) > 1:
102 |             # Indecision region. Check if the base classifier predict the
103 |             # correct label for a sample belonging to each class.
104 |             for clf_index in range(n_classifiers):
105 |                 [mask] = np.where(hit_miss[curr_neighbors, clf_index])
106 |                 if len(set(neighbors_y[mask])) > 1:
107 |                     dfp_mask[sample_idx, clf_index] = 1.0
108 |         else:
109 |             # Safe region.
110 |             dfp_mask[sample_idx, :] = 1.0
111 |     # rows that all classifiers were pruned are set to 1.0
112 |     dfp_mask[np.all(dfp_mask == 0, axis=1)] = 1.0
113 |     return dfp_mask
114 | 


--------------------------------------------------------------------------------
/deslib/util/instance_hardness.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | 
  3 | # Author: Rafael Menelau Oliveira e Cruz <rafaelmenelau@gmail.com>
  4 | #
  5 | # License: BSD 3 clause
  6 | 
  7 | import numpy as np
  8 | from scipy.stats import mode
  9 | from sklearn.neighbors import NearestNeighbors
 10 | 
 11 | """
 12 | This file contains the implementation of different functions to measure
 13 | instance hardness. Instance hardness can be defined as the likelihood that a
 14 | given sample will be misclassified by different learning algorithms.
 15 | 
 16 | References
 17 | ----------
 18 | Smith, M.R., Martinez, T. and Giraud-Carrier, C., 2014. An instance level
 19 | analysis of data complexity.
 20 | Machine learning, 95(2), pp.225-256
 21 | """
 22 | 
 23 | 
 24 | def hardness_region_competence(neighbors_idx, labels, safe_k):
 25 |     """Calculate the Instance hardness of the sample based on its neighborhood.
 26 |     The sample is deemed hard to classify when there is overlap between
 27 |     different classes in the region of competence. This method does not
 28 |     takes into account the target label of the test sample
 29 | 
 30 |     This hardness measure is used to select whether use DS or use the KNN for
 31 |     the classification of a given query sample
 32 | 
 33 |     Parameters
 34 |     ----------
 35 |     neighbors_idx : array of shape = [n_samples_test, k]
 36 |         Indices of the nearest neighbors for each considered sample
 37 | 
 38 |     labels : array of shape = [n_samples_train]
 39 |         labels associated with each training sample
 40 | 
 41 |     safe_k : int
 42 |         Number of neighbors used to estimate the hardness of the corresponding
 43 |         region
 44 | 
 45 |     Returns
 46 |     -------
 47 |     hardness : array of shape = [n_samples_test]
 48 |         The Hardness level associated with each example.
 49 | 
 50 |     References
 51 |     ----------
 52 |     Smith, M.R., Martinez, T. and Giraud-Carrier, C., 2014. An instance level
 53 |     analysis of data complexity.
 54 |     Machine learning, 95(2), pp.225-256
 55 |     """
 56 |     if neighbors_idx.ndim < 2:
 57 |         neighbors_idx = np.atleast_2d(neighbors_idx)
 58 | 
 59 |     neighbors_y = labels[neighbors_idx[:, :safe_k]]
 60 |     _, num_majority_class = mode(neighbors_y, axis=1)
 61 |     hardness = ((safe_k - num_majority_class) / safe_k).reshape(-1, )
 62 | 
 63 |     return hardness
 64 | 
 65 | 
 66 | def kdn_score(X, y, k):
 67 |     """
 68 |     Calculates the K-Disagreeing Neighbors score (KDN) of each sample in the
 69 |     input dataset.
 70 | 
 71 |     Parameters
 72 |     ----------
 73 |     X : array of shape (n_samples, n_features)
 74 |         The input data.
 75 | 
 76 |     y : array of shape (n_samples)
 77 |         class labels of each example in X.
 78 | 
 79 |     k : int
 80 |         Neighborhood size for calculating the KDN score.
 81 | 
 82 |     Returns
 83 |     -------
 84 | 
 85 |     score : array of shape = [n_samples,1]
 86 |         KDN score of each sample in X.
 87 | 
 88 |     neighbors : array of shape = [n_samples,k]
 89 |         Indexes of the k neighbors of each sample in X.
 90 | 
 91 | 
 92 |     References
 93 |     ----------
 94 |     M. R. Smith, T. Martinez, C. Giraud-Carrier, An instance level analysis of
 95 |     data complexity,
 96 |     Machine Learning 95 (2) (2014) 225-256.
 97 | 
 98 |     """
 99 | 
100 |     nbrs = NearestNeighbors(n_neighbors=k + 1, algorithm='kd_tree').fit(X)
101 |     _, indices = nbrs.kneighbors(X)
102 |     neighbors = indices[:, 1:]
103 |     diff_class = np.tile(y, (k, 1)).transpose() != y[neighbors]
104 |     score = np.sum(diff_class, axis=1) / k
105 |     return score, neighbors
106 | 


--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | _build
2 | 


--------------------------------------------------------------------------------
/docs/_static/.keep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/docs/_static/.keep


--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
 1 | .. _api-reference:
 2 | 
 3 | ######################
 4 | API Reference
 5 | ######################
 6 | 
 7 | This is the full API documentation of the `DESlib`. Currently the library is divided into four modules:
 8 | 
 9 | Dynamic Classifier Selection (DCS)
10 | -----------------------------------
11 | 
12 | This module contains the implementation of techniques in which only the base
13 | classifier that attained the highest competence level is selected for the classification of the query.
14 | 
15 | .. automodule:: deslib.dcs
16 | 
17 | .. toctree::
18 |     :maxdepth: 3
19 | 
20 |     modules/dcs/a_posteriori
21 |     modules/dcs/a_priori
22 |     modules/dcs/lca
23 |     modules/dcs/mcb
24 |     modules/dcs/mla
25 |     modules/dcs/ola
26 |     modules/dcs/rank
27 | 
28 | Dynamic Ensemble Selection (DES)
29 | -----------------------------------
30 | 
31 | Dynamic ensemble selection strategies refer to techniques that select an ensemble of classifier rather than a single one.
32 | All base classifiers that attain a minimum competence level are selected to compose the ensemble of classifiers.
33 | 
34 | .. automodule:: deslib.des
35 | 
36 | .. toctree::
37 |     :maxdepth: 3
38 | 
39 |     modules/des/meta_des
40 |     modules/des/des_clustering
41 |     modules/des/des_p
42 |     modules/des/ds_knn
43 |     modules/des/knop
44 |     modules/des/knora_e
45 |     modules/des/knora_u
46 |     modules/des/desmi
47 |     modules/des/probabilistic
48 | 
49 | Static ensembles
50 | -----------------------------------
51 | 
52 | This module provides the implementation of static ensemble techniques that are usually used as a baseline for the
53 | comparison of DS methods: Single Best (SB), Static Selection (SS), Stacked classifier and Oracle.
54 | 
55 | 
56 | .. automodule:: deslib.static
57 | 
58 | .. toctree::
59 |     :maxdepth: 3
60 | 
61 |     modules/static/oracle
62 |     modules/static/single_best
63 |     modules/static/static_selection
64 |     modules/static/stacked
65 | 
66 | Utils
67 | -----------------------------------
68 | Utility functions for ensemble methods such as diversity and aggregation methods.
69 | 
70 | .. automodule:: deslib.util
71 | 
72 | .. toctree::
73 |     :maxdepth: 3
74 | 
75 |     modules/util/diversity
76 |     modules/util/aggregation
77 |     modules/util/prob_functions
78 |     modules/util/instance_hardness
79 |     modules/util/dfp
80 |     modules/util/knne
81 |     modules/util/faiss_knn_wrapper
82 |     modules/util/datasets
83 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
  1 | Welcome to DESlib documentation!
  2 | =================================================
  3 | 
  4 | DESlib is an ensemble learning library focusing the implementation of the state-of-the-art techniques for dynamic classifier
  5 | and ensemble selection.
  6 | 
  7 | DESlib is a work in progress. Contributions are welcomed through its GitHub page: https://github.com/scikit-learn-contrib/DESlib.
  8 | 
  9 | Introduction
 10 | --------------
 11 | Dynamic Selection (DS) refers to techniques in which the base classifiers are selected
 12 | on the fly, according to each new sample to be classified. Only the most competent, or an ensemble containing the most competent classifiers is selected to predict
 13 | the label of a specific test sample. The rationale for such techniques is that not every classifier in
 14 | the pool is an expert in classifying all unknown samples; rather, each base classifier is an expert in
 15 | a different local region of the feature space.
 16 | 
 17 | DS is one of the most promising MCS approaches due to the fact that
 18 | more and more works are reporting the superior performance of such techniques over static combination methods. Such techniques
 19 | have achieved better classification performance especially when dealing with small-sized and imbalanced datasets. A
 20 | comprehensive review of dynamic selection can be found in the following papers [1]_ [2]_
 21 | 
 22 | Philosophy
 23 | -----------
 24 | DESlib was developed with two objectives in mind: to make it easy to integrate Dynamic Selection algorithms to
 25 | machine learning projects, and to facilitate research on this topic, by providing implementations of the main
 26 | DES and DCS methods, as well as the commonly used baseline methods. Each algorithm implements the main methods
 27 | in the scikit-learn_ API **scikit-learn**: **fit(X, y)**, **predict(X)**, **predict_proba(X)**
 28 | and **score(X, y)**.
 29 | 
 30 | The implementation of the DS methods is modular, following a taxonomy defined in [1]_.
 31 | This taxonomy considers the main characteristics of DS methods, that are centered in three components:
 32 | 
 33 | 1. the methodology used to define the local region, in which the competence level of the base classifiers are estimated (region of competence);
 34 | 2. the source of information used to estimate the competence level of the base classifiers.
 35 | 3. the selection approach to define the best classifier (for DCS) or the best set of classifiers (for DES).
 36 | 
 37 | This modular approach makes it easy for researchers to implement new DS methods, in many cases requiring only the
 38 | implementation of the method **estimate_competence**, that is, how the local competence of the base classifier is measured.
 39 | 
 40 | `API Reference <api.html>`_
 41 | ----------------------------
 42 | 
 43 | If you are looking for information on a specific function, class or
 44 | method, this part of the documentation is for you.
 45 | 
 46 | .. toctree::
 47 |     :hidden:
 48 | 
 49 |     user_guide
 50 |     api
 51 |     auto_examples/index
 52 |     news
 53 | 
 54 | 
 55 | `Example <auto_examples/index.html>`_
 56 | ----------------------------------------
 57 | 
 58 | Here we present an example of the KNORA-E techniques using a random forest to generate the pool of classifiers:
 59 | 
 60 | .. code-block:: python
 61 | 
 62 |     from sklearn.ensemble import RandomForestClassifier
 63 |     from deslib.des.knora_e import KNORAE
 64 | 
 65 |     # Train a pool of 10 classifiers
 66 |     pool_classifiers = RandomForestClassifier(n_estimators=10)
 67 |     pool_classifiers.fit(X_train, y_train)
 68 | 
 69 |     # Initialize the DES model
 70 |     knorae = KNORAE(pool_classifiers)
 71 | 
 72 |     # Preprocess the Dynamic Selection dataset (DSEL)
 73 |     knorae.fit(X_dsel, y_dsel)
 74 | 
 75 |     # Predict new examples:
 76 |     knorae.predict(X_test)
 77 | 
 78 | The library accepts any list of classifiers (from scikit-learn) as input, including a list containing different classifier models (heterogeneous ensembles).
 79 | More examples to use the API can be found in the `examples page <auto_examples/index.html>`_.
 80 | 
 81 | 
 82 | Citation
 83 | ==================
 84 | 
 85 | If you use DESLib in a scientific paper, please consider citing the following paper:
 86 | 
 87 | Rafael M. O. Cruz, Luiz G. Hafemann, Robert Sabourin and George D. C. Cavalcanti **DESlib: A Dynamic ensemble selection library in Python.** arXiv preprint arXiv:1802.04967 (2018).
 88 | 
 89 | .. code-block:: text
 90 | 
 91 |     @article{JMLR:v21:18-144,
 92 |         author  = {Rafael M. O. Cruz and Luiz G. Hafemann and Robert Sabourin and George D. C. Cavalcanti},
 93 |         title   = {DESlib: A Dynamic ensemble selection library in Python},
 94 |         journal = {Journal of Machine Learning Research},
 95 |         year    = {2020},
 96 |         volume  = {21},
 97 |         number  = {8},
 98 |         pages   = {1-5},
 99 |         url     = {http://jmlr.org/papers/v21/18-144.html}
100 |     }
101 | 
102 | 
103 | References
104 | -----------
105 | .. [1] : R. M. O. Cruz, R. Sabourin, and G. D. Cavalcanti, “Dynamic classifier selection: Recent advances and perspectives,” Information Fusion, vol. 41, pp. 195 – 216, 2018.
106 | 
107 | .. [2] : A. S. Britto, R. Sabourin, L. E. S. de Oliveira, Dynamic selection of classifiers - A comprehensive review, Pattern Recognition 47 (11) (2014) 3665–3680.
108 | 
109 | .. _scikit-learn: http://scikit-learn.org/stable/
110 | 
111 | .. _GitHub: https://github.com/scikit-learn-contrib/DESlib
112 | 


--------------------------------------------------------------------------------
/docs/modules/dcs/a_posteriori.rst:
--------------------------------------------------------------------------------
1 | A posteriori
2 | ------------
3 | 
4 | .. automodule:: deslib.dcs.a_posteriori
5 | 
6 | .. autoclass:: APosteriori
7 |     :members: fit, predict, predict_proba, score, estimate_competence, select
8 | 


--------------------------------------------------------------------------------
/docs/modules/dcs/a_priori.rst:
--------------------------------------------------------------------------------
1 | A Priori
2 | ----------
3 | 
4 | .. automodule:: deslib.dcs.a_priori
5 | 
6 | .. autoclass:: APriori
7 |     :members: fit, predict, predict_proba, score, estimate_competence, select
8 | 


--------------------------------------------------------------------------------
/docs/modules/dcs/lca.rst:
--------------------------------------------------------------------------------
1 | Local Class Accuracy (LCA)
2 | --------------------------
3 | 
4 | .. automodule:: deslib.dcs.lca
5 | 
6 | .. autoclass:: LCA
7 |     :members: fit, predict, predict_proba, score, estimate_competence, select
8 | 


--------------------------------------------------------------------------------
/docs/modules/dcs/mcb.rst:
--------------------------------------------------------------------------------
1 | Multiple Classifier Behaviour (MCB)
2 | -----------------------------------
3 | 
4 | .. automodule:: deslib.dcs.mcb
5 | 
6 | .. autoclass:: MCB
7 |     :members: fit, predict, predict_proba, score, estimate_competence, select
8 | 


--------------------------------------------------------------------------------
/docs/modules/dcs/mla.rst:
--------------------------------------------------------------------------------
1 | Modified Local Accuracy (MLA)
2 | -----------------------------
3 | 
4 | .. automodule:: deslib.dcs.mla
5 | 
6 | .. autoclass:: MLA
7 |     :members: fit, predict, predict_proba, score, estimate_competence, select
8 | 


--------------------------------------------------------------------------------
/docs/modules/dcs/ola.rst:
--------------------------------------------------------------------------------
1 | Overall Local Accuracy (OLA)
2 | -----------------------------
3 | 
4 | .. automodule:: deslib.dcs.ola
5 | 
6 | .. autoclass:: OLA
7 |     :members: fit, predict, predict_proba, score, estimate_competence, select
8 | 


--------------------------------------------------------------------------------
/docs/modules/dcs/rank.rst:
--------------------------------------------------------------------------------
1 | Modified Rank
2 | ---------------
3 | 
4 | .. automodule:: deslib.dcs.rank
5 | 
6 | .. autoclass:: Rank
7 |     :members: fit, predict, predict_proba, score, estimate_competence, select
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/modules/des/des_clustering.rst:
--------------------------------------------------------------------------------
1 | DES Clustering
2 | --------------
3 | 
4 | .. automodule:: deslib.des.des_clustering
5 | 
6 | .. autoclass:: DESClustering
7 |     :members: fit, predict, predict_proba, score, estimate_competence, select
8 | 


--------------------------------------------------------------------------------
/docs/modules/des/des_p.rst:
--------------------------------------------------------------------------------
1 | Dynamic Ensemble Selection performance (DES-P)
2 | ----------------------------------------------
3 | 
4 | .. automodule:: deslib.des.des_p
5 | 
6 | .. autoclass:: DESP
7 |     :members: fit, predict, predict_proba, score, estimate_competence, select
8 | 


--------------------------------------------------------------------------------
/docs/modules/des/deskl.rst:
--------------------------------------------------------------------------------
1 | DES-Kullback Leibler
2 | ---------------------
3 | 
4 | .. automodule:: deslib.des.probabilistic
5 | 
6 | .. autoclass:: DESKL
7 |     :members: source_competence, fit, predict, predict_proba, score, estimate_competence, select
8 | 


--------------------------------------------------------------------------------
/docs/modules/des/desmi.rst:
--------------------------------------------------------------------------------
1 | DES Multiclass Imbalance (DES-MI)
2 | ----------------------------------
3 | 
4 | .. automodule:: deslib.des.des_mi
5 | 
6 | .. autoclass:: DESMI
7 |     :members: fit, predict, predict_proba, score, estimate_competence, select
8 | 


--------------------------------------------------------------------------------
/docs/modules/des/ds_knn.rst:
--------------------------------------------------------------------------------
1 | DES-KNN
2 | --------------
3 | 
4 | .. automodule:: deslib.des.des_knn
5 | 
6 | .. autoclass:: DESKNN
7 |     :members: fit, predict, predict_proba, score, estimate_competence, select
8 | 


--------------------------------------------------------------------------------
/docs/modules/des/exponential.rst:
--------------------------------------------------------------------------------
1 | DES-Exponential
2 | ----------------
3 | 
4 | .. automodule:: deslib.des.probabilistic
5 | 
6 | .. autoclass:: Exponential
7 |     :members: source_competence, fit, predict, predict_proba, score, estimate_competence, select


--------------------------------------------------------------------------------
/docs/modules/des/knop.rst:
--------------------------------------------------------------------------------
1 | k-Nearest Output Profiles (KNOP)
2 | --------------------------------
3 | 
4 | .. automodule:: deslib.des.knop
5 | 
6 | .. autoclass:: KNOP
7 |     :members: fit, predict, predict_proba, score, estimate_competence_from_proba, select
8 | 


--------------------------------------------------------------------------------
/docs/modules/des/knora_e.rst:
--------------------------------------------------------------------------------
1 | k-Nearest Oracle-Eliminate (KNORA-E)
2 | --------------------------------------
3 | 
4 | .. automodule:: deslib.des.knora_e
5 | 
6 | .. autoclass:: KNORAE
7 |     :members: fit, predict, predict_proba, score, estimate_competence, select
8 | 


--------------------------------------------------------------------------------
/docs/modules/des/knora_u.rst:
--------------------------------------------------------------------------------
1 | k-Nearest Oracle Union (KNORA-U)
2 | --------------------------------
3 | 
4 | .. automodule:: deslib.des.knora_u
5 | 
6 | .. autoclass:: KNORAU
7 |     :members: fit, predict, predict_proba, score, estimate_competence, select
8 | 


--------------------------------------------------------------------------------
/docs/modules/des/logarithmic.rst:
--------------------------------------------------------------------------------
1 | DES-Logarithmic
2 | ---------------
3 | 
4 | .. automodule:: deslib.des.probabilistic
5 | 
6 | .. autoclass:: Logarithmic
7 |     :members: source_competence, fit, predict, predict_proba, score, estimate_competence, select


--------------------------------------------------------------------------------
/docs/modules/des/meta_des.rst:
--------------------------------------------------------------------------------
1 | META-DES
2 | ========
3 | 
4 | .. automodule:: deslib.des.meta_des
5 | 
6 | .. autoclass:: METADES
7 |     :members: fit, predict, predict_proba, score, estimate_competence_from_proba, select
8 | 


--------------------------------------------------------------------------------
/docs/modules/des/minimum_difference.rst:
--------------------------------------------------------------------------------
1 | DES-Minimum Difference
2 | ----------------------
3 | 
4 | .. automodule:: deslib.des.probabilistic
5 | 
6 | .. autoclass:: MinimumDifference
7 |     :members: source_competence, fit, predict, predict_proba, score, estimate_competence, select


--------------------------------------------------------------------------------
/docs/modules/des/probabilistic.rst:
--------------------------------------------------------------------------------
 1 | Probabilistic
 2 | -------------
 3 | 
 4 | .. automodule:: deslib.des.probabilistic
 5 | 
 6 | .. autoclass:: BaseProbabilistic
 7 |     :members:
 8 | 
 9 | .. toctree::
10 |     rrc
11 |     deskl
12 |     minimum_difference
13 |     exponential
14 |     logarithmic
15 | 


--------------------------------------------------------------------------------
/docs/modules/des/rrc.rst:
--------------------------------------------------------------------------------
1 | Randomized Reference Classifier (RRC)
2 | -------------------------------------
3 | 
4 | .. automodule:: deslib.des.probabilistic
5 | 
6 | .. autoclass:: RRC
7 |     :members: source_competence, fit, predict, predict_proba, score, estimate_competence, select


--------------------------------------------------------------------------------
/docs/modules/static/oracle.rst:
--------------------------------------------------------------------------------
1 | Oracle
2 | --------------
3 | 
4 | .. automodule:: deslib.static.oracle
5 | 
6 | .. autoclass:: Oracle
7 |     :members:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/modules/static/single_best.rst:
--------------------------------------------------------------------------------
1 | Single Best
2 | --------------
3 | 
4 | .. automodule:: deslib.static.single_best
5 | 
6 | .. autoclass:: SingleBest
7 |     :members: fit, predict, predict_proba, score
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/modules/static/stacked.rst:
--------------------------------------------------------------------------------
1 | Stacked Classifier
2 | ------------------
3 | 
4 | .. automodule:: deslib.static.stacked
5 | 
6 | .. autoclass:: StackedClassifier
7 |     :members: fit, predict, predict_proba, score
8 | 


--------------------------------------------------------------------------------
/docs/modules/static/static_selection.rst:
--------------------------------------------------------------------------------
1 | Static Selection
2 | ----------------
3 | 
4 | .. automodule:: deslib.static.static_selection
5 | 
6 | .. autoclass:: StaticSelection
7 |     :members: fit, predict, predict_proba, score
8 | 


--------------------------------------------------------------------------------
/docs/modules/util/aggregation.rst:
--------------------------------------------------------------------------------
1 | Aggregation
2 | --------------
3 | 
4 | This file contains the implementation of different aggregation functions to combine the outputs of the base
5 | classifiers to give the final decision.
6 | 
7 | .. automodule:: deslib.util.aggregation
8 |     :members:
9 | 


--------------------------------------------------------------------------------
/docs/modules/util/datasets.rst:
--------------------------------------------------------------------------------
 1 | Datasets
 2 | --------
 3 | 
 4 | This file contains routines to generate 2D classification datasets
 5 | that can be used to test the performance of different machine learning
 6 | algorithms.
 7 | 
 8 | - P2 Dataset
 9 | - Circle and Square
10 | - Banana
11 | - Banana 2
12 | 
13 | 
14 | .. automodule:: deslib.util.datasets
15 |     :members:
16 | 


--------------------------------------------------------------------------------
/docs/modules/util/dfp.rst:
--------------------------------------------------------------------------------
1 | Frienemy Pruning
2 | ----------------
3 | 
4 | .. automodule:: deslib.util.dfp
5 |     :members:


--------------------------------------------------------------------------------
/docs/modules/util/diversity.rst:
--------------------------------------------------------------------------------
 1 | Diversity
 2 | --------------
 3 | 
 4 | This file contains the implementation of key diversity measures found in the ensemble literature:
 5 | 
 6 | - Double Fault 
 7 | - Negative Double fault
 8 | - Q-statistics
 9 | - Ratio of errors
10 | 
11 | The implementation are made according to the specifications from the book "Combining Pattern Classifiers".
12 | 
13 | 
14 | .. automodule:: deslib.util.diversity
15 |     :members:
16 | 


--------------------------------------------------------------------------------
/docs/modules/util/faiss_knn_wrapper.rst:
--------------------------------------------------------------------------------
1 | FAISS Wrapper
2 | --------------
3 | 
4 | .. automodule:: deslib.util.faiss_knn_wrapper
5 | 
6 | .. autoclass:: FaissKNNClassifier
7 |     :members: fit, predict, predict_proba, kneighbors
8 | 


--------------------------------------------------------------------------------
/docs/modules/util/instance_hardness.rst:
--------------------------------------------------------------------------------
1 | Instance Hardness
2 | -------------------
3 | 
4 | This file contains the implementation of different measures of instance hardness.
5 | 
6 | .. automodule:: deslib.util.instance_hardness
7 |     :members:
8 | 


--------------------------------------------------------------------------------
/docs/modules/util/knne.rst:
--------------------------------------------------------------------------------
1 | KNN-Equality
2 | ------------
3 | 
4 | .. automodule:: deslib.util.knne
5 | 
6 | .. autoclass:: KNNE
7 |     :members: fit, predict, predict_proba, kneighbors
8 | 


--------------------------------------------------------------------------------
/docs/modules/util/prob_functions.rst:
--------------------------------------------------------------------------------
1 | Probabilistic Functions
2 | ------------------------
3 | This file contains the implementation of several functions used to estimate the competence
4 | level of a base classifiers based on posterior probabilities predicted for each class.
5 | 
6 | .. automodule:: deslib.util.prob_functions
7 |     :members:
8 | 


--------------------------------------------------------------------------------
/docs/news.rst:
--------------------------------------------------------------------------------
 1 | .. currentmodule:: deslib
 2 | 
 3 | ===============
 4 | Release history
 5 | ===============
 6 | 
 7 | .. include:: news/v0.3.rst
 8 | 
 9 | .. include:: news/v0.2.rst
10 | 
11 | .. include:: news/v0.1.rst
12 | 
13 | .. toctree::
14 |     :hidden:
15 | 
16 |     news/v0.1
17 |     news/v0.2
18 |     news/v0.3


--------------------------------------------------------------------------------
/docs/news/v0.1.rst:
--------------------------------------------------------------------------------
 1 | Version 0.1
 2 | ===========
 3 | 
 4 | API
 5 | ~~~
 6 | 
 7 | - First release of the stable API. By `Rafael M O Cruz`_ and `Luiz G Hafemann`_.
 8 | 
 9 | Implemented methods:
10 | ~~~~~~~~~~~~~~~~~~~~~
11 | 
12 | * DES techniques currently available are:
13 |     1. META-DES
14 |     2. K-Nearest-Oracle-Eliminate (KNORA-E)
15 |     3. K-Nearest-Oracle-Union (KNORA-U)
16 |     4. Dynamic Ensemble Selection-Performance(DES-P)
17 |     5. K-Nearest-Output Profiles (KNOP)
18 |     6. Randomized Reference Classifier (DES-RRC)
19 |     7. DES Kullback-Leibler Divergence (DES-KL)
20 |     8. DES-Exponential
21 |     9. DES-Logarithmic
22 |     10. DES-Minimum Difference
23 |     11. DES-Clustering
24 |     12. DES-KNN
25 | 
26 | * DCS techniques:
27 |     1. Modified Classifier Rank (Rank)
28 |     2. Overall Locall Accuracy (OLA)
29 |     3. Local Class Accuracy (LCA)
30 |     4. Modified Local Accuracy (MLA)
31 |     5. Multiple Classifier Behaviour (MCB)
32 |     6. A Priori Selection (A Priori)
33 |     7. A Posteriori Selection (A Posteriori)
34 | 
35 | * Baseline methods:
36 |     1. Oracle
37 |     2. Single Best
38 |     3. Static Selection
39 | 
40 | * Dynamic Frienemy Prunning (DFP)
41 | * Diversity measures
42 | * Aggregation functions
43 | 
44 | .. _Rafael M O Cruz: https://github.com/Menelau
45 | .. _Luiz G Hafemann: https://github.com/luizgh
46 | 


--------------------------------------------------------------------------------
/docs/news/v0.2.rst:
--------------------------------------------------------------------------------
 1 | Version 0.2
 2 | ===========
 3 | 
 4 | - Second release of the stable API. By `Rafael M O Cruz`_ and `Luiz G Hafemann`_.
 5 | 
 6 | Changes
 7 | ~~~~~~~~~~~~~~~~~~~~~
 8 | 
 9 | * Implemented Label Encoding: labels are no longer required to be integers starting from 0. Categorical (strings) and non-sequential integers are supported (similarly to scikit-learn).
10 | * Batch processing: Vectorized implementation of predictions. Large speed-up in computation time (100x faster in some cases).
11 | * Predict proba: only required (in the base estimators) if using methods that rely on probabilities (or if requesting probabilities from the ensemble).
12 | * Improved documentation: Included additional examples, a step-by-step tutorial on how to use the library.
13 | * New integration tests: Now covering predict_proba, IH and DFP.
14 | * Bug fixes on 1) predict_proba 2) KNOP with DFP.
15 | 
16 | .. _Rafael M O Cruz: https://github.com/Menelau
17 | .. _Luiz G Hafemann: https://github.com/luizgh
18 | 


--------------------------------------------------------------------------------
/docs/news/v0.3.5.rst:
--------------------------------------------------------------------------------
 1 | Version 0.3.5
 2 | =============
 3 | 
 4 | - Fourth release of the stable API. By `Rafael M O Cruz`_  and `Luiz G Hafemann`_. This release was mainly focused on compatibility with newer scikit-learn versions, performance improvement and bug fixes.
 5 | 
 6 | 
 7 | Changes
 8 | ~~~~~~~~~~~~~~~~~~~~~
 9 | * Update tests according to the new scikit-learn standards.
10 | * Added n_jobs parameter for parallelization.
11 | * Refactored FIRE-DES for faster processing.
12 | * Added new approximated KNN methods using Facebook FAISS search.
13 | * Added passtrhough features for StackedClassifier.
14 | * Added different scoring methods (e.g., AUC, F1-Score) for Single best and Static Selection methods
15 | * Added different scoring methods for DESClustering
16 | * Added predict_proba for the Oracle method.
17 | * Added batch processing for probabilistic methods.
18 | * Added KNearest Neighbors equality option and 'n_neighbors parameter.
19 | * Improved weighted majority voting performance with bath processing.
20 | * Removal of redundant information in documentation.
21 | * Update reference article.
22 | 
23 | Bug Fixes
24 | ~~~~~~~~~~~~
25 | 
26 | * Fixed randomness with APosteriori and APriori methods during test.
27 | * Fixed error with label encoder for the Oracle and static combination methods
28 | * Methods do not allow a pool containing a single classifier mdoel.
29 | * Removal of Collinear features in stacked classifier.
30 | * Fixed meta-classfier when passing a classifier model to the META-DES technique.
31 | * Fixed DCS-OLA documentation.
32 | * Fixed bug when support given to a class is very small but not zero.
33 | * Fixed FAISS batch processing mode.
34 | 
35 | 
36 | 
37 | .. _Rafael M O Cruz: https://github.com/Menelau
38 | .. _Luiz G Hafemann: https://github.com/luizgh
39 | 
40 | 


--------------------------------------------------------------------------------
/docs/news/v0.3.rst:
--------------------------------------------------------------------------------
 1 | Version 0.3
 2 | ===========
 3 | 
 4 | - Third release of the stable API. By `Rafael M O Cruz`_  and `Luiz G Hafemann`_
 5 | 
 6 | Changes
 7 | ~~~~~~~~~~~~~~~~~~~~~
 8 | * All techniques are now sklearn estimators and passes the check_estimator tests.
 9 | * All techniques can now be instantiated without a trained pool of classifiers.
10 | * Pool of classifiers can now be fitted together with the ensemble techniques. See `simple example <auto_examples/simple_example.html>`_.
11 | * Added support for Faiss (Facebook AI Similarity Search) for fast region of competence estimation on GPU.
12 | * Added DES Multi-class Imbalance method :class:`deslib.des.des_mi.DESMI`.
13 | * Added stacked classifier model, :class:`deslib.static.stacked.StackedClassifier` to the static ensemble module.
14 | * Added a new Instance Hardness measure :func:`utils.instance_hardness.kdn_score`.
15 | * Added Instance Hardness support when using DES-Clustering.
16 | * Added label encoder for the :mod:`static` module.
17 | * Added a script :mod:`utils.datasets` with routines to generate synthetic datasets (e.g., the P2 and XOR datasets).
18 | * Changed name of base classes (Adding Base to their following scikit-learn standards).
19 | * Removal of **DFP_mask**, **neighbors** and **distances** as class variables.
20 | * Changed signature of methods **estimate_competence**, **predict_with_ds**, **predict_proba_with_ds**. They now require the neighbors and distances to be passed as input arguments.
21 | * Added random_state parameter to all methods in order to have reproducible results.
22 | * Added Python 3.7 support.
23 | * New and updated `examples <auto_examples/index.html>`_.
24 | * Added performance tests comparing the speed of Faiss vs sklearn KNN.
25 | 
26 | Bug Fixes
27 | ~~~~~~~~~~~~
28 | 
29 | * Fixed bug with META-DES when checking if the meta-classifier was already fitted.
30 | * Fixed bug with random state on DCS techniques.
31 | * Fixed high memory consumption on DES probabilistic methods.
32 | * Fixed bug on Heterogeneous ensembles example and notebooks examples.
33 | * Fixed bug on :class:`deslib.des.probabilistic.MinimumDifference` when only samples from a single class are provided.
34 | * Fixed problem with DS methods when the number of training examples was lower than the k value.
35 | * Fixed division by zero problems with :class:`APosteriori` :class:`APriori` :class:`MLA` when the distance is equal to zero.
36 | * Fixed bug on :func:`deslib.utils.prob_functions.exponential_func` when the support obtained for the correct class was equal to one.
37 | 
38 | 
39 | .. _Rafael M O Cruz: https://github.com/Menelau
40 | .. _Luiz G Hafemann: https://github.com/luizgh
41 | 
42 | 


--------------------------------------------------------------------------------
/docs/user_guide.rst:
--------------------------------------------------------------------------------
 1 | .. _user_guide:
 2 | 
 3 | ######################
 4 | User guide
 5 | ######################
 6 | 
 7 | This user guide explains how to install DESlib, how to contribute to the library and
 8 | presents a step-by-step tutorial to fit and predict new instances using several dynamic selection techniques.
 9 | 
10 | .. toctree::
11 |     :maxdepth: 2
12 | 
13 |     user_guide/installation
14 |     user_guide/development
15 |     user_guide/tutorial
16 |     user_guide/known_issues
17 |     user_guide/packaging
18 | 
19 | 


--------------------------------------------------------------------------------
/docs/user_guide/development.rst:
--------------------------------------------------------------------------------
  1 | .. _development:
  2 | 
  3 | Development
  4 | ===========
  5 | 
  6 | DESlib was started by Rafael M. O. Cruz as a way to facilitate research in this topic by providing other researchers
  7 | a toolbox with everything that is required to easily develop and compare different dynamic ensemble techniques.
  8 | 
  9 | The library is a work in progress. As an open-source project, any type of contribution is welcomed and encouraged!
 10 | 
 11 | 
 12 | Contributing to DESlib
 13 | ----------------------
 14 | 
 15 | You can contribute to the project in several ways:
 16 | 
 17 | - Reporting bugs
 18 | - Requesting features
 19 | - Improving the documentation
 20 | - Adding examples to use the library
 21 | - Implementing new features and fixing bugs
 22 | 
 23 | Reporting Bugs and requesting features
 24 | ---------------------------------------
 25 | 
 26 | We use Github issues to track all bugs and feature requests; feel free to
 27 | open an issue if you have found a bug or wish to see a new feature implemented.
 28 | Before opening a new issue, please check if the issue is not being currently addressed:
 29 | [Issues](https://github.com/scikit-learn-contrib/DESlib/issues)
 30 | 
 31 | For reporting bugs:
 32 | 
 33 | -  Include information of your working environment. This information
 34 |    can be found by running the following code snippet:
 35 | 
 36 | .. code-block:: python
 37 | 
 38 |    import platform; print(platform.platform())
 39 |    import sys; print("Python", sys.version)
 40 |    import numpy; print("NumPy", numpy.__version__)
 41 |    import scipy; print("SciPy", scipy.__version__)
 42 |    import sklearn; print("Scikit-Learn", sklearn.__version__)
 43 | 
 44 | -  Include a [reproducible](https://stackoverflow.com/help/mcve) code snippet
 45 |    or link to a [gist](https://gist.github.com). If an exception is raised,
 46 |    please provide the traceback.
 47 | 
 48 | Documentation
 49 | --------------
 50 | 
 51 | We are glad to accept any sort of documentation: function docstrings,
 52 | reStructuredText documents (like this one), tutorials, etc.
 53 | reStructuredText documents live in the source code repository under the
 54 | doc/ directory.
 55 | 
 56 | You can edit the documentation using any text editor and then generate
 57 | the HTML output by typing ``make html`` from the doc/ directory.
 58 | Alternatively, ``make`` can be used to quickly generate the
 59 | documentation without the example gallery. The resulting HTML files will
 60 | be placed in _build/html/ and are viewable in a web browser. See the
 61 | README file in the doc/ directory for more information.
 62 | 
 63 | For building the documentation, you will need to install sphinx and sphinx_rtd_theme. This
 64 | can be easily done by installing the requirements for development using the following command:
 65 | 
 66 | .. code-block:: bash
 67 | 
 68 |     pip install -r requirements-dev.txt
 69 | 
 70 | Contributing with code
 71 | -----------------------
 72 | 
 73 | The preferred way to contribute is to fork the main repository to your account:
 74 | 
 75 | 1. Fork the [project repository](https://github.com/scikit-learn-contrib/DESlib):
 76 |    click on the 'Fork' button near the top of the page. This creates
 77 |    a copy of the code under your account on the GitHub server.
 78 | 
 79 | 2. Clone this copy to your local disk:
 80 | 
 81 | .. code-block:: bash
 82 | 
 83 |         git clone git@github.com:YourLogin/DESlib.git
 84 |         cd DESlib
 85 | 
 86 | 3. Install all requirements for development:
 87 | 
 88 | .. code-block:: bash
 89 | 
 90 |         pip install -r requirements-dev.txt
 91 |         pip install --editable .
 92 | 
 93 | 4. Create a branch to hold your changes:
 94 | 
 95 | .. code-block:: bash
 96 | 
 97 |         git checkout -b branch_name
 98 | 
 99 | Where ``branch_name`` is the new feature or bug to be fixed. Do not work directly on the ``master`` branch.
100 | 
101 | 5. Work on this copy on your computer using Git to do the version
102 |    control. To record your changes in Git, then push them to GitHub with:
103 | 
104 | .. code-block:: bash
105 | 
106 |         git push -u origin branch_name
107 | 
108 | It is important to assert your code is well covered by test routines (coverage of at least 90%), well documented and
109 | follows PEP8 guidelines.
110 | 
111 | 6. Create a 'Pull request' to send your changes for review.
112 | 
113 |    If your pull request addresses an issue, please use the title to describe
114 |    the issue and mention the issue number in the pull request description to
115 |    ensure a link is created to the original issue.
116 | 
117 | .. _GitHub: https://github.com/scikit-learn-contrib/DESlib.
118 | 
119 | .. _scikit-learn: http://scikit-learn.org/stable/


--------------------------------------------------------------------------------
/docs/user_guide/installation.rst:
--------------------------------------------------------------------------------
 1 | .. _installation:
 2 | 
 3 | ============
 4 | Installation
 5 | ============
 6 | 
 7 | The library can be installed using pip:
 8 | 
 9 | Stable version:
10 | 
11 | .. code-block:: bash
12 | 
13 |     pip install deslib
14 | 
15 | Latest version (under development):
16 | 
17 | .. code-block:: bash
18 | 
19 |     pip install git+https://github.com/scikit-learn-contrib/DESlib
20 | 
21 | DESlib is tested to work with Python 3.5, 3.6 and 3.7. The dependency requirements are:
22 | 
23 | * scipy(>=0.13.3)
24 | * numpy(>=1.10.4)
25 | * scikit-learn(>=0.19.0)
26 | 
27 | These dependencies are automatically installed using the pip commands above.
28 | 
29 | Optional dependencies
30 | =====================
31 | To use Faiss (Fair AI Similarity Search), a fast implementation of KNN that can use GPUs, follow the instructions below:
32 | https://github.com/facebookresearch/faiss/blob/master/INSTALL.md
33 | 
34 | Note that Faiss is only available on Linux and MacOS.


--------------------------------------------------------------------------------
/docs/user_guide/known_issues.rst:
--------------------------------------------------------------------------------
 1 | .. _known_issues:
 2 | 
 3 | Known Issues
 4 | ============
 5 | 
 6 | The estimators in this library are not compatible with scikit-learn's GridSearch, and other CV methods. That is, the following is not supported:
 7 | 
 8 | .. code-block:: python
 9 | 
10 |     from deslib.des.knora_e import KNORAE
11 |     from sklearn.model_selection import GridSearchCV
12 | 
13 |     # (...) initialize a pool of classifiers
14 |     kne = KNORAE(pool_classifiers)
15 | 
16 |     # Do a grid search on KNORAE's "k" parameter
17 |     params = {'k': [1, 3, 5, 7]}
18 | 
19 |     grid = GridSearchCV(kne, params)
20 |     grid.fit(X_dsel, y_dsel)  # Raises an error
21 | 
22 | This is due to a limitation of a scikit-learn method (sklearn.base.clone), under discussion in this issue_
23 | 
24 | .. _issue: https://github.com/scikit-learn/scikit-learn/issues/8370


--------------------------------------------------------------------------------
/docs/user_guide/packaging.rst:
--------------------------------------------------------------------------------
 1 | .. _packaging:
 2 | 
 3 | Releasing a new version
 4 | =======================
 5 | 
 6 | Publishing new version involves:
 7 | 
 8 | 1) Updating the version numbers and creating a new tag in git (which also updates the "stable" version of the documentation)
 9 | 2) Creating the distribution (.tar.gz and wheel files), and uploading them to pypi
10 | 
11 | Some important things to have in mind:
12 |  * Read the "Packaging and Distributing Projects" guide: https://packaging.python.org/tutorials/distributing-packages/
13 |  * The version numbers (in setup.py and __init__.py) are used as metadata for pypi and for the readthedocs documentation - pay attention to them or some things can break. In general, you should be working on a version such as "0.2.dev". You then rename it to "0.2" and create a tag "v0.2". After you finish everything, you update the version to "0.3.dev" to indicate that new developments are being made for the next version.
14 | 
15 | 
16 | Step-by-step process
17 | --------------------
18 | 
19 | 
20 | * Create an account in PyPi production: https://pypi.org/ and test: https://test.pypi.org/
21 | * Make sure you have twine installed:
22 | 
23 |  .. code-block:: bash
24 | 
25 |   pip install twine
26 | 
27 | * Update version on setup.py (e.g. "0.1")
28 | * Update version on deslib/__init__.py
29 | * Create tag: :code:`git tag <version>` (example: "git tag 'v0.1'")
30 | * Push the tag :code:`git push origin <version>`
31 | * Create the source and wheels distributions
32 | 
33 |  .. code-block:: bash
34 | 
35 |     python setup.py sdist # source distribution
36 |     python setup.py bdist_wheel # wheel distribution for current python version
37 | 
38 | * Upload to test pypi and check
39 | 
40 |   - uploading the package:
41 | 
42 |   .. code-block:: bash
43 | 
44 |     twine upload --repository-url https://test.pypi.org/legacy/ dist/*
45 | 
46 |   - Note: if you do this multiple times (e.g. to fix an issue), you will need to rename the files under the "dist" folder: a filename can only be submitted once to pypi. You may also need to manually delete the "source" version of the distribution, since there can only be one source file per version of the software
47 | 
48 |   - Test an installation from the testing pypi environment.
49 | 
50 |   .. code-block:: bash
51 | 
52 |      conda create -y -n testdes python=3
53 |      source activate testdes
54 |      pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple deslib
55 |      conda remove -y --name testdes --all  #remove temporary environment
56 | 
57 | * Upload to production pypi
58 | 
59 |   .. code-block:: bash
60 | 
61 |      twine upload dist/*
62 | 
63 | * Mark the new stable version to be built on readthedocs:
64 | 
65 |  - Go to https://readthedocs.org/projects/deslib/versions/, find the new tag and click "Edit". Mark the "active" checkbox and save.
66 | 
67 | * Update version on setup.py and __init.py__ to mention the new version in development (e.g. "0.2.dev")
68 | 
69 | 
70 | Note #1: Read the docs is automatically updated:
71 | 
72 | * When a new commit is done in master (this updates the "master" version)
73 | * When a new tag is pushed to github (this updates the "stable" version)  -> This seems to not aways work - it is better to check
74 | 
75 | Note #2: The documentation automatically links to source files for the methods/classes. This only works if the tag is pushed to github, and matches the __version__ variable in __init.py__. Example:
76 | __version__ = "0.1" and the tag being:
77 | git tag "v0.1"
78 | 


--------------------------------------------------------------------------------
/examples/README.txt:
--------------------------------------------------------------------------------
1 | .. _general_examples:
2 | 
3 | General examples
4 | ----------------
5 | 
6 | Examples showing how to use different aspect of the library


--------------------------------------------------------------------------------
/examples/example_calibrating_classifiers.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | 
  3 | # Author: Rafael Menelau Oliveira e Cruz <rafaelmenelau@gmail.com>
  4 | #
  5 | # License: BSD 3 clause
  6 | """
  7 | ====================================================================
  8 | Calibrating base classifiers to estimate probabilities
  9 | ====================================================================
 10 | 
 11 | In this example we show how to apply different DCS and DES techniques for a
 12 | classification dataset.
 13 | 
 14 | A very important aspect in dynamic selection is the generation of a pool
 15 | of classifiers. A common practice in the dynamic selection literature is to
 16 | use the Bagging (Bootstrap Aggregating) method to generate a pool containing
 17 | base classifiers that are both diverse and informative.
 18 | 
 19 | In this example we generate a pool of classifiers using the Bagging technique
 20 | implemented on the Scikit-learn library. Then, we compare the results obtained
 21 | by combining this pool of classifiers using the standard Bagging combination
 22 | approach versus the application of dynamic selection technique to select the
 23 | set of most competent classifiers
 24 | """
 25 | 
 26 | 
 27 | import numpy as np
 28 | from sklearn.calibration import CalibratedClassifierCV
 29 | from sklearn.datasets import load_breast_cancer
 30 | from sklearn.ensemble import BaggingClassifier
 31 | from sklearn.linear_model import Perceptron
 32 | from sklearn.model_selection import train_test_split
 33 | from sklearn.preprocessing import StandardScaler
 34 | 
 35 | from deslib.dcs.a_priori import APriori
 36 | from deslib.dcs.mcb import MCB
 37 | from deslib.dcs.ola import OLA
 38 | from deslib.des.des_p import DESP
 39 | from deslib.des.knora_e import KNORAE
 40 | from deslib.des.knora_u import KNORAU
 41 | from deslib.des.meta_des import METADES
 42 | 
 43 | ###############################################################################
 44 | # Preparing the dataset
 45 | # ---------------------
 46 | # In this part we load the breast cancer dataset from scikit-learn and
 47 | # preprocess it in order to pass to the DS models. An important point here is
 48 | # to normalize the data so that it has zero mean and unit variance, which is
 49 | # a common requirement for many machine learning algorithms.
 50 | # This step can be easily done using the StandardScaler class.
 51 | 
 52 | rng = np.random.RandomState(123)
 53 | data = load_breast_cancer()
 54 | X = data.data
 55 | y = data.target
 56 | # split the data into training and test data
 57 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
 58 |                                                     random_state=rng)
 59 | 
 60 | # Scale the variables to have 0 mean and unit variance
 61 | scaler = StandardScaler()
 62 | X_train = scaler.fit_transform(X_train)
 63 | X_test = scaler.transform(X_test)
 64 | 
 65 | # Split the data into training and DSEL for DS techniques
 66 | X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train,
 67 |                                                     test_size=0.5,
 68 |                                                     random_state=rng)
 69 | 
 70 | # Train a pool of 100 base classifiers
 71 | pool_classifiers = BaggingClassifier(Perceptron(max_iter=10),
 72 |                                      n_estimators=100, random_state=rng)
 73 | pool_classifiers.fit(X_train, y_train)
 74 | 
 75 | # Initialize the DS techniques
 76 | knorau = KNORAU(pool_classifiers)
 77 | kne = KNORAE(pool_classifiers)
 78 | desp = DESP(pool_classifiers)
 79 | ola = OLA(pool_classifiers)
 80 | mcb = MCB(pool_classifiers, random_state=rng)
 81 | 
 82 | ###############################################################################
 83 | # Calibrating base classifiers
 84 | # -----------------------------
 85 | # Some dynamic selection techniques requires that the base classifiers estimate
 86 | # probabilities in order to estimate its competence level. Since the Perceptron
 87 | # model is not a probabilistic classifier (does not implements the
 88 | # predict_proba method, it needs to be calibrated for
 89 | # probability estimation before being used by such DS techniques. This step can
 90 | # be conducted using the CalibrateClassifierCV class from scikit-learn. Note
 91 | # that in this example we pass a prefited pool of classifiers to the
 92 | # calibration method in order to use exactly the same pool used in the other
 93 | # DS methods.
 94 | calibrated_pool = []
 95 | for clf in pool_classifiers:
 96 |     calibrated = CalibratedClassifierCV(estimator=clf, cv='prefit')
 97 |     calibrated.fit(X_dsel, y_dsel)
 98 |     calibrated_pool.append(calibrated)
 99 | 
100 | apriori = APriori(calibrated_pool, random_state=rng)
101 | meta = METADES(calibrated_pool)
102 | 
103 | 
104 | knorau.fit(X_dsel, y_dsel)
105 | kne.fit(X_dsel, y_dsel)
106 | desp.fit(X_dsel, y_dsel)
107 | ola.fit(X_dsel, y_dsel)
108 | mcb.fit(X_dsel, y_dsel)
109 | apriori.fit(X_dsel, y_dsel)
110 | meta.fit(X_dsel, y_dsel)
111 | 
112 | ###############################################################################
113 | # Evaluating the methods
114 | # -----------------------
115 | # Let's now evaluate the methods on the test set. We also use the performance
116 | # of Bagging (pool of classifiers without any selection) as a baseline
117 | # comparison. We can see that  the majority of DS methods achieve higher
118 | # classification accuracy.
119 | 
120 | print('Evaluating DS techniques:')
121 | print('Classification accuracy KNORA-Union: ',
122 |       knorau.score(X_test, y_test))
123 | print('Classification accuracy KNORA-Eliminate: ',
124 |       kne.score(X_test, y_test))
125 | print('Classification accuracy DESP: ', desp.score(X_test, y_test))
126 | print('Classification accuracy OLA: ', ola.score(X_test, y_test))
127 | print('Classification accuracy A priori: ', apriori.score(X_test, y_test))
128 | print('Classification accuracy MCB: ', mcb.score(X_test, y_test))
129 | print('Classification accuracy META-DES: ', meta.score(X_test, y_test))
130 | print('Classification accuracy Bagging: ',
131 |       pool_classifiers.score(X_test, y_test))
132 | 


--------------------------------------------------------------------------------
/examples/example_heterogeneous.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ====================================================================
  3 | Example using heterogeneous ensemble
  4 | ====================================================================
  5 | DESlib accepts different classifier models in the pool of classifiers.
  6 | Such pool of classifiers is called Heterogeneous.
  7 | 
  8 | In this example, we consider a pool of classifiers composed of a
  9 | Gaussian Naive Bayes, Perceptron, k-NN, Decision tree and Gaussian SVM. We
 10 | also compare the result of DS methods with the voting classifier from sklearn.
 11 | """
 12 | import numpy as np
 13 | from sklearn.calibration import CalibratedClassifierCV
 14 | # Importing dataset and preprocessing routines
 15 | from sklearn.datasets import fetch_openml
 16 | from sklearn.ensemble import VotingClassifier
 17 | # Base classifier models:
 18 | from sklearn.linear_model import Perceptron
 19 | from sklearn.model_selection import train_test_split
 20 | from sklearn.naive_bayes import GaussianNB
 21 | from sklearn.neighbors import KNeighborsClassifier
 22 | from sklearn.preprocessing import StandardScaler
 23 | from sklearn.svm import SVC
 24 | from sklearn.tree import DecisionTreeClassifier
 25 | 
 26 | from deslib.dcs import MCB
 27 | # Example of DCS techniques
 28 | from deslib.dcs import OLA
 29 | from deslib.des import DESP
 30 | # Example of DES techniques
 31 | from deslib.des import KNORAE
 32 | from deslib.des import KNORAU
 33 | from deslib.des import METADES
 34 | from deslib.static import StackedClassifier
 35 | 
 36 | rng = np.random.RandomState(42)
 37 | data = fetch_openml(name='phoneme', cache=False, as_frame=False)
 38 | X = data.data
 39 | y = data.target
 40 | 
 41 | # split the data into training and test data
 42 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
 43 |                                                     random_state=rng)
 44 | 
 45 | scaler = StandardScaler()
 46 | X_train = scaler.fit_transform(X_train)
 47 | X_test = scaler.transform(X_test)
 48 | 
 49 | # Split the data into training and DSEL for DS techniques
 50 | X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train,
 51 |                                                     test_size=0.5,
 52 |                                                     random_state=rng)
 53 | 
 54 | model_perceptron = CalibratedClassifierCV(Perceptron(max_iter=100,
 55 |                                                      random_state=rng),
 56 |                                           cv=3)
 57 | 
 58 | model_perceptron.fit(X_train, y_train)
 59 | model_svc = SVC(probability=True, gamma='auto',
 60 |                 random_state=rng).fit(X_train, y_train)
 61 | model_bayes = GaussianNB().fit(X_train, y_train)
 62 | model_tree = DecisionTreeClassifier(random_state=rng,
 63 |                                     max_depth=10).fit(X_train, y_train)
 64 | model_knn = KNeighborsClassifier(n_neighbors=7).fit(X_train, y_train)
 65 | 
 66 | pool_classifiers = [model_perceptron,
 67 |                     model_svc,
 68 |                     model_bayes,
 69 |                     model_tree,
 70 |                     model_knn]
 71 | 
 72 | voting_classifiers = [("perceptron", model_perceptron),
 73 |                       ("svc", model_svc),
 74 |                       ("bayes", model_bayes),
 75 |                       ("tree", model_tree),
 76 |                       ("knn", model_knn)]
 77 | 
 78 | model_voting = VotingClassifier(estimators=voting_classifiers).fit(
 79 |     X_train, y_train)
 80 | 
 81 | # Initializing the techniques
 82 | knorau = KNORAU(pool_classifiers)
 83 | kne = KNORAE(pool_classifiers)
 84 | desp = DESP(pool_classifiers)
 85 | metades = METADES(pool_classifiers)
 86 | # DCS techniques
 87 | ola = OLA(pool_classifiers)
 88 | mcb = MCB(pool_classifiers)
 89 | 
 90 | ##############################################################################
 91 | # Adding stacked classifier as baseline comparison. Stacked classifier can
 92 | # be found in the static module. In this experiment we consider two types
 93 | # of stacking: one using logistic regression as meta-classifier
 94 | # (default configuration) and the other using a Decision Tree.
 95 | stacked_lr = StackedClassifier(pool_classifiers, random_state=rng)
 96 | stacked_dt = StackedClassifier(pool_classifiers,
 97 |                                random_state=rng,
 98 |                                meta_classifier=DecisionTreeClassifier())
 99 | # Fitting the DS techniques
100 | knorau.fit(X_dsel, y_dsel)
101 | kne.fit(X_dsel, y_dsel)
102 | desp.fit(X_dsel, y_dsel)
103 | metades.fit(X_dsel, y_dsel)
104 | ola.fit(X_dsel, y_dsel)
105 | mcb.fit(X_dsel, y_dsel)
106 | 
107 | # Fitting the tacking models
108 | stacked_lr.fit(X_dsel, y_dsel)
109 | stacked_dt.fit(X_dsel, y_dsel)
110 | 
111 | # Calculate classification accuracy of each technique
112 | print('Evaluating DS techniques:')
113 | print('Classification accuracy of Majority voting the pool: ',
114 |       model_voting.score(X_test, y_test))
115 | print('Classification accuracy of KNORA-U: ', knorau.score(X_test, y_test))
116 | print('Classification accuracy of KNORA-E: ', kne.score(X_test, y_test))
117 | print('Classification accuracy of DESP: ', desp.score(X_test, y_test))
118 | print('Classification accuracy of META-DES: ', metades.score(X_test, y_test))
119 | print('Classification accuracy of OLA: ', ola.score(X_test, y_test))
120 | print('Classification accuracy Stacking LR', stacked_lr.score(X_test, y_test))
121 | print('Classification accuracy Stacking DT', stacked_dt.score(X_test, y_test))
122 | 


--------------------------------------------------------------------------------
/examples/plot_example_DFP.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | 
  3 | # Author: Rafael Menelau Oliveira e Cruz <rafaelmenelau@gmail.com>
  4 | #
  5 | # License: BSD 3 clause
  6 | """
  7 | ====================================================================
  8 | Using the Dynamic Frienemy Pruning (DFP)
  9 | ====================================================================
 10 | 
 11 | In this example we show how to apply the dynamic frienemy pruning (DFP) to
 12 | different dynamic selection techniques.
 13 | 
 14 | The DFP method is an online pruning model which analyzes the region
 15 | of competence to know if it is composed of samples from different classes
 16 | (indecision region). Then, it remove the base classifiers that do not correctly
 17 | classifies at least a pair of samples coming from different classes, i.e., the
 18 | base classifiers that cannot separate the classes in the local region.
 19 | More information on this method can be found in refs [1] and [2].
 20 | 
 21 | DES techniques using the DFP algorithm are called FIRE-DES (Frienemy Indecision
 22 | REgion Dynamic Ensemble Selection).
 23 | The FIRE-DES is shown to significantly improve the performance of several
 24 | dynamic selection algorithms when dealing with imbalanced classification
 25 | problems as it avoids the classifiers that are biased towards the majority
 26 | class in predicting the label for the query.
 27 | 
 28 | References
 29 | ----------
 30 | [1] Oliveira, D.V.R., Cavalcanti, G.D.C. and Sabourin, R., "Online Pruning
 31 | of Base Classifiers for Dynamic Ensemble Selection", Pattern Recognition,
 32 | vol. 72, 2017, pp 44-58.
 33 | 
 34 | [2] Cruz, R.M.O., Oliveira, D.V.R., Cavalcanti, G.D.C. and Sabourin, R.,
 35 | "FIRE-DES++: Enhanced online pruning of base classifiers for dynamic ensemble
 36 | selection"., Pattern Recognition, vol. 85, 2019, pp 149-160.
 37 | """
 38 | 
 39 | import numpy as np
 40 | from sklearn.datasets import make_classification
 41 | from sklearn.ensemble import RandomForestClassifier
 42 | from sklearn.model_selection import train_test_split
 43 | from sklearn.metrics import roc_auc_score
 44 | import matplotlib.pyplot as plt
 45 | from deslib.dcs import APosteriori
 46 | from deslib.dcs import APriori
 47 | from deslib.dcs import LCA
 48 | from deslib.dcs import OLA
 49 | from deslib.des import DESP
 50 | from deslib.des import METADES
 51 | 
 52 | rng = np.random.RandomState(654321)
 53 | 
 54 | # Generate an imbalanced classification dataset
 55 | X, y = make_classification(n_classes=2, n_samples=2000, weights=[0.05, 0.95],
 56 |                            random_state=rng)
 57 | # split the data into training and test data
 58 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
 59 |                                                     random_state=rng)
 60 | 
 61 | # Split the data into training and DSEL for DS techniques
 62 | X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train,
 63 |                                                     test_size=0.5,
 64 |                                                     random_state=rng)
 65 | # Considering a pool composed of 10 base classifiers
 66 | pool_classifiers = RandomForestClassifier(n_estimators=10, random_state=rng,
 67 |                                           max_depth=10)
 68 | pool_classifiers.fit(X_train, y_train)
 69 | 
 70 | ds_names = ['A Priori', 'A Posteriori', 'OLA', 'LCA', 'DES-P', 'META-DES']
 71 | 
 72 | # DS techniques without DFP
 73 | apriori = APriori(pool_classifiers, random_state=rng)
 74 | aposteriori = APosteriori(pool_classifiers, random_state=rng)
 75 | ola = OLA(pool_classifiers)
 76 | lca = LCA(pool_classifiers)
 77 | desp = DESP(pool_classifiers)
 78 | meta = METADES(pool_classifiers)
 79 | 
 80 | # FIRE-DS techniques (with DFP)
 81 | fire_apriori = APriori(pool_classifiers, DFP=True, random_state=rng)
 82 | fire_aposteriori = APosteriori(pool_classifiers, DFP=True, random_state=rng)
 83 | fire_ola = OLA(pool_classifiers, DFP=True)
 84 | fire_lca = LCA(pool_classifiers, DFP=True)
 85 | fire_desp = DESP(pool_classifiers, DFP=True)
 86 | fire_meta = METADES(pool_classifiers, DFP=True)
 87 | 
 88 | list_ds = [apriori, aposteriori, ola, lca, desp, meta]
 89 | list_fire_ds = [fire_apriori, fire_aposteriori, fire_ola,
 90 |                 fire_lca, fire_desp, fire_meta]
 91 | 
 92 | scores_ds = []
 93 | for ds in list_ds:
 94 |     ds.fit(X_dsel, y_dsel)
 95 |     scores_ds.append(roc_auc_score(y_test, ds.predict(X_test)))
 96 | 
 97 | scores_fire_ds = []
 98 | for fire_ds in list_fire_ds:
 99 |     fire_ds.fit(X_dsel, y_dsel)
100 |     scores_fire_ds.append(roc_auc_score(y_test, fire_ds.predict(X_test)))
101 | 
102 | ###############################################################################
103 | # Comparing DS techniques with FIRE-DES techniques
104 | # ------------------------------------------------
105 | # Let's now evaluate the DES methods on the test set. Since we are dealing with
106 | # imbalanced data, we use the area under the roc curve (AUC) as performance
107 | # metric instead of classification accuracy. The AUC can be easily calculated
108 | # using the `sklearn.metrics.roc_auc_score` function from scikit-learn.
109 | 
110 | width = 0.35
111 | ind = np.arange(len(ds_names))
112 | plt.bar(ind, scores_ds, width, label='DES', edgecolor='k')
113 | plt.bar(ind + width, scores_fire_ds, width, label='FIRE-DES', edgecolor='k')
114 | 
115 | plt.ylabel('Area under the roc curve (AUC)')
116 | plt.title('AUC Performance: DS vs FIRE-DES')
117 | plt.ylim((0.60, 0.81))
118 | plt.xticks(ind + width / 2, ds_names)
119 | plt.legend(loc='best')
120 | plt.show()
121 | 


--------------------------------------------------------------------------------
/examples/plot_influence_k_value.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | # Author: Rafael Menelau Oliveira e Cruz <rafaelmenelau@gmail.com>
 4 | #
 5 | # License: BSD 3 clause
 6 | """
 7 | ====================================================================
 8 | Measuring the influence of the region of competence
 9 | ====================================================================
10 | 
11 | This example shows how the size of the region of competence (parameter k)
12 | can influence the final performance of DS techniques.
13 | 
14 | In this example we vary the value of the parameter k from 3 to 15 and measure
15 | the performance of 7 different dynamic selection technique using the same
16 | pool of classifiers.
17 | 
18 | """
19 | 
20 | ###############################################################################
21 | # Let's start by importing all required modules. In this example we use the
22 | # new sklearn-OpenML interface to fetch the diabetes classification problem.
23 | 
24 | import matplotlib.pyplot as plt
25 | import numpy as np
26 | from sklearn.datasets import fetch_openml
27 | from sklearn.ensemble import BaggingClassifier
28 | from sklearn.linear_model import Perceptron
29 | from sklearn.model_selection import train_test_split
30 | from sklearn.preprocessing import StandardScaler
31 | 
32 | from deslib.dcs import LCA
33 | # DCS techniques
34 | from deslib.dcs import MCB
35 | from deslib.dcs import OLA
36 | from deslib.dcs import Rank
37 | # DES techniques
38 | from deslib.des import DESP
39 | from deslib.des import KNORAE
40 | from deslib.des import KNORAU
41 | 
42 | rng = np.random.RandomState(123456)
43 | 
44 | data = fetch_openml(name='diabetes', cache=False, as_frame=False)
45 | X = data.data
46 | y = data.target
47 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)
48 | 
49 | # Normalizing the dataset to have 0 mean and unit variance.
50 | scaler = StandardScaler()
51 | X_train = scaler.fit_transform(X_train)
52 | X_test = scaler.transform(X_test)
53 | 
54 | pool_classifiers = BaggingClassifier(Perceptron(max_iter=100),
55 |                                      random_state=rng)
56 | pool_classifiers.fit(X_train, y_train)
57 | 
58 | # Setting with_IH
59 | mcb = MCB(pool_classifiers, random_state=rng)
60 | ola = OLA(pool_classifiers)
61 | des_p = DESP(pool_classifiers)
62 | knu = KNORAU(pool_classifiers)
63 | lca = LCA(pool_classifiers)
64 | kne = KNORAE(pool_classifiers)
65 | rank = Rank(pool_classifiers)
66 | list_ds_methods = [mcb, ola, des_p, knu, lca, kne, rank]
67 | names = ['MCB', 'OLA', 'DES-P', 'KNORA-U', 'LCA', 'KNORA-E', 'Rank']
68 | 
69 | k_value_list = range(3, 16)
70 | 
71 | ###############################################################################
72 | # Plot accuracy x region of competence size.
73 | # -------------------------------------------
74 | # We can see the this parameter can have a huge influence in the performance
75 | # of certain DS techniques. The main exception being the KNORA-E and Rank
76 | # which have built-in mechanism to automatically adjust the region
77 | # of competence size during the competence level estimation.
78 | 
79 | fig, ax = plt.subplots()
80 | for ds_method, name in zip(list_ds_methods, names):
81 |     accuracy = []
82 |     for k in k_value_list:
83 |         ds_method.k = k
84 |         ds_method.fit(X_train, y_train)
85 |         accuracy.append(ds_method.score(X_test, y_test))
86 |     ax.plot(k_value_list, accuracy, label=name)
87 | 
88 | plt.xticks(k_value_list)
89 | ax.set_ylim(0.60, 0.80)
90 | ax.set_xlabel('Region of competence size (K value)', fontsize=13)
91 | ax.set_ylabel('Accuracy on the test set (%)', fontsize=13)
92 | ax.legend(loc='lower right')
93 | plt.show()
94 | 


--------------------------------------------------------------------------------
/examples/plot_random_forest.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | 
  3 | # Author: Rafael Menelau Oliveira e Cruz <rafaelmenelau@gmail.com>
  4 | #
  5 | # License: BSD 3 clause
  6 | """
  7 | ====================================================================
  8 | Comparing dynamic selection with Random Forest
  9 | ====================================================================
 10 | 
 11 | In this example we use a pool of classifiers generated using the Random Forest
 12 | method rather than Bagging. We also show how to change the size of the region
 13 | of competence, used to estimate the local competence of the base classifiers.
 14 | 
 15 | This demonstrates that the library accepts any kind of base classifiers as
 16 | long as they implement the predict and predict proba functions. Moreover,
 17 | any ensemble generation method such as Boosting or Rotation Trees can be used
 18 | to generate a pool containing diverse base classifiers. We also included the
 19 | performance of the RandomForest classifier as a baseline comparison.
 20 | """
 21 | 
 22 | import matplotlib.pyplot as plt
 23 | import numpy as np
 24 | from matplotlib.cm import get_cmap
 25 | from matplotlib.ticker import FuncFormatter
 26 | from sklearn.datasets import fetch_openml
 27 | # Pool of base classifiers
 28 | from sklearn.ensemble import RandomForestClassifier
 29 | from sklearn.linear_model import LogisticRegression
 30 | from sklearn.model_selection import train_test_split
 31 | 
 32 | from deslib.dcs.mcb import MCB
 33 | # Example of a dcs techniques
 34 | from deslib.dcs.ola import OLA
 35 | # Example of a des techniques
 36 | from deslib.des.des_p import DESP
 37 | from deslib.des.knora_e import KNORAE
 38 | from deslib.des.knora_u import KNORAU
 39 | from deslib.des.meta_des import METADES
 40 | # Example of stacked model
 41 | from deslib.static.stacked import StackedClassifier
 42 | 
 43 | rng = np.random.RandomState(42)
 44 | 
 45 | # Fetch a classification dataset from OpenML
 46 | data = fetch_openml(name='phoneme', version=1,
 47 |                     cache=False, as_frame=False)
 48 | X = data.data
 49 | y = data.target
 50 | # split the data into training and test data
 51 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
 52 |                                                     random_state=rng)
 53 | 
 54 | # Training a random forest to be used as the pool of classifiers.
 55 | # We set the maximum depth of the tree so that it
 56 | # can estimate probabilities
 57 | RF = RandomForestClassifier(random_state=rng, n_estimators=10)
 58 | RF.fit(X_train, y_train)
 59 | 
 60 | X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train,
 61 |                                                     test_size=0.750,
 62 |                                                     random_state=rng)
 63 | 
 64 | stacked = StackedClassifier(RF, LogisticRegression())
 65 | stacked.fit(X_dsel, y_dsel)
 66 | 
 67 | # Initialize a DS technique. Here we specify the size of
 68 | # the region of competence (5 neighbors)
 69 | knorau = KNORAU(RF, k=5, random_state=rng)
 70 | kne = KNORAE(RF, k=5, random_state=rng)
 71 | desp = DESP(RF, k=5, random_state=rng)
 72 | ola = OLA(RF, k=5, random_state=rng)
 73 | mcb = MCB(RF, k=5, random_state=rng)
 74 | meta = METADES(RF, k=5, random_state=rng)
 75 | 
 76 | # Fit the DS techniques
 77 | knorau.fit(X_dsel, y_dsel)
 78 | kne.fit(X_dsel, y_dsel)
 79 | desp.fit(X_dsel, y_dsel)
 80 | meta.fit(X_dsel, y_dsel)
 81 | ola.fit(X_dsel, y_dsel)
 82 | mcb.fit(X_dsel, y_dsel)
 83 | 
 84 | ###############################################################################
 85 | # Plotting the results
 86 | # -----------------------
 87 | # Let's now evaluate the methods on the test set.
 88 | 
 89 | rf_score = RF.score(X_test, y_test)
 90 | stacked_score = stacked.score(X_test, y_test)
 91 | knorau_score = knorau.score(X_test, y_test)
 92 | kne_score = kne.score(X_test, y_test)
 93 | desp_score = desp.score(X_test, y_test)
 94 | ola_score = ola.score(X_test, y_test)
 95 | mcb_score = mcb.score(X_test, y_test)
 96 | meta_score = meta.score(X_test, y_test)
 97 | print('Classification accuracy RF: ', rf_score)
 98 | print('Classification accuracy Stacked: ', stacked_score)
 99 | print('Evaluating DS techniques:')
100 | print('Classification accuracy KNORA-U: ', knorau_score)
101 | print('Classification accuracy KNORA-E: ', kne_score)
102 | print('Classification accuracy DESP: ', desp_score)
103 | print('Classification accuracy OLA: ', ola_score)
104 | print('Classification accuracy MCB: ', mcb_score)
105 | print('Classification accuracy META-DES: ', meta_score)
106 | 
107 | cmap = get_cmap('Dark2')
108 | colors = [cmap(i) for i in np.linspace(0, 1, 7)]
109 | labels = ['RF', 'Stacked', 'KNORA-U', 'KNORA-E', 'DESP', 'OLA', 'MCB',
110 |           'META-DES']
111 | 
112 | fig, ax = plt.subplots()
113 | pct_formatter = FuncFormatter(lambda x, pos: '{:.1f}'.format(x * 100))
114 | ax.bar(np.arange(8),
115 |        [rf_score, stacked_score, knorau_score, kne_score, desp_score,
116 |         ola_score, mcb_score, meta_score],
117 |        color=colors,
118 |        tick_label=labels)
119 | ax.set_ylim(0.65, 0.80)
120 | ax.set_xlabel('Method', fontsize=13)
121 | ax.set_ylabel('Accuracy on the test set (%)', fontsize=13)
122 | ax.yaxis.set_major_formatter(pct_formatter)
123 | for tick in ax.get_xticklabels():
124 |     tick.set_rotation(45)
125 | plt.subplots_adjust(bottom=0.15)
126 | plt.show()
127 | 


--------------------------------------------------------------------------------
/examples/plot_using_instance_hardness.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | 
  3 | # Author: Rafael Menelau Oliveira e Cruz <rafaelmenelau@gmail.com>
  4 | #
  5 | # License: BSD 3 clause
  6 | """
  7 | ====================================================================
  8 | Dynamic selection vs K-NN: Using instance hardness
  9 | ====================================================================
 10 | 
 11 | One aspect about dynamic selection techniques is that it can better deal with
 12 | the classification of test examples associated with high degree of instance
 13 | hardness. Such examples are often found close to the border of the classes,
 14 | with the majority of its neighbors belonging to different classes.
 15 | On the other hand, the KNN method, which is often used to estimate the region
 16 | of competence in DS methods works better in the classification of examples
 17 | associated with low instance hardness [1].
 18 | 
 19 | DESlib already implements a switch mechanism between DS techniques and the KNN
 20 | classifier according to the hardness level of an instance. This example
 21 | varies the threshold in which KNN is used for classification instead of DS
 22 | methods. It also compares the classification results with the standard KNN
 23 | as a baseline.
 24 | 
 25 | The switch mechanism also reduces the computational cost involved since only
 26 | part of the test samples are classified by the DS method.
 27 | 
 28 | References
 29 | ----------
 30 | [1] Cruz, Rafael MO, et al. "Dynamic Ensemble Selection VS K-NN: why and
 31 | when Dynamic Selection obtains higher classification performance?."
 32 | arXiv preprint arXiv:1804.07882 (2018).
 33 | """
 34 | 
 35 | ###############################################################################
 36 | # Let's start by importing all required modules. In this example we use the
 37 | # new sklearn-OpenML interface to fetch the diabetes classification problem.
 38 | 
 39 | import matplotlib.pyplot as plt
 40 | import numpy as np
 41 | from sklearn.datasets import fetch_openml
 42 | from sklearn.ensemble import BaggingClassifier
 43 | from sklearn.model_selection import train_test_split
 44 | from sklearn.preprocessing import StandardScaler
 45 | from sklearn.tree import DecisionTreeClassifier
 46 | 
 47 | from deslib.dcs import MCB
 48 | from deslib.dcs import OLA
 49 | from deslib.dcs import Rank
 50 | from deslib.des import DESP
 51 | from deslib.des import KNORAE
 52 | from deslib.des import KNORAU
 53 | 
 54 | rng = np.random.RandomState(123456)
 55 | 
 56 | data = fetch_openml(name='diabetes', version=1, cache=False, as_frame=False)
 57 | X = data.data
 58 | y = data.target
 59 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)
 60 | 
 61 | # Normalizing the dataset to have 0 mean and unit variance.
 62 | scaler = StandardScaler()
 63 | X_train = scaler.fit_transform(X_train)
 64 | X_test = scaler.transform(X_test)
 65 | 
 66 | # Training a pool of classifiers using the bagging technique.
 67 | pool_classifiers = BaggingClassifier(DecisionTreeClassifier(random_state=rng),
 68 |                                      random_state=rng)
 69 | pool_classifiers.fit(X_train, y_train)
 70 | 
 71 | ###############################################################################
 72 | # Setting DS method to use the switch mechanism
 73 | # ----------------------------------------------
 74 | # In order to activate the functionality to switch between DS and KNN according
 75 | # to the instance hardness level we need to set the DS techniques to use this
 76 | # information. This is done by setting the hyperparameter `with_IH` to True.
 77 | # In this example we consider four different values for te threshold
 78 | mcb = MCB(pool_classifiers, with_IH=True, random_state=rng)
 79 | ola = OLA(pool_classifiers, with_IH=True, random_state=rng)
 80 | rank = Rank(pool_classifiers, with_IH=True, random_state=rng)
 81 | des_p = DESP(pool_classifiers, with_IH=True, random_state=rng)
 82 | kne = KNORAE(pool_classifiers, with_IH=True, random_state=rng)
 83 | knu = KNORAU(pool_classifiers, with_IH=True, random_state=rng)
 84 | list_ih_values = [0.0, 1./7., 2./7., 3./7.]
 85 | 
 86 | list_ds_methods = [method.fit(X_train, y_train) for method in
 87 |                    [mcb, ola, rank, des_p, kne, knu]]
 88 | names = ['MCB', 'OLA', 'Mod. Rank', 'DES-P', 'KNORA-E', 'KNORA-U']
 89 | 
 90 | # Plot accuracy x IH
 91 | fig, ax = plt.subplots()
 92 | for ds_method, name in zip(list_ds_methods, names):
 93 |     accuracy = []
 94 |     for idx_ih, ih_rate in enumerate([0.0, 0.14, 0.28, 0.42]):
 95 |         ds_method.IH_rate = ih_rate
 96 |         accuracy.append(ds_method.score(X_test, y_test))
 97 |     ax.plot(list_ih_values, accuracy, label=name)
 98 | 
 99 | plt.xticks(list_ih_values)
100 | ax.set_ylim(0.65, 0.80)
101 | ax.set_xlabel('IH value', fontsize=13)
102 | ax.set_ylabel('Accuracy on the test set (%)', fontsize=13)
103 | ax.legend()
104 | 
105 | plt.show()
106 | 


--------------------------------------------------------------------------------
/examples/simple_example.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | # Author: Rafael Menelau Oliveira e Cruz <rafaelmenelau@gmail.com>
 4 | #
 5 | # License: BSD 3 clause
 6 | """
 7 | ====================================================================
 8 | Simple example
 9 | ====================================================================
10 | 
11 | In this example we show how to apply different DCS and DES techniques for a
12 | classification dataset.
13 | 
14 | """
15 | import numpy as np
16 | from sklearn.datasets import make_classification
17 | from sklearn.model_selection import train_test_split
18 | from deslib.des import METADES
19 | from deslib.des import KNORAE
20 | 
21 | 
22 | # Setting up the random state to have consistent results
23 | rng = np.random.RandomState(42)
24 | 
25 | # Generate a classification dataset
26 | X, y = make_classification(n_samples=1000, random_state=rng)
27 | # split the data into training and test data
28 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
29 |                                                     random_state=rng)
30 | 
31 | # Split the data into training and DSEL for DS techniques
32 | X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train,
33 |                                                     test_size=0.5,
34 |                                                     random_state=rng)
35 | 
36 | # Initialize the DS techniques. DS methods can be initialized without
37 | # specifying a single input parameter. In this example, we just pass the random
38 | # state in order to always have the same result.
39 | kne = KNORAE(random_state=rng)
40 | meta = METADES(random_state=rng)
41 | 
42 | # Fitting the des techniques
43 | kne.fit(X_dsel, y_dsel)
44 | meta.fit(X_dsel, y_dsel)
45 | 
46 | # Calculate classification accuracy of each technique
47 | print('Evaluating DS techniques:')
48 | print('Classification accuracy KNORA-Eliminate: ',
49 |       kne.score(X_test, y_test))
50 | print('Classification accuracy META-DES: ', meta.score(X_test, y_test))
51 | 


--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
 1 | numpy>=1.17.0
 2 | scipy>=1.4.0
 3 | scikit-learn>=1.0.2
 4 | sphinx
 5 | sphinx_rtd_theme
 6 | numpydoc
 7 | pytest
 8 | coverage
 9 | pytest-cov
10 | pillow
11 | sphinx_gallery
12 | matplotlib>=2
13 | nose
14 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scipy>=1.4.0
2 | numpy>=1.17.0
3 | scikit-learn>=1.0.2
4 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import codecs
 4 | import os
 5 | from distutils.core import setup
 6 | 
 7 | from setuptools import find_packages
 8 | 
 9 | setup_path = os.path.abspath(os.path.dirname(__file__))
10 | with codecs.open(os.path.join(setup_path, 'README.rst'), encoding='utf-8-sig') as f:
11 |     README = f.read()
12 | 
13 | setup(name='DESlib',
14 |       version='0.3.7',
15 |       url='https://github.com/Menelau/DESlib',
16 |       maintainer='Rafael M. O. Cruz, L. G. Hafemann',
17 |       maintainer_email='rafaelmenelau@gmail.com',
18 |       description='Implementation of Dynamic Ensemble Selection methods',
19 |       long_description=README,
20 |       author='Rafael M. O. Cruz',
21 |       author_email='rafaelmenelau@gmail.com',
22 |       license='BSD 3-clause "New" or "Revised License"',
23 | 
24 |       classifiers=[
25 |           'Development Status :: 3 - Alpha',
26 |           'Intended Audience :: Developers',
27 |           'Intended Audience :: Science/Research',
28 |           'License :: OSI Approved :: BSD License',
29 |           'Programming Language :: Python :: 3',
30 |           'Programming Language :: Python :: 3.5',
31 |           'Programming Language :: Python :: 3.6',
32 |           'Programming Language :: Python :: 3.7',
33 |           'Programming Language :: Python :: 3.8',
34 |           'Programming Language :: Python :: 3.9',
35 |           'Programming Language :: Python :: 3.10',
36 |           'Programming Language :: Python :: 3.11',
37 |           'Programming Language :: Python :: 3.12',
38 |           'Topic :: Scientific/Engineering :: Artificial Intelligence',
39 |       ],
40 |       install_requires=[
41 |           'scikit-learn>=1.0.2',
42 |           'numpy>=1.17.0',
43 |           'scipy>=1.4.0',
44 |       ],
45 |       python_requires='>=3',      
46 | 
47 |       packages=find_packages())
48 | 
49 | 


--------------------------------------------------------------------------------