├── .circleci └── config.yml ├── .coveragerc ├── .gitignore ├── .pep8speaks.yml ├── .readthedocs.yml ├── CONTRIBUTING.md ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── benchmarks ├── bench_ds_performance_faiss.py ├── bench_knn_backbone.py └── bench_speed_faiss.py ├── deslib ├── __init__.py ├── base.py ├── dcs │ ├── __init__.py │ ├── a_posteriori.py │ ├── a_priori.py │ ├── base.py │ ├── lca.py │ ├── mcb.py │ ├── mla.py │ ├── ola.py │ └── rank.py ├── des │ ├── __init__.py │ ├── base.py │ ├── des_clustering.py │ ├── des_knn.py │ ├── des_mi.py │ ├── des_p.py │ ├── knop.py │ ├── knora_e.py │ ├── knora_u.py │ ├── meta_des.py │ └── probabilistic │ │ ├── __init__.py │ │ ├── base.py │ │ ├── deskl.py │ │ ├── exponential.py │ │ ├── logarithmic.py │ │ ├── minimum_difference.py │ │ └── rrc.py ├── static │ ├── __init__.py │ ├── base.py │ ├── oracle.py │ ├── single_best.py │ ├── stacked.py │ └── static_selection.py ├── tests │ ├── __init__.py │ ├── conftest.py │ ├── dcs │ │ ├── __init__.py │ │ ├── test_a_posteriori.py │ │ ├── test_a_priori.py │ │ ├── test_base.py │ │ ├── test_lca.py │ │ ├── test_mcb.py │ │ ├── test_mla.py │ │ ├── test_ola.py │ │ └── test_rank.py │ ├── des │ │ ├── __init__.py │ │ ├── test_base.py │ │ ├── test_des_clustering.py │ │ ├── test_des_knn.py │ │ ├── test_des_mi.py │ │ ├── test_desp.py │ │ ├── test_knop.py │ │ ├── test_knorae.py │ │ ├── test_knorau.py │ │ ├── test_meta_des.py │ │ └── test_probabilistic.py │ ├── expected_values │ │ ├── des_clustering_proba_integration.npy │ │ ├── desknn_proba_integration.npy │ │ ├── desknn_probas_DFP.npy │ │ ├── desp_proba_DFP.npy │ │ ├── desp_proba_integration.npy │ │ ├── kne_knn_proba_integration.npy │ │ ├── kne_proba_DFP.npy │ │ ├── kne_proba_integration.npy │ │ ├── knop_proba_integration.npy │ │ ├── mcb_proba_DFP.npy │ │ ├── mcb_proba_integration.npy │ │ ├── ola_proba_DFP.npy │ │ └── ola_proba_integration.npy │ ├── static │ │ ├── __init__.py │ │ ├── test_oracle.py │ │ ├── test_single_best.py │ │ ├── test_stacked.py 
│ │ └── test_static_selection.py │ ├── test_base.py │ ├── test_des_integration.py │ ├── test_des_integration_multiclass.py │ ├── test_integration_DFP_IH.py │ ├── test_integration_dfp.py │ ├── test_metric.py │ └── util │ │ ├── __init__.py │ │ ├── test_aggregation.py │ │ ├── test_datasets.py │ │ ├── test_diversity.py │ │ ├── test_diversity_batch.py │ │ ├── test_faiss.py │ │ ├── test_fire.py │ │ ├── test_instance_hardness.py │ │ ├── test_knne.py │ │ └── test_prob_functions.py └── util │ ├── __init__.py │ ├── aggregation.py │ ├── datasets.py │ ├── dfp.py │ ├── diversity.py │ ├── diversity_batch.py │ ├── faiss_knn_wrapper.py │ ├── instance_hardness.py │ ├── knne.py │ └── prob_functions.py ├── docs ├── .gitignore ├── Makefile ├── _static │ └── .keep ├── api.rst ├── conf.py ├── index.rst ├── make.bat ├── modules │ ├── dcs │ │ ├── a_posteriori.rst │ │ ├── a_priori.rst │ │ ├── lca.rst │ │ ├── mcb.rst │ │ ├── mla.rst │ │ ├── ola.rst │ │ └── rank.rst │ ├── des │ │ ├── des_clustering.rst │ │ ├── des_p.rst │ │ ├── deskl.rst │ │ ├── desmi.rst │ │ ├── ds_knn.rst │ │ ├── exponential.rst │ │ ├── knop.rst │ │ ├── knora_e.rst │ │ ├── knora_u.rst │ │ ├── logarithmic.rst │ │ ├── meta_des.rst │ │ ├── minimum_difference.rst │ │ ├── probabilistic.rst │ │ └── rrc.rst │ ├── static │ │ ├── oracle.rst │ │ ├── single_best.rst │ │ ├── stacked.rst │ │ └── static_selection.rst │ └── util │ │ ├── aggregation.rst │ │ ├── datasets.rst │ │ ├── dfp.rst │ │ ├── diversity.rst │ │ ├── faiss_knn_wrapper.rst │ │ ├── instance_hardness.rst │ │ ├── knne.rst │ │ └── prob_functions.rst ├── news.rst ├── news │ ├── v0.1.rst │ ├── v0.2.rst │ ├── v0.3.5.rst │ └── v0.3.rst ├── user_guide.rst └── user_guide │ ├── development.rst │ ├── installation.rst │ ├── known_issues.rst │ ├── packaging.rst │ └── tutorial.rst ├── examples ├── README.txt ├── example_calibrating_classifiers.py ├── example_heterogeneous.py ├── plot_comparing_dynamic_static.py ├── plot_example_DFP.py ├── plot_example_P2.py ├── 
plot_influence_k_value.py ├── plot_random_forest.py ├── plot_using_instance_hardness.py ├── plot_xor_example.py └── simple_example.py ├── requirements-dev.txt ├── requirements.txt └── setup.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Python CircleCI 2.0 configuration file 2 | # 3 | # Check https://circleci.com/docs/2.0/language-python/ for more details 4 | # 5 | version: 2 6 | jobs: 7 | python3: 8 | docker: 9 | # specify the version you desire here 10 | - image: circleci/python:3.9 11 | # use `-browsers` prefix for selenium tests, e.g. `3.6.1-browsers` 12 | environment: 13 | - USERNAME: "Menelau" 14 | - DOC_REPO: "DESlib" 15 | - DOC_URL: "" 16 | - EMAIL: "rafaelmenelau@gmail.com" 17 | - MINICONDA_PATH: ~/miniconda 18 | - CONDA_ENV_NAME: testenv 19 | - PYTHON_VERSION: 3 20 | 21 | # Specify service dependencies here if necessary 22 | # CircleCI maintains a library of pre-built images 23 | # documented at https://circleci.com/docs/2.0/circleci-images/ 24 | # - image: circleci/postgres:9.4 25 | 26 | working_directory: ~/repo 27 | 28 | steps: 29 | - checkout 30 | - run: 31 | no_output_timeout: 30m 32 | command: | 33 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh 34 | chmod +x miniconda.sh && ./miniconda.sh -b -p ~/miniconda 35 | export PATH="~/miniconda/bin:$PATH" 36 | conda update --yes --quiet conda 37 | conda create -n testenv --yes --quiet python=3.9 38 | source activate testenv 39 | conda install --yes pip numpy 40 | pip install -r requirements-dev.txt 41 | pip install . 42 | cd docs 43 | make html 44 | - store_artifacts: 45 | path: docs/_build/html 46 | destination: docs 47 | - store_artifacts: 48 | path: ~/log.txt 49 | - persist_to_workspace: 50 | root: docs/_build/html 51 | paths: . 
52 | - attach_workspace: 53 | at: docs/_build/html 54 | - run: ls -ltrh docs/_build/html 55 | filters: 56 | branches: 57 | ignore: gh-pages 58 | 59 | workflows: 60 | version: 2 61 | build-doc-and-deploy: 62 | jobs: 63 | - python3 64 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = deslib 4 | include = */deslib/* 5 | omit = 6 | */setup.py 7 | deslib/tests/* 8 | 9 | [report] 10 | exclude_lines = 11 | if self.debug: 12 | pragma: no cover 13 | raise NotImplementedError 14 | if __name__ == .__main__.: 15 | ignore_errors = True 16 | show_missing = True 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Sphinx documentation 59 | docs/_build/ 60 | 61 | # IPython 62 | profile_default/ 63 | ipython_config.py 64 | 65 | # pyenv 66 | # For a library or package, you might want to ignore these files since the code is 67 | # intended to run in multiple environments; otherwise, check them in: 68 | .python-version 69 | 70 | # pipenv 71 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 72 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 73 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 74 | # install all needed dependencies. 75 | Pipfile.lock 76 | 77 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 78 | __pypackages__/ 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # Environments 84 | .env 85 | .venv 86 | dev_env/ 87 | 88 | # Spyder project settings 89 | .spyderproject 90 | .spyproject 91 | 92 | # Rope project settings 93 | .ropeproject 94 | 95 | # mkdocs documentation 96 | /site 97 | 98 | # mypy 99 | .mypy_cache/ 100 | .dmypy.json 101 | dmypy.json 102 | 103 | # Pyre type checker 104 | .pyre/ 105 | 106 | # pytype static type analyzer 107 | .pytype/ 108 | 109 | # Cython debug symbols 110 | cython_debug/ 111 | 112 | 113 | ## vscode 114 | 115 | .vscode/ -------------------------------------------------------------------------------- /.pep8speaks.yml: -------------------------------------------------------------------------------- 1 | # File : .pep8speaks.yml 2 | 3 | scanner: 4 | diff_only: True # If False, the entire file touched by the Pull Request is scanned for errors. If True, only the diff is scanned. 5 | 6 | no_blank_comment: True # If True, no comment is made on PR without any errors. 7 | descending_issues_order: False # If True, PEP 8 issues in message will be displayed in descending order of line numbers in the file 8 | only_mention_files_with_errors: True # If False, a separate status comment for each file is made. 9 | 10 | message: 11 | opened: 12 | header: "Hello @{name}! Thanks for opening this PR. " 13 | footer: "Do see the [Hitchhiker's guide to code style](https://goo.gl/hqbW4r)" 14 | updated: 15 | header: "Hello @{name}! Thanks for updating this PR. " 16 | footer: "" # Why to comment the link to the style guide everytime? :) 17 | no_errors: "There are currently no PEP 8 issues detected in this Pull Request. Cheers! 
:beers: " -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | 3 | build: 4 | image: latest 5 | 6 | python: 7 | version: 3.6 8 | setup_py_install: true -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Contributing to DESlib 2 | ======================== 3 | 4 | You can contribute to the project in several ways: 5 | 6 | - Reporting bugs 7 | - Requesting features 8 | - Improving the documentation 9 | - Adding examples to use the library 10 | - Implementing new features and fixing bugs 11 | 12 | Reporting Bugs and requesting features: 13 | --------------------------------------- 14 | 15 | We use Github issues to track all bugs and feature requests; feel free to 16 | open an issue if you have found a bug or wish to see a new feature implemented. 17 | Before opening a new issue, please check if the issue is not being currently addressed: 18 | [Issues](https://github.com/Menelau/DESlib/issues) 19 | 20 | For reporting bugs: 21 | 22 | - Include information of your working environment. This information 23 | can be found by running the following code snippet: 24 | 25 | ```python 26 | import platform; print(platform.platform()) 27 | import sys; print("Python", sys.version) 28 | import numpy; print("NumPy", numpy.__version__) 29 | import scipy; print("SciPy", scipy.__version__) 30 | import sklearn; print("Scikit-Learn", sklearn.__version__) 31 | ``` 32 | 33 | - Include a [reproducible](https://stackoverflow.com/help/mcve) code snippet 34 | or link to a [gist](https://gist.github.com). If an exception is raised, 35 | please provide the traceback. 
36 | 37 | Documentation: 38 | -------------- 39 | 40 | We are glad to accept any sort of documentation: function docstrings, 41 | reStructuredText documents (like this one), tutorials, etc. 42 | reStructuredText documents live in the source code repository under the 43 | doc/ directory. 44 | 45 | You can edit the documentation using any text editor and then generate 46 | the HTML output by typing ``make html`` from the doc/ directory. 47 | Alternatively, ``make`` can be used to quickly generate the 48 | documentation without the example gallery. The resulting HTML files will 49 | be placed in _build/html/ and are viewable in a web browser. See the 50 | README file in the doc/ directory for more information. 51 | 52 | For building the documentation, you will need to install sphinx and sphinx_rtd_theme. This 53 | can be easily done by installing the requirements for development using the following command: 54 | 55 | pip install -r requirements-dev.txt 56 | 57 | Contributing with code: 58 | ----------------------- 59 | 60 | The preferred way to contribute is to fork the main repository to your account: 61 | 62 | 1. Fork the [project repository](https://github.com/Menelau/DESlib): 63 | click on the 'Fork' button near the top of the page. This creates 64 | a copy of the code under your account on the GitHub server. 65 | 66 | 2. Clone this copy to your local disk: 67 | 68 | $ git clone git@github.com:YourLogin/DESlib.git 69 | $ cd DESlib 70 | 71 | 3. Install all requirements for development: 72 | 73 | $ pip install -r requirements-dev.txt 74 | $ pip install --editable . 75 | 76 | 4. Create a branch to hold your changes: 77 | 78 | $ git checkout -b branch_name 79 | 80 | Where ``branch_name`` is the new feature or bug to be fixed. Do not work directly on the ``master`` branch. 81 | 82 | 5. Work on this copy on your computer using Git to do the version 83 | control. 
To record your changes in Git, then push them to GitHub with: 84 | 85 | $ git push -u origin branch_name 86 | 87 | It is important to assert your code is well covered by test routines (coverage of at least 90%), well documented and 88 | follows PEP8 guidelines. 89 | 90 | 6. Create a 'Pull request' to send your changes for review. 91 | 92 | If your pull request addresses an issue, please use the title to describe 93 | the issue and mention the issue number in the pull request description to 94 | ensure a link is created to the original issue. 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2018 Rafael Menelau Oliveira e Cruz 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
"""Benchmark DESlib's KNORA-E with the sklearn vs. faiss KNN backends on the
HIGGS dataset, abandoning the sklearn run after a 10-minute timeout."""
import gzip
import os
import shutil
import threading
import time
import urllib.request

import pandas as pd
from sklearn.model_selection import train_test_split

from deslib.des.knora_e import KNORAE


def sk_KNORAE_knn(XTrain, YTrain, k, XTest, YTest):
    """Fit and score KNORA-E using the sklearn KNN region-of-competence
    estimator, printing the run time and accuracy."""
    # time.clock() was deprecated in 3.3 and removed in Python 3.8;
    # time.perf_counter() is the documented replacement for timing code.
    start = time.perf_counter()
    knorae_sk = KNORAE(k=k, knn_classifier='knn')
    knorae_sk.fit(XTrain, YTrain)
    score = knorae_sk.score(XTest, YTest)
    print("sklearn_knn_knorae run_time: {}".format(time.perf_counter() - start))
    print("sklearn_knn_knorae score: {}".format(score))


def faiss_KNORAE_knn(XTrain, YTrain, k, XTest, YTest):
    """Fit and score KNORA-E using the faiss KNN region-of-competence
    estimator, printing the run time and accuracy."""
    # perf_counter() replaces the removed time.clock() (gone in Python 3.8).
    start = time.perf_counter()
    knorae_sk = KNORAE(k=k, knn_classifier='faiss')
    knorae_sk.fit(XTrain, YTrain)
    score = knorae_sk.score(XTest, YTest)
    print("faiss_knn_knorae run_time: {}".format(time.perf_counter() - start))
    print("faiss_knn_knorae score: {}".format(score))


if __name__ == "__main__":
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/" \
          "00280/HIGGS.csv.gz"
    if not os.path.exists("../../HIGGS.csv"):
        print("Downloading HIGGS dataset from {}".format(url))
        if not os.path.exists("../../HIGGS.gz"):
            filedata = urllib.request.urlopen(url)
            data2write = filedata.read()
            with open('../../HIGGS.gz', 'wb') as f:
                f.write(data2write)
        print("Finished downloading")
        print("Extracting HIGGS.gz")
        if not os.path.exists("../../HIGGS.csv"):
            with gzip.open('../../HIGGS.gz', 'rb') as f:
                with open('../../HIGGS.csv', 'wb') as csv_out:
                    shutil.copyfileobj(f, csv_out)
        print("Extracted csv")

    df = pd.read_csv('../../HIGGS.csv', header=None)
    data = df.values
    # Column 0 is the class label; the remaining columns are features.
    X = data[:, 1:]
    Y = data[:, 0]

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33)
    num_samples_list = [1000000]
    num_of_k_list = [2, 5, 7, 10]
    num_of_test_inputs = [100, 1000, 10000]

    for nsamples in num_samples_list:
        for n_k in num_of_k_list:
            for n_t in num_of_test_inputs:
                print("running experiment: num_of_train_samples: {}, "
                      "num_of_k: {}, num_of_tests: {}".format(nsamples, n_k,
                                                              n_t))
                faiss_KNORAE_knn(X_train[:nsamples], Y_train[:nsamples], n_k,
                                 X_test[:n_t], Y_test[:n_t])
                # Run the sklearn variant in a worker thread so it can be
                # abandoned after the 600-second timeout below.
                t = threading.Thread(target=sk_KNORAE_knn, args=(
                    X_train[:nsamples], Y_train[:nsamples], n_k, X_test[:n_t],
                    Y_test[:n_t]))

                t.start()
                t.join(timeout=600)
                if t.is_alive():
                    print(
                        "sklearn_knn, num_of_train_samples: {}, num_of_k: {}, "
                        "num_of_tests: {}, run_time: timeout".format(nsamples,
                                                                     n_k,
                                                                     n_t))
"""Benchmark fitting and neighborhood-search time of the Faiss KNN wrapper's
'brute', 'voronoi' and 'hierarchical' index types over growing sample sizes,
saving a plot of the search times to knn_backbone_benchmark.png."""
import time
import matplotlib.pyplot as plt

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

from deslib.util.faiss_knn_wrapper import FaissKNNClassifier

n_samples = [1000, 10000, 100000, 1000000, 10000000]
rng = 42

faiss_brute = FaissKNNClassifier(n_neighbors=7,
                                 algorithm='brute')
faiss_voronoi = FaissKNNClassifier(n_neighbors=7,
                                   algorithm='voronoi')
faiss_hierarchical = FaissKNNClassifier(n_neighbors=7,
                                        algorithm='hierarchical')

all_knns = [faiss_brute, faiss_voronoi, faiss_hierarchical]
names = ['faiss_brute', 'faiss_voronoi', 'faiss_hierarchical']

list_fitting_time = []
list_search_time = []

for n in n_samples:

    print("Number of samples: {}" .format(n))
    X, y = make_classification(n_samples=n,
                               n_features=20,
                               random_state=rng)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)
    temp_fitting_time = []
    temp_search_time = []
    for name, knn in zip(names, all_knns):
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # recommended wall-clock timer for benchmarking.
        start = time.perf_counter()
        knn.fit(X_train, y_train)
        fitting_time = time.perf_counter() - start
        print("{} fitting time: {}" .format(name, fitting_time))

        start = time.perf_counter()
        neighbors, dists = knn.kneighbors(X_test)
        search_time = time.perf_counter() - start
        print("{} neighborhood search time: {}" .format(name, search_time))

        temp_fitting_time.append(fitting_time)
        temp_search_time.append(search_time)

    list_fitting_time.append(temp_fitting_time)
    list_search_time.append(temp_search_time)

plt.plot(n_samples, list_search_time)
plt.legend(names)
plt.xlabel("Number of samples")
plt.ylabel("K neighbors search time")
plt.savefig('knn_backbone_benchmark.png')
import gzip
import os
import shutil
import time
import urllib.request

import numpy as np
import pandas as pd
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split

from deslib.des.knora_e import KNORAE


def run_knorae(pool_classifiers, X_DSEL, y_DSEL, X_test, y_test, knn_type):
    """Fit KNORA-E on the DSEL data with the given KNN backend and time how
    long scoring the test set takes.

    Returns
    -------
    (score, elapsed) : tuple of (float, float)
        Accuracy on (X_test, y_test) and scoring wall time in seconds.
    """
    knorae = KNORAE(pool_classifiers=pool_classifiers,
                    knn_classifier=knn_type)

    knorae.fit(X_DSEL, y_DSEL)

    # time.clock() was deprecated in 3.3 and removed in Python 3.8;
    # time.perf_counter() is the documented replacement for benchmarking.
    start = time.perf_counter()
    score = knorae.score(X_test, y_test)
    end = time.perf_counter() - start

    return score, end


def fetch_HIGGS():
    """Download (if not cached next to the repo), extract and load the
    UCI HIGGS dataset.

    Returns
    -------
    X : ndarray of shape (n_samples, n_features)
        Feature columns (columns 1..end of the CSV).
    y : ndarray of shape (n_samples,)
        Class labels (column 0 of the CSV).
    """
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/" \
          "00280/HIGGS.csv.gz"
    if not os.path.exists("../../HIGGS.csv"):

        print("Downloading HIGGS dataset from {}".format(url))

        if not os.path.exists("../../HIGGS.gz"):
            filedata = urllib.request.urlopen(url)
            data2write = filedata.read()

            with open('../../HIGGS.gz', 'wb') as f:
                f.write(data2write)

        print("Finished downloading")
        print("Extracting HIGGS.gz")

        if not os.path.exists("../../HIGGS.csv"):
            with gzip.open('../../HIGGS.gz', 'rb') as f:
                with open('../../HIGGS.csv', 'wb') as csv_out:
                    shutil.copyfileobj(f, csv_out)

        print("Extracted csv")
    print('Reading CSV file')
    df = pd.read_csv('../../HIGGS.csv', header=None)
    data = df.values
    X = data[:, 1:]
    y = data[:, 0]

    return X, y
random_state=rng) 67 | 68 | X_DSEL, X_train, y_DSEL, y_train = train_test_split(X_train, y_train, 69 | test_size=0.50, 70 | random_state=rng) 71 | pool_classifiers = BaggingClassifier(n_estimators=100, 72 | random_state=rng, 73 | n_jobs=-1) 74 | 75 | print('Fitting base classifiers...') 76 | pool_classifiers.fit(X_train, y_train) 77 | 78 | n_samples = 1000000 79 | num_of_test_inputs = [100, 1000, 10000] 80 | 81 | for n_t in num_of_test_inputs: 82 | print("running experiment: num_of_DSEL_samples: {}, " 83 | "num_of_tests: {}".format(y_DSEL.size, n_t)) 84 | 85 | score_sklearn, time_sklearn = run_knorae(pool_classifiers, 86 | X_DSEL[:n_samples], 87 | y_DSEL[:n_samples], 88 | X_test[:n_t], 89 | y_test[:n_t], 90 | knn_type='knn') 91 | 92 | print("sklearn_knorae score = {}, time = {}".format(score_sklearn, 93 | time_sklearn)) 94 | 95 | score_faiss, time_faiss = run_knorae(pool_classifiers, 96 | X_DSEL[:n_samples], 97 | y_DSEL[:n_samples], 98 | X_test[:n_t], 99 | y_test[:n_t], 100 | knn_type='faiss') 101 | 102 | print("faiss_knorae score = {}, time = {}".format(score_faiss, 103 | time_faiss)) 104 | -------------------------------------------------------------------------------- /deslib/__init__.py: -------------------------------------------------------------------------------- 1 | """A Python library for Dynamic Ensemble Selection. 2 | 3 | ``DESlib`` is a library containing the implementation of the state-of-the art 4 | dynamic classifier and ensemble selection techniques. The library also provides 5 | some static ensemble methods that are used as baseline comparison. 6 | 7 | Subpackages 8 | ----------- 9 | des 10 | The implementation of several DES techniques. 11 | 12 | dcs 13 | The implementation of several DCS techniques. 14 | 15 | static 16 | The implementation of baseline ensemble methods. 17 | 18 | util 19 | A collection of aggregation functions and diversity measures for ensemble 20 | of classifiers. 
21 | """ 22 | 23 | # list of all modules available in the library 24 | __all__ = ['des', 'dcs', 'static', 'util', 'tests'] 25 | 26 | __version__ = '0.3.7' 27 | -------------------------------------------------------------------------------- /deslib/dcs/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`deslib.dcs` provides a set of key dynamic classifier selection 3 | algorithms (DCS). 4 | """ 5 | 6 | from .a_posteriori import APosteriori 7 | from .a_priori import APriori 8 | from .base import BaseDCS 9 | from .lca import LCA 10 | from .mcb import MCB 11 | from .mla import MLA 12 | from .ola import OLA 13 | from .rank import Rank 14 | 15 | __all__ = ['BaseDCS', 16 | 'APosteriori', 17 | 'APriori', 18 | 'LCA', 19 | 'OLA', 20 | 'MLA', 21 | 'MCB', 22 | 'Rank'] 23 | -------------------------------------------------------------------------------- /deslib/des/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`deslib.des` provides a set of key dynamic ensemble selection 3 | algorithms (DES). 
4 | """ 5 | 6 | from .base import BaseDES 7 | from .des_clustering import DESClustering 8 | from .des_knn import DESKNN 9 | from .des_mi import DESMI 10 | from .des_p import DESP 11 | from .knop import KNOP 12 | from .knora_e import KNORAE 13 | from .knora_u import KNORAU 14 | from .meta_des import METADES 15 | from deslib.des.probabilistic.base import BaseProbabilistic 16 | from deslib.des.probabilistic.minimum_difference import MinimumDifference 17 | from deslib.des.probabilistic.deskl import DESKL 18 | from deslib.des.probabilistic.rrc import RRC 19 | from deslib.des.probabilistic.exponential import Exponential 20 | from deslib.des.probabilistic.logarithmic import Logarithmic 21 | 22 | __all__ = ['BaseDES', 23 | 'METADES', 24 | 'KNORAE', 25 | 'KNORAU', 26 | 'KNOP', 27 | 'DESP', 28 | 'DESKNN', 29 | 'DESClustering', 30 | 'DESMI', 31 | 'BaseProbabilistic', 32 | 'RRC', 33 | 'DESKL', 34 | 'MinimumDifference', 35 | 'Exponential', 36 | 'Logarithmic'] 37 | -------------------------------------------------------------------------------- /deslib/des/probabilistic/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseProbabilistic 2 | from .deskl import DESKL 3 | from .exponential import Exponential 4 | from .logarithmic import Logarithmic 5 | from .minimum_difference import MinimumDifference 6 | from .rrc import RRC 7 | 8 | 9 | __all__ = ['BaseProbabilistic', 10 | 'DESKL', 11 | 'Exponential', 12 | 'Logarithmic', 13 | 'MinimumDifference', 14 | 'RRC'] 15 | -------------------------------------------------------------------------------- /deslib/des/probabilistic/logarithmic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from deslib.des.probabilistic import BaseProbabilistic 4 | from deslib.util import log_func 5 | 6 | 7 | class Logarithmic(BaseProbabilistic): 8 | """ This method estimates the competence of the classifier based on 9 | the 
logarithmic difference between the supports obtained by the 10 | base classifier. 11 | 12 | Parameters 13 | ---------- 14 | pool_classifiers : list of classifiers (Default = None) 15 | The generated_pool of classifiers trained for the corresponding 16 | classification problem. Each base classifiers should support the method 17 | "predict". If None, then the pool of classifiers is a bagging 18 | classifier. 19 | 20 | k : int (Default = 7) 21 | Number of neighbors used to estimate the competence of the base 22 | classifiers. 23 | 24 | DFP : Boolean (Default = False) 25 | Determines if the dynamic frienemy pruning is applied. 26 | 27 | with_IH : Boolean (Default = False) 28 | Whether the hardness level of the region of competence is used to 29 | decide between using the DS algorithm or the KNN for classification of 30 | a given query sample. 31 | 32 | safe_k : int (default = None) 33 | The size of the indecision region. 34 | 35 | IH_rate : float (default = 0.3) 36 | Hardness threshold. If the hardness level of the competence region is 37 | lower than the IH_rate the KNN classifier is used. Otherwise, the DS 38 | algorithm is used for classification. 39 | 40 | mode : String (Default = "selection") 41 | Whether the technique will perform dynamic selection, 42 | dynamic weighting or an hybrid approach for classification. 43 | 44 | random_state : int, RandomState instance or None, optional (default=None) 45 | If int, random_state is the seed used by the random number generator; 46 | If RandomState instance, random_state is the random number generator; 47 | If None, the random number generator is the RandomState instance used 48 | by `np.random`. 
49 | 50 | knn_classifier : {'knn', 'faiss', None} (Default = 'knn') 51 | The algorithm used to estimate the region of competence: 52 | 53 | - 'knn' will use :class:`KNeighborsClassifier` from sklearn 54 | 55 | - 'faiss' will use Facebook's Faiss similarity search through the 56 | class :class:`FaissKNNClassifier` 57 | 58 | - None, will use sklearn :class:`KNeighborsClassifier`. 59 | 60 | knn_metric : {'minkowski', 'cosine', 'mahalanobis'} (Default = 'minkowski') 61 | The metric used by the k-NN classifier to estimate distances. 62 | 63 | - 'minkowski' will use minkowski distance. 64 | 65 | - 'cosine' will use the cosine distance. 66 | 67 | - 'mahalanobis' will use the mahalonibis distance. 68 | 69 | DSEL_perc : float (Default = 0.5) 70 | Percentage of the input data used to fit DSEL. 71 | Note: This parameter is only used if the pool of classifier is None or 72 | unfitted. 73 | 74 | voting : {'hard', 'soft'}, default='hard' 75 | If 'hard', uses predicted class labels for majority rule voting. 76 | Else if 'soft', predicts the class label based on the argmax of 77 | the sums of the predicted probabilities, which is recommended for 78 | an ensemble of well-calibrated classifiers. 79 | 80 | n_jobs : int, default=-1 81 | The number of parallel jobs to run. None means 1 unless in 82 | a joblib.parallel_backend context. -1 means using all processors. 83 | Doesn’t affect fit method. 84 | 85 | References 86 | ---------- 87 | B. Antosik, M. Kurzynski, New measures of classifier competence 88 | – heuristics and application to the design of 89 | multiple classifier systems., in: Computer recognition systems 90 | 4., 2011, pp. 197–206. 91 | 92 | T.Woloszynski, M. Kurzynski, A measure of competence based on randomized 93 | reference classifier for dynamic ensemble selection, in: International 94 | Conference on Pattern Recognition (ICPR), 2010, pp. 4194–4197. 
95 | """ 96 | 97 | def __init__(self, pool_classifiers=None, k=None, DFP=False, with_IH=False, 98 | safe_k=None, IH_rate=0.30, mode='selection', 99 | random_state=None, knn_classifier='knn', 100 | knn_metric='minkowski', DSEL_perc=0.5, n_jobs=-1, 101 | voting='hard'): 102 | super(Logarithmic, self).__init__(pool_classifiers=pool_classifiers, 103 | k=k, 104 | DFP=DFP, 105 | with_IH=with_IH, 106 | safe_k=safe_k, 107 | IH_rate=IH_rate, 108 | mode=mode, 109 | random_state=random_state, 110 | knn_classifier=knn_classifier, 111 | knn_metric=knn_metric, 112 | DSEL_perc=DSEL_perc, 113 | n_jobs=n_jobs, 114 | voting=voting) 115 | 116 | def source_competence(self): 117 | """The source of competence C_src at the validation point 118 | :math:`\\mathbf{x}_{k}` is calculated by 119 | logarithm function in the support obtained by the base classifier. 120 | 121 | Returns 122 | ---------- 123 | C_src : array of shape (n_samples, n_classifiers) 124 | The competence source for each base classifier at each data point. 125 | """ 126 | C_src = np.zeros((self.n_samples_, self.n_classifiers_)) 127 | for clf_index in range(self.n_classifiers_): 128 | supports = self.dsel_scores_[:, clf_index, :] 129 | support_correct = supports[ 130 | np.arange(self.n_samples_), self.DSEL_target_] 131 | 132 | C_src[:, clf_index] = log_func(self.n_classes_, support_correct) 133 | 134 | return C_src 135 | -------------------------------------------------------------------------------- /deslib/des/probabilistic/rrc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from deslib.des.probabilistic import BaseProbabilistic 4 | from deslib.util import ccprmod 5 | 6 | 7 | class RRC(BaseProbabilistic): 8 | """DES technique based on the Randomized Reference Classifier method 9 | (DES-RRC). 
10 | 11 | Parameters 12 | ---------- 13 | pool_classifiers : list of classifiers (Default = None) 14 | The generated_pool of classifiers trained for the corresponding 15 | classification problem. Each base classifier should support the method 16 | "predict". If None, then the pool of classifiers is a bagging 17 | classifier. 18 | 19 | k : int (Default = 7) 20 | Number of neighbors used to estimate the competence of the base 21 | classifiers. 22 | 23 | DFP : Boolean (Default = False) 24 | Determines if the dynamic frienemy pruning is applied. 25 | 26 | with_IH : Boolean (Default = False) 27 | Whether the hardness level of the region of competence is used to 28 | decide between using the DS algorithm or the KNN for classification of 29 | a given query sample. 30 | 31 | safe_k : int (default = None) 32 | The size of the indecision region. 33 | 34 | IH_rate : float (default = 0.3) 35 | Hardness threshold. If the hardness level of the competence region is 36 | lower than the IH_rate the KNN classifier is used. Otherwise, the DS 37 | algorithm is used for classification. 38 | 39 | mode : String (Default = "selection") 40 | Whether the technique will perform dynamic selection, 41 | dynamic weighting or a hybrid approach for classification. 42 | 43 | random_state : int, RandomState instance or None, optional (default=None) 44 | If int, random_state is the seed used by the random number generator; 45 | If RandomState instance, random_state is the random number generator; 46 | If None, the random number generator is the RandomState instance used 47 | by `np.random`. 48 | 49 | knn_classifier : {'knn', 'faiss', None} (Default = 'knn') 50 | The algorithm used to estimate the region of competence: 51 | 52 | - 'knn' will use :class:`KNeighborsClassifier` from sklearn 53 | 54 | - 'faiss' will use Facebook's Faiss similarity search through the 55 | class :class:`FaissKNNClassifier` 56 | 57 | - None, will use sklearn :class:`KNeighborsClassifier`. 
58 | 59 | knn_metric : {'minkowski', 'cosine', 'mahalanobis'} (Default = 'minkowski') 60 | The metric used by the k-NN classifier to estimate distances. 61 | 62 | - 'minkowski' will use minkowski distance. 63 | 64 | - 'cosine' will use the cosine distance. 65 | 66 | - 'mahalanobis' will use the mahalanobis distance. 67 | 68 | DSEL_perc : float (Default = 0.5) 69 | Percentage of the input data used to fit DSEL. 70 | Note: This parameter is only used if the pool of classifier is None or 71 | unfitted. 72 | 73 | voting : {'hard', 'soft'}, default='hard' 74 | If 'hard', uses predicted class labels for majority rule voting. 75 | Else if 'soft', predicts the class label based on the argmax of 76 | the sums of the predicted probabilities, which is recommended for 77 | an ensemble of well-calibrated classifiers. 78 | 79 | n_jobs : int, default=-1 80 | The number of parallel jobs to run. None means 1 unless in 81 | a joblib.parallel_backend context. -1 means using all processors. 82 | Does not affect the fit method. 83 | 84 | References 85 | ---------- 86 | Woloszynski, Tomasz, and Marek Kurzynski. "A probabilistic model of 87 | classifier competence for dynamic ensemble selection." Pattern Recognition 88 | 44.10 (2011): 2656-2668. 89 | 90 | R. M. O. Cruz, R. Sabourin, and G. D. Cavalcanti, “Dynamic classifier 91 | selection: Recent advances and perspectives,” 92 | Information Fusion, vol. 41, pp. 195 – 216, 2018. 
93 | 94 | """ 95 | 96 | def __init__(self, pool_classifiers=None, k=None, DFP=False, with_IH=False, 97 | safe_k=None, IH_rate=0.30, mode='selection', 98 | random_state=None, knn_classifier='knn', 99 | knn_metric='minkowski', DSEL_perc=0.5, n_jobs=-1, 100 | voting='hard'): 101 | 102 | super(RRC, self).__init__(pool_classifiers=pool_classifiers, 103 | k=k, 104 | DFP=DFP, 105 | with_IH=with_IH, 106 | safe_k=safe_k, 107 | IH_rate=IH_rate, 108 | mode=mode, 109 | random_state=random_state, 110 | knn_classifier=knn_classifier, 111 | knn_metric=knn_metric, 112 | DSEL_perc=DSEL_perc, 113 | n_jobs=n_jobs, 114 | voting=voting) 115 | 116 | self.selection_threshold = None 117 | 118 | def source_competence(self): 119 | """ 120 | Calculates the source of competence using the randomized reference 121 | classifier (RRC) method. 122 | 123 | The source of competence C_src at the validation point 124 | :math:`\\mathbf{x}_{k}` calculated using the probabilistic model 125 | based on the supports obtained by the base classifier and 126 | randomized reference classifier (RRC) model. The probabilistic 127 | modeling of the classifier competence is calculated using 128 | the ccprmod function. 129 | 130 | Returns 131 | ---------- 132 | C_src : array of shape (n_samples, n_classifiers) 133 | The competence source for each base classifier at each data point. 
134 | """ 135 | c_src = np.zeros((self.n_samples_, self.n_classifiers_)) 136 | 137 | for clf_index in range(self.n_classifiers_): 138 | # Get supports for all samples in DSEL 139 | supports = self.dsel_scores_[:, clf_index, :] 140 | c_src[:, clf_index] = ccprmod(supports, self.DSEL_target_) 141 | 142 | return c_src 143 | -------------------------------------------------------------------------------- /deslib/static/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`deslib.static` provides a set of static ensemble methods which are 3 | often used as a baseline to compare the performance of dynamic selection 4 | algorithms. 5 | """ 6 | 7 | from .oracle import Oracle 8 | from .single_best import SingleBest 9 | from .static_selection import StaticSelection 10 | from .stacked import StackedClassifier 11 | 12 | __all__ = ['Oracle', 13 | 'SingleBest', 14 | 'StaticSelection', 15 | 'StackedClassifier'] 16 | -------------------------------------------------------------------------------- /deslib/static/base.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | from abc import abstractmethod, ABCMeta 4 | 5 | # Author: Rafael Menelau Oliveira e Cruz 6 | # 7 | # License: BSD 3 clause 8 | import numpy as np 9 | from sklearn.base import BaseEstimator, ClassifierMixin 10 | from sklearn.ensemble import BaseEnsemble, BaggingClassifier 11 | from sklearn.preprocessing import LabelEncoder 12 | from sklearn.utils.validation import check_random_state 13 | 14 | 15 | class BaseStaticEnsemble(BaseEstimator, ClassifierMixin): 16 | """Base class for static ensembles. 17 | 18 | All static ensemble techniques should inherit from this class. 19 | 20 | Warning: This class should not be instantiated directly, use derived 21 | classes instead. 
22 | 23 | Parameters 24 | ---------- 25 | pool_classifiers : list of classifiers (Default = None) 26 | The generated_pool of classifiers trained for the corresponding 27 | classification problem. Each base classifiers should support the method 28 | "predict". If None, then the pool of classifiers is a bagging 29 | classifier. 30 | 31 | random_state : int, RandomState instance or None, optional (default=None) 32 | If int, random_state is the seed used by the random number generator; 33 | If RandomState instance, random_state is the random number generator; 34 | If None, the random number generator is the RandomState instance used 35 | by `np.random`. 36 | 37 | n_jobs : int, default=-1 38 | The number of parallel jobs to run. None means 1 unless in 39 | a joblib.parallel_backend context. -1 means using all processors. 40 | Doesn’t affect fit method. 41 | 42 | References 43 | ---------- 44 | Kuncheva, Ludmila I. Combining pattern classifiers: methods and algorithms. 45 | John Wiley & Sons, 2004. 46 | 47 | R. M. O. Cruz, R. Sabourin, and G. D. Cavalcanti, “Dynamic classifier 48 | selection: Recent advances and perspectives,” 49 | Information Fusion, vol. 41, pp. 195 – 216, 2018. 50 | 51 | """ 52 | __metaclass__ = ABCMeta 53 | 54 | @abstractmethod 55 | def __init__(self, pool_classifiers=None, random_state=None, n_jobs=-1): 56 | self.pool_classifiers = pool_classifiers 57 | self.random_state = random_state 58 | self.n_jobs = n_jobs 59 | 60 | def fit(self, X, y): 61 | """Fit the model according to the given training data. 62 | 63 | Parameters 64 | ---------- 65 | X : array of shape (n_samples, n_features) 66 | Data used to fit the model. 67 | 68 | y : array of shape (n_samples) 69 | class labels of each example in X. 70 | 71 | Returns 72 | ------- 73 | self : object 74 | Returns self. 75 | """ 76 | self.random_state_ = check_random_state(self.random_state) 77 | 78 | # Check if the pool of classifiers is None. If yes, use a 79 | # BaggingClassifier for the pool. 
80 | if self.pool_classifiers is None: 81 | self.pool_classifiers_ = BaggingClassifier( 82 | random_state=self.random_state_, n_jobs=self.n_jobs) 83 | self.pool_classifiers_.fit(X, y) 84 | 85 | else: 86 | self.pool_classifiers_ = self.pool_classifiers 87 | 88 | self.n_classifiers_ = len(self.pool_classifiers_) 89 | # allow base models with feature subspaces. 90 | if hasattr(self.pool_classifiers_, "estimators_features_"): 91 | self.estimator_features_ = \ 92 | np.array(self.pool_classifiers_.estimators_features_) 93 | else: 94 | indices = np.arange(X.shape[1]) 95 | self.estimator_features_ = np.tile(indices, 96 | (self.n_classifiers_, 1)) 97 | 98 | self._validate_pool() 99 | # dealing with label encoder 100 | self._check_label_encoder() 101 | self.y_enc_ = self._setup_label_encoder(y) 102 | self.n_classes_ = self.classes_.size 103 | self.n_features_ = X.shape[1] 104 | 105 | return self 106 | 107 | def _check_label_encoder(self): 108 | # Check if base classifiers are not using LabelEncoder (the case for 109 | # scikit-learn's ensembles): 110 | if isinstance(self.pool_classifiers_, BaseEnsemble): 111 | if np.array_equal(self.pool_classifiers_.classes_, 112 | self.pool_classifiers_[0].classes_): 113 | self.base_already_encoded_ = False 114 | else: 115 | self.base_already_encoded_ = True 116 | else: 117 | self.base_already_encoded_ = False 118 | 119 | def _setup_label_encoder(self, y): 120 | """ 121 | Setup the label encoder 122 | """ 123 | self.enc_ = LabelEncoder() 124 | y_ind = self.enc_.fit_transform(y) 125 | self.classes_ = self.enc_.classes_ 126 | 127 | return y_ind 128 | 129 | def _encode_base_labels(self, y): 130 | if self.base_already_encoded_: 131 | return y 132 | else: 133 | return self.enc_.transform(y) 134 | 135 | def _validate_pool(self): 136 | """ Check the estimator and the n_estimator attribute, set the 137 | `base_estimator_` attribute. 138 | 139 | Raises 140 | ------- 141 | ValueError 142 | If the pool of classifiers is empty or just a single model. 
143 | """ 144 | if self.n_classifiers_ <= 1: 145 | raise ValueError("n_classifiers must be greater than one, " 146 | "got {}.".format(len(self.pool_classifiers))) 147 | -------------------------------------------------------------------------------- /deslib/tests/__init__.py: -------------------------------------------------------------------------------- 1 | from .conftest import * 2 | -------------------------------------------------------------------------------- /deslib/tests/dcs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/dcs/__init__.py -------------------------------------------------------------------------------- /deslib/tests/dcs/test_a_posteriori.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock 2 | 3 | import numpy as np 4 | import pytest 5 | from sklearn.linear_model import Perceptron 6 | from sklearn.utils.estimator_checks import check_estimator 7 | 8 | from deslib.dcs.a_posteriori import APosteriori 9 | 10 | 11 | def test_check_estimator(): 12 | check_estimator(APosteriori(selection_method='best')) 13 | 14 | 15 | # Should always be 1.0 since the supports for the correct class is always 1. 
16 | @pytest.mark.parametrize('index', [0, 1, 2]) 17 | def test_estimate_competence_all_ones(index, example_all_ones): 18 | _, y, neighbors, distances, dsel_processed, dsel_scores = example_all_ones 19 | 20 | query = np.atleast_2d([1, 1]) 21 | 22 | a_posteriori_test = APosteriori() 23 | a_posteriori_test.n_classifiers_ = 3 24 | a_posteriori_test.DSEL_processed_ = dsel_processed 25 | a_posteriori_test.dsel_scores_ = dsel_scores 26 | a_posteriori_test.DSEL_target_ = y 27 | 28 | neighbors = neighbors[index, :].reshape(1, -1) 29 | distances = distances[index, :].reshape(1, -1) 30 | 31 | expected = [1.0, 1.0, 1.0] 32 | predictions = np.array([0, 1, 0]) 33 | 34 | competences = a_posteriori_test.estimate_competence(neighbors, 35 | distances, 36 | predictions=np.array( 37 | predictions)) 38 | assert np.isclose(competences, expected).all() 39 | 40 | 41 | # Testing example from kuncheva's book (combining pattern classifiers) 42 | def test_estimate_competence_kuncheva_ex(example_kuncheva): 43 | query = np.atleast_2d([1, 1]) 44 | 45 | a_posteriori_test = APosteriori(k=example_kuncheva['k']) 46 | a_posteriori_test.n_classifiers_ = 1 47 | 48 | a_posteriori_test.DSEL_processed_ = example_kuncheva['dsel_processed'] 49 | a_posteriori_test.dsel_scores_ = example_kuncheva['dsel_scores'] 50 | a_posteriori_test.DSEL_target_ = example_kuncheva['y_dependent'] 51 | a_posteriori_test.n_classes_ = example_kuncheva['n_classes'] 52 | 53 | neighbors = example_kuncheva['neighbors'].reshape(1, -1) 54 | distances = example_kuncheva['distances'].reshape(1, -1) 55 | 56 | predictions = np.array([[1]]) 57 | 58 | competences = a_posteriori_test.estimate_competence(neighbors, 59 | distances, 60 | predictions=np.array( 61 | predictions)) 62 | assert np.isclose(competences, 0.95, atol=0.01) 63 | 64 | 65 | # Testing example from kuncheva's book (combining pattern classifiers) 66 | def test_estimate_competence_kuncheva_ex_batch(example_kuncheva): 67 | # considering a batch composed of 10 samples 68 | query 
= np.ones((10, 2)) 69 | classifier = MagicMock() 70 | classifier.predict.return_value = [1] 71 | classifier.predict_proba.return_value = None 72 | 73 | a_posteriori_test = APosteriori(pool_classifiers=classifier, 74 | k=example_kuncheva['k']) 75 | 76 | a_posteriori_test.n_classifiers_ = 1 77 | a_posteriori_test.DSEL_processed_ = example_kuncheva['dsel_processed'] 78 | a_posteriori_test.DSEL_target_ = example_kuncheva['y_dependent'] 79 | a_posteriori_test.dsel_scores_ = example_kuncheva['dsel_scores'] 80 | a_posteriori_test.n_classes_ = example_kuncheva['n_classes'] 81 | 82 | # repeating the same matrix in a new axis to simulate a batch input. 83 | neighbors = example_kuncheva['neighbors'] 84 | distances = example_kuncheva['distances'] 85 | 86 | predictions = [1] 87 | competences = a_posteriori_test.estimate_competence(neighbors, 88 | distances, 89 | predictions=np.array( 90 | predictions)) 91 | assert np.allclose(competences, 0.95, atol=0.01) 92 | 93 | 94 | # in this test case, the target of the neighbors is always different 95 | # than the predicted. 
So 96 | # the estimation of competence should always be zero 97 | @pytest.mark.parametrize('index', [0, 1, 2]) 98 | def test_estimate_competence_diff_target(index, example_all_ones): 99 | _, _, neighbors, distances, dsel_processed, _ = example_all_ones 100 | 101 | query = np.atleast_2d([1, 1]) 102 | a_posteriori_test = APosteriori() 103 | a_posteriori_test.n_classifiers_ = 3 104 | a_posteriori_test.DSEL_processed_ = dsel_processed 105 | a_posteriori_test.dsel_scores_ = np.ones((15, 3, 3)) 106 | a_posteriori_test.DSEL_target_ = np.ones(15, dtype=int) * 2 107 | a_posteriori_test.n_classes_ = 2 108 | 109 | neighbors = neighbors[index, :].reshape(1, -1) 110 | distances = distances[index, :].reshape(1, -1) 111 | 112 | expected = [0.0, 0.0, 0.0] 113 | 114 | predictions = np.array([0, 1, 0]) 115 | competences = a_posteriori_test.estimate_competence(neighbors, 116 | distances, 117 | predictions=np.array( 118 | predictions)) 119 | assert np.isclose(competences, expected).all() 120 | 121 | 122 | # Check if the fit method is pre-calculating the classifier scores correctly 123 | def test_fit(create_X_y, create_pool_classifiers): 124 | X, y = create_X_y 125 | a_posteriori_test = APosteriori(create_pool_classifiers) 126 | a_posteriori_test.fit(X, y) 127 | expected = np.array([[0.5, 0.5], [1.0, 0.0], [0.33, 0.67]]) 128 | expected = np.tile(expected, (15, 1, 1)) 129 | assert np.array_equal(a_posteriori_test.dsel_scores_, expected) 130 | 131 | 132 | # Test if the class is raising an error when the base classifiers do not 133 | # implements the predict_proba method. Should raise an exception when the 134 | # base classifier cannot estimate posterior probabilities (predict_proba) 135 | # Using Perceptron classifier as it does not implements predict_proba. 
136 | def test_not_predict_proba(create_X_y): 137 | X, y = create_X_y 138 | clf1 = Perceptron() 139 | clf1.fit(X, y) 140 | with pytest.raises(ValueError): 141 | APosteriori([clf1, clf1]).fit(X, y) 142 | -------------------------------------------------------------------------------- /deslib/tests/dcs/test_a_priori.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from sklearn.linear_model import Perceptron 4 | from sklearn.utils.estimator_checks import check_estimator 5 | 6 | from deslib.dcs.a_priori import APriori 7 | 8 | 9 | def test_check_estimator(): 10 | check_estimator(APriori(selection_method='best')) 11 | 12 | 13 | # Should always be 1.0 since the supports for the correct class is always 1. 14 | @pytest.mark.parametrize('index, expected', [(0, [1.0, 1.0, 1.0]), 15 | (1, [1.0, 1.0, 1.0]), 16 | (2, [1.0, 1.0, 1.0])]) 17 | def test_estimate_competence_all_ones(index, expected, example_all_ones): 18 | X, y, neighbors, distances, dsel_processed, dsel_scores = example_all_ones 19 | 20 | a_priori_test = APriori() 21 | 22 | a_priori_test.DSEL_processed_ = dsel_processed 23 | a_priori_test.dsel_scores_ = dsel_scores 24 | a_priori_test.DSEL_target_ = y 25 | a_priori_test.n_classes_ = 2 26 | 27 | neighbors = neighbors[index, :].reshape(1, -1) 28 | distances = distances[index, :].reshape(1, -1) 29 | 30 | competences = a_priori_test.estimate_competence(neighbors, distances) 31 | assert np.isclose(competences, expected).all() 32 | 33 | 34 | # Testing example from kuncheva's book (combining pattern classifiers) 35 | def test_estimate_competence_kuncheva_ex(example_kuncheva): 36 | a_priori_test = APriori(k=example_kuncheva['k']) 37 | test_example = example_kuncheva 38 | a_priori_test.DSEL_processed_ = test_example['dsel_processed'] 39 | a_priori_test.dsel_scores_ = test_example['dsel_scores'] 40 | a_priori_test.DSEL_target_ = test_example['y_independent'] 41 | a_priori_test.n_classes_ = 
test_example['n_classes'] 42 | 43 | neighbors = test_example['neighbors'].reshape(1, -1) 44 | distances = test_example['distances'].reshape(1, -1) 45 | 46 | competences = a_priori_test.estimate_competence(neighbors, distances) 47 | assert np.isclose(competences, 0.70, atol=0.01) 48 | 49 | 50 | # Test the estimate competence method receiving n samples as input 51 | def test_estimate_competence_batch(example_estimate_competence): 52 | _, y, nn, _, dsel_processed, dsel_scores = example_estimate_competence 53 | expected = np.array([[0.333333, 0.50000, 0.40000], 54 | [0.666666, 0.50000, 0.60000], 55 | [0.000000, 0.50000, 0.20000]]) 56 | 57 | # Using 3 neighbors to facilitate the calculations 58 | a_priori_test = APriori(k=3) 59 | 60 | a_priori_test.DSEL_processed_ = dsel_processed 61 | a_priori_test.dsel_scores_ = dsel_scores 62 | a_priori_test.DSEL_target_ = y 63 | a_priori_test.n_classes_ = 2 64 | 65 | nn = nn[:, 0:3] 66 | distances = np.ones((3, 3)) 67 | 68 | competences = a_priori_test.estimate_competence(nn, 69 | distances) 70 | assert np.allclose(competences, expected, atol=0.01) 71 | 72 | 73 | def test_fit(create_pool_classifiers, create_X_y): 74 | X, y = create_X_y 75 | 76 | a_priori_test = APriori(create_pool_classifiers) 77 | a_priori_test.fit(X, y) 78 | expected = np.array([[0.5, 0.5], [1.0, 0.0], [0.33, 0.67]]) 79 | expected = np.tile(expected, (15, 1, 1)) 80 | assert np.array_equal(a_priori_test.dsel_scores_, expected) 81 | 82 | 83 | # Test if the class is raising an error when the base classifiers do not 84 | # implements the predict_proba method. Should raise an exception when the 85 | # base classifier cannot estimate posterior probabilities (predict_proba) 86 | # Using Perceptron classifier as it does not implements predict_proba. 
87 | def test_not_predict_proba(create_X_y): 88 | X, y = create_X_y 89 | 90 | clf1 = Perceptron() 91 | clf1.fit(X, y) 92 | with pytest.raises(ValueError): 93 | APriori([clf1, clf1]).fit(X, y) 94 | -------------------------------------------------------------------------------- /deslib/tests/dcs/test_lca.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from sklearn.linear_model import Perceptron 4 | from sklearn.utils.estimator_checks import check_estimator 5 | 6 | from deslib.dcs.lca import LCA 7 | 8 | 9 | def test_check_estimator(): 10 | check_estimator(LCA()) 11 | 12 | 13 | def test_estimate_competence_batch(example_estimate_competence): 14 | _, y, neighbors, distances, dsel_processed, _ = example_estimate_competence 15 | 16 | expected = np.array([[0.75000000, 0.66666667, 0.75000000], 17 | [0.80000000, 1.00000000, 0.80000000], 18 | [1.00000000, 0.60000000, 0.50000000]]) 19 | lca_test = LCA() 20 | lca_test.DSEL_processed_ = dsel_processed 21 | lca_test.DSEL_target_ = y 22 | 23 | query = np.ones((3, 2)) 24 | 25 | predictions = np.array([[0, 1, 0]]) 26 | competences = lca_test.estimate_competence(neighbors, 27 | distances=distances, 28 | predictions=np.array( 29 | predictions)) 30 | 31 | assert np.isclose(competences, expected).all() 32 | 33 | 34 | # in this test case, the target of the neighbors is always different than 35 | # the predicted class. 
So the estimation of competence should always be zero 36 | @pytest.mark.parametrize('index', [0, 1, 2]) 37 | def test_estimate_competence_diff_target(index, 38 | example_estimate_competence, 39 | create_pool_classifiers): 40 | _, y, neighbors, distances, dsel_processed, _ = example_estimate_competence 41 | 42 | lca_test = LCA(create_pool_classifiers) 43 | lca_test.DSEL_processed_ = dsel_processed 44 | lca_test.DSEL_target_ = np.ones(15, dtype=int) * 3 45 | 46 | neighbors = neighbors[index, :].reshape(1, -1) 47 | distances = distances[index, :].reshape(1, -1) 48 | 49 | query = np.atleast_2d([1, 1]) 50 | expected = [0.0, 0.0, 0.0] 51 | 52 | predictions = np.array([[0, 1, 0]]) 53 | competences = lca_test.estimate_competence(neighbors, 54 | distances=distances, 55 | predictions=np.array( 56 | predictions)) 57 | 58 | assert np.isclose(competences, expected).all() 59 | 60 | 61 | # Test if the class is raising an error when the base classifiers do not 62 | # implements the predict_proba method. In this case the test should not raise 63 | # an error since this class does not require base classifiers that 64 | # can estimate probabilities 65 | def test_predict_proba(create_X_y): 66 | X, y = create_X_y 67 | 68 | clf1 = Perceptron() 69 | clf1.fit(X, y) 70 | LCA([clf1, clf1]).fit(X, y) 71 | -------------------------------------------------------------------------------- /deslib/tests/dcs/test_mcb.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from sklearn.linear_model import Perceptron 4 | from sklearn.utils.estimator_checks import check_estimator 5 | 6 | from deslib.dcs.mcb import MCB 7 | 8 | # ex1 the similarity will always be 100% 9 | bks_dsel_ex1 = np.hstack( 10 | (np.hstack((np.zeros((15, 1)), np.ones((15, 1)))), np.zeros((15, 1)))) 11 | 12 | # Change a bit to check if the filtering by similarity is working as intended. 
13 | bks_dsel_ex2 = np.hstack( 14 | (np.hstack((np.zeros((15, 1)), np.ones((15, 1)))), np.zeros((15, 1)))) 15 | bks_dsel_ex2[1, :] = 2 16 | 17 | bks_dsel_ex3 = bks_dsel_ex1 + 1 18 | 19 | 20 | def test_check_estimator(): 21 | check_estimator(MCB()) 22 | 23 | 24 | @pytest.mark.parametrize('similarity_threshold', [2.0, -1.0, -0.5]) 25 | def test_similarity_threshold(similarity_threshold, create_X_y): 26 | X, y = create_X_y 27 | with pytest.raises(ValueError): 28 | mcb = MCB(similarity_threshold=similarity_threshold) 29 | mcb.fit(X, y) 30 | 31 | 32 | @pytest.mark.parametrize('similarity_threshold', [None, 'a']) 33 | def test_similarity_threshold_type(similarity_threshold, create_X_y): 34 | X, y = create_X_y 35 | with pytest.raises(TypeError): 36 | mcb = MCB(similarity_threshold=similarity_threshold) 37 | mcb.fit(X, y) 38 | 39 | 40 | @pytest.mark.parametrize('index, expected', [(0, [0.66666666, 41 | 0.83333333, 42 | 0.66666666]), 43 | (1, [0.83333333, 44 | 1.0, 45 | 0.66666666])]) 46 | def test_estimate_competence2(index, expected, example_estimate_competence): 47 | 48 | _, _, neighbors, distances, dsel_processed, _ = example_estimate_competence 49 | 50 | mcb_test = MCB() 51 | mcb_test.n_classifiers_ = 3 52 | mcb_test.DSEL_processed_ = dsel_processed 53 | 54 | neighbors = neighbors[index, :].reshape(1, -1) 55 | distances = distances[index, :].reshape(1, -1) 56 | # Only changing the pre-processed BKS to see if the filter works. 
57 | mcb_test.BKS_DSEL_ = bks_dsel_ex2 58 | 59 | predictions = np.array([[0, 1, 0]]) 60 | 61 | competences = mcb_test.estimate_competence(neighbors, 62 | distances=distances, 63 | predictions=np.atleast_2d( 64 | predictions)) 65 | assert np.isclose(competences, expected).all() 66 | 67 | 68 | # This third test uses an totally wrong bks matrix, so that the technique 69 | # is obligated to use the whole it also considers batch processing 70 | # region of competence 71 | def test_estimate_competence_batch(example_estimate_competence): 72 | _, _, neighbors, distances, dsel_processed, _ = example_estimate_competence 73 | 74 | expected = np.array([[0.57142857, 0.71428571, 0.71428571], 75 | [0.71428571, 0.85714286, 0.71428571], 76 | [0.57142857, 0.71428571, 0.57142857]]) 77 | mcb_test = MCB() 78 | mcb_test.n_classifiers_ = 3 79 | mcb_test.DSEL_processed_ = dsel_processed 80 | 81 | # Only changing the pre-processed BKS to see if the filter works. 82 | mcb_test.BKS_DSEL_ = bks_dsel_ex3 83 | 84 | predictions = np.array([0, 1, 0]) 85 | 86 | competences = mcb_test.estimate_competence(neighbors, 87 | distances=distances, 88 | predictions=np.tile(predictions, 89 | (3, 1))) 90 | assert np.isclose(competences, expected).all() 91 | 92 | 93 | # Test if the class is raising an error when the base classifiers do not 94 | # implements the predict_proba method. 
# In this case the test should not 95 | # raise an error since this class does not require base classifiers that 96 | # can estimate probabilities 97 | def test_predict_proba(create_X_y): 98 | X, y = create_X_y 99 | 100 | clf1 = Perceptron() 101 | clf1.fit(X, y) 102 | MCB([clf1, clf1]).fit(X, y) 103 | -------------------------------------------------------------------------------- /deslib/tests/dcs/test_mla.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from sklearn.linear_model import Perceptron 4 | from sklearn.utils.estimator_checks import check_estimator 5 | 6 | from deslib.dcs.mla import MLA 7 | 8 | 9 | def test_check_estimator(): 10 | check_estimator(MLA()) 11 | 12 | 13 | # Should always be 1.0 since the supports for the correct class is always 1. 14 | @pytest.mark.parametrize('index', [0, 1, 2]) 15 | def test_estimate_competence_all_ones(index, example_all_ones): 16 | _, y, neighbors, distances, dsel_processed, dsel_scores = example_all_ones 17 | 18 | mla_test = MLA() 19 | mla_test.n_classifiers_ = 3 20 | 21 | mla_test.DSEL_processed_ = dsel_processed 22 | mla_test.DSEL_scores = dsel_scores 23 | mla_test.DSEL_target_ = y 24 | mla_test.n_classes_ = 2 25 | 26 | neighbors = neighbors[index, :].reshape(1, -1) 27 | distances = distances[index, :].reshape(1, -1) 28 | 29 | expected = [1.0, 1.0, 1.0] 30 | 31 | predictions = np.array([[0, 1, 0]]) 32 | 33 | competences = mla_test.estimate_competence(neighbors, 34 | distances=distances, 35 | predictions=predictions) 36 | 37 | assert np.isclose(competences, expected).all() 38 | 39 | 40 | def test_estimate_competence_batch(example_estimate_competence): 41 | 42 | _, y, neighbors, _, dsel_processed, _ = example_estimate_competence 43 | 44 | expected = np.array([[0.750, 0.666, 0.750], 45 | [0.800, 1.000, 0.800], 46 | [1.000, 0.600, 0.500]]) 47 | 48 | mla_test = MLA() 49 | mla_test.n_classifiers_ = 3 50 | mla_test.DSEL_processed_ = 
dsel_processed 51 | distances = np.ones((3, 7)) 52 | 53 | mla_test.DSEL_target_ = y 54 | mla_test.n_classes_ = 2 55 | predictions = np.array([[0, 1, 0]]) 56 | 57 | competences = mla_test.estimate_competence(competence_region=neighbors, 58 | distances=distances, 59 | predictions=predictions) 60 | 61 | assert np.allclose(competences, expected, atol=0.01) 62 | 63 | 64 | # in this test case, the target of the neighbors is always different than the 65 | # predicted. So the estimation of competence should always be zero 66 | @pytest.mark.parametrize('index', [0, 1, 2]) 67 | def test_estimate_competence_diff_target(index, example_estimate_competence): 68 | _, _, neighbors, distances, dsel_processed, _ = example_estimate_competence 69 | 70 | mla_test = MLA() 71 | mla_test.n_classifiers_ = 3 72 | 73 | mla_test.DSEL_processed_ = dsel_processed 74 | mla_test.DSEL_target_ = np.ones(15, dtype=int) * 3 75 | 76 | neighbors = neighbors[index, :].reshape(1, -1) 77 | distances = distances[index, :].reshape(1, -1) 78 | 79 | expected = [0.0, 0.0, 0.0] 80 | 81 | predictions = np.array([[0, 1, 0]]) 82 | 83 | competences = mla_test.estimate_competence(neighbors, 84 | distances=distances, 85 | predictions=predictions) 86 | 87 | assert np.isclose(competences, expected).all() 88 | 89 | 90 | # Testing example from kuncheva's book (combining pattern classifiers) 91 | def test_estimate_competence_kuncheva_ex(example_kuncheva): 92 | example_kuncheva = example_kuncheva 93 | 94 | mla_test = MLA(k=example_kuncheva['k']) 95 | mla_test.n_classifiers_ = 2 96 | 97 | mla_test.DSEL_processed_ = np.repeat(example_kuncheva['dsel_processed'], 98 | 2, 99 | axis=1) 100 | 101 | mla_test.dsel_scores_ = example_kuncheva['dsel_scores'] 102 | mla_test.DSEL_target_ = example_kuncheva['y_dependent'] 103 | mla_test.n_classes_ = example_kuncheva['n_classes'] 104 | 105 | neighbors = example_kuncheva['neighbors'].reshape(1, -1) 106 | distances = example_kuncheva['distances'].reshape(1, -1) 107 | 108 | predictions = 
np.array([[1, 1]]) 109 | competences = mla_test.estimate_competence(neighbors, 110 | distances=distances, 111 | predictions=predictions) 112 | 113 | assert np.allclose(competences, [0.95, 0.95], atol=0.01) 114 | 115 | 116 | # Test if the class is raising an error when the base classifiers do not 117 | # implements the predict_proba method. In this case the test should not raise 118 | # an error since this class does not require base classifiers that 119 | # can estimate probabilities 120 | def test_predict_proba(create_X_y): 121 | X, y = create_X_y 122 | 123 | clf1 = Perceptron() 124 | clf1.fit(X, y) 125 | MLA([clf1, clf1]).fit(X, y) 126 | -------------------------------------------------------------------------------- /deslib/tests/dcs/test_ola.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.linear_model import Perceptron 3 | from sklearn.utils.estimator_checks import check_estimator 4 | 5 | from deslib.dcs.ola import OLA 6 | 7 | 8 | def test_check_estimator(): 9 | check_estimator(OLA()) 10 | 11 | 12 | def test_estimate_competence_batch(example_estimate_competence): 13 | _, _, neighbors, distances, dsel_processed, _ = example_estimate_competence 14 | expected = np.array([[0.57142857, 0.71428571, 0.71428571], 15 | [0.71428571, 0.85714286, 0.71428571], 16 | [0.57142857, 0.71428571, 0.57142857]]) 17 | 18 | ola_test = OLA() 19 | ola_test.DSEL_processed_ = dsel_processed 20 | 21 | ola_test.DFP_mask = np.ones((3, 3)) 22 | competences = ola_test.estimate_competence(neighbors, 23 | distances=distances) 24 | assert np.allclose(competences, expected) 25 | 26 | 27 | # Test if the class is raising an error when the base classifiers do not 28 | # implements the predict_proba method. 
In this case the test should not raise 29 | # an error since this class does not require base classifiers that 30 | # can estimate probabilities 31 | def test_predict_proba(create_X_y): 32 | X, y = create_X_y 33 | clf1 = Perceptron() 34 | clf1.fit(X, y) 35 | OLA([clf1, clf1]).fit(X, y) 36 | -------------------------------------------------------------------------------- /deslib/tests/dcs/test_rank.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.linear_model import Perceptron 3 | from sklearn.utils.estimator_checks import check_estimator 4 | 5 | from deslib.dcs.rank import Rank 6 | 7 | 8 | def test_check_estimator(): 9 | check_estimator(Rank()) 10 | 11 | 12 | def test_estimate_competence_batch(example_estimate_competence): 13 | _, _, neighbors, distances, dsel_processed, _ = example_estimate_competence 14 | 15 | expected = np.array([[1, 5, 0], 16 | [1, 1, 2], 17 | [0, 0, 1]]) 18 | rank_test = Rank() 19 | rank_test.DSEL_processed_ = dsel_processed 20 | competences = rank_test.estimate_competence(neighbors, 21 | distances=distances) 22 | assert np.allclose(competences, expected) 23 | 24 | 25 | # Test if the class is raising an error when the base classifiers do not 26 | # implements the predict_proba method. 
In this case the test should not raise 27 | # an error since this class does not require base classifiers that 28 | # can estimate probabilities 29 | def test_predict_proba(create_X_y): 30 | X, y = create_X_y 31 | 32 | clf1 = Perceptron() 33 | clf1.fit(X, y) 34 | Rank([clf1, clf1]).fit(X, y) 35 | -------------------------------------------------------------------------------- /deslib/tests/des/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/des/__init__.py -------------------------------------------------------------------------------- /deslib/tests/des/test_des_mi.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock 2 | 3 | import numpy as np 4 | import pytest 5 | from sklearn.linear_model import Perceptron 6 | from sklearn.utils.estimator_checks import check_estimator 7 | 8 | from deslib.des.des_mi import DESMI 9 | 10 | 11 | def test_check_estimator(): 12 | check_estimator(DESMI()) 13 | 14 | 15 | # TODO: create test routine for the estimate_competence method 16 | 17 | 18 | @pytest.mark.parametrize('alpha', [-1.0, -0.5, 0.0]) 19 | def test_check_alpha_value(alpha, create_X_y): 20 | X, y = create_X_y 21 | with pytest.raises(ValueError): 22 | desmi = DESMI(alpha=alpha) 23 | desmi.fit(X, y) 24 | 25 | 26 | @pytest.mark.parametrize('alpha', ['a', None, 'string', 1]) 27 | def test_check_alpha_type(alpha, create_X_y): 28 | X, y = create_X_y 29 | with pytest.raises(TypeError): 30 | desmi = DESMI(alpha=alpha) 31 | desmi.fit(X, y) 32 | 33 | 34 | @pytest.mark.parametrize('pct_accuracy', [-1.0, -0.5, 0.0, 1.01]) 35 | def test_check_pct_accuracy_value(pct_accuracy, create_X_y): 36 | X, y = create_X_y 37 | with pytest.raises(ValueError): 38 | desmi = DESMI(pct_accuracy=pct_accuracy) 39 | desmi.fit(X, y) 40 | 41 | 42 | # Test if the class is raising 
an error when the base classifiers do not 43 | # implements the predict_proba method. 44 | # In this case the test should not raise an error since this class does not 45 | # require base classifiers that can estimate probabilities 46 | def test_require_proba(): 47 | X = np.random.randn(5, 5) 48 | y = np.array([0, 1, 0, 0, 0]) 49 | clf1 = Perceptron() 50 | clf1.fit(X, y) 51 | DESMI([clf1, clf1, clf1]) 52 | 53 | 54 | def test_select_single_sample(): 55 | des_mi = DESMI(pct_accuracy=0.7) 56 | des_mi.N_ = 2 57 | competences = np.array([0.7, 0.2, 1.0]) 58 | selected_clf = des_mi.select(competences) 59 | expected = np.array([0, 2]) 60 | assert np.array_equal(np.unique(selected_clf), np.unique(expected)) 61 | 62 | 63 | def test_select_batch_samples(): 64 | n_samples = 10 65 | des_mi = DESMI(pct_accuracy=0.7) 66 | des_mi.N_ = 2 67 | competences = np.tile(np.array([0.7, 0.2, 1.0]), (n_samples, 1)) 68 | selected_clf = des_mi.select(competences) 69 | expected = np.tile(np.array([0, 2]), (n_samples, 1)) 70 | assert np.array_equal(np.unique(selected_clf), np.unique(expected)) 71 | 72 | 73 | def test_classify_with_ds_batch_samples(): 74 | n_samples = 10 75 | 76 | # simulated predictions of the pool of classifiers 77 | predictions = np.tile(np.array([0, 1, 0]), (n_samples, 1)) 78 | 79 | desmi_test = DESMI() 80 | desmi_test.n_classes_ = 2 81 | desmi_test.estimate_competence = MagicMock( 82 | return_value=(np.ones((n_samples, 3)))) 83 | desmi_test.select = MagicMock( 84 | return_value=np.tile(np.array([[0, 2]]), (n_samples, 1))) 85 | result = desmi_test.classify_with_ds(predictions) 86 | assert np.allclose(result, np.zeros(10)) 87 | 88 | 89 | def test_predict_proba_with_ds_soft(create_pool_classifiers): 90 | expected = np.array([0.61, 0.39]) 91 | DFP_mask = np.ones((1, 6)) 92 | predictions = np.array([[0, 1, 0, 0, 1, 0]]) 93 | probabilities = np.array([[[0.5, 0.5], [1, 0], [0.33, 0.67], 94 | [0.5, 0.5], [1, 0], [0.33, 0.67]]]) 95 | pool_classifiers = create_pool_classifiers + 
create_pool_classifiers 96 | desmi_test = DESMI(pool_classifiers, DFP=True, voting='soft') 97 | desmi_test.n_classes_ = 2 98 | selected_indices = np.array([[0, 1, 5]]) 99 | desmi_test.estimate_competence = MagicMock(return_value=np.ones(6)) 100 | desmi_test.select = MagicMock(return_value=selected_indices) 101 | 102 | predicted_proba = desmi_test.predict_proba_with_ds(predictions, 103 | probabilities, 104 | DFP_mask=DFP_mask) 105 | assert np.isclose(predicted_proba, expected, atol=0.01).all() 106 | 107 | 108 | def test_predict_proba_with_ds_hard(create_pool_classifiers): 109 | expected = np.array([0.666, 0.333]) 110 | DFP_mask = np.ones((1, 6)) 111 | predictions = np.array([[0, 1, 0, 0, 1, 0]]) 112 | probabilities = np.array([[[0.5, 0.5], [1, 0], [0.33, 0.67], 113 | [0.5, 0.5], [1, 0], [0.33, 0.67]]]) 114 | pool_classifiers = create_pool_classifiers + create_pool_classifiers 115 | desmi_test = DESMI(pool_classifiers, DFP=True, voting='hard') 116 | desmi_test.n_classes_ = 2 117 | selected_indices = np.array([[0, 1, 5]]) 118 | desmi_test.estimate_competence = MagicMock(return_value=np.ones(6)) 119 | desmi_test.select = MagicMock(return_value=selected_indices) 120 | 121 | predicted_proba = desmi_test.predict_proba_with_ds(predictions, 122 | probabilities, 123 | DFP_mask=DFP_mask) 124 | assert np.isclose(predicted_proba, expected, atol=0.01).all() 125 | 126 | 127 | def test_soft_voting_no_proba(create_X_y): 128 | from sklearn.linear_model import Perceptron 129 | X, y = create_X_y 130 | clf = Perceptron() 131 | clf.fit(X, y) 132 | with pytest.raises(ValueError): 133 | DESMI([clf, clf, clf, clf], voting='soft').fit(X, y) 134 | 135 | 136 | @pytest.mark.parametrize('voting', [None, 'product', 1]) 137 | def test_wrong_voting_value(voting, create_X_y, create_pool_classifiers): 138 | X, y = create_X_y 139 | pool = create_pool_classifiers 140 | with pytest.raises(ValueError): 141 | DESMI(pool, voting=voting).fit(X, y) 142 | 
-------------------------------------------------------------------------------- /deslib/tests/des/test_desp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.linear_model import Perceptron 3 | from sklearn.utils.estimator_checks import check_estimator 4 | 5 | from deslib.des.des_p import DESP 6 | 7 | 8 | def test_check_estimator(): 9 | check_estimator(DESP()) 10 | 11 | 12 | # Test the estimate competence method receiving n samples as input 13 | def test_estimate_competence_batch(example_estimate_competence, 14 | create_pool_classifiers): 15 | X, y, neighbors, distances, dsel_processed, _ = example_estimate_competence 16 | 17 | expected = np.array([[0.57142857, 0.4285714, 0.57142857], 18 | [0.71428571, 0.2857142, 0.71428571], 19 | [0.2857142, 0.71428571, 0.2857142]]) 20 | 21 | des_p_test = DESP(create_pool_classifiers) 22 | des_p_test.fit(X, y) 23 | competences = des_p_test.estimate_competence(neighbors, distances) 24 | assert np.allclose(competences, expected, atol=0.01) 25 | 26 | 27 | def test_select_two_classes(): 28 | des_p_test = DESP() 29 | des_p_test.n_classes_ = 2 30 | expected = np.array([[True, False, True], 31 | [True, False, True], 32 | [False, True, False]]) 33 | 34 | competences = np.array([[0.51, 0.0, 0.51], 35 | [0.51, 0.0, 0.51], 36 | [0.49, 1.0, 0.49]]) 37 | 38 | selected = des_p_test.select(competences) 39 | 40 | assert np.array_equal(selected, expected) 41 | 42 | 43 | # In this example, since the number of classes is 3, the competence level 44 | # expected to be selected is > 0.33 45 | def test_select_three_classes(): 46 | des_p_test = DESP() 47 | des_p_test.n_classes_ = 3 48 | expected = np.array([[True, False, True], 49 | [True, False, True], 50 | [False, True, False]]) 51 | 52 | competences = np.array([[0.34, 0.32, 1.0], 53 | [0.50, 0.30, 1.01], 54 | [0.25, 1.0, 0.25]]) 55 | 56 | selected = des_p_test.select(competences) 57 | 58 | assert np.array_equal(selected, 
expected) 59 | 60 | 61 | def test_select_none_competent(): 62 | n_classifiers = 3 63 | des_p_test = DESP() 64 | des_p_test.n_classes_ = 2 65 | competences = np.ones(n_classifiers) * 0.49 66 | indices = des_p_test.select(competences) 67 | expected = np.array([[True, True, True]]) 68 | assert np.array_equal(expected, indices) 69 | 70 | 71 | # Test if the class is raising an error when the base classifiers do not 72 | # implements the predict_proba method. In this case the test should not raise 73 | # an error since this class does not require base classifiers that 74 | # can estimate probabilities 75 | def test_predict_proba(create_X_y): 76 | X, y = create_X_y 77 | clf1 = Perceptron() 78 | clf1.fit(X, y) 79 | DESP([clf1, clf1]).fit(X, y) 80 | -------------------------------------------------------------------------------- /deslib/tests/des/test_knop.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock 2 | 3 | import numpy as np 4 | import pytest 5 | from sklearn.linear_model import Perceptron 6 | from sklearn.utils.estimator_checks import check_estimator 7 | 8 | from deslib.des.knop import KNOP 9 | 10 | 11 | def test_check_estimator(): 12 | check_estimator(KNOP()) 13 | 14 | 15 | # Test the estimate competence method receiving n samples as input 16 | def test_estimate_competence_batch(example_estimate_competence, 17 | create_pool_classifiers): 18 | X, y, neighbors, distances, _, _ = example_estimate_competence 19 | query = np.ones((3, 2)) 20 | expected = np.array([[4.0, 3.0, 4.0], 21 | [5.0, 2.0, 5.0], 22 | [2.0, 5.0, 2.0]]) 23 | 24 | knop_test = KNOP(create_pool_classifiers) 25 | knop_test.fit(X, y) 26 | knop_test.neighbors = neighbors 27 | knop_test.distances = distances 28 | 29 | knop_test._get_similar_out_profiles = Mock(return_value=(None, neighbors)) 30 | probabilities = np.zeros((3, 6)) 31 | 32 | competences = knop_test.estimate_competence_from_proba(query, 33 | probabilities) 34 | assert 
np.allclose(competences, expected, atol=0.01) 35 | 36 | 37 | def test_weights_zero(): 38 | knop_test = KNOP() 39 | competences = np.zeros((1, 3)) 40 | result = knop_test.select(competences) 41 | 42 | assert np.all(result) 43 | 44 | 45 | def test_fit(example_estimate_competence, create_pool_classifiers): 46 | X, y = example_estimate_competence[0:2] 47 | 48 | knop_test = KNOP(create_pool_classifiers) 49 | knop_test.fit(X, y) 50 | expected_scores = np.array([[0.5, 0.5], [1.0, 0.0], [0.33, 0.67]]) 51 | expected_scores = np.tile(expected_scores, (15, 1, 1)) 52 | 53 | assert np.array_equal(expected_scores, knop_test.dsel_scores_) 54 | 55 | # Assert the roc_algorithm_ is fitted to the scores (decision space) 56 | # rather than the features (feature space) 57 | expected_roc_data = knop_test.dsel_scores_[:, :, 0] 58 | assert np.array_equal(knop_test.op_knn_._fit_X, expected_roc_data) 59 | 60 | 61 | # Test if the class is raising an error when the base classifiers do not 62 | # implements the predict_proba method. Should raise an exception when the 63 | # base classifier cannot estimate posterior probabilities (predict_proba) 64 | # Using Perceptron classifier as it does not implements predict_proba. 
65 | def test_not_predict_proba(create_X_y): 66 | X, y = create_X_y 67 | 68 | clf1 = Perceptron() 69 | clf1.fit(X, y) 70 | with pytest.raises(ValueError): 71 | knop = KNOP([clf1, clf1]) 72 | knop.fit(X, y) 73 | 74 | 75 | def test_select(): 76 | knop_test = KNOP() 77 | competences = np.ones(3) 78 | competences[0] = 0 79 | expected = np.atleast_2d([False, True, True]) 80 | selected = knop_test.select(competences) 81 | assert np.array_equal(expected, selected) 82 | -------------------------------------------------------------------------------- /deslib/tests/des/test_knorae.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from sklearn.linear_model import Perceptron 4 | from sklearn.utils.estimator_checks import check_estimator 5 | 6 | from deslib.des.knora_e import KNORAE 7 | 8 | 9 | def test_check_estimator(): 10 | check_estimator(KNORAE()) 11 | 12 | 13 | def test_estimate_competence_batch(example_estimate_competence, 14 | create_pool_classifiers): 15 | X, y, neighbors, distances, _, _ = example_estimate_competence 16 | 17 | expected = np.array([[1.0, 0.0, 1.0], 18 | [2.0, 0.0, 2.0], 19 | [0.0, 3.0, 0.0]]) 20 | 21 | knora_e_test = KNORAE(create_pool_classifiers) 22 | knora_e_test.fit(X, y) 23 | 24 | competences = knora_e_test.estimate_competence(neighbors, 25 | distances=distances) 26 | assert np.allclose(competences, expected) 27 | 28 | 29 | @pytest.mark.parametrize('index, expected', [(0, [[True, False, True]]), 30 | (1, [[True, False, True]]), 31 | (2, [[False, True, False]])]) 32 | def test_select(index, expected, create_pool_classifiers, 33 | example_estimate_competence): 34 | X, y, neighbors, distances, _, _ = example_estimate_competence 35 | 36 | knora_e_test = KNORAE(create_pool_classifiers) 37 | knora_e_test.fit(X, y) 38 | neighbors = neighbors[index, :].reshape(1, -1) 39 | distances = distances[index, :].reshape(1, -1) 40 | competences = 
knora_e_test.estimate_competence(neighbors, 41 | distances=distances) 42 | selected = knora_e_test.select(competences) 43 | 44 | assert np.array_equal(selected, expected) 45 | 46 | 47 | # No classifier here is selected, since the always predict class 2 where there 48 | # are only samples labeled as class 0 and 1 49 | # in the region of competence 50 | def test_select_none_competent(): 51 | knora_e_test = KNORAE() 52 | competences = np.zeros(100) 53 | selected = knora_e_test.select(competences) 54 | expected = np.atleast_2d([True] * 100) 55 | 56 | assert np.array_equal(expected, selected) 57 | 58 | 59 | # Test if the class is raising an error when the base classifiers do not 60 | # implements the predict_proba method. In this case the test should not raise 61 | # an error since this class does not require base classifiers that 62 | # can estimate probabilities 63 | def test_predict_proba(create_X_y): 64 | X, y = create_X_y 65 | 66 | clf1 = Perceptron() 67 | clf1.fit(X, y) 68 | KNORAE([clf1, clf1]).fit(X, y) 69 | -------------------------------------------------------------------------------- /deslib/tests/des/test_knorau.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.linear_model import Perceptron 3 | from sklearn.utils.estimator_checks import check_estimator 4 | 5 | from deslib.des.knora_u import KNORAU 6 | 7 | 8 | def test_check_estimator(): 9 | check_estimator(KNORAU()) 10 | 11 | 12 | # Test the estimate competence method receiving n samples as input 13 | def test_estimate_competence_batch(example_estimate_competence, 14 | create_pool_classifiers): 15 | 16 | X, y, neighbors = example_estimate_competence[0:3] 17 | 18 | expected = np.array([[4.0, 3.0, 4.0], 19 | [5.0, 2.0, 5.0], 20 | [2.0, 5.0, 2.0]]) 21 | knora_u_test = KNORAU(create_pool_classifiers) 22 | knora_u_test.fit(X, y) 23 | 24 | competences = knora_u_test.estimate_competence(neighbors) 25 | assert np.allclose(competences, 
expected, atol=0.01) 26 | 27 | 28 | def test_weights_zero(): 29 | knorau_test = KNORAU() 30 | competences = np.zeros((1, 3)) 31 | result = knorau_test.select(competences) 32 | 33 | assert np.all(result) 34 | 35 | 36 | # Test if the class is raising an error when the base classifiers do not 37 | # implements the predict_proba method. In this case the test should not raise 38 | # an error since this class does not require base classifiers that 39 | # can estimate probabilities 40 | def test_predict_proba(create_X_y): 41 | X, y = create_X_y 42 | 43 | clf1 = Perceptron() 44 | clf1.fit(X, y) 45 | KNORAU([clf1, clf1]).fit(X, y) 46 | 47 | 48 | def test_select(): 49 | knorau_test = KNORAU() 50 | competences = np.ones(3) 51 | competences[0] = 0 52 | expected = np.atleast_2d([False, True, True]) 53 | selected = knorau_test.select(competences) 54 | assert np.array_equal(expected, selected) 55 | -------------------------------------------------------------------------------- /deslib/tests/expected_values/des_clustering_proba_integration.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/des_clustering_proba_integration.npy -------------------------------------------------------------------------------- /deslib/tests/expected_values/desknn_proba_integration.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/desknn_proba_integration.npy -------------------------------------------------------------------------------- /deslib/tests/expected_values/desknn_probas_DFP.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/desknn_probas_DFP.npy -------------------------------------------------------------------------------- /deslib/tests/expected_values/desp_proba_DFP.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/desp_proba_DFP.npy -------------------------------------------------------------------------------- /deslib/tests/expected_values/desp_proba_integration.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/desp_proba_integration.npy -------------------------------------------------------------------------------- /deslib/tests/expected_values/kne_knn_proba_integration.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/kne_knn_proba_integration.npy -------------------------------------------------------------------------------- /deslib/tests/expected_values/kne_proba_DFP.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/kne_proba_DFP.npy -------------------------------------------------------------------------------- /deslib/tests/expected_values/kne_proba_integration.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/kne_proba_integration.npy -------------------------------------------------------------------------------- /deslib/tests/expected_values/knop_proba_integration.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/knop_proba_integration.npy -------------------------------------------------------------------------------- /deslib/tests/expected_values/mcb_proba_DFP.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/mcb_proba_DFP.npy -------------------------------------------------------------------------------- /deslib/tests/expected_values/mcb_proba_integration.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/mcb_proba_integration.npy -------------------------------------------------------------------------------- /deslib/tests/expected_values/ola_proba_DFP.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/ola_proba_DFP.npy -------------------------------------------------------------------------------- /deslib/tests/expected_values/ola_proba_integration.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/expected_values/ola_proba_integration.npy -------------------------------------------------------------------------------- /deslib/tests/static/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/static/__init__.py -------------------------------------------------------------------------------- /deslib/tests/static/test_oracle.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.datasets import make_classification 3 | from sklearn.ensemble import RandomForestClassifier 4 | 5 | from deslib.static.oracle import Oracle 6 | 7 | 8 | def test_predict(create_X_y, create_pool_classifiers): 9 | X, y = create_X_y 10 | 11 | oracle_test = Oracle(create_pool_classifiers) 12 | oracle_test.fit(X, y) 13 | predicted_labels = oracle_test.predict(X, y) 14 | assert np.equal(predicted_labels, y).all() 15 | 16 | assert oracle_test.score(X, y) == 1.0 17 | 18 | 19 | # All classifiers predicts the same label. This test only the samples 20 | # with label == 0 are correctly classified by the Oracle. 21 | # The misclassified samples are set to -1. 
22 | def test_predict_all_same(create_X_y, create_pool_all_agree): 23 | X, y = create_X_y 24 | 25 | expected = y 26 | oracle_test = Oracle(create_pool_all_agree) 27 | oracle_test.fit(X, y) 28 | expected[expected == 1] = 0 29 | predicted_labels = oracle_test.predict(X, y) 30 | assert np.equal(predicted_labels, expected).all() 31 | 32 | 33 | def test_predict_proba_shape(): 34 | n_test_samples = 200 35 | X, y = make_classification(n_samples=1000) 36 | X_test, y_test = make_classification(n_samples=n_test_samples) 37 | pool = RandomForestClassifier(max_depth=3).fit(X, y) 38 | oracle = Oracle(pool_classifiers=pool).fit(X, y) 39 | 40 | proba = oracle.predict_proba(X_test, y_test) 41 | assert proba.shape == (n_test_samples, 2) 42 | 43 | 44 | def test_predict_proba_right_class(): 45 | n_test_samples = 200 46 | X, y = make_classification(n_samples=1000) 47 | X_test, y_test = make_classification(n_samples=n_test_samples) 48 | pool = RandomForestClassifier(max_depth=3).fit(X, y) 49 | oracle = Oracle(pool_classifiers=pool).fit(X, y) 50 | 51 | preds = oracle.predict(X_test, y_test) 52 | proba = oracle.predict_proba(X_test, y_test) 53 | probas_max = np.argmax(proba, axis=1) 54 | assert np.allclose(probas_max, preds) 55 | 56 | 57 | def test_label_encoder_base_ensemble(): 58 | from sklearn.ensemble import RandomForestClassifier 59 | X, y = make_classification() 60 | y[y == 1] = 2 61 | y = y.astype(float) 62 | pool = RandomForestClassifier().fit(X, y) 63 | oracle = Oracle(pool) 64 | oracle.fit(X, y) 65 | pred = oracle.predict(X, y) 66 | assert np.isin(oracle.classes_, pred).all() 67 | -------------------------------------------------------------------------------- /deslib/tests/static/test_single_best.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock 2 | 3 | import numpy as np 4 | import pytest 5 | from sklearn.datasets import make_classification 6 | from sklearn.ensemble import AdaBoostClassifier 7 | from 
sklearn.exceptions import NotFittedError 8 | from sklearn.metrics import roc_auc_score 9 | from sklearn.utils.estimator_checks import check_estimator 10 | 11 | from deslib.static.single_best import SingleBest 12 | 13 | 14 | def test_check_estimator(): 15 | check_estimator(SingleBest()) 16 | 17 | 18 | # Testing if the fit function selects the correct classifier (the one with 19 | # highest classification accuracy). # Note: clf[0] and clf[2] have the 20 | # same accuracy since they always predict the same label. 21 | def test_fit(create_X_y, create_pool_classifiers): 22 | X, y = create_X_y 23 | 24 | pool_classifiers = create_pool_classifiers 25 | single_best_test = SingleBest(pool_classifiers) 26 | single_best_test._estimate_performances = MagicMock( 27 | return_value=[1.0, 0.5, 0.99]) 28 | 29 | single_best_test.fit(X, y) 30 | 31 | assert single_best_test.best_clf_index_ == 0 32 | 33 | 34 | # The classifier with highest accuracy always predicts 0. So the expected 35 | # prediction should always be equal zero. 36 | def test_predict(create_X_y, create_pool_classifiers): 37 | X, y = create_X_y 38 | 39 | pool_classifiers = create_pool_classifiers 40 | single_best_test = SingleBest(pool_classifiers=pool_classifiers) 41 | single_best_test.fit(X, y) 42 | 43 | predicted_labels = single_best_test.predict(X) 44 | assert np.equal(predicted_labels, 0).all() 45 | 46 | 47 | # The probabilities predicted must always be equals to the probabilities 48 | # predicted by the base classifier with index 0. 
49 | def test_predict_proba(create_X_y, create_pool_classifiers): 50 | X, y = create_X_y 51 | 52 | pool_classifiers = create_pool_classifiers 53 | single_best_test = SingleBest(pool_classifiers) 54 | single_best_test.fit(X, y) 55 | 56 | predicted_proba = single_best_test.predict_proba(X) 57 | assert np.equal(predicted_proba, 58 | pool_classifiers[0].predict_proba(X)).all() 59 | 60 | 61 | def test_not_fitted(): 62 | single_best_test = SingleBest() 63 | with pytest.raises(NotFittedError): 64 | single_best_test.predict(np.array([[1, -1]])) 65 | 66 | 67 | # Test calling the predict_proba function with classifiers that do not 68 | # implement the predict_proba 69 | def test_not_predict_proba(create_X_y): 70 | X, y = create_X_y 71 | 72 | classifier = MagicMock() 73 | classifier.predict.return_value = [0] 74 | single_best_test = SingleBest([classifier] * 10) 75 | single_best_test.fit(X, y) 76 | with pytest.raises(ValueError): 77 | single_best_test.predict_proba(X) 78 | 79 | 80 | def test_label_encoder(create_label_encoder_test): 81 | X, y, pool = create_label_encoder_test 82 | sb = SingleBest(pool).fit(X, y) 83 | pred = sb.predict(X) 84 | assert np.array_equal(pred, y) 85 | 86 | 87 | def test_label_encoder_base_ensemble(): 88 | from sklearn.ensemble import RandomForestClassifier 89 | X, y = make_classification() 90 | y[y == 1] = 2 91 | y = y.astype(float) 92 | pool = RandomForestClassifier().fit(X, y) 93 | sb = SingleBest(pool) 94 | sb.fit(X, y) 95 | pred = sb.predict(X) 96 | assert np.isin(sb.classes_, pred).all() 97 | 98 | 99 | def test_different_scorer(): 100 | X, y = make_classification(n_samples=100, random_state=42) 101 | X_val, y_val = make_classification(n_samples=25, random_state=123) 102 | pool = AdaBoostClassifier(n_estimators=10).fit(X, y) 103 | performances = [] 104 | for clf in pool: 105 | preds = clf.predict_proba(X_val) 106 | performances.append(roc_auc_score(y_val.ravel(), preds[:, -1])) 107 | id_best = np.argmax(performances) 108 | sb = 
SingleBest(pool_classifiers=pool, scoring='roc_auc') 109 | sb.fit(X_val, y_val) 110 | assert id_best == sb.best_clf_index_ 111 | -------------------------------------------------------------------------------- /deslib/tests/static/test_stacked.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from sklearn.datasets import make_classification 4 | from sklearn.linear_model import Perceptron 5 | from sklearn.tree import DecisionTreeClassifier 6 | from sklearn.utils.estimator_checks import check_estimator 7 | 8 | from deslib.static.stacked import StackedClassifier 9 | 10 | 11 | def test_check_estimator(): 12 | check_estimator(StackedClassifier()) 13 | 14 | 15 | # Test if the class is raising an error when the base classifiers do not 16 | # implements the predict_proba method. Should raise an exception when the 17 | # base classifier cannot estimate posterior probabilities (predict_proba) 18 | # Using Perceptron classifier as it does not implements predict_proba. 19 | def test_not_predict_proba(create_X_y): 20 | X, y = create_X_y 21 | 22 | clf1 = Perceptron() 23 | clf1.fit(X, y) 24 | with pytest.raises(ValueError): 25 | StackedClassifier([clf1, clf1]).fit(X, y) 26 | 27 | 28 | # Test if the class is raising an error when the meta classifiers do not 29 | # implements the predict_proba method. Should raise an exception when the 30 | # base classifier cannot estimate posterior probabilities (predict_proba) 31 | # Using Perceptron classifier as it does not implements predict_proba. 
def test_not_predict_proba_meta(create_X_y, create_pool_classifiers):
    X, y = create_X_y

    pool = create_pool_classifiers
    with pytest.raises(ValueError):
        meta_clf = StackedClassifier(pool_classifiers=pool,
                                     meta_classifier=Perceptron())
        meta_clf.fit(X, y)
        # NOTE(review): if fit() already raises, this line is never reached;
        # the test passes as long as either call raises ValueError.
        meta_clf.predict_proba(X)


# String class labels ('one'/'zero'/'two') must round-trip unchanged
# through the internal label encoding.
def test_label_encoder():
    y = ['one', 'one', 'one', 'zero', 'zero', 'two']
    X = np.random.rand(6, 3)
    pool = [DecisionTreeClassifier().fit(X, y) for _ in range(5)]
    stacked = StackedClassifier(pool).fit(X, y)
    pred = stacked.predict(X)
    assert np.array_equal(pred, y)


# Pool given as a fitted BaseEnsemble with non-contiguous float labels
# ({0.0, 2.0}): predictions must come back in the original label space.
def test_label_encoder_base_ensemble():
    from sklearn.ensemble import RandomForestClassifier
    X, y = make_classification()
    y[y == 1] = 2
    y = y.astype(float)
    pool = RandomForestClassifier().fit(X, y)
    st = StackedClassifier(pool)
    st.fit(X, y)
    pred = st.predict(X)
    assert np.isin(st.classes_, pred).all()


# Fitting on a meta dataset containing a single class must be rejected.
def test_one_class_meta_dataset(create_X_y):
    X, y = create_X_y
    pool = [DecisionTreeClassifier().fit(X, y) for _ in range(5)]
    stacked = StackedClassifier(pool)
    X_meta = np.random.rand(10, 2)
    y_meta = np.zeros(10, dtype=int)
    with pytest.raises(ValueError):
        stacked.fit(X_meta, y_meta)


# With passthrough=True the meta-classifier sees the 5 base-classifier
# outputs plus the 2 original features -> 7 coefficients.
def test_passthrough_true(create_X_y):
    X, y = create_X_y
    pool = [DecisionTreeClassifier().fit(X, y) for _ in range(5)]
    stacked = StackedClassifier(pool, passthrough=True)
    stacked.fit(X, y)
    assert stacked.meta_classifier_.coef_.shape == (1, 7)


# With passthrough=False only the 5 base-classifier outputs are used.
def test_passthrough_false(create_X_y):
    X, y = create_X_y
    pool = [DecisionTreeClassifier().fit(X, y) for _ in range(5)]
    stacked = StackedClassifier(pool, passthrough=False)
    stacked.fit(X, y)
    assert stacked.meta_classifier_.coef_.shape == (1, 5)


# A pool with a single model is not a valid ensemble and must be rejected.
def test_single_model_pool(create_X_y):
    X, y = create_X_y
    pool = [DecisionTreeClassifier().fit(X, y)]
    with pytest.raises(ValueError):
        StackedClassifier(pool_classifiers=pool).fit(X, y)
--------------------------------------------------------------------------------
/deslib/tests/static/test_static_selection.py:
--------------------------------------------------------------------------------
import numpy as np
import pytest
from sklearn.datasets import make_classification
from sklearn.ensemble import AdaBoostClassifier
from sklearn.exceptions import NotFittedError
from sklearn.metrics import log_loss
from sklearn.utils.estimator_checks import check_estimator

from deslib.static.static_selection import StaticSelection


# StaticSelection must pass scikit-learn's estimator-contract checks.
def test_check_estimator():
    check_estimator(StaticSelection())


# Testing if the fit function selects the correct classifiers.
# The 50 last classifiers should be selected.
def test_fit(example_static_selection):
    X, y, pool = example_static_selection
    # pct_classifiers=0.5 keeps half of the 100-classifier pool.
    static_selection_test = StaticSelection(pool, 0.5)
    static_selection_test.fit(X, y)

    assert static_selection_test.n_classifiers_ensemble_ == 50
    assert static_selection_test.n_classifiers_ensemble_ == len(
        static_selection_test.clf_indices_)
    assert np.array_equal(np.sort(static_selection_test.clf_indices_),
                          list(range(50, 100)))


# The classifier with highest accuracy always predicts 0. So the expected
# prediction should always be equal zero.
def test_predict(example_static_selection, create_pool_classifiers):
    X, y, _ = example_static_selection

    static_selection_test = StaticSelection(create_pool_classifiers*10, 0.25)
    static_selection_test.fit(X, y)

    predicted_labels = static_selection_test.predict(X)
    assert np.equal(predicted_labels, 0).all()


# Classifiers predicting different labels are selected
def test_predict_diff(example_static_selection):
    X, y, pool = example_static_selection

    static_selection_test = StaticSelection(pool, 0.75)
    static_selection_test.fit(X, y)

    predicted_labels = static_selection_test.predict(X)
    assert np.equal(predicted_labels, 1).all()


# Calling predict before fit must raise NotFittedError.
def test_not_fitted():
    static_selection_test = StaticSelection()
    with pytest.raises(NotFittedError):
        static_selection_test.predict(np.array([[1, -1]]))


# pct_classifiers of a non-numeric type must raise TypeError at fit time.
def test_invalid_pct():
    with pytest.raises(TypeError):
        test = StaticSelection(pct_classifiers='something')
        test.fit(np.random.rand(10, 2), np.ones(10))


# pct_classifiers outside (0, 1] must raise ValueError at fit time.
def test_invalid_pct2():
    with pytest.raises(ValueError):
        test = StaticSelection(pct_classifiers=1.2)
        test.fit(np.random.rand(10, 2), np.ones(10))


# Encoded labels must round-trip through the internal label encoder.
def test_label_encoder(create_label_encoder_test):
    X, y, pool = create_label_encoder_test
    static = StaticSelection(pool).fit(X, y)
    pred = static.predict(X)
    assert np.array_equal(pred, y)


# Pool given as a fitted BaseEnsemble with non-contiguous float labels
# ({0.0, 2.0}): predictions must come back in the original label space.
def test_label_encoder_base_ensemble():
    from sklearn.ensemble import RandomForestClassifier
    X, y = make_classification()
    y[y == 1] = 2
    y = y.astype(float)
    pool = RandomForestClassifier().fit(X, y)
    ss = StaticSelection(pool)
    ss.fit(X, y)
    pred = ss.predict(X)
    assert np.isin(ss.classes_, pred).all()


# predict_proba must average the selected classifiers' probabilities;
# the expected [0.52, 0.48] row comes from the fixture's pool make-up.
def test_predict_proba(example_static_selection):
    X, y, pool = example_static_selection
    expected = np.tile([0.52, 0.48], (y.size, 1))
    static_selection_test = StaticSelection(pool, 0.5)
    static_selection_test.fit(X, y)
    proba = static_selection_test.predict_proba(X)
    assert np.allclose(proba, expected)


# Test if static_selection can select the best classifier according to a
# metric that needs to be minimized.
# NOTE(review): this exact function is duplicated verbatim further down in
# this file; under pytest collection the later definition shadows this one,
# so this copy never runs. One of the two copies should be removed (or
# renamed if a distinct scenario was intended) — TODO confirm upstream.
def test_different_scorer():
    X, y = make_classification(n_samples=100, random_state=42)
    X_val, y_val = make_classification(n_samples=25, random_state=123)
    pool = AdaBoostClassifier(n_estimators=10).fit(X, y)
    performances = []
    for clf in pool:
        preds = clf.predict_proba(X_val)
        performances.append(log_loss(y_val.ravel(), preds[:, -1]))
    # log_loss is minimized, so ascending argsort ranks best-first;
    # 'neg_log_loss' scoring must reproduce that ranking.
    id_best = np.argsort(performances)
    ss = StaticSelection(pool_classifiers=pool, scoring='neg_log_loss')
    ss.fit(X_val, y_val)
    assert (id_best[:ss.n_classifiers_ensemble_] == ss.clf_indices_).all()


# Test if static_selection can select the best classifier according to a
# metric that needs to be minimized.
117 | def test_different_scorer(): 118 | X, y = make_classification(n_samples=100, random_state=42) 119 | X_val, y_val = make_classification(n_samples=25, random_state=123) 120 | pool = AdaBoostClassifier(n_estimators=10).fit(X, y) 121 | performances = [] 122 | for clf in pool: 123 | preds = clf.predict_proba(X_val) 124 | performances.append(log_loss(y_val.ravel(), preds[:, -1])) 125 | id_best = np.argsort(performances) 126 | ss = StaticSelection(pool_classifiers=pool, scoring='neg_log_loss') 127 | ss.fit(X_val, y_val) 128 | assert (id_best[:ss.n_classifiers_ensemble_] == ss.clf_indices_).all() 129 | -------------------------------------------------------------------------------- /deslib/tests/test_des_integration_multiclass.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.datasets import make_classification 3 | from sklearn.ensemble import AdaBoostClassifier 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.preprocessing import StandardScaler 6 | 7 | # DCS techniques 8 | from deslib.dcs.a_priori import APriori 9 | from deslib.dcs.mcb import MCB 10 | # DES techniques 11 | from deslib.des.des_mi import DESMI 12 | from deslib.des.des_p import DESP 13 | from deslib.des.knop import KNOP 14 | from deslib.des.meta_des import METADES 15 | 16 | 17 | def setup_classifiers(): 18 | rng = np.random.RandomState(123456) 19 | 20 | X_dsel, X_test, X_train, y_dsel, y_test, y_train = load_dataset(rng) 21 | # Train a pool of 100 classifiers 22 | pool_classifiers = AdaBoostClassifier(random_state=rng) 23 | pool_classifiers.fit(X_train, y_train) 24 | return pool_classifiers, X_dsel, y_dsel, X_test, y_test 25 | 26 | 27 | def load_dataset(rng): 28 | # Generate a classification dataset 29 | weights = [0.1, 0.2, 0.7] 30 | X, y = make_classification(n_classes=3, n_samples=2000, n_informative=3, 31 | random_state=rng, weights=weights) 32 | 33 | # split the data into training and test data 34 | 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, 35 | random_state=rng) 36 | # Scale the variables to have 0 mean and unit variance 37 | scalar = StandardScaler() 38 | X_train = scalar.fit_transform(X_train) 39 | X_test = scalar.transform(X_test) 40 | # Split the data into training and DSEL for DS techniques 41 | X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train, 42 | test_size=0.5, 43 | random_state=rng) 44 | # Considering a pool composed of 10 base classifiers 45 | # Calibrating Perceptrons to estimate probabilities 46 | return X_dsel, X_test, X_train, y_dsel, y_test, y_train 47 | 48 | 49 | def test_desp(): 50 | pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers() 51 | 52 | desp = DESP(pool_classifiers) 53 | desp.fit(X_dsel, y_dsel) 54 | assert np.isclose(desp.score(X_test, y_test), 0.6954545454545454) 55 | 56 | 57 | def test_mcb(): 58 | pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers() 59 | rng = np.random.RandomState(123456) 60 | 61 | mcb = MCB(pool_classifiers, random_state=rng) 62 | mcb.fit(X_dsel, y_dsel) 63 | assert np.isclose(mcb.score(X_test, y_test), 0.7196969696969697) 64 | 65 | 66 | def test_apriori(): 67 | pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers() 68 | rng = np.random.RandomState(123456) 69 | 70 | apriori = APriori(pool_classifiers, random_state=rng) 71 | apriori.fit(X_dsel, y_dsel) 72 | assert np.isclose(apriori.score(X_test, y_test), 0.6878787878787879) 73 | 74 | 75 | def test_meta(): 76 | pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers() 77 | 78 | meta_des = METADES(pool_classifiers) 79 | meta_des.fit(X_dsel, y_dsel) 80 | assert np.isclose(meta_des.score(X_test, y_test), 0.796969696969697) 81 | 82 | 83 | def test_knop(): 84 | pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers() 85 | 86 | knop = KNOP(pool_classifiers) 87 | knop.fit(X_dsel, y_dsel) 88 | assert np.isclose(knop.score(X_test, y_test), 
0.8106060606060606) 89 | 90 | 91 | def test_mi(): 92 | pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers() 93 | 94 | desmi = DESMI(pool_classifiers, alpha=0.9) 95 | desmi.fit(X_dsel, y_dsel) 96 | assert np.isclose(desmi.score(X_test, y_test), 0.3500000000) 97 | -------------------------------------------------------------------------------- /deslib/tests/test_integration_DFP_IH.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.calibration import CalibratedClassifierCV 3 | from sklearn.datasets import make_classification 4 | from sklearn.ensemble import BaggingClassifier 5 | from sklearn.linear_model import Perceptron 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.preprocessing import StandardScaler 8 | 9 | # DCS techniques 10 | from deslib.dcs.a_posteriori import APosteriori 11 | from deslib.dcs.mcb import MCB 12 | from deslib.dcs.ola import OLA 13 | from deslib.des import DESClustering 14 | # DES techniques 15 | from deslib.des.des_p import DESP 16 | from deslib.des.knora_u import KNORAU 17 | 18 | 19 | def setup_classifiers(): 20 | rng = np.random.RandomState(654321) 21 | 22 | # Generate a classification dataset 23 | X, y = make_classification(n_classes=2, n_samples=1000, weights=[0.2, 0.8], 24 | random_state=rng) 25 | # split the data into training and test data 26 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, 27 | random_state=rng) 28 | 29 | # Scale the variables to have 0 mean and unit variance 30 | scalar = StandardScaler() 31 | X_train = scalar.fit_transform(X_train) 32 | X_test = scalar.transform(X_test) 33 | 34 | # Split the data into training and DSEL for DS techniques 35 | X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train, 36 | test_size=0.5, 37 | random_state=rng) 38 | # Considering a pool composed of 10 base classifiers 39 | model = CalibratedClassifierCV(Perceptron(max_iter=100), cv=5) 
40 | 41 | pool_classifiers = BaggingClassifier(model, n_estimators=100, n_jobs=-1, 42 | random_state=rng) 43 | pool_classifiers.fit(X_train, y_train) 44 | return pool_classifiers, X_dsel, y_dsel, X_test, y_test 45 | 46 | 47 | def test_knorau(): 48 | pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers() 49 | 50 | knorau = KNORAU(pool_classifiers, DFP=True, with_IH=True, IH_rate=0.1) 51 | knorau.fit(X_dsel, y_dsel) 52 | assert np.isclose(knorau.score(X_test, y_test), 0.9) 53 | 54 | 55 | def test_desp(): 56 | pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers() 57 | 58 | desp = DESP(pool_classifiers, DFP=True, with_IH=True, IH_rate=0.1) 59 | desp.fit(X_dsel, y_dsel) 60 | assert np.isclose(desp.score(X_test, y_test), 0.90) 61 | 62 | 63 | def test_ola(): 64 | pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers() 65 | 66 | ola = OLA(pool_classifiers, DFP=True, with_IH=True, IH_rate=0.1) 67 | ola.fit(X_dsel, y_dsel) 68 | assert np.isclose(ola.score(X_test, y_test), 0.9030303030303031) 69 | 70 | 71 | def test_mcb(): 72 | pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers() 73 | rng = np.random.RandomState(123456) 74 | 75 | mcb = MCB(pool_classifiers, random_state=rng, DFP=True, with_IH=True, 76 | IH_rate=0.1) 77 | mcb.fit(X_dsel, y_dsel) 78 | assert np.isclose(mcb.score(X_test, y_test), 0.8878787878787879) 79 | -------------------------------------------------------------------------------- /deslib/tests/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/deslib/tests/util/__init__.py -------------------------------------------------------------------------------- /deslib/tests/util/test_aggregation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deslib.util.aggregation import * 4 | from 
..conftest import create_base_classifier 5 | 6 | 7 | def test_majority_voting(): 8 | query = np.array([[1, -1], [0, 0], [3, -1]]) 9 | 10 | ensemble_classifiers = [create_base_classifier(return_value=0)] * 10 + [ 11 | create_base_classifier(return_value=1)] * 9 12 | 13 | predicted = majority_voting(ensemble_classifiers, query) 14 | assert predicted.all() == 0 and predicted.size == 3 15 | 16 | 17 | def test_majority_voting_multi_class(): 18 | query = np.array([1, -1]) 19 | ensemble_classifiers = ([create_base_classifier(return_value=0)] * 10) + \ 20 | [create_base_classifier(return_value=2)] * 9 + \ 21 | [create_base_classifier(return_value=1)] * 20 22 | 23 | predicted = majority_voting(ensemble_classifiers, query) 24 | assert predicted.all() == 1 and predicted.size == 1 25 | 26 | 27 | def test_weighted_majority_voting(): 28 | query = np.array([[1, -1], [0, 0], [3, -1]]) 29 | ensemble_classifiers = ([create_base_classifier(return_value=0)] * 10) + \ 30 | [create_base_classifier(return_value=2)] * 9 31 | weights = np.array([([0.5] * 10) + ([0.8] * 9), ([0.5] * 10) + ([0.8] * 9), 32 | ([0.5] * 10) + ([0.8] * 9)]) 33 | predicted = weighted_majority_voting(ensemble_classifiers, weights, query) 34 | assert predicted.all() == 1 and predicted.size == 3 35 | 36 | 37 | def test_weighted_majority_voting_single_sample(): 38 | query = np.array([1, -1]) 39 | clf_1 = create_base_classifier(return_value=1) 40 | clf_2 = create_base_classifier(return_value=1) 41 | clf_3 = create_base_classifier(return_value=2) 42 | ensemble_classifiers = [clf_2, clf_1, clf_3] 43 | weights = np.atleast_2d([0.2, 0.5, 1.0]) 44 | predicted = weighted_majority_voting(ensemble_classifiers, weights, query) 45 | assert predicted == 2 and predicted.size == 1 46 | 47 | 48 | def test_predict_proba(create_pool_classifiers): 49 | query = np.array([[1, -1]]) 50 | ensemble_classifiers = create_pool_classifiers 51 | predicted_proba = predict_proba_ensemble(ensemble_classifiers, query) 52 | assert 
np.isclose(predicted_proba, [0.61, 0.39]).all() 53 | 54 | 55 | # This experiment should raise an error since we have 3 base classifiers 56 | # and 4 weights. 57 | def test_wrong_weights_votes(create_pool_classifiers): 58 | query = np.array([[1, -1]]) 59 | ensemble_classifiers = create_pool_classifiers 60 | weights = np.array([1.0, 1.0, 1.0, 1.0]) 61 | with pytest.raises(ValueError): 62 | weighted_majority_voting(ensemble_classifiers, weights, query) 63 | 64 | 65 | # -------Test routines for the ensemble combination methods------- 66 | # These routines calculates the matrix with the supports given for 67 | # each class for each base classifier and them Aggregates the supports 68 | 69 | def test_product_combiner(create_pool_classifiers): 70 | query = np.array([[1, -1]]) 71 | ensemble_classifiers = create_pool_classifiers 72 | expected = 0 73 | result = product_combiner(ensemble_classifiers, query) 74 | assert np.allclose(expected, result) 75 | 76 | 77 | def test_average_combiner(create_pool_classifiers): 78 | query = np.array([[1, -1]]) 79 | ensemble_classifiers = create_pool_classifiers 80 | expected = 0 81 | result = average_combiner(ensemble_classifiers, query) 82 | assert result == expected 83 | 84 | 85 | def test_minimum_combiner(create_pool_classifiers): 86 | query = np.array([[1, -1]]) 87 | ensemble_classifiers = create_pool_classifiers 88 | expected = 0 89 | result = minimum_combiner(ensemble_classifiers, query) 90 | assert np.allclose(expected, result) 91 | 92 | 93 | def test_maximum_combiner(create_pool_classifiers): 94 | query = np.array([[1, -1]]) 95 | ensemble_classifiers = create_pool_classifiers 96 | expected = 0 97 | result = maximum_combiner(ensemble_classifiers, query) 98 | assert np.allclose(expected, result) 99 | 100 | 101 | def test_median_combiner(create_pool_classifiers): 102 | query = np.array([[1, -1]]) 103 | ensemble_classifiers = create_pool_classifiers 104 | expected = 0 105 | result = median_combiner(ensemble_classifiers, query) 106 | 
assert np.allclose(expected, result) 107 | 108 | 109 | def test_check_predictions(): 110 | predictions = example_kuncheva 111 | with pytest.raises(ValueError): 112 | average_rule(predictions) 113 | 114 | 115 | # -------Test routines for the fusion rules receiving prediction directly------ 116 | # These receives the matrix with the supports given for each class and 117 | # returns the class labels (max score) 118 | 119 | # Test example taken from Kuncheva's book: Combining pattern classifiers 120 | 121 | 122 | example_kuncheva = np.array( 123 | [[0.1, 0.5, 0.4], [0.0, 0.0, 1.0], [0.4, 0.3, 0.4], [0.2, 0.7, 0.1], 124 | [0.1, 0.8, 0.2]]) 125 | example_kuncheva_batch = np.expand_dims(example_kuncheva, axis=0) 126 | example_kuncheva_batch = np.repeat(example_kuncheva_batch, 10, axis=0) 127 | 128 | 129 | def test_product_rule(): 130 | expected = 2 131 | result = product_rule(example_kuncheva_batch) 132 | assert np.allclose(expected, result) 133 | 134 | 135 | def test_average_rule(): 136 | expected = 1 137 | result = average_rule(example_kuncheva_batch) 138 | assert np.allclose(expected, result) 139 | 140 | 141 | def test_minimum_rule(): 142 | expected = 2 143 | result = minimum_rule(example_kuncheva_batch) 144 | assert np.allclose(expected, result) 145 | 146 | 147 | def test_maximum_rule(): 148 | expected = 2 149 | result = maximum_rule(example_kuncheva_batch) 150 | assert np.allclose(expected, result) 151 | 152 | 153 | def test_median_rule(): 154 | expected = 1 155 | result = median_rule(example_kuncheva_batch) 156 | assert np.allclose(expected, result) 157 | -------------------------------------------------------------------------------- /deslib/tests/util/test_datasets.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from deslib.util.datasets import make_P2 4 | from deslib.util.datasets import make_banana 5 | from deslib.util.datasets import make_banana2 6 | from deslib.util.datasets 
import make_circle_square 7 | from deslib.util.datasets import make_xor 8 | 9 | 10 | def setup_class_sizes(): 11 | 12 | size_class0 = np.random.randint(1, 1000) 13 | size_class1 = np.random.randint(1, 1000) 14 | return size_class0, size_class1 15 | 16 | 17 | def test_P2_class_distribution(): 18 | 19 | s0, s1 = setup_class_sizes() 20 | _, y = make_P2(size_classes=[s0, s1]) 21 | assert np.sum(y == 0) == s0 and np.sum(y == 1) == s1 22 | 23 | 24 | def test_banana2_class_distribution(): 25 | s0, s1 = setup_class_sizes() 26 | _, y = make_banana2(size_classes=[s0, s1]) 27 | assert np.sum(y == 0) == s0 and np.sum(y == 1) == s1 28 | 29 | 30 | def test_banana_class_distribution(): 31 | s0, s1 = setup_class_sizes() 32 | _, y = make_banana(size_classes=[s0, s1]) 33 | assert np.sum(y == 0) == s0 and np.sum(y == 1) == s1 34 | 35 | 36 | def test_circle_square_class_distribution(): 37 | s0, s1 = setup_class_sizes() 38 | _, y = make_circle_square(size_classes=[s0, s1]) 39 | assert np.sum(y == 0) == s0 and np.sum(y == 1) == s1 40 | 41 | 42 | def test_xor_size(): 43 | n_samples = np.random.randint(100, 2000) 44 | X, y = make_xor(n_samples) 45 | assert y.size == n_samples 46 | 47 | 48 | def test_xor(): 49 | n_samples = np.random.randint(100, 2000) 50 | X, y = make_xor(n_samples) 51 | X_0, X_1 = X[y == 0], X[y == 1] 52 | for x in X_0: 53 | assert np.all(x[0] < 0.5 and x[1] < 0.5) or (x[0] > 0.5 and x[1] > 0.5) 54 | 55 | 56 | def test_banana_n_higher_than_one(): 57 | s0, s1 = setup_class_sizes() 58 | na = np.random.rand() + 1 59 | with pytest.raises(ValueError): 60 | make_banana([s0, s1], na) 61 | -------------------------------------------------------------------------------- /deslib/tests/util/test_diversity_batch.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from deslib.util.diversity_batch import (_process_predictions, 5 | double_fault, 6 | Q_statistic, 7 | ratio_errors, 8 | agreement_measure, 9 
| disagreement_measure, 10 | correlation_coefficient) 11 | 12 | 13 | @pytest.fixture 14 | def create_X_y(): 15 | # ex1: The distribution of samples of a test example. 16 | X = np.array( 17 | [ 18 | [-1, 1], 19 | [-0.75, 0.5], 20 | [-1.5, 1.5], 21 | [1, 1], 22 | [0.75, 0.5], 23 | [1.5, 1.5], 24 | [1, -1], 25 | [-0.5, 0.5], 26 | [0.5, 0.5], 27 | [0, -1], 28 | [0.75, -0.5], 29 | [0.0, 0.0], 30 | [-1, -1], 31 | [0, -0.5], 32 | [1, -1], 33 | ] 34 | ) 35 | # Labels associated with the samples. This information is used 36 | # by techniques based on a posteriori information. 37 | y = np.array([0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0]) 38 | return X, y 39 | 40 | 41 | @pytest.fixture 42 | def example_diversity(create_X_y): 43 | y_pred_classifier1 = np.array([0, 0, 0, 1, 0, 1, 0, 0, 0, 0]) 44 | y_pred_classifier2 = np.tile(np.array([1, 0, 0, 1, 1, 0, 0, 0, 1, 1]), 45 | (5, 1)) 46 | 47 | y_real = np.array([0, 0, 1, 0, 0, 0, 0, 1, 1, 1]) 48 | 49 | y_ex1 = create_X_y[1] 50 | return y_pred_classifier1, y_pred_classifier2, y_real, y_ex1 51 | 52 | 53 | @pytest.fixture 54 | def example_diversity_ones_zeros(create_X_y): 55 | y = create_X_y[1] 56 | y_pred_ones = np.ones(15) 57 | y_pred_zeros = np.zeros((5, 15)) 58 | return y, y_pred_ones, y_pred_zeros 59 | 60 | 61 | def test_process_predictions_ones_zeros(example_diversity_ones_zeros): 62 | y, y_pred_ones, y_pred_zeros = example_diversity_ones_zeros 63 | N00, N10, N01, N11 = _process_predictions(y, y_pred_ones, y_pred_zeros) 64 | assert ( 65 | (N00 == np.full((5,), 0.0)).all() and 66 | (N11 == np.full((5,), 0.0)).all() and 67 | (N01 == np.full((5,), 9.0 / 15.0)).all() and 68 | (N10 == np.full((5,), 6.0 / 15.0)).all() 69 | ) 70 | 71 | 72 | def test_double_fault_ones_zeros(example_diversity_ones_zeros): 73 | y, y_pred_ones, y_pred_zeros = example_diversity_ones_zeros 74 | df = double_fault(y, y_pred_ones, y_pred_zeros) 75 | assert (df == np.full((5,), 0)).all() 76 | 77 | 78 | def test_double_fault(): 79 | labels = np.array([0, 0, 0, 
0, 1, 1, 1]) 80 | pred1 = np.array([1, 0, 1, 0, 0, 0, 0]) 81 | pred2 = np.tile(np.array([1, 0, 0, 0, 1, 0, 0]), (5, 1)) 82 | 83 | actual = double_fault(labels, pred1, pred2) 84 | 85 | assert ( 86 | actual == np.full((5,), 3.0 / 7) 87 | ).all() # three common errors out of 7 predictions 88 | 89 | 90 | def test_q_statistic_ones_zeros(example_diversity_ones_zeros): 91 | y, y_pred_ones, y_pred_zeros = example_diversity_ones_zeros 92 | Q = Q_statistic(y, y_pred_ones, y_pred_zeros) 93 | assert (Q == np.full((5,), -1.0)).all() 94 | 95 | 96 | def test_ratio_errors_diff_classifiers(example_diversity): 97 | y_pred_classifier1, y_pred_classifier2, y_real, y_ex1 = example_diversity 98 | ratio = ratio_errors(y_real, y_pred_classifier1, y_pred_classifier2) 99 | assert np.isclose(ratio, 1.66, atol=0.01).all() 100 | 101 | 102 | def test_agreement(example_diversity): 103 | y_pred_classifier1, y_pred_classifier2, y_real, y_ex1 = example_diversity 104 | agreement = agreement_measure(y_real, 105 | y_pred_classifier1, 106 | y_pred_classifier2) 107 | assert np.isclose(agreement, 0.5).all() 108 | 109 | 110 | def test_disagreement(example_diversity): 111 | y_pred_classifier1, y_pred_classifier2, y_real, y_ex1 = example_diversity 112 | disagreement = disagreement_measure(y_real, 113 | y_pred_classifier1, 114 | y_pred_classifier2) 115 | assert np.isclose(disagreement, 0.5).all() 116 | 117 | 118 | def test_coefficient_correlation(example_diversity): 119 | y_pred_classifier1, y_pred_classifier2, y_real, y_ex1 = example_diversity 120 | coefficient = correlation_coefficient( 121 | y_real, y_pred_classifier1, y_pred_classifier2 122 | ) 123 | assert np.isclose(coefficient, 0.0).all() 124 | -------------------------------------------------------------------------------- /deslib/tests/util/test_faiss.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from sklearn.neighbors import KNeighborsClassifier 4 | from 
deslib.tests.test_des_integration import load_dataset 5 | from deslib.util import faiss_knn_wrapper 6 | 7 | 8 | @pytest.mark.skipif(not faiss_knn_wrapper.is_available(), 9 | reason="requires the faiss library") 10 | def test_faiss_predict(): 11 | rng = np.random.RandomState(123456) 12 | _, X_test, X_train, _, _, y_train = load_dataset(None, rng) 13 | k = 7 14 | X_train = X_train.astype(np.float32) 15 | X_test = X_test.astype(np.float32) 16 | f_knn_test = faiss_knn_wrapper.FaissKNNClassifier(n_neighbors=k) 17 | f_knn_test.fit(X_train, y_train) 18 | f_knn_preds = f_knn_test.predict(X_test) 19 | 20 | knn_test = KNeighborsClassifier(n_neighbors=k) 21 | knn_test.fit(X_train, y_train) 22 | knn_preds = knn_test.predict(X_test) 23 | 24 | assert ((f_knn_preds - knn_preds).sum() == 0) 25 | -------------------------------------------------------------------------------- /deslib/tests/util/test_fire.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from deslib.util.dfp import frienemy_pruning 4 | from deslib.util.dfp import frienemy_pruning_preprocessed 5 | from ..conftest import create_base_classifier 6 | 7 | 8 | # Since no classifier crosses the region of competence, 9 | # all of them must be selected 10 | def test_frienemy_no_classifier_crosses(example_estimate_competence): 11 | _, y, neighbors = example_estimate_competence[0:3] 12 | n_classifiers = 3 13 | predictions = np.zeros((y.size, n_classifiers)) 14 | mask = frienemy_pruning_preprocessed(neighbors, y, predictions) 15 | assert mask.all() 16 | 17 | 18 | # In this example, all base classifier should be considered crossing the 19 | # region of competence since they always predicts the correct label for 20 | # the samples in DSEL. 
21 | def test_frienemy_all_classifiers_crosses(example_all_ones): 22 | X, y, neighbors, _, dsel_processed, _ = example_all_ones 23 | result = frienemy_pruning_preprocessed(neighbors, y, dsel_processed) 24 | assert result.all() 25 | 26 | 27 | # Check if the batch processing is working by passing multiple 28 | # samples at the same time. 29 | def test_frienemy_not_all_classifiers_crosses(example_estimate_competence): 30 | expected = np.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]]) 31 | _, y, neighbors, _, dsel_processed, _ = example_estimate_competence 32 | 33 | # passing three samples to compute the DFP at the same time 34 | result = frienemy_pruning_preprocessed(neighbors[:, :3], y, dsel_processed) 35 | assert np.array_equal(result, expected) 36 | 37 | 38 | # Test the case where the sample is located in a safe region 39 | # (i.e., all neighbors comes from the same class) 40 | def test_frienemy_safe_region(example_estimate_competence): 41 | X, y, _, _, dsel_processed, _ = example_estimate_competence 42 | neighbors = np.tile(np.array([0, 1, 2, 6, 7, 8, 14]), (10, 1)) 43 | 44 | result = frienemy_pruning_preprocessed(neighbors, y, dsel_processed) 45 | assert result.all() 46 | 47 | 48 | def test_frienemy_not_processed(): 49 | X = np.random.rand(5, 2) 50 | y = np.array([0, 0, 0, 1, 1]) 51 | X_query = np.random.rand(1, 2) 52 | clf1 = create_base_classifier(return_value=[0, 1, 0, 0, 1]) 53 | clf2 = create_base_classifier(return_value=[1, 1, 1, 1, 1]) 54 | clf3 = create_base_classifier(return_value=[0, 0, 0, 0, 0]) 55 | clf4 = create_base_classifier(return_value=[0, 0, 0, 1, 1]) 56 | pool = [clf1, clf2, clf3, clf4] 57 | dfp_mask = frienemy_pruning(X_query, X, y, pool, 5) 58 | assert np.array_equal(dfp_mask, np.array([[1, 0, 0, 1]])) 59 | -------------------------------------------------------------------------------- /deslib/tests/util/test_instance_hardness.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import 
pytest 3 | 4 | from deslib.util.instance_hardness import hardness_region_competence 5 | from deslib.util.instance_hardness import kdn_score 6 | 7 | 8 | # ------Test routines for the Instance Hardness calculation------------------ 9 | @pytest.mark.parametrize('index, expected', [(0, 0.42), 10 | (1, 0.28), 11 | (2, 0.28)]) 12 | def test_instance_hardness_region(index, 13 | expected, 14 | example_estimate_competence): 15 | y, neighbors = example_estimate_competence[1:3] 16 | k = 7 17 | neighbors = neighbors[index, :] 18 | IH = hardness_region_competence(neighbors, y, k) 19 | assert np.isclose(IH, expected, atol=0.01) 20 | 21 | 22 | def test_instance_hardness_region_batch(example_estimate_competence): 23 | expected = np.array([0.42, 0.28, 0.28]) 24 | y, neighbors = example_estimate_competence[1:3] 25 | 26 | k = 7 27 | IH = hardness_region_competence(neighbors, y, k) 28 | assert np.allclose(IH, expected, atol=0.01) 29 | 30 | 31 | def test_instance_hardness_region_all_same(example_estimate_competence): 32 | y = example_estimate_competence[1] 33 | k = 7 34 | neighbors = np.array([0, 1, 2, 6, 7, 8, 13]) 35 | IH = hardness_region_competence(neighbors, y, k) 36 | assert IH == 0.0 37 | 38 | 39 | def test_kdn_score(example_estimate_competence): 40 | X, y, neigh, dist, _, _ = example_estimate_competence 41 | X, y = X[0:6, :], y[0:6] 42 | score, _ = kdn_score(X, y, 3) 43 | assert np.allclose(score, 0.3333333) 44 | -------------------------------------------------------------------------------- /deslib/tests/util/test_knne.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from deslib.util import faiss_knn_wrapper 4 | from deslib.util import KNNE 5 | 6 | 7 | def setup_test(n_neighbors, knn_classifier='sklearn'): 8 | X = np.tile(np.arange(15).reshape(-1, 1), 3) 9 | y = np.array(5 * [0] + 5 * [1] + 5 * [2]) 10 | knne = KNNE(n_neighbors=n_neighbors, knn_classifier=knn_classifier) 11 | knne.fit(X, y) 12 | 
def setup_test(n_neighbors, knn_classifier='sklearn'):
    """Build a toy 3-class dataset (15 aligned points, 5 per class) and
    return it together with a KNNE instance fitted on it."""
    X = np.tile(np.arange(15).reshape(-1, 1), 3)
    y = np.repeat([0, 1, 2], 5)
    knne = KNNE(n_neighbors=n_neighbors, knn_classifier=knn_classifier)
    knne.fit(X, y)
    return X, y, knne


def test_equal_classes():
    # With n_neighbors=6 and 3 classes, each class contributes exactly two
    # neighbors for every query point.
    _, y, knne = setup_test(n_neighbors=6)
    _, indices = knne.kneighbors()
    class_counts = np.apply_along_axis(np.bincount, 1, y[indices])
    assert (class_counts == 2).all()


def test_uneven_k():
    # 7 is not a multiple of 3 classes: the extra neighbor must come from
    # the class closest to the query.
    X, _, knne = setup_test(n_neighbors=7)
    query = X[0].reshape(1, -1)
    _, indices = knne.kneighbors(query, 7)
    assert np.allclose([0, 1, 5, 6, 10, 11, 2], indices)


def test_predict():
    X, y, knne = setup_test(n_neighbors=6)
    assert (knne.predict(X) == y).all()


def test_predict_proba():
    X, _, knne = setup_test(n_neighbors=6)
    probas = knne.predict_proba(X)
    # Class 0 dominates for the first five samples, class 1 afterwards.
    assert np.all(probas[0:5, 0] > probas[0:5, 1])
    assert np.all(probas[5:-1, 1] > probas[5:-1, 0])


def test_labels_not_encoded():
    # String labels must be accepted without prior label encoding.
    X = np.tile(np.arange(10).reshape(-1, 1), 3)
    y = np.array(5 * ['cat'] + 5 * ['dog'])
    knne = KNNE(n_neighbors=6)
    knne.fit(X, y)
    _, indices = knne.kneighbors()
    neighbor_labels = y[indices]
    n_dog = np.sum(neighbor_labels == 'dog')
    n_cat = np.sum(neighbor_labels == 'cat')
    assert n_cat == n_dog and n_dog == 30


def test_n_neighbors_none():
    X = np.tile(np.arange(10).reshape(-1, 1), 3)
    y = np.array(5 * ['cat'] + 5 * ['dog'])
    knne = KNNE(n_neighbors=None)
    with pytest.raises(ValueError):
        knne.fit(X, y)


def test_return_indices_only():
    X, _, knne = setup_test(n_neighbors=6)
    indices = knne.kneighbors(X, return_distance=False)
    assert indices.shape == (15, 6)


def test_n_neighbors_less_n_classes():
    # Fewer neighbors than classes cannot supply one neighbor per class.
    with pytest.raises(ValueError):
        setup_test(n_neighbors=2)


def test_n_neighbors_not_integer():
    with pytest.raises(TypeError):
        setup_test(n_neighbors=5.5)


def test_n_neighbors_not_multiple_raise_warning():
    # n_neighbors not divisible by the number of classes should warn.
    with pytest.warns(Warning):
        setup_test(n_neighbors=7)
| 84 | # ------Tests using KNNE using faiss for similarity search------------------ 85 | @pytest.mark.skipif(not faiss_knn_wrapper.is_available(), 86 | reason="requires the faiss library") 87 | def test_faiss_knne(): 88 | X, y, knne = setup_test(n_neighbors=6, knn_classifier='faiss') 89 | y_pred = knne.predict(X) 90 | assert np.allclose(y, y_pred) 91 | 92 | 93 | @pytest.mark.skipif(not faiss_knn_wrapper.is_available(), 94 | reason="requires the faiss library") 95 | def test_faiss_knne_inds(): 96 | X, y, knne = setup_test(n_neighbors=6, knn_classifier='faiss') 97 | inds = knne.kneighbors(X, return_distance=False) 98 | assert inds.shape == (15, 6) 99 | -------------------------------------------------------------------------------- /deslib/tests/util/test_prob_functions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from deslib.util.prob_functions import (ccprmod, 5 | log_func, 6 | min_difference, 7 | softmax, 8 | exponential_func, 9 | entropy_func) 10 | 11 | 12 | # Example from the original paper "A probabilistic model of classifier 13 | # competence for dynamic ensemble selection" 14 | @pytest.mark.parametrize("supports, idx_correct_label, expected", 15 | [([[0.3, 0.6, 0.1], [1.0 / 3, 1.0 / 3, 1.0 / 3]], 16 | [1, 0], [0.784953394056843, 0.332872292262951]), 17 | ([[0.5, 0.2, 0.3], [0.5, 0.2, 0.3]], [0, 1], 18 | [0.6428, 0.1194])]) 19 | def test_ccprmod_return_value(supports, idx_correct_label, expected): 20 | value = ccprmod(supports, idx_correct_label) 21 | assert np.isclose(value, expected, atol=0.001).all() 22 | 23 | 24 | @pytest.mark.parametrize('B', [0, -1, None, 0.55]) 25 | def test_valid_ccprmod_beta(B): 26 | supports = [0.3, 0.6, 0.1] 27 | idx_correct_label = [1] 28 | 29 | with pytest.raises((ValueError, TypeError)): 30 | ccprmod(supports, idx_correct_label, B) 31 | 32 | 33 | def test_ccprmod_zero_support(): 34 | supports = [[0.0, 0.0, 1.0], [0.0, 1.0, 0.0], [0.0, 0.8, 
0.2]] 35 | idx_correct_label = [0, 2, 0] 36 | assert np.isclose(ccprmod(supports, idx_correct_label), 0, atol=0.01).all() 37 | 38 | 39 | def test_ccprmod_one_support(): 40 | supports = [[0.0, 0.0, 1.0], [0.0, 1.0, 0.0]] 41 | idx_correct_label = [2, 1] 42 | assert np.isclose(ccprmod(supports, idx_correct_label), 1, atol=0.01).all() 43 | 44 | 45 | def test_softmax_sum_to_one(): 46 | test = np.random.rand(10) 47 | assert np.allclose(np.sum(softmax(test)), 1.0, atol=0.001) 48 | 49 | 50 | @pytest.mark.parametrize('vector, expected', 51 | [([0, 1, -0.5, 0.5], 52 | [0.1674, 0.4551, 0.1015, 0.2760]), 53 | ([3.20, 5, 7.856, 9.65], 54 | [0.0013, 0.0081, 0.1412, 0.8493])]) 55 | def test_softmax(vector, expected): 56 | assert np.isclose(softmax(vector), expected, atol=0.001).all() 57 | 58 | 59 | @pytest.mark.parametrize('supports_correct, expected', 60 | [(np.array([0.5]), 0), 61 | (np.array([0.0]), -1.0), 62 | (np.array([1.0]), 1.0)]) 63 | def test_log_func_two_classes(supports_correct, expected): 64 | n_classes = 2 65 | result = log_func(n_classes, supports_correct) 66 | assert np.isclose(result, expected) 67 | 68 | 69 | @pytest.mark.parametrize('supports_correct, expected', 70 | [(np.array([0.33]), 0), 71 | (np.array([0.0]), -1.0), 72 | (np.array([1.0]), 1.0)]) 73 | def test_log_func_multi_class(supports_correct, expected): 74 | n_classes = 3 75 | result = log_func(n_classes, supports_correct) 76 | assert np.isclose(result, expected, atol=0.01) 77 | 78 | 79 | # Example from the paper "A probabilistic model of classifier competence for 80 | # dynamic ensemble selection" 81 | @pytest.mark.parametrize("supports, idx_correct_label, expected", 82 | [(np.array([[0.3, 0.6, 0.1], [0.33, 0.33, 0.33]]), 83 | [1, 0], [0.3, 0.0]), 84 | (np.array([[0.5, 0.2, 0.3], [0.5, 0.2, 0.3]]), 85 | [0, 1], [0.2, -0.3])]) 86 | def test_min_difference(supports, idx_correct_label, expected): 87 | result = min_difference(supports, idx_correct_label) 88 | assert np.isclose(result, expected, 
atol=0.01).all() 89 | 90 | 91 | @pytest.mark.parametrize('supports_correct, expected', 92 | [(np.array([0.33]), -0.01), 93 | (np.array([0.0]), -1.0), 94 | (np.array([1.0]), 1.0)]) 95 | def test_exponential_func_multi_class(supports_correct, expected): 96 | n_classes = 3 97 | result = exponential_func(n_classes, supports_correct) 98 | assert np.isclose(result, expected, atol=0.01).all() 99 | 100 | 101 | def test_exponential_func_multi_class_batch(): 102 | supports_correct = np.array([0.33, 0.0, 1.0]) 103 | expected = [-0.01, -1.0, 1.0] 104 | n_classes = 3 105 | result = exponential_func(n_classes, supports_correct) 106 | assert np.allclose(result, expected, atol=0.01) 107 | 108 | 109 | @pytest.mark.parametrize('supports_correct, expected', 110 | [(np.array([0.5]), 0.00), 111 | (np.array([0.0]), -1.0), 112 | (np.array([1.0]), 1.0)]) 113 | def test_exponential_func_two_class(supports_correct, expected): 114 | n_classes = 2 115 | result = exponential_func(n_classes, supports_correct) 116 | assert np.isclose(result, expected, atol=0.01).all() 117 | 118 | 119 | def test_exponential_func(): 120 | n_classes = 2 121 | result1 = exponential_func(n_classes, np.array([0.2])) 122 | assert result1 < 0.0 123 | 124 | result2 = exponential_func(n_classes, np.array([0.8])) 125 | assert result2 > 0.0 126 | 127 | assert result2 > result1 128 | 129 | result3 = exponential_func(n_classes, np.array([1.0])) 130 | result4 = exponential_func(n_classes, np.array([0.9])) 131 | assert result3 > result4 > result2 > result1 132 | 133 | 134 | def test_entropy_func_three_classes(): 135 | n_classes = 3 136 | supports = np.array([[0.33, 0.33, 0.33], [1.0, 0.0, 0.0], [1.0, 0.0, 0.0]]) 137 | is_correct = np.array([0, 1, 0]) 138 | expected = [0.0, 1.0, -1.0] 139 | result = entropy_func(n_classes, supports, is_correct) 140 | assert np.isclose(result, expected, atol=0.01).all() 141 | 142 | 143 | def test_entropy_func_parameter_shape(): 144 | with pytest.raises(ValueError): 145 | entropy_func(2, 
np.array([0, 1]), np.array([0])) 146 | -------------------------------------------------------------------------------- /deslib/util/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`deslib.util` This module includes various utilities. They are divided 3 | into four parts: 4 | 5 | deslib.util.aggregation - Implementation of aggregation functions such as 6 | majority voting and averaging. Such functions can be applied to any list of 7 | classifiers. 8 | 9 | deslib.util.diversity - Implementation of different measures of diversity 10 | between classifiers. 11 | 12 | deslib.util.prob_functions - Functions to estimate the competence of a base 13 | classifier based on the 14 | probability estimates. 15 | 16 | deslib.util.instance_hardness - Functions to measure the hardness level of a 17 | given instance 18 | 19 | deslib.util.faiss_knn_wrapper - Wrapper for Facebook AI fast similarity search 20 | on GPU 21 | 22 | deslib.util.datasets - Provides methods to generate synthetic data. 23 | 24 | deslib.util.knne - Implementation of the K-Nearest Neighbors Equality 25 | technique 26 | """ 27 | 28 | from .aggregation import * 29 | from .diversity import * 30 | from .instance_hardness import * 31 | from .prob_functions import * 32 | from .datasets import * 33 | from .knne import KNNE 34 | from .faiss_knn_wrapper import FaissKNNClassifier 35 | -------------------------------------------------------------------------------- /deslib/util/dfp.py: -------------------------------------------------------------------------------- 1 | """Implementation of the Dynamic Frienemy Pruning (DFP) algorithm for online 2 | pruning of base classifiers. 3 | 4 | References 5 | ---------- 6 | Oliveira, D.V.R., Cavalcanti, G.D.C. and Sabourin, R., Online Pruning 7 | of Base Classifiers for Dynamic Ensemble Selection, 8 | Pattern Recognition, vol. 72, December 2017, pp 44-58. 
9 | 10 | Cruz, Rafael MO, Dayvid VR Oliveira, George DC Cavalcanti, and Robert Sabourin. 11 | "FIRE-DES++: Enhanced online pruning of base classifiers for dynamic ensemble 12 | selection." Pattern Recognition 85 (2019): 149-160. 13 | """ 14 | 15 | # coding=utf-8 16 | 17 | # Author: Rafael Menelau Oliveira e Cruz 18 | # 19 | # License: BSD 3 clause 20 | 21 | 22 | import numpy as np 23 | from sklearn.neighbors import KNeighborsClassifier 24 | 25 | 26 | def frienemy_pruning(X_query, X_dsel, y_dsel, ensemble, k): 27 | """Implements the Online Pruning method (frienemy) which prunes base 28 | classifiers that do not cross the region of competence of a given instance. 29 | A classifier crosses the region of competence if it correctly 30 | classify at least one sample for each different class in the region. 31 | 32 | Parameters 33 | ---------- 34 | X_query : array-like of shape (n_samples, n_features) 35 | Test set. 36 | X_dsel : array-like of shape (n_samples, n_features) 37 | Dynamic selection set. 38 | y_dsel : array-like of shape (n_samples,) 39 | The target values (Dynamic selection set). 40 | ensemble : list of shape = [n_classifiers] 41 | The ensemble of classifiers to be pruned. 42 | k : int 43 | Number of neighbors used to compute the regions of competence. 44 | 45 | Returns 46 | ------- 47 | DFP_mask : array-like of shape = [n_samples, n_classifiers] 48 | Mask containing 1 for the selected base classifier and 0 49 | otherwise. 
# NOTE(review): this span of the dump was line-mangled; the definitions below
# are the restored, properly formatted code for deslib/util/dfp.py and
# deslib/util/instance_hardness.py.


def frienemy_pruning(X_query, X_dsel, y_dsel, ensemble, k):
    """Online Pruning (frienemy): prune base classifiers that do not cross
    the region of competence of a given instance.

    A classifier crosses the region of competence if it correctly classifies
    at least one sample of each class present in the region.

    Parameters
    ----------
    X_query : array-like of shape (n_samples, n_features)
        Test set.
    X_dsel : array-like of shape (n_samples, n_features)
        Dynamic selection set.
    y_dsel : array-like of shape (n_samples,)
        The target values (dynamic selection set).
    ensemble : list of shape = [n_classifiers]
        The ensemble of classifiers to be pruned.
    k : int
        Number of neighbors used to compute the regions of competence.

    Returns
    -------
    DFP_mask : array-like of shape = [n_samples, n_classifiers]
        Mask containing 1 for the selected base classifiers and 0 otherwise.
    """
    # Imported lazily so the numpy-only helper below remains usable without
    # scikit-learn installed.
    from sklearn.neighbors import KNeighborsClassifier

    predictions = np.zeros((X_dsel.shape[0], len(ensemble)),
                           dtype=np.intp)
    for index, clf in enumerate(ensemble):
        predictions[:, index] = clf.predict(X_dsel)
    hit_miss = predictions == y_dsel[:, np.newaxis]
    competence_region = KNeighborsClassifier(n_neighbors=k).fit(X_dsel, y_dsel)
    neighbors = competence_region.kneighbors(X_query, return_distance=False)
    return frienemy_pruning_preprocessed(neighbors, y_dsel, hit_miss)


def frienemy_pruning_preprocessed(neighbors, y_val, hit_miss):
    """Online Pruning (frienemy) on pre-computed regions of competence.

    A classifier crosses the region of competence if it correctly classifies
    at least one sample of each class present in the region.

    Notes
    -----
    Assumes the regions of competence of each query example (``neighbors``)
    and the hit/miss matrix over the dynamic selection data (``hit_miss``)
    were already pre-computed.

    Parameters
    ----------
    neighbors : array-like of shape (n_samples, n_neighbors)
        Indices of the k nearest neighbors.
    y_val : array-like of shape (n_samples,)
        The target values (class labels).
    hit_miss : array-like of shape (n_samples, n_classifiers)
        Matrix containing 1 when the base classifier made the correct
        prediction, 0 otherwise.

    Returns
    -------
    DFP_mask : array-like of shape = [n_samples, n_classifiers]
        Mask containing 1 for the selected base classifiers and 0 otherwise.
    """
    if neighbors.ndim < 2:
        # A single region of competence was passed in as a flat vector.
        neighbors = neighbors.reshape(1, -1)

    n_samples = neighbors.shape[0]
    n_classifiers = hit_miss.shape[1]
    dfp_mask = np.zeros((n_samples, n_classifiers))

    # TODO: vectorize this code?
    for sample_idx in range(n_samples):
        curr_neighbors = neighbors[sample_idx]
        neighbors_y = y_val[curr_neighbors]
        if len(set(neighbors_y)) > 1:
            # Indecision region: keep a classifier only if it is correct on
            # samples of more than one class in the region.
            for clf_index in range(n_classifiers):
                [mask] = np.where(hit_miss[curr_neighbors, clf_index])
                if len(set(neighbors_y[mask])) > 1:
                    dfp_mask[sample_idx, clf_index] = 1.0
        else:
            # Safe region (single class): keep every classifier.
            dfp_mask[sample_idx, :] = 1.0
    # Rows where every classifier was pruned fall back to the full pool.
    dfp_mask[np.all(dfp_mask == 0, axis=1)] = 1.0
    return dfp_mask


# ---------------------------------------------------------------------------
# /deslib/util/instance_hardness.py
# ---------------------------------------------------------------------------
# coding=utf-8
# Author: Rafael Menelau Oliveira e Cruz
# License: BSD 3 clause
#
# Functions to measure instance hardness: the likelihood that a given sample
# will be misclassified by different learning algorithms.
# Reference: Smith, M.R., Martinez, T. and Giraud-Carrier, C., 2014. An
# instance level analysis of data complexity. Machine Learning 95(2), 225-256.

import numpy as np
from scipy.stats import mode


def hardness_region_competence(neighbors_idx, labels, safe_k):
    """Calculate the instance hardness of samples based on their neighborhood.

    A sample is deemed hard to classify when there is overlap between
    different classes in its region of competence. This measure does not
    take into account the target label of the test sample; it is used to
    decide between DS and plain KNN for a given query.

    Parameters
    ----------
    neighbors_idx : array of shape = [n_samples_test, k]
        Indices of the nearest neighbors of each considered sample.
    labels : array of shape = [n_samples_train]
        Labels associated with each training sample.
    safe_k : int
        Number of neighbors used to estimate the hardness of the region.

    Returns
    -------
    hardness : array of shape = [n_samples_test]
        The hardness level associated with each example.

    References
    ----------
    Smith, M.R., Martinez, T. and Giraud-Carrier, C., 2014. An instance
    level analysis of data complexity. Machine Learning 95(2), 225-256.
    """
    if neighbors_idx.ndim < 2:
        neighbors_idx = np.atleast_2d(neighbors_idx)

    neighbors_y = labels[neighbors_idx[:, :safe_k]]
    # Hardness = fraction of the region NOT belonging to the majority class.
    _, num_majority_class = mode(neighbors_y, axis=1)
    hardness = ((safe_k - num_majority_class) / safe_k).reshape(-1, )

    return hardness


def kdn_score(X, y, k):
    """Calculate the K-Disagreeing Neighbors score (KDN) of each sample.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        The input data.
    y : array of shape (n_samples)
        Class labels of each example in X.
    k : int
        Neighborhood size for calculating the KDN score.

    Returns
    -------
    score : array of shape = [n_samples,1]
        KDN score of each sample in X.
    neighbors : array of shape = [n_samples,k]
        Indexes of the k neighbors of each sample in X.

    References
    ----------
    M. R. Smith, T. Martinez, C. Giraud-Carrier, An instance level analysis
    of data complexity, Machine Learning 95 (2) (2014) 225-256.
    """
    # Imported lazily so the scipy/numpy-only functions above remain usable
    # without scikit-learn installed.
    from sklearn.neighbors import NearestNeighbors

    nbrs = NearestNeighbors(n_neighbors=k + 1, algorithm='kd_tree').fit(X)
    _, indices = nbrs.kneighbors(X)
    # Drop the first column: each point is its own nearest neighbor.
    neighbors = indices[:, 1:]
    # Broadcasting replaces the former np.tile(y, (k, 1)).transpose().
    diff_class = np.asarray(y)[:, np.newaxis] != y[neighbors]
    score = np.sum(diff_class, axis=1) / k
    return score, neighbors


# Residue of the original dump (non-code files) preserved verbatim below.
_TRAILING_DUMP_RESIDUE = r"""
/docs/.gitignore:
_build

/docs/_static/.keep:
https://raw.githubusercontent.com/scikit-learn-contrib/DESlib/73cb18b5454e447313d31e6b6f15951f0d57f348/docs/_static/.keep

/docs/api.rst:
.. _api-reference:

######################
API Reference
######################

This is the full API documentation of the `DESlib`. Currently the library is
divided into four modules:

Dynamic Classifier Selection (DCS)
-----------------------------------

This module contains the implementation of techniques in which only the base
classifier that attained the highest competence level is selected for the
classification of the query.

.. automodule:: deslib.dcs

..
"""
toctree:: 18 | :maxdepth: 3 19 | 20 | modules/dcs/a_posteriori 21 | modules/dcs/a_priori 22 | modules/dcs/lca 23 | modules/dcs/mcb 24 | modules/dcs/mla 25 | modules/dcs/ola 26 | modules/dcs/rank 27 | 28 | Dynamic Ensemble Selection (DES) 29 | ----------------------------------- 30 | 31 | Dynamic ensemble selection strategies refer to techniques that select an ensemble of classifier rather than a single one. 32 | All base classifiers that attain a minimum competence level are selected to compose the ensemble of classifiers. 33 | 34 | .. automodule:: deslib.des 35 | 36 | .. toctree:: 37 | :maxdepth: 3 38 | 39 | modules/des/meta_des 40 | modules/des/des_clustering 41 | modules/des/des_p 42 | modules/des/ds_knn 43 | modules/des/knop 44 | modules/des/knora_e 45 | modules/des/knora_u 46 | modules/des/desmi 47 | modules/des/probabilistic 48 | 49 | Static ensembles 50 | ----------------------------------- 51 | 52 | This module provides the implementation of static ensemble techniques that are usually used as a baseline for the 53 | comparison of DS methods: Single Best (SB), Static Selection (SS), Stacked classifier and Oracle. 54 | 55 | 56 | .. automodule:: deslib.static 57 | 58 | .. toctree:: 59 | :maxdepth: 3 60 | 61 | modules/static/oracle 62 | modules/static/single_best 63 | modules/static/static_selection 64 | modules/static/stacked 65 | 66 | Utils 67 | ----------------------------------- 68 | Utility functions for ensemble methods such as diversity and aggregation methods. 69 | 70 | .. automodule:: deslib.util 71 | 72 | .. 
toctree::
   :maxdepth: 3

   modules/util/diversity
   modules/util/aggregation
   modules/util/prob_functions
   modules/util/instance_hardness
   modules/util/dfp
   modules/util/knne
   modules/util/faiss_knn_wrapper
   modules/util/datasets

--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------

Welcome to DESlib documentation!
=================================================

DESlib is an ensemble learning library focused on the implementation of
state-of-the-art techniques for dynamic classifier and ensemble selection.

DESlib is a work in progress. Contributions are welcomed through its GitHub
page: https://github.com/scikit-learn-contrib/DESlib.

Introduction
--------------
Dynamic Selection (DS) refers to techniques in which the base classifiers are
selected on the fly, according to each new sample to be classified. Only the
most competent classifier, or an ensemble containing the most competent
classifiers, is selected to predict the label of a specific test sample. The
rationale for such techniques is that not every classifier in the pool is an
expert in classifying all unknown samples; rather, each base classifier is an
expert in a different local region of the feature space.

DS is one of the most promising MCS approaches due to the fact that more and
more works are reporting the superior performance of such techniques over
static combination methods. Such techniques have achieved better
classification performance especially when dealing with small-sized and
imbalanced datasets.
A 20 | comprehensive review of dynamic selection can be found in the following papers [1]_ [2]_ 21 | 22 | Philosophy 23 | ----------- 24 | DESlib was developed with two objectives in mind: to make it easy to integrate Dynamic Selection algorithms to 25 | machine learning projects, and to facilitate research on this topic, by providing implementations of the main 26 | DES and DCS methods, as well as the commonly used baseline methods. Each algorithm implements the main methods 27 | in the scikit-learn_ API **scikit-learn**: **fit(X, y)**, **predict(X)**, **predict_proba(X)** 28 | and **score(X, y)**. 29 | 30 | The implementation of the DS methods is modular, following a taxonomy defined in [1]_. 31 | This taxonomy considers the main characteristics of DS methods, that are centered in three components: 32 | 33 | 1. the methodology used to define the local region, in which the competence level of the base classifiers are estimated (region of competence); 34 | 2. the source of information used to estimate the competence level of the base classifiers. 35 | 3. the selection approach to define the best classifier (for DCS) or the best set of classifiers (for DES). 36 | 37 | This modular approach makes it easy for researchers to implement new DS methods, in many cases requiring only the 38 | implementation of the method **estimate_competence**, that is, how the local competence of the base classifier is measured. 39 | 40 | `API Reference `_ 41 | ---------------------------- 42 | 43 | If you are looking for information on a specific function, class or 44 | method, this part of the documentation is for you. 45 | 46 | .. toctree:: 47 | :hidden: 48 | 49 | user_guide 50 | api 51 | auto_examples/index 52 | news 53 | 54 | 55 | `Example `_ 56 | ---------------------------------------- 57 | 58 | Here we present an example of the KNORA-E techniques using a random forest to generate the pool of classifiers: 59 | 60 | .. 
code-block:: python 61 | 62 | from sklearn.ensemble import RandomForestClassifier 63 | from deslib.des.knora_e import KNORAE 64 | 65 | # Train a pool of 10 classifiers 66 | pool_classifiers = RandomForestClassifier(n_estimators=10) 67 | pool_classifiers.fit(X_train, y_train) 68 | 69 | # Initialize the DES model 70 | knorae = KNORAE(pool_classifiers) 71 | 72 | # Preprocess the Dynamic Selection dataset (DSEL) 73 | knorae.fit(X_dsel, y_dsel) 74 | 75 | # Predict new examples: 76 | knorae.predict(X_test) 77 | 78 | The library accepts any list of classifiers (from scikit-learn) as input, including a list containing different classifier models (heterogeneous ensembles). 79 | More examples to use the API can be found in the `examples page `_. 80 | 81 | 82 | Citation 83 | ================== 84 | 85 | If you use DESLib in a scientific paper, please consider citing the following paper: 86 | 87 | Rafael M. O. Cruz, Luiz G. Hafemann, Robert Sabourin and George D. C. Cavalcanti **DESlib: A Dynamic ensemble selection library in Python.** arXiv preprint arXiv:1802.04967 (2018). 88 | 89 | .. code-block:: text 90 | 91 | @article{JMLR:v21:18-144, 92 | author = {Rafael M. O. Cruz and Luiz G. Hafemann and Robert Sabourin and George D. C. Cavalcanti}, 93 | title = {DESlib: A Dynamic ensemble selection library in Python}, 94 | journal = {Journal of Machine Learning Research}, 95 | year = {2020}, 96 | volume = {21}, 97 | number = {8}, 98 | pages = {1-5}, 99 | url = {http://jmlr.org/papers/v21/18-144.html} 100 | } 101 | 102 | 103 | References 104 | ----------- 105 | .. [1] : R. M. O. Cruz, R. Sabourin, and G. D. Cavalcanti, “Dynamic classifier selection: Recent advances and perspectives,” Information Fusion, vol. 41, pp. 195 – 216, 2018. 106 | 107 | .. [2] : A. S. Britto, R. Sabourin, L. E. S. de Oliveira, Dynamic selection of classifiers - A comprehensive review, Pattern Recognition 47 (11) (2014) 3665–3680. 108 | 109 | .. _scikit-learn: http://scikit-learn.org/stable/ 110 | 111 | .. 
_GitHub: https://github.com/scikit-learn-contrib/DESlib 112 | -------------------------------------------------------------------------------- /docs/modules/dcs/a_posteriori.rst: -------------------------------------------------------------------------------- 1 | A posteriori 2 | ------------ 3 | 4 | .. automodule:: deslib.dcs.a_posteriori 5 | 6 | .. autoclass:: APosteriori 7 | :members: fit, predict, predict_proba, score, estimate_competence, select 8 | -------------------------------------------------------------------------------- /docs/modules/dcs/a_priori.rst: -------------------------------------------------------------------------------- 1 | A Priori 2 | ---------- 3 | 4 | .. automodule:: deslib.dcs.a_priori 5 | 6 | .. autoclass:: APriori 7 | :members: fit, predict, predict_proba, score, estimate_competence, select 8 | -------------------------------------------------------------------------------- /docs/modules/dcs/lca.rst: -------------------------------------------------------------------------------- 1 | Local Class Accuracy (LCA) 2 | -------------------------- 3 | 4 | .. automodule:: deslib.dcs.lca 5 | 6 | .. autoclass:: LCA 7 | :members: fit, predict, predict_proba, score, estimate_competence, select 8 | -------------------------------------------------------------------------------- /docs/modules/dcs/mcb.rst: -------------------------------------------------------------------------------- 1 | Multiple Classifier Behaviour (MCB) 2 | ----------------------------------- 3 | 4 | .. automodule:: deslib.dcs.mcb 5 | 6 | .. autoclass:: MCB 7 | :members: fit, predict, predict_proba, score, estimate_competence, select 8 | -------------------------------------------------------------------------------- /docs/modules/dcs/mla.rst: -------------------------------------------------------------------------------- 1 | Modified Local Accuracy (MLA) 2 | ----------------------------- 3 | 4 | .. automodule:: deslib.dcs.mla 5 | 6 | .. 
autoclass:: MLA 7 | :members: fit, predict, predict_proba, score, estimate_competence, select 8 | -------------------------------------------------------------------------------- /docs/modules/dcs/ola.rst: -------------------------------------------------------------------------------- 1 | Overall Local Accuracy (OLA) 2 | ----------------------------- 3 | 4 | .. automodule:: deslib.dcs.ola 5 | 6 | .. autoclass:: OLA 7 | :members: fit, predict, predict_proba, score, estimate_competence, select 8 | -------------------------------------------------------------------------------- /docs/modules/dcs/rank.rst: -------------------------------------------------------------------------------- 1 | Modified Rank 2 | --------------- 3 | 4 | .. automodule:: deslib.dcs.rank 5 | 6 | .. autoclass:: Rank 7 | :members: fit, predict, predict_proba, score, estimate_competence, select 8 | 9 | -------------------------------------------------------------------------------- /docs/modules/des/des_clustering.rst: -------------------------------------------------------------------------------- 1 | DES Clustering 2 | -------------- 3 | 4 | .. automodule:: deslib.des.des_clustering 5 | 6 | .. autoclass:: DESClustering 7 | :members: fit, predict, predict_proba, score, estimate_competence, select 8 | -------------------------------------------------------------------------------- /docs/modules/des/des_p.rst: -------------------------------------------------------------------------------- 1 | Dynamic Ensemble Selection performance (DES-P) 2 | ---------------------------------------------- 3 | 4 | .. automodule:: deslib.des.des_p 5 | 6 | .. autoclass:: DESP 7 | :members: fit, predict, predict_proba, score, estimate_competence, select 8 | -------------------------------------------------------------------------------- /docs/modules/des/deskl.rst: -------------------------------------------------------------------------------- 1 | DES-Kullback Leibler 2 | --------------------- 3 | 4 | .. 
automodule:: deslib.des.probabilistic 5 | 6 | .. autoclass:: DESKL 7 | :members: source_competence, fit, predict, predict_proba, score, estimate_competence, select 8 | -------------------------------------------------------------------------------- /docs/modules/des/desmi.rst: -------------------------------------------------------------------------------- 1 | DES Multiclass Imbalance (DES-MI) 2 | ---------------------------------- 3 | 4 | .. automodule:: deslib.des.des_mi 5 | 6 | .. autoclass:: DESMI 7 | :members: fit, predict, predict_proba, score, estimate_competence, select 8 | -------------------------------------------------------------------------------- /docs/modules/des/ds_knn.rst: -------------------------------------------------------------------------------- 1 | DES-KNN 2 | -------------- 3 | 4 | .. automodule:: deslib.des.des_knn 5 | 6 | .. autoclass:: DESKNN 7 | :members: fit, predict, predict_proba, score, estimate_competence, select 8 | -------------------------------------------------------------------------------- /docs/modules/des/exponential.rst: -------------------------------------------------------------------------------- 1 | DES-Exponential 2 | ---------------- 3 | 4 | .. automodule:: deslib.des.probabilistic 5 | 6 | .. autoclass:: Exponential 7 | :members: source_competence, fit, predict, predict_proba, score, estimate_competence, select -------------------------------------------------------------------------------- /docs/modules/des/knop.rst: -------------------------------------------------------------------------------- 1 | k-Nearest Output Profiles (KNOP) 2 | -------------------------------- 3 | 4 | .. automodule:: deslib.des.knop 5 | 6 | .. 
autoclass:: KNOP 7 | :members: fit, predict, predict_proba, score, estimate_competence_from_proba, select 8 | -------------------------------------------------------------------------------- /docs/modules/des/knora_e.rst: -------------------------------------------------------------------------------- 1 | k-Nearest Oracle-Eliminate (KNORA-E) 2 | -------------------------------------- 3 | 4 | .. automodule:: deslib.des.knora_e 5 | 6 | .. autoclass:: KNORAE 7 | :members: fit, predict, predict_proba, score, estimate_competence, select 8 | -------------------------------------------------------------------------------- /docs/modules/des/knora_u.rst: -------------------------------------------------------------------------------- 1 | k-Nearest Oracle Union (KNORA-U) 2 | -------------------------------- 3 | 4 | .. automodule:: deslib.des.knora_u 5 | 6 | .. autoclass:: KNORAU 7 | :members: fit, predict, predict_proba, score, estimate_competence, select 8 | -------------------------------------------------------------------------------- /docs/modules/des/logarithmic.rst: -------------------------------------------------------------------------------- 1 | DES-Logarithmic 2 | --------------- 3 | 4 | .. automodule:: deslib.des.probabilistic 5 | 6 | .. autoclass:: Logarithmic 7 | :members: source_competence, fit, predict, predict_proba, score, estimate_competence, select -------------------------------------------------------------------------------- /docs/modules/des/meta_des.rst: -------------------------------------------------------------------------------- 1 | META-DES 2 | ======== 3 | 4 | .. automodule:: deslib.des.meta_des 5 | 6 | .. 
autoclass:: METADES 7 | :members: fit, predict, predict_proba, score, estimate_competence_from_proba, select 8 | -------------------------------------------------------------------------------- /docs/modules/des/minimum_difference.rst: -------------------------------------------------------------------------------- 1 | DES-Minimum Difference 2 | ---------------------- 3 | 4 | .. automodule:: deslib.des.probabilistic 5 | 6 | .. autoclass:: MinimumDifference 7 | :members: source_competence, fit, predict, predict_proba, score, estimate_competence, select -------------------------------------------------------------------------------- /docs/modules/des/probabilistic.rst: -------------------------------------------------------------------------------- 1 | Probabilistic 2 | ------------- 3 | 4 | .. automodule:: deslib.des.probabilistic 5 | 6 | .. autoclass:: BaseProbabilistic 7 | :members: 8 | 9 | .. toctree:: 10 | rrc 11 | deskl 12 | minimum_difference 13 | exponential 14 | logarithmic 15 | -------------------------------------------------------------------------------- /docs/modules/des/rrc.rst: -------------------------------------------------------------------------------- 1 | Randomized Reference Classifier (RRC) 2 | ------------------------------------- 3 | 4 | .. automodule:: deslib.des.probabilistic 5 | 6 | .. autoclass:: RRC 7 | :members: source_competence, fit, predict, predict_proba, score, estimate_competence, select -------------------------------------------------------------------------------- /docs/modules/static/oracle.rst: -------------------------------------------------------------------------------- 1 | Oracle 2 | -------------- 3 | 4 | .. automodule:: deslib.static.oracle 5 | 6 | .. 
autoclass:: Oracle 7 | :members: 8 | 9 | -------------------------------------------------------------------------------- /docs/modules/static/single_best.rst: -------------------------------------------------------------------------------- 1 | Single Best 2 | -------------- 3 | 4 | .. automodule:: deslib.static.single_best 5 | 6 | .. autoclass:: SingleBest 7 | :members: fit, predict, predict_proba, score 8 | 9 | -------------------------------------------------------------------------------- /docs/modules/static/stacked.rst: -------------------------------------------------------------------------------- 1 | Stacked Classifier 2 | ------------------ 3 | 4 | .. automodule:: deslib.static.stacked 5 | 6 | .. autoclass:: StackedClassifier 7 | :members: fit, predict, predict_proba, score 8 | -------------------------------------------------------------------------------- /docs/modules/static/static_selection.rst: -------------------------------------------------------------------------------- 1 | Static Selection 2 | ---------------- 3 | 4 | .. automodule:: deslib.static.static_selection 5 | 6 | .. autoclass:: StaticSelection 7 | :members: fit, predict, predict_proba, score 8 | -------------------------------------------------------------------------------- /docs/modules/util/aggregation.rst: -------------------------------------------------------------------------------- 1 | Aggregation 2 | -------------- 3 | 4 | This file contains the implementation of different aggregation functions to combine the outputs of the base 5 | classifiers to give the final decision. 6 | 7 | .. 
automodule:: deslib.util.aggregation 8 | :members: 9 | -------------------------------------------------------------------------------- /docs/modules/util/datasets.rst: -------------------------------------------------------------------------------- 1 | Datasets 2 | -------- 3 | 4 | This file contains routines to generate 2D classification datasets 5 | that can be used to test the performance of different machine learning 6 | algorithms. 7 | 8 | - P2 Dataset 9 | - Circle and Square 10 | - Banana 11 | - Banana 2 12 | 13 | 14 | .. automodule:: deslib.util.datasets 15 | :members: 16 | -------------------------------------------------------------------------------- /docs/modules/util/dfp.rst: -------------------------------------------------------------------------------- 1 | Frienemy Pruning 2 | ---------------- 3 | 4 | .. automodule:: deslib.util.dfp 5 | :members: -------------------------------------------------------------------------------- /docs/modules/util/diversity.rst: -------------------------------------------------------------------------------- 1 | Diversity 2 | -------------- 3 | 4 | This file contains the implementation of key diversity measures found in the ensemble literature: 5 | 6 | - Double Fault 7 | - Negative Double fault 8 | - Q-statistics 9 | - Ratio of errors 10 | 11 | The implementation are made according to the specifications from the book "Combining Pattern Classifiers". 12 | 13 | 14 | .. automodule:: deslib.util.diversity 15 | :members: 16 | -------------------------------------------------------------------------------- /docs/modules/util/faiss_knn_wrapper.rst: -------------------------------------------------------------------------------- 1 | FAISS Wrapper 2 | -------------- 3 | 4 | .. automodule:: deslib.util.faiss_knn_wrapper 5 | 6 | .. 
autoclass:: FaissKNNClassifier 7 | :members: fit, predict, predict_proba, kneighbors 8 | -------------------------------------------------------------------------------- /docs/modules/util/instance_hardness.rst: -------------------------------------------------------------------------------- 1 | Instance Hardness 2 | ------------------- 3 | 4 | This file contains the implementation of different measures of instance hardness. 5 | 6 | .. automodule:: deslib.util.instance_hardness 7 | :members: 8 | -------------------------------------------------------------------------------- /docs/modules/util/knne.rst: -------------------------------------------------------------------------------- 1 | KNN-Equality 2 | ------------ 3 | 4 | .. automodule:: deslib.util.knne 5 | 6 | .. autoclass:: KNNE 7 | :members: fit, predict, predict_proba, kneighbors 8 | -------------------------------------------------------------------------------- /docs/modules/util/prob_functions.rst: -------------------------------------------------------------------------------- 1 | Probabilistic Functions 2 | ------------------------ 3 | This file contains the implementation of several functions used to estimate the competence 4 | level of a base classifiers based on posterior probabilities predicted for each class. 5 | 6 | .. automodule:: deslib.util.prob_functions 7 | :members: 8 | -------------------------------------------------------------------------------- /docs/news.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: deslib 2 | 3 | =============== 4 | Release history 5 | =============== 6 | 7 | .. include:: news/v0.3.rst 8 | 9 | .. include:: news/v0.2.rst 10 | 11 | .. include:: news/v0.1.rst 12 | 13 | .. 
toctree:: 14 | :hidden: 15 | 16 | news/v0.1 17 | news/v0.2 18 | news/v0.3 -------------------------------------------------------------------------------- /docs/news/v0.1.rst: -------------------------------------------------------------------------------- 1 | Version 0.1 2 | =========== 3 | 4 | API 5 | ~~~ 6 | 7 | - First release of the stable API. By `Rafael M O Cruz`_ and `Luiz G Hafemann`_. 8 | 9 | Implemented methods: 10 | ~~~~~~~~~~~~~~~~~~~~~ 11 | 12 | * DES techniques currently available are: 13 | 1. META-DES 14 | 2. K-Nearest-Oracle-Eliminate (KNORA-E) 15 | 3. K-Nearest-Oracle-Union (KNORA-U) 16 | 4. Dynamic Ensemble Selection-Performance (DES-P) 17 | 5. K-Nearest-Output Profiles (KNOP) 18 | 6. Randomized Reference Classifier (DES-RRC) 19 | 7. DES Kullback-Leibler Divergence (DES-KL) 20 | 8. DES-Exponential 21 | 9. DES-Logarithmic 22 | 10. DES-Minimum Difference 23 | 11. DES-Clustering 24 | 12. DES-KNN 25 | 26 | * DCS techniques: 27 | 1. Modified Classifier Rank (Rank) 28 | 2. Overall Local Accuracy (OLA) 29 | 3. Local Class Accuracy (LCA) 30 | 4. Modified Local Accuracy (MLA) 31 | 5. Multiple Classifier Behaviour (MCB) 32 | 6. A Priori Selection (A Priori) 33 | 7. A Posteriori Selection (A Posteriori) 34 | 35 | * Baseline methods: 36 | 1. Oracle 37 | 2. Single Best 38 | 3. Static Selection 39 | 40 | * Dynamic Frienemy Pruning (DFP) 41 | * Diversity measures 42 | * Aggregation functions 43 | 44 | .. _Rafael M O Cruz: https://github.com/Menelau 45 | .. _Luiz G Hafemann: https://github.com/luizgh 46 | -------------------------------------------------------------------------------- /docs/news/v0.2.rst: -------------------------------------------------------------------------------- 1 | Version 0.2 2 | =========== 3 | 4 | - Second release of the stable API. By `Rafael M O Cruz`_ and `Luiz G Hafemann`_. 5 | 6 | Changes 7 | ~~~~~~~~~~~~~~~~~~~~~ 8 | 9 | * Implemented Label Encoding: labels are no longer required to be integers starting from 0. 
Categorical (strings) and non-sequential integers are supported (similarly to scikit-learn). 10 | * Batch processing: Vectorized implementation of predictions. Large speed-up in computation time (100x faster in some cases). 11 | * Predict proba: only required (in the base estimators) if using methods that rely on probabilities (or if requesting probabilities from the ensemble). 12 | * Improved documentation: Included additional examples, a step-by-step tutorial on how to use the library. 13 | * New integration tests: Now covering predict_proba, IH and DFP. 14 | * Bug fixes on 1) predict_proba 2) KNOP with DFP. 15 | 16 | .. _Rafael M O Cruz: https://github.com/Menelau 17 | .. _Luiz G Hafemann: https://github.com/luizgh 18 | -------------------------------------------------------------------------------- /docs/news/v0.3.5.rst: -------------------------------------------------------------------------------- 1 | Version 0.3.5 2 | ============= 3 | 4 | - Fourth release of the stable API. By `Rafael M O Cruz`_ and `Luiz G Hafemann`_. This release was mainly focused on compatibility with newer scikit-learn versions, performance improvement and bug fixes. 5 | 6 | 7 | Changes 8 | ~~~~~~~~~~~~~~~~~~~~~ 9 | * Update tests according to the new scikit-learn standards. 10 | * Added n_jobs parameter for parallelization. 11 | * Refactored FIRE-DES for faster processing. 12 | * Added new approximated KNN methods using Facebook FAISS search. 13 | * Added passthrough features for StackedClassifier. 14 | * Added different scoring methods (e.g., AUC, F1-Score) for Single best and Static Selection methods 15 | * Added different scoring methods for DESClustering 16 | * Added predict_proba for the Oracle method. 17 | * Added batch processing for probabilistic methods. 18 | * Added KNearest Neighbors equality option and 'n_neighbors' parameter. 19 | * Improved weighted majority voting performance with batch processing. 20 | * Removal of redundant information in documentation. 
21 | * Update reference article. 22 | 23 | Bug Fixes 24 | ~~~~~~~~~~~~ 25 | 26 | * Fixed randomness with APosteriori and APriori methods during test. 27 | * Fixed error with label encoder for the Oracle and static combination methods 28 | * Methods do not allow a pool containing a single classifier model. 29 | * Removal of Collinear features in stacked classifier. 30 | * Fixed meta-classifier when passing a classifier model to the META-DES technique. 31 | * Fixed DCS-OLA documentation. 32 | * Fixed bug when support given to a class is very small but not zero. 33 | * Fixed FAISS batch processing mode. 34 | 35 | 36 | 37 | .. _Rafael M O Cruz: https://github.com/Menelau 38 | .. _Luiz G Hafemann: https://github.com/luizgh 39 | 40 | -------------------------------------------------------------------------------- /docs/news/v0.3.rst: -------------------------------------------------------------------------------- 1 | Version 0.3 2 | =========== 3 | 4 | - Third release of the stable API. By `Rafael M O Cruz`_ and `Luiz G Hafemann`_ 5 | 6 | Changes 7 | ~~~~~~~~~~~~~~~~~~~~~ 8 | * All techniques are now sklearn estimators and pass the check_estimator tests. 9 | * All techniques can now be instantiated without a trained pool of classifiers. 10 | * Pool of classifiers can now be fitted together with the ensemble techniques. See `simple example `_. 11 | * Added support for Faiss (Facebook AI Similarity Search) for fast region of competence estimation on GPU. 12 | * Added DES Multi-class Imbalance method :class:`deslib.des.des_mi.DESMI`. 13 | * Added stacked classifier model, :class:`deslib.static.stacked.StackedClassifier` to the static ensemble module. 14 | * Added a new Instance Hardness measure :func:`utils.instance_hardness.kdn_score`. 15 | * Added Instance Hardness support when using DES-Clustering. 16 | * Added label encoder for the :mod:`static` module. 
17 | * Added a script :mod:`utils.datasets` with routines to generate synthetic datasets (e.g., the P2 and XOR datasets). 18 | * Changed name of base classes (Adding Base to their following scikit-learn standards). 19 | * Removal of **DFP_mask**, **neighbors** and **distances** as class variables. 20 | * Changed signature of methods **estimate_competence**, **predict_with_ds**, **predict_proba_with_ds**. They now require the neighbors and distances to be passed as input arguments. 21 | * Added random_state parameter to all methods in order to have reproducible results. 22 | * Added Python 3.7 support. 23 | * New and updated `examples `_. 24 | * Added performance tests comparing the speed of Faiss vs sklearn KNN. 25 | 26 | Bug Fixes 27 | ~~~~~~~~~~~~ 28 | 29 | * Fixed bug with META-DES when checking if the meta-classifier was already fitted. 30 | * Fixed bug with random state on DCS techniques. 31 | * Fixed high memory consumption on DES probabilistic methods. 32 | * Fixed bug on Heterogeneous ensembles example and notebooks examples. 33 | * Fixed bug on :class:`deslib.des.probabilistic.MinimumDifference` when only samples from a single class are provided. 34 | * Fixed problem with DS methods when the number of training examples was lower than the k value. 35 | * Fixed division by zero problems with :class:`APosteriori` :class:`APriori` :class:`MLA` when the distance is equal to zero. 36 | * Fixed bug on :func:`deslib.utils.prob_functions.exponential_func` when the support obtained for the correct class was equal to one. 37 | 38 | 39 | .. _Rafael M O Cruz: https://github.com/Menelau 40 | .. _Luiz G Hafemann: https://github.com/luizgh 41 | 42 | -------------------------------------------------------------------------------- /docs/user_guide.rst: -------------------------------------------------------------------------------- 1 | .. 
_user_guide: 2 | 3 | ###################### 4 | User guide 5 | ###################### 6 | 7 | This user guide explains how to install DESlib, how to contribute to the library and 8 | presents a step-by-step tutorial to fit and predict new instances using several dynamic selection techniques. 9 | 10 | .. toctree:: 11 | :maxdepth: 2 12 | 13 | user_guide/installation 14 | user_guide/development 15 | user_guide/tutorial 16 | user_guide/known_issues 17 | user_guide/packaging 18 | 19 | -------------------------------------------------------------------------------- /docs/user_guide/development.rst: -------------------------------------------------------------------------------- 1 | .. _development: 2 | 3 | Development 4 | =========== 5 | 6 | DESlib was started by Rafael M. O. Cruz as a way to facilitate research in this topic by providing other researchers 7 | a toolbox with everything that is required to easily develop and compare different dynamic ensemble techniques. 8 | 9 | The library is a work in progress. As an open-source project, any type of contribution is welcomed and encouraged! 10 | 11 | 12 | Contributing to DESlib 13 | ---------------------- 14 | 15 | You can contribute to the project in several ways: 16 | 17 | - Reporting bugs 18 | - Requesting features 19 | - Improving the documentation 20 | - Adding examples to use the library 21 | - Implementing new features and fixing bugs 22 | 23 | Reporting Bugs and requesting features 24 | --------------------------------------- 25 | 26 | We use Github issues to track all bugs and feature requests; feel free to 27 | open an issue if you have found a bug or wish to see a new feature implemented. 28 | Before opening a new issue, please check if the issue is not being currently addressed: 29 | [Issues](https://github.com/scikit-learn-contrib/DESlib/issues) 30 | 31 | For reporting bugs: 32 | 33 | - Include information of your working environment. 
This information 34 | can be found by running the following code snippet: 35 | 36 | .. code-block:: python 37 | 38 | import platform; print(platform.platform()) 39 | import sys; print("Python", sys.version) 40 | import numpy; print("NumPy", numpy.__version__) 41 | import scipy; print("SciPy", scipy.__version__) 42 | import sklearn; print("Scikit-Learn", sklearn.__version__) 43 | 44 | - Include a [reproducible](https://stackoverflow.com/help/mcve) code snippet 45 | or link to a [gist](https://gist.github.com). If an exception is raised, 46 | please provide the traceback. 47 | 48 | Documentation 49 | -------------- 50 | 51 | We are glad to accept any sort of documentation: function docstrings, 52 | reStructuredText documents (like this one), tutorials, etc. 53 | reStructuredText documents live in the source code repository under the 54 | doc/ directory. 55 | 56 | You can edit the documentation using any text editor and then generate 57 | the HTML output by typing ``make html`` from the doc/ directory. 58 | Alternatively, ``make`` can be used to quickly generate the 59 | documentation without the example gallery. The resulting HTML files will 60 | be placed in _build/html/ and are viewable in a web browser. See the 61 | README file in the doc/ directory for more information. 62 | 63 | For building the documentation, you will need to install sphinx and sphinx_rtd_theme. This 64 | can be easily done by installing the requirements for development using the following command: 65 | 66 | .. code-block:: bash 67 | 68 | pip install -r requirements-dev.txt 69 | 70 | Contributing with code 71 | ----------------------- 72 | 73 | The preferred way to contribute is to fork the main repository to your account: 74 | 75 | 1. Fork the [project repository](https://github.com/scikit-learn-contrib/DESlib): 76 | click on the 'Fork' button near the top of the page. This creates 77 | a copy of the code under your account on the GitHub server. 78 | 79 | 2. 
Clone this copy to your local disk: 80 | 81 | .. code-block:: bash 82 | 83 | git clone git@github.com:YourLogin/DESlib.git 84 | cd DESlib 85 | 86 | 3. Install all requirements for development: 87 | 88 | .. code-block:: bash 89 | 90 | pip install -r requirements-dev.txt 91 | pip install --editable . 92 | 93 | 4. Create a branch to hold your changes: 94 | 95 | .. code-block:: bash 96 | 97 | git checkout -b branch_name 98 | 99 | Where ``branch_name`` is the new feature or bug to be fixed. Do not work directly on the ``master`` branch. 100 | 101 | 5. Work on this copy on your computer using Git to do the version 102 | control. To record your changes in Git, then push them to GitHub with: 103 | 104 | .. code-block:: bash 105 | 106 | git push -u origin branch_name 107 | 108 | It is important to assert your code is well covered by test routines (coverage of at least 90%), well documented and 109 | follows PEP8 guidelines. 110 | 111 | 6. Create a 'Pull request' to send your changes for review. 112 | 113 | If your pull request addresses an issue, please use the title to describe 114 | the issue and mention the issue number in the pull request description to 115 | ensure a link is created to the original issue. 116 | 117 | .. _GitHub: https://github.com/scikit-learn-contrib/DESlib. 118 | 119 | .. _scikit-learn: http://scikit-learn.org/stable/ -------------------------------------------------------------------------------- /docs/user_guide/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | The library can be installed using pip: 8 | 9 | Stable version: 10 | 11 | .. code-block:: bash 12 | 13 | pip install deslib 14 | 15 | Latest version (under development): 16 | 17 | .. code-block:: bash 18 | 19 | pip install git+https://github.com/scikit-learn-contrib/DESlib 20 | 21 | DESlib is tested to work with Python 3.5, 3.6 and 3.7. 
The dependency requirements are: 22 | 23 | * scipy(>=0.13.3) 24 | * numpy(>=1.10.4) 25 | * scikit-learn(>=0.19.0) 26 | 27 | These dependencies are automatically installed using the pip commands above. 28 | 29 | Optional dependencies 30 | ===================== 31 | To use Faiss (Facebook AI Similarity Search), a fast implementation of KNN that can use GPUs, follow the instructions below: 32 | https://github.com/facebookresearch/faiss/blob/master/INSTALL.md 33 | 34 | Note that Faiss is only available on Linux and MacOS. -------------------------------------------------------------------------------- /docs/user_guide/known_issues.rst: -------------------------------------------------------------------------------- 1 | .. _known_issues: 2 | 3 | Known Issues 4 | ============ 5 | 6 | The estimators in this library are not compatible with scikit-learn's GridSearch, and other CV methods. That is, the following is not supported: 7 | 8 | .. code-block:: python 9 | 10 | from deslib.des.knora_e import KNORAE 11 | from sklearn.model_selection import GridSearchCV 12 | 13 | # (...) initialize a pool of classifiers 14 | kne = KNORAE(pool_classifiers) 15 | 16 | # Do a grid search on KNORAE's "k" parameter 17 | params = {'k': [1, 3, 5, 7]} 18 | 19 | grid = GridSearchCV(kne, params) 20 | grid.fit(X_dsel, y_dsel) # Raises an error 21 | 22 | This is due to a limitation of a scikit-learn method (sklearn.base.clone), under discussion in this issue_ 23 | 24 | .. _issue: https://github.com/scikit-learn/scikit-learn/issues/8370 -------------------------------------------------------------------------------- /docs/user_guide/packaging.rst: -------------------------------------------------------------------------------- 1 | .. 
_packaging: 2 | 3 | Releasing a new version 4 | ======================= 5 | 6 | Publishing new version involves: 7 | 8 | 1) Updating the version numbers and creating a new tag in git (which also updates the "stable" version of the documentation) 9 | 2) Creating the distribution (.tar.gz and wheel files), and uploading them to pypi 10 | 11 | Some important things to have in mind: 12 | * Read the "Packaging and Distributing Projects" guide: https://packaging.python.org/tutorials/distributing-packages/ 13 | * The version numbers (in setup.py and __init__.py) are used as metadata for pypi and for the readthedocs documentation - pay attention to them or some things can break. In general, you should be working on a version such as "0.2.dev". You then rename it to "0.2" and create a tag "v0.2". After you finish everything, you update the version to "0.3.dev" to indicate that new developments are being made for the next version. 14 | 15 | 16 | Step-by-step process 17 | -------------------- 18 | 19 | 20 | * Create an account in PyPi production: https://pypi.org/ and test: https://test.pypi.org/ 21 | * Make sure you have twine installed: 22 | 23 | .. code-block:: bash 24 | 25 | pip install twine 26 | 27 | * Update version on setup.py (e.g. "0.1") 28 | * Update version on deslib/__init__.py 29 | * Create tag: :code:`git tag ` (example: "git tag 'v0.1'") 30 | * Push the tag :code:`git push origin ` 31 | * Create the source and wheels distributions 32 | 33 | .. code-block:: bash 34 | 35 | python setup.py sdist # source distribution 36 | python setup.py bdist_wheel # wheel distribution for current python version 37 | 38 | * Upload to test pypi and check 39 | 40 | - uploading the package: 41 | 42 | .. code-block:: bash 43 | 44 | twine upload --repository-url https://test.pypi.org/legacy/ dist/* 45 | 46 | - Note: if you do this multiple times (e.g. to fix an issue), you will need to rename the files under the "dist" folder: a filename can only be submitted once to pypi. 
You may also need to manually delete the "source" version of the distribution, since there can only be one source file per version of the software 47 | 48 | - Test an installation from the testing pypi environment. 49 | 50 | .. code-block:: bash 51 | 52 | conda create -y -n testdes python=3 53 | source activate testdes 54 | pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple deslib 55 | conda remove -y --name testdes --all #remove temporary environment 56 | 57 | * Upload to production pypi 58 | 59 | .. code-block:: bash 60 | 61 | twine upload dist/* 62 | 63 | * Mark the new stable version to be built on readthedocs: 64 | 65 | - Go to https://readthedocs.org/projects/deslib/versions/, find the new tag and click "Edit". Mark the "active" checkbox and save. 66 | 67 | * Update version on setup.py and __init.py__ to mention the new version in development (e.g. "0.2.dev") 68 | 69 | 70 | Note #1: Read the docs is automatically updated: 71 | 72 | * When a new commit is done in master (this updates the "master" version) 73 | * When a new tag is pushed to github (this updates the "stable" version) -> This seems to not aways work - it is better to check 74 | 75 | Note #2: The documentation automatically links to source files for the methods/classes. This only works if the tag is pushed to github, and matches the __version__ variable in __init.py__. Example: 76 | __version__ = "0.1" and the tag being: 77 | git tag "v0.1" 78 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | .. 
_general_examples: 2 | 3 | General examples 4 | ---------------- 5 | 6 | Examples showing how to use different aspect of the library -------------------------------------------------------------------------------- /examples/example_calibrating_classifiers.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | # Author: Rafael Menelau Oliveira e Cruz 4 | # 5 | # License: BSD 3 clause 6 | """ 7 | ==================================================================== 8 | Calibrating base classifiers to estimate probabilities 9 | ==================================================================== 10 | 11 | In this example we show how to apply different DCS and DES techniques for a 12 | classification dataset. 13 | 14 | A very important aspect in dynamic selection is the generation of a pool 15 | of classifiers. A common practice in the dynamic selection literature is to 16 | use the Bagging (Bootstrap Aggregating) method to generate a pool containing 17 | base classifiers that are both diverse and informative. 18 | 19 | In this example we generate a pool of classifiers using the Bagging technique 20 | implemented on the Scikit-learn library. 
Then, we compare the results obtained 21 | by combining this pool of classifiers using the standard Bagging combination 22 | approach versus the application of dynamic selection technique to select the 23 | set of most competent classifiers 24 | """ 25 | 26 | 27 | import numpy as np 28 | from sklearn.calibration import CalibratedClassifierCV 29 | from sklearn.datasets import load_breast_cancer 30 | from sklearn.ensemble import BaggingClassifier 31 | from sklearn.linear_model import Perceptron 32 | from sklearn.model_selection import train_test_split 33 | from sklearn.preprocessing import StandardScaler 34 | 35 | from deslib.dcs.a_priori import APriori 36 | from deslib.dcs.mcb import MCB 37 | from deslib.dcs.ola import OLA 38 | from deslib.des.des_p import DESP 39 | from deslib.des.knora_e import KNORAE 40 | from deslib.des.knora_u import KNORAU 41 | from deslib.des.meta_des import METADES 42 | 43 | ############################################################################### 44 | # Preparing the dataset 45 | # --------------------- 46 | # In this part we load the breast cancer dataset from scikit-learn and 47 | # preprocess it in order to pass to the DS models. An important point here is 48 | # to normalize the data so that it has zero mean and unit variance, which is 49 | # a common requirement for many machine learning algorithms. 50 | # This step can be easily done using the StandardScaler class. 
51 | 52 | rng = np.random.RandomState(123) 53 | data = load_breast_cancer() 54 | X = data.data 55 | y = data.target 56 | # split the data into training and test data 57 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, 58 | random_state=rng) 59 | 60 | # Scale the variables to have 0 mean and unit variance 61 | scaler = StandardScaler() 62 | X_train = scaler.fit_transform(X_train) 63 | X_test = scaler.transform(X_test) 64 | 65 | # Split the data into training and DSEL for DS techniques 66 | X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train, 67 | test_size=0.5, 68 | random_state=rng) 69 | 70 | # Train a pool of 100 base classifiers 71 | pool_classifiers = BaggingClassifier(Perceptron(max_iter=10), 72 | n_estimators=100, random_state=rng) 73 | pool_classifiers.fit(X_train, y_train) 74 | 75 | # Initialize the DS techniques 76 | knorau = KNORAU(pool_classifiers) 77 | kne = KNORAE(pool_classifiers) 78 | desp = DESP(pool_classifiers) 79 | ola = OLA(pool_classifiers) 80 | mcb = MCB(pool_classifiers, random_state=rng) 81 | 82 | ############################################################################### 83 | # Calibrating base classifiers 84 | # ----------------------------- 85 | # Some dynamic selection techniques require that the base classifiers estimate 86 | # probabilities in order to estimate its competence level. Since the Perceptron 87 | # model is not a probabilistic classifier (does not implement the 88 | # predict_proba method), it needs to be calibrated for 89 | # probability estimation before being used by such DS techniques. This step can 90 | # be conducted using the CalibratedClassifierCV class from scikit-learn. Note 91 | # that in this example we pass a prefitted pool of classifiers to the 92 | # calibration method in order to use exactly the same pool used in the other 93 | # DS methods. 
94 | calibrated_pool = [] 95 | for clf in pool_classifiers: 96 | calibrated = CalibratedClassifierCV(estimator=clf, cv='prefit') 97 | calibrated.fit(X_dsel, y_dsel) 98 | calibrated_pool.append(calibrated) 99 | 100 | apriori = APriori(calibrated_pool, random_state=rng) 101 | meta = METADES(calibrated_pool) 102 | 103 | 104 | knorau.fit(X_dsel, y_dsel) 105 | kne.fit(X_dsel, y_dsel) 106 | desp.fit(X_dsel, y_dsel) 107 | ola.fit(X_dsel, y_dsel) 108 | mcb.fit(X_dsel, y_dsel) 109 | apriori.fit(X_dsel, y_dsel) 110 | meta.fit(X_dsel, y_dsel) 111 | 112 | ############################################################################### 113 | # Evaluating the methods 114 | # ----------------------- 115 | # Let's now evaluate the methods on the test set. We also use the performance 116 | # of Bagging (pool of classifiers without any selection) as a baseline 117 | # comparison. We can see that the majority of DS methods achieve higher 118 | # classification accuracy. 119 | 120 | print('Evaluating DS techniques:') 121 | print('Classification accuracy KNORA-Union: ', 122 | knorau.score(X_test, y_test)) 123 | print('Classification accuracy KNORA-Eliminate: ', 124 | kne.score(X_test, y_test)) 125 | print('Classification accuracy DESP: ', desp.score(X_test, y_test)) 126 | print('Classification accuracy OLA: ', ola.score(X_test, y_test)) 127 | print('Classification accuracy A priori: ', apriori.score(X_test, y_test)) 128 | print('Classification accuracy MCB: ', mcb.score(X_test, y_test)) 129 | print('Classification accuracy META-DES: ', meta.score(X_test, y_test)) 130 | print('Classification accuracy Bagging: ', 131 | pool_classifiers.score(X_test, y_test)) 132 | -------------------------------------------------------------------------------- /examples/example_heterogeneous.py: -------------------------------------------------------------------------------- 1 | """ 2 | ==================================================================== 3 | Example using heterogeneous ensemble 4 | 
==================================================================== 5 | DESlib accepts different classifier models in the pool of classifiers. 6 | Such pool of classifiers is called Heterogeneous. 7 | 8 | In this example, we consider a pool of classifiers composed of a 9 | Gaussian Naive Bayes, Perceptron, k-NN, Decision tree and Gaussian SVM. We 10 | also compare the result of DS methods with the voting classifier from sklearn. 11 | """ 12 | import numpy as np 13 | from sklearn.calibration import CalibratedClassifierCV 14 | # Importing dataset and preprocessing routines 15 | from sklearn.datasets import fetch_openml 16 | from sklearn.ensemble import VotingClassifier 17 | # Base classifier models: 18 | from sklearn.linear_model import Perceptron 19 | from sklearn.model_selection import train_test_split 20 | from sklearn.naive_bayes import GaussianNB 21 | from sklearn.neighbors import KNeighborsClassifier 22 | from sklearn.preprocessing import StandardScaler 23 | from sklearn.svm import SVC 24 | from sklearn.tree import DecisionTreeClassifier 25 | 26 | from deslib.dcs import MCB 27 | # Example of DCS techniques 28 | from deslib.dcs import OLA 29 | from deslib.des import DESP 30 | # Example of DES techniques 31 | from deslib.des import KNORAE 32 | from deslib.des import KNORAU 33 | from deslib.des import METADES 34 | from deslib.static import StackedClassifier 35 | 36 | rng = np.random.RandomState(42) 37 | data = fetch_openml(name='phoneme', cache=False, as_frame=False) 38 | X = data.data 39 | y = data.target 40 | 41 | # split the data into training and test data 42 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, 43 | random_state=rng) 44 | 45 | scaler = StandardScaler() 46 | X_train = scaler.fit_transform(X_train) 47 | X_test = scaler.transform(X_test) 48 | 49 | # Split the data into training and DSEL for DS techniques 50 | X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train, 51 | test_size=0.5, 52 | random_state=rng) 53 
# Train one base model of each family. Every model is probabilistic (the
# Perceptron via calibration), so the pool also suits probability-based DS.
model_perceptron = CalibratedClassifierCV(
    Perceptron(max_iter=100, random_state=rng), cv=3)
model_perceptron.fit(X_train, y_train)

model_svc = SVC(probability=True, gamma='auto', random_state=rng)
model_svc.fit(X_train, y_train)

model_bayes = GaussianNB()
model_bayes.fit(X_train, y_train)

model_tree = DecisionTreeClassifier(random_state=rng, max_depth=10)
model_tree.fit(X_train, y_train)

model_knn = KNeighborsClassifier(n_neighbors=7)
model_knn.fit(X_train, y_train)

# Named estimators for sklearn's VotingClassifier baseline
voting_classifiers = [("perceptron", model_perceptron),
                      ("svc", model_svc),
                      ("bayes", model_bayes),
                      ("tree", model_tree),
                      ("knn", model_knn)]

# Heterogeneous pool for the DS techniques (same models, same order)
pool_classifiers = [clf for _, clf in voting_classifiers]

model_voting = VotingClassifier(estimators=voting_classifiers).fit(
    X_train, y_train)

# DES techniques
knorau = KNORAU(pool_classifiers)
kne = KNORAE(pool_classifiers)
desp = DESP(pool_classifiers)
metades = METADES(pool_classifiers)
# DCS techniques
ola = OLA(pool_classifiers)
mcb = MCB(pool_classifiers)

##############################################################################
# Adding stacked classifiers as a baseline comparison. The stacked classifier
# lives in the static module. Two stacking variants are considered: one with
# logistic regression as the meta-classifier (the default configuration) and
# one with a Decision Tree.
95 | stacked_lr = StackedClassifier(pool_classifiers, random_state=rng) 96 | stacked_dt = StackedClassifier(pool_classifiers, 97 | random_state=rng, 98 | meta_classifier=DecisionTreeClassifier()) 99 | # Fitting the DS techniques 100 | knorau.fit(X_dsel, y_dsel) 101 | kne.fit(X_dsel, y_dsel) 102 | desp.fit(X_dsel, y_dsel) 103 | metades.fit(X_dsel, y_dsel) 104 | ola.fit(X_dsel, y_dsel) 105 | mcb.fit(X_dsel, y_dsel) 106 | 107 | # Fitting the tacking models 108 | stacked_lr.fit(X_dsel, y_dsel) 109 | stacked_dt.fit(X_dsel, y_dsel) 110 | 111 | # Calculate classification accuracy of each technique 112 | print('Evaluating DS techniques:') 113 | print('Classification accuracy of Majority voting the pool: ', 114 | model_voting.score(X_test, y_test)) 115 | print('Classification accuracy of KNORA-U: ', knorau.score(X_test, y_test)) 116 | print('Classification accuracy of KNORA-E: ', kne.score(X_test, y_test)) 117 | print('Classification accuracy of DESP: ', desp.score(X_test, y_test)) 118 | print('Classification accuracy of META-DES: ', metades.score(X_test, y_test)) 119 | print('Classification accuracy of OLA: ', ola.score(X_test, y_test)) 120 | print('Classification accuracy Stacking LR', stacked_lr.score(X_test, y_test)) 121 | print('Classification accuracy Stacking DT', stacked_dt.score(X_test, y_test)) 122 | -------------------------------------------------------------------------------- /examples/plot_example_DFP.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | # Author: Rafael Menelau Oliveira e Cruz 4 | # 5 | # License: BSD 3 clause 6 | """ 7 | ==================================================================== 8 | Using the Dynamic Frienemy Pruning (DFP) 9 | ==================================================================== 10 | 11 | In this example we show how to apply the dynamic frienemy pruning (DFP) to 12 | different dynamic selection techniques. 
The DFP method is an online pruning model which analyzes the region
of competence to know if it is composed of samples from different classes
(indecision region). Then, it removes the base classifiers that do not
correctly classify at least a pair of samples coming from different classes,
i.e., the base classifiers that cannot separate the classes in the local
region. More information on this method can be found in refs [1] and [2].

DES techniques using the DFP algorithm are called FIRE-DES (Frienemy Indecision
REgion Dynamic Ensemble Selection).
The FIRE-DES is shown to significantly improve the performance of several
dynamic selection algorithms when dealing with imbalanced classification
problems as it avoids the classifiers that are biased towards the majority
class in predicting the label for the query.

References
----------
[1] Oliveira, D.V.R., Cavalcanti, G.D.C. and Sabourin, R., "Online Pruning
of Base Classifiers for Dynamic Ensemble Selection", Pattern Recognition,
vol. 72, 2017, pp 44-58.

[2] Cruz, R.M.O., Oliveira, D.V.R., Cavalcanti, G.D.C. and Sabourin, R.,
"FIRE-DES++: Enhanced online pruning of base classifiers for dynamic ensemble
selection", Pattern Recognition, vol. 85, 2019, pp 149-160.
37 | """ 38 | 39 | import numpy as np 40 | from sklearn.datasets import make_classification 41 | from sklearn.ensemble import RandomForestClassifier 42 | from sklearn.model_selection import train_test_split 43 | from sklearn.metrics import roc_auc_score 44 | import matplotlib.pyplot as plt 45 | from deslib.dcs import APosteriori 46 | from deslib.dcs import APriori 47 | from deslib.dcs import LCA 48 | from deslib.dcs import OLA 49 | from deslib.des import DESP 50 | from deslib.des import METADES 51 | 52 | rng = np.random.RandomState(654321) 53 | 54 | # Generate an imbalanced classification dataset 55 | X, y = make_classification(n_classes=2, n_samples=2000, weights=[0.05, 0.95], 56 | random_state=rng) 57 | # split the data into training and test data 58 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, 59 | random_state=rng) 60 | 61 | # Split the data into training and DSEL for DS techniques 62 | X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train, 63 | test_size=0.5, 64 | random_state=rng) 65 | # Considering a pool composed of 10 base classifiers 66 | pool_classifiers = RandomForestClassifier(n_estimators=10, random_state=rng, 67 | max_depth=10) 68 | pool_classifiers.fit(X_train, y_train) 69 | 70 | ds_names = ['A Priori', 'A Posteriori', 'OLA', 'LCA', 'DES-P', 'META-DES'] 71 | 72 | # DS techniques without DFP 73 | apriori = APriori(pool_classifiers, random_state=rng) 74 | aposteriori = APosteriori(pool_classifiers, random_state=rng) 75 | ola = OLA(pool_classifiers) 76 | lca = LCA(pool_classifiers) 77 | desp = DESP(pool_classifiers) 78 | meta = METADES(pool_classifiers) 79 | 80 | # FIRE-DS techniques (with DFP) 81 | fire_apriori = APriori(pool_classifiers, DFP=True, random_state=rng) 82 | fire_aposteriori = APosteriori(pool_classifiers, DFP=True, random_state=rng) 83 | fire_ola = OLA(pool_classifiers, DFP=True) 84 | fire_lca = LCA(pool_classifiers, DFP=True) 85 | fire_desp = DESP(pool_classifiers, DFP=True) 86 | fire_meta = 
METADES(pool_classifiers, DFP=True) 87 | 88 | list_ds = [apriori, aposteriori, ola, lca, desp, meta] 89 | list_fire_ds = [fire_apriori, fire_aposteriori, fire_ola, 90 | fire_lca, fire_desp, fire_meta] 91 | 92 | scores_ds = [] 93 | for ds in list_ds: 94 | ds.fit(X_dsel, y_dsel) 95 | scores_ds.append(roc_auc_score(y_test, ds.predict(X_test))) 96 | 97 | scores_fire_ds = [] 98 | for fire_ds in list_fire_ds: 99 | fire_ds.fit(X_dsel, y_dsel) 100 | scores_fire_ds.append(roc_auc_score(y_test, fire_ds.predict(X_test))) 101 | 102 | ############################################################################### 103 | # Comparing DS techniques with FIRE-DES techniques 104 | # ------------------------------------------------ 105 | # Let's now evaluate the DES methods on the test set. Since we are dealing with 106 | # imbalanced data, we use the area under the roc curve (AUC) as performance 107 | # metric instead of classification accuracy. The AUC can be easily calculated 108 | # using the `sklearn.metrics.roc_auc_score` function from scikit-learn. 
109 | 110 | width = 0.35 111 | ind = np.arange(len(ds_names)) 112 | plt.bar(ind, scores_ds, width, label='DES', edgecolor='k') 113 | plt.bar(ind + width, scores_fire_ds, width, label='FIRE-DES', edgecolor='k') 114 | 115 | plt.ylabel('Area under the roc curve (AUC)') 116 | plt.title('AUC Performance: DS vs FIRE-DES') 117 | plt.ylim((0.60, 0.81)) 118 | plt.xticks(ind + width / 2, ds_names) 119 | plt.legend(loc='best') 120 | plt.show() 121 | -------------------------------------------------------------------------------- /examples/plot_influence_k_value.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | # Author: Rafael Menelau Oliveira e Cruz 4 | # 5 | # License: BSD 3 clause 6 | """ 7 | ==================================================================== 8 | Measuring the influence of the region of competence 9 | ==================================================================== 10 | 11 | This example shows how the size of the region of competence (parameter k) 12 | can influence the final performance of DS techniques. 13 | 14 | In this example we vary the value of the parameter k from 3 to 15 and measure 15 | the performance of 7 different dynamic selection technique using the same 16 | pool of classifiers. 17 | 18 | """ 19 | 20 | ############################################################################### 21 | # Let's start by importing all required modules. In this example we use the 22 | # new sklearn-OpenML interface to fetch the diabetes classification problem. 
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from deslib.dcs import LCA
# DCS techniques
from deslib.dcs import MCB
from deslib.dcs import OLA
from deslib.dcs import Rank
# DES techniques
from deslib.des import DESP
from deslib.des import KNORAE
from deslib.des import KNORAU

rng = np.random.RandomState(123456)

data = fetch_openml(name='diabetes', cache=False, as_frame=False)
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)

# Normalizing the dataset to have 0 mean and unit variance.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

pool_classifiers = BaggingClassifier(Perceptron(max_iter=100),
                                     random_state=rng)
pool_classifiers.fit(X_train, y_train)

# Initialize the DS techniques that will be compared across k values
mcb = MCB(pool_classifiers, random_state=rng)
ola = OLA(pool_classifiers)
des_p = DESP(pool_classifiers)
knu = KNORAU(pool_classifiers)
lca = LCA(pool_classifiers)
kne = KNORAE(pool_classifiers)
rank = Rank(pool_classifiers)
list_ds_methods = [mcb, ola, des_p, knu, lca, kne, rank]
names = ['MCB', 'OLA', 'DES-P', 'KNORA-U', 'LCA', 'KNORA-E', 'Rank']

# Region of competence sizes to evaluate (k from 3 to 15, inclusive)
k_value_list = range(3, 16)

###############################################################################
# Plot accuracy x region of competence size.
# -------------------------------------------
# We can see that this parameter can have a huge influence in the performance
# of certain DS techniques.
The main exception being the KNORA-E and Rank 76 | # which have built-in mechanism to automatically adjust the region 77 | # of competence size during the competence level estimation. 78 | 79 | fig, ax = plt.subplots() 80 | for ds_method, name in zip(list_ds_methods, names): 81 | accuracy = [] 82 | for k in k_value_list: 83 | ds_method.k = k 84 | ds_method.fit(X_train, y_train) 85 | accuracy.append(ds_method.score(X_test, y_test)) 86 | ax.plot(k_value_list, accuracy, label=name) 87 | 88 | plt.xticks(k_value_list) 89 | ax.set_ylim(0.60, 0.80) 90 | ax.set_xlabel('Region of competence size (K value)', fontsize=13) 91 | ax.set_ylabel('Accuracy on the test set (%)', fontsize=13) 92 | ax.legend(loc='lower right') 93 | plt.show() 94 | -------------------------------------------------------------------------------- /examples/plot_random_forest.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | # Author: Rafael Menelau Oliveira e Cruz 4 | # 5 | # License: BSD 3 clause 6 | """ 7 | ==================================================================== 8 | Comparing dynamic selection with Random Forest 9 | ==================================================================== 10 | 11 | In this example we use a pool of classifiers generated using the Random Forest 12 | method rather than Bagging. We also show how to change the size of the region 13 | of competence, used to estimate the local competence of the base classifiers. 14 | 15 | This demonstrates that the library accepts any kind of base classifiers as 16 | long as they implement the predict and predict proba functions. Moreover, 17 | any ensemble generation method such as Boosting or Rotation Trees can be used 18 | to generate a pool containing diverse base classifiers. We also included the 19 | performance of the RandomForest classifier as a baseline comparison. 
20 | """ 21 | 22 | import matplotlib.pyplot as plt 23 | import numpy as np 24 | from matplotlib.cm import get_cmap 25 | from matplotlib.ticker import FuncFormatter 26 | from sklearn.datasets import fetch_openml 27 | # Pool of base classifiers 28 | from sklearn.ensemble import RandomForestClassifier 29 | from sklearn.linear_model import LogisticRegression 30 | from sklearn.model_selection import train_test_split 31 | 32 | from deslib.dcs.mcb import MCB 33 | # Example of a dcs techniques 34 | from deslib.dcs.ola import OLA 35 | # Example of a des techniques 36 | from deslib.des.des_p import DESP 37 | from deslib.des.knora_e import KNORAE 38 | from deslib.des.knora_u import KNORAU 39 | from deslib.des.meta_des import METADES 40 | # Example of stacked model 41 | from deslib.static.stacked import StackedClassifier 42 | 43 | rng = np.random.RandomState(42) 44 | 45 | # Fetch a classification dataset from OpenML 46 | data = fetch_openml(name='phoneme', version=1, 47 | cache=False, as_frame=False) 48 | X = data.data 49 | y = data.target 50 | # split the data into training and test data 51 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, 52 | random_state=rng) 53 | 54 | # Training a random forest to be used as the pool of classifiers. 55 | # We set the maximum depth of the tree so that it 56 | # can estimate probabilities 57 | RF = RandomForestClassifier(random_state=rng, n_estimators=10) 58 | RF.fit(X_train, y_train) 59 | 60 | X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train, 61 | test_size=0.750, 62 | random_state=rng) 63 | 64 | stacked = StackedClassifier(RF, LogisticRegression()) 65 | stacked.fit(X_dsel, y_dsel) 66 | 67 | # Initialize a DS technique. 
Here we specify the size of 68 | # the region of competence (5 neighbors) 69 | knorau = KNORAU(RF, k=5, random_state=rng) 70 | kne = KNORAE(RF, k=5, random_state=rng) 71 | desp = DESP(RF, k=5, random_state=rng) 72 | ola = OLA(RF, k=5, random_state=rng) 73 | mcb = MCB(RF, k=5, random_state=rng) 74 | meta = METADES(RF, k=5, random_state=rng) 75 | 76 | # Fit the DS techniques 77 | knorau.fit(X_dsel, y_dsel) 78 | kne.fit(X_dsel, y_dsel) 79 | desp.fit(X_dsel, y_dsel) 80 | meta.fit(X_dsel, y_dsel) 81 | ola.fit(X_dsel, y_dsel) 82 | mcb.fit(X_dsel, y_dsel) 83 | 84 | ############################################################################### 85 | # Plotting the results 86 | # ----------------------- 87 | # Let's now evaluate the methods on the test set. 88 | 89 | rf_score = RF.score(X_test, y_test) 90 | stacked_score = stacked.score(X_test, y_test) 91 | knorau_score = knorau.score(X_test, y_test) 92 | kne_score = kne.score(X_test, y_test) 93 | desp_score = desp.score(X_test, y_test) 94 | ola_score = ola.score(X_test, y_test) 95 | mcb_score = mcb.score(X_test, y_test) 96 | meta_score = meta.score(X_test, y_test) 97 | print('Classification accuracy RF: ', rf_score) 98 | print('Classification accuracy Stacked: ', stacked_score) 99 | print('Evaluating DS techniques:') 100 | print('Classification accuracy KNORA-U: ', knorau_score) 101 | print('Classification accuracy KNORA-E: ', kne_score) 102 | print('Classification accuracy DESP: ', desp_score) 103 | print('Classification accuracy OLA: ', ola_score) 104 | print('Classification accuracy MCB: ', mcb_score) 105 | print('Classification accuracy META-DES: ', meta_score) 106 | 107 | cmap = get_cmap('Dark2') 108 | colors = [cmap(i) for i in np.linspace(0, 1, 7)] 109 | labels = ['RF', 'Stacked', 'KNORA-U', 'KNORA-E', 'DESP', 'OLA', 'MCB', 110 | 'META-DES'] 111 | 112 | fig, ax = plt.subplots() 113 | pct_formatter = FuncFormatter(lambda x, pos: '{:.1f}'.format(x * 100)) 114 | ax.bar(np.arange(8), 115 | [rf_score, stacked_score, 
knorau_score, kne_score, desp_score, 116 | ola_score, mcb_score, meta_score], 117 | color=colors, 118 | tick_label=labels) 119 | ax.set_ylim(0.65, 0.80) 120 | ax.set_xlabel('Method', fontsize=13) 121 | ax.set_ylabel('Accuracy on the test set (%)', fontsize=13) 122 | ax.yaxis.set_major_formatter(pct_formatter) 123 | for tick in ax.get_xticklabels(): 124 | tick.set_rotation(45) 125 | plt.subplots_adjust(bottom=0.15) 126 | plt.show() 127 | -------------------------------------------------------------------------------- /examples/plot_using_instance_hardness.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | # Author: Rafael Menelau Oliveira e Cruz 4 | # 5 | # License: BSD 3 clause 6 | """ 7 | ==================================================================== 8 | Dynamic selection vs K-NN: Using instance hardness 9 | ==================================================================== 10 | 11 | One aspect about dynamic selection techniques is that it can better deal with 12 | the classification of test examples associated with high degree of instance 13 | hardness. Such examples are often found close to the border of the classes, 14 | with the majority of its neighbors belonging to different classes. 15 | On the other hand, the KNN method, which is often used to estimate the region 16 | of competence in DS methods works better in the classification of examples 17 | associated with low instance hardness [1]. 18 | 19 | DESlib already implements a switch mechanism between DS techniques and the KNN 20 | classifier according to the hardness level of an instance. This example 21 | varies the threshold in which KNN is used for classification instead of DS 22 | methods. It also compares the classification results with the standard KNN 23 | as a baseline. 24 | 25 | The switch mechanism also reduces the computational cost involved since only 26 | part of the test samples are classified by the DS method. 
27 | 28 | References 29 | ---------- 30 | [1] Cruz, Rafael MO, et al. "Dynamic Ensemble Selection VS K-NN: why and 31 | when Dynamic Selection obtains higher classification performance?." 32 | arXiv preprint arXiv:1804.07882 (2018). 33 | """ 34 | 35 | ############################################################################### 36 | # Let's start by importing all required modules. In this example we use the 37 | # new sklearn-OpenML interface to fetch the diabetes classification problem. 38 | 39 | import matplotlib.pyplot as plt 40 | import numpy as np 41 | from sklearn.datasets import fetch_openml 42 | from sklearn.ensemble import BaggingClassifier 43 | from sklearn.model_selection import train_test_split 44 | from sklearn.preprocessing import StandardScaler 45 | from sklearn.tree import DecisionTreeClassifier 46 | 47 | from deslib.dcs import MCB 48 | from deslib.dcs import OLA 49 | from deslib.dcs import Rank 50 | from deslib.des import DESP 51 | from deslib.des import KNORAE 52 | from deslib.des import KNORAU 53 | 54 | rng = np.random.RandomState(123456) 55 | 56 | data = fetch_openml(name='diabetes', version=1, cache=False, as_frame=False) 57 | X = data.data 58 | y = data.target 59 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng) 60 | 61 | # Normalizing the dataset to have 0 mean and unit variance. 62 | scaler = StandardScaler() 63 | X_train = scaler.fit_transform(X_train) 64 | X_test = scaler.transform(X_test) 65 | 66 | # Training a pool of classifiers using the bagging technique. 
67 | pool_classifiers = BaggingClassifier(DecisionTreeClassifier(random_state=rng), 68 | random_state=rng) 69 | pool_classifiers.fit(X_train, y_train) 70 | 71 | ############################################################################### 72 | # Setting DS method to use the switch mechanism 73 | # ---------------------------------------------- 74 | # In order to activate the functionality to switch between DS and KNN according 75 | # to the instance hardness level we need to set the DS techniques to use this 76 | # information. This is done by setting the hyperparameter `with_IH` to True. 77 | # In this example we consider four different values for te threshold 78 | mcb = MCB(pool_classifiers, with_IH=True, random_state=rng) 79 | ola = OLA(pool_classifiers, with_IH=True, random_state=rng) 80 | rank = Rank(pool_classifiers, with_IH=True, random_state=rng) 81 | des_p = DESP(pool_classifiers, with_IH=True, random_state=rng) 82 | kne = KNORAE(pool_classifiers, with_IH=True, random_state=rng) 83 | knu = KNORAU(pool_classifiers, with_IH=True, random_state=rng) 84 | list_ih_values = [0.0, 1./7., 2./7., 3./7.] 85 | 86 | list_ds_methods = [method.fit(X_train, y_train) for method in 87 | [mcb, ola, rank, des_p, kne, knu]] 88 | names = ['MCB', 'OLA', 'Mod. 
Rank', 'DES-P', 'KNORA-E', 'KNORA-U'] 89 | 90 | # Plot accuracy x IH 91 | fig, ax = plt.subplots() 92 | for ds_method, name in zip(list_ds_methods, names): 93 | accuracy = [] 94 | for idx_ih, ih_rate in enumerate([0.0, 0.14, 0.28, 0.42]): 95 | ds_method.IH_rate = ih_rate 96 | accuracy.append(ds_method.score(X_test, y_test)) 97 | ax.plot(list_ih_values, accuracy, label=name) 98 | 99 | plt.xticks(list_ih_values) 100 | ax.set_ylim(0.65, 0.80) 101 | ax.set_xlabel('IH value', fontsize=13) 102 | ax.set_ylabel('Accuracy on the test set (%)', fontsize=13) 103 | ax.legend() 104 | 105 | plt.show() 106 | -------------------------------------------------------------------------------- /examples/simple_example.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | # Author: Rafael Menelau Oliveira e Cruz 4 | # 5 | # License: BSD 3 clause 6 | """ 7 | ==================================================================== 8 | Simple example 9 | ==================================================================== 10 | 11 | In this example we show how to apply different DCS and DES techniques for a 12 | classification dataset. 
13 | 14 | """ 15 | import numpy as np 16 | from sklearn.datasets import make_classification 17 | from sklearn.model_selection import train_test_split 18 | from deslib.des import METADES 19 | from deslib.des import KNORAE 20 | 21 | 22 | # Setting up the random state to have consistent results 23 | rng = np.random.RandomState(42) 24 | 25 | # Generate a classification dataset 26 | X, y = make_classification(n_samples=1000, random_state=rng) 27 | # split the data into training and test data 28 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, 29 | random_state=rng) 30 | 31 | # Split the data into training and DSEL for DS techniques 32 | X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train, 33 | test_size=0.5, 34 | random_state=rng) 35 | 36 | # Initialize the DS techniques. DS methods can be initialized without 37 | # specifying a single input parameter. In this example, we just pass the random 38 | # state in order to always have the same result. 39 | kne = KNORAE(random_state=rng) 40 | meta = METADES(random_state=rng) 41 | 42 | # Fitting the des techniques 43 | kne.fit(X_dsel, y_dsel) 44 | meta.fit(X_dsel, y_dsel) 45 | 46 | # Calculate classification accuracy of each technique 47 | print('Evaluating DS techniques:') 48 | print('Classification accuracy KNORA-Eliminate: ', 49 | kne.score(X_test, y_test)) 50 | print('Classification accuracy META-DES: ', meta.score(X_test, y_test)) 51 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.17.0 2 | scipy>=1.4.0 3 | scikit-learn>=1.0.2 4 | sphinx 5 | sphinx_rtd_theme 6 | numpydoc 7 | pytest 8 | coverage 9 | pytest-cov 10 | pillow 11 | sphinx_gallery 12 | matplotlib>=2 13 | nose 14 | -------------------------------------------------------------------------------- /requirements.txt: 
-------------------------------------------------------------------------------- 1 | scipy>=1.4.0 2 | numpy>=1.17.0 3 | scikit-learn>=1.0.2 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import codecs 4 | import os 5 | from distutils.core import setup 6 | 7 | from setuptools import find_packages 8 | 9 | setup_path = os.path.abspath(os.path.dirname(__file__)) 10 | with codecs.open(os.path.join(setup_path, 'README.rst'), encoding='utf-8-sig') as f: 11 | README = f.read() 12 | 13 | setup(name='DESlib', 14 | version='0.3.7', 15 | url='https://github.com/Menelau/DESlib', 16 | maintainer='Rafael M. O. Cruz, L. G. Hafemann', 17 | maintainer_email='rafaelmenelau@gmail.com', 18 | description='Implementation of Dynamic Ensemble Selection methods', 19 | long_description=README, 20 | author='Rafael M. O. Cruz', 21 | author_email='rafaelmenelau@gmail.com', 22 | license='BSD 3-clause "New" or "Revised License"', 23 | 24 | classifiers=[ 25 | 'Development Status :: 3 - Alpha', 26 | 'Intended Audience :: Developers', 27 | 'Intended Audience :: Science/Research', 28 | 'License :: OSI Approved :: BSD License', 29 | 'Programming Language :: Python :: 3', 30 | 'Programming Language :: Python :: 3.5', 31 | 'Programming Language :: Python :: 3.6', 32 | 'Programming Language :: Python :: 3.7', 33 | 'Programming Language :: Python :: 3.8', 34 | 'Programming Language :: Python :: 3.9', 35 | 'Programming Language :: Python :: 3.10', 36 | 'Programming Language :: Python :: 3.11', 37 | 'Programming Language :: Python :: 3.12', 38 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 39 | ], 40 | install_requires=[ 41 | 'scikit-learn>=1.0.2', 42 | 'numpy>=1.17.0', 43 | 'scipy>=1.4.0', 44 | ], 45 | python_requires='>=3', 46 | 47 | packages=find_packages()) 48 | 49 | 
--------------------------------------------------------------------------------