├── .all-contributorsrc ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── ✅ task.md │ ├── 🐛 bug report.md │ └── 🚀 feature request.md ├── images │ ├── niaaml.png │ ├── niaaml_cli_help.png │ ├── niaaml_cli_infer_help.png │ ├── niaaml_cli_optimize_example.png │ └── niaaml_logo.png ├── pull_request_template.md └── workflows │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── CHANGELOG.md ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── COMPONENTS.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docs ├── Makefile ├── about.rst ├── api │ ├── classifiers.rst │ ├── data.rst │ ├── fitness.rst │ ├── index.rst │ ├── niaaml.rst │ └── preprocessing.rst ├── changelog.rst ├── code_of_conduct.rst ├── conf.py ├── contributing.rst ├── documentation.rst ├── getting_started.rst ├── index.rst ├── installation.rst ├── make.bat ├── paper │ └── 10.21105.joss.02949.pdf ├── requirements.txt └── testing.rst ├── examples ├── classifier.py ├── example_files │ ├── dataset.csv │ ├── dataset_categorical.csv │ ├── dataset_categorical_missing.csv │ └── pipeline.ppln ├── export_pipeline_object.py ├── export_pipeline_text.py ├── factories.py ├── feature_encoding.py ├── feature_imputing.py ├── feature_selection.py ├── feature_selection_threshold_algorithms.py ├── feature_transform.py ├── fitness.py ├── load_data_basic.py ├── load_data_csv.py ├── load_pipeline_object_file.py ├── logger.py ├── optimization_stats.py ├── optimize_run_pipeline.py ├── optimize_run_pipeline_categorical_features.py ├── optimize_run_pipeline_logger.py ├── optimize_run_pipeline_missing_values.py ├── run_all.bat ├── run_all.sh ├── run_pipeline_optimizer_array_data.py ├── run_pipeline_optimizer_csv_data.py ├── run_pipeline_optimizer_csv_data_categorical.py ├── run_pipeline_optimizer_csv_data_missing.py └── run_pipeline_optimizer_csv_data_v1.py ├── niaaml ├── __init__.py ├── classifiers │ ├── __init__.py │ ├── ada_boost.py │ ├── bagging.py │ ├── classifier.py │ ├── decision_tree.py │ ├── 
extremely_randomized_trees.py │ ├── gaussian_naive_bayes.py │ ├── gaussian_process.py │ ├── k_neighbors.py │ ├── linear_svc.py │ ├── multi_layer_perceptron.py │ ├── quadratic_driscriminant_analysis.py │ ├── random_forest.py │ ├── regression_decision_tree.py │ ├── regression_gaussian_process.py │ ├── regression_lasso.py │ ├── regression_linear_model.py │ ├── regression_ridge.py │ └── utility.py ├── cli.py ├── data │ ├── __init__.py │ ├── basic_data_reader.py │ ├── csv_data_reader.py │ └── data_reader.py ├── fitness │ ├── __init__.py │ ├── accuracy.py │ ├── cohen_kappa.py │ ├── f1.py │ ├── fitness_function.py │ ├── mse.py │ ├── precision.py │ ├── r2.py │ └── utility.py ├── logger.py ├── pipeline.py ├── pipeline_component.py ├── pipeline_optimizer.py ├── preprocessing │ ├── __init__.py │ ├── encoding │ │ ├── __init__.py │ │ ├── feature_encoder.py │ │ ├── one_hot_encoder.py │ │ └── utility.py │ ├── feature_selection │ │ ├── __init__.py │ │ ├── _feature_selection_threshold_problem.py │ │ ├── bat_algorithm.py │ │ ├── differential_evolution.py │ │ ├── feature_selection_algorithm.py │ │ ├── grey_wolf_optimizer.py │ │ ├── jDEFSTH.py │ │ ├── particle_swarm_optimization.py │ │ ├── select_k_best.py │ │ ├── select_percentile.py │ │ ├── select_univariate_regression.py │ │ ├── utility.py │ │ └── variance_threshold.py │ ├── feature_transform │ │ ├── __init__.py │ │ ├── feature_transform_algorithm.py │ │ ├── max_abs_scaler.py │ │ ├── normalizer.py │ │ ├── quantile_transformer.py │ │ ├── robust_scaler.py │ │ ├── standard_scaler.py │ │ └── utility.py │ ├── imputation │ │ ├── __init__.py │ │ ├── imputer.py │ │ ├── simple_imputer.py │ │ └── utility.py │ └── preprocessing_algorithm.py └── utilities.py ├── paper ├── niaamlFlow.png ├── paper.bib └── paper.md ├── poetry.lock ├── pyproject.toml └── tests ├── __init__.py ├── test_basic_data_reader.py ├── test_classifier_factory.py ├── test_classifiers.py ├── test_csv_data_reader.py ├── test_encoder_factory.py ├── test_feature_encoder.py ├── 
test_feature_selection.py ├── test_feature_selection_algorithm_factory.py ├── test_feature_transform.py ├── test_feature_transform_algorithm_factory.py ├── test_fitness.py ├── test_fitness_factory.py ├── test_imputer.py ├── test_imputer_factory.py ├── test_pipeline.py ├── test_pipeline_optimizer.py ├── test_utilities.py └── tests_files ├── dataset_header_classes.csv ├── dataset_header_classes_cat_miss.csv ├── dataset_header_no_classes.csv ├── dataset_no_header_classes.csv ├── dataset_no_header_no_classes.csv └── dataset_real_estate_regression.csv /.all-contributorsrc: -------------------------------------------------------------------------------- 1 | { 2 | "files": [ 3 | "README.md" 4 | ], 5 | "imageSize": 100, 6 | "commit": false, 7 | "contributors": [ 8 | { 9 | "login": "lukapecnik", 10 | "name": "Luka Pečnik", 11 | "avatar_url": "https://avatars0.githubusercontent.com/u/23029992?s=460&u=d1c802fd8c82af0a020b1e21af80a34d6e28fb10&v=4", 12 | "profile": "https://github.com/lukapecnik", 13 | "contributions": [ 14 | "code", 15 | "doc", 16 | "review", 17 | "bug", 18 | "example", 19 | "test", 20 | "infra" 21 | ] 22 | }, 23 | { 24 | "login": "firefly-cpp", 25 | "name": "firefly-cpp", 26 | "avatar_url": "https://avatars2.githubusercontent.com/u/1633361?v=4", 27 | "profile": "https://github.com/firefly-cpp", 28 | "contributions": [ 29 | "code", 30 | "bug", 31 | "mentoring", 32 | "research", 33 | "ideas" 34 | ] 35 | }, 36 | { 37 | "login": "sisco0", 38 | "name": "sisco0", 39 | "avatar_url": "https://avatars0.githubusercontent.com/u/25695302?v=4", 40 | "profile": "https://github.com/sisco0", 41 | "contributions": [ 42 | "ideas" 43 | ] 44 | }, 45 | { 46 | "login": "zStupan", 47 | "name": "zStupan", 48 | "avatar_url": "https://avatars.githubusercontent.com/u/48752988?v=4", 49 | "profile": "https://github.com/zStupan", 50 | "contributions": [ 51 | "code" 52 | ] 53 | }, 54 | { 55 | "login": "musicinmybrain", 56 | "name": "Ben Beasley", 57 | "avatar_url": 
"https://avatars.githubusercontent.com/u/6898909?v=4", 58 | "profile": "https://github.com/musicinmybrain", 59 | "contributions": [ 60 | "code", 61 | "infra" 62 | ] 63 | } 64 | ], 65 | "skipCi": true, 66 | "contributorsPerLine": 7, 67 | "projectName": "NiaAML", 68 | "projectOwner": "lukapecnik", 69 | "repoType": "github", 70 | "repoHost": "https://github.com" 71 | } 72 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/✅ task.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "✅ Task" 3 | about: "If something needs to be done." 4 | title: '[TASK] ' 5 | labels: 'task' 6 | 7 | --- 8 | 9 | ## ✅ Task 10 | 11 | Add a description of the task. 12 | 13 | ### 📃 Checklist 14 | 15 | - [ ] sub-task1 description 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/🐛 bug report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "🐛 Bug Report" 3 | about: "If something isn't working as expected." 4 | title: '[BUG] ' 5 | labels: 'bug' 6 | 7 | --- 8 | 9 | ## 🐛 Bug Report 10 | 11 | > 🚮 delete any section that is not helpful/required for your report (including this message) 12 | 13 | ### 🔥 Current Behavior 14 | 15 | A clear and concise description of the behavior. 16 | 17 | ### ✨ Expected Behavior 18 | 19 | A clear and concise description of what you expected to happen (or code). 20 | 21 | ### 💻 Reproducing the Bug 22 | 23 | Input Code, REPL or Repo link 24 | 25 | #### 📄🖼️ Context and Screenshots 26 | 27 | Add any other context about the problem here. If applicable, add screenshots to help explain. 
28 | 29 | #### 🐋 Environment 30 | 31 | Add information about your environment if you think the bug is specific to your setup. 32 | 33 | ### 🤔 Possible Solution 34 | 35 | Only if you have suggestions on a fix for the bug. 36 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/🚀 feature request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "🚀 Feature Request" 3 | about: "If you want to add/implement/request a new feature." 4 | title: '[FEATURE] ' 5 | labels: 'enhancement' 6 | 7 | --- 8 | 9 | ## 🚀 Feature Request 10 | 11 | > 🚮 feel free to delete any section that is not helpful/required for your report (including this message) 12 | 13 | ### 😞 Problem Statement 14 | 15 | A clear and concise description of what the problem is. Ex. I have an issue when [...] 16 | 17 | ### 💬 Feature Description 18 | 19 | A clear and concise description of what you want to happen. Add any considered drawbacks. 20 | 21 | If you can, explain how users will be able to use this and possibly write out a version the docs. 22 | Maybe a screenshot or design? 23 | 24 | ### 🤔 Alternatives 25 | 26 | A clear and concise description of any alternative solutions or features you've considered. 27 | 28 | ### ✅ Implementation Checklist 29 | 30 | - [ ] ... 
31 | -------------------------------------------------------------------------------- /.github/images/niaaml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firefly-cpp/NiaAML/11766422d32bc83e12f10706f59674fdb38e783e/.github/images/niaaml.png -------------------------------------------------------------------------------- /.github/images/niaaml_cli_help.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firefly-cpp/NiaAML/11766422d32bc83e12f10706f59674fdb38e783e/.github/images/niaaml_cli_help.png -------------------------------------------------------------------------------- /.github/images/niaaml_cli_infer_help.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firefly-cpp/NiaAML/11766422d32bc83e12f10706f59674fdb38e783e/.github/images/niaaml_cli_infer_help.png -------------------------------------------------------------------------------- /.github/images/niaaml_cli_optimize_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firefly-cpp/NiaAML/11766422d32bc83e12f10706f59674fdb38e783e/.github/images/niaaml_cli_optimize_example.png -------------------------------------------------------------------------------- /.github/images/niaaml_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firefly-cpp/NiaAML/11766422d32bc83e12f10706f59674fdb38e783e/.github/images/niaaml_logo.png -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | > 🙇💖 Thank you for contributing to NiaAML! 
2 | 3 | > 🚮 feel free to delete any section that is not helpful/required for your report (including this message) 4 | 5 | ## 💬 Description 6 | 7 | Please include a summary of the changes and the related issue. Please also include relevant motivation and context. List any dependencies that are required for this change. 8 | 9 | ## ❗ Issue Links 10 | Fixes # 11 | 12 | ## 🧪 How Has This Been Tested? 13 | 14 | Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration 15 | 16 | # ✅ Checklist 17 | 18 | - [ ] My code follows the style guidelines of this project 19 | - [ ] I have performed a self-review of my code 20 | - [ ] I have commented my code, particularly in hard-to-understand areas 21 | - [ ] I have made corresponding changes to the documentation 22 | - [ ] I have added tests that prove my fix is effective or that my feature works 23 | - [ ] New and existing unit tests pass locally with my changes 24 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: NiaAML 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ${{ matrix.os }} 13 | strategy: 14 | matrix: 15 | os: [ubuntu-latest, windows-latest, macos-latest] 16 | python-version: ['3.10', '3.11', '3.12'] 17 | defaults: 18 | run: 19 | shell: bash 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v3 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Get full Python version 28 | id: full-python-version 29 | run: echo ::set-output name=version::$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") 30 | - name: Install poetry 31 | run: | 32 | curl 
-sL https://install.python-poetry.org | python - -y 33 | - name: Update path 34 | if: ${{ matrix.os != 'windows-latest' }} 35 | run: echo "$HOME/.local/bin" >> $GITHUB_PATH 36 | - name: Update Windows path 37 | if: ${{ matrix.os == 'windows-latest' }} 38 | run: echo "$APPDATA\Python\Scripts" >> $GITHUB_PATH 39 | - name: Configure poetry 40 | run: poetry config virtualenvs.in-project true 41 | - name: Set up cache 42 | uses: actions/cache@v3 43 | id: cache 44 | with: 45 | path: .venv 46 | key: venv-${{ runner.os }}-${{ steps.full-python-version.outputs.version }}-${{ hashFiles('**/poetry.lock') }} 47 | - name: Ensure cache is healthy 48 | if: steps.cache.outputs.cache-hit == 'true' 49 | run: timeout 10s poetry run pip --version || rm -rf .venv 50 | - name: Install dependencies 51 | run: poetry install 52 | - name: Run tests 53 | run: poetry run pytest 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | 132 | .vscode -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | hooks: 4 | - id: black 5 | name: black 6 | entry: black . 7 | language: system 8 | always_run: true 9 | pass_filenames: false 10 | - id: autoflake 11 | name: autoflake 12 | entry: autoflake --recursive . 13 | language: system 14 | always_run: true 15 | pass_filenames: false 16 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | requirements_file: docs/requirements.txt 2 | build: 3 | image: latest 4 | python: 5 | version: 3.8 -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: 🤚 if you use this software, please cite it using these metadata. 3 | title: >- 4 | NiaAML: AutoML for classification and regression pipelines 5 | abstract: | 6 | In this paper we present NiaAML, an AutoML framework that we have developed for creating machine learning pipelines and hyperparameter tuning. 
The composition of machine learning pipelines is presented as an optimization problem that can be solved using various stochastic, population-based, nature-inspired algorithms. Nature-inspired algorithms are powerful tools for solving real-world optimization problems, especially those that are highly complex, nonlinear, and involve large search spaces where traditional algorithms may struggle. They are applied widely in various fields, including robotics, operations research, and bioinformatics. This paper provides a comprehensive overview of the software architecture, and describes the main tasks of NiaAML, including the automatic composition of classification and regression pipelines. The overview is supported by a practical illustrative example. 7 | type: journalArticle 8 | license: All rights reserved 9 | copyright: All rights reserved 10 | database: ScienceDirect 11 | issn: 2352-7110 12 | journal: SoftwareX 13 | pages: 101974 14 | volume: 29 15 | url: https://www.sciencedirect.com/science/article/pii/S2352711024003443 16 | keywords: 17 | - AutoML 18 | - Classification 19 | - Nature-inspired algorithms 20 | - Optimization 21 | authors: 22 | - family-names: Fister 23 | given-names: Iztok 24 | - family-names: Farthofer 25 | given-names: Laurenz A. 26 | - family-names: Pečnik 27 | given-names: Luka 28 | - family-names: Fister 29 | given-names: Iztok 30 | - family-names: Holzinger 31 | given-names: Andreas 32 | editors: 33 | - family-names: Fister 34 | given-names: Iztok 35 | - family-names: Farthofer 36 | given-names: Laurenz A. 37 | - family-names: Pečnik 38 | given-names: Luka 39 | - family-names: Fister 40 | given-names: Iztok 41 | - family-names: Holzinger 42 | given-names: Andreas 43 | recipients: 44 | - family-names: Fister 45 | given-names: Iztok 46 | - family-names: Farthofer 47 | given-names: Laurenz A. 
48 | - family-names: Pečnik 49 | given-names: Luka 50 | - family-names: Fister 51 | given-names: Iztok 52 | - family-names: Holzinger 53 | given-names: Andreas 54 | translators: 55 | - family-names: Fister 56 | given-names: Iztok 57 | - family-names: Farthofer 58 | given-names: Laurenz A. 59 | - family-names: Pečnik 60 | given-names: Luka 61 | - family-names: Fister 62 | given-names: Iztok 63 | - family-names: Holzinger 64 | given-names: Andreas 65 | date-published: 2025-02-01 66 | identifiers: 67 | - type: doi 68 | value: 10.1016/j.softx.2024.101974 69 | -------------------------------------------------------------------------------- /COMPONENTS.md: -------------------------------------------------------------------------------- 1 | ### Classifiers 2 | 3 | * Adaptive Boosting (AdaBoost), 4 | * Bagging (Bagging), 5 | * Extremely Randomized Trees (ExtremelyRandomizedTrees), 6 | * Linear SVC (LinearSVC), 7 | * Multi Layer Perceptron (MultiLayerPerceptron), 8 | * Random Forest Classifier (RandomForest), 9 | * Decision Tree Classifier (DecisionTree), 10 | * K-Neighbors Classifier (KNeighbors), 11 | * Gaussian Process Classifier (GaussianProcess), 12 | * Gaussian Naive Bayes (GaussianNB), 13 | * Quadratic Discriminant Analysis (QuadraticDiscriminantAnalysis). 14 | 15 | ### Feature Selection Algorithms 16 | 17 | * Select K Best (SelectKBest), 18 | * Select Percentile (SelectPercentile), 19 | * Variance Threshold (VarianceThreshold). 20 | 21 | #### Nature-Inspired based 22 | 23 | * Bat Algorithm (BatAlgorithm), 24 | * Differential Evolution (DifferentialEvolution), 25 | * Self-Adaptive Differential Evolution (jDEFSTH), 26 | * Grey Wolf Optimizer (GreyWolfOptimizer), 27 | * Particle Swarm Optimization (ParticleSwarmOptimization). 
28 | 29 | ### Feature Transformation Algorithms 30 | 31 | * Normalizer (Normalizer), 32 | * Standard Scaler (StandardScaler), 33 | * Maximum Absolute Scaler (MaxAbsScaler), 34 | * Quantile Transformer (QuantileTransformer), 35 | * Robust Scaler (RobustScaler). 36 | 37 | ### Fitness Functions based on 38 | 39 | * Accuracy (Accuracy), 40 | * Cohen's kappa (CohenKappa), 41 | * F1-Score (F1), 42 | * Precision (Precision). 43 | 44 | ### Categorical Feature Encoders 45 | 46 | * One-Hot Encoder (OneHotEncoder). 47 | 48 | ### Feature Imputers 49 | 50 | * Simple Imputer (SimpleImputer). -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to NiaAML 2 | :+1::tada: First off, thanks for taking the time to contribute! :tada::+1: 3 | 4 | ## Code of Conduct 5 | This project and everyone participating in it is governed by the [NiaAML Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. Please report unacceptable behavior to [lukapecnik96@gmail.com](mailto:lukapecnik96@gmail.com). 6 | 7 | ## How Can I Contribute? 8 | 9 | ### Reporting Bugs 10 | Before creating bug reports, please check existing issues list as you might find out that you don't need to create one. When you are creating a bug report, please include as many details as possible in the issue using the [🐛 bug report issue template](https://github.com/firefly-cpp/NiaAML/blob/master/.github/ISSUE_TEMPLATE/%F0%9F%90%9B%20bug%20report.md). 11 | 12 | ### Suggesting Enhancements 13 | 14 | Open new issue using the [🚀 feature request template](https://github.com/firefly-cpp/NiaAML/blob/master/.github/ISSUE_TEMPLATE/%F0%9F%9A%80%20feature%20request.md). 15 | 16 | ### Pull requests 17 | 18 | Fill in the [pull request template](.github/pull_request_template.md) and make sure your code is documented. 
19 | 20 | ## Setup development environment 21 | 22 | ### Requirements 23 | 24 | * Poetry: [https://python-poetry.org/docs/](https://python-poetry.org/docs/) 25 | 26 | After installing Poetry and cloning the project from GitHub, you should run the following command from the root of the cloned project: 27 | 28 | ```sh 29 | poetry install 30 | ``` 31 | 32 | All of the project's dependencies should be installed and the project ready for further development. **Note that Poetry creates a separate virtual environment for your project.** 33 | 34 | ### Development dependencies 35 | 36 | List of NiaAML's dependencies: 37 | 38 | | Package | Version | Platform | 39 | |---------------|---------|----------| 40 | | numpy | ^1.19.1 | All | 41 | | scikit-learn | ^1.1.2 | All | 42 | | niapy | ^2.0.5 | All | 43 | | pandas | ^2.1.1 | All | 44 | 45 | List of development dependencies: 46 | 47 | | Package | Version | Platform | 48 | |-------------------|---------|----------| 49 | | sphinx | ^3.3.1 | Any | 50 | | sphinx-rtd-theme | ^0.5.0 | Any | 51 | | coveralls | ^2.2.0 | Any | 52 | | autoflake | ^1.4 | Any | 53 | | black | ^21.5b1 | Any | 54 | | pre-commit | ^2.12.1 | Any | 55 | | pytest | ^7.4.2 | Any | 56 | | pytest-cov | ^4.1.0 | Any | 57 | 58 | ## Development Tasks 59 | 60 | ### Testing 61 | 62 | Manually run the tests: 63 | 64 | ```sh 65 | $ poetry run coverage run --source=niaaml -m unittest discover -b 66 | ``` 67 | 68 | ### Documentation 69 | 70 | Build the documentation: 71 | 72 | ```sh 73 | $ poetry run sphinx-build ./docs ./docs/_build 74 | ``` 75 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020-2025 Luka Pečnik et al. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/about.rst: -------------------------------------------------------------------------------- 1 | About 2 | ===== 3 | 4 | NiaAML is an automated machine learning Python framework based on nature-inspired algorithms for optimization. The name comes from the `automated machine learning method of the same name `_. Its goal is to efficiently compose the best possible classification pipeline for the given task using components on the input. The components are divided into three groups: feature selection algorithms, feature transformation algorithms and classifiers. The framework uses nature-inspired algorithms for optimization to choose the best set of components for the classification pipeline on the output and optimize their parameters. We use NiaPy framework for the optimization process which is a popular Python collection of nature-inspired algorithms. The NiaAML framework is easy to use and customize or expand to suit your needs. 5 | 6 | The NiaAML framework allows you not only to run full pipeline optimization, but also separate implemented components such as classifiers, feature selection algorithms, etc. It supports numerical and categorical features. 7 | 8 | Licence 9 | ------- 10 | This package is distributed under the `MIT License `_. 11 | 12 | Disclaimer 13 | ---------- 14 | This framework is provided as-is, and there are no guarantees that it fits your purposes 15 | or that it is bug-free. Use it at your own risk! -------------------------------------------------------------------------------- /docs/api/classifiers.rst: -------------------------------------------------------------------------------- 1 | :mod:`niaaml.classifiers` 2 | ========================= 3 | 4 | .. 
automodule:: niaaml.classifiers 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/api/data.rst: -------------------------------------------------------------------------------- 1 | :mod:`niaaml.data` 2 | ========================= 3 | 4 | .. automodule:: niaaml.data 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/api/fitness.rst: -------------------------------------------------------------------------------- 1 | :mod:`niaaml.fitness` 2 | ========================= 3 | 4 | .. automodule:: niaaml.fitness 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/api/index.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | This is the NiaAML API documentation, auto generated from the source code. 5 | 6 | .. toctree:: 7 | 8 | niaaml 9 | data 10 | classifiers 11 | preprocessing 12 | fitness -------------------------------------------------------------------------------- /docs/api/niaaml.rst: -------------------------------------------------------------------------------- 1 | :mod:`niaaml` 2 | ============= 3 | 4 | .. automodule:: niaaml 5 | :noindex: 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/api/preprocessing.rst: -------------------------------------------------------------------------------- 1 | :mod:`niaaml.preprocessing` 2 | =========================== 3 | 4 | .. automodule:: niaaml.preprocessing 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | :mod:`niaaml.preprocessing.feature_selection` 10 | --------------------------------------------- 11 | 12 | .. 
automodule:: niaaml.preprocessing.feature_selection 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | :mod:`niaaml.preprocessing.feature_transform` 18 | --------------------------------------------- 19 | 20 | .. automodule:: niaaml.preprocessing.feature_transform 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | :mod:`niaaml.preprocessing.encoding` 26 | --------------------------------------------- 27 | 28 | .. automodule:: niaaml.preprocessing.encoding 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | :mod:`niaaml.preprocessing.imputation` 34 | --------------------------------------------- 35 | 36 | .. automodule:: niaaml.preprocessing.imputation 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | import sphinx_rtd_theme 2 | 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file only contains a selection of the most common options. For a full 6 | # list see the documentation: 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | import os 16 | import sys 17 | 18 | sys.path.insert(0, os.path.abspath("../")) 19 | 20 | 21 | # -- Project information ----------------------------------------------------- 22 | 23 | project = "NiaAML" 24 | copyright = "2020-2025, Luka Pečnik et al." 25 | author = "Luka Pečnik et al." 
26 | 27 | # The full version, including alpha/beta/rc tags 28 | release = "2.1.2" 29 | 30 | 31 | # -- General configuration --------------------------------------------------- 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 35 | # ones. 36 | extensions = ["sphinx.ext.autodoc", "sphinx_rtd_theme"] 37 | 38 | # Add any paths that contain templates here, relative to this directory. 39 | templates_path = ["_templates"] 40 | 41 | # List of patterns, relative to source directory, that match files and 42 | # directories to ignore when looking for source files. 43 | # This pattern also affects html_static_path and html_extra_path. 44 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 45 | 46 | 47 | # -- Options for HTML output ------------------------------------------------- 48 | 49 | # The theme to use for HTML and HTML Help pages. See the documentation for 50 | # a list of builtin themes. 51 | # 52 | html_theme = "sphinx_rtd_theme" 53 | 54 | # Add any paths that contain custom static files (such as style sheets) here, 55 | # relative to this directory. They are copied after the builtin static files, 56 | # so a file named "default.css" will overwrite the builtin "default.css". 57 | # html_static_path = ["_static"] 58 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | Contributing to NiaAML 2 | ====================== 3 | 4 | First off, thanks for taking the time to contribute! 5 | 6 | Code of Conduct 7 | --------------- 8 | 9 | This project and everyone participating in it is governed by the :doc:`/code_of_conduct`. By participating, you are 10 | expected to uphold this code. Please report unacceptable behavior to 11 | lukapecnik96@gmail.com. 12 | 13 | How Can I Contribute? 
14 | --------------------- 15 | 16 | Reporting Bugs 17 | ~~~~~~~~~~~~~~ 18 | 19 | Before creating bug reports, please check the existing issues list as you 20 | might find out that you don't need to create one. When you are creating 21 | a bug report, please include as many details as possible in the issue template. 22 | 23 | Suggesting Enhancements 24 | ~~~~~~~~~~~~~~~~~~~~~~~ 25 | 26 | Open a new issue using the feature request template. 27 | 28 | Pull requests 29 | ~~~~~~~~~~~~~ 30 | 31 | Fill in the pull request template and make sure 32 | your code is documented. 33 | -------------------------------------------------------------------------------- /docs/documentation.rst: -------------------------------------------------------------------------------- 1 | Documentation 2 | ============= 3 | 4 | To locally generate and preview documentation run the following command in the project root folder: 5 | 6 | .. code:: sh 7 | 8 | $ poetry run sphinx-build ./docs ./docs/_build 9 | 10 | If the build of the documentation is successful, you can preview the documentation in the docs/_build folder by clicking the ``index.html`` file. -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | NiaAML's documentation! 2 | ================================== 3 | 4 | .. automodule:: niaaml 5 | 6 | NiaAML is an automated machine learning Python framework based on nature-inspired algorithms for optimization. The name comes from the automated machine learning method of the same name [1]. Its goal is to efficiently compose the best possible classification pipeline for the given task using components on the input. The components are divided into three groups: feature selection algorithms, feature transformation algorithms and classifiers.
The framework uses nature-inspired algorithms for optimization to choose the best set of components for the classification pipeline on the output and optimize their parameters. We use the `NiaPy framework <https://github.com/NiaOrg/NiaPy>`_ for the optimization process, which is a popular Python collection of nature-inspired algorithms. The NiaAML framework is easy to use and customize or expand to suit your needs. 7 | 8 | * **Free software:** MIT license 9 | * **Github repository:** https://github.com/lukapecnik/NiaAML 10 | * **Python versions:** 3.11.x, 3.12.x 11 | 12 | The main documentation is organized into a couple of sections: 13 | 14 | * :ref:`user-docs` 15 | * :ref:`dev-docs` 16 | * :ref:`about-docs` 17 | 18 | .. _user-docs: 19 | 20 | .. toctree:: 21 | :maxdepth: 3 22 | :caption: User Documentation 23 | 24 | getting_started 25 | 26 | .. _dev-docs: 27 | 28 | .. toctree:: 29 | :maxdepth: 2 30 | :caption: Developer Documentation 31 | 32 | changelog 33 | installation 34 | testing 35 | documentation 36 | api/index 37 | 38 | .. _about-docs: 39 | 40 | .. toctree:: 41 | :maxdepth: 3 42 | :caption: About 43 | 44 | about 45 | contributing 46 | code_of_conduct 47 | 48 | References 49 | ---------- 50 | 51 | [1] Iztok Fister Jr., Milan Zorman, Dušan Fister, Iztok Fister. Continuous optimizers for automatic design and evaluation of classification pipelines. In: Frontier applications of nature inspired computation. Springer tracts in nature-inspired computing, pp.281-301, 2020. 
-------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Setup development environment 5 | ----------------------------- 6 | 7 | Requirements 8 | ~~~~~~~~~~~~ 9 | 10 | - Poetry: https://python-poetry.org/docs/ 11 | 12 | After installing Poetry and cloning the project from GitHub, you should 13 | run the following command from the root of the cloned project: 14 | 15 | .. code:: sh 16 | 17 | $ poetry install 18 | 19 | All of the project's dependencies should be installed and the project 20 | ready for further development. **Note that Poetry creates a separate 21 | virtual environment for your project.** 22 | 23 | Development dependencies 24 | ~~~~~~~~~~~~~~~~~~~~~~~~ 25 | 26 | List of NiaAML's dependencies: 27 | 28 | +----------------+--------------+------------+ 29 | | Package | Version | Platform | 30 | +================+==============+============+ 31 | | numpy | ^1.19.1 | All | 32 | +----------------+--------------+------------+ 33 | | scikit-learn | ^1.1.2 | All | 34 | +----------------+--------------+------------+ 35 | | niapy | ^2.0.5 | All | 36 | +----------------+--------------+------------+ 37 | | pandas | ^2.1.1 | All | 38 | +----------------+--------------+------------+ 39 | 40 | List of development dependencies: 41 | 42 | +--------------------+-----------+------------+ 43 | | Package | Version | Platform | 44 | +====================+===========+============+ 45 | | sphinx | ^3.3.1 | Any | 46 | +--------------------+-----------+------------+ 47 | | sphinx-rtd-theme | ^0.5.0 | Any | 48 | +--------------------+-----------+------------+ 49 | | coveralls | ^2.2.0 | Any | 50 | +--------------------+-----------+------------+ 51 | | autoflake | ^1.4 | Any | 52 | +--------------------+-----------+------------+ 53 | | black | ^21.5b1 | Any | 54 | +--------------------+-----------+------------+ 55 
| | pre-commit | ^2.12.1 | Any | 56 | +--------------------+-----------+------------+ 57 | | pytest | ^7.4.2 | Any | 58 | +--------------------+-----------+------------+ 59 | | pytest-cov | ^4.1.0 | Any | 60 | +--------------------+-----------+------------+ -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/paper/10.21105.joss.02949.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firefly-cpp/NiaAML/11766422d32bc83e12f10706f59674fdb38e783e/docs/paper/10.21105.joss.02949.pdf -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firefly-cpp/NiaAML/11766422d32bc83e12f10706f59674fdb38e783e/docs/requirements.txt -------------------------------------------------------------------------------- /docs/testing.rst: -------------------------------------------------------------------------------- 1 | Testing 2 | ======= 3 | 4 | Before making a pull request, if possible provide tests for added features or bug fixes. 5 | 6 | We have an automated building system which also runs all of the provided tests. In case any of the test cases fails, we are notified about failing tests. Those should be fixed before we merge your pull request to the master branch. 7 | 8 | For the purpose of checking if all tests are passing locally, you can run the following command: 9 | 10 | .. code:: sh 11 | 12 | $ poetry run coverage run --source=niaaml -m unittest discover -b 13 | 14 | If all tests pass when running this command, it is most likely that the tests would pass on our build system too. 
-------------------------------------------------------------------------------- /examples/classifier.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers import AdaBoost 2 | import os 3 | from niaaml.data import CSVDataReader 4 | import numpy 5 | 6 | """ 7 | In this example, we show how to individually use an implemented classifier and its methods. In this case we use AdaBoost for demonstration, but 8 | you can use any of the implemented classifiers in the same way. 9 | """ 10 | 11 | # prepare data reader using csv file 12 | data_reader = CSVDataReader( 13 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 14 | has_header=False, 15 | contains_classes=True, 16 | ) 17 | 18 | # instantiate AdaBoost classifier 19 | classifier = AdaBoost() 20 | 21 | # set parameters of the classifier 22 | classifier.set_parameters(n_estimators=50, algorithm="SAMME") 23 | 24 | # fit classifier to the data 25 | classifier.fit(data_reader.get_x(), data_reader.get_y()) 26 | 27 | # predict classes of the dummy input 28 | predicted = classifier.predict( 29 | numpy.random.uniform(low=0.0, high=15.0, size=(30, data_reader.get_x().shape[1])) 30 | ) 31 | 32 | # print classifier in a user-friendly form 33 | print(classifier.to_string()) 34 | -------------------------------------------------------------------------------- /examples/example_files/pipeline.ppln: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firefly-cpp/NiaAML/11766422d32bc83e12f10706f59674fdb38e783e/examples/example_files/pipeline.ppln -------------------------------------------------------------------------------- /examples/export_pipeline_object.py: -------------------------------------------------------------------------------- 1 | from niaaml import Pipeline 2 | from niaaml.classifiers import AdaBoost 3 | from niaaml.preprocessing.feature_selection import SelectKBest 4 | 
from niaaml.preprocessing.feature_transform import Normalizer 5 | 6 | """ 7 | This example presents how to export a pipeline object into a file that can later be loaded back into a Python program as a Pipeline object. 8 | """ 9 | 10 | # instantiate a Pipeline object with AdaBoost classifier, SelectKBest feature selection algorithm and Normalizer as a feature transformation algorithm 11 | pipeline = Pipeline( 12 | feature_selection_algorithm=SelectKBest(), 13 | feature_transform_algorithm=Normalizer(), 14 | classifier=AdaBoost(), 15 | ) 16 | 17 | # export the object to a file for later use 18 | pipeline.export("exported_pipeline.ppln") 19 | -------------------------------------------------------------------------------- /examples/export_pipeline_text.py: -------------------------------------------------------------------------------- 1 | from niaaml import Pipeline 2 | from niaaml.classifiers import AdaBoost 3 | from niaaml.preprocessing.feature_selection import SelectKBest 4 | from niaaml.preprocessing.feature_transform import Normalizer 5 | 6 | """ 7 | This example presents how to export a pipeline object into a text file in a user-friendly form. A text file cannot be loaded back into a Python program in 8 | the form of a Pipeline object. 
9 | """ 10 | 11 | # instantiate a Pipeline object with AdaBoost classifier, SelectKBest feature selection algorithm and Normalizer as feature transformation algorithm 12 | pipeline = Pipeline( 13 | feature_selection_algorithm=SelectKBest(), 14 | feature_transform_algorithm=Normalizer(), 15 | classifier=AdaBoost(), 16 | ) 17 | 18 | # export the object to a file in a user-friendly form 19 | pipeline.export_text("exported_pipeline.txt") 20 | -------------------------------------------------------------------------------- /examples/factories.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers import ClassifierFactory 2 | from niaaml.preprocessing.feature_selection import FeatureSelectionAlgorithmFactory 3 | from niaaml.preprocessing.feature_transform import FeatureTransformAlgorithmFactory 4 | from niaaml.fitness import FitnessFactory 5 | from niaaml.preprocessing.encoding import EncoderFactory 6 | from niaaml.preprocessing.imputation import ImputerFactory 7 | 8 | """ 9 | This example presents how to use all of the implemented factories to create new object instances using their class names. You may also 10 | import and instantiate objects directly, but it is more convenient to use factories in some cases. 
11 | """ 12 | 13 | # instantiate all possible factories 14 | classifier_factory = ClassifierFactory() 15 | fsa_factory = FeatureSelectionAlgorithmFactory() 16 | fta_factory = FeatureTransformAlgorithmFactory() 17 | f_factory = FitnessFactory() 18 | e_factory = EncoderFactory() 19 | i_factory = ImputerFactory() 20 | 21 | # get an instance of the MultiLayerPerceptron class 22 | mlp = classifier_factory.get_result("MultiLayerPerceptron") 23 | 24 | # get an instance of the ParticleSwarmOptimization class 25 | pso = fsa_factory.get_result("ParticleSwarmOptimization") 26 | 27 | # get an instance of the Normalizer class 28 | normalizer = fta_factory.get_result("Normalizer") 29 | 30 | # get an instance of the Precision class 31 | precision = f_factory.get_result("Precision") 32 | 33 | # get an instance of the OneHotEncoder class 34 | ohe = e_factory.get_result("OneHotEncoder") 35 | 36 | # get an instance of the SimpleImputer class 37 | imp = i_factory.get_result("SimpleImputer") 38 | 39 | # variables mlp, pso, normalizer, precision, ohe and imp contain instances of the classes with the passed names 40 | -------------------------------------------------------------------------------- /examples/feature_encoding.py: -------------------------------------------------------------------------------- 1 | from niaaml.preprocessing.encoding import OneHotEncoder, encode_categorical_features 2 | import os 3 | from niaaml.data import CSVDataReader 4 | 5 | """ 6 | This example presents how to use an implemented categorical feature encoder and its methods individually. In this case, we use OneHotEncoder for demonstration, but 7 | you can use any of the implemented encoders in the same way. 
8 | """ 9 | 10 | # prepare data reader using csv file 11 | data_reader = CSVDataReader( 12 | src=os.path.dirname(os.path.abspath(__file__)) 13 | + "/example_files/dataset_categorical.csv", 14 | has_header=False, 15 | contains_classes=True, 16 | ) 17 | 18 | # instantiate OneHotEncoder 19 | ohe = OneHotEncoder() 20 | 21 | # fit, transform and print to output the categorical feature in the dataset (index 6) 22 | features = data_reader.get_x() 23 | ohe.fit(features[[6]]) 24 | f = ohe.transform(features[[6]]) 25 | print(f) 26 | 27 | # if you need to get an array of encoders for all of the categorical features in a dataset (and transformed DataFrame of features), you may use the utility method encode_categorical_features 28 | transformed_features, encoders = encode_categorical_features(features, "OneHotEncoder") 29 | -------------------------------------------------------------------------------- /examples/feature_imputing.py: -------------------------------------------------------------------------------- 1 | from niaaml.preprocessing.imputation import SimpleImputer, impute_features 2 | import os 3 | from niaaml.data import CSVDataReader 4 | 5 | """ 6 | This example presents how to use an implemented missing features' imputer and its methods individually. In this case, we use SimpleImputer for demonstration, but 7 | you can use any of the implemented imputers in the same way. 
8 | """ 9 | 10 | # prepare data reader using csv file 11 | data_reader = CSVDataReader( 12 | src=os.path.dirname(os.path.abspath(__file__)) 13 | + "/example_files/dataset_categorical_missing.csv", 14 | has_header=False, 15 | contains_classes=True, 16 | ) 17 | 18 | # instantiate SimpleImputer 19 | si = SimpleImputer() 20 | 21 | # fit, transform and print to output the feature in the dataset (index 6) 22 | features = data_reader.get_x() 23 | si.fit(features[[6]]) 24 | f = si.transform(features[[6]]) 25 | print(f) 26 | 27 | # if you wish to get array of imputers for all of the features with missing values in a dataset (and transformed DataFrame of features), you may use the utility method impute_features 28 | transformed_features, imputers = impute_features(features, "SimpleImputer") 29 | -------------------------------------------------------------------------------- /examples/feature_selection.py: -------------------------------------------------------------------------------- 1 | from niaaml.preprocessing.feature_selection import SelectKBest 2 | import os 3 | from niaaml.data import CSVDataReader 4 | from sklearn.feature_selection import chi2 5 | 6 | """ 7 | This example presents how to use an implemented feature selection algorithm and its methods individually. In this case, we use SelectKBest for demonstration, but 8 | you can use any of the implemented feature selection algorithms in the same way. 
9 | """ 10 | 11 | # prepare data reader using csv file 12 | data_reader = CSVDataReader( 13 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 14 | has_header=False, 15 | contains_classes=True, 16 | ) 17 | 18 | # instantiate SelectKBest feature selection algorithms 19 | fs = SelectKBest() 20 | 21 | # set parameters of the object 22 | fs.set_parameters(k=4, score_func=chi2) 23 | 24 | # select best features according to the SelectKBest algorithm (returns boolean mask of the selected features - True if selected, False if not) 25 | features_mask = fs.select_features(data_reader.get_x(), data_reader.get_y()) 26 | 27 | # print feature selection algorithm in a user-friendly form 28 | print(fs.to_string()) 29 | -------------------------------------------------------------------------------- /examples/feature_selection_threshold_algorithms.py: -------------------------------------------------------------------------------- 1 | from niaaml.preprocessing.feature_selection import ParticleSwarmOptimization 2 | import os 3 | from niaaml.data import CSVDataReader 4 | 5 | """ 6 | This example presents how to use implemented feature selection algorithms that use threshold mechanism. 
7 | """ 8 | 9 | # prepare data reader using csv file 10 | data_reader = CSVDataReader( 11 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 12 | has_header=False, 13 | contains_classes=True, 14 | ) 15 | 16 | # instantiate feature selection algorithm 17 | fs = ParticleSwarmOptimization() 18 | # BatAlgorithm, DifferentialEvolution, GreyWolfOptimizer and jDEFSTH also use threshold mechanism 19 | 20 | # set parameters of the instantiated algorithm 21 | fs.set_parameters(C1=1.5, C2=2.0) 22 | 23 | # select best features according to the ParticleSwarmOptimization algorithm (returns boolean mask of the selected features - True if selected, False if not) 24 | features_mask = fs.select_features(data_reader.get_x(), data_reader.get_y()) 25 | 26 | # print feature selection algorithm in a user-friendly form 27 | print(fs.to_string()) 28 | -------------------------------------------------------------------------------- /examples/feature_transform.py: -------------------------------------------------------------------------------- 1 | from niaaml.preprocessing.feature_transform import Normalizer 2 | import os 3 | from niaaml.data import CSVDataReader 4 | 5 | """ 6 | This example presents how to individually use an implemented feature transform algorithm and its methods. In this case, we use Normalizer for demonstration, but 7 | you can use any of the implemented feature transform algorithms in the same way. 
8 | """ 9 | 10 | # prepare data reader using csv file 11 | data_reader = CSVDataReader( 12 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 13 | has_header=False, 14 | contains_classes=True, 15 | ) 16 | 17 | # instantiate Normalizer 18 | ft = Normalizer() 19 | 20 | # set parameters of the Normalizer 21 | ft.set_parameters(norm="l2") 22 | 23 | # fit the algorithm to the input data 24 | ft.fit(data_reader.get_x()) 25 | 26 | # transform features 27 | transformed_features = ft.transform(data_reader.get_x()) 28 | 29 | # print feature transform algorithm in a user-friendly form 30 | print(ft.to_string()) 31 | -------------------------------------------------------------------------------- /examples/fitness.py: -------------------------------------------------------------------------------- 1 | from niaaml.fitness import Precision 2 | from niaaml.data import CSVDataReader 3 | import os 4 | import numpy 5 | 6 | """ 7 | This example presents how to use an implemented fitness function and its method individually. In this case, we use Precision for demonstration, but 8 | you can use any of the implemented fitness functions in the same way. 
9 | """ 10 | 11 | # prepare data reader using csv file 12 | data_reader = CSVDataReader( 13 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 14 | has_header=False, 15 | contains_classes=True, 16 | ) 17 | 18 | # lets say the following array contains predictions after the classification process 19 | predictions = numpy.random.choice( 20 | ["Class 1", "Class 2"], size=data_reader.get_y().shape 21 | ) 22 | 23 | # instantiate instance of a fitness function (Precision in this case) 24 | fitness_func = Precision() 25 | 26 | # calculate fitness value 27 | precision = fitness_func.get_fitness(predictions, data_reader.get_y()) 28 | 29 | # precision will probably be low due to dummy data 30 | print(precision) 31 | -------------------------------------------------------------------------------- /examples/load_data_basic.py: -------------------------------------------------------------------------------- 1 | from niaaml.data import BasicDataReader 2 | import numpy 3 | 4 | """ 5 | This example presents how to instantiate BasicDataReader and use its methods. You can use it to contain data in a single variable 6 | or as an input to an instance of the PipelineOptimizer class. 7 | """ 8 | 9 | # BasicDataReader instance uses arrays on the input (x and y arrays) 10 | data_reader = BasicDataReader( 11 | x=numpy.random.uniform(low=0.0, high=15.0, size=(50, 3)), 12 | y=numpy.random.choice(["Class 1", "Class 2"], size=50), 13 | ) 14 | 15 | # get x and y arrays and print them 16 | print(data_reader.get_x()) 17 | print(data_reader.get_y()) 18 | -------------------------------------------------------------------------------- /examples/load_data_csv.py: -------------------------------------------------------------------------------- 1 | import os 2 | from niaaml.data import CSVDataReader 3 | 4 | """ 5 | This example presents how to instantiate CSVDataReader and use its methods. 
You can use it to contain data in a single variable, 6 | or as an input to an instance of the PipelineOptimizer class. 7 | """ 8 | 9 | # CSVDataReader gets a path to csv file on the input, reads and parses it into the x and y arrays 10 | # has_header and contains_classes arguments need to be set according to the input csv file's structure 11 | data_reader = CSVDataReader( 12 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 13 | has_header=False, 14 | contains_classes=True, 15 | ) 16 | 17 | # get x and y arrays and print them 18 | print(data_reader.get_x()) 19 | print(data_reader.get_y()) 20 | -------------------------------------------------------------------------------- /examples/load_pipeline_object_file.py: -------------------------------------------------------------------------------- 1 | import os 2 | from niaaml import Pipeline 3 | 4 | """ 5 | This example presents how to load a saved Pipeline object from a file. You can use all of its methods after it has been loaded successfully. 6 | """ 7 | 8 | # load Pipeline object from a file 9 | pipeline = Pipeline.load( 10 | os.path.dirname(os.path.abspath(__file__)) + "/example_files/pipeline.ppln" 11 | ) 12 | 13 | # all of the Pipeline class's methods can be called after a successful load 14 | -------------------------------------------------------------------------------- /examples/logger.py: -------------------------------------------------------------------------------- 1 | from niaaml.logger import Logger 2 | 3 | """ 4 | This example presents how to use the Logger class individually. 
5 | """ 6 | 7 | # instantiate instance with verbose mode 8 | logger = Logger(verbose=True) 9 | 10 | # in verbose mode, all of the call functions should show their output 11 | logger.log_progress("progress") 12 | logger.log_pipeline("pipeline") 13 | logger.log_optimization_error("optimization error") 14 | 15 | print("-------------------------") 16 | 17 | # in this case only the log_progress function's call is going to show the output 18 | logger = Logger() 19 | logger.log_progress("progress") 20 | logger.log_pipeline("pipeline") 21 | logger.log_optimization_error("optimization error") 22 | 23 | print("-------------------------") 24 | 25 | # you may also output logs to some log file 26 | logger = Logger(verbose=True, output_file="log_output") 27 | logger.log_progress("progress") 28 | logger.log_pipeline("pipeline") 29 | logger.log_optimization_error("optimization error") 30 | -------------------------------------------------------------------------------- /examples/optimization_stats.py: -------------------------------------------------------------------------------- 1 | from niaaml.utilities import OptimizationStats 2 | import numpy as np 3 | 4 | """ 5 | This example presents how the OptimizationStats class can be used. Normally, it is used in the background when the Pipeline's optimize method is called. 
6 | """ 7 | 8 | # dummy array with expected results of the classification process 9 | y = np.array( 10 | [ 11 | "Class 1", 12 | "Class 1", 13 | "Class 1", 14 | "Class 2", 15 | "Class 1", 16 | "Class 2", 17 | "Class 2", 18 | "Class 2", 19 | "Class 2", 20 | "Class 1", 21 | "Class 1", 22 | "Class 2", 23 | "Class 1", 24 | "Class 2", 25 | "Class 1", 26 | "Class 1", 27 | "Class 1", 28 | "Class 1", 29 | "Class 2", 30 | "Class 1", 31 | ] 32 | ) 33 | 34 | # dummy array with predicted classes 35 | predicted = np.array( 36 | [ 37 | "Class 1", 38 | "Class 1", 39 | "Class 1", 40 | "Class 2", 41 | "Class 2", 42 | "Class 2", 43 | "Class 1", 44 | "Class 1", 45 | "Class 1", 46 | "Class 2", 47 | "Class 1", 48 | "Class 1", 49 | "Class 2", 50 | "Class 2", 51 | "Class 1", 52 | "Class 2", 53 | "Class 1", 54 | "Class 2", 55 | "Class 2", 56 | "Class 2", 57 | ] 58 | ) 59 | 60 | # instantiate OptimizationStats 61 | stats = OptimizationStats(predicted, y) 62 | 63 | # print user-friendly text representation 64 | print(stats.to_string()) 65 | -------------------------------------------------------------------------------- /examples/optimize_run_pipeline.py: -------------------------------------------------------------------------------- 1 | from niaaml import Pipeline 2 | from niaaml.classifiers import MultiLayerPerceptron 3 | from niaaml.preprocessing.feature_selection import VarianceThreshold 4 | from niaaml.preprocessing.feature_transform import Normalizer 5 | from niaaml.data import CSVDataReader 6 | import os 7 | import numpy 8 | import pandas 9 | 10 | """ 11 | This example presents how to use the Pipeline class individually. You may use this if you want to test out a specific classification pipeline. 
12 | """ 13 | 14 | # prepare data reader using csv file 15 | data_reader = CSVDataReader( 16 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 17 | has_header=False, 18 | contains_classes=True, 19 | ) 20 | 21 | # instantiate a Pipeline object 22 | pipeline = Pipeline( 23 | feature_selection_algorithm=VarianceThreshold(), 24 | feature_transform_algorithm=Normalizer(), 25 | classifier=MultiLayerPerceptron(), 26 | ) 27 | 28 | # run pipeline optimization process (returns fitness value, but sets the best parameters for classifier, feature selection algorithm and feature transform algorithm during the process) 29 | pipeline.optimize( 30 | data_reader.get_x(), 31 | data_reader.get_y(), 32 | 10, 33 | 50, 34 | "ParticleSwarmAlgorithm", 35 | "Accuracy", 36 | ) 37 | 38 | # run the pipeline using dummy data 39 | # you could run the pipeline before the optimization process, but get wrong predictions as nothing in the pipeline is fit for the given dataset 40 | predicted = pipeline.run( 41 | pandas.DataFrame( 42 | numpy.random.uniform( 43 | low=0.0, high=15.0, size=(30, data_reader.get_x().shape[1]) 44 | ) 45 | ) 46 | ) 47 | 48 | # pipeline variable contains Pipeline object that can be used for further classification, exported as an object (that can be later loaded and used) or exported as text file 49 | -------------------------------------------------------------------------------- /examples/optimize_run_pipeline_categorical_features.py: -------------------------------------------------------------------------------- 1 | from niaaml import Pipeline 2 | from niaaml.classifiers import MultiLayerPerceptron 3 | from niaaml.preprocessing.feature_selection import VarianceThreshold 4 | from niaaml.preprocessing.feature_transform import Normalizer 5 | from niaaml.data import CSVDataReader 6 | from niaaml.preprocessing.encoding import encode_categorical_features 7 | import os 8 | import numpy 9 | import pandas 10 | 11 | """ 12 | This example presents how 
to use the Pipeline class individually. You may use this if you want to test out a specific classification pipeline. 13 | We use a dataset that contains categorical and numerical features. 14 | """ 15 | 16 | # prepare data reader using csv file 17 | data_reader = CSVDataReader( 18 | src=os.path.dirname(os.path.abspath(__file__)) 19 | + "/example_files/dataset_categorical.csv", 20 | has_header=False, 21 | contains_classes=True, 22 | ) 23 | 24 | # we use the utility method encode_categorical_features to get encoders for the categorical features, but you may instantiate and fit 25 | # feature encoders separately and pass them as an array (as long as they are implemented as this framework suggests) 26 | # there should be as many encoders as categorical features 27 | # this example uses One-Hot Encoding 28 | _, encoders = encode_categorical_features(data_reader.get_x(), "OneHotEncoder") 29 | 30 | # instantiate a Pipeline object 31 | pipeline = Pipeline( 32 | feature_selection_algorithm=VarianceThreshold(), 33 | feature_transform_algorithm=Normalizer(), 34 | classifier=MultiLayerPerceptron(), 35 | categorical_features_encoders=encoders, 36 | ) 37 | 38 | # run pipeline optimization process (returns fitness value, but sets the best parameters for classifier, feature selection algorithm and feature transform algorithm during the process) 39 | pipeline.optimize( 40 | data_reader.get_x(), 41 | data_reader.get_y(), 42 | 10, 43 | 50, 44 | "ParticleSwarmAlgorithm", 45 | "Accuracy", 46 | ) 47 | 48 | # run the pipeline using dummy data 49 | # you could run the pipeline before the optimization process, but get wrong predictions as nothing in the pipeline is fit for the given dataset 50 | predicted = pipeline.run( 51 | pandas.DataFrame( 52 | [ 53 | [ 54 | 10.32440339, 55 | 3.195964543, 56 | 1.215275549, 57 | 3.741461311, 58 | 11.6736581, 59 | 6.435247906, 60 | "a", 61 | ] 62 | ] 63 | ) 64 | ) 65 | 66 | # pipeline variable contains a Pipeline object that can be used for further 
classification, exported as an object (that can later be loaded and used) or exported as a text file 67 | -------------------------------------------------------------------------------- /examples/optimize_run_pipeline_logger.py: -------------------------------------------------------------------------------- 1 | from niaaml import Pipeline 2 | from niaaml.classifiers import MultiLayerPerceptron 3 | from niaaml.preprocessing.feature_selection import VarianceThreshold 4 | from niaaml.preprocessing.feature_transform import Normalizer 5 | from niaaml.data import CSVDataReader 6 | from niaaml.logger import Logger 7 | import os 8 | import numpy 9 | import pandas 10 | 11 | """ 12 | This example presents how to use the Pipeline class with logging individually. You may use this if you want to test out a specific classification pipeline. 13 | """ 14 | 15 | # prepare data reader using csv file 16 | data_reader = CSVDataReader( 17 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 18 | has_header=False, 19 | contains_classes=True, 20 | ) 21 | 22 | # prepare Logger instance 23 | # verbose=True means more information, output_file is the log's file name 24 | # if output_file is None, there is no file created 25 | logger = Logger(verbose=True, output_file="output.log") 26 | 27 | # instantiate a Pipeline object 28 | pipeline = Pipeline( 29 | feature_selection_algorithm=VarianceThreshold(), 30 | feature_transform_algorithm=Normalizer(), 31 | classifier=MultiLayerPerceptron(), 32 | logger=logger, 33 | ) 34 | 35 | # run pipeline optimization process (returns fitness value, but sets the best parameters for classifier, feature selection algorithm and feature transform algorithm during the process) 36 | pipeline.optimize( 37 | data_reader.get_x(), 38 | data_reader.get_y(), 39 | 10, 40 | 50, 41 | "ParticleSwarmAlgorithm", 42 | "Accuracy", 43 | ) 44 | 45 | # run the pipeline using dummy data 46 | # you could run the pipeline before the optimization process, 
but get wrong predictions as nothing in the pipeline is fit for the given dataset 47 | predicted = pipeline.run( 48 | pandas.DataFrame( 49 | numpy.random.uniform( 50 | low=0.0, high=15.0, size=(30, data_reader.get_x().shape[1]) 51 | ) 52 | ) 53 | ) 54 | 55 | # pipeline variable contains Pipeline object that can be used for further classification, exported as an object (that can be later loaded and used) or exported as text file 56 | -------------------------------------------------------------------------------- /examples/optimize_run_pipeline_missing_values.py: -------------------------------------------------------------------------------- 1 | from niaaml import Pipeline 2 | from niaaml.classifiers import MultiLayerPerceptron 3 | from niaaml.preprocessing.feature_selection import VarianceThreshold 4 | from niaaml.preprocessing.feature_transform import Normalizer 5 | from niaaml.data import CSVDataReader 6 | from niaaml.preprocessing.encoding import encode_categorical_features 7 | from niaaml.preprocessing.imputation import impute_features 8 | import os 9 | import numpy 10 | import pandas 11 | 12 | """ 13 | This example presents how to use the Pipeline class individually. You may use this if you want to test out a specific classification pipeline. 14 | We use a dataset that contains categorical and numerical features with missing values. 
15 | """ 16 | 17 | # prepare data reader using csv file 18 | data_reader = CSVDataReader( 19 | src=os.path.dirname(os.path.abspath(__file__)) 20 | + "/example_files/dataset_categorical_missing.csv", 21 | has_header=False, 22 | contains_classes=True, 23 | ) 24 | 25 | features = data_reader.get_x() 26 | 27 | # we use the utility method impute_features to get imputers for the features with missing values, but you may instantiate and fit 28 | # imputers separately and pass them as a dictionary (as long as they are implemented as this framework suggests), with keys as column names or indices (if there is no header in the csv) 29 | # there should be as many imputers as the features with missing values 30 | # this example uses Simple Imputer 31 | features, imputers = impute_features(features, "SimpleImputer") 32 | 33 | # exactly the same goes for encoders 34 | _, encoders = encode_categorical_features(features, "OneHotEncoder") 35 | 36 | # instantiate a Pipeline object 37 | pipeline = Pipeline( 38 | feature_selection_algorithm=VarianceThreshold(), 39 | feature_transform_algorithm=Normalizer(), 40 | classifier=MultiLayerPerceptron(), 41 | categorical_features_encoders=encoders, 42 | imputers=imputers, 43 | ) 44 | 45 | # run pipeline optimization process (returns fitness value, but sets the best parameters for classifier, feature selection algorithm and feature transform algorithm during the process) 46 | pipeline.optimize( 47 | data_reader.get_x(), 48 | data_reader.get_y(), 49 | 10, 50 | 50, 51 | "ParticleSwarmAlgorithm", 52 | "Accuracy", 53 | ) 54 | 55 | # run the pipeline using dummy data 56 | # you could run the pipeline before the optimization process, but get wrong predictions as nothing in the pipeline is fit for the given dataset 57 | predicted = pipeline.run( 58 | pandas.DataFrame( 59 | [ 60 | [ 61 | 10.32440339, 62 | 3.195964543, 63 | 1.215275549, 64 | 3.741461311, 65 | 11.6736581, 66 | 6.435247906, 67 | "a", 68 | ] 69 | ] 70 | ) 71 | ) 72 | 73 | # pipeline 
variable contains a Pipeline object that can be used for further classification, exported as an object (that can later be loaded and used) or exported as text file 74 | -------------------------------------------------------------------------------- /examples/run_all.bat: -------------------------------------------------------------------------------- 1 | for %%i in (%cd%\*.py) do python3 %%i -------------------------------------------------------------------------------- /examples/run_all.sh: -------------------------------------------------------------------------------- 1 | for file in ./*.py 2 | do 3 | python3 "$file" 4 | done -------------------------------------------------------------------------------- /examples/run_pipeline_optimizer_array_data.py: -------------------------------------------------------------------------------- 1 | from niaaml import PipelineOptimizer 2 | from niaaml.data import BasicDataReader 3 | import numpy 4 | 5 | """ 6 | This example presents how to use the PipelineOptimizer class. This example is using an instance of BasicDataReader. 7 | The instantiated PipelineOptimizer will try to compose the best pipeline with the components that are specified in its constructor.
8 | """ 9 | 10 | # prepare data reader using features and classes from arrays 11 | # in this case random dummy arrays are generated 12 | data_reader = BasicDataReader( 13 | x=numpy.random.uniform(low=0.0, high=15.0, size=(50, 3)), 14 | y=numpy.random.choice(["Class 1", "Class 2"], size=50), 15 | ) 16 | 17 | # instantiate PipelineOptimizer that chooses among specified classifiers, feature selection algorithms and feature transform algorithms 18 | # log is True by default, log_verbose means more information if True, log_output_file is the destination of a log file 19 | # if log_output_file is not provided there is no file created 20 | # if log is False, logging is turned off 21 | pipeline_optimizer = PipelineOptimizer( 22 | data=data_reader, 23 | classifiers=[ 24 | "AdaBoost", 25 | "Bagging", 26 | "MultiLayerPerceptron", 27 | "RandomForest", 28 | "ExtremelyRandomizedTrees", 29 | "LinearSVC", 30 | ], 31 | feature_selection_algorithms=[ 32 | "SelectKBest", 33 | "SelectPercentile", 34 | "ParticleSwarmOptimization", 35 | "VarianceThreshold", 36 | ], 37 | feature_transform_algorithms=["Normalizer", "StandardScaler"], 38 | log=True, 39 | log_verbose=True, 40 | log_output_file="output.log", 41 | ) 42 | 43 | # runs the optimization process 44 | # one of the possible pipelines in this case is: SelectPercentile -> Normalizer -> RandomForest 45 | # returns the best found pipeline 46 | # the chosen fitness function and optimization algorithm are Accuracy and Particle Swarm Algorithm 47 | pipeline = pipeline_optimizer.run( 48 | "Accuracy", 10, 10, 30, 30, "ParticleSwarmAlgorithm", "ParticleSwarmAlgorithm" 49 | ) 50 | 51 | # pipeline variable contains Pipeline object that can be used for further classification, exported as an object (that can be later loaded and used) or exported as text file 52 | -------------------------------------------------------------------------------- /examples/run_pipeline_optimizer_csv_data.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from niaaml import PipelineOptimizer 3 | from niaaml.data import CSVDataReader 4 | 5 | """ 6 | This example presents how to use the PipelineOptimizer class. This example is using an instance of CSVDataReader. 7 | The instantiated PipelineOptimizer will try and assemble the best pipeline with the components that are specified in its constructor. 8 | """ 9 | 10 | # prepare data reader using csv file 11 | data_reader = CSVDataReader( 12 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 13 | has_header=False, 14 | contains_classes=True, 15 | ) 16 | 17 | # instantiate PipelineOptimizer that chooses among specified classifiers, feature selection algorithms and feature transform algorithms 18 | # log is True by default, log_verbose means more information if True, log_output_file is the destination of a log file 19 | # if log_output_file is not provided there is no file created 20 | # if log is False, logging is turned off 21 | pipeline_optimizer = PipelineOptimizer( 22 | data=data_reader, 23 | classifiers=[ 24 | "AdaBoost", 25 | "Bagging", 26 | "MultiLayerPerceptron", 27 | "RandomForest", 28 | "ExtremelyRandomizedTrees", 29 | "LinearSVC", 30 | ], 31 | feature_selection_algorithms=[ 32 | "SelectKBest", 33 | "SelectPercentile", 34 | "ParticleSwarmOptimization", 35 | "VarianceThreshold", 36 | ], 37 | feature_transform_algorithms=["Normalizer", "StandardScaler"], 38 | log=True, 39 | log_verbose=True, 40 | log_output_file="output.log", 41 | ) 42 | 43 | # runs the optimization process 44 | # one of the possible pipelines in this case is: SelectPercentile -> Normalizer -> RandomForest 45 | # returns the best found pipeline 46 | # the chosen fitness function and optimization algorithm are Accuracy and Particle Swarm Algorithm 47 | pipeline = pipeline_optimizer.run( 48 | "Accuracy", 10, 10, 30, 30, "ParticleSwarmAlgorithm", "ParticleSwarmAlgorithm" 49 | 
) 50 | 51 | # pipeline variable contains Pipeline object that can be used for further classification, exported as an object (that can be later loaded and used) or exported as text file 52 | -------------------------------------------------------------------------------- /examples/run_pipeline_optimizer_csv_data_categorical.py: -------------------------------------------------------------------------------- 1 | import os 2 | from niaaml import PipelineOptimizer 3 | from niaaml.data import CSVDataReader 4 | 5 | """ 6 | In this example, we show how to use the PipelineOptimizer class. This example is using an instance of CSVDataReader. 7 | The instantiated PipelineOptimizer will try and assemble the best pipeline with the components that are specified in its constructor. 8 | We use a dataset with 1 categorical feature to demonstrate a use of PipelineOptimizer instance with automatic feature encoding. 9 | """ 10 | 11 | # prepare data reader using csv file 12 | data_reader = CSVDataReader( 13 | src=os.path.dirname(os.path.abspath(__file__)) 14 | + "/example_files/dataset_categorical.csv", 15 | has_header=False, 16 | contains_classes=True, 17 | ) 18 | 19 | # instantiate PipelineOptimizer that chooses among specified classifiers, feature selection algorithms and feature transform algorithms 20 | # OneHotEncoder is used for encoding categorical features in this example 21 | # log is True by default, log_verbose means more information if True, log_output_file is the destination of a log file 22 | # if log_output_file is not provided there is no file created 23 | # if log is False, logging is turned off 24 | pipeline_optimizer = PipelineOptimizer( 25 | data=data_reader, 26 | classifiers=[ 27 | "AdaBoost", 28 | "Bagging", 29 | "MultiLayerPerceptron", 30 | "RandomForest", 31 | "ExtremelyRandomizedTrees", 32 | "LinearSVC", 33 | ], 34 | feature_selection_algorithms=[ 35 | "SelectKBest", 36 | "SelectPercentile", 37 | "ParticleSwarmOptimization", 38 | "VarianceThreshold", 39 | ], 
40 | feature_transform_algorithms=["Normalizer", "StandardScaler"], 41 | categorical_features_encoder="OneHotEncoder", 42 | log=True, 43 | log_verbose=True, 44 | log_output_file="output.log", 45 | ) 46 | 47 | # runs the optimization process 48 | # one of the possible pipelines in this case is: SelectPercentile -> Normalizer -> RandomForest 49 | # returns the best found pipeline 50 | # the chosen fitness function and optimization algorithm are Accuracy and Particle Swarm Algorithm 51 | pipeline = pipeline_optimizer.run( 52 | "Accuracy", 10, 10, 30, 30, "ParticleSwarmAlgorithm", "ParticleSwarmAlgorithm" 53 | ) 54 | 55 | # pipeline variable contains Pipeline object that can be used for further classification, exported as an object (that can be later loaded and used) or exported as text file 56 | -------------------------------------------------------------------------------- /examples/run_pipeline_optimizer_csv_data_missing.py: -------------------------------------------------------------------------------- 1 | import os 2 | from niaaml import PipelineOptimizer 3 | from niaaml.data import CSVDataReader 4 | 5 | """ 6 | In this example, we show how to use the PipelineOptimizer class. This example is using an instance of CSVDataReader. 7 | The instantiated PipelineOptimizer will try and assemble the best pipeline with the components that are specified in its constructor. 8 | We use a dataset with 1 categorical feature and missing values to demonstrate a use of PipelineOptimizer instance with automatic feature encoding and imputation. 
9 | """ 10 | 11 | # prepare data reader using csv file 12 | data_reader = CSVDataReader( 13 | src=os.path.dirname(os.path.abspath(__file__)) 14 | + "/example_files/dataset_categorical_missing.csv", 15 | has_header=False, 16 | contains_classes=True, 17 | ) 18 | 19 | # instantiate PipelineOptimizer that chooses among specified classifiers, feature selection algorithms and feature transform algorithms 20 | # OneHotEncoder is used for encoding categorical features in this example 21 | # SimpleImputer is used for imputing missing values in this example 22 | # log is True by default, log_verbose means more information if True, log_output_file is the destination of a log file 23 | # if log_output_file is not provided there is no file created 24 | # if log is False, logging is turned off 25 | pipeline_optimizer = PipelineOptimizer( 26 | data=data_reader, 27 | classifiers=[ 28 | "AdaBoost", 29 | "Bagging", 30 | "MultiLayerPerceptron", 31 | "RandomForest", 32 | "ExtremelyRandomizedTrees", 33 | "LinearSVC", 34 | ], 35 | feature_selection_algorithms=[ 36 | "SelectKBest", 37 | "SelectPercentile", 38 | "ParticleSwarmOptimization", 39 | "VarianceThreshold", 40 | ], 41 | feature_transform_algorithms=["Normalizer", "StandardScaler"], 42 | categorical_features_encoder="OneHotEncoder", 43 | imputer="SimpleImputer", 44 | log=True, 45 | log_verbose=True, 46 | log_output_file="output.log", 47 | ) 48 | 49 | # runs the optimization process 50 | # one of the possible pipelines in this case is: SelectPercentile -> Normalizer -> RandomForest 51 | # returns the best found pipeline 52 | # the chosen fitness function and optimization algorithm are Accuracy and Particle Swarm Algorithm 53 | pipeline = pipeline_optimizer.run( 54 | "Accuracy", 10, 10, 30, 30, "ParticleSwarmAlgorithm", "ParticleSwarmAlgorithm" 55 | ) 56 | 57 | # pipeline variable contains Pipeline object that can be used for further classification, exported as an object (that can be later loaded and used) or exported as text file 
58 | -------------------------------------------------------------------------------- /examples/run_pipeline_optimizer_csv_data_v1.py: -------------------------------------------------------------------------------- 1 | import os 2 | from niaaml import PipelineOptimizer 3 | from niaaml.data import CSVDataReader 4 | 5 | """ 6 | This example presents how to use the PipelineOptimizer class to run the original optimization process according to the paper where NiaAML is proposed. 7 | This example is using an instance of CSVDataReader. 8 | The instantiated PipelineOptimizer will try and assemble the best pipeline with the components that are specified in its constructor. 9 | """ 10 | 11 | # prepare data reader using csv file 12 | data_reader = CSVDataReader( 13 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 14 | has_header=False, 15 | contains_classes=True, 16 | ) 17 | 18 | # instantiate PipelineOptimizer that chooses among specified classifiers, feature selection algorithms and feature transform algorithms 19 | # log is True by default, log_verbose means more information if True, log_output_file is the destination of a log file 20 | # if log_output_file is not provided there is no file created 21 | # if log is False, logging is turned off 22 | pipeline_optimizer = PipelineOptimizer( 23 | data=data_reader, 24 | classifiers=[ 25 | "AdaBoost", 26 | "Bagging", 27 | "MultiLayerPerceptron", 28 | "RandomForest", 29 | "ExtremelyRandomizedTrees", 30 | "LinearSVC", 31 | ], 32 | feature_selection_algorithms=[ 33 | "SelectKBest", 34 | "SelectPercentile", 35 | "ParticleSwarmOptimization", 36 | "VarianceThreshold", 37 | ], 38 | feature_transform_algorithms=["Normalizer", "StandardScaler"], 39 | log=True, 40 | log_verbose=True, 41 | log_output_file="output.log", 42 | ) 43 | 44 | # runs the optimization process 45 | # one of the possible pipelines in this case is: SelectPercentile -> Normalizer -> RandomForest 46 | # returns the best found pipeline 47 
| # the chosen fitness function and optimization algorithm are Accuracy and Particle Swarm Algorithm 48 | pipeline = pipeline_optimizer.run_v1("Accuracy", 10, 30, "ParticleSwarmAlgorithm") 49 | 50 | # pipeline variable contains Pipeline object that can be used for further classification, exported as an object (that can be later loaded and used) or exported as text file 51 | -------------------------------------------------------------------------------- /niaaml/__init__.py: -------------------------------------------------------------------------------- 1 | from niaaml import classifiers 2 | from niaaml import data 3 | from niaaml import preprocessing 4 | from niaaml import fitness 5 | from niaaml.utilities import MinMax 6 | from niaaml.utilities import ParameterDefinition 7 | from niaaml.utilities import Factory 8 | from niaaml.utilities import OptimizationStats 9 | from niaaml.utilities import get_bin_index 10 | from niaaml.pipeline_optimizer import PipelineOptimizer 11 | from niaaml.pipeline import Pipeline 12 | from niaaml.pipeline_component import PipelineComponent 13 | from niaaml.logger import Logger 14 | from niaaml import cli 15 | 16 | __all__ = [ 17 | "classifiers", 18 | "data", 19 | "preprocessing", 20 | "fitness", 21 | "get_bin_index", 22 | "MinMax", 23 | "ParameterDefinition", 24 | "OptimizationStats", 25 | "Factory", 26 | "PipelineOptimizer", 27 | "Pipeline", 28 | "PipelineComponent", 29 | "Logger", 30 | "cli", 31 | ] 32 | 33 | __project__ = "niaaml" 34 | __version__ = "2.1.2" 35 | -------------------------------------------------------------------------------- /niaaml/classifiers/__init__.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from niaaml.classifiers.random_forest import RandomForest 3 | from niaaml.classifiers.multi_layer_perceptron import MultiLayerPerceptron 4 | from niaaml.classifiers.linear_svc import LinearSVC 5 | from 
niaaml.classifiers.ada_boost import AdaBoost 6 | from niaaml.classifiers.extremely_randomized_trees import ExtremelyRandomizedTrees 7 | from niaaml.classifiers.bagging import Bagging 8 | from niaaml.classifiers.decision_tree import DecisionTree 9 | from niaaml.classifiers.regression_decision_tree import DecisionTreeRegression 10 | from niaaml.classifiers.k_neighbors import KNeighbors 11 | from niaaml.classifiers.gaussian_process import GaussianProcess 12 | from niaaml.classifiers.regression_gaussian_process import GaussianProcessRegression 13 | from niaaml.classifiers.gaussian_naive_bayes import GaussianNB 14 | from niaaml.classifiers.quadratic_driscriminant_analysis import ( 15 | QuadraticDiscriminantAnalysis, 16 | ) 17 | from niaaml.classifiers.regression_linear_model import LinearRegression 18 | from niaaml.classifiers.regression_ridge import RidgeRegression 19 | from niaaml.classifiers.regression_lasso import LassoRegression 20 | from niaaml.classifiers.utility import ClassifierFactory 21 | 22 | __all__ = [ 23 | "Classifier", 24 | "RandomForest", 25 | "MultiLayerPerceptron", 26 | "LinearSVC", 27 | "AdaBoost", 28 | "Bagging", 29 | "ExtremelyRandomizedTrees", 30 | "DecisionTree", 31 | "DecisionTreeRegression", 32 | "KNeighbors", 33 | "GaussianProcess", 34 | "GaussianProcessRegression", 35 | "GaussianNB", 36 | "QuadraticDiscriminantAnalysis", 37 | "ClassifierFactory", 38 | "LinearRegression", 39 | "RidgeRegression", 40 | "LassoRegression", 41 | ] 42 | -------------------------------------------------------------------------------- /niaaml/classifiers/ada_boost.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from niaaml.utilities import MinMax 3 | from niaaml.utilities import ParameterDefinition 4 | from sklearn.ensemble import AdaBoostClassifier 5 | import numpy as np 6 | 7 | import warnings 8 | from sklearn.exceptions import ( 9 | ConvergenceWarning, 10 | 
DataConversionWarning, 11 | DataDimensionalityWarning, 12 | EfficiencyWarning, 13 | FitFailedWarning, 14 | UndefinedMetricWarning, 15 | ) 16 | 17 | __all__ = ["AdaBoost"] 18 | 19 | 20 | class AdaBoost(Classifier): 21 | r"""Implementation of AdaBoost classifier. 22 | 23 | Date: 24 | 2020 25 | 26 | Author: 27 | Luka Pečnik 28 | 29 | License: 30 | MIT 31 | 32 | Reference: 33 | Y. Freund, R. Schapire, “A Decision-Theoretic Generalization of on-Line Learning and an Application to Boosting”, 1995. 34 | 35 | Documentation: 36 | https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html 37 | 38 | See Also: 39 | * :class:`niaaml.classifiers.Classifier` 40 | """ 41 | Name = "AdaBoost" 42 | 43 | def __init__(self, **kwargs): 44 | r"""Initialize AdaBoost instance.""" 45 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 46 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 47 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 48 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 49 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 50 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 51 | 52 | self._params = dict( 53 | n_estimators=ParameterDefinition(MinMax(min=10, max=111), np.uint), 54 | algorithm=ParameterDefinition(["SAMME"]), 55 | ) 56 | self.__ada_boost = AdaBoostClassifier(algorithm='SAMME') 57 | 58 | def set_parameters(self, **kwargs): 59 | r"""Set the parameters/arguments of the algorithm.""" 60 | self.__ada_boost.set_params(**kwargs) 61 | 62 | def fit(self, x, y, **kwargs): 63 | r"""Fit AdaBoost. 64 | 65 | Arguments: 66 | x (pandas.core.frame.DataFrame): n samples to classify. 67 | y (pandas.core.series.Series): n classes of the samples in the x array. 68 | """ 69 | self.__ada_boost.fit(x, y) 70 | 71 | def predict(self, x, **kwargs): 72 | r"""Predict class for each sample (row) in x. 
73 | 74 | Arguments: 75 | x (pandas.core.frame.DataFrame): n samples to classify. 76 | 77 | Returns: 78 | pandas.core.series.Series: n predicted classes. 79 | """ 80 | return self.__ada_boost.predict(x) 81 | 82 | def to_string(self): 83 | r"""User friendly representation of the object. 84 | 85 | Returns: 86 | str: User friendly representation of the object. 87 | """ 88 | return Classifier.to_string(self).format( 89 | name=self.Name, 90 | args=self._parameters_to_string(self.__ada_boost.get_params()), 91 | ) 92 | -------------------------------------------------------------------------------- /niaaml/classifiers/bagging.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from niaaml.utilities import MinMax 3 | from niaaml.utilities import ParameterDefinition 4 | from sklearn.ensemble import BaggingClassifier 5 | import numpy as np 6 | 7 | import warnings 8 | from sklearn.exceptions import ( 9 | ConvergenceWarning, 10 | DataConversionWarning, 11 | DataDimensionalityWarning, 12 | EfficiencyWarning, 13 | FitFailedWarning, 14 | UndefinedMetricWarning, 15 | ) 16 | 17 | __all__ = ["Bagging"] 18 | 19 | 20 | class Bagging(Classifier): 21 | r"""Implementation of bagging classifier. 22 | 23 | Date: 24 | 2020 25 | 26 | Author: 27 | Luka Pečnik 28 | 29 | License: 30 | MIT 31 | 32 | Reference: 33 | L. Breiman, “Bagging predictors”, Machine Learning, 24(2), 123-140, 1996. 
34 | 35 | Documentation: 36 | https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.BaggingClassifier.html 37 | 38 | See Also: 39 | * :class:`niaaml.classifiers.Classifier` 40 | """ 41 | Name = "Bagging" 42 | 43 | def __init__(self, **kwargs): 44 | r"""Initialize Bagging instance.""" 45 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 46 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 47 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 48 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 49 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 50 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 51 | 52 | self._params = dict( 53 | n_estimators=ParameterDefinition(MinMax(min=10, max=111), np.uint), 54 | bootstrap=ParameterDefinition([True, False]), 55 | bootstrap_features=ParameterDefinition([True, False]), 56 | ) 57 | self.__bagging_classifier = BaggingClassifier() 58 | 59 | def set_parameters(self, **kwargs): 60 | r"""Set the parameters/arguments of the algorithm.""" 61 | self.__bagging_classifier.set_params(**kwargs) 62 | 63 | def fit(self, x, y, **kwargs): 64 | r"""Fit Bagging. 65 | 66 | Arguments: 67 | x (pandas.core.frame.DataFrame): n samples to classify. 68 | y (pandas.core.series.Series): n classes of the samples in the x array. 69 | 70 | Returns: 71 | None 72 | """ 73 | self.__bagging_classifier.fit(x, y) 74 | 75 | def predict(self, x, **kwargs): 76 | r"""Predict class for each sample (row) in x. 77 | 78 | Arguments: 79 | x (pandas.core.frame.DataFrame): n samples to classify. 80 | 81 | Returns: 82 | pandas.core.series.Series: n predicted classes. 83 | """ 84 | return self.__bagging_classifier.predict(x) 85 | 86 | def to_string(self): 87 | r"""User friendly representation of the object. 88 | 89 | Returns: 90 | str: User friendly representation of the object. 
91 | """ 92 | return Classifier.to_string(self).format( 93 | name=self.Name, 94 | args=self._parameters_to_string(self.__bagging_classifier.get_params()), 95 | ) 96 | -------------------------------------------------------------------------------- /niaaml/classifiers/classifier.py: -------------------------------------------------------------------------------- 1 | from niaaml.pipeline_component import PipelineComponent 2 | 3 | __all__ = ["Classifier"] 4 | 5 | 6 | class Classifier(PipelineComponent): 7 | r"""Class for implementing classifiers. 8 | 9 | Date: 10 | 2020 11 | 12 | Author: 13 | Luka Pečnik 14 | 15 | License: 16 | MIT 17 | 18 | See Also: 19 | * :class:`niaaml.pipeline_component.PipelineComponent` 20 | """ 21 | 22 | def fit(self, x, y, **kwargs): 23 | r"""Fit implemented classifier. 24 | 25 | Arguments: 26 | x (pandas.core.frame.DataFrame): n samples to classify. 27 | y (pandas.core.series.Series): n classes of the samples in the x array. 28 | """ 29 | return 30 | 31 | def predict(self, x, **kwargs): 32 | r"""Predict class for each sample (row) in x. 33 | 34 | Arguments: 35 | x (pandas.core.frame.DataFrame): n samples to classify. 36 | 37 | Returns: 38 | pandas.core.series.Series: n predicted classes. 39 | """ 40 | return 41 | -------------------------------------------------------------------------------- /niaaml/classifiers/decision_tree.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from niaaml.utilities import ParameterDefinition 3 | from sklearn.tree import DecisionTreeClassifier as DTC 4 | 5 | import warnings 6 | from sklearn.exceptions import ( 7 | ConvergenceWarning, 8 | DataConversionWarning, 9 | DataDimensionalityWarning, 10 | EfficiencyWarning, 11 | FitFailedWarning, 12 | UndefinedMetricWarning, 13 | ) 14 | 15 | __all__ = ["DecisionTree"] 16 | 17 | 18 | class DecisionTree(Classifier): 19 | r"""Implementation of decision tree classifier. 
20 | 21 | Date: 22 | 2020 23 | 24 | Author: 25 | Luka Pečnik 26 | 27 | License: 28 | MIT 29 | 30 | Reference: 31 | L. Breiman, J. Friedman, R. Olshen, and C. Stone, “Classification and Regression Trees”, Wadsworth, Belmont, CA, 1984. 32 | 33 | Documentation: 34 | https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html#sklearn.tree.DecisionTreeClassifier 35 | 36 | See Also: 37 | * :class:`niaaml.classifiers.Classifier` 38 | """ 39 | Name = "Decision Tree Classifier" 40 | 41 | def __init__(self, **kwargs): 42 | r"""Initialize DecisionTree instance.""" 43 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 44 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 45 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 46 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 47 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 48 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 49 | 50 | self._params = dict( 51 | criterion=ParameterDefinition(["gini", "entropy"]), 52 | splitter=ParameterDefinition(["best", "random"]), 53 | ) 54 | self.__decision_tree_classifier = DTC() 55 | 56 | def set_parameters(self, **kwargs): 57 | r"""Set the parameters/arguments of the algorithm.""" 58 | self.__decision_tree_classifier.set_params(**kwargs) 59 | 60 | def fit(self, x, y, **kwargs): 61 | r"""Fit DecisionTree. 62 | 63 | Arguments: 64 | x (pandas.core.frame.DataFrame): n samples to classify. 65 | y (pandas.core.series.Series): n classes of the samples in the x array. 66 | 67 | Returns: 68 | None 69 | """ 70 | self.__decision_tree_classifier.fit(x, y) 71 | 72 | def predict(self, x, **kwargs): 73 | r"""Predict class for each sample (row) in x. 74 | 75 | Arguments: 76 | x (pandas.core.frame.DataFrame): n samples to classify. 77 | 78 | Returns: 79 | pandas.core.series.Series: n predicted classes. 
80 | """ 81 | return self.__decision_tree_classifier.predict(x) 82 | 83 | def to_string(self): 84 | r"""User friendly representation of the object. 85 | 86 | Returns: 87 | str: User friendly representation of the object. 88 | """ 89 | return Classifier.to_string(self).format( 90 | name=self.Name, 91 | args=self._parameters_to_string( 92 | self.__decision_tree_classifier.get_params() 93 | ), 94 | ) 95 | -------------------------------------------------------------------------------- /niaaml/classifiers/gaussian_naive_bayes.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from sklearn.naive_bayes import GaussianNB as GNB 3 | 4 | import warnings 5 | from sklearn.exceptions import ( 6 | ConvergenceWarning, 7 | DataConversionWarning, 8 | DataDimensionalityWarning, 9 | EfficiencyWarning, 10 | FitFailedWarning, 11 | UndefinedMetricWarning, 12 | ) 13 | 14 | __all__ = ["GaussianNB"] 15 | 16 | 17 | class GaussianNB(Classifier): 18 | r"""Implementation of gaussian Naive Bayes classifier. 19 | 20 | Date: 21 | 2020 22 | 23 | Author: 24 | Luka Pečnik 25 | 26 | License: 27 | MIT 28 | 29 | Reference: 30 | Murphy, Kevin P. "Naive bayes classifiers." University of British Columbia 18 (2006): 60. 
31 | 32 | Documentation: 33 | https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html 34 | 35 | See Also: 36 | * :class:`niaaml.classifiers.Classifier` 37 | """ 38 | Name = "Gaussian Naive Bayes" 39 | 40 | def __init__(self, **kwargs): 41 | r"""Initialize GaussianNB instance.""" 42 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 43 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 44 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 45 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 46 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 47 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 48 | 49 | self.__gaussian_nb = GNB() 50 | super(GaussianNB, self).__init__() 51 | 52 | def set_parameters(self, **kwargs): 53 | r"""Set the parameters/arguments of the algorithm.""" 54 | self.__gaussian_nb.set_params(**kwargs) 55 | 56 | def fit(self, x, y, **kwargs): 57 | r"""Fit GaussianNB. 58 | 59 | Arguments: 60 | x (pandas.core.frame.DataFrame): n samples to classify. 61 | y (pandas.core.series.Series): n classes of the samples in the x array. 62 | 63 | Returns: 64 | None 65 | """ 66 | self.__gaussian_nb.fit(x, y) 67 | 68 | def predict(self, x, **kwargs): 69 | r"""Predict class for each sample (row) in x. 70 | 71 | Arguments: 72 | x (pandas.core.frame.DataFrame): n samples to classify. 73 | 74 | Returns: 75 | pandas.core.series.Series: n predicted classes. 76 | """ 77 | return self.__gaussian_nb.predict(x) 78 | 79 | def to_string(self): 80 | r"""User friendly representation of the object. 81 | 82 | Returns: 83 | str: User friendly representation of the object. 
84 | """ 85 | return Classifier.to_string(self).format( 86 | name=self.Name, 87 | args=self._parameters_to_string(self.__gaussian_nb.get_params()), 88 | ) 89 | -------------------------------------------------------------------------------- /niaaml/classifiers/k_neighbors.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from niaaml.utilities import ParameterDefinition 3 | from sklearn.neighbors import KNeighborsClassifier as KNC 4 | 5 | import warnings 6 | from sklearn.exceptions import ( 7 | ConvergenceWarning, 8 | DataConversionWarning, 9 | DataDimensionalityWarning, 10 | EfficiencyWarning, 11 | FitFailedWarning, 12 | UndefinedMetricWarning, 13 | ) 14 | 15 | __all__ = ["KNeighbors"] 16 | 17 | 18 | class KNeighbors(Classifier): 19 | r"""Implementation of k neighbors classifier. 20 | 21 | Date: 22 | 2020 23 | 24 | Author: 25 | Luka Pečnik 26 | 27 | License: 28 | MIT 29 | 30 | Reference: 31 | “Neighbourhood Components Analysis”, J. Goldberger, S. Roweis, G. Hinton, R. Salakhutdinov, Advances in Neural Information Processing Systems, Vol. 17, May 2005, pp. 513-520. 
32 | 33 | Documentation: 34 | https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html 35 | 36 | See Also: 37 | * :class:`niaaml.classifiers.Classifier` 38 | """ 39 | Name = "K Neighbors Classifier" 40 | 41 | def __init__(self, **kwargs): 42 | r"""Initialize KNeighbors instance.""" 43 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 44 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 45 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 46 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 47 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 48 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 49 | 50 | self._params = dict( 51 | weights=ParameterDefinition(["uniform", "distance"]), 52 | algorithm=ParameterDefinition(["auto", "ball_tree", "kd_tree", "brute"]), 53 | ) 54 | self.__kn_classifier = KNC() 55 | 56 | def set_parameters(self, **kwargs): 57 | r"""Set the parameters/arguments of the algorithm.""" 58 | self.__kn_classifier.set_params(**kwargs) 59 | 60 | def fit(self, x, y, **kwargs): 61 | r"""Fit KNeighbors. 62 | 63 | Arguments: 64 | x (pandas.core.frame.DataFrame): n samples to classify. 65 | y (pandas.core.series.Series): n classes of the samples in the x array. 66 | 67 | Returns: 68 | None 69 | """ 70 | self.__kn_classifier.fit(x, y) 71 | 72 | def predict(self, x, **kwargs): 73 | r"""Predict class for each sample (row) in x. 74 | 75 | Arguments: 76 | x (pandas.core.frame.DataFrame): n samples to classify. 77 | 78 | Returns: 79 | pandas.core.series.Series: n predicted classes. 80 | """ 81 | return self.__kn_classifier.predict(x) 82 | 83 | def to_string(self): 84 | r"""User friendly representation of the object. 85 | 86 | Returns: 87 | str: User friendly representation of the object. 
88 | """ 89 | return Classifier.to_string(self).format( 90 | name=self.Name, 91 | args=self._parameters_to_string(self.__kn_classifier.get_params()), 92 | ) 93 | -------------------------------------------------------------------------------- /niaaml/classifiers/linear_svc.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from niaaml.utilities import MinMax 3 | from niaaml.utilities import ParameterDefinition 4 | from sklearn.svm import LinearSVC as LSVC 5 | import numpy as np 6 | 7 | import warnings 8 | from sklearn.exceptions import ( 9 | ConvergenceWarning, 10 | DataConversionWarning, 11 | DataDimensionalityWarning, 12 | EfficiencyWarning, 13 | FitFailedWarning, 14 | UndefinedMetricWarning, 15 | ) 16 | 17 | __all__ = ["LinearSVC"] 18 | 19 | 20 | class LinearSVC(Classifier): 21 | r"""Implementation of linear support vector classification. 22 | 23 | Date: 24 | 2020 25 | 26 | Author: 27 | Luka Pečnik 28 | 29 | License: 30 | MIT 31 | 32 | Reference: 33 | Fan, Rong-En, et al. "LIBLINEAR: A library for large linear classification." Journal of machine learning research 9.Aug (2008): 1871-1874. 
34 | 35 | Documentation: 36 | https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html 37 | 38 | See Also: 39 | * :class:`niaaml.classifiers.Classifier` 40 | """ 41 | Name = "Linear Support Vector Classification" 42 | 43 | def __init__(self, **kwargs): 44 | r"""Initialize LinearSVCClassifier instance.""" 45 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 46 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 47 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 48 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 49 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 50 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 51 | 52 | self._params = dict( 53 | penalty=ParameterDefinition(["l1", "l2"]), 54 | max_iter=ParameterDefinition(MinMax(min=300, max=2000), np.uint), 55 | ) 56 | self.__linear_SVC = LSVC(dual=True) 57 | 58 | def set_parameters(self, **kwargs): 59 | r"""Set the parameters/arguments of the algorithm.""" 60 | self.__linear_SVC.set_params(**kwargs) 61 | 62 | def fit(self, x, y, **kwargs): 63 | r"""Fit LinearSVCClassifier. 64 | 65 | Arguments: 66 | x (pandas.core.frame.DataFrame): n samples to classify. 67 | y (pandas.core.series.Series): n classes of the samples in the x array. 68 | 69 | Returns: 70 | None 71 | """ 72 | self.__linear_SVC.fit(x, y) 73 | 74 | def predict(self, x, **kwargs): 75 | r"""Predict class for each sample (row) in x. 76 | 77 | Arguments: 78 | x (pandas.core.frame.DataFrame): n samples to classify. 79 | 80 | Returns: 81 | pandas.core.series.Series: n predicted classes. 82 | """ 83 | return self.__linear_SVC.predict(x) 84 | 85 | def to_string(self): 86 | r"""User friendly representation of the object. 87 | 88 | Returns: 89 | str: User friendly representation of the object. 
90 | """ 91 | return Classifier.to_string(self).format( 92 | name=self.Name, 93 | args=self._parameters_to_string(self.__linear_SVC.get_params()), 94 | ) 95 | -------------------------------------------------------------------------------- /niaaml/classifiers/quadratic_driscriminant_analysis.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA 3 | 4 | import warnings 5 | from sklearn.exceptions import ( 6 | ConvergenceWarning, 7 | DataConversionWarning, 8 | DataDimensionalityWarning, 9 | EfficiencyWarning, 10 | FitFailedWarning, 11 | UndefinedMetricWarning, 12 | ) 13 | 14 | __all__ = ["QuadraticDiscriminantAnalysis"] 15 | 16 | 17 | class QuadraticDiscriminantAnalysis(Classifier): 18 | r"""Implementation of quadratic discriminant analysis classifier. 19 | 20 | Date: 21 | 2020 22 | 23 | Author: 24 | Luka Pečnik 25 | 26 | License: 27 | MIT 28 | 29 | Reference: 30 | “The Elements of Statistical Learning”, Hastie T., Tibshirani R., Friedman J., Section 4.3, p.106-119, 2008. 
31 | 32 | Documentation: 33 | https://scikit-learn.org/stable/modules/generated/sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.html#sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis 34 | 35 | See Also: 36 | * :class:`niaaml.classifiers.Classifier` 37 | """ 38 | Name = "Quadratic Discriminant Analysis" 39 | 40 | def __init__(self, **kwargs): 41 | r"""Initialize QuadraticDiscriminantAnalysis instance.""" 42 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 43 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 44 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 45 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 46 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 47 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 48 | 49 | self.__qda = QDA() 50 | super(QuadraticDiscriminantAnalysis, self).__init__() 51 | 52 | def set_parameters(self, **kwargs): 53 | r"""Set the parameters/arguments of the algorithm.""" 54 | self.__qda.set_params(**kwargs) 55 | 56 | def fit(self, x, y, **kwargs): 57 | r"""Fit QuadraticDiscriminantAnalysis. 58 | 59 | Arguments: 60 | x (pandas.core.frame.DataFrame): n samples to classify. 61 | y (pandas.core.series.Series): n classes of the samples in the x array. 62 | 63 | Returns: 64 | None 65 | """ 66 | self.__qda.fit(x, y) 67 | 68 | def predict(self, x, **kwargs): 69 | r"""Predict class for each sample (row) in x. 70 | 71 | Arguments: 72 | x (pandas.core.frame.DataFrame): n samples to classify. 73 | 74 | Returns: 75 | pandas.core.series.Series: n predicted classes. 76 | """ 77 | return self.__qda.predict(x) 78 | 79 | def to_string(self): 80 | r"""User friendly representation of the object. 81 | 82 | Returns: 83 | str: User friendly representation of the object. 
84 | """ 85 | return Classifier.to_string(self).format( 86 | name=self.Name, args=self._parameters_to_string(self.__qda.get_params()) 87 | ) 88 | -------------------------------------------------------------------------------- /niaaml/classifiers/random_forest.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from niaaml.utilities import MinMax 3 | from niaaml.utilities import ParameterDefinition 4 | from sklearn.ensemble import RandomForestClassifier as RF 5 | import numpy as np 6 | 7 | import warnings 8 | from sklearn.exceptions import ( 9 | ConvergenceWarning, 10 | DataConversionWarning, 11 | DataDimensionalityWarning, 12 | EfficiencyWarning, 13 | FitFailedWarning, 14 | UndefinedMetricWarning, 15 | ) 16 | 17 | __all__ = ["RandomForest"] 18 | 19 | 20 | class RandomForest(Classifier): 21 | r"""Implementation of random forest classifier. 22 | 23 | Date: 24 | 2020 25 | 26 | Author: 27 | Luka Pečnik 28 | 29 | License: 30 | MIT 31 | 32 | Reference: 33 | Breiman, “Random Forests”, Machine Learning, 45(1), 5-32, 2001. 
34 | 35 | Documentation: 36 | https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html 37 | 38 | See Also: 39 | * :class:`niaaml.classifiers.Classifier` 40 | """ 41 | Name = "Random Forest Classifier" 42 | 43 | def __init__(self, **kwargs): 44 | r"""Initialize RandomForestClassifier instance.""" 45 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 46 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 47 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 48 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 49 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 50 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 51 | 52 | self._params = dict( 53 | n_estimators=ParameterDefinition(MinMax(min=10, max=111), np.uint) 54 | ) 55 | self.__random_forest_classifier = RF() 56 | 57 | def set_parameters(self, **kwargs): 58 | r"""Set the parameters/arguments of the algorithm.""" 59 | self.__random_forest_classifier.set_params(**kwargs) 60 | 61 | def fit(self, x, y, **kwargs): 62 | r"""Fit RandomForestClassifier. 63 | 64 | Arguments: 65 | x (pandas.core.frame.DataFrame): n samples to classify. 66 | y (pandas.core.series.Series): n classes of the samples in the x array. 67 | 68 | Returns: 69 | None 70 | """ 71 | self.__random_forest_classifier.fit(x, y) 72 | 73 | def predict(self, x, **kwargs): 74 | r"""Predict class for each sample (row) in x. 75 | 76 | Arguments: 77 | x (pandas.core.frame.DataFrame): n samples to classify. 78 | 79 | Returns: 80 | pandas.core.series.Series: n predicted classes. 81 | """ 82 | return self.__random_forest_classifier.predict(x) 83 | 84 | def to_string(self): 85 | r"""User friendly representation of the object. 86 | 87 | Returns: 88 | str: User friendly representation of the object. 
89 | """ 90 | return Classifier.to_string(self).format( 91 | name=self.Name, 92 | args=self._parameters_to_string( 93 | self.__random_forest_classifier.get_params() 94 | ), 95 | ) 96 | -------------------------------------------------------------------------------- /niaaml/classifiers/regression_decision_tree.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from niaaml.utilities import ParameterDefinition 3 | from sklearn.tree import DecisionTreeRegressor as DTR 4 | 5 | import warnings 6 | from sklearn.exceptions import ( 7 | ConvergenceWarning, 8 | DataConversionWarning, 9 | DataDimensionalityWarning, 10 | EfficiencyWarning, 11 | FitFailedWarning, 12 | UndefinedMetricWarning, 13 | ) 14 | 15 | __all__ = ["DecisionTreeRegression"] 16 | 17 | 18 | class DecisionTreeRegression(Classifier): 19 | r"""Implementation of decision tree regression. 20 | 21 | Date: 22 | 2024 23 | 24 | Author: 25 | Laurenz Farthofer 26 | 27 | License: 28 | MIT 29 | 30 | Documentation: 31 | https://scikit-learn.org/stable/modules/tree.html#regression 32 | 33 | See Also: 34 | * :class:`niaaml.classifiers.Classifier` 35 | """ 36 | Name = "Decision Tree Regression" 37 | 38 | def __init__(self, **kwargs): 39 | r"""Initialize DecisionTree instance.""" 40 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 41 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 42 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 43 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 44 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 45 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 46 | 47 | self._params = dict( 48 | criterion=ParameterDefinition(["squared_error", "friedman_mse", "absolute_error", "poisson"]), 49 | splitter=ParameterDefinition(["best", "random"]), 50 | ) 51 | 
self.__decision_tree_regression = DTR() 52 | 53 | def set_parameters(self, **kwargs): 54 | r"""Set the parameters/arguments of the algorithm.""" 55 | self.__decision_tree_regression.set_params(**kwargs) 56 | 57 | def fit(self, x, y, **kwargs): 58 | r"""Fit DecisionTree. 59 | 60 | Arguments: 61 | x (pandas.core.frame.DataFrame): n samples to classify. 62 | y (pandas.core.series.Series): n classes of the samples in the x array. 63 | 64 | Returns: 65 | None 66 | """ 67 | self.__decision_tree_regression.fit(x, y) 68 | 69 | def predict(self, x, **kwargs): 70 | r"""Predict class for each sample (row) in x. 71 | 72 | Arguments: 73 | x (pandas.core.frame.DataFrame): n samples to classify. 74 | 75 | Returns: 76 | pandas.core.series.Series: n predicted classes. 77 | """ 78 | return self.__decision_tree_regression.predict(x) 79 | 80 | def to_string(self): 81 | r"""User friendly representation of the object. 82 | 83 | Returns: 84 | str: User friendly representation of the object. 85 | """ 86 | return Classifier.to_string(self).format( 87 | name=self.Name, 88 | args=self._parameters_to_string( 89 | self.__decision_tree_regression.get_params() 90 | ), 91 | ) 92 | -------------------------------------------------------------------------------- /niaaml/classifiers/regression_gaussian_process.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from niaaml.utilities import MinMax 3 | from niaaml.utilities import ParameterDefinition 4 | from sklearn.gaussian_process import GaussianProcessRegressor as GPR 5 | import numpy as np 6 | 7 | import warnings 8 | from sklearn.exceptions import ( 9 | ConvergenceWarning, 10 | DataConversionWarning, 11 | DataDimensionalityWarning, 12 | EfficiencyWarning, 13 | FitFailedWarning, 14 | UndefinedMetricWarning, 15 | ) 16 | 17 | __all__ = ["GaussianProcessRegression"] 18 | 19 | 20 | class GaussianProcessRegression(Classifier): 21 | r"""Implementation of 
class GaussianProcessRegression(Classifier):
    r"""Gaussian process regression.

    Date:
        2024

    Author:
        Laurenz Farthofer

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcessRegressor.html#sklearn.gaussian_process.GaussianProcessRegressor

    See Also:
        * :class:`niaaml.classifiers.Classifier`
    """
    Name = "Gaussian Process Regression"

    def __init__(self, **kwargs):
        r"""Initialize GaussianProcess instance."""
        # Suppress noisy scikit-learn warnings so optimization logs stay readable.
        for warning_category in (
            ConvergenceWarning,
            DataConversionWarning,
            DataDimensionalityWarning,
            EfficiencyWarning,
            FitFailedWarning,
            UndefinedMetricWarning,
        ):
            warnings.filterwarnings(action="ignore", category=warning_category)

        # No tunable hyperparameters are exposed to the optimizer.
        self._params = dict()
        self.__gaussian_process = GPR()

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the algorithm."""
        self.__gaussian_process.set_params(**kwargs)

    def fit(self, x, y, **kwargs):
        r"""Fit the Gaussian process regressor.

        Arguments:
            x (pandas.core.frame.DataFrame): n samples to classify.
            y (pandas.core.series.Series): n classes of the samples in the x array.

        Returns:
            None
        """
        self.__gaussian_process.fit(x, y)

    def predict(self, x, **kwargs):
        r"""Predict class for each sample (row) in x.

        Arguments:
            x (pandas.core.frame.DataFrame): n samples to classify.

        Returns:
            pandas.core.series.Series: n predicted classes.
        """
        return self.__gaussian_process.predict(x)

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        template = Classifier.to_string(self)
        args_text = self._parameters_to_string(self.__gaussian_process.get_params())
        return template.format(name=self.Name, args=args_text)
84 | """ 85 | return Classifier.to_string(self).format( 86 | name=self.Name, 87 | args=self._parameters_to_string(self.__gaussian_process.get_params()), 88 | ) 89 | -------------------------------------------------------------------------------- /niaaml/classifiers/regression_lasso.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from niaaml.classifiers.classifier import Classifier 3 | from sklearn.linear_model import Lasso as LR 4 | 5 | import warnings 6 | from sklearn.exceptions import ( 7 | ConvergenceWarning, 8 | DataConversionWarning, 9 | DataDimensionalityWarning, 10 | EfficiencyWarning, 11 | FitFailedWarning, 12 | UndefinedMetricWarning, 13 | ) 14 | 15 | from niaaml.utilities import MinMax, ParameterDefinition 16 | 17 | __all__ = ["LassoRegression"] 18 | 19 | 20 | class LassoRegression(Classifier): 21 | r"""Implementation of linear lasso regression. 22 | 23 | Date: 24 | 2024 25 | 26 | Author: 27 | Laurenz Farthofer 28 | 29 | License: 30 | MIT 31 | 32 | Documentation: 33 | https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html#sklearn.linear_model.Lasso 34 | 35 | See Also: 36 | * :class:`niaaml.classifiers.Classifier` 37 | """ 38 | Name = "Lasso Regression" 39 | Task = "Regression" 40 | 41 | def __init__(self, **kwargs): 42 | r"""Initialize LinearRegression instance.""" 43 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 44 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 45 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 46 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 47 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 48 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 49 | 50 | self.model = LR() 51 | 52 | self._params = dict( 53 | alpha=ParameterDefinition(MinMax(min=0.0, max=10e6), np.float64), 54 | 
fit_intercept=ParameterDefinition([True, False]), 55 | max_iter=ParameterDefinition(MinMax(min=300, max=2000), np.uint), 56 | ) 57 | 58 | def set_parameters(self, **kwargs): 59 | r"""Set the parameters/arguments of the algorithm.""" 60 | self.model.set_params(**kwargs) 61 | 62 | def fit(self, x, y, **kwargs): 63 | r"""Fit LinearSVCClassifier. 64 | 65 | Arguments: 66 | x (pandas.core.frame.DataFrame): n samples to classify. 67 | y (pandas.core.series.Series): n classes of the samples in the x array. 68 | 69 | Returns: 70 | None 71 | """ 72 | self.model.fit(x, y) 73 | 74 | def predict(self, x, **kwargs): 75 | r"""Predict class for each sample (row) in x. 76 | 77 | Arguments: 78 | x (pandas.core.frame.DataFrame): n samples to classify. 79 | 80 | Returns: 81 | pandas.core.series.Series: n predicted classes. 82 | """ 83 | return self.model.predict(x) 84 | 85 | def to_string(self): 86 | r"""User friendly representation of the object. 87 | 88 | Returns: 89 | str: User friendly representation of the object. 90 | """ 91 | return Classifier.to_string(self).format( 92 | name=self.Name, 93 | args=self._parameters_to_string(self.__gaussian_process.get_params()), 94 | ) 95 | -------------------------------------------------------------------------------- /niaaml/classifiers/regression_linear_model.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from sklearn.linear_model import LinearRegression as LR 3 | 4 | import warnings 5 | from sklearn.exceptions import ( 6 | ConvergenceWarning, 7 | DataConversionWarning, 8 | DataDimensionalityWarning, 9 | EfficiencyWarning, 10 | FitFailedWarning, 11 | UndefinedMetricWarning, 12 | ) 13 | 14 | from niaaml.utilities import ParameterDefinition 15 | 16 | __all__ = ["LinearRegression"] 17 | 18 | 19 | class LinearRegression(Classifier): 20 | r"""Implementation of linear regression. 
class LinearRegression(Classifier):
    r"""Implementation of linear regression.

    Date:
        2024

    Author:
        Laurenz Farthofer

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html#sklearn.linear_model.LinearRegression

    See Also:
        * :class:`niaaml.classifiers.Classifier`
    """
    Name = "Linear Regression"
    Task = "Regression"

    def __init__(self, **kwargs):
        r"""Initialize LinearRegression instance."""
        warnings.filterwarnings(action="ignore", category=ConvergenceWarning)
        warnings.filterwarnings(action="ignore", category=DataConversionWarning)
        warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning)
        warnings.filterwarnings(action="ignore", category=EfficiencyWarning)
        warnings.filterwarnings(action="ignore", category=FitFailedWarning)
        warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning)

        # Wrapped scikit-learn estimator.
        self.model = LR()

        # Hyperparameter search space explored by the pipeline optimizer.
        self._params = dict(
            fit_intercept=ParameterDefinition([True, False]),
        )

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the algorithm."""
        self.model.set_params(**kwargs)

    def fit(self, x, y, **kwargs):
        r"""Fit LinearRegression.

        Arguments:
            x (pandas.core.frame.DataFrame): n samples to classify.
            y (pandas.core.series.Series): n classes of the samples in the x array.

        Returns:
            None
        """
        self.model.fit(x, y)

    def predict(self, x, **kwargs):
        r"""Predict class for each sample (row) in x.

        Arguments:
            x (pandas.core.frame.DataFrame): n samples to classify.

        Returns:
            pandas.core.series.Series: n predicted classes.
        """
        return self.model.predict(x)

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        # BUG FIX: previously referenced self.__gaussian_process — a copy-paste
        # leftover from the Gaussian process wrapper that raised AttributeError.
        # The estimator here is held in self.model.
        return Classifier.to_string(self).format(
            name=self.Name,
            args=self._parameters_to_string(self.model.get_params()),
        )
87 | """ 88 | return Classifier.to_string(self).format( 89 | name=self.Name, 90 | args=self._parameters_to_string(self.__gaussian_process.get_params()), 91 | ) 92 | -------------------------------------------------------------------------------- /niaaml/classifiers/regression_ridge.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from niaaml.classifiers.classifier import Classifier 3 | from sklearn.linear_model import Ridge as LR 4 | 5 | import warnings 6 | from sklearn.exceptions import ( 7 | ConvergenceWarning, 8 | DataConversionWarning, 9 | DataDimensionalityWarning, 10 | EfficiencyWarning, 11 | FitFailedWarning, 12 | UndefinedMetricWarning, 13 | ) 14 | 15 | from niaaml.utilities import MinMax, ParameterDefinition 16 | 17 | __all__ = ["RidgeRegression"] 18 | 19 | 20 | class RidgeRegression(Classifier): 21 | r"""Implementation of linear ridge regression. 22 | 23 | Date: 24 | 2024 25 | 26 | Author: 27 | Laurenz Farthofer 28 | 29 | License: 30 | MIT 31 | 32 | Documentation: 33 | https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html#sklearn.linear_model.Ridge 34 | 35 | See Also: 36 | * :class:`niaaml.classifiers.Classifier` 37 | """ 38 | Name = "Ridge Regression" 39 | Task = "Regression" 40 | 41 | def __init__(self, **kwargs): 42 | r"""Initialize LinearRegression instance.""" 43 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 44 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 45 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 46 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 47 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 48 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 49 | 50 | self.model = LR() 51 | 52 | self._params = dict( 53 | alpha=ParameterDefinition(MinMax(min=0.0, max=100000.0), np.float64), 54 | 
fit_intercept=ParameterDefinition([True, False]), 55 | max_iter=ParameterDefinition(MinMax(min=300, max=2000), np.uint), 56 | ) 57 | 58 | def set_parameters(self, **kwargs): 59 | r"""Set the parameters/arguments of the algorithm.""" 60 | self.model.set_params(**kwargs) 61 | 62 | def fit(self, x, y, **kwargs): 63 | r"""Fit LinearSVCClassifier. 64 | 65 | Arguments: 66 | x (pandas.core.frame.DataFrame): n samples to classify. 67 | y (pandas.core.series.Series): n classes of the samples in the x array. 68 | 69 | Returns: 70 | None 71 | """ 72 | self.model.fit(x, y) 73 | 74 | def predict(self, x, **kwargs): 75 | r"""Predict class for each sample (row) in x. 76 | 77 | Arguments: 78 | x (pandas.core.frame.DataFrame): n samples to classify. 79 | 80 | Returns: 81 | pandas.core.series.Series: n predicted classes. 82 | """ 83 | return self.model.predict(x) 84 | 85 | def to_string(self): 86 | r"""User friendly representation of the object. 87 | 88 | Returns: 89 | str: User friendly representation of the object. 
90 | """ 91 | return Classifier.to_string(self).format( 92 | name=self.Name, 93 | args=self._parameters_to_string(self.__gaussian_process.get_params()), 94 | ) 95 | -------------------------------------------------------------------------------- /niaaml/classifiers/utility.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.regression_decision_tree import DecisionTreeRegression 2 | from niaaml.classifiers.regression_gaussian_process import GaussianProcessRegression 3 | from niaaml.utilities import Factory 4 | from niaaml.classifiers.ada_boost import AdaBoost 5 | from niaaml.classifiers.bagging import Bagging 6 | from niaaml.classifiers.extremely_randomized_trees import ExtremelyRandomizedTrees 7 | from niaaml.classifiers.linear_svc import LinearSVC 8 | from niaaml.classifiers.multi_layer_perceptron import MultiLayerPerceptron 9 | from niaaml.classifiers.random_forest import RandomForest 10 | from niaaml.classifiers.decision_tree import DecisionTree 11 | from niaaml.classifiers.k_neighbors import KNeighbors 12 | from niaaml.classifiers.gaussian_process import GaussianProcess 13 | from niaaml.classifiers.gaussian_naive_bayes import GaussianNB 14 | from niaaml.classifiers.quadratic_driscriminant_analysis import ( 15 | QuadraticDiscriminantAnalysis, 16 | ) 17 | from niaaml.classifiers.regression_linear_model import LinearRegression 18 | from niaaml.classifiers.regression_ridge import RidgeRegression 19 | from niaaml.classifiers.regression_lasso import LassoRegression 20 | 21 | __all__ = ["ClassifierFactory"] 22 | 23 | 24 | class ClassifierFactory(Factory): 25 | r"""Class with string mappings to classifiers. 26 | 27 | Date: 28 | 2020 29 | 30 | Author: 31 | Luka Pečnik 32 | 33 | License: 34 | MIT 35 | 36 | Attributes: 37 | _entities (Dict[str, Classifier]): Mapping from strings to classifiers. 
38 | 39 | See Also: 40 | * :class:`niaaml.utilities.Factory` 41 | """ 42 | 43 | def _set_parameters(self, **kwargs): 44 | r"""Set the parameters/arguments of the factory.""" 45 | self._entities = { 46 | "AdaBoost": AdaBoost, 47 | "Bagging": Bagging, 48 | "ExtremelyRandomizedTrees": ExtremelyRandomizedTrees, 49 | "LinearSVC": LinearSVC, 50 | "MultiLayerPerceptron": MultiLayerPerceptron, 51 | "RandomForest": RandomForest, 52 | "DecisionTree": DecisionTree, 53 | "DecisionTreeRegression": DecisionTreeRegression, 54 | "KNeighbors": KNeighbors, 55 | "GaussianProcess": GaussianProcess, 56 | "GaussianProcessRegression": GaussianProcessRegression, 57 | "GaussianNB": GaussianNB, 58 | "QuadraticDiscriminantAnalysis": QuadraticDiscriminantAnalysis, 59 | "LinearRegression": LinearRegression, 60 | "RidgeRegression": RidgeRegression, 61 | "LassoRegression": LassoRegression, 62 | } 63 | -------------------------------------------------------------------------------- /niaaml/data/__init__.py: -------------------------------------------------------------------------------- 1 | from niaaml.data.data_reader import DataReader 2 | from niaaml.data.basic_data_reader import BasicDataReader 3 | from niaaml.data.csv_data_reader import CSVDataReader 4 | 5 | __all__ = ["DataReader", "CSVDataReader", "BasicDataReader"] 6 | -------------------------------------------------------------------------------- /niaaml/data/basic_data_reader.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from niaaml.data.data_reader import DataReader 3 | 4 | __all__ = ["BasicDataReader"] 5 | 6 | 7 | class BasicDataReader(DataReader): 8 | r"""Implementation of basic data reader. 9 | 10 | Date: 11 | 2020 12 | 13 | Author: 14 | Luka Pečnik 15 | 16 | License: 17 | MIT 18 | 19 | See Also: 20 | * :class:`niaaml.data.DataReader` 21 | """ 22 | 23 | def _set_parameters(self, x, y=None, **kwargs): 24 | r"""Set the parameters of the algorithm. 
class CSVDataReader(DataReader):
    r"""Implementation of CSV data reader.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Attributes:
        __src (string): Path to a CSV file.
        __contains_classes (bool): Tells if src contains expected classification results or only features.
        __has_header (bool): Tells if src contains header row.
        __ignore_columns (List[int]): Column indices to drop.

    See Also:
        * :class:`niaaml.data.DataReader`
    """

    def _set_parameters(
        self, src, contains_classes=True, has_header=False, ignore_columns=None, **kwargs
    ):
        r"""Set the parameters of the algorithm.

        Arguments:
            src (string): Path to a CSV dataset file.
            contains_classes (Optional[bool]): Tells if src contains expected classification results or only features.
            has_header (Optional[bool]): Tells if src contains header row.
            ignore_columns (Optional[List[int]]): Column indices to drop. Defaults to no columns.
        """
        self.__src = src
        self.__contains_classes = contains_classes
        self.__has_header = has_header
        # Fix: the previous `ignore_columns=[]` default was a shared mutable
        # default argument; a caller mutating it would affect every later
        # instance. Using None and creating a fresh list avoids that.
        self.__ignore_columns = [] if ignore_columns is None else ignore_columns
        self._read_data()

    def _read_data(self, **kwargs):
        r"""Read data from the configured CSV source into _x (and _y)."""
        data = pd.read_csv(
            self.__src, header="infer" if self.__has_header else None
        )
        # Capture column labels before popping so __ignore_columns indices
        # keep referring to the original column positions.
        header = data.columns

        if self.__contains_classes:
            # The last column holds the expected classification results.
            self._y = data.pop(header[-1])

        if len(self.__ignore_columns) > 0:
            data.drop(header[self.__ignore_columns], axis=1, inplace=True)

        self._x = data
44 | """ 45 | return self._y 46 | 47 | def set_x(self, value): 48 | r"""Set the value of _x.""" 49 | self._x = value 50 | 51 | def set_y(self, value): 52 | r"""Set the value of _y.""" 53 | self._y = value 54 | 55 | def _read_data(self): 56 | r"""Read data from expected source.""" 57 | return 58 | -------------------------------------------------------------------------------- /niaaml/fitness/__init__.py: -------------------------------------------------------------------------------- 1 | from niaaml.fitness.fitness_function import FitnessFunction 2 | from niaaml.fitness.accuracy import Accuracy 3 | from niaaml.fitness.cohen_kappa import CohenKappa 4 | from niaaml.fitness.f1 import F1 5 | from niaaml.fitness.r2 import R2 6 | from niaaml.fitness.mse import MSE 7 | from niaaml.fitness.precision import Precision 8 | from niaaml.fitness.utility import FitnessFactory 9 | 10 | __all__ = [ 11 | "FitnessFunction", 12 | "Accuracy", 13 | "CohenKappa", 14 | "F1", 15 | "R2", 16 | "MSE", 17 | "Precision", 18 | "FitnessFactory", 19 | ] 20 | -------------------------------------------------------------------------------- /niaaml/fitness/accuracy.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import accuracy_score 2 | from niaaml.fitness.fitness_function import FitnessFunction 3 | 4 | __all__ = ["Accuracy"] 5 | 6 | 7 | class Accuracy(FitnessFunction): 8 | r"""Class representing the accuracy as a fitness function. 9 | 10 | Date: 11 | 2020 12 | 13 | Author: 14 | Luka Pečnik 15 | 16 | License: 17 | MIT 18 | 19 | Documentation: 20 | https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html 21 | 22 | See Also: 23 | * :class:`niaaml.fitness.FitnessFunction` 24 | """ 25 | Name = "Accuracy" 26 | 27 | def get_fitness(self, predicted, expected): 28 | r"""Return fitness value. The larger return value should represent a better fitness for the framework to work properly. 
class CohenKappa(FitnessFunction):
    r"""Cohen's kappa statistic used as a fitness function.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.metrics.cohen_kappa_score.html

    See Also:
        * :class:`niaaml.fitness.FitnessFunction`
    """

    Name = "Cohen's Kappa"

    def get_fitness(self, predicted, expected):
        r"""Return fitness value. The larger return value should represent a better fitness for the framework to work properly.

        Arguments:
            predicted (pandas.core.series.Series): Predicted values.
            expected (pandas.core.series.Series): Expected values.

        Returns:
            float: Calculated fitness value.
        """
        score = cohen_kappa_score(y1=expected, y2=predicted)
        return score
class FitnessFunction:
    r"""Base class for implementing fitness functions.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Attributes:
        Name (str): Name of the fitness function.
    """

    # Human-readable name of the concrete metric; subclasses override this.
    Name = None

    def __init__(self, **kwargs):
        r"""Initialize fitness function."""
        self.set_parameters(**kwargs)

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the fitness function."""
        return

    def get_fitness(self, predicted, expected):
        r"""Return fitness value. The larger return value should represent a better fitness for the framework to work properly.

        Arguments:
            predicted (pandas.core.series.Series): Predicted values.
            expected (pandas.core.series.Series): Expected values.

        Returns:
            float: Calculated fitness value.
        """
        return None

    def get_bounds(self):
        r"""Return the optimization bounds for this fitness function.

        The default is suitable for classification metrics bounded to [0, 1];
        regression metrics override this with wider ranges.

        Returns:
            Tuple[float, float]: Lower and upper optimization bounds. Defaults to (0.0, 1.0).
        """
        # Fix: the original docstring misspelled "Returns" as "Retunrs" and
        # used a non-raw string inconsistent with the rest of the file.
        return (0.0, 1.0)
class Precision(FitnessFunction):
    r"""Weighted precision used as a fitness function.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.metrics.precision_score.html

    See Also:
        * :class:`niaaml.fitness.FitnessFunction`
    """

    Name = "Precision"

    def get_fitness(self, predicted, expected):
        r"""Return fitness value. The larger return value should represent a better fitness for the framework to work properly.

        Arguments:
            predicted (pandas.core.series.Series): Predicted values.
            expected (pandas.core.series.Series): Expected values.

        Returns:
            float: Calculated fitness value.
        """
        # Weighted averaging accounts for label imbalance across classes.
        return precision_score(
            y_true=expected, y_pred=predicted, average="weighted"
        )
class FitnessFactory(Factory):
    r"""Factory with string mappings to fitness classes.

    Attributes:
        _entities (Dict[str, Fitness]): Mapping from strings to fitness classes.

    See Also:
        * :class:`niaaml.utilities.Factory`
    """

    def _set_parameters(self, **kwargs):
        r"""Populate the registry of available fitness functions."""
        # Registered names match the class names used throughout the framework.
        self._entities = dict(
            Accuracy=Accuracy,
            Precision=Precision,
            CohenKappa=CohenKappa,
            F1=F1,
            R2=R2,
            MSE=MSE,
        )
class PipelineComponent:
    r"""Base class for implementing pipeline components.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Attributes:
        Name (str): Name of the pipeline component.
        _params (Dict[str, ParameterDefinition]): Dictionary of components's parameters with possible values. Possible parameter values are given as an instance of the ParameterDefinition class.

    See Also:
        * :class:`niaaml.utilities.ParameterDefinition`
    """

    Name = None

    def __init__(self, **kwargs):
        r"""Initialize pipeline component.

        Notes:
            _params variable should not be static as in some cases it is instance specific. See * :class:`niaaml.preprocessing.feature_selection.select_k_best.SelectKBest` for example.
        """
        self._params = {}
        self.set_parameters(**kwargs)

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the pipeline component."""
        return

    def get_params_dict(self):
        r"""Return parameters definition dictionary."""
        return self._params

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        return "Name: {name}\nArguments:\n{args}"

    def _parameters_to_string(self, dictionary):
        r"""User friendly representation of component's parameters.

        Arguments:
            dictionary (dict): Dictionary of parameters.

        Returns:
            str: One tab-indented "key = value" line per parameter, or "None" for an empty dictionary.
        """
        rendered = [
            "\t%s = %s\n" % (key, value) for key, value in dictionary.items()
        ]
        return "".join(rendered) if rendered else "None"
class FeatureEncoder:
    r"""Base class for implementing feature encoders.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Attributes:
        Name (str): Name of the feature encoder.
    """

    Name = None

    def __init__(self, **kwargs):
        r"""Initialize feature encoder."""
        # The base encoder carries no state; subclasses set up their backend here.
        pass

    def fit(self, feature):
        r"""Fit feature encoder.

        Arguments:
            feature (pandas.core.frame.DataFrame): A column (categorical) from DataFrame of features.
        """
        pass

    def transform(self, feature):
        r"""Transform feature's values.

        Arguments:
            feature (pandas.core.frame.DataFrame): A column (categorical) from DataFrame of features.

        Returns:
            pandas.core.frame.DataFrame: A transformed column.
        """
        pass

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        return "{name}"
def encode_categorical_features(features, encoder):
    """Encode categorical features.

    Arguments:
        features (pandas.core.frame.DataFrame): DataFrame of features.
        encoder (str): Name of the encoder to use.

    Returns:
        Tuple[pandas.core.frame.DataFrame, Optional[Dict[str, FeatureEncoder]]]:
            1. Converted dataframe.
            2. Dictionary of fitted encoders keyed by categorical column, or None when there are no categorical features.
    """
    factory = EncoderFactory()

    encoders = {}
    to_drop = []
    enc_features = pd.DataFrame()
    cols = [
        col
        for col in features.columns
        if not pd.api.types.is_numeric_dtype(features[col])
    ]
    for c in cols:
        # Fix: obtain a FRESH encoder instance per column. The previous code
        # created one encoder and re-fit it on each column, so every entry in
        # `encoders` referenced the same object fitted only on the last
        # column, corrupting later per-column transforms.
        # (Assumes factory.get_result returns a new instance per call — the
        # original usage with enc.fit implies it returns instances.)
        enc = factory.get_result(encoder)
        enc.fit(features[[c]])
        tr = enc.transform(features[[c]])
        to_drop.append(c)
        enc_features = pd.concat([enc_features, tr], axis=1)
        encoders[c] = enc
    features = features.drop(to_drop, axis=1)
    features = pd.concat([features, enc_features], axis=1)
    return features, encoders if len(encoders) > 0 else None


class EncoderFactory(Factory):
    r"""Class with string mappings to encoders.

    Attributes:
        _entities (Dict[str, FeatureEncoder]): Mapping from strings to encoders.

    See Also:
        * :class:`niaaml.utilities.Factory`
    """

    def _set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the factory."""
        self._entities = {"OneHotEncoder": OneHotEncoder}
class _FeatureSelectionThresholdProblem(Problem):
    r"""NiaPy Problem class implementation.

    Attributes:
        __best_fitness (float): Current best fitness of the optimization process.
        __best_solution (numpy.ndarray[float]): Current best solution of the optimization process.
    """

    def __init__(self, X, y):
        r"""Initialize feature selection problem.

        Arguments:
            X (pandas.core.frame.DataFrame): Features.
            y (pandas.core.series.Series): Expected classifier results.
        """
        self.__best_fitness = np.inf
        self.__best_solution = None
        # One extra dimension beyond the feature count holds the threshold gene.
        super().__init__(X.shape[1] + 1, 0.0, 1.0)
        split = train_test_split(X, y, test_size=0.2)
        self.train_X, self.test_X, self.train_y, self.test_y = split

    def get_best_solution(self):
        r"""Get best solution found.

        Returns:
            numpy.ndarray[float]: Best solution found.
        """
        return self.__best_solution

    def _evaluate(self, x):
        r"""Override fitness function.

        Args:
            x (np.ndarray): Solution vector; the last entry is the selection threshold.

        Returns:
            float: Fitness value of `x` (1 - validation accuracy; lower is better).
        """
        threshold = x[-1]
        self.Threshold = threshold  # expose the current threshold

        # Keep the genes that reach the threshold.
        mask = x[:-1] >= threshold

        # Worst possible score when the threshold deselects every feature.
        if not mask.any():
            return 1

        model = LogisticRegression(solver="lbfgs", max_iter=10000)
        model.fit(self.train_X.iloc[:, mask], self.train_y)
        fitness = 1.0 - model.score(self.test_X.iloc[:, mask], self.test_y)

        if fitness < self.__best_fitness:
            self.__best_fitness = fitness
            self.__best_solution = x
        return fitness
class GreyWolfOptimizer(FeatureSelectionAlgorithm):
    r"""Implementation of feature selection using GWO algorithm.

    Date:
        2020

    Author:
        Luka Pečnik

    Reference:
        The implementation is adapted according to the following article:
        D. Fister, I. Fister, T. Jagrič, I. Fister Jr., J. Brest. A novel self-adaptive differential evolution for feature selection using threshold mechanism . In: Proceedings of the 2018 IEEE Symposium on Computational Intelligence (SSCI 2018), pp. 17-24, 2018.

    Reference URL:
        http://iztok-jr-fister.eu/static/publications/236.pdf

    License:
        MIT

    See Also:
        * :class:`niaaml.preprocessing.feature_selection.feature_selection_algorithm.FeatureSelectionAlgorithm`
    """

    Name = "Grey Wolf Optimizer"

    def __init__(self, **kwargs):
        r"""Initialize GWO feature selection algorithm."""
        super(GreyWolfOptimizer, self).__init__()
        self.__gwo = GWO(population_size=10)

    def __final_output(self, sol):
        r"""Calculate final array of features.

        Arguments:
            sol (numpy.ndarray[float]): Individual of population/ possible solution.

        Returns:
            numpy.ndarray[bool]: Mask of selected features.
        """
        # The last gene is the threshold; features at or above it are kept.
        return sol[:-1] >= sol[-1]

    def select_features(self, x, y, **kwargs):
        r"""Perform the feature selection process.

        Arguments:
            x (pandas.core.frame.DataFrame): Array of original features.
            y (pandas.core.series.Series): Expected classifier results.

        Returns:
            numpy.ndarray[bool]: Mask of selected features.
        """
        problem = _FeatureSelectionThresholdProblem(x, y)
        self.__gwo.run(Task(problem=problem, max_evals=1000))
        return self.__final_output(problem.get_best_solution())

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        params = self._parameters_to_string(self.__gwo.get_parameters())
        return FeatureSelectionAlgorithm.to_string(self).format(
            name=self.Name, args=params
        )
Fister Jr., J. Brest. A novel self-adaptive differential evolution for feature selection using threshold mechanism . In: Proceedings of the 2018 IEEE Symposium on Computational Intelligence (SSCI 2018), pp. 17-24, 2018. 25 | 26 | Reference URL: 27 | http://iztok-jr-fister.eu/static/publications/236.pdf 28 | 29 | License: 30 | MIT 31 | 32 | See Also: 33 | * :class:`niaaml.preprocessing.feature_selection.feature_selection_algorithm.FeatureSelectionAlgorithm` 34 | """ 35 | Name = "Self-Adaptive Differential Evolution" 36 | 37 | def __init__(self, **kwargs): 38 | r"""Initialize GWO feature selection algorithm.""" 39 | super(jDEFSTH, self).__init__() 40 | self.__jdefsth = SelfAdaptiveDifferentialEvolution( 41 | population_size=10, differential_weight=0.5, f_lower=0.0, f_upper=2.0, tao1=0.9, 42 | crossover_probability=0.5, tao2=0.45 43 | ) 44 | 45 | def __final_output(self, sol): 46 | r"""Calculate final array of features. 47 | 48 | Arguments: 49 | sol (numpy.ndarray[float]): Individual of population/ possible solution. 50 | 51 | Returns: 52 | numpy.ndarray[bool]: Mask of selected features. 53 | """ 54 | selected = numpy.ones(sol.shape[0] - 1, dtype=bool) 55 | threshold = sol[sol.shape[0] - 1] 56 | for i in range(sol.shape[0] - 1): 57 | if sol[i] < threshold: 58 | selected[i] = False 59 | return selected 60 | 61 | def select_features(self, x, y, **kwargs): 62 | r"""Perform the feature selection process. 63 | 64 | Arguments: 65 | x (pandas.core.frame.DataFrame): Array of original features. 66 | y (pandas.core.series.Series) Expected classifier results. 67 | 68 | Returns: 69 | numpy.ndarray[bool]: Mask of selected features. 70 | """ 71 | problem = _FeatureSelectionThresholdProblem(x, y) 72 | task = Task(problem=problem, max_evals=1000) 73 | self.__jdefsth.run(task) 74 | return self.__final_output(problem.get_best_solution()) 75 | 76 | def to_string(self): 77 | r"""User friendly representation of the object. 
class SelectKBest(FeatureSelectionAlgorithm):
    r"""Implementation of feature selection using selection of k best features according to used score function.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html

    See Also:
        * :class:`niaaml.preprocessing.feature_selection.feature_selection_algorithm.FeatureSelectionAlgorithm`
    """

    Name = "Select K Best"

    def __init__(self, **kwargs):
        r"""Initialize SelectKBest feature selection algorithm.

        Notes:
            ``_params['k']`` starts out as None: a sensible value range for k
            cannot be determined before the length of the feature vector is
            known, so k only joins the optimization process later.
        """
        self._params = dict(
            score_func=ParameterDefinition([chi2, f_classif, mutual_info_classif]),
            k=None,
        )
        self.__k = None
        self.__select_k_best = SelectKB()

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the algorithm."""
        self.__select_k_best.set_params(**kwargs)

    def select_features(self, x, y, **kwargs):
        r"""Perform the feature selection process.

        Arguments:
            x (pandas.core.frame.DataFrame): Array of original features.
            y (pandas.core.series.Series): Expected classifier results.

        Returns:
            numpy.ndarray[bool]: Mask of selected features.
        """
        if self.__k is None:
            # First call: the feature count is now known, so expose k to the
            # optimizer and seed the estimator with a random k in [1, count].
            feature_count = x.shape[1]
            self.__k = feature_count
            self._params["k"] = ParameterDefinition(MinMax(1, feature_count), int)
            initial_k = int(np.around(np.random.uniform(1, feature_count)))
            self.__select_k_best.set_params(k=initial_k)

        self.__select_k_best.fit(x, y)
        return self.__select_k_best.get_support()

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        template = FeatureSelectionAlgorithm.to_string(self)
        return template.format(
            name=self.Name,
            args=self._parameters_to_string(self.__select_k_best.get_params()),
        )
class SelectUnivariateRegression(FeatureSelectionAlgorithm):
    r"""Implementation of feature selection using a generic univariate selection strategy from scikit learn.

    Date:
        2024

    Author:
        Laurenz Farthofer

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.GenericUnivariateSelect.html#sklearn.feature_selection.GenericUnivariateSelect

    See Also:
        * :class:`niaaml.preprocessing.feature_selection.feature_selection_algorithm.FeatureSelectionAlgorithm`
    """
    Name = "Select Univariate Regression"

    def __init__(self, **kwargs):
        r"""Initialize SelectUnivariateRegression feature selection algorithm."""
        # r_regression scores features by Pearson correlation with the target,
        # which is why this selector targets regression tasks.
        self._params = dict(
            score_func=ParameterDefinition([r_regression]),
        )
        self.__select = Select()

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the algorithm."""
        self.__select.set_params(**kwargs)

    def select_features(self, x, y, **kwargs):
        r"""Perform the feature selection process.

        Arguments:
            x (pandas.core.frame.DataFrame): Array of original features.
            y (pandas.core.series.Series): Expected regression targets.

        Returns:
            numpy.ndarray[bool]: Mask of selected features.
        """
        self.__select.fit(x, y)
        return self.__select.get_support()

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        return FeatureSelectionAlgorithm.to_string(self).format(
            name=self.Name,
            args=self._parameters_to_string(self.__select.get_params()),
        )
class VarianceThreshold(FeatureSelectionAlgorithm):
    r"""Implementation of feature selection using variance threshold.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html

    See Also:
        * :class:`niaaml.preprocessing.feature_selection.feature_selection_algorithm.FeatureSelectionAlgorithm`
    """

    Name = "Variance Threshold"

    def __init__(self, **kwargs):
        r"""Initialize VarianceThreshold feature selection algorithm."""
        self._params = {"threshold": ParameterDefinition(MinMax(0, 0.1), float)}
        self.__variance_threshold = VarThr()

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the algorithm."""
        self.__variance_threshold.set_params(**kwargs)

    def select_features(self, x, y, **kwargs):
        r"""Perform the feature selection process.

        This selector is unsupervised; ``y`` is accepted for interface
        compatibility but not used.

        Arguments:
            x (pandas.core.frame.DataFrame): Array of original features.
            y (pandas.core.series.Series): Expected classifier results.

        Returns:
            numpy.ndarray[bool]: Mask of selected features.
        """
        selector = self.__variance_threshold
        selector.fit(x)
        return selector.get_support()

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        template = FeatureSelectionAlgorithm.to_string(self)
        return template.format(
            name=self.Name,
            args=self._parameters_to_string(self.__variance_threshold.get_params()),
        )
8 | 9 | Date: 10 | 2020 11 | 12 | Author: 13 | Luka Pečnik 14 | 15 | License: 16 | MIT 17 | 18 | See Also: 19 | * :class:`niaaml.preprocessing.preprocessing_algorithm.PreprocessingAlgorithm` 20 | """ 21 | 22 | def fit(self, x, **kwargs): 23 | r"""Fit implemented feature transform algorithm. 24 | 25 | Arguments: 26 | x (pandas.core.frame.DataFrame): n samples to fit transformation algorithm. 27 | """ 28 | return 29 | 30 | def transform(self, x, **kwargs): 31 | r"""Transforms the given x data. 32 | 33 | Arguments: 34 | x (pandas.core.frame.DataFrame): Data to transform. 35 | 36 | Returns: 37 | pandas.core.frame.DataFrame: Transformed data. 38 | """ 39 | return x 40 | -------------------------------------------------------------------------------- /niaaml/preprocessing/feature_transform/max_abs_scaler.py: -------------------------------------------------------------------------------- 1 | from sklearn.preprocessing import MaxAbsScaler as MAS 2 | from niaaml.preprocessing.feature_transform.feature_transform_algorithm import ( 3 | FeatureTransformAlgorithm, 4 | ) 5 | 6 | __all__ = ["MaxAbsScaler"] 7 | 8 | 9 | class MaxAbsScaler(FeatureTransformAlgorithm): 10 | r"""Implementation of feature scaling by its maximum absolute value. 11 | 12 | Date: 13 | 2020 14 | 15 | Author: 16 | Luka Pečnik 17 | 18 | License: 19 | MIT 20 | 21 | Documentation: 22 | https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MaxAbsScaler.html#sklearn.preprocessing.MaxAbsScaler 23 | 24 | See Also: 25 | * :class:`niaaml.preprocessing.feature_transform.FeatureTransformAlgorithm` 26 | """ 27 | Name = "Maximum Absolute Scaler" 28 | 29 | def __init__(self, **kwargs): 30 | r"""Initialize MaxAbsScaler.""" 31 | super(MaxAbsScaler, self).__init__() 32 | self.__max_abs_scaler = MAS() 33 | 34 | def fit(self, x, **kwargs): 35 | r"""Fit implemented transformation algorithm. 36 | 37 | Arguments: 38 | x (pandas.core.frame.DataFrame): n samples to fit transformation algorithm. 
class Normalizer(FeatureTransformAlgorithm):
    r"""Implementation of feature normalization algorithm.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.Normalizer

    See Also:
        * :class:`niaaml.preprocessing.feature_transform.FeatureTransformAlgorithm`
    """
    Name = "Normalizer"

    def __init__(self, **kwargs):
        r"""Initialize Normalizer."""
        self._params = dict(norm=ParameterDefinition(["l1", "l2", "max"]))
        self.__params = None
        self.__normalizer = Nrm()

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the algorithm.

        Bug fix: previously the keyword arguments were only stored in
        ``self.__params`` and never forwarded to the underlying sklearn
        ``Normalizer``, so the selected ``norm`` was silently ignored and the
        default ('l2') was always applied.
        """
        self.__params = dict(kwargs)
        # NOTE(review): ``axis`` is kept for backward compatibility of the
        # stored parameter dict, but the sklearn estimator itself has no such
        # parameter and normalizes per sample.
        self.__params["axis"] = 0
        self.__normalizer.set_params(**kwargs)

    def fit(self, x, **kwargs):
        r"""Fit implemented transformation algorithm.

        Arguments:
            x (pandas.core.frame.DataFrame): n samples to fit transformation algorithm.
        """
        self.__normalizer.fit(x)

    def transform(self, x, **kwargs):
        r"""Transforms the given x data.

        Arguments:
            x (pandas.core.frame.DataFrame): Data to transform.

        Returns:
            pandas.core.frame.DataFrame: Transformed data.
        """
        return self.__normalizer.transform(x)

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        return FeatureTransformAlgorithm.to_string(self).format(
            name=self.Name,
            args=self._parameters_to_string(self.__normalizer.get_params()),
        )
class RobustScaler(FeatureTransformAlgorithm):
    r"""Implementation of the robust scaler.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.RobustScaler.html#sklearn.preprocessing.RobustScaler

    See Also:
        * :class:`niaaml.preprocessing.feature_transform.FeatureTransformAlgorithm`
    """

    Name = "Robust Scaler"

    def __init__(self, **kwargs):
        r"""Initialize RobustScaler."""
        # Both toggles of the sklearn estimator take part in optimization.
        self._params = {
            "with_centering": ParameterDefinition([True, False]),
            "with_scaling": ParameterDefinition([True, False]),
        }
        self.__robust_scaler = RS()

    def fit(self, x, **kwargs):
        r"""Fit implemented transformation algorithm.

        Arguments:
            x (pandas.core.frame.DataFrame): n samples to fit transformation algorithm.
        """
        self.__robust_scaler.fit(x)

    def transform(self, x, **kwargs):
        r"""Transforms the given x data.

        Arguments:
            x (pandas.core.frame.DataFrame): Data to transform.

        Returns:
            pandas.core.frame.DataFrame: Transformed data.
        """
        return self.__robust_scaler.transform(x)

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        template = FeatureTransformAlgorithm.to_string(self)
        return template.format(
            name=self.Name,
            args=self._parameters_to_string(self.__robust_scaler.get_params()),
        )
class FeatureTransformAlgorithmFactory(Factory):
    r"""Class with string mappings to feature transform algorithms.

    Attributes:
        _entities (Dict[str, FeatureTransformAlgorithm]): Mapping from strings to feature transform algorithms.
    """

    def _set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the factory."""
        # The registry keys deliberately equal the class names, so they can be
        # derived from the classes themselves.
        registered = (
            Normalizer,
            StandardScaler,
            MaxAbsScaler,
            QuantileTransformer,
            RobustScaler,
        )
        self._entities = {algorithm.__name__: algorithm for algorithm in registered}
41 | """ 42 | return None 43 | 44 | def to_string(self): 45 | r"""User friendly representation of the object. 46 | 47 | Returns: 48 | str: User friendly representation of the object. 49 | """ 50 | return "{name}" 51 | -------------------------------------------------------------------------------- /niaaml/preprocessing/imputation/simple_imputer.py: -------------------------------------------------------------------------------- 1 | from sklearn.impute import SimpleImputer as SI 2 | from niaaml.preprocessing.imputation.imputer import Imputer 3 | import numpy as np 4 | import pandas as pd 5 | 6 | __all__ = ["SimpleImputer"] 7 | 8 | 9 | class SimpleImputer(Imputer): 10 | r"""Implementation of simple imputer. 11 | 12 | Date: 13 | 2020 14 | 15 | Author: 16 | Luka Pečnik 17 | 18 | License: 19 | MIT 20 | 21 | Documentation: 22 | https://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html 23 | 24 | See Also: 25 | * :class:`niaaml.preprocessing.imputation.Imputer` 26 | """ 27 | Name = "Simple Imputer" 28 | 29 | def __init__(self, **kwargs): 30 | r"""Initialize imputer.""" 31 | self.__simple_imputer = SI(missing_values=np.nan) 32 | 33 | def fit(self, feature): 34 | r"""Fit imputer. 35 | 36 | Arguments: 37 | feature (pandas.core.frame.DataFrame): A column from DataFrame of features. 38 | """ 39 | if not pd.api.types.is_numeric_dtype(feature.iloc[:, 0]): 40 | replacement_val = feature.mode().iloc[0, 0] 41 | self.__simple_imputer.set_params( 42 | **{"fill_value": replacement_val, "strategy": "constant"} 43 | ) 44 | self.__simple_imputer.fit(feature) 45 | else: 46 | self.__simple_imputer.fit(feature) 47 | 48 | def transform(self, feature): 49 | r"""Transform feature's values. 50 | 51 | Arguments: 52 | feature (pandas.core.frame.DataFrame): A column from DataFrame of features. 53 | 54 | Returns: 55 | pandas.core.frame.DataFrame: A transformed column. 
56 | """ 57 | return self.__simple_imputer.transform(feature) 58 | 59 | def to_string(self): 60 | r"""User friendly representation of the object. 61 | 62 | Returns: 63 | str: User friendly representation of the object. 64 | """ 65 | return Imputer.to_string(self).format(name=self.Name) 66 | -------------------------------------------------------------------------------- /niaaml/preprocessing/imputation/utility.py: -------------------------------------------------------------------------------- 1 | from niaaml.preprocessing.imputation.simple_imputer import SimpleImputer 2 | from niaaml.utilities import Factory 3 | 4 | __all__ = ["ImputerFactory", "impute_features"] 5 | 6 | 7 | def impute_features(features, imputer): 8 | """Impute features with missing data. 9 | 10 | Arguments: 11 | features (pandas.core.frame.DataFrame): DataFrame of features. 12 | imputer (str): Name of the imputer to use. 13 | 14 | Returns: 15 | Tuple[pandas.core.frame.DataFrame, Dict[Imputer]]: 16 | 1. Converted dataframe. 17 | 2. Dictionary of imputers for all features with missing data. 18 | """ 19 | imp = ImputerFactory().get_result(imputer) 20 | 21 | imputers = {} 22 | cols = [col for col in features.columns if features[col].isnull().any()] 23 | for c in cols: 24 | imp.fit(features[[c]]) 25 | features.loc[:, c] = imp.transform(features[[c]]) 26 | imputers[c] = imp 27 | 28 | return features, imputers if len(imputers) > 0 else None 29 | 30 | 31 | class ImputerFactory(Factory): 32 | r"""Class with string mappings to imputers. 33 | 34 | Attributes: 35 | _entities (Dict[str, Imputer]): Mapping from strings to imputers. 
class PreprocessingAlgorithm(PipelineComponent):
    r"""Base class for implementing preprocessing algorithms.

    Concrete preprocessing components (encoders, imputers, feature selection
    and feature transform algorithms) extend this class; it adds no behavior
    of its own beyond what the parent ``PipelineComponent`` provides.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    See Also:
        * :class:`niaaml.pipeline_component.PipelineComponent`
    """
", "Laurenz Farthofer "] 7 | keywords = ['classification', 'NiaPy', 'scikit-learn', 'nature-inspired algorithms', 'feature selection', 'preprocessing'] 8 | homepage = "https://github.com/firefly-cpp/NiaAML" 9 | repository = "https://github.com/firefly-cpp/NiaAML" 10 | documentation= "https://niaaml.readthedocs.io/en/latest/" 11 | readme = "README.md" 12 | include = [ 13 | { path="LICENSE", format="sdist" }, 14 | { path="CHANGELOG.md", format="sdist" }, 15 | { path="CITATION.md", format="sdist" }, 16 | { path="COMPONENTS.md", format="sdist" } 17 | ] 18 | 19 | [tool.poetry.scripts] 20 | niaaml = "niaaml.cli:main" 21 | 22 | [tool.poetry.dependencies] 23 | python = ">=3.9,<3.14" 24 | numpy = "^1.19.1" 25 | scikit-learn = "^1.6.1" 26 | niapy = "^2.5.2" 27 | pandas = "^2.1.1" 28 | typer = "^0.12.3" 29 | loguru = "^0.7.2" 30 | 31 | [tool.poetry.dev-dependencies] 32 | sphinx = "^3.3.1" 33 | sphinx-rtd-theme = "^0.5.0" 34 | coveralls = "^2.2.0" 35 | autoflake = "^1.4" 36 | black = "^21.5b1" 37 | pre-commit = "^2.12.1" 38 | pytest = "^7.4.2" 39 | pytest-cov = "^4.1.0" 40 | 41 | [build-system] 42 | requires = ["poetry-core"] 43 | build-backend = "poetry.core.masonry.api" 44 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firefly-cpp/NiaAML/11766422d32bc83e12f10706f59674fdb38e783e/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_basic_data_reader.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from niaaml.data import BasicDataReader 3 | import numpy 4 | 5 | 6 | class BasicDataReaderTestCase(TestCase): 7 | def setUp(self): 8 | self.__x = numpy.random.uniform(low=0.0, high=15.0, size=(100, 6)) 9 | self.__y = numpy.random.choice(["Class 1", "Class 2"], size=100) 10 | 11 | def 
class ClassifierFactoryTestCase(TestCase):
    """Tests for the classifier factory's string-to-class mapping."""

    def setUp(self):
        self.__factory = ClassifierFactory()

    def test_get_result_works_fine(self):
        # Every registered name must resolve to a usable Classifier instance.
        for name in self.__factory._entities:
            classifier = self.__factory.get_result(name)
            self.assertIsNotNone(classifier)
            self.assertIsInstance(classifier, Classifier)

        # Unknown names must raise instead of returning a default.
        self.assertRaises(TypeError, self.__factory.get_result, "non_existent_name")

    def test_get_dictionary_works_fine(self):
        mapping = self.__factory.get_name_to_classname_mapping()
        self.assertEqual(
            len(self.__factory._entities.keys()), len(mapping.keys())
        )
        for value in mapping.values():
            self.assertIsNotNone(value)
test_header_classes_works_fine(self): 10 | data_reader = CSVDataReader( 11 | src=os.path.dirname(os.path.abspath(__file__)) 12 | + "/tests_files/dataset_header_classes.csv", 13 | has_header=True, 14 | contains_classes=True, 15 | ) 16 | x = data_reader.get_x() 17 | y = data_reader.get_y() 18 | self.assertEqual(x.shape, (100, 6)) 19 | self.assertEqual(y.shape, (100,)) 20 | 21 | def test_no_header_classes_works_fine(self): 22 | data_reader = CSVDataReader( 23 | src=os.path.dirname(os.path.abspath(__file__)) 24 | + "/tests_files/dataset_no_header_classes.csv", 25 | has_header=False, 26 | contains_classes=True, 27 | ) 28 | x = data_reader.get_x() 29 | y = data_reader.get_y() 30 | self.assertEqual(x.shape, (100, 6)) 31 | self.assertEqual(y.shape, (100,)) 32 | 33 | def test_no_header_no_classes_works_fine(self): 34 | data_reader = CSVDataReader( 35 | src=os.path.dirname(os.path.abspath(__file__)) 36 | + "/tests_files/dataset_no_header_no_classes.csv", 37 | has_header=False, 38 | contains_classes=False, 39 | ) 40 | x = data_reader.get_x() 41 | y = data_reader.get_y() 42 | self.assertEqual(x.shape, (100, 6)) 43 | self.assertIsNone(y) 44 | 45 | def test_header_no_classes_works_fine(self): 46 | data_reader = CSVDataReader( 47 | src=os.path.dirname(os.path.abspath(__file__)) 48 | + "/tests_files/dataset_header_no_classes.csv", 49 | has_header=True, 50 | contains_classes=False, 51 | ) 52 | x = data_reader.get_x() 53 | y = data_reader.get_y() 54 | self.assertEqual(x.shape, (100, 6)) 55 | self.assertIsNone(y) 56 | 57 | def test_ignore_columns_works_fine(self): 58 | data_reader = CSVDataReader( 59 | src=os.path.dirname(os.path.abspath(__file__)) 60 | + "/tests_files/dataset_real_estate_regression.csv", 61 | has_header=True, 62 | contains_classes=True, 63 | ignore_columns=[0] 64 | ) 65 | x = data_reader.get_x() 66 | y = data_reader.get_y() 67 | self.assertEqual(x.shape, (414, 6)) 68 | self.assertEqual(y.shape, (414,)) 69 | 
class EncoderFactoryTestCase(TestCase):
    """Tests for the feature-encoder factory.

    NOTE(review): previously misnamed ``FitnessFactoryTestCase`` — a
    copy-paste from the fitness-factory tests that also clashed with the
    class of the same name in ``test_fitness_factory.py``.
    """

    def setUp(self):
        self.__factory = EncoderFactory()

    def test_get_result_works_fine(self):
        # Every registered name must resolve to a FeatureEncoder instance.
        for entry in self.__factory._entities:
            instance = self.__factory.get_result(entry)
            self.assertIsNotNone(instance)
            self.assertIsInstance(instance, FeatureEncoder)

        # Unknown names must raise instead of returning a default.
        with self.assertRaises(TypeError):
            self.__factory.get_result("non_existent_name")

    def test_get_dictionary_works_fine(self):
        d = self.__factory.get_name_to_classname_mapping()
        self.assertEqual(
            len(self.__factory._entities.keys()), len(d.keys())
        )

        for k in d:
            self.assertIsNotNone(d[k])
class FeatureSelectionTestCase(TestCase):
    """Every selection algorithm must return one mask entry per feature."""

    def setUp(self):
        self.__data = CSVDataReader(
            src=os.path.dirname(os.path.abspath(__file__))
            + "/tests_files/dataset_header_classes.csv",
            has_header=True,
            contains_classes=True,
        )

    def _assert_mask_covers_features(self, algorithm):
        """Run selection and check the mask length equals the feature count."""
        selected_features_mask = algorithm.select_features(
            self.__data.get_x(), self.__data.get_y()
        )
        self.assertEqual(self.__data.get_x().shape[1], len(selected_features_mask))

    def test_PSO_works_fine(self):
        self._assert_mask_covers_features(fs.ParticleSwarmOptimization())

    def test_select_k_best_works_fine(self):
        self._assert_mask_covers_features(fs.SelectKBest())

    def test_select_percentile_works_fine(self):
        self._assert_mask_covers_features(fs.SelectPercentile())

    def test_bat_algorithm_works_fine(self):
        self._assert_mask_covers_features(fs.BatAlgorithm())

    def test_de_works_fine(self):
        self._assert_mask_covers_features(fs.DifferentialEvolution())

    def test_gwo_works_fine(self):
        self._assert_mask_covers_features(fs.GreyWolfOptimizer())

    def test_jdefsth_works_fine(self):
        self._assert_mask_covers_features(fs.jDEFSTH())

    def test_vt_works_fine(self):
        self._assert_mask_covers_features(fs.VarianceThreshold())
class FeatureTransformTestCase(TestCase):
    """Every transform must preserve the shape of the feature matrix."""

    def setUp(self):
        self.__data = CSVDataReader(
            src=os.path.dirname(os.path.abspath(__file__))
            + "/tests_files/dataset_header_classes.csv",
            has_header=True,
            contains_classes=True,
        )

    def _assert_shape_preserved(self, algorithm):
        """Fit, transform, and verify output shape equals input shape."""
        algorithm.fit(self.__data.get_x())
        transformed = algorithm.transform(self.__data.get_x())
        self.assertEqual(transformed.shape, self.__data.get_x().shape)

    def test_mas_works_fine(self):
        self._assert_shape_preserved(ft.MaxAbsScaler())

    def test_norm_works_fine(self):
        self._assert_shape_preserved(ft.Normalizer())

    def test_qt_works_fine(self):
        # n_quantiles must not exceed the sample count of the test dataset.
        self._assert_shape_preserved(ft.QuantileTransformer(n_quantiles=25))

    def test_rs_works_fine(self):
        self._assert_shape_preserved(ft.RobustScaler())

    def test_ss_works_fine(self):
        self._assert_shape_preserved(ft.StandardScaler())
"Class 2", 19 | "Class 1", 20 | "Class 1", 21 | "Class 2", 22 | "Class 1", 23 | "Class 2", 24 | "Class 1", 25 | "Class 1", 26 | "Class 1", 27 | "Class 1", 28 | "Class 2", 29 | "Class 1", 30 | ] 31 | ) 32 | self.__predicted = np.array( 33 | [ 34 | "Class 1", 35 | "Class 1", 36 | "Class 1", 37 | "Class 2", 38 | "Class 2", 39 | "Class 2", 40 | "Class 1", 41 | "Class 1", 42 | "Class 1", 43 | "Class 2", 44 | "Class 1", 45 | "Class 1", 46 | "Class 2", 47 | "Class 2", 48 | "Class 1", 49 | "Class 2", 50 | "Class 1", 51 | "Class 2", 52 | "Class 2", 53 | "Class 2", 54 | ] 55 | ) 56 | 57 | def test_accuracy_works_fine(self): 58 | ff = f.Accuracy() 59 | val = ff.get_fitness(self.__predicted, self.__y) 60 | self.assertEqual(val, 0.5) 61 | 62 | def test_precision_works_fine(self): 63 | ff = f.Precision() 64 | val = ff.get_fitness(self.__predicted, self.__y) 65 | self.assertEqual(val, 0.5199999999999999) 66 | 67 | def test_cohen_kappa_works_fine(self): 68 | ff = f.CohenKappa() 69 | val = ff.get_fitness(self.__predicted, self.__y) 70 | self.assertEqual(val, 0.0) 71 | 72 | def test_f1_works_fine(self): 73 | ff = f.F1() 74 | val = ff.get_fitness(self.__predicted, self.__y) 75 | self.assertEqual(val, 0.505050505050505) 76 | 77 | 78 | class RegressionFitnessTestCase(TestCase): 79 | def setUp(self): 80 | self.__y = np.array([3, -0.5, 2, 7]) 81 | self.__predicted = np.array([2.5, 0.0, 2, 8]) 82 | 83 | def test_r2_works_fine(self): 84 | ff = f.R2() 85 | val = ff.get_fitness(self.__predicted, self.__y) 86 | self.assertEqual(val, 0.9486081370449679) 87 | -------------------------------------------------------------------------------- /tests/test_fitness_factory.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from niaaml.fitness import FitnessFactory, FitnessFunction 3 | 4 | 5 | class FitnessFactoryTestCase(TestCase): 6 | def setUp(self): 7 | self.__factory = FitnessFactory() 8 | 9 | def 
class ImputerTestCase(TestCase):
    """Exercises SimpleImputer on one numeric and one categorical column."""

    def setUp(self):
        numeric_part = numpy.random.uniform(low=0.0, high=15.0, size=(100, 6))
        categorical_part = numpy.array(
            [numpy.random.choice(["a", "b"], size=(100,))]
        ).T
        x = numpy.concatenate((numeric_part, categorical_part), axis=1)
        # Inject missing values into both a categorical and a numeric column.
        x[50, 6] = numpy.nan
        x[30, 2] = numpy.nan
        y = numpy.random.choice(["Class 1", "Class 2"], size=100)
        self.__data_reader = BasicDataReader(x=x, y=y)

    def test_simple_imputer_works_fine(self):
        features = self.__data_reader.get_x()
        # Column 2 is numeric, column 6 is categorical; both must come back
        # with no missing entries after imputation.
        for column in (2, 6):
            imputer = SimpleImputer()
            imputer.fit(features[[column]])
            imputed = pandas.DataFrame(imputer.transform(features[[column]]))
            self.assertFalse(imputed[0].isnull().any())

    def test_utility_method_works_fine(self):
        numeric_columns = {index: "float64" for index in range(6)}
        features = self.__data_reader.get_x().astype(numeric_columns)
        features.iloc[50, 6] = numpy.nan
        features, imputers = impute_features(features, "SimpleImputer")
        self.assertEqual(len(imputers), 2)
        self.assertEqual(features.shape[1], 7)
class OptimizationStatsTestCase(TestCase):
    """Checks that OptimizationStats computes the expected fixed metrics."""

    def setUp(self):
        # Labels are encoded compactly as digit strings ('1' -> "Class 1",
        # '2' -> "Class 2") and expanded into the same arrays the stats
        # object is expected to score.
        labels = {"1": "Class 1", "2": "Class 2"}
        y = np.array([labels[c] for c in "11121222211212111121"])
        predicted = np.array([labels[c] for c in "11122211121122121222"])

        self.__stats = OptimizationStats(predicted, y)

    def test_works_fine(self):
        self.assertEqual(self.__stats._accuracy, 0.5)
        self.assertEqual(self.__stats._precision, 0.5199999999999999)
        self.assertEqual(self.__stats._cohen_kappa, 0.0)
        self.assertEqual(self.__stats._f1_score, 0.505050505050505)