├── .all-contributorsrc ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── ✅ task.md │ ├── 🐛 bug report.md │ └── 🚀 feature request.md ├── images │ ├── niaaml.png │ ├── niaaml_cli_help.png │ ├── niaaml_cli_infer_help.png │ ├── niaaml_cli_optimize_example.png │ └── niaaml_logo.png ├── pull_request_template.md └── workflows │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── CHANGELOG.md ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── COMPONENTS.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docs ├── Makefile ├── about.rst ├── api │ ├── classifiers.rst │ ├── data.rst │ ├── fitness.rst │ ├── index.rst │ ├── niaaml.rst │ └── preprocessing.rst ├── changelog.rst ├── code_of_conduct.rst ├── conf.py ├── contributing.rst ├── documentation.rst ├── getting_started.rst ├── index.rst ├── installation.rst ├── make.bat ├── paper │ └── 10.21105.joss.02949.pdf ├── requirements.txt └── testing.rst ├── examples ├── classifier.py ├── example_files │ ├── dataset.csv │ ├── dataset_categorical.csv │ ├── dataset_categorical_missing.csv │ └── pipeline.ppln ├── export_pipeline_object.py ├── export_pipeline_text.py ├── factories.py ├── feature_encoding.py ├── feature_imputing.py ├── feature_selection.py ├── feature_selection_threshold_algorithms.py ├── feature_transform.py ├── fitness.py ├── load_data_basic.py ├── load_data_csv.py ├── load_pipeline_object_file.py ├── logger.py ├── optimization_stats.py ├── optimize_run_pipeline.py ├── optimize_run_pipeline_categorical_features.py ├── optimize_run_pipeline_logger.py ├── optimize_run_pipeline_missing_values.py ├── run_all.bat ├── run_all.sh ├── run_pipeline_optimizer_array_data.py ├── run_pipeline_optimizer_csv_data.py ├── run_pipeline_optimizer_csv_data_categorical.py ├── run_pipeline_optimizer_csv_data_missing.py └── run_pipeline_optimizer_csv_data_v1.py ├── niaaml ├── __init__.py ├── classifiers │ ├── __init__.py │ ├── ada_boost.py │ ├── bagging.py │ ├── classifier.py │ ├── decision_tree.py │ ├── 
extremely_randomized_trees.py │ ├── gaussian_naive_bayes.py │ ├── gaussian_process.py │ ├── k_neighbors.py │ ├── linear_svc.py │ ├── multi_layer_perceptron.py │ ├── quadratic_driscriminant_analysis.py │ ├── random_forest.py │ ├── regression_decision_tree.py │ ├── regression_gaussian_process.py │ ├── regression_lasso.py │ ├── regression_linear_model.py │ ├── regression_ridge.py │ └── utility.py ├── cli.py ├── data │ ├── __init__.py │ ├── basic_data_reader.py │ ├── csv_data_reader.py │ └── data_reader.py ├── fitness │ ├── __init__.py │ ├── accuracy.py │ ├── cohen_kappa.py │ ├── f1.py │ ├── fitness_function.py │ ├── mse.py │ ├── precision.py │ ├── r2.py │ └── utility.py ├── logger.py ├── pipeline.py ├── pipeline_component.py ├── pipeline_optimizer.py ├── preprocessing │ ├── __init__.py │ ├── encoding │ │ ├── __init__.py │ │ ├── feature_encoder.py │ │ ├── one_hot_encoder.py │ │ └── utility.py │ ├── feature_selection │ │ ├── __init__.py │ │ ├── _feature_selection_threshold_problem.py │ │ ├── bat_algorithm.py │ │ ├── differential_evolution.py │ │ ├── feature_selection_algorithm.py │ │ ├── grey_wolf_optimizer.py │ │ ├── jDEFSTH.py │ │ ├── particle_swarm_optimization.py │ │ ├── select_k_best.py │ │ ├── select_percentile.py │ │ ├── select_univariate_regression.py │ │ ├── utility.py │ │ └── variance_threshold.py │ ├── feature_transform │ │ ├── __init__.py │ │ ├── feature_transform_algorithm.py │ │ ├── max_abs_scaler.py │ │ ├── normalizer.py │ │ ├── quantile_transformer.py │ │ ├── robust_scaler.py │ │ ├── standard_scaler.py │ │ └── utility.py │ ├── imputation │ │ ├── __init__.py │ │ ├── imputer.py │ │ ├── simple_imputer.py │ │ └── utility.py │ └── preprocessing_algorithm.py └── utilities.py ├── paper ├── niaamlFlow.png ├── paper.bib └── paper.md ├── poetry.lock ├── pyproject.toml └── tests ├── __init__.py ├── test_basic_data_reader.py ├── test_classifier_factory.py ├── test_classifiers.py ├── test_csv_data_reader.py ├── test_encoder_factory.py ├── test_feature_encoder.py ├── 
test_feature_selection.py ├── test_feature_selection_algorithm_factory.py ├── test_feature_transform.py ├── test_feature_transform_algorithm_factory.py ├── test_fitness.py ├── test_fitness_factory.py ├── test_imputer.py ├── test_imputer_factory.py ├── test_pipeline.py ├── test_pipeline_optimizer.py ├── test_utilities.py └── tests_files ├── dataset_header_classes.csv ├── dataset_header_classes_cat_miss.csv ├── dataset_header_no_classes.csv ├── dataset_no_header_classes.csv ├── dataset_no_header_no_classes.csv └── dataset_real_estate_regression.csv /.all-contributorsrc: -------------------------------------------------------------------------------- 1 | { 2 | "files": [ 3 | "README.md" 4 | ], 5 | "imageSize": 100, 6 | "commit": false, 7 | "contributors": [ 8 | { 9 | "login": "lukapecnik", 10 | "name": "Luka Pečnik", 11 | "avatar_url": "https://avatars0.githubusercontent.com/u/23029992?s=460&u=d1c802fd8c82af0a020b1e21af80a34d6e28fb10&v=4", 12 | "profile": "https://github.com/lukapecnik", 13 | "contributions": [ 14 | "code", 15 | "doc", 16 | "review", 17 | "bug", 18 | "example", 19 | "test", 20 | "infra" 21 | ] 22 | }, 23 | { 24 | "login": "firefly-cpp", 25 | "name": "firefly-cpp", 26 | "avatar_url": "https://avatars2.githubusercontent.com/u/1633361?v=4", 27 | "profile": "https://github.com/firefly-cpp", 28 | "contributions": [ 29 | "code", 30 | "bug", 31 | "mentoring", 32 | "research", 33 | "ideas" 34 | ] 35 | }, 36 | { 37 | "login": "sisco0", 38 | "name": "sisco0", 39 | "avatar_url": "https://avatars0.githubusercontent.com/u/25695302?v=4", 40 | "profile": "https://github.com/sisco0", 41 | "contributions": [ 42 | "ideas" 43 | ] 44 | }, 45 | { 46 | "login": "zStupan", 47 | "name": "zStupan", 48 | "avatar_url": "https://avatars.githubusercontent.com/u/48752988?v=4", 49 | "profile": "https://github.com/zStupan", 50 | "contributions": [ 51 | "code" 52 | ] 53 | }, 54 | { 55 | "login": "musicinmybrain", 56 | "name": "Ben Beasley", 57 | "avatar_url": 
"https://avatars.githubusercontent.com/u/6898909?v=4", 58 | "profile": "https://github.com/musicinmybrain", 59 | "contributions": [ 60 | "code", 61 | "infra" 62 | ] 63 | } 64 | ], 65 | "skipCi": true, 66 | "contributorsPerLine": 7, 67 | "projectName": "NiaAML", 68 | "projectOwner": "lukapecnik", 69 | "repoType": "github", 70 | "repoHost": "https://github.com" 71 | } 72 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/✅ task.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "✅ Task" 3 | about: "If something needs to be done." 4 | title: '[TASK] ' 5 | labels: 'task' 6 | 7 | --- 8 | 9 | ## ✅ Task 10 | 11 | Add a description of the task. 12 | 13 | ### 📃 Checklist 14 | 15 | - [ ] sub-task1 description 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/🐛 bug report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "🐛 Bug Report" 3 | about: "If something isn't working as expected." 4 | title: '[BUG] ' 5 | labels: 'bug' 6 | 7 | --- 8 | 9 | ## 🐛 Bug Report 10 | 11 | > 🚮 delete any section that is not helpful/required for your report (including this message) 12 | 13 | ### 🔥 Current Behavior 14 | 15 | A clear and concise description of the behavior. 16 | 17 | ### ✨ Expected Behavior 18 | 19 | A clear and concise description of what you expected to happen (or code). 20 | 21 | ### 💻 Reproducing the Bug 22 | 23 | Input Code, REPL or Repo link 24 | 25 | #### 📄🖼️ Context and Screenshots 26 | 27 | Add any other context about the problem here. If applicable, add screenshots to help explain. 
28 | 29 | #### 🐋 Environment 30 | 31 | Add information about your environment if you think the bug is specific to your setup. 32 | 33 | ### 🤔 Possible Solution 34 | 35 | Only if you have suggestions on a fix for the bug. 36 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/🚀 feature request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "🚀 Feature Request" 3 | about: "If you want to add/implement/request a new feature." 4 | title: '[FEATURE] ' 5 | labels: 'enhancement' 6 | 7 | --- 8 | 9 | ## 🚀 Feature Request 10 | 11 | > 🚮 feel free to delete any section that is not helpful/required for your report (including this message) 12 | 13 | ### 😞 Problem Statement 14 | 15 | A clear and concise description of what the problem is. Ex. I have an issue when [...] 16 | 17 | ### 💬 Feature Description 18 | 19 | A clear and concise description of what you want to happen. Add any considered drawbacks. 20 | 21 | If you can, explain how users will be able to use this and possibly write out a version the docs. 22 | Maybe a screenshot or design? 23 | 24 | ### 🤔 Alternatives 25 | 26 | A clear and concise description of any alternative solutions or features you've considered. 27 | 28 | ### ✅ Implementation Checklist 29 | 30 | - [ ] ... 
31 | -------------------------------------------------------------------------------- /.github/images/niaaml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firefly-cpp/NiaAML/11766422d32bc83e12f10706f59674fdb38e783e/.github/images/niaaml.png -------------------------------------------------------------------------------- /.github/images/niaaml_cli_help.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firefly-cpp/NiaAML/11766422d32bc83e12f10706f59674fdb38e783e/.github/images/niaaml_cli_help.png -------------------------------------------------------------------------------- /.github/images/niaaml_cli_infer_help.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firefly-cpp/NiaAML/11766422d32bc83e12f10706f59674fdb38e783e/.github/images/niaaml_cli_infer_help.png -------------------------------------------------------------------------------- /.github/images/niaaml_cli_optimize_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firefly-cpp/NiaAML/11766422d32bc83e12f10706f59674fdb38e783e/.github/images/niaaml_cli_optimize_example.png -------------------------------------------------------------------------------- /.github/images/niaaml_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firefly-cpp/NiaAML/11766422d32bc83e12f10706f59674fdb38e783e/.github/images/niaaml_logo.png -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | > 🙇💖 Thank you for contributing to NiaAML! 
2 | 3 | > 🚮 feel free to delete any section that is not helpful/required for your report (including this message) 4 | 5 | ## 💬 Description 6 | 7 | Please include a summary of the changes and the related issue. Please also include relevant motivation and context. List any dependencies that are required for this change. 8 | 9 | ## ❗ Issue Links 10 | Fixes # 11 | 12 | ## 🧪 How Has This Been Tested? 13 | 14 | Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration 15 | 16 | # ✅ Checklist 17 | 18 | - [ ] My code follows the style guidelines of this project 19 | - [ ] I have performed a self-review of my code 20 | - [ ] I have commented my code, particularly in hard-to-understand areas 21 | - [ ] I have made corresponding changes to the documentation 22 | - [ ] I have added tests that prove my fix is effective or that my feature works 23 | - [ ] New and existing unit tests pass locally with my changes 24 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: NiaAML 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ${{ matrix.os }} 13 | strategy: 14 | matrix: 15 | os: [ubuntu-latest, windows-latest, macos-latest] 16 | python-version: ['3.10', '3.11', '3.12'] 17 | defaults: 18 | run: 19 | shell: bash 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v3 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Get full Python version 28 | id: full-python-version 29 | run: echo ::set-output name=version::$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") 30 | - name: Install poetry 31 | run: | 32 | curl 
-sL https://install.python-poetry.org | python - -y 33 | - name: Update path 34 | if: ${{ matrix.os != 'windows-latest' }} 35 | run: echo "$HOME/.local/bin" >> $GITHUB_PATH 36 | - name: Update Windows path 37 | if: ${{ matrix.os == 'windows-latest' }} 38 | run: echo "$APPDATA\Python\Scripts" >> $GITHUB_PATH 39 | - name: Configure poetry 40 | run: poetry config virtualenvs.in-project true 41 | - name: Set up cache 42 | uses: actions/cache@v3 43 | id: cache 44 | with: 45 | path: .venv 46 | key: venv-${{ runner.os }}-${{ steps.full-python-version.outputs.version }}-${{ hashFiles('**/poetry.lock') }} 47 | - name: Ensure cache is healthy 48 | if: steps.cache.outputs.cache-hit == 'true' 49 | run: timeout 10s poetry run pip --version || rm -rf .venv 50 | - name: Install dependencies 51 | run: poetry install 52 | - name: Run tests 53 | run: poetry run pytest 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | 132 | .vscode -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | hooks: 4 | - id: black 5 | name: black 6 | entry: black . 7 | language: system 8 | always_run: true 9 | pass_filenames: false 10 | - id: autoflake 11 | name: autoflake 12 | entry: autoflake --recursive . 13 | language: system 14 | always_run: true 15 | pass_filenames: false 16 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | requirements_file: docs/requirements.txt 2 | build: 3 | image: latest 4 | python: 5 | version: 3.8 -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: 🤚 if you use this software, please cite it using these metadata. 3 | title: >- 4 | NiaAML: AutoML for classification and regression pipelines 5 | abstract: | 6 | In this paper we present NiaAML, an AutoML framework that we have developed for creating machine learning pipelines and hyperparameter tuning. 
The composition of machine learning pipelines is presented as an optimization problem that can be solved using various stochastic, population-based, nature-inspired algorithms. Nature-inspired algorithms are powerful tools for solving real-world optimization problems, especially those that are highly complex, nonlinear, and involve large search spaces where traditional algorithms may struggle. They are applied widely in various fields, including robotics, operations research, and bioinformatics. This paper provides a comprehensive overview of the software architecture, and describes the main tasks of NiaAML, including the automatic composition of classification and regression pipelines. The overview is supported by a practical illustrative example. 7 | type: journalArticle 8 | license: All rights reserved 9 | copyright: All rights reserved 10 | database: ScienceDirect 11 | issn: 2352-7110 12 | journal: SoftwareX 13 | pages: 101974 14 | volume: 29 15 | url: https://www.sciencedirect.com/science/article/pii/S2352711024003443 16 | keywords: 17 | - AutoML 18 | - Classification 19 | - Nature-inspired algorithms 20 | - Optimization 21 | authors: 22 | - family-names: Fister 23 | given-names: Iztok 24 | - family-names: Farthofer 25 | given-names: Laurenz A. 26 | - family-names: Pečnik 27 | given-names: Luka 28 | - family-names: Fister 29 | given-names: Iztok 30 | - family-names: Holzinger 31 | given-names: Andreas 32 | editors: 33 | - family-names: Fister 34 | given-names: Iztok 35 | - family-names: Farthofer 36 | given-names: Laurenz A. 37 | - family-names: Pečnik 38 | given-names: Luka 39 | - family-names: Fister 40 | given-names: Iztok 41 | - family-names: Holzinger 42 | given-names: Andreas 43 | recipients: 44 | - family-names: Fister 45 | given-names: Iztok 46 | - family-names: Farthofer 47 | given-names: Laurenz A. 
48 | - family-names: Pečnik 49 | given-names: Luka 50 | - family-names: Fister 51 | given-names: Iztok 52 | - family-names: Holzinger 53 | given-names: Andreas 54 | translators: 55 | - family-names: Fister 56 | given-names: Iztok 57 | - family-names: Farthofer 58 | given-names: Laurenz A. 59 | - family-names: Pečnik 60 | given-names: Luka 61 | - family-names: Fister 62 | given-names: Iztok 63 | - family-names: Holzinger 64 | given-names: Andreas 65 | date-published: 2025-02-01 66 | identifiers: 67 | - type: doi 68 | value: 10.1016/j.softx.2024.101974 69 | -------------------------------------------------------------------------------- /COMPONENTS.md: -------------------------------------------------------------------------------- 1 | ### Classifiers 2 | 3 | * Adaptive Boosting (AdaBoost), 4 | * Bagging (Bagging), 5 | * Extremely Randomized Trees (ExtremelyRandomizedTrees), 6 | * Linear SVC (LinearSVC), 7 | * Multi Layer Perceptron (MultiLayerPerceptron), 8 | * Random Forest Classifier (RandomForest), 9 | * Decision Tree Classifier (DecisionTree), 10 | * K-Neighbors Classifier (KNeighbors), 11 | * Gaussian Process Classifier (GaussianProcess), 12 | * Gaussian Naive Bayes (GaussianNB), 13 | * Quadratic Discriminant Analysis (QuadraticDiscriminantAnalysis). 14 | 15 | ### Feature Selection Algorithms 16 | 17 | * Select K Best (SelectKBest), 18 | * Select Percentile (SelectPercentile), 19 | * Variance Threshold (VarianceThreshold). 20 | 21 | #### Nature-Inspired based 22 | 23 | * Bat Algorithm (BatAlgorithm), 24 | * Differential Evolution (DifferentialEvolution), 25 | * Self-Adaptive Differential Evolution (jDEFSTH), 26 | * Grey Wolf Optimizer (GreyWolfOptimizer), 27 | * Particle Swarm Optimization (ParticleSwarmOptimization). 
28 | 29 | ### Feature Transformation Algorithms 30 | 31 | * Normalizer (Normalizer), 32 | * Standard Scaler (StandardScaler), 33 | * Maximum Absolute Scaler (MaxAbsScaler), 34 | * Quantile Transformer (QuantileTransformer), 35 | * Robust Scaler (RobustScaler). 36 | 37 | ### Fitness Functions based on 38 | 39 | * Accuracy (Accuracy), 40 | * Cohen's kappa (CohenKappa), 41 | * F1-Score (F1), 42 | * Precision (Precision). 43 | 44 | ### Categorical Feature Encoders 45 | 46 | * One-Hot Encoder (OneHotEncoder). 47 | 48 | ### Feature Imputers 49 | 50 | * Simple Imputer (SimpleImputer). -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to NiaAML 2 | :+1::tada: First off, thanks for taking the time to contribute! :tada::+1: 3 | 4 | ## Code of Conduct 5 | This project and everyone participating in it is governed by the [NiaAML Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. Please report unacceptable behavior to [lukapecnik96@gmail.com](mailto:lukapecnik96@gmail.com). 6 | 7 | ## How Can I Contribute? 8 | 9 | ### Reporting Bugs 10 | Before creating bug reports, please check existing issues list as you might find out that you don't need to create one. When you are creating a bug report, please include as many details as possible in the issue using the [🐛 bug report issue template](https://github.com/firefly-cpp/NiaAML/blob/master/.github/ISSUE_TEMPLATE/%F0%9F%90%9B%20bug%20report.md). 11 | 12 | ### Suggesting Enhancements 13 | 14 | Open new issue using the [🚀 feature request template](https://github.com/firefly-cpp/NiaAML/blob/master/.github/ISSUE_TEMPLATE/%F0%9F%9A%80%20feature%20request.md). 15 | 16 | ### Pull requests 17 | 18 | Fill in the [pull request template](.github/pull_request_template.md) and make sure your code is documented. 
19 | 20 | ## Setup development environment 21 | 22 | ### Requirements 23 | 24 | * Poetry: [https://python-poetry.org/docs/](https://python-poetry.org/docs/) 25 | 26 | After installing Poetry and cloning the project from GitHub, you should run the following command from the root of the cloned project: 27 | 28 | ```sh 29 | poetry install 30 | ``` 31 | 32 | All of the project's dependencies should be installed and the project ready for further development. **Note that Poetry creates a separate virtual environment for your project.** 33 | 34 | ### Development dependencies 35 | 36 | List of NiaAML's dependencies: 37 | 38 | | Package | Version | Platform | 39 | |---------------|---------|----------| 40 | | numpy | ^1.19.1 | All | 41 | | scikit-learn | ^1.1.2 | All | 42 | | niapy | ^2.0.5 | All | 43 | | pandas | ^2.1.1 | All | 44 | 45 | List of development dependencies: 46 | 47 | | Package | Version | Platform | 48 | |-------------------|---------|----------| 49 | | sphinx | ^3.3.1 | Any | 50 | | sphinx-rtd-theme | ^0.5.0 | Any | 51 | | coveralls | ^2.2.0 | Any | 52 | | autoflake | ^1.4 | Any | 53 | | black | ^21.5b1 | Any | 54 | | pre-commit | ^2.12.1 | Any | 55 | | pytest | ^7.4.2 | Any | 56 | | pytest-cov | ^4.1.0 | Any | 57 | 58 | ## Development Tasks 59 | 60 | ### Testing 61 | 62 | Manually run the tests: 63 | 64 | ```sh 65 | $ poetry run coverage run --source=niaaml -m unittest discover -b 66 | ``` 67 | 68 | ### Documentation 69 | 70 | Build the documentation: 71 | 72 | ```sh 73 | $ poetry run sphinx-build ./docs ./docs/_build 74 | ``` 75 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020-2025 Luka Pečnik et al. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/about.rst: -------------------------------------------------------------------------------- 1 | About 2 | ===== 3 | 4 | NiaAML is an automated machine learning Python framework based on nature-inspired algorithms for optimization. The name comes from the `automated machine learning method of the same name `_. Its goal is to efficiently compose the best possible classification pipeline for the given task using components on the input. The components are divided into three groups: feature selection algorithms, feature transformation algorithms and classifiers. The framework uses nature-inspired algorithms for optimization to choose the best set of components for the classification pipeline on the output and optimize their parameters. We use NiaPy framework for the optimization process which is a popular Python collection of nature-inspired algorithms. The NiaAML framework is easy to use and customize or expand to suit your needs. 5 | 6 | The NiaAML framework allows you not only to run full pipeline optimization, but also separate implemented components such as classifiers, feature selection algorithms, etc. It supports numerical and categorical features. 7 | 8 | Licence 9 | ------- 10 | This package is distributed under the `MIT License `_. 11 | 12 | Disclaimer 13 | ---------- 14 | This framework is provided as-is, and there are no guarantees that it fits your purposes 15 | or that it is bug-free. Use it at your own risk! -------------------------------------------------------------------------------- /docs/api/classifiers.rst: -------------------------------------------------------------------------------- 1 | :mod:`niaaml.classifiers` 2 | ========================= 3 | 4 | .. 
automodule:: niaaml.classifiers 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/api/data.rst: -------------------------------------------------------------------------------- 1 | :mod:`niaaml.data` 2 | ========================= 3 | 4 | .. automodule:: niaaml.data 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/api/fitness.rst: -------------------------------------------------------------------------------- 1 | :mod:`niaaml.fitness` 2 | ========================= 3 | 4 | .. automodule:: niaaml.fitness 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/api/index.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | This is the NiaAML API documentation, auto generated from the source code. 5 | 6 | .. toctree:: 7 | 8 | niaaml 9 | data 10 | classifiers 11 | preprocessing 12 | fitness -------------------------------------------------------------------------------- /docs/api/niaaml.rst: -------------------------------------------------------------------------------- 1 | :mod:`niaaml` 2 | ============= 3 | 4 | .. automodule:: niaaml 5 | :noindex: 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/api/preprocessing.rst: -------------------------------------------------------------------------------- 1 | :mod:`niaaml.preprocessing` 2 | =========================== 3 | 4 | .. automodule:: niaaml.preprocessing 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | :mod:`niaaml.preprocessing.feature_selection` 10 | --------------------------------------------- 11 | 12 | .. 
automodule:: niaaml.preprocessing.feature_selection 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | :mod:`niaaml.preprocessing.feature_transform` 18 | --------------------------------------------- 19 | 20 | .. automodule:: niaaml.preprocessing.feature_transform 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | :mod:`niaaml.preprocessing.encoding` 26 | --------------------------------------------- 27 | 28 | .. automodule:: niaaml.preprocessing.encoding 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | :mod:`niaaml.preprocessing.imputation` 34 | --------------------------------------------- 35 | 36 | .. automodule:: niaaml.preprocessing.imputation 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | import sphinx_rtd_theme 2 | 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file only contains a selection of the most common options. For a full 6 | # list see the documentation: 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | import os 16 | import sys 17 | 18 | sys.path.insert(0, os.path.abspath("../")) 19 | 20 | 21 | # -- Project information ----------------------------------------------------- 22 | 23 | project = "NiaAML" 24 | copyright = "2020-2025, Luka Pečnik et al." 25 | author = "Luka Pečnik et al." 
26 | 27 | # The full version, including alpha/beta/rc tags 28 | release = "2.1.2" 29 | 30 | 31 | # -- General configuration --------------------------------------------------- 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 35 | # ones. 36 | extensions = ["sphinx.ext.autodoc", "sphinx_rtd_theme"] 37 | 38 | # Add any paths that contain templates here, relative to this directory. 39 | templates_path = ["_templates"] 40 | 41 | # List of patterns, relative to source directory, that match files and 42 | # directories to ignore when looking for source files. 43 | # This pattern also affects html_static_path and html_extra_path. 44 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 45 | 46 | 47 | # -- Options for HTML output ------------------------------------------------- 48 | 49 | # The theme to use for HTML and HTML Help pages. See the documentation for 50 | # a list of builtin themes. 51 | # 52 | html_theme = "sphinx_rtd_theme" 53 | 54 | # Add any paths that contain custom static files (such as style sheets) here, 55 | # relative to this directory. They are copied after the builtin static files, 56 | # so a file named "default.css" will overwrite the builtin "default.css". 57 | # html_static_path = ["_static"] 58 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | Contributing to NiaAML 2 | ====================== 3 | 4 | First off, thanks for taking the time to contribute! 5 | 6 | Code of Conduct 7 | --------------- 8 | 9 | This project and everyone participating in it is governed by the :doc:`/code_of_conduct`. By participating, you are 10 | expected to uphold this code. Please report unacceptable behavior to 11 | lukapecnik96@gmail.com. 12 | 13 | How Can I Contribute? 
14 | --------------------- 15 | 16 | Reporting Bugs 17 | ~~~~~~~~~~~~~~ 18 | 19 | Before creating bug reports, please check the existing issues list as you 20 | might find out that you don't need to create one. When you are creating 21 | a bug report, please include as many details as possible in the issue template. 22 | 23 | Suggesting Enhancements 24 | ~~~~~~~~~~~~~~~~~~~~~~~ 25 | 26 | Open a new issue using the feature request template. 27 | 28 | Pull requests 29 | ~~~~~~~~~~~~~ 30 | 31 | Fill in the pull request template and make sure 32 | your code is documented. 33 | -------------------------------------------------------------------------------- /docs/documentation.rst: -------------------------------------------------------------------------------- 1 | Documentation 2 | ============= 3 | 4 | To locally generate and preview documentation run the following command in the project root folder: 5 | 6 | .. code:: sh 7 | 8 | $ poetry run sphinx-build ./docs ./docs/_build 9 | 10 | If the build of the documentation is successful, you can preview the documentation in the docs/_build folder by clicking the ``index.html`` file. -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | NiaAML's documentation! 2 | ================================== 3 | 4 | .. automodule:: niaaml 5 | 6 | NiaAML is an automated machine learning Python framework based on nature-inspired algorithms for optimization. The name comes from the automated machine learning method of the same name [1]. Its goal is to efficiently compose the best possible classification pipeline for the given task using components on the input. The components are divided into three groups: feature selection algorithms, feature transformation algorithms and classifiers.
The framework uses nature-inspired algorithms for optimization to choose the best set of components for the classification pipeline on the output and optimize their parameters. We use the `NiaPy framework <https://github.com/NiaOrg/NiaPy>`_ for the optimization process, which is a popular Python collection of nature-inspired algorithms. The NiaAML framework is easy to use and customize or expand to suit your needs. 7 | 8 | * **Free software:** MIT license 9 | * **Github repository:** https://github.com/lukapecnik/NiaAML 10 | * **Python versions:** 3.11.x, 3.12.x 11 | 12 | The main documentation is organized into a couple of sections: 13 | 14 | * :ref:`user-docs` 15 | * :ref:`dev-docs` 16 | * :ref:`about-docs` 17 | 18 | .. _user-docs: 19 | 20 | .. toctree:: 21 | :maxdepth: 3 22 | :caption: User Documentation 23 | 24 | getting_started 25 | 26 | .. _dev-docs: 27 | 28 | .. toctree:: 29 | :maxdepth: 2 30 | :caption: Developer Documentation 31 | 32 | changelog 33 | installation 34 | testing 35 | documentation 36 | api/index 37 | 38 | .. _about-docs: 39 | 40 | .. toctree:: 41 | :maxdepth: 3 42 | :caption: About 43 | 44 | about 45 | contributing 46 | code_of_conduct 47 | 48 | References 49 | ---------- 50 | 51 | [1] Iztok Fister Jr., Milan Zorman, Dušan Fister, Iztok Fister. Continuous optimizers for automatic design and evaluation of classification pipelines. In: Frontier applications of nature inspired computation. Springer tracts in nature-inspired computing, pp.281-301, 2020. 
-------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Setup development environment 5 | ----------------------------- 6 | 7 | Requirements 8 | ~~~~~~~~~~~~ 9 | 10 | - Poetry: https://python-poetry.org/docs/ 11 | 12 | After installing Poetry and cloning the project from GitHub, you should 13 | run the following command from the root of the cloned project: 14 | 15 | .. code:: sh 16 | 17 | $ poetry install 18 | 19 | All of the project's dependencies should be installed and the project 20 | ready for further development. **Note that Poetry creates a separate 21 | virtual environment for your project.** 22 | 23 | Development dependencies 24 | ~~~~~~~~~~~~~~~~~~~~~~~~ 25 | 26 | List of NiaAML's dependencies: 27 | 28 | +----------------+--------------+------------+ 29 | | Package | Version | Platform | 30 | +================+==============+============+ 31 | | numpy | ^1.19.1 | All | 32 | +----------------+--------------+------------+ 33 | | scikit-learn | ^1.1.2 | All | 34 | +----------------+--------------+------------+ 35 | | niapy | ^2.0.5 | All | 36 | +----------------+--------------+------------+ 37 | | pandas | ^2.1.1 | All | 38 | +----------------+--------------+------------+ 39 | 40 | List of development dependencies: 41 | 42 | +--------------------+-----------+------------+ 43 | | Package | Version | Platform | 44 | +====================+===========+============+ 45 | | sphinx | ^3.3.1 | Any | 46 | +--------------------+-----------+------------+ 47 | | sphinx-rtd-theme | ^0.5.0 | Any | 48 | +--------------------+-----------+------------+ 49 | | coveralls | ^2.2.0 | Any | 50 | +--------------------+-----------+------------+ 51 | | autoflake | ^1.4 | Any | 52 | +--------------------+-----------+------------+ 53 | | black | ^21.5b1 | Any | 54 | +--------------------+-----------+------------+ 55 
| | pre-commit | ^2.12.1 | Any | 56 | +--------------------+-----------+------------+ 57 | | pytest | ^7.4.2 | Any | 58 | +--------------------+-----------+------------+ 59 | | pytest-cov | ^4.1.0 | Any | 60 | +--------------------+-----------+------------+ -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/paper/10.21105.joss.02949.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firefly-cpp/NiaAML/11766422d32bc83e12f10706f59674fdb38e783e/docs/paper/10.21105.joss.02949.pdf -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firefly-cpp/NiaAML/11766422d32bc83e12f10706f59674fdb38e783e/docs/requirements.txt -------------------------------------------------------------------------------- /docs/testing.rst: -------------------------------------------------------------------------------- 1 | Testing 2 | ======= 3 | 4 | Before making a pull request, if possible provide tests for added features or bug fixes. 5 | 6 | We have an automated building system which also runs all of the provided tests. In case any of the test cases fails, we are notified about failing tests. Those should be fixed before we merge your pull request to the master branch. 7 | 8 | For the purpose of checking if all tests are passing locally, you can run the following command: 9 | 10 | .. code:: sh 11 | 12 | $ poetry run coverage run --source=niaaml -m unittest discover -b 13 | 14 | If all tests pass when running this command, it is most likely that the tests would pass on our build system too. 
-------------------------------------------------------------------------------- /examples/classifier.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers import AdaBoost 2 | import os 3 | from niaaml.data import CSVDataReader 4 | import numpy 5 | 6 | """ 7 | In this example, we show how to individually use an implemented classifier and its methods. In this case we use AdaBoost for demonstration, but 8 | you can use any of the implemented classifiers in the same way. 9 | """ 10 | 11 | # prepare data reader using csv file 12 | data_reader = CSVDataReader( 13 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 14 | has_header=False, 15 | contains_classes=True, 16 | ) 17 | 18 | # instantiate AdaBoost classifier 19 | classifier = AdaBoost() 20 | 21 | # set parameters of the classifier 22 | classifier.set_parameters(n_estimators=50, algorithm="SAMME") 23 | 24 | # fit classifier to the data 25 | classifier.fit(data_reader.get_x(), data_reader.get_y()) 26 | 27 | # predict classes of the dummy input 28 | predicted = classifier.predict( 29 | numpy.random.uniform(low=0.0, high=15.0, size=(30, data_reader.get_x().shape[1])) 30 | ) 31 | 32 | # print classifier in a user-friendly form 33 | print(classifier.to_string()) 34 | -------------------------------------------------------------------------------- /examples/example_files/pipeline.ppln: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firefly-cpp/NiaAML/11766422d32bc83e12f10706f59674fdb38e783e/examples/example_files/pipeline.ppln -------------------------------------------------------------------------------- /examples/export_pipeline_object.py: -------------------------------------------------------------------------------- 1 | from niaaml import Pipeline 2 | from niaaml.classifiers import AdaBoost 3 | from niaaml.preprocessing.feature_selection import SelectKBest 4 | 
from niaaml.preprocessing.feature_transform import Normalizer 5 | 6 | """ 7 | This example presents how to export a pipeline object into a file that can later be loaded back into a Python program as a Pipeline object. 8 | """ 9 | 10 | # instantiate a Pipeline object with AdaBoost classifier, SelectKBest feature selection algorithm and Normalizer as a feature transformation algorithm 11 | pipeline = Pipeline( 12 | feature_selection_algorithm=SelectKBest(), 13 | feature_transform_algorithm=Normalizer(), 14 | classifier=AdaBoost(), 15 | ) 16 | 17 | # export the object to a file for later use 18 | pipeline.export("exported_pipeline.ppln") 19 | -------------------------------------------------------------------------------- /examples/export_pipeline_text.py: -------------------------------------------------------------------------------- 1 | from niaaml import Pipeline 2 | from niaaml.classifiers import AdaBoost 3 | from niaaml.preprocessing.feature_selection import SelectKBest 4 | from niaaml.preprocessing.feature_transform import Normalizer 5 | 6 | """ 7 | This example presents how to export a pipeline object into a text file in a user-friendly form. A text file cannot be loaded back into a Python program in 8 | the form of a Pipeline object. 
9 | """ 10 | 11 | # instantiate a Pipeline object with AdaBoost classifier, SelectKBest feature selection algorithm and Normalizer as feature transformation algorithm 12 | pipeline = Pipeline( 13 | feature_selection_algorithm=SelectKBest(), 14 | feature_transform_algorithm=Normalizer(), 15 | classifier=AdaBoost(), 16 | ) 17 | 18 | # export the object to a file in a user-friendly form 19 | pipeline.export_text("exported_pipeline.txt") 20 | -------------------------------------------------------------------------------- /examples/factories.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers import ClassifierFactory 2 | from niaaml.preprocessing.feature_selection import FeatureSelectionAlgorithmFactory 3 | from niaaml.preprocessing.feature_transform import FeatureTransformAlgorithmFactory 4 | from niaaml.fitness import FitnessFactory 5 | from niaaml.preprocessing.encoding import EncoderFactory 6 | from niaaml.preprocessing.imputation import ImputerFactory 7 | 8 | """ 9 | This example presents how to use all of the implemented factories to create new object instances using their class names. You may also 10 | import and instantiate objects directly, but it is more convenient to use factories in some cases. 
11 | """ 12 | 13 | # instantiate all possible factories 14 | classifier_factory = ClassifierFactory() 15 | fsa_factory = FeatureSelectionAlgorithmFactory() 16 | fta_factory = FeatureTransformAlgorithmFactory() 17 | f_factory = FitnessFactory() 18 | e_factory = EncoderFactory() 19 | i_factory = ImputerFactory() 20 | 21 | # get an instance of the MultiLayerPerceptron class 22 | mlp = classifier_factory.get_result("MultiLayerPerceptron") 23 | 24 | # get an instance of the ParticleSwarmOptimization class 25 | pso = fsa_factory.get_result("ParticleSwarmOptimization") 26 | 27 | # get an instance of the Normalizer class 28 | normalizer = fta_factory.get_result("Normalizer") 29 | 30 | # get an instance of the Precision class 31 | precision = f_factory.get_result("Precision") 32 | 33 | # get an instance of the OneHotEncoder class 34 | ohe = e_factory.get_result("OneHotEncoder") 35 | 36 | # get an instance of the SimpleImputer class 37 | imp = i_factory.get_result("SimpleImputer") 38 | 39 | # variables mlp, pso, normalizer, precision, ohe and imp contain instances of the classes with the passed names 40 | -------------------------------------------------------------------------------- /examples/feature_encoding.py: -------------------------------------------------------------------------------- 1 | from niaaml.preprocessing.encoding import OneHotEncoder, encode_categorical_features 2 | import os 3 | from niaaml.data import CSVDataReader 4 | 5 | """ 6 | This example presents how to use an implemented categorical feature encoder and its methods individually. In this case, we use OneHotEncoder for demonstration, but 7 | you can use any of the implemented encoders in the same way. 
8 | """ 9 | 10 | # prepare data reader using csv file 11 | data_reader = CSVDataReader( 12 | src=os.path.dirname(os.path.abspath(__file__)) 13 | + "/example_files/dataset_categorical.csv", 14 | has_header=False, 15 | contains_classes=True, 16 | ) 17 | 18 | # instantiate OneHotEncoder 19 | ohe = OneHotEncoder() 20 | 21 | # fit, transform and print to output the categorical feature in the dataset (index 6) 22 | features = data_reader.get_x() 23 | ohe.fit(features[[6]]) 24 | f = ohe.transform(features[[6]]) 25 | print(f) 26 | 27 | # if you need to get an array of encoders for all of the categorical features in a dataset (and transformed DataFrame of features), you may use the utility method encode_categorical_features 28 | transformed_features, encoders = encode_categorical_features(features, "OneHotEncoder") 29 | -------------------------------------------------------------------------------- /examples/feature_imputing.py: -------------------------------------------------------------------------------- 1 | from niaaml.preprocessing.imputation import SimpleImputer, impute_features 2 | import os 3 | from niaaml.data import CSVDataReader 4 | 5 | """ 6 | This example presents how to use an implemented missing features' imputer and its methods individually. In this case, we use SimpleImputer for demonstration, but 7 | you can use any of the implemented imputers in the same way. 
8 | """ 9 | 10 | # prepare data reader using csv file 11 | data_reader = CSVDataReader( 12 | src=os.path.dirname(os.path.abspath(__file__)) 13 | + "/example_files/dataset_categorical_missing.csv", 14 | has_header=False, 15 | contains_classes=True, 16 | ) 17 | 18 | # instantiate SimpleImputer 19 | si = SimpleImputer() 20 | 21 | # fit, transform and print to output the feature in the dataset (index 6) 22 | features = data_reader.get_x() 23 | si.fit(features[[6]]) 24 | f = si.transform(features[[6]]) 25 | print(f) 26 | 27 | # if you wish to get array of imputers for all of the features with missing values in a dataset (and transformed DataFrame of features), you may use the utility method impute_features 28 | transformed_features, imputers = impute_features(features, "SimpleImputer") 29 | -------------------------------------------------------------------------------- /examples/feature_selection.py: -------------------------------------------------------------------------------- 1 | from niaaml.preprocessing.feature_selection import SelectKBest 2 | import os 3 | from niaaml.data import CSVDataReader 4 | from sklearn.feature_selection import chi2 5 | 6 | """ 7 | This example presents how to use an implemented feature selection algorithm and its methods individually. In this case, we use SelectKBest for demonstration, but 8 | you can use any of the implemented feature selection algorithms in the same way. 
9 | """ 10 | 11 | # prepare data reader using csv file 12 | data_reader = CSVDataReader( 13 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 14 | has_header=False, 15 | contains_classes=True, 16 | ) 17 | 18 | # instantiate SelectKBest feature selection algorithms 19 | fs = SelectKBest() 20 | 21 | # set parameters of the object 22 | fs.set_parameters(k=4, score_func=chi2) 23 | 24 | # select best features according to the SelectKBest algorithm (returns boolean mask of the selected features - True if selected, False if not) 25 | features_mask = fs.select_features(data_reader.get_x(), data_reader.get_y()) 26 | 27 | # print feature selection algorithm in a user-friendly form 28 | print(fs.to_string()) 29 | -------------------------------------------------------------------------------- /examples/feature_selection_threshold_algorithms.py: -------------------------------------------------------------------------------- 1 | from niaaml.preprocessing.feature_selection import ParticleSwarmOptimization 2 | import os 3 | from niaaml.data import CSVDataReader 4 | 5 | """ 6 | This example presents how to use implemented feature selection algorithms that use threshold mechanism. 
7 | """ 8 | 9 | # prepare data reader using csv file 10 | data_reader = CSVDataReader( 11 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 12 | has_header=False, 13 | contains_classes=True, 14 | ) 15 | 16 | # instantiate feature selection algorithm 17 | fs = ParticleSwarmOptimization() 18 | # BatAlgorithm, DifferentialEvolution, GreyWolfOptimizer and jDEFSTH also use threshold mechanism 19 | 20 | # set parameters of the instantiated algorithm 21 | fs.set_parameters(C1=1.5, C2=2.0) 22 | 23 | # select best features according to the ParticleSwarmOptimization algorithm (returns boolean mask of the selected features - True if selected, False if not) 24 | features_mask = fs.select_features(data_reader.get_x(), data_reader.get_y()) 25 | 26 | # print feature selection algorithm in a user-friendly form 27 | print(fs.to_string()) 28 | -------------------------------------------------------------------------------- /examples/feature_transform.py: -------------------------------------------------------------------------------- 1 | from niaaml.preprocessing.feature_transform import Normalizer 2 | import os 3 | from niaaml.data import CSVDataReader 4 | 5 | """ 6 | This example presents how to individually use an implemented feature transform algorithm and its methods. In this case, we use Normalizer for demonstration, but 7 | you can use any of the implemented feature transform algorithms in the same way. 
8 | """ 9 | 10 | # prepare data reader using csv file 11 | data_reader = CSVDataReader( 12 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 13 | has_header=False, 14 | contains_classes=True, 15 | ) 16 | 17 | # instantiate Normalizer 18 | ft = Normalizer() 19 | 20 | # set parameters of the Normalizer 21 | ft.set_parameters(norm="l2") 22 | 23 | # fit the algorithm to the input data 24 | ft.fit(data_reader.get_x()) 25 | 26 | # transform features 27 | transformed_features = ft.transform(data_reader.get_x()) 28 | 29 | # print feature transform algorithm in a user-friendly form 30 | print(ft.to_string()) 31 | -------------------------------------------------------------------------------- /examples/fitness.py: -------------------------------------------------------------------------------- 1 | from niaaml.fitness import Precision 2 | from niaaml.data import CSVDataReader 3 | import os 4 | import numpy 5 | 6 | """ 7 | This example presents how to use an implemented fitness function and its method individually. In this case, we use Precision for demonstration, but 8 | you can use any of the implemented fitness functions in the same way. 
9 | """ 10 | 11 | # prepare data reader using csv file 12 | data_reader = CSVDataReader( 13 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 14 | has_header=False, 15 | contains_classes=True, 16 | ) 17 | 18 | # lets say the following array contains predictions after the classification process 19 | predictions = numpy.random.choice( 20 | ["Class 1", "Class 2"], size=data_reader.get_y().shape 21 | ) 22 | 23 | # instantiate instance of a fitness function (Precision in this case) 24 | fitness_func = Precision() 25 | 26 | # calculate fitness value 27 | precision = fitness_func.get_fitness(predictions, data_reader.get_y()) 28 | 29 | # precision will probably be low due to dummy data 30 | print(precision) 31 | -------------------------------------------------------------------------------- /examples/load_data_basic.py: -------------------------------------------------------------------------------- 1 | from niaaml.data import BasicDataReader 2 | import numpy 3 | 4 | """ 5 | This example presents how to instantiate BasicDataReader and use its methods. You can use it to contain data in a single variable 6 | or as an input to an instance of the PipelineOptimizer class. 7 | """ 8 | 9 | # BasicDataReader instance uses arrays on the input (x and y arrays) 10 | data_reader = BasicDataReader( 11 | x=numpy.random.uniform(low=0.0, high=15.0, size=(50, 3)), 12 | y=numpy.random.choice(["Class 1", "Class 2"], size=50), 13 | ) 14 | 15 | # get x and y arrays and print them 16 | print(data_reader.get_x()) 17 | print(data_reader.get_y()) 18 | -------------------------------------------------------------------------------- /examples/load_data_csv.py: -------------------------------------------------------------------------------- 1 | import os 2 | from niaaml.data import CSVDataReader 3 | 4 | """ 5 | This example presents how to instantiate CSVDataReader and use its methods. 
You can use it to contain data in a single variable, 6 | or as an input to an instance of the PipelineOptimizer class. 7 | """ 8 | 9 | # CSVDataReader gets a path to csv file on the input, reads and parses it into the x and y arrays 10 | # has_header and contains_classes arguments need to be set according to the input csv file's structure 11 | data_reader = CSVDataReader( 12 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 13 | has_header=False, 14 | contains_classes=True, 15 | ) 16 | 17 | # get x and y arrays and print them 18 | print(data_reader.get_x()) 19 | print(data_reader.get_y()) 20 | -------------------------------------------------------------------------------- /examples/load_pipeline_object_file.py: -------------------------------------------------------------------------------- 1 | import os 2 | from niaaml import Pipeline 3 | 4 | """ 5 | This example presents how to load a saved Pipeline object from a file. You can use all of its methods after it has been loaded successfully. 6 | """ 7 | 8 | # load Pipeline object from a file 9 | pipeline = Pipeline.load( 10 | os.path.dirname(os.path.abspath(__file__)) + "/example_files/pipeline.ppln" 11 | ) 12 | 13 | # all of the Pipeline class's methods can be called after a successful load 14 | -------------------------------------------------------------------------------- /examples/logger.py: -------------------------------------------------------------------------------- 1 | from niaaml.logger import Logger 2 | 3 | """ 4 | This example presents how to use the Logger class individually. 
5 | """ 6 | 7 | # instantiate instance with verbose mode 8 | logger = Logger(verbose=True) 9 | 10 | # in verbose mode, all of the call functions should show their output 11 | logger.log_progress("progress") 12 | logger.log_pipeline("pipeline") 13 | logger.log_optimization_error("optimization error") 14 | 15 | print("-------------------------") 16 | 17 | # in this case only the log_progress function's call is going to show the output 18 | logger = Logger() 19 | logger.log_progress("progress") 20 | logger.log_pipeline("pipeline") 21 | logger.log_optimization_error("optimization error") 22 | 23 | print("-------------------------") 24 | 25 | # you may also output logs to some log file 26 | logger = Logger(verbose=True, output_file="log_output") 27 | logger.log_progress("progress") 28 | logger.log_pipeline("pipeline") 29 | logger.log_optimization_error("optimization error") 30 | -------------------------------------------------------------------------------- /examples/optimization_stats.py: -------------------------------------------------------------------------------- 1 | from niaaml.utilities import OptimizationStats 2 | import numpy as np 3 | 4 | """ 5 | This example presents how the OptimizationStats class can be used. Normally, it is used in the background when the Pipeline's optimize method is called. 
6 | """ 7 | 8 | # dummy array with expected results of the classification process 9 | y = np.array( 10 | [ 11 | "Class 1", 12 | "Class 1", 13 | "Class 1", 14 | "Class 2", 15 | "Class 1", 16 | "Class 2", 17 | "Class 2", 18 | "Class 2", 19 | "Class 2", 20 | "Class 1", 21 | "Class 1", 22 | "Class 2", 23 | "Class 1", 24 | "Class 2", 25 | "Class 1", 26 | "Class 1", 27 | "Class 1", 28 | "Class 1", 29 | "Class 2", 30 | "Class 1", 31 | ] 32 | ) 33 | 34 | # dummy array with predicted classes 35 | predicted = np.array( 36 | [ 37 | "Class 1", 38 | "Class 1", 39 | "Class 1", 40 | "Class 2", 41 | "Class 2", 42 | "Class 2", 43 | "Class 1", 44 | "Class 1", 45 | "Class 1", 46 | "Class 2", 47 | "Class 1", 48 | "Class 1", 49 | "Class 2", 50 | "Class 2", 51 | "Class 1", 52 | "Class 2", 53 | "Class 1", 54 | "Class 2", 55 | "Class 2", 56 | "Class 2", 57 | ] 58 | ) 59 | 60 | # instantiate OptimizationStats 61 | stats = OptimizationStats(predicted, y) 62 | 63 | # print user-friendly text representation 64 | print(stats.to_string()) 65 | -------------------------------------------------------------------------------- /examples/optimize_run_pipeline.py: -------------------------------------------------------------------------------- 1 | from niaaml import Pipeline 2 | from niaaml.classifiers import MultiLayerPerceptron 3 | from niaaml.preprocessing.feature_selection import VarianceThreshold 4 | from niaaml.preprocessing.feature_transform import Normalizer 5 | from niaaml.data import CSVDataReader 6 | import os 7 | import numpy 8 | import pandas 9 | 10 | """ 11 | This example presents how to use the Pipeline class individually. You may use this if you want to test out a specific classification pipeline. 
12 | """ 13 | 14 | # prepare data reader using csv file 15 | data_reader = CSVDataReader( 16 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 17 | has_header=False, 18 | contains_classes=True, 19 | ) 20 | 21 | # instantiate a Pipeline object 22 | pipeline = Pipeline( 23 | feature_selection_algorithm=VarianceThreshold(), 24 | feature_transform_algorithm=Normalizer(), 25 | classifier=MultiLayerPerceptron(), 26 | ) 27 | 28 | # run pipeline optimization process (returns fitness value, but sets the best parameters for classifier, feature selection algorithm and feature transform algorithm during the process) 29 | pipeline.optimize( 30 | data_reader.get_x(), 31 | data_reader.get_y(), 32 | 10, 33 | 50, 34 | "ParticleSwarmAlgorithm", 35 | "Accuracy", 36 | ) 37 | 38 | # run the pipeline using dummy data 39 | # you could run the pipeline before the optimization process, but get wrong predictions as nothing in the pipeline is fit for the given dataset 40 | predicted = pipeline.run( 41 | pandas.DataFrame( 42 | numpy.random.uniform( 43 | low=0.0, high=15.0, size=(30, data_reader.get_x().shape[1]) 44 | ) 45 | ) 46 | ) 47 | 48 | # pipeline variable contains Pipeline object that can be used for further classification, exported as an object (that can be later loaded and used) or exported as text file 49 | -------------------------------------------------------------------------------- /examples/optimize_run_pipeline_categorical_features.py: -------------------------------------------------------------------------------- 1 | from niaaml import Pipeline 2 | from niaaml.classifiers import MultiLayerPerceptron 3 | from niaaml.preprocessing.feature_selection import VarianceThreshold 4 | from niaaml.preprocessing.feature_transform import Normalizer 5 | from niaaml.data import CSVDataReader 6 | from niaaml.preprocessing.encoding import encode_categorical_features 7 | import os 8 | import numpy 9 | import pandas 10 | 11 | """ 12 | This example presents how 
to use the Pipeline class individually. You may use this if you want to test out a specific classification pipeline. 13 | We use a dataset that contains categorical and numerical features. 14 | """ 15 | 16 | # prepare data reader using csv file 17 | data_reader = CSVDataReader( 18 | src=os.path.dirname(os.path.abspath(__file__)) 19 | + "/example_files/dataset_categorical.csv", 20 | has_header=False, 21 | contains_classes=True, 22 | ) 23 | 24 | # we use the utility method encode_categorical_features to get encoders for the categorical features, but you may instantiate and fit 25 | # feature encoders separately and pass them as an array (as long as they are implemented as this framework suggests) 26 | # there should be as many encoders as categorical features 27 | # this example uses One-Hot Encoding 28 | _, encoders = encode_categorical_features(data_reader.get_x(), "OneHotEncoder") 29 | 30 | # instantiate a Pipeline object 31 | pipeline = Pipeline( 32 | feature_selection_algorithm=VarianceThreshold(), 33 | feature_transform_algorithm=Normalizer(), 34 | classifier=MultiLayerPerceptron(), 35 | categorical_features_encoders=encoders, 36 | ) 37 | 38 | # run pipeline optimization process (returns fitness value, but sets the best parameters for classifier, feature selection algorithm and feature transform algorithm during the process) 39 | pipeline.optimize( 40 | data_reader.get_x(), 41 | data_reader.get_y(), 42 | 10, 43 | 50, 44 | "ParticleSwarmAlgorithm", 45 | "Accuracy", 46 | ) 47 | 48 | # run the pipeline using dummy data 49 | # you could run the pipeline before the optimization process, but get wrong predictions as nothing in the pipeline is fit for the given dataset 50 | predicted = pipeline.run( 51 | pandas.DataFrame( 52 | [ 53 | [ 54 | 10.32440339, 55 | 3.195964543, 56 | 1.215275549, 57 | 3.741461311, 58 | 11.6736581, 59 | 6.435247906, 60 | "a", 61 | ] 62 | ] 63 | ) 64 | ) 65 | 66 | # pipeline variable contains a Pipeline object that can be used for further 
classification, exported as an object (that can later be loaded and used) or exported as a text file 67 | -------------------------------------------------------------------------------- /examples/optimize_run_pipeline_logger.py: -------------------------------------------------------------------------------- 1 | from niaaml import Pipeline 2 | from niaaml.classifiers import MultiLayerPerceptron 3 | from niaaml.preprocessing.feature_selection import VarianceThreshold 4 | from niaaml.preprocessing.feature_transform import Normalizer 5 | from niaaml.data import CSVDataReader 6 | from niaaml.logger import Logger 7 | import os 8 | import numpy 9 | import pandas 10 | 11 | """ 12 | This example presents how to use the Pipeline class with logging individually. You may use this if you want to test out a specific classification pipeline. 13 | """ 14 | 15 | # prepare data reader using csv file 16 | data_reader = CSVDataReader( 17 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 18 | has_header=False, 19 | contains_classes=True, 20 | ) 21 | 22 | # prepare Logger instance 23 | # verbose=True means more information, output_file is the log's file name 24 | # if output_file is None, there is no file created 25 | logger = Logger(verbose=True, output_file="output.log") 26 | 27 | # instantiate a Pipeline object 28 | pipeline = Pipeline( 29 | feature_selection_algorithm=VarianceThreshold(), 30 | feature_transform_algorithm=Normalizer(), 31 | classifier=MultiLayerPerceptron(), 32 | logger=logger, 33 | ) 34 | 35 | # run pipeline optimization process (returns fitness value, but sets the best parameters for classifier, feature selection algorithm and feature transform algorithm during the process) 36 | pipeline.optimize( 37 | data_reader.get_x(), 38 | data_reader.get_y(), 39 | 10, 40 | 50, 41 | "ParticleSwarmAlgorithm", 42 | "Accuracy", 43 | ) 44 | 45 | # run the pipeline using dummy data 46 | # you could run the pipeline before the optimization process, 
but get wrong predictions as nothing in the pipeline is fit for the given dataset 47 | predicted = pipeline.run( 48 | pandas.DataFrame( 49 | numpy.random.uniform( 50 | low=0.0, high=15.0, size=(30, data_reader.get_x().shape[1]) 51 | ) 52 | ) 53 | ) 54 | 55 | # pipeline variable contains Pipeline object that can be used for further classification, exported as an object (that can be later loaded and used) or exported as text file 56 | -------------------------------------------------------------------------------- /examples/optimize_run_pipeline_missing_values.py: -------------------------------------------------------------------------------- 1 | from niaaml import Pipeline 2 | from niaaml.classifiers import MultiLayerPerceptron 3 | from niaaml.preprocessing.feature_selection import VarianceThreshold 4 | from niaaml.preprocessing.feature_transform import Normalizer 5 | from niaaml.data import CSVDataReader 6 | from niaaml.preprocessing.encoding import encode_categorical_features 7 | from niaaml.preprocessing.imputation import impute_features 8 | import os 9 | import numpy 10 | import pandas 11 | 12 | """ 13 | This example presents how to use the Pipeline class individually. You may use this if you want to test out a specific classification pipeline. 14 | We use a dataset that contains categorical and numerical features with missing values. 
15 | """ 16 | 17 | # prepare data reader using csv file 18 | data_reader = CSVDataReader( 19 | src=os.path.dirname(os.path.abspath(__file__)) 20 | + "/example_files/dataset_categorical_missing.csv", 21 | has_header=False, 22 | contains_classes=True, 23 | ) 24 | 25 | features = data_reader.get_x() 26 | 27 | # we use the utility method impute_features to get imputers for the features with missing values, but you may instantiate and fit 28 | # imputers separately and pass them as a dictionary (as long as they are implemented as this framework suggests), with keys as column names or indices (if there is no header in the csv) 29 | # there should be as many imputers as the features with missing values 30 | # this example uses Simple Imputer 31 | features, imputers = impute_features(features, "SimpleImputer") 32 | 33 | # exactly the same goes for encoders 34 | _, encoders = encode_categorical_features(features, "OneHotEncoder") 35 | 36 | # instantiate a Pipeline object 37 | pipeline = Pipeline( 38 | feature_selection_algorithm=VarianceThreshold(), 39 | feature_transform_algorithm=Normalizer(), 40 | classifier=MultiLayerPerceptron(), 41 | categorical_features_encoders=encoders, 42 | imputers=imputers, 43 | ) 44 | 45 | # run pipeline optimization process (returns fitness value, but sets the best parameters for classifier, feature selection algorithm and feature transform algorithm during the process) 46 | pipeline.optimize( 47 | data_reader.get_x(), 48 | data_reader.get_y(), 49 | 10, 50 | 50, 51 | "ParticleSwarmAlgorithm", 52 | "Accuracy", 53 | ) 54 | 55 | # run the pipeline using dummy data 56 | # you could run the pipeline before the optimization process, but get wrong predictions as nothing in the pipeline is fit for the given dataset 57 | predicted = pipeline.run( 58 | pandas.DataFrame( 59 | [ 60 | [ 61 | 10.32440339, 62 | 3.195964543, 63 | 1.215275549, 64 | 3.741461311, 65 | 11.6736581, 66 | 6.435247906, 67 | "a", 68 | ] 69 | ] 70 | ) 71 | ) 72 | 73 | # pipeline 
variable contains a Pipeline object that can be used for further classification, exported as an object (that can later be loaded and used) or exported as text file 74 | -------------------------------------------------------------------------------- /examples/run_all.bat: -------------------------------------------------------------------------------- 1 | for %%i in (%cd%\*.py) do python3 %%i -------------------------------------------------------------------------------- /examples/run_all.sh: -------------------------------------------------------------------------------- 1 | for file in ./*.py 2 | do 3 | python3 "$file" 4 | done -------------------------------------------------------------------------------- /examples/run_pipeline_optimizer_array_data.py: -------------------------------------------------------------------------------- 1 | from niaaml import PipelineOptimizer 2 | from niaaml.data import BasicDataReader 3 | import numpy 4 | 5 | """ 6 | This example presents how to use the PipelineOptimizer class. This example is using an instance of BasicDataReader. 7 | The instantiated PipelineOptimizer will try to compose the best pipeline with the components that are specified in its constructor.
8 | """ 9 | 10 | # prepare data reader using features and classes from arrays 11 | # in this case random dummy arrays are generated 12 | data_reader = BasicDataReader( 13 | x=numpy.random.uniform(low=0.0, high=15.0, size=(50, 3)), 14 | y=numpy.random.choice(["Class 1", "Class 2"], size=50), 15 | ) 16 | 17 | # instantiate PipelineOptimizer that chooses among specified classifiers, feature selection algorithms and feature transform algorithms 18 | # log is True by default, log_verbose means more information if True, log_output_file is the destination of a log file 19 | # if log_output_file is not provided there is no file created 20 | # if log is False, logging is turned off 21 | pipeline_optimizer = PipelineOptimizer( 22 | data=data_reader, 23 | classifiers=[ 24 | "AdaBoost", 25 | "Bagging", 26 | "MultiLayerPerceptron", 27 | "RandomForest", 28 | "ExtremelyRandomizedTrees", 29 | "LinearSVC", 30 | ], 31 | feature_selection_algorithms=[ 32 | "SelectKBest", 33 | "SelectPercentile", 34 | "ParticleSwarmOptimization", 35 | "VarianceThreshold", 36 | ], 37 | feature_transform_algorithms=["Normalizer", "StandardScaler"], 38 | log=True, 39 | log_verbose=True, 40 | log_output_file="output.log", 41 | ) 42 | 43 | # runs the optimization process 44 | # one of the possible pipelines in this case is: SelectPercentile -> Normalizer -> RandomForest 45 | # returns the best found pipeline 46 | # the chosen fitness function and optimization algorithm are Accuracy and Particle Swarm Algorithm 47 | pipeline = pipeline_optimizer.run( 48 | "Accuracy", 10, 10, 30, 30, "ParticleSwarmAlgorithm", "ParticleSwarmAlgorithm" 49 | ) 50 | 51 | # pipeline variable contains Pipeline object that can be used for further classification, exported as an object (that can be later loaded and used) or exported as text file 52 | -------------------------------------------------------------------------------- /examples/run_pipeline_optimizer_csv_data.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from niaaml import PipelineOptimizer 3 | from niaaml.data import CSVDataReader 4 | 5 | """ 6 | This example presents how to use the PipelineOptimizer class. This example is using an instance of CSVDataReader. 7 | The instantiated PipelineOptimizer will try and assemble the best pipeline with the components that are specified in its constructor. 8 | """ 9 | 10 | # prepare data reader using csv file 11 | data_reader = CSVDataReader( 12 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 13 | has_header=False, 14 | contains_classes=True, 15 | ) 16 | 17 | # instantiate PipelineOptimizer that chooses among specified classifiers, feature selection algorithms and feature transform algorithms 18 | # log is True by default, log_verbose means more information if True, log_output_file is the destination of a log file 19 | # if log_output_file is not provided there is no file created 20 | # if log is False, logging is turned off 21 | pipeline_optimizer = PipelineOptimizer( 22 | data=data_reader, 23 | classifiers=[ 24 | "AdaBoost", 25 | "Bagging", 26 | "MultiLayerPerceptron", 27 | "RandomForest", 28 | "ExtremelyRandomizedTrees", 29 | "LinearSVC", 30 | ], 31 | feature_selection_algorithms=[ 32 | "SelectKBest", 33 | "SelectPercentile", 34 | "ParticleSwarmOptimization", 35 | "VarianceThreshold", 36 | ], 37 | feature_transform_algorithms=["Normalizer", "StandardScaler"], 38 | log=True, 39 | log_verbose=True, 40 | log_output_file="output.log", 41 | ) 42 | 43 | # runs the optimization process 44 | # one of the possible pipelines in this case is: SelectPercentile -> Normalizer -> RandomForest 45 | # returns the best found pipeline 46 | # the chosen fitness function and optimization algorithm are Accuracy and Particle Swarm Algorithm 47 | pipeline = pipeline_optimizer.run( 48 | "Accuracy", 10, 10, 30, 30, "ParticleSwarmAlgorithm", "ParticleSwarmAlgorithm" 49 | 
) 50 | 51 | # pipeline variable contains Pipeline object that can be used for further classification, exported as an object (that can be later loaded and used) or exported as text file 52 | -------------------------------------------------------------------------------- /examples/run_pipeline_optimizer_csv_data_categorical.py: -------------------------------------------------------------------------------- 1 | import os 2 | from niaaml import PipelineOptimizer 3 | from niaaml.data import CSVDataReader 4 | 5 | """ 6 | In this example, we show how to use the PipelineOptimizer class. This example is using an instance of CSVDataReader. 7 | The instantiated PipelineOptimizer will try and assemble the best pipeline with the components that are specified in its constructor. 8 | We use a dataset with 1 categorical feature to demonstrate a use of PipelineOptimizer instance with automatic feature encoding. 9 | """ 10 | 11 | # prepare data reader using csv file 12 | data_reader = CSVDataReader( 13 | src=os.path.dirname(os.path.abspath(__file__)) 14 | + "/example_files/dataset_categorical.csv", 15 | has_header=False, 16 | contains_classes=True, 17 | ) 18 | 19 | # instantiate PipelineOptimizer that chooses among specified classifiers, feature selection algorithms and feature transform algorithms 20 | # OneHotEncoder is used for encoding categorical features in this example 21 | # log is True by default, log_verbose means more information if True, log_output_file is the destination of a log file 22 | # if log_output_file is not provided there is no file created 23 | # if log is False, logging is turned off 24 | pipeline_optimizer = PipelineOptimizer( 25 | data=data_reader, 26 | classifiers=[ 27 | "AdaBoost", 28 | "Bagging", 29 | "MultiLayerPerceptron", 30 | "RandomForest", 31 | "ExtremelyRandomizedTrees", 32 | "LinearSVC", 33 | ], 34 | feature_selection_algorithms=[ 35 | "SelectKBest", 36 | "SelectPercentile", 37 | "ParticleSwarmOptimization", 38 | "VarianceThreshold", 39 | ], 
40 | feature_transform_algorithms=["Normalizer", "StandardScaler"], 41 | categorical_features_encoder="OneHotEncoder", 42 | log=True, 43 | log_verbose=True, 44 | log_output_file="output.log", 45 | ) 46 | 47 | # runs the optimization process 48 | # one of the possible pipelines in this case is: SelectPercentile -> Normalizer -> RandomForest 49 | # returns the best found pipeline 50 | # the chosen fitness function and optimization algorithm are Accuracy and Particle Swarm Algorithm 51 | pipeline = pipeline_optimizer.run( 52 | "Accuracy", 10, 10, 30, 30, "ParticleSwarmAlgorithm", "ParticleSwarmAlgorithm" 53 | ) 54 | 55 | # pipeline variable contains Pipeline object that can be used for further classification, exported as an object (that can be later loaded and used) or exported as text file 56 | -------------------------------------------------------------------------------- /examples/run_pipeline_optimizer_csv_data_missing.py: -------------------------------------------------------------------------------- 1 | import os 2 | from niaaml import PipelineOptimizer 3 | from niaaml.data import CSVDataReader 4 | 5 | """ 6 | In this example, we show how to use the PipelineOptimizer class. This example is using an instance of CSVDataReader. 7 | The instantiated PipelineOptimizer will try and assemble the best pipeline with the components that are specified in its constructor. 8 | We use a dataset with 1 categorical feature and missing values to demonstrate a use of PipelineOptimizer instance with automatic feature encoding and imputation. 
9 | """ 10 | 11 | # prepare data reader using csv file 12 | data_reader = CSVDataReader( 13 | src=os.path.dirname(os.path.abspath(__file__)) 14 | + "/example_files/dataset_categorical_missing.csv", 15 | has_header=False, 16 | contains_classes=True, 17 | ) 18 | 19 | # instantiate PipelineOptimizer that chooses among specified classifiers, feature selection algorithms and feature transform algorithms 20 | # OneHotEncoder is used for encoding categorical features in this example 21 | # SimpleImputer is used for imputing missing values in this example 22 | # log is True by default, log_verbose means more information if True, log_output_file is the destination of a log file 23 | # if log_output_file is not provided there is no file created 24 | # if log is False, logging is turned off 25 | pipeline_optimizer = PipelineOptimizer( 26 | data=data_reader, 27 | classifiers=[ 28 | "AdaBoost", 29 | "Bagging", 30 | "MultiLayerPerceptron", 31 | "RandomForest", 32 | "ExtremelyRandomizedTrees", 33 | "LinearSVC", 34 | ], 35 | feature_selection_algorithms=[ 36 | "SelectKBest", 37 | "SelectPercentile", 38 | "ParticleSwarmOptimization", 39 | "VarianceThreshold", 40 | ], 41 | feature_transform_algorithms=["Normalizer", "StandardScaler"], 42 | categorical_features_encoder="OneHotEncoder", 43 | imputer="SimpleImputer", 44 | log=True, 45 | log_verbose=True, 46 | log_output_file="output.log", 47 | ) 48 | 49 | # runs the optimization process 50 | # one of the possible pipelines in this case is: SelectPercentile -> Normalizer -> RandomForest 51 | # returns the best found pipeline 52 | # the chosen fitness function and optimization algorithm are Accuracy and Particle Swarm Algorithm 53 | pipeline = pipeline_optimizer.run( 54 | "Accuracy", 10, 10, 30, 30, "ParticleSwarmAlgorithm", "ParticleSwarmAlgorithm" 55 | ) 56 | 57 | # pipeline variable contains Pipeline object that can be used for further classification, exported as an object (that can be later loaded and used) or exported as text file 
58 | -------------------------------------------------------------------------------- /examples/run_pipeline_optimizer_csv_data_v1.py: -------------------------------------------------------------------------------- 1 | import os 2 | from niaaml import PipelineOptimizer 3 | from niaaml.data import CSVDataReader 4 | 5 | """ 6 | This example presents how to use the PipelineOptimizer class to run the original optimization process according to the paper where NiaAML is proposed. 7 | This example is using an instance of CSVDataReader. 8 | The instantiated PipelineOptimizer will try and assemble the best pipeline with the components that are specified in its constructor. 9 | """ 10 | 11 | # prepare data reader using csv file 12 | data_reader = CSVDataReader( 13 | src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv", 14 | has_header=False, 15 | contains_classes=True, 16 | ) 17 | 18 | # instantiate PipelineOptimizer that chooses among specified classifiers, feature selection algorithms and feature transform algorithms 19 | # log is True by default, log_verbose means more information if True, log_output_file is the destination of a log file 20 | # if log_output_file is not provided there is no file created 21 | # if log is False, logging is turned off 22 | pipeline_optimizer = PipelineOptimizer( 23 | data=data_reader, 24 | classifiers=[ 25 | "AdaBoost", 26 | "Bagging", 27 | "MultiLayerPerceptron", 28 | "RandomForest", 29 | "ExtremelyRandomizedTrees", 30 | "LinearSVC", 31 | ], 32 | feature_selection_algorithms=[ 33 | "SelectKBest", 34 | "SelectPercentile", 35 | "ParticleSwarmOptimization", 36 | "VarianceThreshold", 37 | ], 38 | feature_transform_algorithms=["Normalizer", "StandardScaler"], 39 | log=True, 40 | log_verbose=True, 41 | log_output_file="output.log", 42 | ) 43 | 44 | # runs the optimization process 45 | # one of the possible pipelines in this case is: SelectPercentile -> Normalizer -> RandomForest 46 | # returns the best found pipeline 47 
| # the chosen fitness function and optimization algorithm are Accuracy and Particle Swarm Algorithm 48 | pipeline = pipeline_optimizer.run_v1("Accuracy", 10, 30, "ParticleSwarmAlgorithm") 49 | 50 | # pipeline variable contains Pipeline object that can be used for further classification, exported as an object (that can be later loaded and used) or exported as text file 51 | -------------------------------------------------------------------------------- /niaaml/__init__.py: -------------------------------------------------------------------------------- 1 | from niaaml import classifiers 2 | from niaaml import data 3 | from niaaml import preprocessing 4 | from niaaml import fitness 5 | from niaaml.utilities import MinMax 6 | from niaaml.utilities import ParameterDefinition 7 | from niaaml.utilities import Factory 8 | from niaaml.utilities import OptimizationStats 9 | from niaaml.utilities import get_bin_index 10 | from niaaml.pipeline_optimizer import PipelineOptimizer 11 | from niaaml.pipeline import Pipeline 12 | from niaaml.pipeline_component import PipelineComponent 13 | from niaaml.logger import Logger 14 | from niaaml import cli 15 | 16 | __all__ = [ 17 | "classifiers", 18 | "data", 19 | "preprocessing", 20 | "fitness", 21 | "get_bin_index", 22 | "MinMax", 23 | "ParameterDefinition", 24 | "OptimizationStats", 25 | "Factory", 26 | "PipelineOptimizer", 27 | "Pipeline", 28 | "PipelineComponent", 29 | "Logger", 30 | "cli", 31 | ] 32 | 33 | __project__ = "niaaml" 34 | __version__ = "2.1.2" 35 | -------------------------------------------------------------------------------- /niaaml/classifiers/__init__.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from niaaml.classifiers.random_forest import RandomForest 3 | from niaaml.classifiers.multi_layer_perceptron import MultiLayerPerceptron 4 | from niaaml.classifiers.linear_svc import LinearSVC 5 | from 
niaaml.classifiers.ada_boost import AdaBoost 6 | from niaaml.classifiers.extremely_randomized_trees import ExtremelyRandomizedTrees 7 | from niaaml.classifiers.bagging import Bagging 8 | from niaaml.classifiers.decision_tree import DecisionTree 9 | from niaaml.classifiers.regression_decision_tree import DecisionTreeRegression 10 | from niaaml.classifiers.k_neighbors import KNeighbors 11 | from niaaml.classifiers.gaussian_process import GaussianProcess 12 | from niaaml.classifiers.regression_gaussian_process import GaussianProcessRegression 13 | from niaaml.classifiers.gaussian_naive_bayes import GaussianNB 14 | from niaaml.classifiers.quadratic_driscriminant_analysis import ( 15 | QuadraticDiscriminantAnalysis, 16 | ) 17 | from niaaml.classifiers.regression_linear_model import LinearRegression 18 | from niaaml.classifiers.regression_ridge import RidgeRegression 19 | from niaaml.classifiers.regression_lasso import LassoRegression 20 | from niaaml.classifiers.utility import ClassifierFactory 21 | 22 | __all__ = [ 23 | "Classifier", 24 | "RandomForest", 25 | "MultiLayerPerceptron", 26 | "LinearSVC", 27 | "AdaBoost", 28 | "Bagging", 29 | "ExtremelyRandomizedTrees", 30 | "DecisionTree", 31 | "DecisionTreeRegression", 32 | "KNeighbors", 33 | "GaussianProcess", 34 | "GaussianProcessRegression", 35 | "GaussianNB", 36 | "QuadraticDiscriminantAnalysis", 37 | "ClassifierFactory", 38 | "LinearRegression", 39 | "RidgeRegression", 40 | "LassoRegression", 41 | ] 42 | -------------------------------------------------------------------------------- /niaaml/classifiers/ada_boost.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from niaaml.utilities import MinMax 3 | from niaaml.utilities import ParameterDefinition 4 | from sklearn.ensemble import AdaBoostClassifier 5 | import numpy as np 6 | 7 | import warnings 8 | from sklearn.exceptions import ( 9 | ConvergenceWarning, 10 | 
DataConversionWarning, 11 | DataDimensionalityWarning, 12 | EfficiencyWarning, 13 | FitFailedWarning, 14 | UndefinedMetricWarning, 15 | ) 16 | 17 | __all__ = ["AdaBoost"] 18 | 19 | 20 | class AdaBoost(Classifier): 21 | r"""Implementation of AdaBoost classifier. 22 | 23 | Date: 24 | 2020 25 | 26 | Author: 27 | Luka Pečnik 28 | 29 | License: 30 | MIT 31 | 32 | Reference: 33 | Y. Freund, R. Schapire, “A Decision-Theoretic Generalization of on-Line Learning and an Application to Boosting”, 1995. 34 | 35 | Documentation: 36 | https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html 37 | 38 | See Also: 39 | * :class:`niaaml.classifiers.Classifier` 40 | """ 41 | Name = "AdaBoost" 42 | 43 | def __init__(self, **kwargs): 44 | r"""Initialize AdaBoost instance.""" 45 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 46 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 47 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 48 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 49 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 50 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 51 | 52 | self._params = dict( 53 | n_estimators=ParameterDefinition(MinMax(min=10, max=111), np.uint), 54 | algorithm=ParameterDefinition(["SAMME"]), 55 | ) 56 | self.__ada_boost = AdaBoostClassifier(algorithm='SAMME') 57 | 58 | def set_parameters(self, **kwargs): 59 | r"""Set the parameters/arguments of the algorithm.""" 60 | self.__ada_boost.set_params(**kwargs) 61 | 62 | def fit(self, x, y, **kwargs): 63 | r"""Fit AdaBoost. 64 | 65 | Arguments: 66 | x (pandas.core.frame.DataFrame): n samples to classify. 67 | y (pandas.core.series.Series): n classes of the samples in the x array. 68 | """ 69 | self.__ada_boost.fit(x, y) 70 | 71 | def predict(self, x, **kwargs): 72 | r"""Predict class for each sample (row) in x. 
73 | 74 | Arguments: 75 | x (pandas.core.frame.DataFrame): n samples to classify. 76 | 77 | Returns: 78 | pandas.core.series.Series: n predicted classes. 79 | """ 80 | return self.__ada_boost.predict(x) 81 | 82 | def to_string(self): 83 | r"""User friendly representation of the object. 84 | 85 | Returns: 86 | str: User friendly representation of the object. 87 | """ 88 | return Classifier.to_string(self).format( 89 | name=self.Name, 90 | args=self._parameters_to_string(self.__ada_boost.get_params()), 91 | ) 92 | -------------------------------------------------------------------------------- /niaaml/classifiers/bagging.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from niaaml.utilities import MinMax 3 | from niaaml.utilities import ParameterDefinition 4 | from sklearn.ensemble import BaggingClassifier 5 | import numpy as np 6 | 7 | import warnings 8 | from sklearn.exceptions import ( 9 | ConvergenceWarning, 10 | DataConversionWarning, 11 | DataDimensionalityWarning, 12 | EfficiencyWarning, 13 | FitFailedWarning, 14 | UndefinedMetricWarning, 15 | ) 16 | 17 | __all__ = ["Bagging"] 18 | 19 | 20 | class Bagging(Classifier): 21 | r"""Implementation of bagging classifier. 22 | 23 | Date: 24 | 2020 25 | 26 | Author: 27 | Luka Pečnik 28 | 29 | License: 30 | MIT 31 | 32 | Reference: 33 | L. Breiman, “Bagging predictors”, Machine Learning, 24(2), 123-140, 1996. 
34 | 35 | Documentation: 36 | https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.BaggingClassifier.html 37 | 38 | See Also: 39 | * :class:`niaaml.classifiers.Classifier` 40 | """ 41 | Name = "Bagging" 42 | 43 | def __init__(self, **kwargs): 44 | r"""Initialize Bagging instance.""" 45 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 46 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 47 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 48 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 49 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 50 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 51 | 52 | self._params = dict( 53 | n_estimators=ParameterDefinition(MinMax(min=10, max=111), np.uint), 54 | bootstrap=ParameterDefinition([True, False]), 55 | bootstrap_features=ParameterDefinition([True, False]), 56 | ) 57 | self.__bagging_classifier = BaggingClassifier() 58 | 59 | def set_parameters(self, **kwargs): 60 | r"""Set the parameters/arguments of the algorithm.""" 61 | self.__bagging_classifier.set_params(**kwargs) 62 | 63 | def fit(self, x, y, **kwargs): 64 | r"""Fit Bagging. 65 | 66 | Arguments: 67 | x (pandas.core.frame.DataFrame): n samples to classify. 68 | y (pandas.core.series.Series): n classes of the samples in the x array. 69 | 70 | Returns: 71 | None 72 | """ 73 | self.__bagging_classifier.fit(x, y) 74 | 75 | def predict(self, x, **kwargs): 76 | r"""Predict class for each sample (row) in x. 77 | 78 | Arguments: 79 | x (pandas.core.frame.DataFrame): n samples to classify. 80 | 81 | Returns: 82 | pandas.core.series.Series: n predicted classes. 83 | """ 84 | return self.__bagging_classifier.predict(x) 85 | 86 | def to_string(self): 87 | r"""User friendly representation of the object. 88 | 89 | Returns: 90 | str: User friendly representation of the object. 
91 | """ 92 | return Classifier.to_string(self).format( 93 | name=self.Name, 94 | args=self._parameters_to_string(self.__bagging_classifier.get_params()), 95 | ) 96 | -------------------------------------------------------------------------------- /niaaml/classifiers/classifier.py: -------------------------------------------------------------------------------- 1 | from niaaml.pipeline_component import PipelineComponent 2 | 3 | __all__ = ["Classifier"] 4 | 5 | 6 | class Classifier(PipelineComponent): 7 | r"""Class for implementing classifiers. 8 | 9 | Date: 10 | 2020 11 | 12 | Author: 13 | Luka Pečnik 14 | 15 | License: 16 | MIT 17 | 18 | See Also: 19 | * :class:`niaaml.pipeline_component.PipelineComponent` 20 | """ 21 | 22 | def fit(self, x, y, **kwargs): 23 | r"""Fit implemented classifier. 24 | 25 | Arguments: 26 | x (pandas.core.frame.DataFrame): n samples to classify. 27 | y (pandas.core.series.Series): n classes of the samples in the x array. 28 | """ 29 | return 30 | 31 | def predict(self, x, **kwargs): 32 | r"""Predict class for each sample (row) in x. 33 | 34 | Arguments: 35 | x (pandas.core.frame.DataFrame): n samples to classify. 36 | 37 | Returns: 38 | pandas.core.series.Series: n predicted classes. 39 | """ 40 | return 41 | -------------------------------------------------------------------------------- /niaaml/classifiers/decision_tree.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from niaaml.utilities import ParameterDefinition 3 | from sklearn.tree import DecisionTreeClassifier as DTC 4 | 5 | import warnings 6 | from sklearn.exceptions import ( 7 | ConvergenceWarning, 8 | DataConversionWarning, 9 | DataDimensionalityWarning, 10 | EfficiencyWarning, 11 | FitFailedWarning, 12 | UndefinedMetricWarning, 13 | ) 14 | 15 | __all__ = ["DecisionTree"] 16 | 17 | 18 | class DecisionTree(Classifier): 19 | r"""Implementation of decision tree classifier. 
20 | 21 | Date: 22 | 2020 23 | 24 | Author: 25 | Luka Pečnik 26 | 27 | License: 28 | MIT 29 | 30 | Reference: 31 | L. Breiman, J. Friedman, R. Olshen, and C. Stone, “Classification and Regression Trees”, Wadsworth, Belmont, CA, 1984. 32 | 33 | Documentation: 34 | https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html#sklearn.tree.DecisionTreeClassifier 35 | 36 | See Also: 37 | * :class:`niaaml.classifiers.Classifier` 38 | """ 39 | Name = "Decision Tree Classifier" 40 | 41 | def __init__(self, **kwargs): 42 | r"""Initialize DecisionTree instance.""" 43 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 44 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 45 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 46 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 47 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 48 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 49 | 50 | self._params = dict( 51 | criterion=ParameterDefinition(["gini", "entropy"]), 52 | splitter=ParameterDefinition(["best", "random"]), 53 | ) 54 | self.__decision_tree_classifier = DTC() 55 | 56 | def set_parameters(self, **kwargs): 57 | r"""Set the parameters/arguments of the algorithm.""" 58 | self.__decision_tree_classifier.set_params(**kwargs) 59 | 60 | def fit(self, x, y, **kwargs): 61 | r"""Fit DecisionTree. 62 | 63 | Arguments: 64 | x (pandas.core.frame.DataFrame): n samples to classify. 65 | y (pandas.core.series.Series): n classes of the samples in the x array. 66 | 67 | Returns: 68 | None 69 | """ 70 | self.__decision_tree_classifier.fit(x, y) 71 | 72 | def predict(self, x, **kwargs): 73 | r"""Predict class for each sample (row) in x. 74 | 75 | Arguments: 76 | x (pandas.core.frame.DataFrame): n samples to classify. 77 | 78 | Returns: 79 | pandas.core.series.Series: n predicted classes. 
80 | """ 81 | return self.__decision_tree_classifier.predict(x) 82 | 83 | def to_string(self): 84 | r"""User friendly representation of the object. 85 | 86 | Returns: 87 | str: User friendly representation of the object. 88 | """ 89 | return Classifier.to_string(self).format( 90 | name=self.Name, 91 | args=self._parameters_to_string( 92 | self.__decision_tree_classifier.get_params() 93 | ), 94 | ) 95 | -------------------------------------------------------------------------------- /niaaml/classifiers/gaussian_naive_bayes.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from sklearn.naive_bayes import GaussianNB as GNB 3 | 4 | import warnings 5 | from sklearn.exceptions import ( 6 | ConvergenceWarning, 7 | DataConversionWarning, 8 | DataDimensionalityWarning, 9 | EfficiencyWarning, 10 | FitFailedWarning, 11 | UndefinedMetricWarning, 12 | ) 13 | 14 | __all__ = ["GaussianNB"] 15 | 16 | 17 | class GaussianNB(Classifier): 18 | r"""Implementation of gaussian Naive Bayes classifier. 19 | 20 | Date: 21 | 2020 22 | 23 | Author: 24 | Luka Pečnik 25 | 26 | License: 27 | MIT 28 | 29 | Reference: 30 | Murphy, Kevin P. "Naive bayes classifiers." University of British Columbia 18 (2006): 60. 
31 | 32 | Documentation: 33 | https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html 34 | 35 | See Also: 36 | * :class:`niaaml.classifiers.Classifier` 37 | """ 38 | Name = "Gaussian Naive Bayes" 39 | 40 | def __init__(self, **kwargs): 41 | r"""Initialize GaussianNB instance.""" 42 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 43 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 44 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 45 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 46 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 47 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 48 | 49 | self.__gaussian_nb = GNB() 50 | super(GaussianNB, self).__init__() 51 | 52 | def set_parameters(self, **kwargs): 53 | r"""Set the parameters/arguments of the algorithm.""" 54 | self.__gaussian_nb.set_params(**kwargs) 55 | 56 | def fit(self, x, y, **kwargs): 57 | r"""Fit GaussianNB. 58 | 59 | Arguments: 60 | x (pandas.core.frame.DataFrame): n samples to classify. 61 | y (pandas.core.series.Series): n classes of the samples in the x array. 62 | 63 | Returns: 64 | None 65 | """ 66 | self.__gaussian_nb.fit(x, y) 67 | 68 | def predict(self, x, **kwargs): 69 | r"""Predict class for each sample (row) in x. 70 | 71 | Arguments: 72 | x (pandas.core.frame.DataFrame): n samples to classify. 73 | 74 | Returns: 75 | pandas.core.series.Series: n predicted classes. 76 | """ 77 | return self.__gaussian_nb.predict(x) 78 | 79 | def to_string(self): 80 | r"""User friendly representation of the object. 81 | 82 | Returns: 83 | str: User friendly representation of the object. 
84 | """ 85 | return Classifier.to_string(self).format( 86 | name=self.Name, 87 | args=self._parameters_to_string(self.__gaussian_nb.get_params()), 88 | ) 89 | -------------------------------------------------------------------------------- /niaaml/classifiers/k_neighbors.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from niaaml.utilities import ParameterDefinition 3 | from sklearn.neighbors import KNeighborsClassifier as KNC 4 | 5 | import warnings 6 | from sklearn.exceptions import ( 7 | ConvergenceWarning, 8 | DataConversionWarning, 9 | DataDimensionalityWarning, 10 | EfficiencyWarning, 11 | FitFailedWarning, 12 | UndefinedMetricWarning, 13 | ) 14 | 15 | __all__ = ["KNeighbors"] 16 | 17 | 18 | class KNeighbors(Classifier): 19 | r"""Implementation of k neighbors classifier. 20 | 21 | Date: 22 | 2020 23 | 24 | Author: 25 | Luka Pečnik 26 | 27 | License: 28 | MIT 29 | 30 | Reference: 31 | “Neighbourhood Components Analysis”, J. Goldberger, S. Roweis, G. Hinton, R. Salakhutdinov, Advances in Neural Information Processing Systems, Vol. 17, May 2005, pp. 513-520. 
32 | 33 | Documentation: 34 | https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html 35 | 36 | See Also: 37 | * :class:`niaaml.classifiers.Classifier` 38 | """ 39 | Name = "K Neighbors Classifier" 40 | 41 | def __init__(self, **kwargs): 42 | r"""Initialize KNeighbors instance.""" 43 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 44 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 45 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 46 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 47 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 48 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 49 | 50 | self._params = dict( 51 | weights=ParameterDefinition(["uniform", "distance"]), 52 | algorithm=ParameterDefinition(["auto", "ball_tree", "kd_tree", "brute"]), 53 | ) 54 | self.__kn_classifier = KNC() 55 | 56 | def set_parameters(self, **kwargs): 57 | r"""Set the parameters/arguments of the algorithm.""" 58 | self.__kn_classifier.set_params(**kwargs) 59 | 60 | def fit(self, x, y, **kwargs): 61 | r"""Fit KNeighbors. 62 | 63 | Arguments: 64 | x (pandas.core.frame.DataFrame): n samples to classify. 65 | y (pandas.core.series.Series): n classes of the samples in the x array. 66 | 67 | Returns: 68 | None 69 | """ 70 | self.__kn_classifier.fit(x, y) 71 | 72 | def predict(self, x, **kwargs): 73 | r"""Predict class for each sample (row) in x. 74 | 75 | Arguments: 76 | x (pandas.core.frame.DataFrame): n samples to classify. 77 | 78 | Returns: 79 | pandas.core.series.Series: n predicted classes. 80 | """ 81 | return self.__kn_classifier.predict(x) 82 | 83 | def to_string(self): 84 | r"""User friendly representation of the object. 85 | 86 | Returns: 87 | str: User friendly representation of the object. 
88 | """ 89 | return Classifier.to_string(self).format( 90 | name=self.Name, 91 | args=self._parameters_to_string(self.__kn_classifier.get_params()), 92 | ) 93 | -------------------------------------------------------------------------------- /niaaml/classifiers/linear_svc.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from niaaml.utilities import MinMax 3 | from niaaml.utilities import ParameterDefinition 4 | from sklearn.svm import LinearSVC as LSVC 5 | import numpy as np 6 | 7 | import warnings 8 | from sklearn.exceptions import ( 9 | ConvergenceWarning, 10 | DataConversionWarning, 11 | DataDimensionalityWarning, 12 | EfficiencyWarning, 13 | FitFailedWarning, 14 | UndefinedMetricWarning, 15 | ) 16 | 17 | __all__ = ["LinearSVC"] 18 | 19 | 20 | class LinearSVC(Classifier): 21 | r"""Implementation of linear support vector classification. 22 | 23 | Date: 24 | 2020 25 | 26 | Author: 27 | Luka Pečnik 28 | 29 | License: 30 | MIT 31 | 32 | Reference: 33 | Fan, Rong-En, et al. "LIBLINEAR: A library for large linear classification." Journal of machine learning research 9.Aug (2008): 1871-1874. 
34 | 35 | Documentation: 36 | https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html 37 | 38 | See Also: 39 | * :class:`niaaml.classifiers.Classifier` 40 | """ 41 | Name = "Linear Support Vector Classification" 42 | 43 | def __init__(self, **kwargs): 44 | r"""Initialize LinearSVCClassifier instance.""" 45 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 46 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 47 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 48 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 49 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 50 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 51 | 52 | self._params = dict( 53 | penalty=ParameterDefinition(["l1", "l2"]), 54 | max_iter=ParameterDefinition(MinMax(min=300, max=2000), np.uint), 55 | ) 56 | self.__linear_SVC = LSVC(dual=True) 57 | 58 | def set_parameters(self, **kwargs): 59 | r"""Set the parameters/arguments of the algorithm.""" 60 | self.__linear_SVC.set_params(**kwargs) 61 | 62 | def fit(self, x, y, **kwargs): 63 | r"""Fit LinearSVCClassifier. 64 | 65 | Arguments: 66 | x (pandas.core.frame.DataFrame): n samples to classify. 67 | y (pandas.core.series.Series): n classes of the samples in the x array. 68 | 69 | Returns: 70 | None 71 | """ 72 | self.__linear_SVC.fit(x, y) 73 | 74 | def predict(self, x, **kwargs): 75 | r"""Predict class for each sample (row) in x. 76 | 77 | Arguments: 78 | x (pandas.core.frame.DataFrame): n samples to classify. 79 | 80 | Returns: 81 | pandas.core.series.Series: n predicted classes. 82 | """ 83 | return self.__linear_SVC.predict(x) 84 | 85 | def to_string(self): 86 | r"""User friendly representation of the object. 87 | 88 | Returns: 89 | str: User friendly representation of the object. 
90 | """ 91 | return Classifier.to_string(self).format( 92 | name=self.Name, 93 | args=self._parameters_to_string(self.__linear_SVC.get_params()), 94 | ) 95 | -------------------------------------------------------------------------------- /niaaml/classifiers/quadratic_driscriminant_analysis.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA 3 | 4 | import warnings 5 | from sklearn.exceptions import ( 6 | ConvergenceWarning, 7 | DataConversionWarning, 8 | DataDimensionalityWarning, 9 | EfficiencyWarning, 10 | FitFailedWarning, 11 | UndefinedMetricWarning, 12 | ) 13 | 14 | __all__ = ["QuadraticDiscriminantAnalysis"] 15 | 16 | 17 | class QuadraticDiscriminantAnalysis(Classifier): 18 | r"""Implementation of quadratic discriminant analysis classifier. 19 | 20 | Date: 21 | 2020 22 | 23 | Author: 24 | Luka Pečnik 25 | 26 | License: 27 | MIT 28 | 29 | Reference: 30 | “The Elements of Statistical Learning”, Hastie T., Tibshirani R., Friedman J., Section 4.3, p.106-119, 2008. 
31 | 32 | Documentation: 33 | https://scikit-learn.org/stable/modules/generated/sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.html#sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis 34 | 35 | See Also: 36 | * :class:`niaaml.classifiers.Classifier` 37 | """ 38 | Name = "Quadratic Discriminant Analysis" 39 | 40 | def __init__(self, **kwargs): 41 | r"""Initialize QuadraticDiscriminantAnalysis instance.""" 42 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 43 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 44 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 45 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 46 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 47 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 48 | 49 | self.__qda = QDA() 50 | super(QuadraticDiscriminantAnalysis, self).__init__() 51 | 52 | def set_parameters(self, **kwargs): 53 | r"""Set the parameters/arguments of the algorithm.""" 54 | self.__qda.set_params(**kwargs) 55 | 56 | def fit(self, x, y, **kwargs): 57 | r"""Fit QuadraticDiscriminantAnalysis. 58 | 59 | Arguments: 60 | x (pandas.core.frame.DataFrame): n samples to classify. 61 | y (pandas.core.series.Series): n classes of the samples in the x array. 62 | 63 | Returns: 64 | None 65 | """ 66 | self.__qda.fit(x, y) 67 | 68 | def predict(self, x, **kwargs): 69 | r"""Predict class for each sample (row) in x. 70 | 71 | Arguments: 72 | x (pandas.core.frame.DataFrame): n samples to classify. 73 | 74 | Returns: 75 | pandas.core.series.Series: n predicted classes. 76 | """ 77 | return self.__qda.predict(x) 78 | 79 | def to_string(self): 80 | r"""User friendly representation of the object. 81 | 82 | Returns: 83 | str: User friendly representation of the object. 
84 | """ 85 | return Classifier.to_string(self).format( 86 | name=self.Name, args=self._parameters_to_string(self.__qda.get_params()) 87 | ) 88 | -------------------------------------------------------------------------------- /niaaml/classifiers/random_forest.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from niaaml.utilities import MinMax 3 | from niaaml.utilities import ParameterDefinition 4 | from sklearn.ensemble import RandomForestClassifier as RF 5 | import numpy as np 6 | 7 | import warnings 8 | from sklearn.exceptions import ( 9 | ConvergenceWarning, 10 | DataConversionWarning, 11 | DataDimensionalityWarning, 12 | EfficiencyWarning, 13 | FitFailedWarning, 14 | UndefinedMetricWarning, 15 | ) 16 | 17 | __all__ = ["RandomForest"] 18 | 19 | 20 | class RandomForest(Classifier): 21 | r"""Implementation of random forest classifier. 22 | 23 | Date: 24 | 2020 25 | 26 | Author: 27 | Luka Pečnik 28 | 29 | License: 30 | MIT 31 | 32 | Reference: 33 | Breiman, “Random Forests”, Machine Learning, 45(1), 5-32, 2001. 
34 | 35 | Documentation: 36 | https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html 37 | 38 | See Also: 39 | * :class:`niaaml.classifiers.Classifier` 40 | """ 41 | Name = "Random Forest Classifier" 42 | 43 | def __init__(self, **kwargs): 44 | r"""Initialize RandomForestClassifier instance.""" 45 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 46 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 47 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 48 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 49 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 50 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 51 | 52 | self._params = dict( 53 | n_estimators=ParameterDefinition(MinMax(min=10, max=111), np.uint) 54 | ) 55 | self.__random_forest_classifier = RF() 56 | 57 | def set_parameters(self, **kwargs): 58 | r"""Set the parameters/arguments of the algorithm.""" 59 | self.__random_forest_classifier.set_params(**kwargs) 60 | 61 | def fit(self, x, y, **kwargs): 62 | r"""Fit RandomForestClassifier. 63 | 64 | Arguments: 65 | x (pandas.core.frame.DataFrame): n samples to classify. 66 | y (pandas.core.series.Series): n classes of the samples in the x array. 67 | 68 | Returns: 69 | None 70 | """ 71 | self.__random_forest_classifier.fit(x, y) 72 | 73 | def predict(self, x, **kwargs): 74 | r"""Predict class for each sample (row) in x. 75 | 76 | Arguments: 77 | x (pandas.core.frame.DataFrame): n samples to classify. 78 | 79 | Returns: 80 | pandas.core.series.Series: n predicted classes. 81 | """ 82 | return self.__random_forest_classifier.predict(x) 83 | 84 | def to_string(self): 85 | r"""User friendly representation of the object. 86 | 87 | Returns: 88 | str: User friendly representation of the object. 
89 | """ 90 | return Classifier.to_string(self).format( 91 | name=self.Name, 92 | args=self._parameters_to_string( 93 | self.__random_forest_classifier.get_params() 94 | ), 95 | ) 96 | -------------------------------------------------------------------------------- /niaaml/classifiers/regression_decision_tree.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from niaaml.utilities import ParameterDefinition 3 | from sklearn.tree import DecisionTreeRegressor as DTR 4 | 5 | import warnings 6 | from sklearn.exceptions import ( 7 | ConvergenceWarning, 8 | DataConversionWarning, 9 | DataDimensionalityWarning, 10 | EfficiencyWarning, 11 | FitFailedWarning, 12 | UndefinedMetricWarning, 13 | ) 14 | 15 | __all__ = ["DecisionTreeRegression"] 16 | 17 | 18 | class DecisionTreeRegression(Classifier): 19 | r"""Implementation of decision tree regression. 20 | 21 | Date: 22 | 2024 23 | 24 | Author: 25 | Laurenz Farthofer 26 | 27 | License: 28 | MIT 29 | 30 | Documentation: 31 | https://scikit-learn.org/stable/modules/tree.html#regression 32 | 33 | See Also: 34 | * :class:`niaaml.classifiers.Classifier` 35 | """ 36 | Name = "Decision Tree Regression" 37 | 38 | def __init__(self, **kwargs): 39 | r"""Initialize DecisionTree instance.""" 40 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 41 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 42 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 43 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 44 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 45 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 46 | 47 | self._params = dict( 48 | criterion=ParameterDefinition(["squared_error", "friedman_mse", "absolute_error", "poisson"]), 49 | splitter=ParameterDefinition(["best", "random"]), 50 | ) 51 | 
self.__decision_tree_regression = DTR() 52 | 53 | def set_parameters(self, **kwargs): 54 | r"""Set the parameters/arguments of the algorithm.""" 55 | self.__decision_tree_regression.set_params(**kwargs) 56 | 57 | def fit(self, x, y, **kwargs): 58 | r"""Fit DecisionTree. 59 | 60 | Arguments: 61 | x (pandas.core.frame.DataFrame): n samples to classify. 62 | y (pandas.core.series.Series): n classes of the samples in the x array. 63 | 64 | Returns: 65 | None 66 | """ 67 | self.__decision_tree_regression.fit(x, y) 68 | 69 | def predict(self, x, **kwargs): 70 | r"""Predict class for each sample (row) in x. 71 | 72 | Arguments: 73 | x (pandas.core.frame.DataFrame): n samples to classify. 74 | 75 | Returns: 76 | pandas.core.series.Series: n predicted classes. 77 | """ 78 | return self.__decision_tree_regression.predict(x) 79 | 80 | def to_string(self): 81 | r"""User friendly representation of the object. 82 | 83 | Returns: 84 | str: User friendly representation of the object. 85 | """ 86 | return Classifier.to_string(self).format( 87 | name=self.Name, 88 | args=self._parameters_to_string( 89 | self.__decision_tree_regression.get_params() 90 | ), 91 | ) 92 | -------------------------------------------------------------------------------- /niaaml/classifiers/regression_gaussian_process.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from niaaml.utilities import MinMax 3 | from niaaml.utilities import ParameterDefinition 4 | from sklearn.gaussian_process import GaussianProcessRegressor as GPR 5 | import numpy as np 6 | 7 | import warnings 8 | from sklearn.exceptions import ( 9 | ConvergenceWarning, 10 | DataConversionWarning, 11 | DataDimensionalityWarning, 12 | EfficiencyWarning, 13 | FitFailedWarning, 14 | UndefinedMetricWarning, 15 | ) 16 | 17 | __all__ = ["GaussianProcessRegression"] 18 | 19 | 20 | class GaussianProcessRegression(Classifier): 21 | r"""Implementation of 
class GaussianProcessRegression(Classifier):
    r"""Gaussian process regression.

    Date:
        2024

    Author:
        Laurenz Farthofer

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcessRegressor.html#sklearn.gaussian_process.GaussianProcessRegressor

    See Also:
        * :class:`niaaml.classifiers.Classifier`
    """
    Name = "Gaussian Process Regression"

    def __init__(self, **kwargs):
        r"""Initialize GaussianProcess instance."""
        # Suppress noisy scikit-learn warnings so optimization logs stay readable.
        for warning_category in (
            ConvergenceWarning,
            DataConversionWarning,
            DataDimensionalityWarning,
            EfficiencyWarning,
            FitFailedWarning,
            UndefinedMetricWarning,
        ):
            warnings.filterwarnings(action="ignore", category=warning_category)

        # No tunable hyperparameters are exposed to the optimizer.
        self._params = dict()
        self.__gaussian_process = GPR()

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the algorithm."""
        self.__gaussian_process.set_params(**kwargs)

    def fit(self, x, y, **kwargs):
        r"""Fit the Gaussian process regressor.

        Arguments:
            x (pandas.core.frame.DataFrame): n samples to classify.
            y (pandas.core.series.Series): n classes of the samples in the x array.

        Returns:
            None
        """
        self.__gaussian_process.fit(x, y)

    def predict(self, x, **kwargs):
        r"""Predict class for each sample (row) in x.

        Arguments:
            x (pandas.core.frame.DataFrame): n samples to classify.

        Returns:
            pandas.core.series.Series: n predicted classes.
        """
        return self.__gaussian_process.predict(x)

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        template = Classifier.to_string(self)
        args_text = self._parameters_to_string(self.__gaussian_process.get_params())
        return template.format(name=self.Name, args=args_text)
84 | """ 85 | return Classifier.to_string(self).format( 86 | name=self.Name, 87 | args=self._parameters_to_string(self.__gaussian_process.get_params()), 88 | ) 89 | -------------------------------------------------------------------------------- /niaaml/classifiers/regression_lasso.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from niaaml.classifiers.classifier import Classifier 3 | from sklearn.linear_model import Lasso as LR 4 | 5 | import warnings 6 | from sklearn.exceptions import ( 7 | ConvergenceWarning, 8 | DataConversionWarning, 9 | DataDimensionalityWarning, 10 | EfficiencyWarning, 11 | FitFailedWarning, 12 | UndefinedMetricWarning, 13 | ) 14 | 15 | from niaaml.utilities import MinMax, ParameterDefinition 16 | 17 | __all__ = ["LassoRegression"] 18 | 19 | 20 | class LassoRegression(Classifier): 21 | r"""Implementation of linear lasso regression. 22 | 23 | Date: 24 | 2024 25 | 26 | Author: 27 | Laurenz Farthofer 28 | 29 | License: 30 | MIT 31 | 32 | Documentation: 33 | https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html#sklearn.linear_model.Lasso 34 | 35 | See Also: 36 | * :class:`niaaml.classifiers.Classifier` 37 | """ 38 | Name = "Lasso Regression" 39 | Task = "Regression" 40 | 41 | def __init__(self, **kwargs): 42 | r"""Initialize LinearRegression instance.""" 43 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 44 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 45 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 46 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 47 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 48 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 49 | 50 | self.model = LR() 51 | 52 | self._params = dict( 53 | alpha=ParameterDefinition(MinMax(min=0.0, max=10e6), np.float64), 54 | 
fit_intercept=ParameterDefinition([True, False]), 55 | max_iter=ParameterDefinition(MinMax(min=300, max=2000), np.uint), 56 | ) 57 | 58 | def set_parameters(self, **kwargs): 59 | r"""Set the parameters/arguments of the algorithm.""" 60 | self.model.set_params(**kwargs) 61 | 62 | def fit(self, x, y, **kwargs): 63 | r"""Fit LinearSVCClassifier. 64 | 65 | Arguments: 66 | x (pandas.core.frame.DataFrame): n samples to classify. 67 | y (pandas.core.series.Series): n classes of the samples in the x array. 68 | 69 | Returns: 70 | None 71 | """ 72 | self.model.fit(x, y) 73 | 74 | def predict(self, x, **kwargs): 75 | r"""Predict class for each sample (row) in x. 76 | 77 | Arguments: 78 | x (pandas.core.frame.DataFrame): n samples to classify. 79 | 80 | Returns: 81 | pandas.core.series.Series: n predicted classes. 82 | """ 83 | return self.model.predict(x) 84 | 85 | def to_string(self): 86 | r"""User friendly representation of the object. 87 | 88 | Returns: 89 | str: User friendly representation of the object. 90 | """ 91 | return Classifier.to_string(self).format( 92 | name=self.Name, 93 | args=self._parameters_to_string(self.__gaussian_process.get_params()), 94 | ) 95 | -------------------------------------------------------------------------------- /niaaml/classifiers/regression_linear_model.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.classifier import Classifier 2 | from sklearn.linear_model import LinearRegression as LR 3 | 4 | import warnings 5 | from sklearn.exceptions import ( 6 | ConvergenceWarning, 7 | DataConversionWarning, 8 | DataDimensionalityWarning, 9 | EfficiencyWarning, 10 | FitFailedWarning, 11 | UndefinedMetricWarning, 12 | ) 13 | 14 | from niaaml.utilities import ParameterDefinition 15 | 16 | __all__ = ["LinearRegression"] 17 | 18 | 19 | class LinearRegression(Classifier): 20 | r"""Implementation of linear regression. 
class LinearRegression(Classifier):
    r"""Implementation of linear regression.

    Date:
        2024

    Author:
        Laurenz Farthofer

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html#sklearn.linear_model.LinearRegression

    See Also:
        * :class:`niaaml.classifiers.Classifier`
    """
    Name = "Linear Regression"
    Task = "Regression"

    def __init__(self, **kwargs):
        r"""Initialize LinearRegression instance."""
        warnings.filterwarnings(action="ignore", category=ConvergenceWarning)
        warnings.filterwarnings(action="ignore", category=DataConversionWarning)
        warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning)
        warnings.filterwarnings(action="ignore", category=EfficiencyWarning)
        warnings.filterwarnings(action="ignore", category=FitFailedWarning)
        warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning)

        # Wrapped scikit-learn estimator.
        self.model = LR()

        # Hyperparameter search space explored by the pipeline optimizer.
        self._params = dict(
            fit_intercept=ParameterDefinition([True, False]),
        )

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the algorithm."""
        self.model.set_params(**kwargs)

    def fit(self, x, y, **kwargs):
        r"""Fit LinearRegression.

        Arguments:
            x (pandas.core.frame.DataFrame): n samples to classify.
            y (pandas.core.series.Series): n classes of the samples in the x array.

        Returns:
            None
        """
        self.model.fit(x, y)

    def predict(self, x, **kwargs):
        r"""Predict class for each sample (row) in x.

        Arguments:
            x (pandas.core.frame.DataFrame): n samples to classify.

        Returns:
            pandas.core.series.Series: n predicted classes.
        """
        return self.model.predict(x)

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        # BUG FIX: previously referenced self.__gaussian_process — a copy-paste
        # leftover from the Gaussian process wrapper that raised AttributeError.
        # The estimator here is held in self.model.
        return Classifier.to_string(self).format(
            name=self.Name,
            args=self._parameters_to_string(self.model.get_params()),
        )
87 | """ 88 | return Classifier.to_string(self).format( 89 | name=self.Name, 90 | args=self._parameters_to_string(self.__gaussian_process.get_params()), 91 | ) 92 | -------------------------------------------------------------------------------- /niaaml/classifiers/regression_ridge.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from niaaml.classifiers.classifier import Classifier 3 | from sklearn.linear_model import Ridge as LR 4 | 5 | import warnings 6 | from sklearn.exceptions import ( 7 | ConvergenceWarning, 8 | DataConversionWarning, 9 | DataDimensionalityWarning, 10 | EfficiencyWarning, 11 | FitFailedWarning, 12 | UndefinedMetricWarning, 13 | ) 14 | 15 | from niaaml.utilities import MinMax, ParameterDefinition 16 | 17 | __all__ = ["RidgeRegression"] 18 | 19 | 20 | class RidgeRegression(Classifier): 21 | r"""Implementation of linear ridge regression. 22 | 23 | Date: 24 | 2024 25 | 26 | Author: 27 | Laurenz Farthofer 28 | 29 | License: 30 | MIT 31 | 32 | Documentation: 33 | https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html#sklearn.linear_model.Ridge 34 | 35 | See Also: 36 | * :class:`niaaml.classifiers.Classifier` 37 | """ 38 | Name = "Ridge Regression" 39 | Task = "Regression" 40 | 41 | def __init__(self, **kwargs): 42 | r"""Initialize LinearRegression instance.""" 43 | warnings.filterwarnings(action="ignore", category=ConvergenceWarning) 44 | warnings.filterwarnings(action="ignore", category=DataConversionWarning) 45 | warnings.filterwarnings(action="ignore", category=DataDimensionalityWarning) 46 | warnings.filterwarnings(action="ignore", category=EfficiencyWarning) 47 | warnings.filterwarnings(action="ignore", category=FitFailedWarning) 48 | warnings.filterwarnings(action="ignore", category=UndefinedMetricWarning) 49 | 50 | self.model = LR() 51 | 52 | self._params = dict( 53 | alpha=ParameterDefinition(MinMax(min=0.0, max=100000.0), np.float64), 54 | 
fit_intercept=ParameterDefinition([True, False]), 55 | max_iter=ParameterDefinition(MinMax(min=300, max=2000), np.uint), 56 | ) 57 | 58 | def set_parameters(self, **kwargs): 59 | r"""Set the parameters/arguments of the algorithm.""" 60 | self.model.set_params(**kwargs) 61 | 62 | def fit(self, x, y, **kwargs): 63 | r"""Fit LinearSVCClassifier. 64 | 65 | Arguments: 66 | x (pandas.core.frame.DataFrame): n samples to classify. 67 | y (pandas.core.series.Series): n classes of the samples in the x array. 68 | 69 | Returns: 70 | None 71 | """ 72 | self.model.fit(x, y) 73 | 74 | def predict(self, x, **kwargs): 75 | r"""Predict class for each sample (row) in x. 76 | 77 | Arguments: 78 | x (pandas.core.frame.DataFrame): n samples to classify. 79 | 80 | Returns: 81 | pandas.core.series.Series: n predicted classes. 82 | """ 83 | return self.model.predict(x) 84 | 85 | def to_string(self): 86 | r"""User friendly representation of the object. 87 | 88 | Returns: 89 | str: User friendly representation of the object. 
90 | """ 91 | return Classifier.to_string(self).format( 92 | name=self.Name, 93 | args=self._parameters_to_string(self.__gaussian_process.get_params()), 94 | ) 95 | -------------------------------------------------------------------------------- /niaaml/classifiers/utility.py: -------------------------------------------------------------------------------- 1 | from niaaml.classifiers.regression_decision_tree import DecisionTreeRegression 2 | from niaaml.classifiers.regression_gaussian_process import GaussianProcessRegression 3 | from niaaml.utilities import Factory 4 | from niaaml.classifiers.ada_boost import AdaBoost 5 | from niaaml.classifiers.bagging import Bagging 6 | from niaaml.classifiers.extremely_randomized_trees import ExtremelyRandomizedTrees 7 | from niaaml.classifiers.linear_svc import LinearSVC 8 | from niaaml.classifiers.multi_layer_perceptron import MultiLayerPerceptron 9 | from niaaml.classifiers.random_forest import RandomForest 10 | from niaaml.classifiers.decision_tree import DecisionTree 11 | from niaaml.classifiers.k_neighbors import KNeighbors 12 | from niaaml.classifiers.gaussian_process import GaussianProcess 13 | from niaaml.classifiers.gaussian_naive_bayes import GaussianNB 14 | from niaaml.classifiers.quadratic_driscriminant_analysis import ( 15 | QuadraticDiscriminantAnalysis, 16 | ) 17 | from niaaml.classifiers.regression_linear_model import LinearRegression 18 | from niaaml.classifiers.regression_ridge import RidgeRegression 19 | from niaaml.classifiers.regression_lasso import LassoRegression 20 | 21 | __all__ = ["ClassifierFactory"] 22 | 23 | 24 | class ClassifierFactory(Factory): 25 | r"""Class with string mappings to classifiers. 26 | 27 | Date: 28 | 2020 29 | 30 | Author: 31 | Luka Pečnik 32 | 33 | License: 34 | MIT 35 | 36 | Attributes: 37 | _entities (Dict[str, Classifier]): Mapping from strings to classifiers. 
38 | 39 | See Also: 40 | * :class:`niaaml.utilities.Factory` 41 | """ 42 | 43 | def _set_parameters(self, **kwargs): 44 | r"""Set the parameters/arguments of the factory.""" 45 | self._entities = { 46 | "AdaBoost": AdaBoost, 47 | "Bagging": Bagging, 48 | "ExtremelyRandomizedTrees": ExtremelyRandomizedTrees, 49 | "LinearSVC": LinearSVC, 50 | "MultiLayerPerceptron": MultiLayerPerceptron, 51 | "RandomForest": RandomForest, 52 | "DecisionTree": DecisionTree, 53 | "DecisionTreeRegression": DecisionTreeRegression, 54 | "KNeighbors": KNeighbors, 55 | "GaussianProcess": GaussianProcess, 56 | "GaussianProcessRegression": GaussianProcessRegression, 57 | "GaussianNB": GaussianNB, 58 | "QuadraticDiscriminantAnalysis": QuadraticDiscriminantAnalysis, 59 | "LinearRegression": LinearRegression, 60 | "RidgeRegression": RidgeRegression, 61 | "LassoRegression": LassoRegression, 62 | } 63 | -------------------------------------------------------------------------------- /niaaml/data/__init__.py: -------------------------------------------------------------------------------- 1 | from niaaml.data.data_reader import DataReader 2 | from niaaml.data.basic_data_reader import BasicDataReader 3 | from niaaml.data.csv_data_reader import CSVDataReader 4 | 5 | __all__ = ["DataReader", "CSVDataReader", "BasicDataReader"] 6 | -------------------------------------------------------------------------------- /niaaml/data/basic_data_reader.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from niaaml.data.data_reader import DataReader 3 | 4 | __all__ = ["BasicDataReader"] 5 | 6 | 7 | class BasicDataReader(DataReader): 8 | r"""Implementation of basic data reader. 9 | 10 | Date: 11 | 2020 12 | 13 | Author: 14 | Luka Pečnik 15 | 16 | License: 17 | MIT 18 | 19 | See Also: 20 | * :class:`niaaml.data.DataReader` 21 | """ 22 | 23 | def _set_parameters(self, x, y=None, **kwargs): 24 | r"""Set the parameters of the algorithm. 
class CSVDataReader(DataReader):
    r"""Implementation of CSV data reader.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Attributes:
        __src (string): Path to a CSV file.
        __contains_classes (bool): Tells if src contains expected classification results or only features.
        __has_header (bool): Tells if src contains header row.
        __ignore_columns (List[int]): Column indices to drop.

    See Also:
        * :class:`niaaml.data.DataReader`
    """

    def _set_parameters(
        self, src, contains_classes=True, has_header=False, ignore_columns=None, **kwargs
    ):
        r"""Set the parameters of the algorithm.

        Arguments:
            src (string): Path to a CSV dataset file.
            contains_classes (Optional[bool]): Tells if src contains expected classification results or only features.
            has_header (Optional[bool]): Tells if src contains header row.
            ignore_columns (Optional[List[int]]): Column indices to drop. Defaults to no columns.
        """
        self.__src = src
        self.__contains_classes = contains_classes
        self.__has_header = has_header
        # Fix: the previous `ignore_columns=[]` default was a shared mutable
        # default argument; a caller mutating it would affect every later
        # instance. Using None and creating a fresh list avoids that.
        self.__ignore_columns = [] if ignore_columns is None else ignore_columns
        self._read_data()

    def _read_data(self, **kwargs):
        r"""Read data from the configured CSV source into _x (and _y)."""
        data = pd.read_csv(
            self.__src, header="infer" if self.__has_header else None
        )
        # Capture column labels before popping so __ignore_columns indices
        # keep referring to the original column positions.
        header = data.columns

        if self.__contains_classes:
            # The last column holds the expected classification results.
            self._y = data.pop(header[-1])

        if len(self.__ignore_columns) > 0:
            data.drop(header[self.__ignore_columns], axis=1, inplace=True)

        self._x = data
44 | """ 45 | return self._y 46 | 47 | def set_x(self, value): 48 | r"""Set the value of _x.""" 49 | self._x = value 50 | 51 | def set_y(self, value): 52 | r"""Set the value of _y.""" 53 | self._y = value 54 | 55 | def _read_data(self): 56 | r"""Read data from expected source.""" 57 | return 58 | -------------------------------------------------------------------------------- /niaaml/fitness/__init__.py: -------------------------------------------------------------------------------- 1 | from niaaml.fitness.fitness_function import FitnessFunction 2 | from niaaml.fitness.accuracy import Accuracy 3 | from niaaml.fitness.cohen_kappa import CohenKappa 4 | from niaaml.fitness.f1 import F1 5 | from niaaml.fitness.r2 import R2 6 | from niaaml.fitness.mse import MSE 7 | from niaaml.fitness.precision import Precision 8 | from niaaml.fitness.utility import FitnessFactory 9 | 10 | __all__ = [ 11 | "FitnessFunction", 12 | "Accuracy", 13 | "CohenKappa", 14 | "F1", 15 | "R2", 16 | "MSE", 17 | "Precision", 18 | "FitnessFactory", 19 | ] 20 | -------------------------------------------------------------------------------- /niaaml/fitness/accuracy.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import accuracy_score 2 | from niaaml.fitness.fitness_function import FitnessFunction 3 | 4 | __all__ = ["Accuracy"] 5 | 6 | 7 | class Accuracy(FitnessFunction): 8 | r"""Class representing the accuracy as a fitness function. 9 | 10 | Date: 11 | 2020 12 | 13 | Author: 14 | Luka Pečnik 15 | 16 | License: 17 | MIT 18 | 19 | Documentation: 20 | https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html 21 | 22 | See Also: 23 | * :class:`niaaml.fitness.FitnessFunction` 24 | """ 25 | Name = "Accuracy" 26 | 27 | def get_fitness(self, predicted, expected): 28 | r"""Return fitness value. The larger return value should represent a better fitness for the framework to work properly. 
class CohenKappa(FitnessFunction):
    r"""Cohen's kappa statistic used as a fitness function.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.metrics.cohen_kappa_score.html

    See Also:
        * :class:`niaaml.fitness.FitnessFunction`
    """

    Name = "Cohen's Kappa"

    def get_fitness(self, predicted, expected):
        r"""Return fitness value. The larger return value should represent a better fitness for the framework to work properly.

        Arguments:
            predicted (pandas.core.series.Series): Predicted values.
            expected (pandas.core.series.Series): Expected values.

        Returns:
            float: Calculated fitness value.
        """
        score = cohen_kappa_score(y1=expected, y2=predicted)
        return score
class FitnessFunction:
    r"""Base class for implementing fitness functions.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Attributes:
        Name (str): Name of the fitness function.
    """

    # Human-readable name of the concrete metric; subclasses override this.
    Name = None

    def __init__(self, **kwargs):
        r"""Initialize fitness function."""
        self.set_parameters(**kwargs)

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the fitness function."""
        return

    def get_fitness(self, predicted, expected):
        r"""Return fitness value. The larger return value should represent a better fitness for the framework to work properly.

        Arguments:
            predicted (pandas.core.series.Series): Predicted values.
            expected (pandas.core.series.Series): Expected values.

        Returns:
            float: Calculated fitness value.
        """
        return None

    def get_bounds(self):
        r"""Return the optimization bounds for this fitness function.

        The default is suitable for classification metrics bounded to [0, 1];
        regression metrics override this with wider ranges.

        Returns:
            Tuple[float, float]: Lower and upper optimization bounds. Defaults to (0.0, 1.0).
        """
        # Fix: the original docstring misspelled "Returns" as "Retunrs" and
        # used a non-raw string inconsistent with the rest of the file.
        return (0.0, 1.0)
class Precision(FitnessFunction):
    r"""Weighted precision used as a fitness function.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.metrics.precision_score.html

    See Also:
        * :class:`niaaml.fitness.FitnessFunction`
    """

    Name = "Precision"

    def get_fitness(self, predicted, expected):
        r"""Return fitness value. The larger return value should represent a better fitness for the framework to work properly.

        Arguments:
            predicted (pandas.core.series.Series): Predicted values.
            expected (pandas.core.series.Series): Expected values.

        Returns:
            float: Calculated fitness value.
        """
        # Weighted averaging accounts for label imbalance across classes.
        return precision_score(
            y_true=expected, y_pred=predicted, average="weighted"
        )
class FitnessFactory(Factory):
    r"""Factory with string mappings to fitness classes.

    Attributes:
        _entities (Dict[str, Fitness]): Mapping from strings to fitness classes.

    See Also:
        * :class:`niaaml.utilities.Factory`
    """

    def _set_parameters(self, **kwargs):
        r"""Populate the registry of available fitness functions."""
        # Registered names match the class names used throughout the framework.
        self._entities = dict(
            Accuracy=Accuracy,
            Precision=Precision,
            CohenKappa=CohenKappa,
            F1=F1,
            R2=R2,
            MSE=MSE,
        )
class PipelineComponent:
    r"""Base class for implementing pipeline components.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Attributes:
        Name (str): Name of the pipeline component.
        _params (Dict[str, ParameterDefinition]): Dictionary of components's parameters with possible values. Possible parameter values are given as an instance of the ParameterDefinition class.

    See Also:
        * :class:`niaaml.utilities.ParameterDefinition`
    """

    Name = None

    def __init__(self, **kwargs):
        r"""Initialize pipeline component.

        Notes:
            _params variable should not be static as in some cases it is instance specific. See * :class:`niaaml.preprocessing.feature_selection.select_k_best.SelectKBest` for example.
        """
        self._params = {}
        self.set_parameters(**kwargs)

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the pipeline component."""
        return

    def get_params_dict(self):
        r"""Return parameters definition dictionary."""
        return self._params

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        return "Name: {name}\nArguments:\n{args}"

    def _parameters_to_string(self, dictionary):
        r"""User friendly representation of component's parameters.

        Arguments:
            dictionary (dict): Dictionary of parameters.

        Returns:
            str: One tab-indented "key = value" line per parameter, or "None" for an empty dictionary.
        """
        rendered = [
            "\t%s = %s\n" % (key, value) for key, value in dictionary.items()
        ]
        return "".join(rendered) if rendered else "None"
class FeatureEncoder:
    r"""Base class for implementing feature encoders.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Attributes:
        Name (str): Name of the feature encoder.
    """

    Name = None

    def __init__(self, **kwargs):
        r"""Initialize feature encoder."""
        # The base encoder carries no state; subclasses set up their backend here.
        pass

    def fit(self, feature):
        r"""Fit feature encoder.

        Arguments:
            feature (pandas.core.frame.DataFrame): A column (categorical) from DataFrame of features.
        """
        pass

    def transform(self, feature):
        r"""Transform feature's values.

        Arguments:
            feature (pandas.core.frame.DataFrame): A column (categorical) from DataFrame of features.

        Returns:
            pandas.core.frame.DataFrame: A transformed column.
        """
        pass

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        return "{name}"
def encode_categorical_features(features, encoder):
    """Encode categorical features.

    Arguments:
        features (pandas.core.frame.DataFrame): DataFrame of features.
        encoder (str): Name of the encoder to use.

    Returns:
        Tuple[pandas.core.frame.DataFrame, Optional[Dict[str, FeatureEncoder]]]:
            1. Converted dataframe.
            2. Dictionary of fitted encoders keyed by categorical column, or None when there are no categorical features.
    """
    factory = EncoderFactory()

    encoders = {}
    to_drop = []
    enc_features = pd.DataFrame()
    cols = [
        col
        for col in features.columns
        if not pd.api.types.is_numeric_dtype(features[col])
    ]
    for c in cols:
        # Fix: obtain a FRESH encoder instance per column. The previous code
        # created one encoder and re-fit it on each column, so every entry in
        # `encoders` referenced the same object fitted only on the last
        # column, corrupting later per-column transforms.
        # (Assumes factory.get_result returns a new instance per call — the
        # original usage with enc.fit implies it returns instances.)
        enc = factory.get_result(encoder)
        enc.fit(features[[c]])
        tr = enc.transform(features[[c]])
        to_drop.append(c)
        enc_features = pd.concat([enc_features, tr], axis=1)
        encoders[c] = enc
    features = features.drop(to_drop, axis=1)
    features = pd.concat([features, enc_features], axis=1)
    return features, encoders if len(encoders) > 0 else None


class EncoderFactory(Factory):
    r"""Class with string mappings to encoders.

    Attributes:
        _entities (Dict[str, FeatureEncoder]): Mapping from strings to encoders.

    See Also:
        * :class:`niaaml.utilities.Factory`
    """

    def _set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the factory."""
        self._entities = {"OneHotEncoder": OneHotEncoder}
class _FeatureSelectionThresholdProblem(Problem):
    r"""NiaPy Problem class implementation.

    Attributes:
        __best_fitness (float): Current best fitness of the optimization process.
        __best_solution (numpy.ndarray[float]): Current best solution of the optimization process.
    """

    def __init__(self, X, y):
        r"""Initialize feature selection problem.

        Arguments:
            X (pandas.core.frame.DataFrame): Features.
            y (pandas.core.series.Series): Expected classifier results.
        """
        self.__best_fitness = np.inf
        self.__best_solution = None
        # One extra dimension beyond the feature count holds the threshold gene.
        super().__init__(X.shape[1] + 1, 0.0, 1.0)
        split = train_test_split(X, y, test_size=0.2)
        self.train_X, self.test_X, self.train_y, self.test_y = split

    def get_best_solution(self):
        r"""Get best solution found.

        Returns:
            numpy.ndarray[float]: Best solution found.
        """
        return self.__best_solution

    def _evaluate(self, x):
        r"""Override fitness function.

        Args:
            x (np.ndarray): Solution vector; the last entry is the selection threshold.

        Returns:
            float: Fitness value of `x` (1 - validation accuracy; lower is better).
        """
        threshold = x[-1]
        self.Threshold = threshold  # expose the current threshold

        # Keep the genes that reach the threshold.
        mask = x[:-1] >= threshold

        # Worst possible score when the threshold deselects every feature.
        if not mask.any():
            return 1

        model = LogisticRegression(solver="lbfgs", max_iter=10000)
        model.fit(self.train_X.iloc[:, mask], self.train_y)
        fitness = 1.0 - model.score(self.test_X.iloc[:, mask], self.test_y)

        if fitness < self.__best_fitness:
            self.__best_fitness = fitness
            self.__best_solution = x
        return fitness
class GreyWolfOptimizer(FeatureSelectionAlgorithm):
    r"""Implementation of feature selection using GWO algorithm.

    Date:
        2020

    Author:
        Luka Pečnik

    Reference:
        The implementation is adapted according to the following article:
        D. Fister, I. Fister, T. Jagrič, I. Fister Jr., J. Brest. A novel self-adaptive differential evolution for feature selection using threshold mechanism . In: Proceedings of the 2018 IEEE Symposium on Computational Intelligence (SSCI 2018), pp. 17-24, 2018.

    Reference URL:
        http://iztok-jr-fister.eu/static/publications/236.pdf

    License:
        MIT

    See Also:
        * :class:`niaaml.preprocessing.feature_selection.feature_selection_algorithm.FeatureSelectionAlgorithm`
    """

    Name = "Grey Wolf Optimizer"

    def __init__(self, **kwargs):
        r"""Initialize GWO feature selection algorithm."""
        super(GreyWolfOptimizer, self).__init__()
        self.__gwo = GWO(population_size=10)

    def __final_output(self, sol):
        r"""Calculate final array of features.

        Arguments:
            sol (numpy.ndarray[float]): Individual of population/ possible solution.

        Returns:
            numpy.ndarray[bool]: Mask of selected features.
        """
        # The last gene is the threshold; features at or above it are kept.
        return sol[:-1] >= sol[-1]

    def select_features(self, x, y, **kwargs):
        r"""Perform the feature selection process.

        Arguments:
            x (pandas.core.frame.DataFrame): Array of original features.
            y (pandas.core.series.Series): Expected classifier results.

        Returns:
            numpy.ndarray[bool]: Mask of selected features.
        """
        problem = _FeatureSelectionThresholdProblem(x, y)
        self.__gwo.run(Task(problem=problem, max_evals=1000))
        return self.__final_output(problem.get_best_solution())

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        params = self._parameters_to_string(self.__gwo.get_parameters())
        return FeatureSelectionAlgorithm.to_string(self).format(
            name=self.Name, args=params
        )
Fister Jr., J. Brest. A novel self-adaptive differential evolution for feature selection using threshold mechanism . In: Proceedings of the 2018 IEEE Symposium on Computational Intelligence (SSCI 2018), pp. 17-24, 2018. 25 | 26 | Reference URL: 27 | http://iztok-jr-fister.eu/static/publications/236.pdf 28 | 29 | License: 30 | MIT 31 | 32 | See Also: 33 | * :class:`niaaml.preprocessing.feature_selection.feature_selection_algorithm.FeatureSelectionAlgorithm` 34 | """ 35 | Name = "Self-Adaptive Differential Evolution" 36 | 37 | def __init__(self, **kwargs): 38 | r"""Initialize GWO feature selection algorithm.""" 39 | super(jDEFSTH, self).__init__() 40 | self.__jdefsth = SelfAdaptiveDifferentialEvolution( 41 | population_size=10, differential_weight=0.5, f_lower=0.0, f_upper=2.0, tao1=0.9, 42 | crossover_probability=0.5, tao2=0.45 43 | ) 44 | 45 | def __final_output(self, sol): 46 | r"""Calculate final array of features. 47 | 48 | Arguments: 49 | sol (numpy.ndarray[float]): Individual of population/ possible solution. 50 | 51 | Returns: 52 | numpy.ndarray[bool]: Mask of selected features. 53 | """ 54 | selected = numpy.ones(sol.shape[0] - 1, dtype=bool) 55 | threshold = sol[sol.shape[0] - 1] 56 | for i in range(sol.shape[0] - 1): 57 | if sol[i] < threshold: 58 | selected[i] = False 59 | return selected 60 | 61 | def select_features(self, x, y, **kwargs): 62 | r"""Perform the feature selection process. 63 | 64 | Arguments: 65 | x (pandas.core.frame.DataFrame): Array of original features. 66 | y (pandas.core.series.Series) Expected classifier results. 67 | 68 | Returns: 69 | numpy.ndarray[bool]: Mask of selected features. 70 | """ 71 | problem = _FeatureSelectionThresholdProblem(x, y) 72 | task = Task(problem=problem, max_evals=1000) 73 | self.__jdefsth.run(task) 74 | return self.__final_output(problem.get_best_solution()) 75 | 76 | def to_string(self): 77 | r"""User friendly representation of the object. 
class SelectKBest(FeatureSelectionAlgorithm):
    r"""Implementation of feature selection using selection of k best features according to used score function.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html

    See Also:
        * :class:`niaaml.preprocessing.feature_selection.feature_selection_algorithm.FeatureSelectionAlgorithm`
    """

    Name = "Select K Best"

    def __init__(self, **kwargs):
        r"""Initialize SelectKBest feature selection algorithm.

        Notes:
            ``_params['k']`` starts out as None: a sensible value range for k
            cannot be determined before the length of the feature vector is
            known, so k only joins the optimization process later.
        """
        self._params = dict(
            score_func=ParameterDefinition([chi2, f_classif, mutual_info_classif]),
            k=None,
        )
        self.__k = None
        self.__select_k_best = SelectKB()

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the algorithm."""
        self.__select_k_best.set_params(**kwargs)

    def select_features(self, x, y, **kwargs):
        r"""Perform the feature selection process.

        Arguments:
            x (pandas.core.frame.DataFrame): Array of original features.
            y (pandas.core.series.Series): Expected classifier results.

        Returns:
            numpy.ndarray[bool]: Mask of selected features.
        """
        if self.__k is None:
            # First call: the feature count is now known, so expose k to the
            # optimizer and seed the estimator with a random k in [1, count].
            feature_count = x.shape[1]
            self.__k = feature_count
            self._params["k"] = ParameterDefinition(MinMax(1, feature_count), int)
            initial_k = int(np.around(np.random.uniform(1, feature_count)))
            self.__select_k_best.set_params(k=initial_k)

        self.__select_k_best.fit(x, y)
        return self.__select_k_best.get_support()

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        template = FeatureSelectionAlgorithm.to_string(self)
        return template.format(
            name=self.Name,
            args=self._parameters_to_string(self.__select_k_best.get_params()),
        )
class SelectUnivariateRegression(FeatureSelectionAlgorithm):
    r"""Implementation of feature selection using a generic univariate selection strategy from scikit learn.

    Date:
        2024

    Author:
        Laurenz Farthofer

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.GenericUnivariateSelect.html#sklearn.feature_selection.GenericUnivariateSelect

    See Also:
        * :class:`niaaml.preprocessing.feature_selection.feature_selection_algorithm.FeatureSelectionAlgorithm`
    """
    Name = "Select Univariate Regression"

    def __init__(self, **kwargs):
        r"""Initialize SelectUnivariateRegression feature selection algorithm."""
        # r_regression scores features by Pearson correlation with the target,
        # which is why this selector targets regression tasks.
        self._params = dict(
            score_func=ParameterDefinition([r_regression]),
        )
        self.__select = Select()

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the algorithm."""
        self.__select.set_params(**kwargs)

    def select_features(self, x, y, **kwargs):
        r"""Perform the feature selection process.

        Arguments:
            x (pandas.core.frame.DataFrame): Array of original features.
            y (pandas.core.series.Series): Expected regression targets.

        Returns:
            numpy.ndarray[bool]: Mask of selected features.
        """
        self.__select.fit(x, y)
        return self.__select.get_support()

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        return FeatureSelectionAlgorithm.to_string(self).format(
            name=self.Name,
            args=self._parameters_to_string(self.__select.get_params()),
        )
class VarianceThreshold(FeatureSelectionAlgorithm):
    r"""Implementation of feature selection using variance threshold.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html

    See Also:
        * :class:`niaaml.preprocessing.feature_selection.feature_selection_algorithm.FeatureSelectionAlgorithm`
    """

    Name = "Variance Threshold"

    def __init__(self, **kwargs):
        r"""Initialize VarianceThreshold feature selection algorithm."""
        self._params = {"threshold": ParameterDefinition(MinMax(0, 0.1), float)}
        self.__variance_threshold = VarThr()

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the algorithm."""
        self.__variance_threshold.set_params(**kwargs)

    def select_features(self, x, y, **kwargs):
        r"""Perform the feature selection process.

        This selector is unsupervised; ``y`` is accepted for interface
        compatibility but not used.

        Arguments:
            x (pandas.core.frame.DataFrame): Array of original features.
            y (pandas.core.series.Series): Expected classifier results.

        Returns:
            numpy.ndarray[bool]: Mask of selected features.
        """
        selector = self.__variance_threshold
        selector.fit(x)
        return selector.get_support()

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        template = FeatureSelectionAlgorithm.to_string(self)
        return template.format(
            name=self.Name,
            args=self._parameters_to_string(self.__variance_threshold.get_params()),
        )
8 | 9 | Date: 10 | 2020 11 | 12 | Author: 13 | Luka Pečnik 14 | 15 | License: 16 | MIT 17 | 18 | See Also: 19 | * :class:`niaaml.preprocessing.preprocessing_algorithm.PreprocessingAlgorithm` 20 | """ 21 | 22 | def fit(self, x, **kwargs): 23 | r"""Fit implemented feature transform algorithm. 24 | 25 | Arguments: 26 | x (pandas.core.frame.DataFrame): n samples to fit transformation algorithm. 27 | """ 28 | return 29 | 30 | def transform(self, x, **kwargs): 31 | r"""Transforms the given x data. 32 | 33 | Arguments: 34 | x (pandas.core.frame.DataFrame): Data to transform. 35 | 36 | Returns: 37 | pandas.core.frame.DataFrame: Transformed data. 38 | """ 39 | return x 40 | -------------------------------------------------------------------------------- /niaaml/preprocessing/feature_transform/max_abs_scaler.py: -------------------------------------------------------------------------------- 1 | from sklearn.preprocessing import MaxAbsScaler as MAS 2 | from niaaml.preprocessing.feature_transform.feature_transform_algorithm import ( 3 | FeatureTransformAlgorithm, 4 | ) 5 | 6 | __all__ = ["MaxAbsScaler"] 7 | 8 | 9 | class MaxAbsScaler(FeatureTransformAlgorithm): 10 | r"""Implementation of feature scaling by its maximum absolute value. 11 | 12 | Date: 13 | 2020 14 | 15 | Author: 16 | Luka Pečnik 17 | 18 | License: 19 | MIT 20 | 21 | Documentation: 22 | https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MaxAbsScaler.html#sklearn.preprocessing.MaxAbsScaler 23 | 24 | See Also: 25 | * :class:`niaaml.preprocessing.feature_transform.FeatureTransformAlgorithm` 26 | """ 27 | Name = "Maximum Absolute Scaler" 28 | 29 | def __init__(self, **kwargs): 30 | r"""Initialize MaxAbsScaler.""" 31 | super(MaxAbsScaler, self).__init__() 32 | self.__max_abs_scaler = MAS() 33 | 34 | def fit(self, x, **kwargs): 35 | r"""Fit implemented transformation algorithm. 36 | 37 | Arguments: 38 | x (pandas.core.frame.DataFrame): n samples to fit transformation algorithm. 
class Normalizer(FeatureTransformAlgorithm):
    r"""Implementation of feature normalization algorithm.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.Normalizer

    See Also:
        * :class:`niaaml.preprocessing.feature_transform.FeatureTransformAlgorithm`
    """
    Name = "Normalizer"

    def __init__(self, **kwargs):
        r"""Initialize Normalizer."""
        self._params = dict(norm=ParameterDefinition(["l1", "l2", "max"]))
        self.__params = None
        self.__normalizer = Nrm()

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the algorithm.

        Bug fix: previously the keyword arguments were only stored in
        ``self.__params`` and never forwarded to the underlying sklearn
        ``Normalizer``, so the selected ``norm`` was silently ignored and the
        default ('l2') was always applied.
        """
        self.__params = dict(kwargs)
        # NOTE(review): ``axis`` is kept for backward compatibility of the
        # stored parameter dict, but the sklearn estimator itself has no such
        # parameter and normalizes per sample.
        self.__params["axis"] = 0
        self.__normalizer.set_params(**kwargs)

    def fit(self, x, **kwargs):
        r"""Fit implemented transformation algorithm.

        Arguments:
            x (pandas.core.frame.DataFrame): n samples to fit transformation algorithm.
        """
        self.__normalizer.fit(x)

    def transform(self, x, **kwargs):
        r"""Transforms the given x data.

        Arguments:
            x (pandas.core.frame.DataFrame): Data to transform.

        Returns:
            pandas.core.frame.DataFrame: Transformed data.
        """
        return self.__normalizer.transform(x)

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        return FeatureTransformAlgorithm.to_string(self).format(
            name=self.Name,
            args=self._parameters_to_string(self.__normalizer.get_params()),
        )
class RobustScaler(FeatureTransformAlgorithm):
    r"""Implementation of the robust scaler.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.RobustScaler.html#sklearn.preprocessing.RobustScaler

    See Also:
        * :class:`niaaml.preprocessing.feature_transform.FeatureTransformAlgorithm`
    """

    Name = "Robust Scaler"

    def __init__(self, **kwargs):
        r"""Initialize RobustScaler."""
        # Both toggles of the sklearn estimator take part in optimization.
        self._params = {
            "with_centering": ParameterDefinition([True, False]),
            "with_scaling": ParameterDefinition([True, False]),
        }
        self.__robust_scaler = RS()

    def fit(self, x, **kwargs):
        r"""Fit implemented transformation algorithm.

        Arguments:
            x (pandas.core.frame.DataFrame): n samples to fit transformation algorithm.
        """
        self.__robust_scaler.fit(x)

    def transform(self, x, **kwargs):
        r"""Transforms the given x data.

        Arguments:
            x (pandas.core.frame.DataFrame): Data to transform.

        Returns:
            pandas.core.frame.DataFrame: Transformed data.
        """
        return self.__robust_scaler.transform(x)

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        template = FeatureTransformAlgorithm.to_string(self)
        return template.format(
            name=self.Name,
            args=self._parameters_to_string(self.__robust_scaler.get_params()),
        )
class FeatureTransformAlgorithmFactory(Factory):
    r"""Class with string mappings to feature transform algorithms.

    Attributes:
        _entities (Dict[str, FeatureTransformAlgorithm]): Mapping from strings to feature transform algorithms.
    """

    def _set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the factory."""
        # The registry keys deliberately equal the class names, so they can be
        # derived from the classes themselves.
        registered = (
            Normalizer,
            StandardScaler,
            MaxAbsScaler,
            QuantileTransformer,
            RobustScaler,
        )
        self._entities = {algorithm.__name__: algorithm for algorithm in registered}
41 | """ 42 | return None 43 | 44 | def to_string(self): 45 | r"""User friendly representation of the object. 46 | 47 | Returns: 48 | str: User friendly representation of the object. 49 | """ 50 | return "{name}" 51 | -------------------------------------------------------------------------------- /niaaml/preprocessing/imputation/simple_imputer.py: -------------------------------------------------------------------------------- 1 | from sklearn.impute import SimpleImputer as SI 2 | from niaaml.preprocessing.imputation.imputer import Imputer 3 | import numpy as np 4 | import pandas as pd 5 | 6 | __all__ = ["SimpleImputer"] 7 | 8 | 9 | class SimpleImputer(Imputer): 10 | r"""Implementation of simple imputer. 11 | 12 | Date: 13 | 2020 14 | 15 | Author: 16 | Luka Pečnik 17 | 18 | License: 19 | MIT 20 | 21 | Documentation: 22 | https://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html 23 | 24 | See Also: 25 | * :class:`niaaml.preprocessing.imputation.Imputer` 26 | """ 27 | Name = "Simple Imputer" 28 | 29 | def __init__(self, **kwargs): 30 | r"""Initialize imputer.""" 31 | self.__simple_imputer = SI(missing_values=np.nan) 32 | 33 | def fit(self, feature): 34 | r"""Fit imputer. 35 | 36 | Arguments: 37 | feature (pandas.core.frame.DataFrame): A column from DataFrame of features. 38 | """ 39 | if not pd.api.types.is_numeric_dtype(feature.iloc[:, 0]): 40 | replacement_val = feature.mode().iloc[0, 0] 41 | self.__simple_imputer.set_params( 42 | **{"fill_value": replacement_val, "strategy": "constant"} 43 | ) 44 | self.__simple_imputer.fit(feature) 45 | else: 46 | self.__simple_imputer.fit(feature) 47 | 48 | def transform(self, feature): 49 | r"""Transform feature's values. 50 | 51 | Arguments: 52 | feature (pandas.core.frame.DataFrame): A column from DataFrame of features. 53 | 54 | Returns: 55 | pandas.core.frame.DataFrame: A transformed column. 
56 | """ 57 | return self.__simple_imputer.transform(feature) 58 | 59 | def to_string(self): 60 | r"""User friendly representation of the object. 61 | 62 | Returns: 63 | str: User friendly representation of the object. 64 | """ 65 | return Imputer.to_string(self).format(name=self.Name) 66 | -------------------------------------------------------------------------------- /niaaml/preprocessing/imputation/utility.py: -------------------------------------------------------------------------------- 1 | from niaaml.preprocessing.imputation.simple_imputer import SimpleImputer 2 | from niaaml.utilities import Factory 3 | 4 | __all__ = ["ImputerFactory", "impute_features"] 5 | 6 | 7 | def impute_features(features, imputer): 8 | """Impute features with missing data. 9 | 10 | Arguments: 11 | features (pandas.core.frame.DataFrame): DataFrame of features. 12 | imputer (str): Name of the imputer to use. 13 | 14 | Returns: 15 | Tuple[pandas.core.frame.DataFrame, Dict[Imputer]]: 16 | 1. Converted dataframe. 17 | 2. Dictionary of imputers for all features with missing data. 18 | """ 19 | imp = ImputerFactory().get_result(imputer) 20 | 21 | imputers = {} 22 | cols = [col for col in features.columns if features[col].isnull().any()] 23 | for c in cols: 24 | imp.fit(features[[c]]) 25 | features.loc[:, c] = imp.transform(features[[c]]) 26 | imputers[c] = imp 27 | 28 | return features, imputers if len(imputers) > 0 else None 29 | 30 | 31 | class ImputerFactory(Factory): 32 | r"""Class with string mappings to imputers. 33 | 34 | Attributes: 35 | _entities (Dict[str, Imputer]): Mapping from strings to imputers. 
class PreprocessingAlgorithm(PipelineComponent):
    r"""Base class for implementing preprocessing algorithms.

    Concrete preprocessing components (encoders, imputers, feature selection
    and feature transform algorithms) extend this class; it adds no behavior
    of its own beyond what the parent ``PipelineComponent`` provides.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    See Also:
        * :class:`niaaml.pipeline_component.PipelineComponent`
    """
", "Laurenz Farthofer "] 7 | keywords = ['classification', 'NiaPy', 'scikit-learn', 'nature-inspired algorithms', 'feature selection', 'preprocessing'] 8 | homepage = "https://github.com/firefly-cpp/NiaAML" 9 | repository = "https://github.com/firefly-cpp/NiaAML" 10 | documentation= "https://niaaml.readthedocs.io/en/latest/" 11 | readme = "README.md" 12 | include = [ 13 | { path="LICENSE", format="sdist" }, 14 | { path="CHANGELOG.md", format="sdist" }, 15 | { path="CITATION.md", format="sdist" }, 16 | { path="COMPONENTS.md", format="sdist" } 17 | ] 18 | 19 | [tool.poetry.scripts] 20 | niaaml = "niaaml.cli:main" 21 | 22 | [tool.poetry.dependencies] 23 | python = ">=3.9,<3.14" 24 | numpy = "^1.19.1" 25 | scikit-learn = "^1.6.1" 26 | niapy = "^2.5.2" 27 | pandas = "^2.1.1" 28 | typer = "^0.12.3" 29 | loguru = "^0.7.2" 30 | 31 | [tool.poetry.dev-dependencies] 32 | sphinx = "^3.3.1" 33 | sphinx-rtd-theme = "^0.5.0" 34 | coveralls = "^2.2.0" 35 | autoflake = "^1.4" 36 | black = "^21.5b1" 37 | pre-commit = "^2.12.1" 38 | pytest = "^7.4.2" 39 | pytest-cov = "^4.1.0" 40 | 41 | [build-system] 42 | requires = ["poetry-core"] 43 | build-backend = "poetry.core.masonry.api" 44 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firefly-cpp/NiaAML/11766422d32bc83e12f10706f59674fdb38e783e/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_basic_data_reader.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from niaaml.data import BasicDataReader 3 | import numpy 4 | 5 | 6 | class BasicDataReaderTestCase(TestCase): 7 | def setUp(self): 8 | self.__x = numpy.random.uniform(low=0.0, high=15.0, size=(100, 6)) 9 | self.__y = numpy.random.choice(["Class 1", "Class 2"], size=100) 10 | 11 | def 
class ClassifierFactoryTestCase(TestCase):
    """Tests for the classifier factory's string-to-class mapping."""

    def setUp(self):
        self.__factory = ClassifierFactory()

    def test_get_result_works_fine(self):
        # Every registered name must resolve to a usable Classifier instance.
        for name in self.__factory._entities:
            classifier = self.__factory.get_result(name)
            self.assertIsNotNone(classifier)
            self.assertIsInstance(classifier, Classifier)

        # Unknown names must raise instead of returning a default.
        self.assertRaises(TypeError, self.__factory.get_result, "non_existent_name")

    def test_get_dictionary_works_fine(self):
        mapping = self.__factory.get_name_to_classname_mapping()
        self.assertEqual(
            len(self.__factory._entities.keys()), len(mapping.keys())
        )
        for value in mapping.values():
            self.assertIsNotNone(value)
test_header_classes_works_fine(self): 10 | data_reader = CSVDataReader( 11 | src=os.path.dirname(os.path.abspath(__file__)) 12 | + "/tests_files/dataset_header_classes.csv", 13 | has_header=True, 14 | contains_classes=True, 15 | ) 16 | x = data_reader.get_x() 17 | y = data_reader.get_y() 18 | self.assertEqual(x.shape, (100, 6)) 19 | self.assertEqual(y.shape, (100,)) 20 | 21 | def test_no_header_classes_works_fine(self): 22 | data_reader = CSVDataReader( 23 | src=os.path.dirname(os.path.abspath(__file__)) 24 | + "/tests_files/dataset_no_header_classes.csv", 25 | has_header=False, 26 | contains_classes=True, 27 | ) 28 | x = data_reader.get_x() 29 | y = data_reader.get_y() 30 | self.assertEqual(x.shape, (100, 6)) 31 | self.assertEqual(y.shape, (100,)) 32 | 33 | def test_no_header_no_classes_works_fine(self): 34 | data_reader = CSVDataReader( 35 | src=os.path.dirname(os.path.abspath(__file__)) 36 | + "/tests_files/dataset_no_header_no_classes.csv", 37 | has_header=False, 38 | contains_classes=False, 39 | ) 40 | x = data_reader.get_x() 41 | y = data_reader.get_y() 42 | self.assertEqual(x.shape, (100, 6)) 43 | self.assertIsNone(y) 44 | 45 | def test_header_no_classes_works_fine(self): 46 | data_reader = CSVDataReader( 47 | src=os.path.dirname(os.path.abspath(__file__)) 48 | + "/tests_files/dataset_header_no_classes.csv", 49 | has_header=True, 50 | contains_classes=False, 51 | ) 52 | x = data_reader.get_x() 53 | y = data_reader.get_y() 54 | self.assertEqual(x.shape, (100, 6)) 55 | self.assertIsNone(y) 56 | 57 | def test_ignore_columns_works_fine(self): 58 | data_reader = CSVDataReader( 59 | src=os.path.dirname(os.path.abspath(__file__)) 60 | + "/tests_files/dataset_real_estate_regression.csv", 61 | has_header=True, 62 | contains_classes=True, 63 | ignore_columns=[0] 64 | ) 65 | x = data_reader.get_x() 66 | y = data_reader.get_y() 67 | self.assertEqual(x.shape, (414, 6)) 68 | self.assertEqual(y.shape, (414,)) 69 | 
class EncoderFactoryTestCase(TestCase):
    """Tests for the feature-encoder factory.

    NOTE(review): previously misnamed ``FitnessFactoryTestCase`` — a
    copy-paste from the fitness-factory tests that also clashed with the
    class of the same name in ``test_fitness_factory.py``.
    """

    def setUp(self):
        self.__factory = EncoderFactory()

    def test_get_result_works_fine(self):
        # Every registered name must resolve to a FeatureEncoder instance.
        for entry in self.__factory._entities:
            instance = self.__factory.get_result(entry)
            self.assertIsNotNone(instance)
            self.assertIsInstance(instance, FeatureEncoder)

        # Unknown names must raise instead of returning a default.
        with self.assertRaises(TypeError):
            self.__factory.get_result("non_existent_name")

    def test_get_dictionary_works_fine(self):
        d = self.__factory.get_name_to_classname_mapping()
        self.assertEqual(
            len(self.__factory._entities.keys()), len(d.keys())
        )

        for k in d:
            self.assertIsNotNone(d[k])
class FeatureSelectionTestCase(TestCase):
    """Every selection algorithm must return one mask entry per feature."""

    def setUp(self):
        self.__data = CSVDataReader(
            src=os.path.dirname(os.path.abspath(__file__))
            + "/tests_files/dataset_header_classes.csv",
            has_header=True,
            contains_classes=True,
        )

    def _assert_mask_covers_features(self, algorithm):
        """Run selection and check the mask length equals the feature count."""
        selected_features_mask = algorithm.select_features(
            self.__data.get_x(), self.__data.get_y()
        )
        self.assertEqual(self.__data.get_x().shape[1], len(selected_features_mask))

    def test_PSO_works_fine(self):
        self._assert_mask_covers_features(fs.ParticleSwarmOptimization())

    def test_select_k_best_works_fine(self):
        self._assert_mask_covers_features(fs.SelectKBest())

    def test_select_percentile_works_fine(self):
        self._assert_mask_covers_features(fs.SelectPercentile())

    def test_bat_algorithm_works_fine(self):
        self._assert_mask_covers_features(fs.BatAlgorithm())

    def test_de_works_fine(self):
        self._assert_mask_covers_features(fs.DifferentialEvolution())

    def test_gwo_works_fine(self):
        self._assert_mask_covers_features(fs.GreyWolfOptimizer())

    def test_jdefsth_works_fine(self):
        self._assert_mask_covers_features(fs.jDEFSTH())

    def test_vt_works_fine(self):
        self._assert_mask_covers_features(fs.VarianceThreshold())
class FeatureTransformTestCase(TestCase):
    """Every transform must preserve the shape of the feature matrix."""

    def setUp(self):
        self.__data = CSVDataReader(
            src=os.path.dirname(os.path.abspath(__file__))
            + "/tests_files/dataset_header_classes.csv",
            has_header=True,
            contains_classes=True,
        )

    def _assert_shape_preserved(self, algorithm):
        """Fit, transform, and verify output shape equals input shape."""
        algorithm.fit(self.__data.get_x())
        transformed = algorithm.transform(self.__data.get_x())
        self.assertEqual(transformed.shape, self.__data.get_x().shape)

    def test_mas_works_fine(self):
        self._assert_shape_preserved(ft.MaxAbsScaler())

    def test_norm_works_fine(self):
        self._assert_shape_preserved(ft.Normalizer())

    def test_qt_works_fine(self):
        # n_quantiles must not exceed the sample count of the test dataset.
        self._assert_shape_preserved(ft.QuantileTransformer(n_quantiles=25))

    def test_rs_works_fine(self):
        self._assert_shape_preserved(ft.RobustScaler())

    def test_ss_works_fine(self):
        self._assert_shape_preserved(ft.StandardScaler())
"Class 2", 19 | "Class 1", 20 | "Class 1", 21 | "Class 2", 22 | "Class 1", 23 | "Class 2", 24 | "Class 1", 25 | "Class 1", 26 | "Class 1", 27 | "Class 1", 28 | "Class 2", 29 | "Class 1", 30 | ] 31 | ) 32 | self.__predicted = np.array( 33 | [ 34 | "Class 1", 35 | "Class 1", 36 | "Class 1", 37 | "Class 2", 38 | "Class 2", 39 | "Class 2", 40 | "Class 1", 41 | "Class 1", 42 | "Class 1", 43 | "Class 2", 44 | "Class 1", 45 | "Class 1", 46 | "Class 2", 47 | "Class 2", 48 | "Class 1", 49 | "Class 2", 50 | "Class 1", 51 | "Class 2", 52 | "Class 2", 53 | "Class 2", 54 | ] 55 | ) 56 | 57 | def test_accuracy_works_fine(self): 58 | ff = f.Accuracy() 59 | val = ff.get_fitness(self.__predicted, self.__y) 60 | self.assertEqual(val, 0.5) 61 | 62 | def test_precision_works_fine(self): 63 | ff = f.Precision() 64 | val = ff.get_fitness(self.__predicted, self.__y) 65 | self.assertEqual(val, 0.5199999999999999) 66 | 67 | def test_cohen_kappa_works_fine(self): 68 | ff = f.CohenKappa() 69 | val = ff.get_fitness(self.__predicted, self.__y) 70 | self.assertEqual(val, 0.0) 71 | 72 | def test_f1_works_fine(self): 73 | ff = f.F1() 74 | val = ff.get_fitness(self.__predicted, self.__y) 75 | self.assertEqual(val, 0.505050505050505) 76 | 77 | 78 | class RegressionFitnessTestCase(TestCase): 79 | def setUp(self): 80 | self.__y = np.array([3, -0.5, 2, 7]) 81 | self.__predicted = np.array([2.5, 0.0, 2, 8]) 82 | 83 | def test_r2_works_fine(self): 84 | ff = f.R2() 85 | val = ff.get_fitness(self.__predicted, self.__y) 86 | self.assertEqual(val, 0.9486081370449679) 87 | -------------------------------------------------------------------------------- /tests/test_fitness_factory.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from niaaml.fitness import FitnessFactory, FitnessFunction 3 | 4 | 5 | class FitnessFactoryTestCase(TestCase): 6 | def setUp(self): 7 | self.__factory = FitnessFactory() 8 | 9 | def 
class ImputerTestCase(TestCase):
    """Exercises SimpleImputer on one numeric and one categorical column."""

    def setUp(self):
        numeric_part = numpy.random.uniform(low=0.0, high=15.0, size=(100, 6))
        categorical_part = numpy.array(
            [numpy.random.choice(["a", "b"], size=(100,))]
        ).T
        x = numpy.concatenate((numeric_part, categorical_part), axis=1)
        # Inject missing values into both a categorical and a numeric column.
        x[50, 6] = numpy.nan
        x[30, 2] = numpy.nan
        y = numpy.random.choice(["Class 1", "Class 2"], size=100)
        self.__data_reader = BasicDataReader(x=x, y=y)

    def test_simple_imputer_works_fine(self):
        features = self.__data_reader.get_x()
        # Column 2 is numeric, column 6 is categorical; both must come back
        # with no missing entries after imputation.
        for column in (2, 6):
            imputer = SimpleImputer()
            imputer.fit(features[[column]])
            imputed = pandas.DataFrame(imputer.transform(features[[column]]))
            self.assertFalse(imputed[0].isnull().any())

    def test_utility_method_works_fine(self):
        numeric_columns = {index: "float64" for index in range(6)}
        features = self.__data_reader.get_x().astype(numeric_columns)
        features.iloc[50, 6] = numpy.nan
        features, imputers = impute_features(features, "SimpleImputer")
        self.assertEqual(len(imputers), 2)
        self.assertEqual(features.shape[1], 7)
class OptimizationStatsTestCase(TestCase):
    """Checks that OptimizationStats computes the expected fixed metrics."""

    def setUp(self):
        # Labels are encoded compactly as digit strings ('1' -> "Class 1",
        # '2' -> "Class 2") and expanded into the same arrays the stats
        # object is expected to score.
        labels = {"1": "Class 1", "2": "Class 2"}
        y = np.array([labels[c] for c in "11121222211212111121"])
        predicted = np.array([labels[c] for c in "11122211121122121222"])

        self.__stats = OptimizationStats(predicted, y)

    def test_works_fine(self):
        self.assertEqual(self.__stats._accuracy, 0.5)
        self.assertEqual(self.__stats._precision, 0.5199999999999999)
        self.assertEqual(self.__stats._cohen_kappa, 0.0)
        self.assertEqual(self.__stats._f1_score, 0.505050505050505)