├── .github └── workflows │ ├── release.yml │ ├── scripts │ ├── release_linux.sh │ ├── release_osx.sh │ └── release_windows.bat │ ├── test_R.yml │ ├── test_full.yml │ ├── test_pr.yml │ └── test_tutorials.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── LICENSE ├── README.md ├── docs ├── Makefile ├── README.md ├── arch.png ├── automl.rst ├── classifiers.rst ├── conf.py ├── examples.rst ├── explainers.rst ├── imputers.rst ├── index.rst ├── make.bat ├── prediction.rst ├── preprocessing.rst ├── regression.rst ├── risk_estimation.rst └── tutorials ├── pyproject.toml ├── scripts ├── __init__.py ├── build_demonstrator.py ├── nb_test.py ├── run_demonstrator.py └── studies │ ├── build_adj_biobank_cvd.sh │ └── build_adj_biobank_diabetes.sh ├── setup.cfg ├── setup.py ├── src └── autoprognosis │ ├── __init__.py │ ├── apps │ ├── __init__.py │ ├── classification │ │ └── classification_template_streamlit.py │ ├── common │ │ ├── __init__.py │ │ ├── login.py │ │ └── pandas_to_streamlit.py │ ├── extras │ │ ├── __init__.py │ │ ├── biobank_cvd.py │ │ └── biobank_diabetes.py │ └── survival_analysis │ │ └── survival_analysis_template_streamlit.py │ ├── deploy │ ├── __init__.py │ ├── build.py │ ├── proto.py │ ├── run.py │ └── utils.py │ ├── exceptions │ └── __init__.py │ ├── explorers │ ├── __init__.py │ ├── classifiers.py │ ├── classifiers_combos.py │ ├── core │ │ ├── __init__.py │ │ ├── defaults.py │ │ ├── optimizer.py │ │ ├── optimizers │ │ │ ├── __init__.py │ │ │ ├── bayesian.py │ │ │ └── hyperband.py │ │ └── selector.py │ ├── regression.py │ ├── regression_combos.py │ ├── risk_estimation.py │ └── risk_estimation_combos.py │ ├── hooks │ ├── __init__.py │ ├── base.py │ └── default.py │ ├── logger.py │ ├── plugins │ ├── __init__.py │ ├── core │ │ ├── __init__.py │ │ ├── base_plugin.py │ │ └── params.py │ ├── ensemble │ │ ├── __init__.py │ │ ├── classifiers.py │ │ ├── combos.py │ │ ├── regression.py │ │ └── risk_estimation.py │ ├── explainers │ │ ├── __init__.py │ 
│ ├── base.py │ │ ├── plugin_invase.py │ │ ├── plugin_kernel_shap.py │ │ ├── plugin_lime.py │ │ ├── plugin_risk_effect_size.py │ │ ├── plugin_shap_permutation_sampler.py │ │ └── plugin_symbolic_pursuit.py │ ├── imputers │ │ ├── README.md │ │ ├── __init__.py │ │ ├── base.py │ │ ├── plugin_EM.py │ │ ├── plugin_gain.py │ │ ├── plugin_hyperimpute.py │ │ ├── plugin_ice.py │ │ ├── plugin_mean.py │ │ ├── plugin_median.py │ │ ├── plugin_mice.py │ │ ├── plugin_missforest.py │ │ ├── plugin_most_frequent.py │ │ ├── plugin_nop.py │ │ ├── plugin_sinkhorn.py │ │ └── plugin_softimpute.py │ ├── pipeline │ │ ├── __init__.py │ │ └── generators.py │ ├── prediction │ │ ├── __init__.py │ │ ├── base.py │ │ ├── classifiers │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── helper_calibration.py │ │ │ ├── plugin_adaboost.py │ │ │ ├── plugin_bagging.py │ │ │ ├── plugin_bernoulli_naive_bayes.py │ │ │ ├── plugin_catboost.py │ │ │ ├── plugin_decision_trees.py │ │ │ ├── plugin_extra_tree_classifier.py │ │ │ ├── plugin_gaussian_naive_bayes.py │ │ │ ├── plugin_gaussian_process.py │ │ │ ├── plugin_gradient_boosting.py │ │ │ ├── plugin_hist_gradient_boosting.py │ │ │ ├── plugin_knn.py │ │ │ ├── plugin_lda.py │ │ │ ├── plugin_lgbm.py │ │ │ ├── plugin_linear_svm.py │ │ │ ├── plugin_logistic_regression.py │ │ │ ├── plugin_multinomial_naive_bayes.py │ │ │ ├── plugin_neural_nets.py │ │ │ ├── plugin_perceptron.py │ │ │ ├── plugin_qda.py │ │ │ ├── plugin_random_forest.py │ │ │ ├── plugin_ridge_classifier.py │ │ │ ├── plugin_tabnet.py │ │ │ └── plugin_xgboost.py │ │ ├── regression │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── plugin_bayesian_ridge.py │ │ │ ├── plugin_catboost_regressor.py │ │ │ ├── plugin_kneighbors_regressor.py │ │ │ ├── plugin_linear_regression.py │ │ │ ├── plugin_mlp_regressor.py │ │ │ ├── plugin_neural_nets_regression.py │ │ │ ├── plugin_random_forest_regressor.py │ │ │ ├── plugin_tabnet_regressor.py │ │ │ └── plugin_xgboost_regressor.py │ │ └── risk_estimation │ │ │ ├── 
__init__.py │ │ │ ├── base.py │ │ │ ├── benchmarks │ │ │ ├── __init__.py │ │ │ ├── cvd │ │ │ │ ├── __init__.py │ │ │ │ ├── aha │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── model.py │ │ │ │ ├── framingham │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── model.py │ │ │ │ └── qrisk3 │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── model.py │ │ │ ├── diabetes │ │ │ │ ├── __init__.py │ │ │ │ ├── ada │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── model.py │ │ │ │ ├── diabetes_uk │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── model.py │ │ │ │ ├── finrisk │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── model.py │ │ │ │ └── qdiabetes │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── model.py │ │ │ └── prostate_cancer │ │ │ │ ├── __init__.py │ │ │ │ └── predict.py │ │ │ ├── helper_lifelines.py │ │ │ ├── plugin_cox_ph.py │ │ │ ├── plugin_coxnet.py │ │ │ ├── plugin_deephit.py │ │ │ ├── plugin_loglogistic_aft.py │ │ │ ├── plugin_lognormal_aft.py │ │ │ ├── plugin_survival_xgboost.py │ │ │ └── plugin_weibull_aft.py │ ├── preprocessors │ │ ├── README.md │ │ ├── __init__.py │ │ ├── base.py │ │ ├── dimensionality_reduction │ │ │ ├── __init__.py │ │ │ ├── plugin_data_cleanup.py │ │ │ ├── plugin_fast_ica.py │ │ │ ├── plugin_feature_agglomeration.py │ │ │ ├── plugin_gauss_projection.py │ │ │ ├── plugin_nop.py │ │ │ ├── plugin_pca.py │ │ │ └── plugin_variance_threshold.py │ │ └── feature_scaling │ │ │ ├── __init__.py │ │ │ ├── plugin_feature_normalizer.py │ │ │ ├── plugin_maxabs_scaler.py │ │ │ ├── plugin_minmax_scaler.py │ │ │ ├── plugin_nop.py │ │ │ ├── plugin_normal_transform.py │ │ │ ├── plugin_scaler.py │ │ │ └── plugin_uniform_transform.py │ ├── uncertainty │ │ ├── __init__.py │ │ ├── base.py │ │ ├── plugin_cohort_explainer.py │ │ ├── plugin_conformal_prediction.py │ │ └── plugin_jackknife.py │ └── utils │ │ ├── __init__.py │ │ ├── cast.py │ │ ├── decorators.py │ │ ├── metrics.py │ │ └── simulate.py │ ├── studies │ ├── __init__.py │ ├── _base.py │ ├── classifiers.py │ ├── regression.py │ └── risk_estimation.py │ ├── utils │ ├── 
__init__.py │ ├── data_encoder.py │ ├── distributions.py │ ├── encoder.py │ ├── metrics.py │ ├── pandas.py │ ├── parallel.py │ ├── redis.py │ ├── risk_estimation.py │ ├── serialization.py │ ├── tester.py │ ├── third_party │ │ ├── __init__.py │ │ ├── metrics.py │ │ ├── nonparametric.py │ │ └── util.py │ └── torch.py │ └── version.py ├── tests ├── apps │ ├── test_classifiers_app.py │ └── test_survival_app.py ├── bindings │ └── R │ │ ├── test_classification.R │ │ ├── test_classification_with_missing_data.R │ │ ├── test_regression.R │ │ └── test_survival_analysis.R ├── bugfixing │ ├── test_ensemble_crash.py │ └── test_not_fitted_error.py ├── conftest.py ├── explorers │ ├── explorers_mocks.py │ ├── test_classifiers.py │ ├── test_classifiers_combos.py │ ├── test_regression.py │ ├── test_regression_combos.py │ ├── test_risk_estimation.py │ ├── test_risk_estimation_combos.py │ └── test_selector.py ├── plugins │ ├── ensemble │ │ ├── test_classifier.py │ │ └── test_risk_estimation_ensemble.py │ ├── explainers │ │ ├── test_invase.py │ │ ├── test_kernel_shap.py │ │ ├── test_lime.py │ │ ├── test_risk_effect_size.py │ │ ├── test_shap_permutation_sampler.py │ │ └── test_symbolic_pursuit.py │ ├── imputers │ │ ├── test_api.py │ │ ├── test_em.py │ │ ├── test_gain.py │ │ ├── test_hyperimpute.py │ │ ├── test_ice.py │ │ ├── test_imputation_serde.py │ │ ├── test_imputers_api.py │ │ ├── test_mean.py │ │ ├── test_median.py │ │ ├── test_mice.py │ │ ├── test_missforest.py │ │ ├── test_most_freq.py │ │ ├── test_sinkhorn.py │ │ └── test_softimpute.py │ ├── pipeline │ │ └── test_pipeline.py │ ├── prediction │ │ ├── classifiers │ │ │ ├── test_bagging.py │ │ │ ├── test_bernoulli_naive_bayes.py │ │ │ ├── test_calibration.py │ │ │ ├── test_catboost.py │ │ │ ├── test_classifiers_linear_svm.py │ │ │ ├── test_clf_serde.py │ │ │ ├── test_decision_trees.py │ │ │ ├── test_extra_tree_classifier.py │ │ │ ├── test_gaussian_naive_bayes.py │ │ │ ├── test_gaussian_process.py │ │ │ ├── 
test_gradient_boosting.py │ │ │ ├── test_hist_gradient_boosting.py │ │ │ ├── test_knn.py │ │ │ ├── test_lda.py │ │ │ ├── test_lgbm.py │ │ │ ├── test_logistic_regression.py │ │ │ ├── test_neural_nets.py │ │ │ ├── test_perceptron.py │ │ │ ├── test_prediction_api.py │ │ │ ├── test_qda.py │ │ │ ├── test_random_forest.py │ │ │ ├── test_ridge_classifier.py │ │ │ ├── test_tabnet.py │ │ │ └── test_xgboost.py │ │ ├── regression │ │ │ ├── test_kneighbors_regressor.py │ │ │ ├── test_linear_regression.py │ │ │ ├── test_neural_nets_regression.py │ │ │ ├── test_random_forest_regressor.py │ │ │ ├── test_tabnet_regressor.py │ │ │ └── test_xgboost_regression.py │ │ └── risk_estimation │ │ │ ├── benchmarks │ │ │ ├── cvd │ │ │ │ ├── test_aha.py │ │ │ │ ├── test_fram.py │ │ │ │ └── test_qrisk3.py │ │ │ └── diabetes │ │ │ │ ├── test_ada.py │ │ │ │ ├── test_diabetesuk.py │ │ │ │ ├── test_finrisk.py │ │ │ │ └── test_qdiab.py │ │ │ ├── test_cox_ph.py │ │ │ ├── test_coxnet.py │ │ │ ├── test_deephit.py │ │ │ ├── test_loglogistic_aft.py │ │ │ ├── test_lognormal_aft.py │ │ │ ├── test_survival_xgboost.py │ │ │ └── test_weibull_aft.py │ ├── preprocessors │ │ ├── dimensionality_reduction │ │ │ ├── test_data_cleanup.py │ │ │ ├── test_dr_nop.py │ │ │ ├── test_fast_ica.py │ │ │ ├── test_feature_agglomeration.py │ │ │ ├── test_gauss_projection.py │ │ │ ├── test_pca.py │ │ │ └── test_variance_threshold.py │ │ ├── feature_scaling │ │ │ ├── test_feature_normalizer.py │ │ │ ├── test_fs_nop.py │ │ │ ├── test_maxabs_scaler.py │ │ │ ├── test_minmax_scaler.py │ │ │ ├── test_normal_transform.py │ │ │ ├── test_scaler.py │ │ │ └── test_uniform_transform.py │ │ └── test_preprocessing_api.py │ ├── uncertainty │ │ ├── test_cohort_explainer.py │ │ ├── test_conformal_prediction.py │ │ └── test_jackknife.py │ └── utils │ │ ├── test_cast.py │ │ ├── test_imputation_metrics.py │ │ └── test_simulate.py ├── studies │ ├── helpers.py │ ├── test_classifiers_studies.py │ ├── test_regression_studies.py │ └── 
test_risk_studies.py └── utils │ ├── test_metrics.py │ └── test_parallel.py ├── third_party └── image_template │ └── streamlit │ ├── .gitattributes │ ├── .streamlit │ └── config.toml │ ├── Procfile │ ├── README.md │ ├── app.py │ ├── requirements.txt │ └── runtime.txt └── tutorials ├── automl ├── tutorial_00_classification_study.ipynb ├── tutorial_01_automl_classification_with_imputation.ipynb ├── tutorial_02_survival_analysis_study.ipynb ├── tutorial_03_automl_survival_analysis_with_imputation.ipynb ├── tutorial_04_regression.ipynb ├── tutorial_05_classification_with_explainers.ipynb └── tutorial_06_automl_multiple_imputation_example.ipynb ├── bindings └── R │ ├── tutorial_classification.R │ ├── tutorial_classification_with_missing_data.R │ ├── tutorial_regression.R │ └── tutorial_survival_analysis.R ├── demonstrators ├── tutorial_00_build_a_demonstrator_classification.ipynb └── tutorial_01_build_a_demonstrator_survival_analysis.ipynb └── plugins ├── tutorial_00_imputation_plugins.ipynb ├── tutorial_01_preprocessing_plugins.ipynb ├── tutorial_02_classification_plugins.ipynb ├── tutorial_03_pipelines.ipynb ├── tutorial_04_interpretability.ipynb ├── tutorial_05_survival_analysis_plugins.ipynb └── tutorial_06_regression_plugins.ipynb /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Package release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | workflow_dispatch: 7 | 8 | 9 | jobs: 10 | # Build for OSX and publish, see scripts/release_osx.sh. 
11 | deploy_osx: 12 | runs-on: macos-latest 13 | strategy: 14 | matrix: 15 | python-version: ["3.9", "3.10", "3.11", "3.12"] 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | with: 20 | submodules: true 21 | - name: Set up Python 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: __token__ 28 | TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 29 | run: ${GITHUB_WORKSPACE}/.github/workflows/scripts/release_osx.sh 30 | 31 | # Build for Linux and publish, see scripts/release_linux.sh. 32 | deploy_linux: 33 | strategy: 34 | matrix: 35 | python-version: 36 | - cp39-cp39 37 | - cp310-cp310 38 | - cp311-cp311 39 | - cp312-cp312 40 | 41 | runs-on: ubuntu-latest 42 | container: 43 | image: node:20-bullseye # Use the official Node.js 20 image based on Debian 44 | steps: 45 | - uses: actions/checkout@v3 46 | with: 47 | submodules: true 48 | - name: Set target Python version PATH 49 | run: | 50 | echo "/opt/python/${{ matrix.python-version }}/bin" >> $GITHUB_PATH 51 | - name: Install Python Build Dependencies 52 | run: | 53 | apt-get update 54 | apt-get install -y python3 python3-pip python3-dev build-essential 55 | - name: Build and publish 56 | env: 57 | TWINE_USERNAME: __token__ 58 | TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 59 | run: ${GITHUB_WORKSPACE}/.github/workflows/scripts/release_linux.sh 60 | 61 | # Build for Windows and publish, see scripts/release_windows.bat. 
62 | deploy_windows: 63 | runs-on: windows-latest 64 | strategy: 65 | matrix: 66 | python-version: ["3.9", "3.10", "3.11", "3.12"] 67 | 68 | steps: 69 | - uses: actions/checkout@v3 70 | with: 71 | submodules: true 72 | - name: Set up Python ${{ matrix.python-version }} 73 | uses: actions/setup-python@v5 74 | with: 75 | python-version: ${{ matrix.python-version }} 76 | - name: Build and publish 77 | env: 78 | TWINE_USERNAME: __token__ 79 | TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 80 | run: | 81 | ../../.github/workflows/scripts/release_windows.bat 82 | -------------------------------------------------------------------------------- /.github/workflows/scripts/release_linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # Update the package list 6 | apt-get update 7 | 8 | # Install necessary packages and build tools 9 | apt-get install -y \ 10 | software-properties-common \ 11 | python3 \ 12 | python3-dev \ 13 | python3-pip \ 14 | build-essential \ 15 | llvm \ 16 | clang \ 17 | lsb-release 18 | 19 | # Add the LLVM repository to get the latest version of LLVM (if needed) 20 | wget https://apt.llvm.org/llvm.sh 21 | chmod +x llvm.sh 22 | ./llvm.sh 14 # Replace 14 with the required version if necessary 23 | 24 | # Upgrade pip to the latest version 25 | python3 -m pip install --upgrade pip 26 | 27 | # Install Python packaging tools 28 | python3 -m pip install setuptools wheel twine auditwheel 29 | 30 | # Build Python wheels 31 | python3 -m pip wheel . 
-w dist/ --no-deps 32 | 33 | # Publish the built wheels to PyPI 34 | twine upload --verbose --skip-existing dist/* 35 | -------------------------------------------------------------------------------- /.github/workflows/scripts/release_osx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | export MACOSX_DEPLOYMENT_TARGET=10.14 4 | 5 | python -m pip install --upgrade pip 6 | pip install setuptools wheel twine auditwheel 7 | 8 | python3 setup.py build bdist_wheel --plat-name macosx_10_14_x86_64 --dist-dir wheel 9 | twine upload --skip-existing wheel/* 10 | -------------------------------------------------------------------------------- /.github/workflows/scripts/release_windows.bat: -------------------------------------------------------------------------------- 1 | echo on 2 | 3 | python -m pip install --upgrade pip 4 | pip install setuptools wheel twine auditwheel 5 | 6 | pip wheel . -w wheel/ --no-deps 7 | twine upload --skip-existing wheel/* 8 | -------------------------------------------------------------------------------- /.github/workflows/test_R.yml: -------------------------------------------------------------------------------- 1 | name: Tests R 2 | 3 | on: 4 | workflow_dispatch: 5 | # push: 6 | # branches: [main, release] 7 | # schedule: 8 | # - cron: '2 3 * * 4' 9 | 10 | 11 | jobs: 12 | Library: 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | matrix: 16 | r-version: ['4.2'] 17 | python-version: ['3.8'] 18 | os: [macos-latest, ubuntu-latest] 19 | steps: 20 | - uses: actions/checkout@v2 21 | with: 22 | submodules: true 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v5 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Set up R ${{ matrix.r-version }} 28 | uses: r-lib/actions/setup-r@v2 29 | with: 30 | r-version: ${{ matrix.r-version }} 31 | - name: Install depends 32 | run: | 33 | Rscript -e 
"install.packages(c('remotes','reticulate'))" 34 | - name: Test R 35 | run: | 36 | Rscript tests/bindings/R/test_classification.R 37 | Rscript tests/bindings/R/test_classification_with_missing_data.R 38 | Rscript tests/bindings/R/test_regression.R 39 | Rscript tests/bindings/R/test_survival_analysis.R 40 | -------------------------------------------------------------------------------- /.github/workflows/test_full.yml: -------------------------------------------------------------------------------- 1 | name: Tests Full Python 2 | 3 | on: 4 | schedule: 5 | - cron: '0 1 * * 1' 6 | workflow_dispatch: 7 | 8 | 9 | jobs: 10 | Library: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | python-version: ["3.9", "3.10", "3.11", "3.12"] 15 | os: [macos-latest, ubuntu-latest, windows-latest] 16 | steps: 17 | - uses: actions/checkout@v2 18 | with: 19 | submodules: true 20 | - uses: gautamkrishnar/keepalive-workflow@v1 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - name: Install MacOS dependencies 26 | run: | 27 | brew install libomp 28 | if: ${{ matrix.os == 'macos-latest' }} 29 | - name: Install dependencies 30 | run: | 31 | pip install --upgrade pip 32 | pip install .[dev] 33 | - name: Test with pytest 34 | run: pytest -vvsx --durations=50 35 | -------------------------------------------------------------------------------- /.github/workflows/test_pr.yml: -------------------------------------------------------------------------------- 1 | name: Tests Fast Python 2 | 3 | on: 4 | push: 5 | branches: [main, release] 6 | pull_request: 7 | types: [opened, synchronize, reopened] 8 | workflow_dispatch: 9 | 10 | 11 | jobs: 12 | Linter: 13 | # GH runners: 14 | runs-on: ${{ matrix.os }} 15 | # # Self-hosted runners: 16 | # runs-on: 17 | # - self-hosted 18 | # - ${{ matrix.os }} 19 | strategy: 20 | matrix: 21 | python-version: ["3.10"] 22 | # GH runners: 23 | os: 
[ubuntu-latest] 24 | # # Self-hosted runners: 25 | # os: [Linux] 26 | steps: 27 | - uses: actions/checkout@v2 28 | with: 29 | submodules: true 30 | - name: Set up Python ${{ matrix.python-version }} 31 | uses: actions/setup-python@v5 32 | with: 33 | python-version: ${{ matrix.python-version }} 34 | - name: Install dependencies 35 | run: | 36 | pip install bandit pre-commit 37 | - name: pre-commit validation 38 | run: pre-commit run --all 39 | - name: Security checks 40 | run: | 41 | bandit -r src/autoprognosis/plugins/* 42 | bandit -r src/autoprognosis/studies/* 43 | 44 | Library: 45 | needs: [Linter] 46 | # GH runners: 47 | runs-on: ${{ matrix.os }} 48 | # # Self-hosted runners: 49 | # runs-on: 50 | # - self-hosted 51 | # - ${{ matrix.os }} 52 | strategy: 53 | matrix: 54 | python-version: ["3.9", "3.10", "3.11", "3.12"] 55 | # GH runners: 56 | os: [macos-latest, ubuntu-latest, windows-latest] 57 | # # Self-hosted runners: 58 | # os: [Linux, Windows, macOS] 59 | steps: 60 | - uses: actions/checkout@v2 61 | with: 62 | submodules: true 63 | - uses: gautamkrishnar/keepalive-workflow@v1 64 | - name: Set up Python ${{ matrix.python-version }} 65 | uses: actions/setup-python@v5 66 | with: 67 | python-version: ${{ matrix.python-version }} 68 | - name: Install MacOS dependencies 69 | run: | 70 | brew install libomp 71 | if: ${{ matrix.os == 'macos-latest' }} 72 | - name: Install dependencies 73 | run: | 74 | pip install --upgrade pip 75 | pip install .[dev] 76 | - name: Test with pytest 77 | run: pytest -vvvsx -m "not slow" --durations=50 78 | -------------------------------------------------------------------------------- /.github/workflows/test_tutorials.yml: -------------------------------------------------------------------------------- 1 | name: Tutorials 2 | 3 | on: 4 | push: 5 | branches: [main, release] 6 | pull_request: 7 | types: [opened, synchronize, reopened] 8 | schedule: 9 | - cron: '1 3 * * 0' 10 | workflow_dispatch: 11 | 12 | jobs: 13 | Tutorials: 14 | 
runs-on: ${{ matrix.os }} 15 | strategy: 16 | matrix: 17 | python-version: ["3.9", "3.10", "3.11", "3.12"] 18 | os: [ubuntu-latest] 19 | steps: 20 | - uses: actions/checkout@v2 21 | with: 22 | submodules: true 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v5 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install MacOS dependencies 28 | run: | 29 | brew install rajivshah3/libomp-tap/libomp@11.1.0 30 | if: ${{ matrix.os == 'macos-latest' }} 31 | - name: Install dependencies 32 | run: | 33 | pip install --upgrade pip 34 | pip install .[dev] 35 | 36 | python -m pip install ipykernel 37 | python -m ipykernel install --user 38 | - name: Run the tutorials 39 | run: python scripts/nb_test.py --nb_dir tutorials/ 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Temporary and binary files 2 | *~ 3 | workspace 4 | workspace* 5 | weight_checkpoint* 6 | *.py[cod] 7 | *.json 8 | *.so 9 | *.cfg 10 | !.isort.cfg 11 | !setup.cfg 12 | *.orig 13 | *.log 14 | *.pot 15 | __pycache__/* 16 | .cache/* 17 | .*.swp 18 | */.ipynb_checkpoints/* 19 | .DS_Store 20 | .ipynb_checkpoints 21 | tmp 22 | runs 23 | logs 24 | catboost_info 25 | *.p 26 | *.rdb 27 | *.gz 28 | *.gz.* 29 | *.dat 30 | 31 | # Project files 32 | .ropeproject 33 | .project 34 | .pydevproject 35 | .settings 36 | .idea 37 | .vscode 38 | tags 39 | 40 | # Package files 41 | *.egg 42 | *.eggs/ 43 | .installed.cfg 44 | *.egg-info 45 | *.csv 46 | *.gz 47 | 48 | # Unittest and coverage 49 | htmlcov/* 50 | .coverage 51 | .coverage.* 52 | .tox 53 | junit*.xml 54 | coverage.xml 55 | .pytest_cache/ 56 | 57 | # Build and docs folder/files 58 | build/* 59 | dist/* 60 | sdist/* 61 | docs/api/* 62 | docs/_rst/* 63 | docs/_build/* 64 | cover/* 65 | MANIFEST 66 | 67 | # Per-project virtualenvs 68 | .venv*/ 69 | .conda*/ 70 | datasets 71 | 
generated 72 | image_bin 73 | release 74 | 75 | # Other 76 | .dev 77 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: 'setup.py' 2 | 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v5.0.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: check-added-large-files 9 | - id: check-ast 10 | - id: check-json 11 | - id: check-merge-conflict 12 | - id: check-xml 13 | - id: check-yaml 14 | - id: debug-statements 15 | - id: check-executables-have-shebangs 16 | - id: end-of-file-fixer 17 | - id: requirements-txt-fixer 18 | - id: mixed-line-ending 19 | args: ['--fix=auto'] # replace 'auto' with 'lf' to enforce Linux/Mac line endings or 'crlf' for Windows 20 | 21 | - repo: https://github.com/astral-sh/ruff-pre-commit 22 | rev: v0.11.2 23 | hooks: 24 | - id: ruff 25 | types_or: [ python, pyi ] 26 | args: ["check", "--select", "I", "--fix"] 27 | files: "^src/" 28 | - id: ruff-format 29 | types_or: [ python, pyi ] 30 | files: "^src/" 31 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the version of Python and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.9" 12 | apt_packages: 13 | - pandoc 14 | # ^ pandoc required by nbsphinx. 15 | 16 | # Build documentation in the docs/ directory with Sphinx 17 | sphinx: 18 | configuration: docs/conf.py 19 | 20 | # Optionally build your docs in additional formats such as PDF 21 | formats: 22 | - pdf 23 | 24 | python: 25 | install: 26 | - method: pip 27 | path: . 
28 | extra_requirements: 29 | - docs 30 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/docs/arch.png -------------------------------------------------------------------------------- /docs/automl.rst: -------------------------------------------------------------------------------- 1 | AutoML studies 2 | ========================= 3 | 4 | .. toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | Classification studies 9 | Regression studies 10 | Risk estimation studies 11 | -------------------------------------------------------------------------------- /docs/classifiers.rst: -------------------------------------------------------------------------------- 1 | Classifiers 2 | ========================= 3 | 4 | .. 
toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | AdaBoost 9 | Bagging 10 | Naive Bayes 11 | CatBoost 12 | Decision Trees 13 | ExtraTree classifier 14 | Gaussian Naive Bayes 15 | Gradient Boosting 16 | KNN 17 | LDA 18 | LGBM 19 | Linear SVM 20 | Logistic Regression 21 | Multinomial Naive Bayes 22 | Neural nets 23 | Perceptron 24 | QDA 25 | Random forest 26 | Ridge classifier 27 | TabNet 28 | XGBoost 29 | -------------------------------------------------------------------------------- /docs/examples.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========================= 3 | 4 | .. toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | Classification studies 9 | Classification studies with imputation 10 | Classification studies with explainers 11 | Survival analysis studies 12 | Survival analysis studies with imputation 13 | Regression studies 14 | Multiple imputation studies 15 | -------------------------------------------------------------------------------- /docs/explainers.rst: -------------------------------------------------------------------------------- 1 | Explainability plugins 2 | ========================= 3 | 4 | .. toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | IVNASE 9 | Kernel SHAP 10 | LIME 11 | Risk Effect Size 12 | SHAP Permutation sampler 13 | Symbolic Pursuit 14 | -------------------------------------------------------------------------------- /docs/imputers.rst: -------------------------------------------------------------------------------- 1 | Imputation plugins 2 | ========================= 3 | 4 | .. 
toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | HyperImpute 9 | EM imputation 10 | GAIN imputation 11 | ICE imputation 12 | MICE imputation 13 | missForest 14 | SinkHorn imputation 15 | SoftImpute 16 | Mean imputation 17 | Median imputation 18 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. autoprognosis documentation master file, created by 2 | sphinx-quickstart on Thu Dec 15 13:02:37 2022. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | AutoPrognosis documentation! 7 | ========================================= 8 | 9 | .. mdinclude:: README.md 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | :caption: Contents: 14 | 15 | Examples 16 | ========== 17 | .. toctree:: 18 | :glob: 19 | :maxdepth: 3 20 | 21 | examples.rst 22 | 23 | 24 | AutoML studies 25 | =============== 26 | .. toctree:: 27 | :glob: 28 | :maxdepth: 2 29 | 30 | automl.rst 31 | 32 | Imputation plugins 33 | =================== 34 | .. toctree:: 35 | :glob: 36 | :maxdepth: 2 37 | 38 | imputers.rst 39 | 40 | Preprocessing plugins 41 | ====================== 42 | .. toctree:: 43 | :glob: 44 | :maxdepth: 2 45 | 46 | preprocessing.rst 47 | 48 | Prediction plugins 49 | =================== 50 | .. toctree:: 51 | :glob: 52 | :maxdepth: 3 53 | 54 | prediction.rst 55 | 56 | Explainability plugins 57 | ======================= 58 | .. toctree:: 59 | :glob: 60 | :maxdepth: 3 61 | 62 | explainers.rst 63 | 64 | Benchmarks 65 | ============== 66 | .. 
toctree:: 67 | :glob: 68 | :maxdepth: 3 69 | 70 | Evaluation 71 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/prediction.rst: -------------------------------------------------------------------------------- 1 | Prediction plugins 2 | ========================= 3 | 4 | .. toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | Classifiers 9 | Risk estimation 10 | Regression 11 | -------------------------------------------------------------------------------- /docs/preprocessing.rst: -------------------------------------------------------------------------------- 1 | Preprocessing plugins 2 | ========================= 3 | 4 | .. 
toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | Data cleanup 9 | FastICA 10 | Feature agglomeration 11 | Gaussian Projection 12 | PCA 13 | Variance threshold 14 | Feature normalizer 15 | MaxAbs scaler 16 | MinMax scaler 17 | Standard scaler 18 | Normal transform 19 | Uniform transform 20 | -------------------------------------------------------------------------------- /docs/regression.rst: -------------------------------------------------------------------------------- 1 | Regression 2 | ========================= 3 | 4 | .. toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | Bayesian Ridge 9 | Catboost regressor 10 | k-neighbors regressor 11 | Linear regression 12 | Neural nets regressor 13 | Random forest regressor 14 | TabNet regressor 15 | XGBoost regressor 16 | -------------------------------------------------------------------------------- /docs/risk_estimation.rst: -------------------------------------------------------------------------------- 1 | Risk estimation 2 | ========================= 3 | 4 | .. toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | CoxNet 9 | DeepHit 10 | LogLogistic AFT 11 | LogNormal AFT 12 | Survival XGBoost 13 | Weibull AFT 14 | -------------------------------------------------------------------------------- /docs/tutorials: -------------------------------------------------------------------------------- 1 | ../tutorials/ -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=46.1.0", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.pytest.ini_options] 6 | markers = 'slow: mark a test as slow.' 
# stdlib
from pathlib import Path

# third party
import click
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

workspace = Path(__file__).parents[0] / "workspace"
workspace.mkdir(parents=True, exist_ok=True)


def run_notebook(notebook_path: Path) -> None:
    """Execute a notebook end-to-end inside the shared workspace directory."""
    with open(notebook_path) as f:
        nb = nbformat.read(f, as_version=4)

    proc = ExecutePreprocessor(timeout=1800)
    # Will raise on cell error
    proc.preprocess(nb, {"metadata": {"path": workspace}})


@click.command()
@click.option("--nb_dir", type=str, default=".")
def main(nb_dir: str) -> None:
    """Run every notebook under nb_dir, skipping demonstrators and checkpoints.

    Note: click delivers the option as str (the previous ``nb_dir: Path``
    annotation was misleading); it is converted to a Path locally.
    """
    nb_root = Path(nb_dir)

    for p in nb_root.rglob("*"):
        if "demonstrator" in str(p):
            print("Ignoring", p)
            continue

        if p.suffix != ".ipynb":
            continue
        if "checkpoint" in p.name:
            continue

        print("Testing ", p.name)
        try:
            run_notebook(p)
        except BaseException as e:
            print("FAIL", p.name, e)
            # Bare raise keeps the original traceback intact.
            raise


if __name__ == "__main__":
    main()
# Build the AutoPrognosis Streamlit demonstrator for the UK Biobank CVD study.
# Requires a trained risk-estimation model (model.p) and the study dataset
# (CSV) under ./workspace/biobank_cvd/.
# The horizons appear to be yearly multiples of 365 (i.e. days, 1-12 years)
# — confirm against the study setup. Kernel SHAP explanations and the demo
# login (--auth=True) are enabled.
python ./scripts/build_demonstrator.py \
       --name "AutoPrognosis: UK Biobank CVD study" \
       --model_path=./workspace/biobank_cvd/model.p \
       --dataset_path=./workspace/biobank_cvd/biobank_cvd.csv \
       --time_column=time_to_event \
       --target_column=event \
       --horizons="365, 730, 1095, 1460, 1825, 2190, 2555, 2920, 3285, 3650, 4015, 4380" \
       --task_type=risk_estimation \
       --explainers="kernel_shap" \
       --extras=biobank_cvd \
       --auth=True
def read(fname: str) -> str:
    """Return the contents of *fname*, resolved relative to this file.

    Uses a context manager so the file handle is closed deterministically
    (the previous version leaked the handle).
    """
    with open(os.path.join(os.path.dirname(__file__), fname)) as fh:
        return fh.read()


def find_version() -> str:
    """Extract ``__version__`` from src/autoprognosis/version.py.

    Returns:
        The declared version string, or "0.0.1" when it cannot be located.
    """
    version_file = read("src/autoprognosis/version.py")
    version_re = r"__version__ = \"(?P<version>.+)\""
    # re.search, not re.match: the assignment need not sit on the very first
    # line of version.py (a leading comment previously forced the fallback).
    version_raw = re.search(version_re, version_file)

    if version_raw is None:
        return "0.0.1"

    return version_raw.group("version")
def is_authenticated(dummy: str) -> bool:
    """Return True when the supplied password matches the demo password."""
    return dummy == "autoprognosis"


def generate_login_block() -> tuple:
    """Create two empty Streamlit placeholders used to render the login form."""
    block1 = st.empty()
    block2 = st.empty()

    return block1, block2


def clean_blocks(blocks) -> None:
    """Clear every placeholder produced by generate_login_block()."""
    for block in blocks:
        block.empty()
def get_ports(pid: int) -> list:
    """Return the local ports on which process *pid* is listening."""
    proc = psutil.Process(pid)
    return [
        conn.laddr.port
        for conn in proc.connections()
        if conn.status == "LISTEN"
    ]
def file_md5(fname: Path) -> str:
    """Return the hex MD5 digest of *fname*, streamed in 4 KiB chunks."""
    digest = hashlib.md5()
    with open(fname, "rb") as fh:
        while True:
            chunk = fh.read(4096)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
"xgboost_regressor", 14 | "linear_regression", 15 | "catboost_regressor", 16 | ] 17 | 18 | default_imputers_names = ["mean", "ice", "missforest", "hyperimpute"] 19 | default_feature_scaling_names = Preprocessors( 20 | category="feature_scaling" 21 | ).list_available() 22 | default_feature_selection_names = ["nop", "pca", "fast_ica"] 23 | default_risk_estimation_names = [ 24 | "survival_xgboost", 25 | "loglogistic_aft", 26 | "deephit", 27 | "cox_ph", 28 | "weibull_aft", 29 | "lognormal_aft", 30 | "coxnet", 31 | ] 32 | 33 | percentile_val = 1.96 34 | -------------------------------------------------------------------------------- /src/autoprognosis/explorers/core/optimizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/explorers/core/optimizers/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | # autoprognosis relative 2 | from .base import Hooks # noqa: F401 3 | from .default import DefaultHooks # noqa: F401 4 | -------------------------------------------------------------------------------- /src/autoprognosis/hooks/base.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from abc import ABCMeta, abstractmethod 3 | from typing import Any 4 | 5 | 6 | class Hooks(metaclass=ABCMeta): 7 | """AutoML hooks interface. 8 | 9 | Methods: 10 | - cancel: True/False if to stop the current AutoML search. 11 | - heartbeat: Metrics/logs sink from the AutoML search 12 | 13 | """ 14 | 15 | @abstractmethod 16 | def cancel(self) -> bool: ... 17 | 18 | @abstractmethod 19 | def heartbeat( 20 | self, topic: str, subtopic: str, event_type: str, **kwargs: Any 21 | ) -> None: ... 
class DefaultHooks(Hooks):
    """Default no-op implementation of the AutoML Hooks interface.

    Never requests cancellation and forwards heartbeat events to the
    debug log.
    """

    def cancel(self) -> bool:
        # Never ask the AutoML search to stop.
        return False

    def heartbeat(
        self, topic: str, subtopic: str, event_type: str, **kwargs: Any
    ) -> None:
        # kwargs are accepted for interface compatibility but not logged.
        log.debug(f"[{topic}][{subtopic}] {event_type}")

    def finish(self) -> None:
        # Nothing to clean up for the default hooks.
        pass
class Explainers(PluginLoader):
    """Loader/registry for explainer plugins.

    Exposes every ``plugin*.py`` module discovered next to this file
    through the common PluginLoader interface.
    """

    def __init__(self) -> None:
        super().__init__(plugins, ExplainerPlugin)
    def plot(
        self,
        importances: pd.DataFrame,
        feature_names: Optional[list] = None,
    ) -> None:
        """Render a bar chart of per-feature importances via matplotlib.

        Args:
            importances: one importance value per feature; converted with
                ``np.asarray``, so any array-like is accepted.
            feature_names: x-axis labels. Falls back to the names supplied
                at construction time when None or empty.

        Note: draws on a fresh figure but does not call ``plt.show()`` or
        close it — display/cleanup is left to the caller.
        """
        importances = np.asarray(importances)

        title = f"{self.name()} importance"
        axis_title = "Features"

        if not feature_names:
            feature_names = self.feature_names

        # NOTE(review): assumes len(importances) == len(feature_names);
        # a mismatch would misalign or fail inside plt.bar — confirm callers.
        x_pos = np.arange(len(feature_names))

        plt.figure(figsize=(20, 6))
        plt.bar(x_pos, importances, align="center")
        plt.xticks(x_pos, feature_names, wrap=True)
        plt.xlabel(axis_title)
        plt.title(title)
    def __init__(self, model: Any) -> None:
        """Wrap a concrete imputation implementation.

        Args:
            model: underlying imputer; must expose ``fit`` and ``transform``
                (duck-typed — e.g. a hyperimpute plugin).

        Raises:
            RuntimeError: if the model lacks ``fit`` or ``transform``.
        """
        super().__init__()

        if not hasattr(model, "fit") or not hasattr(model, "transform"):
            raise RuntimeError("Invalid instance model type")

        self._model = model
Minimum ratio difference between iterations before stopping.
- The generator imputes the missing components conditioned on what is actually observed, and outputs a completed vector.
    def __init__(self, random_state: int = 0, **kwargs: Any) -> None:
        # Instantiate the wrapped hyperimpute GAIN plugin; extra kwargs are
        # forwarded verbatim.
        model = base_model(random_state=random_state, **kwargs)

        super().__init__(model)

    @staticmethod
    def name() -> str:
        """Registry name, delegated to the wrapped hyperimpute plugin."""
        return base_model.name()

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        """Tunable hyperparameters, delegated to the wrapped plugin."""
        return base_model.hyperparameter_space()


# Module-level export consumed by the plugin loader.
plugin = GainPlugin
    def __init__(self, random_state: int = 0, **kwargs: Any) -> None:
        # Instantiate the wrapped hyperimpute ICE plugin; extra kwargs are
        # forwarded verbatim.
        model = base_model(random_state=random_state, **kwargs)

        super().__init__(model)

    @staticmethod
    def name() -> str:
        # Deliberate override: expose as "ice" rather than the wrapped
        # plugin's own name (presumably "sklearn_ice" — confirm upstream).
        return "ice"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        """Tunable hyperparameters, delegated to the wrapped plugin."""
        return base_model.hyperparameter_space()


# Module-level export consumed by the plugin loader.
plugin = IterativeChainedEquationsPlugin
    def __init__(self, random_state: int = 0, **kwargs: Any) -> None:
        # Instantiate the wrapped hyperimpute mean-imputation plugin.
        model = base_model(random_state=random_state, **kwargs)

        super().__init__(model)

    @staticmethod
    def name() -> str:
        """Registry name, delegated to the wrapped hyperimpute plugin."""
        return base_model.name()

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        """Tunable hyperparameters, delegated to the wrapped plugin."""
        return base_model.hyperparameter_space()


# Module-level export consumed by the plugin loader.
plugin = MeanPlugin
    def __init__(self, random_state: int = 0, **kwargs: Any) -> None:
        # Instantiate the wrapped hyperimpute median-imputation plugin.
        model = base_model(random_state=random_state, **kwargs)

        super().__init__(model)

    @staticmethod
    def name() -> str:
        """Registry name, delegated to the wrapped hyperimpute plugin."""
        return base_model.name()

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        """Tunable hyperparameters, delegated to the wrapped plugin."""
        return base_model.hyperparameter_space()


# Module-level export consumed by the plugin loader.
plugin = MedianPlugin
    def __init__(self, random_state: int = 0, **kwargs: Any) -> None:
        # Instantiate the wrapped hyperimpute MissForest plugin; extra
        # kwargs are forwarded verbatim.
        model = base_model(random_state=random_state, **kwargs)

        super().__init__(model)

    @staticmethod
    def name() -> str:
        # Deliberate override: expose as "missforest" rather than the
        # wrapped plugin's own name (presumably "sklearn_missforest" —
        # confirm upstream).
        return "missforest"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        """Tunable hyperparameters, delegated to the wrapped plugin."""
        return base_model.hyperparameter_space()


# Module-level export consumed by the plugin loader.
plugin = MissForestPlugin
17 | 18 | Example: 19 | >>> import numpy as np 20 | >>> from autoprognosis.plugins.imputers import Imputers 21 | >>> plugin = Imputers().get("most_frequent") 22 | >>> plugin.fit_transform([[1, 1, 1, 1], [np.nan, np.nan, np.nan, np.nan], [1, 2, 2, 1], [2, 2, 2, 2]]) 23 | 0 1 2 3 24 | 0 1.0 1.0 1.0 1.0 25 | 1 1.0 2.0 2.0 1.0 26 | 2 1.0 2.0 2.0 1.0 27 | 3 2.0 2.0 2.0 2.0 28 | """ 29 | 30 | def __init__(self, random_state: int = 0, **kwargs: Any) -> None: 31 | model = base_model(random_state=random_state, **kwargs) 32 | 33 | super().__init__(model) 34 | 35 | @staticmethod 36 | def name() -> str: 37 | return base_model.name() 38 | 39 | @staticmethod 40 | def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]: 41 | return base_model.hyperparameter_space() 42 | 43 | 44 | plugin = MostFrequentPlugin 45 | -------------------------------------------------------------------------------- /src/autoprognosis/plugins/imputers/plugin_nop.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Any, List 3 | 4 | # third party 5 | from hyperimpute.plugins.imputers.plugin_nop import plugin as base_model 6 | 7 | # autoprognosis absolute 8 | import autoprognosis.plugins.core.params as params 9 | import autoprognosis.plugins.imputers.base as base 10 | 11 | 12 | class NopPlugin(base.ImputerPlugin): 13 | """Imputer plugin that doesn't alter the dataset.""" 14 | 15 | def __init__(self, random_state: int = 0, **kwargs: Any) -> None: 16 | model = base_model(random_state=random_state, **kwargs) 17 | 18 | super().__init__(model) 19 | 20 | @staticmethod 21 | def name() -> str: 22 | return base_model.name() 23 | 24 | @staticmethod 25 | def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]: 26 | return base_model.hyperparameter_space() 27 | 28 | 29 | plugin = NopPlugin 30 | -------------------------------------------------------------------------------- 
class SinkhornPlugin(base.ImputerPlugin):
    """Imputer for quantitative data based on Sinkhorn divergences.

    Relies on the idea that two batches extracted randomly from the same
    dataset should share the same distribution, and imputes missing values by
    minimizing optimal transport distances between batches.

    Args:
        eps: float, default=0.01
            Sinkhorn regularization parameter.
        lr: float, default=0.01
            Learning rate.
        opt: torch.nn.optim.Optimizer, default=torch.optim.Adam
            Optimizer class to use for fitting.
        n_epochs: int, default=15
            Number of gradient updates for each model within a cycle.
        batch_size: int, default=256
            Size of the batches on which the sinkhorn divergence is evaluated.
        n_pairs: int, default=10
            Number of batch pairs used per gradient update.
        noise: float, default=0.1
            Noise used for the missing values initialization.
        scaling: float, default=0.9
            Scaling parameter in Sinkhorn iterations.

    Example:
        >>> import numpy as np
        >>> from autoprognosis.plugins.imputers import Imputers
        >>> plugin = Imputers().get("sinkhorn")
        >>> plugin.fit_transform([[1, 1, 1, 1], [np.nan, np.nan, np.nan, np.nan], [1, 2, 2, 1], [2, 2, 2, 2]])

    Reference:
        "Missing Data Imputation using Optimal Transport",
        Boris Muzellec, Julie Josse, Claire Boyer, Marco Cuturi.
        Original code: https://github.com/BorisMuzellec/MissingDataOT
    """

    def __init__(self, random_state: int = 0, **kwargs: Any) -> None:
        # The optimal-transport machinery lives in the hyperimpute plugin.
        wrapped = base_model(random_state=random_state, **kwargs)

        super().__init__(wrapped)

    @staticmethod
    def name() -> str:
        return base_model.name()

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        return base_model.hyperparameter_space()


plugin = SinkhornPlugin
13 | To calibrate the the nuclear-norm regularization parameter(shrink_lambda), we perform cross- validation(_cv_softimpute) 14 | 15 | Args: 16 | maxit: int, default=500 17 | maximum number of imputation rounds to perform. 18 | convergence_threshold : float, default=1e-5 19 | Minimum ration difference between iterations before stopping. 20 | max_rank : int, default=2 21 | Perform a truncated SVD on each iteration with this value as its rank. 22 | shrink_lambda: float, default=0 23 | Value by which we shrink singular values on each iteration. If it's missing, it is calibrated using cross validation. 24 | cv_len: int, default=15 25 | the length of the grid on which the cross-validation is performed. 26 | 27 | Example: 28 | >>> import numpy as np 29 | >>> from autoprognosis.plugins.imputers import Imputers 30 | >>> plugin = Imputers().get("softimpute") 31 | >>> plugin.fit_transform([[1, 1, 1, 1], [np.nan, np.nan, np.nan, np.nan], [1, 2, 2, 1], [2, 2, 2, 2]]) 32 | 0 1 2 3 33 | 0 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 34 | 1 3.820605e-16 1.708249e-16 1.708249e-16 3.820605e-16 35 | 2 1.000000e+00 2.000000e+00 2.000000e+00 1.000000e+00 36 | 3 2.000000e+00 2.000000e+00 2.000000e+00 2.000000e+00 37 | 38 | Reference: "Spectral Regularization Algorithms for Learning Large Incomplete Matrices", by Mazumder, Hastie, and Tibshirani. 
class Predictions:
    """Facade over one category of prediction plugins.

    Args:
        category: one of "classifier", "risk_estimation" or "regression".
            Selects which plugin loader backs this instance.
    """

    def __init__(self, category: str = "classifier") -> None:
        self._category = category

        # Populated by reload() below.
        self._plugins: Union[Classifiers, RiskEstimation, Regression]

        self.reload()

    def list(self) -> List[str]:
        """Return the names of all registered plugins."""
        return self._plugins.list()

    def list_available(self) -> List[str]:
        """Return the names of the plugins that can be loaded."""
        return self._plugins.list_available()

    def add(self, name: str, cls: Type) -> "Predictions":
        """Register a new plugin class under `name`."""
        self._plugins.add(name, cls)

        return self

    def get(self, name: str, *args: Any, **kwargs: Any) -> PredictionPlugin:
        """Instantiate the plugin called `name` with the given arguments."""
        return self._plugins.get(name, *args, **kwargs)

    def get_type(self, name: str) -> Type:
        """Return the class (not an instance) of the plugin called `name`."""
        return self._plugins.get_type(name)

    def __iter__(self) -> Generator:
        yield from self._plugins

    def __len__(self) -> int:
        return len(self.list())

    def __getitem__(self, key: str) -> PredictionPlugin:
        return self.get(key)

    def reload(self) -> "Predictions":
        """(Re)create the underlying plugin loader for the current category."""
        loaders = {
            "classifier": Classifiers,
            "risk_estimation": RiskEstimation,
            "regression": Regression,
        }
        if self._category not in loaders:
            raise ValueError(f"unsupported category {self._category}")

        self._plugins = loaders[self._category]()

        return self
26 | """ 27 | 28 | def __init__(self) -> None: 29 | super().__init__() 30 | 31 | @staticmethod 32 | def type() -> str: 33 | return "prediction" 34 | 35 | def _transform(self, X: pd.DataFrame) -> pd.DataFrame: 36 | raise NotImplementedError( 37 | "Prediction plugins do not implement the 'transform' method" 38 | ) 39 | 40 | def score(self, X: pd.DataFrame, y: pd.DataFrame, metric: str = "aucroc") -> float: 41 | raise NotImplementedError(f"Score not implemented for {self.name()}") 42 | 43 | def explain(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame: 44 | raise NotImplementedError(f"Explainer not implemented for {self.name()}") 45 | 46 | def predict_proba(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame: 47 | if not self.is_fitted(): 48 | raise RuntimeError("Fit the model first") 49 | 50 | log.debug(f"Predicting using {self.fqdn()}, input shape = {X.shape}") 51 | X = self._preprocess_inference_data(X) 52 | result = pd.DataFrame(self._predict_proba(X, *args, **kwargs)) 53 | 54 | return result 55 | 56 | @abstractmethod 57 | def _predict_proba( 58 | self, X: pd.DataFrame, *args: Any, **kwargs: Any 59 | ) -> pd.DataFrame: ... 
60 | -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/classifiers/__init__.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | import glob 3 | from os.path import basename, dirname, isfile, join 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.plugins.core.base_plugin import PluginLoader 7 | from autoprognosis.plugins.prediction.classifiers.base import ( # noqa: F401,E402 8 | ClassifierPlugin, 9 | ) 10 | 11 | plugins = glob.glob(join(dirname(__file__), "plugin*.py")) 12 | 13 | 14 | class Classifiers(PluginLoader): 15 | def __init__(self) -> None: 16 | super().__init__(plugins, ClassifierPlugin) 17 | 18 | 19 | __all__ = [basename(f)[:-3] for f in plugins if isfile(f)] + [ 20 | "Classifiers", 21 | "ClassifierPlugin", 22 | ] 23 | -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/classifiers/base.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Any 3 | 4 | # third party 5 | import pandas as pd 6 | 7 | # autoprognosis absolute 8 | import autoprognosis.logger as log 9 | import autoprognosis.plugins.core.base_plugin as plugin 10 | import autoprognosis.plugins.prediction.base as prediction_base 11 | import autoprognosis.plugins.utils.cast as cast 12 | from autoprognosis.utils.tester import classifier_metrics 13 | 14 | 15 | class ClassifierPlugin(prediction_base.PredictionPlugin): 16 | """Base class for the classifier plugins. 17 | 18 | It provides the implementation for plugin.Plugin's subtype, _fit and _predict methods. 19 | 20 | Each derived class must implement the following methods(inherited from plugin.Plugin): 21 | name() - a static method that returns the name of the plugin. 22 | hyperparameter_space() - a static method that returns the hyperparameters that can be tuned during the optimization. 
# Supported calibration strategies, indexed by the `calibration` argument.
calibrations = ["none", "sigmoid", "isotonic"]


def calibrated_model(model: Any, calibration: int = 1, **kwargs: Any) -> Any:
    """Optionally wrap a classifier in sklearn probability calibration.

    Args:
        model: the estimator to calibrate.
        calibration: index into `calibrations`: 0 = none,
            1 = sigmoid (Platt scaling), 2 = isotonic regression.
        **kwargs: unused; accepted for call-site compatibility.

    Returns:
        `model` unchanged when calibration is disabled and the model exposes
        `predict_proba`; otherwise a `CalibratedClassifierCV` wrapper.

    Raises:
        RuntimeError: if `calibration` is not a valid index into `calibrations`.
    """
    # Reject out-of-range values, including negatives: previously only the
    # upper bound was checked, so e.g. -1 silently selected "isotonic" via
    # Python's negative indexing.
    if not 0 <= calibration < len(calibrations):
        raise RuntimeError("invalid calibration value")

    # sklearn 1.2 renamed CalibratedClassifierCV's `base_estimator` argument
    # to `estimator`; build the kwargs accordingly.
    if version.parse(sklearn.__version__) >= version.parse("1.2"):
        est_kwargs = {
            "estimator": model,
        }
    else:
        est_kwargs = {
            "base_estimator": model,
        }

    # Models without predict_proba must always be wrapped so they can emit
    # probabilities, regardless of the requested calibration method.
    if not hasattr(model, "predict_proba"):
        return CalibratedClassifierCV(**est_kwargs, n_jobs=n_learner_jobs())

    if calibration != 0:
        return CalibratedClassifierCV(
            **est_kwargs,
            method=calibrations[calibration],
            n_jobs=n_learner_jobs(),
        )

    return model
class LinearRegressionPlugin(base.RegressionPlugin):
    """Regression plugin based on sklearn's ordinary least squares LinearRegression.

    Example:
        >>> from autoprognosis.plugins.prediction import Predictions
        >>> plugin = Predictions(category="regression").get("linear_regression")
        >>> from sklearn.datasets import load_iris
        >>> X, y = load_iris(return_X_y=True)
        >>> plugin.fit_predict(X, y)  # returns the regression estimates
    """

    # NOTE(review): kept only for backward compatibility. These solver names
    # belong to Ridge regression, not to sklearn's LinearRegression, and were
    # never forwarded to the underlying model.
    solvers = ["auto", "cholesky", "lsqr", "sparse_cg", "sag", "saga"]

    def __init__(self, model: Any = None, random_state: int = 0, **kwargs: Any) -> None:
        super().__init__(**kwargs)
        if model is not None:
            # Restore path, used by load().
            self.model = model
            return

        self.model = LinearRegression(
            n_jobs=n_learner_jobs(),
        )

    @staticmethod
    def name() -> str:
        return "linear_regression"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        # Ordinary least squares has no tunable hyperparameters. The
        # previously advertised "max_iter"/"solver" knobs are Ridge
        # parameters: sklearn's LinearRegression does not accept them and
        # __init__ never forwarded them to the model, so the AutoML search
        # was tuning no-ops.
        return []

    def _fit(
        self, X: pd.DataFrame, *args: Any, **kwargs: Any
    ) -> "LinearRegressionPlugin":
        self.model.fit(X, *args, **kwargs)
        return self

    def _predict(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame:
        return self.model.predict(X, *args, **kwargs)

    def save(self) -> bytes:
        return serialization.save_model(self.model)

    @classmethod
    def load(cls, buff: bytes) -> "LinearRegressionPlugin":
        model = serialization.load_model(buff)

        return cls(model=model)


plugin = LinearRegressionPlugin
class MLPRegressionPlugin(base.RegressionPlugin):
    """Regression plugin based on sklearn's MLPRegressor neural network.

    Example:
        >>> from autoprognosis.plugins.prediction import Predictions
        >>> plugin = Predictions(category="regression").get("mlp_regressor")
        >>> from sklearn.datasets import load_iris
        >>> X, y = load_iris(return_X_y=True)
        >>> plugin.fit_predict(X, y)
    """

    def __init__(self, model: Any = None, random_state: int = 0, **kwargs: Any) -> None:
        super().__init__(**kwargs)
        if model is not None:
            # Restore path, used by load().
            self.model = model
            return

        self.model = MLPRegressor(max_iter=500, random_state=random_state)

    @staticmethod
    def name() -> str:
        return "mlp_regressor"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        # No hyperparameters are exposed to the AutoML search for this model.
        return []

    def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "MLPRegressionPlugin":
        self.model.fit(X, *args, **kwargs)
        return self

    def _predict(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame:
        return self.model.predict(X, *args, **kwargs)

    def save(self) -> bytes:
        return serialization.save_model(self.model)

    @classmethod
    def load(cls, buff: bytes) -> "MLPRegressionPlugin":
        restored = serialization.load_model(buff)

        return cls(model=restored)


plugin = MLPRegressionPlugin
/src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/cvd/aha/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/cvd/aha/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/cvd/framingham/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/cvd/framingham/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/cvd/qrisk3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/cvd/qrisk3/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/ada/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/ada/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/diabetes_uk/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/diabetes_uk/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/finrisk/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/finrisk/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/qdiabetes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/qdiabetes/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/prostate_cancer/__init__.py: -------------------------------------------------------------------------------- 
class LifelinesWrapper:
    """Adapter exposing a lifelines survival model through a sklearn-like API.

    Args:
        model: a lifelines-style estimator supporting
            ``fit(df, duration_col=..., event_col=...)`` and
            ``predict_survival_function``.
    """

    def __init__(self, model: Any, **kwargs: Any) -> None:
        self.model = model

    def fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "LifelinesWrapper":
        """Fit the wrapped model. Call as ``fit(X, T, Y)``: covariates, durations, event labels."""
        if len(args) < 2:
            raise ValueError("Invalid input for fit. Expecting X, T and Y.")

        T = args[0]
        Y = args[1]

        # Align indexes so the column-wise concat below lines the rows up.
        X = X.reset_index(drop=True)
        T = T.reset_index(drop=True)
        Y = Y.reset_index(drop=True)

        df = pd.concat([X, T, Y], axis=1)
        df.columns = [x for x in X.columns] + ["time", "label"]

        self.model.fit(df, duration_col="time", event_col="label", **kwargs)

        return self

    def predict(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> np.ndarray:
        """Predict risk (1 - survival probability) at the given time horizons.

        Call as ``predict(X, time_horizons)``; returns an array of shape
        ``(len(X), len(time_horizons))``.
        """
        if len(args) < 1:
            raise ValueError("Invalid input for predict. Expecting X and time horizon.")

        time_horizons = args[0]

        # Evaluate in chunks of ~1024 rows to bound memory usage.
        chunks = int(len(X) / 1024) + 1

        preds_ = []
        for chunk in np.array_split(X, chunks):
            local_preds_ = np.zeros([len(chunk), len(time_horizons)])
            # `surv`: rows are time points (index), columns are samples.
            surv = self.model.predict_survival_function(chunk)
            surv_times = np.asarray(surv.index).astype(int)
            surv = np.asarray(surv.T)  # (samples, times)

            for t, eval_time in enumerate(time_horizons):
                tmp_time = np.where(eval_time <= surv_times)[0]
                if len(tmp_time) == 0:
                    # Horizon beyond the last observed time point: use the
                    # *latest* survival estimate. The previous code used the
                    # first column (earliest time point, survival ~1), which
                    # grossly underestimated risk at late horizons.
                    local_preds_[:, t] = 1.0 - surv[:, -1]
                else:
                    local_preds_[:, t] = 1.0 - surv[:, tmp_time[0]]

            preds_.append(local_preds_)

        return np.concatenate(preds_, axis=0)
class PreprocessorPlugin(plugin.Plugin):
    """Base class for the preprocessing plugins.

    It provides the implementation for plugin.Plugin.type() static method.

    Each derived class must implement the following methods(inherited from plugin.Plugin):
        name() - a static method that returns the name of the plugin.
        hyperparameter_space() - a static method that returns the hyperparameters that can be tuned during the optimization. The method will return a list of `params.Params` derived objects.
        _fit() - internal implementation, called by the `fit` method.
        _transform() - internal implementation, called by the `transform` method.

    If any method implementation is missing, the class constructor will fail.
    """

    def __init__(self) -> None:
        super().__init__()

    @staticmethod
    def type() -> str:
        return "preprocessor"

    @staticmethod
    def components_interval(*args: Any, **kwargs: Any) -> Tuple[int, int]:
        """Return the valid (min, max) range for component-count hyperparameters.

        Args:
            **kwargs: must contain "features_count", the number of input features.

        Returns:
            Tuple[int, int]: the inclusive interval (1, features_count).

        Raises:
            ValueError: if "features_count" is missing or not a positive integer.
        """
        if "features_count" not in kwargs:
            raise ValueError(
                "invalid arguments for hyperparameter_space. Expecting 'features_count' value"
            )

        # Presence was checked above, so no .get() default is needed.
        feature_count = kwargs["features_count"]

        # Reject zero AND negative counts: either would yield an invalid interval.
        if feature_count <= 0:
            raise ValueError("invalid value for 'features_count'")

        return (1, feature_count)

    def _predict(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame:
        raise NotImplementedError(
            "Preprocessing plugins do not implement the 'predict' method"
        )

    def _predict_proba(self, X: pd.DataFrame) -> pd.DataFrame:
        raise NotImplementedError(
            "Preprocessing plugins do not implement the 'predict_proba' method"
        )
class FeatureAgglomerationPlugin(base.PreprocessorPlugin):
    """Dimensionality-reduction plugin built on sklearn's FeatureAgglomeration.

    Method:
        Similar-looking features are merged via agglomerative clustering,
        shrinking the feature space down to ``n_clusters`` groups.

    Reference:
        https://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html

    Args:
        n_clusters: int
            Number of clusters to find.

    Example:
        >>> from autoprognosis.plugins.preprocessors import Preprocessors
        >>> plugin = Preprocessors(category="dimensionality_reduction").get("feature_agglomeration")
        >>> from sklearn.datasets import load_iris
        >>> X, y = load_iris(return_X_y=True)
        >>> plugin.fit_transform(X, y)
    """

    def __init__(
        self, model: Any = None, random_state: int = 0, n_clusters: int = 2
    ) -> None:
        super().__init__()
        # Reuse a deserialized model when provided, otherwise build a fresh one.
        self.model = model if model else FeatureAgglomeration(n_clusters=n_clusters)

    @staticmethod
    def name() -> str:
        return "feature_agglomeration"

    @staticmethod
    def subtype() -> str:
        return "dimensionality_reduction"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        low, high = base.PreprocessorPlugin.components_interval(*args, **kwargs)
        return [params.Integer("n_clusters", low, high)]

    def _fit(
        self, X: pd.DataFrame, *args: Any, **kwargs: Any
    ) -> "FeatureAgglomerationPlugin":
        self.model.fit(X, *args, **kwargs)
        return self

    def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
        return self.model.transform(X)

    def save(self) -> bytes:
        return serialization.save_model(self.model)

    @classmethod
    def load(cls, buff: bytes) -> "FeatureAgglomerationPlugin":
        return cls(model=serialization.load_model(buff))


plugin = FeatureAgglomerationPlugin
class NopPlugin(base.PreprocessorPlugin):
    """Pass-through dimensionality-reduction plugin: leaves the data untouched."""

    def __init__(self, random_state: int = 0) -> None:
        super().__init__()

    @staticmethod
    def name() -> str:
        return "nop"

    @staticmethod
    def subtype() -> str:
        return "dimensionality_reduction"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        # Nothing to tune for a no-op transform.
        return []

    def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "NopPlugin":
        # Stateless: nothing to learn.
        return self

    def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
        return X

    def save(self) -> bytes:
        # No internal state to serialize.
        return b""

    @classmethod
    def load(cls, buff: bytes) -> "NopPlugin":
        return cls()


plugin = NopPlugin
class FeatureNormalizerPlugin(base.PreprocessorPlugin):
    """Preprocessing plugin rescaling each sample to unit (L2) norm.

    Method:
        Every row is divided by its Euclidean norm so all samples end up
        with unit length.

    Reference:
        https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.Normalizer.html

    Example:
        >>> from autoprognosis.plugins.preprocessors import Preprocessors
        >>> plugin = Preprocessors().get("feature_normalizer")
        >>> from sklearn.datasets import load_iris
        >>> X, y = load_iris(return_X_y=True)
        >>> plugin.fit_transform(X, y)
    """

    def __init__(self, random_state: int = 0, model: Any = None) -> None:
        super().__init__()
        # A deserialized model takes precedence over building a new one.
        self.model = model if model else Normalizer()

    @staticmethod
    def name() -> str:
        return "feature_normalizer"

    @staticmethod
    def subtype() -> str:
        return "feature_scaling"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        # Plain L2 normalization has no tunable hyperparameters.
        return []

    def _fit(
        self, X: pd.DataFrame, *args: Any, **kwargs: Any
    ) -> "FeatureNormalizerPlugin":
        self.model.fit(X, *args, **kwargs)
        return self

    def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
        return self.model.transform(X)

    def save(self) -> bytes:
        return serialization.save_model(self.model)

    @classmethod
    def load(cls, buff: bytes) -> "FeatureNormalizerPlugin":
        return cls(model=serialization.load_model(buff))


plugin = FeatureNormalizerPlugin
class MaxAbsScalerPlugin(base.PreprocessorPlugin):
    """Preprocessing plugin scaling each feature by its maximum absolute value.

    Method:
        Every feature is scaled so that its maximal absolute value over the
        training set becomes 1.0; no shifting/centering is applied.

    Reference:
        https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MaxAbsScaler.html

    Example:
        >>> from autoprognosis.plugins.preprocessors import Preprocessors
        >>> plugin = Preprocessors().get("maxabs_scaler")
        >>> from sklearn.datasets import load_iris
        >>> X, y = load_iris(return_X_y=True)
        >>> plugin.fit_transform(X, y)
    """

    def __init__(self, random_state: int = 0, model: Any = None) -> None:
        super().__init__()
        # Prefer the deserialized model when one is supplied.
        self.model = model if model else MaxAbsScaler()

    @staticmethod
    def name() -> str:
        return "maxabs_scaler"

    @staticmethod
    def subtype() -> str:
        return "feature_scaling"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        # No tunable hyperparameters for max-abs scaling.
        return []

    def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "MaxAbsScalerPlugin":
        self.model.fit(X)
        return self

    def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
        return self.model.transform(X)

    def save(self) -> bytes:
        return serialization.save_model(self.model)

    @classmethod
    def load(cls, buff: bytes) -> "MaxAbsScalerPlugin":
        return cls(model=serialization.load_model(buff))


plugin = MaxAbsScalerPlugin
class MinMaxScalerPlugin(base.PreprocessorPlugin):
    """Preprocessing plugin scaling each feature into a fixed range.

    Method:
        Each feature is shifted and rescaled individually so that, on the
        training set, it lies inside the target range (zero to one by default).

    Reference:
        https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html

    Example:
        >>> from autoprognosis.plugins.preprocessors import Preprocessors
        >>> plugin = Preprocessors().get("minmax_scaler")
        >>> from sklearn.datasets import load_iris
        >>> X, y = load_iris(return_X_y=True)
        >>> plugin.fit_transform(X, y)
    """

    def __init__(self, random_state: int = 0, model: Any = None) -> None:
        super().__init__()
        # A provided (deserialized) model wins over constructing a new one.
        self.model = model if model else MinMaxScaler()

    @staticmethod
    def name() -> str:
        return "minmax_scaler"

    @staticmethod
    def subtype() -> str:
        return "feature_scaling"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        # No tunable hyperparameters exposed for min-max scaling.
        return []

    def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "MinMaxScalerPlugin":
        self.model.fit(X)
        return self

    def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
        return self.model.transform(X)

    def save(self) -> bytes:
        return serialization.save_model(self.model)

    @classmethod
    def load(cls, buff: bytes) -> "MinMaxScalerPlugin":
        return cls(model=serialization.load_model(buff))


plugin = MinMaxScalerPlugin
class NopPlugin(base.PreprocessorPlugin):
    """Pass-through feature-scaling plugin: returns the data unchanged."""

    def __init__(self, random_state: int = 0) -> None:
        super().__init__()

    @staticmethod
    def name() -> str:
        return "nop"

    @staticmethod
    def subtype() -> str:
        return "feature_scaling"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        # A no-op has nothing to tune.
        return []

    def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "NopPlugin":
        # Stateless: fitting is a no-op.
        return self

    def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
        return X

    def save(self) -> bytes:
        # No state worth serializing.
        return b""

    @classmethod
    def load(cls, buff: bytes) -> "NopPlugin":
        return cls()


plugin = NopPlugin
class ScalerPlugin(base.PreprocessorPlugin):
    """Preprocessing plugin for standardization via StandardScaler.

    Method:
        Features are centered by removing the mean and scaled to unit
        variance, computed on the training set.

    Reference:
        https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html

    Example:
        >>> from autoprognosis.plugins.preprocessors import Preprocessors
        >>> plugin = Preprocessors().get("scaler")
        >>> from sklearn.datasets import load_iris
        >>> X, y = load_iris(return_X_y=True)
        >>> plugin.fit_transform(X, y)
    """

    def __init__(self, random_state: int = 0, model: Any = None) -> None:
        super().__init__()
        # Keep a deserialized model when given; otherwise create a fresh scaler.
        self.model = model if model else StandardScaler()

    @staticmethod
    def name() -> str:
        return "scaler"

    @staticmethod
    def subtype() -> str:
        return "feature_scaling"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        # Standardization exposes no tunable hyperparameters here.
        return []

    def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "ScalerPlugin":
        self.model.fit(X, *args, **kwargs)
        return self

    def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
        return self.model.transform(X)

    def save(self) -> bytes:
        return serialization.save_model(self.model)

    @classmethod
    def load(cls, buff: bytes) -> "ScalerPlugin":
        return cls(model=serialization.load_model(buff))


plugin = ScalerPlugin
class UniformTransformPlugin(base.PreprocessorPlugin):
    """Preprocessing plugin mapping features onto a uniform distribution.

    Method:
        A quantile transform is estimated per feature and used to map values
        to a uniform distribution, spreading out the most frequent values.

    Reference:
        https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.QuantileTransformer.html

    Example:
        >>> from autoprognosis.plugins.preprocessors import Preprocessors
        >>> plugin = Preprocessors().get("uniform_transform")
        >>> from sklearn.datasets import load_iris
        >>> X, y = load_iris(return_X_y=True)
        >>> plugin.fit_transform(X, y)
    """

    def __init__(
        self, random_state: int = 0, n_quantiles: int = 100, model: Any = None
    ) -> None:
        super().__init__()
        if model:
            # Restore a deserialized model.
            self.model = model
        else:
            self.model = QuantileTransformer(
                n_quantiles=n_quantiles, random_state=random_state
            )

    @staticmethod
    def name() -> str:
        return "uniform_transform"

    @staticmethod
    def subtype() -> str:
        return "feature_scaling"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        # n_quantiles is fixed at construction time; nothing exposed for tuning.
        return []

    def _fit(
        self, X: pd.DataFrame, *args: Any, **kwargs: Any
    ) -> "UniformTransformPlugin":
        self.model.fit(X)
        return self

    def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
        return self.model.transform(X)

    def save(self) -> bytes:
        return serialization.save_model(self.model)

    @classmethod
    def load(cls, buff: bytes) -> "UniformTransformPlugin":
        return cls(model=serialization.load_model(buff))


plugin = UniformTransformPlugin
class UncertaintyPlugin(metaclass=ABCMeta):
    """Abstract interface for uncertainty-quantification plugins.

    A concrete plugin wraps an existing predictive model (stored as
    ``self.model``) and must provide ``fit``, ``predict`` and
    ``predict_proba`` implementations, plus a static ``name``.
    """

    def __init__(self, model: Any) -> None:
        # The wrapped predictive model that the plugin operates on.
        self.model = model

    @staticmethod
    @abstractmethod
    def name() -> str: ...

    @staticmethod
    def type() -> str:
        # Shared type tag used by the plugin loader for this family.
        return "uncertainty_quantification"

    @abstractmethod
    def fit(self, *args: Any, **kwargs: Any) -> "UncertaintyPlugin": ...

    @abstractmethod
    def predict(self, *args: Any, **kwargs: Any) -> pd.DataFrame: ...

    @abstractmethod
    def predict_proba(self, *args: Any, **kwargs: Any) -> pd.DataFrame: ...
def to_dataframe(X: Any) -> pd.DataFrame:
    """Helper for casting arguments to `pandas.DataFrame`.

    Args:
        X: the object to cast.

    Returns:
        pd.DataFrame: the converted DataFrame.

    Raises:
        ValueError: if the argument cannot be converted to a DataFrame.
    """
    if isinstance(X, pd.DataFrame):
        return X
    # Use the public pd.Series alias rather than the private pd.core.series path.
    elif isinstance(X, (list, np.ndarray, pd.Series)):
        return pd.DataFrame(X)

    raise ValueError(
        f"unsupported data type {type(X)}. Try list, pandas.DataFrame or numpy.ndarray"
    )


def to_ndarray(X: Any) -> np.ndarray:
    """Helper for casting arguments to `numpy.ndarray`.

    Args:
        X: the object to cast.

    Returns:
        np.ndarray: the converted ndarray.

    Raises:
        ValueError: if the argument cannot be converted to a ndarray.
    """
    if isinstance(X, np.ndarray):
        return X
    elif isinstance(X, (list, pd.DataFrame, pd.Series)):
        return np.array(X)

    raise ValueError(
        f"unsupported data type {type(X)}. Try list, pandas.DataFrame or numpy.ndarray"
    )


__all__ = [
    "to_dataframe",
    "to_ndarray",
]
# stdlib
import time
from functools import wraps
from typing import Any, Callable, Type

# third party
import numpy as np
import pandas as pd

# autoprognosis absolute
import autoprognosis.logger as log


def expect_type_for(idx: int, dtype: Type) -> Callable:
    """Decorator used for argument type checking.

    Args:
        idx: which positional argument should be validated.
        dtype: expected data type.

    Returns:
        Callable: the decorator

    Raises:
        ValueError: at call time, if args[idx] is missing or not of type dtype.
    """

    def expect_type(func: Callable) -> Callable:
        # wraps() keeps the decorated function's __name__/__qualname__/docstring,
        # so introspection and stacked decorators (e.g. benchmark) report the
        # real function instead of "wrapper".
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            if len(args) <= idx:
                raise ValueError("expected parameter out of range.")
            if not isinstance(args[idx], dtype):
                err = f"unsupported data type {type(args[idx])} for args[{idx}]. Expecting {dtype}"
                log.critical(err)
                raise ValueError(err)

            return func(*args, **kwargs)

        return wrapper

    return expect_type


def expect_ndarray_for(idx: int) -> Callable:
    """Shortcut: require args[idx] to be a numpy.ndarray."""
    return expect_type_for(idx, np.ndarray)


def expect_dataframe_for(idx: int) -> Callable:
    """Shortcut: require args[idx] to be a pandas.DataFrame."""
    return expect_type_for(idx, pd.DataFrame)


def benchmark(func: Callable) -> Callable:
    """Decorator used for function duration benchmarking. It is active only with DEBUG loglevel.

    Args:
        func: the function to be benchmarked.

    Returns:
        Callable: the decorator

    """

    @wraps(func)  # preserve metadata so the logged qualname stays meaningful
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        start = time.time()
        res = func(*args, **kwargs)
        end = time.time()

        log.debug(f"{func.__qualname__} took {round(end - start, 4)} seconds")
        return res

    return wrapper


__all__ = [
    "expect_type_for",
    "expect_ndarray_for",
    "expect_dataframe_for",
    "benchmark",
]
def MAE(X: np.ndarray, X_true: np.ndarray, mask: np.ndarray) -> np.ndarray:
    """
    Mean Absolute Error (MAE) between imputed variables and ground truth.

    Args:
        X : Data with imputed variables.
        X_true : Ground truth.
        mask : Missing value mask (missing if True)

    Returns:
        MAE : np.ndarray
    """
    # Restrict the comparison to entries flagged as missing, then average.
    missing = mask.astype(bool)
    return np.abs(X[missing] - X_true[missing]).mean()
def RMSE(X: np.ndarray, X_true: np.ndarray, mask: np.ndarray) -> np.ndarray:
    """
    Root Mean Squared Error (RMSE) between imputed variables and ground truth.

    Args:
        X : Data with imputed variables.
        X_true : Ground truth.
        mask : Missing value mask (missing if True)

    Returns:
        RMSE : np.ndarray

    """
    # Mean of squared errors over the masked (missing) entries, then sqrt.
    missing = mask.astype(bool)
    squared_err = (X[missing] - X_true[missing]) ** 2
    return np.sqrt(squared_err.mean())
class numpy_encoder(json.JSONEncoder):
    """JSON encoder that understands numpy scalar and array types.

    Converts numpy integers/floats/bools to their native Python equivalents
    and ndarrays to (nested) lists; anything else falls back to the default
    encoder, which raises TypeError for unsupported objects.
    """

    def default(self, obj: Any) -> Any:
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.bool_):
            # np.bool_ is not a Python bool subclass, so json would otherwise
            # raise TypeError on it.
            return bool(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super(numpy_encoder, self).default(obj)
| 14 | def optimize_ints(df: pd.DataFrame) -> pd.DataFrame: 15 | ints = df.select_dtypes(include=["int64"]).columns.tolist() 16 | df[ints] = df[ints].apply(pd.to_numeric, downcast="integer") 17 | return df 18 | 19 | 20 | def optimize_objects(df: pd.DataFrame, datetime_features: List[str]) -> pd.DataFrame: 21 | for col in df.select_dtypes(include=["object"]): 22 | if col not in datetime_features: 23 | num_unique_values = len(df[col].unique()) 24 | num_total_values = len(df[col]) 25 | if float(num_unique_values) / num_total_values < 0.5: 26 | df[col] = df[col].astype("category") 27 | else: 28 | df[col] = pd.to_datetime(df[col]) 29 | return df 30 | 31 | 32 | def compress_df(df: pd.DataFrame) -> pd.DataFrame: 33 | df = optimize_floats(df) 34 | df = optimize_ints(df) 35 | df = optimize_objects(df, []) 36 | 37 | return df 38 | 39 | 40 | def read_csv_compressed(path: str) -> pd.DataFrame: 41 | df = pd.read_csv(path) 42 | 43 | return compress_df(df) 44 | -------------------------------------------------------------------------------- /src/autoprognosis/utils/parallel.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | import multiprocessing 3 | import os 4 | 5 | # autoprognosis absolute 6 | import autoprognosis.logger as log 7 | 8 | 9 | def n_opt_jobs() -> int: 10 | try: 11 | n_jobs = int(os.environ["N_OPT_JOBS"]) 12 | except BaseException as e: 13 | log.debug(f"failed to get N_JOBS {e}") 14 | n_jobs = 2 15 | log.debug(f"Using {n_jobs} cores for HP") 16 | return n_jobs 17 | 18 | 19 | def n_learner_jobs() -> int: 20 | try: 21 | n_jobs = int(os.environ["N_LEARNER_JOBS"]) 22 | except BaseException as e: 23 | n_jobs = multiprocessing.cpu_count() 24 | log.debug(f"failed to get N_LEARNER_JOBS {e}") 25 | log.debug(f"Using {n_jobs} cores for learners") 26 | return n_jobs 27 | -------------------------------------------------------------------------------- /src/autoprognosis/utils/redis.py: 
class RedisBackend:
    """Thin wrapper bundling a redis client and an Optuna journal storage.

    Both are created against the same ``redis://{host}:{port}/`` URL, whose
    defaults come from the ``REDIS_HOST``/``REDIS_PORT`` environment variables.
    """

    def __init__(
        self,
        host: str = REDIS_HOST,
        port: str = REDIS_PORT,
        auth: bool = False,  # NOTE(review): unused — confirm whether credentials were meant to be wired into the URL
    ):
        self.url = f"redis://{host}:{port}/"

        # Optuna storage backed by a Redis journal at the same endpoint.
        self._optuna_storage = JournalStorage(JournalRedisStorage(url=self.url))
        self._client = redis.Redis.from_url(self.url)

    def optuna(self) -> JournalStorage:
        """Return the Optuna-compatible storage backend."""
        return self._optuna_storage

    def client(self) -> redis.Redis:
        """Return the raw redis client."""
        return self._client
load_from_file(path) 42 | 43 | 44 | def dataframe_hash(df: pd.DataFrame) -> str: 45 | """Dataframe hashing, used for caching/backups""" 46 | df.columns = df.columns.astype(str) 47 | cols = sorted(list(df.columns)) 48 | return str(abs(pd.util.hash_pandas_object(df[cols].fillna(0)).sum())) 49 | -------------------------------------------------------------------------------- /src/autoprognosis/utils/third_party/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/utils/third_party/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/utils/torch.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import torch 4 | 5 | 6 | def one_hot_encoder(arr: np.ndarray) -> torch.Tensor: 7 | arr = np.asarray(arr) 8 | n_values = np.max(arr) + 1 9 | 10 | result = np.eye(n_values)[arr] 11 | return torch.from_numpy(result).long() 12 | -------------------------------------------------------------------------------- /src/autoprognosis/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.22" 2 | -------------------------------------------------------------------------------- /tests/apps/test_classifiers_app.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from pathlib import Path 3 | 4 | # third party 5 | import numpy as np 6 | from sklearn.datasets import load_iris 7 | 8 | # autoprognosis absolute 9 | from autoprognosis.deploy.build import Builder 10 | from autoprognosis.deploy.proto import NewClassificationAppProto 11 | from autoprognosis.studies.classifiers import ClassifierStudy 12 | from autoprognosis.utils.serialization import load_from_file 13 | 14 | 15 | def test_sanity(): 
16 | X, Y = load_iris(return_X_y=True, as_frame=True) 17 | 18 | df = X.copy() 19 | df["target"] = Y 20 | 21 | df.loc[:2, "sepal length (cm)"] = np.nan 22 | 23 | workspace = Path("workspace") 24 | workspace.mkdir(parents=True, exist_ok=True) 25 | 26 | study_name = "test_demonstrator_classification" 27 | 28 | study = ClassifierStudy( 29 | study_name=study_name, 30 | dataset=df, # pandas DataFrame 31 | target="target", # the label column in the dataset 32 | timeout=60, # timeout for optimization for each classfier. Default: 600 seconds 33 | num_iter=5, 34 | num_study_iter=1, 35 | classifiers=["logistic_regression"], 36 | workspace=workspace, 37 | ) 38 | 39 | study.run() 40 | 41 | dataset_path = workspace / "demo_dataset_classification.csv" 42 | df.to_csv(dataset_path, index=None) 43 | 44 | name = "AutoPrognosis demo: Classification" 45 | model_path = workspace / study_name / "model.p" 46 | 47 | target_column = "target" 48 | task_type = "classification" 49 | 50 | task = Builder( 51 | NewClassificationAppProto( 52 | **{ 53 | "name": name, 54 | "type": task_type, 55 | "dataset_path": str(dataset_path), 56 | "model_path": str(model_path), 57 | "target_column": target_column, 58 | "explainers": ["kernel_shap"], 59 | "imputers": [], 60 | "plot_alternatives": [], 61 | "comparative_models": [ 62 | ( 63 | "Logistic regression", # display name 64 | "logistic_regression", # autoprognosis plugin name 65 | {}, # plugin args 66 | ), 67 | ], 68 | "auth": False, 69 | } 70 | ), 71 | ) 72 | 73 | app_path = task.run() 74 | 75 | app = load_from_file(app_path) 76 | 77 | assert app["title"] == name 78 | assert app["type"] == "classification" 79 | assert app["banner_title"] == f"{name} study" 80 | assert len(app["models"]) > 0 81 | assert "encoders" in app 82 | assert "menu_components" in app 83 | assert "column_types" in app 84 | -------------------------------------------------------------------------------- /tests/bindings/R/test_classification.R: 
-------------------------------------------------------------------------------- 1 | library(reticulate) 2 | 3 | # geomloss bug 4 | py_install("numpy", pip = TRUE) 5 | py_install("torch", pip = TRUE) 6 | 7 | # install autoprognosis 8 | py_install(".", pip = TRUE) 9 | 10 | pathlib <- import("pathlib", convert=FALSE) 11 | warnings <- import("warnings", convert=FALSE) 12 | autoprognosis <- import("autoprognosis", convert=FALSE) 13 | 14 | warnings$filterwarnings('ignore') 15 | 16 | Path = pathlib$Path 17 | ClassifierStudy = autoprognosis$studies$classifiers$ClassifierStudy 18 | load_model_from_file = autoprognosis$utils$serialization$load_model_from_file 19 | evaluate_estimator = autoprognosis$utils$tester$evaluate_estimator 20 | workspace <- Path("workspace") 21 | study_name <- "example_classifier" 22 | 23 | # Load the data 24 | data("iris") 25 | target <- "Species" 26 | 27 | # Create the AutoPrognosis Study 28 | study <- ClassifierStudy( 29 | dataset = iris, 30 | target = target, 31 | study_name=study_name, 32 | num_iter=as.integer(10), 33 | num_study_iter=as.integer(2), 34 | timeout=as.integer(60), 35 | classifiers=list("logistic_regression", "catboost"), 36 | workspace=workspace 37 | ) 38 | 39 | study$run() 40 | 41 | # Load the optimal model - if exists 42 | output <- sprintf("%s/%s/model.p", workspace, study_name) 43 | 44 | model <- load_model_from_file(output) 45 | # The model is not fitted yet here 46 | 47 | # Benchmark the model 48 | targets <- c(target) 49 | X <- iris[ , !(names(iris) %in% targets)] 50 | Y = iris[, target] 51 | 52 | metrics <- evaluate_estimator(model, X, Y) 53 | 54 | # Fit the model 55 | model$fit(X, Y) 56 | 57 | sprintf("Performance metrics %s", metrics["str"]) 58 | 59 | # Predict using the model 60 | model$predict_proba(X) 61 | -------------------------------------------------------------------------------- /tests/bindings/R/test_classification_with_missing_data.R: 
-------------------------------------------------------------------------------- 1 | library(reticulate) 2 | 3 | # geomloss bug 4 | py_install("numpy", pip = TRUE) 5 | py_install("torch", pip = TRUE) 6 | 7 | # autoprognosis install 8 | py_install(".", pip = TRUE) 9 | 10 | pathlib <- import("pathlib", convert=FALSE) 11 | warnings <- import("warnings", convert=FALSE) 12 | autoprognosis <- import("autoprognosis", convert=FALSE) 13 | 14 | warnings$filterwarnings('ignore') 15 | 16 | Path = pathlib$Path 17 | ClassifierStudy = autoprognosis$studies$classifiers$ClassifierStudy 18 | load_model_from_file = autoprognosis$utils$serialization$load_model_from_file 19 | evaluate_estimator = autoprognosis$utils$tester$evaluate_estimator 20 | workspace <- Path("workspace") 21 | study_name <- "example_classifier_with_miss" 22 | 23 | # Load the data 24 | adult <- read.table('https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data', 25 | sep = ',', fill = F, strip.white = T) 26 | colnames(adult) <- c('age', 'workclass', 'fnlwgt', 'educatoin', 27 | 'educatoin_num', 'marital_status', 'occupation', 'relationship', 'race', 'sex', 28 | 'capital_gain', 'capital_loss', 'hours_per_week', 'native_country', 'income') 29 | adult[adult == "?"] <- NA 30 | adult <- adult[ , !(names(adult) %in% c("native_country"))] 31 | 32 | df <- adult 33 | 34 | target <- "income" 35 | 36 | # Create the AutoPrognosis Study 37 | study <- ClassifierStudy( 38 | dataset = df, 39 | target = target, 40 | study_name=study_name, 41 | num_iter=as.integer(10), 42 | num_study_iter=as.integer(2), 43 | timeout=as.integer(60), 44 | classifiers=list("logistic_regression", "catboost"), 45 | workspace=workspace, 46 | nan_placeholder='NA' 47 | ) 48 | 49 | study$run() 50 | 51 | # Load the optimal model - if exists 52 | output <- sprintf("%s/%s/model.p", workspace, study_name) 53 | 54 | model <- load_model_from_file(output) 55 | # The model is not fitted yet here 56 | 57 | # Benchmark the model 58 | targets <- 
c(target) 59 | X <- df[ , !(names(df) %in% targets)] 60 | Y = df[, target] 61 | 62 | metrics <- evaluate_estimator(model, X, Y) 63 | 64 | # Fit the model 65 | model$fit(X, Y) 66 | 67 | sprintf("Performance metrics %s", metrics["str"]) 68 | 69 | # Predict using the model 70 | model$predict_proba(X) 71 | -------------------------------------------------------------------------------- /tests/bindings/R/test_regression.R: -------------------------------------------------------------------------------- 1 | library(reticulate) 2 | 3 | # geomloss bug 4 | py_install("numpy", pip = TRUE) 5 | py_install("torch", pip = TRUE) 6 | 7 | # autoprognosis install 8 | py_install(".", pip = TRUE) 9 | 10 | pathlib <- import("pathlib", convert=FALSE) 11 | warnings <- import("warnings", convert=FALSE) 12 | autoprognosis <- import("autoprognosis", convert=FALSE) 13 | 14 | warnings$filterwarnings('ignore') 15 | 16 | Path = pathlib$Path 17 | RegressionStudy = autoprognosis$studies$regression$RegressionStudy 18 | load_model_from_file = autoprognosis$utils$serialization$load_model_from_file 19 | evaluate_regression = autoprognosis$utils$tester$evaluate_regression 20 | 21 | workspace <- Path("workspace") 22 | study_name <- "example_regression" 23 | 24 | # Load dataset 25 | airfoil <- read.csv( 26 | url("https://archive.ics.uci.edu/ml/machine-learning-databases/00291/airfoil_self_noise.dat"), 27 | sep = "\t", 28 | header = FALSE, 29 | ) 30 | 31 | target <- "V6" 32 | 33 | # Create AutoPrognosis Study 34 | study <- RegressionStudy( 35 | dataset = airfoil, 36 | target = target, 37 | study_name=study_name, 38 | num_iter=as.integer(10), 39 | num_study_iter=as.integer(2), 40 | timeout=as.integer(60), 41 | regressors=list("linear_regression", "kneighbors_regressor"), 42 | workspace=workspace 43 | ) 44 | 45 | study$run() 46 | 47 | # Load the optimal model - if exists 48 | output <- sprintf("%s/%s/model.p", workspace, study_name) 49 | 50 | model <- load_model_from_file(output) 51 | # The model is not 
fitted yet here

# Benchmark the model
targets <- c(target)
# BUGFIX: was `names(iris)` — copy-paste from the iris classification example.
# This script operates on the airfoil dataset, so the columns to drop must
# come from `names(airfoil)`.
X <- airfoil[ , !(names(airfoil) %in% targets)]
Y = airfoil[, target]

metrics <- evaluate_regression(model, X, Y)

sprintf("Performance metrics %s", metrics["str"])

# Fit the model
model$fit(X, Y)

# Predict
model$predict(X)

# ---- tests/bindings/R/test_survival_analysis.R ----
library(reticulate)
library(survival)

# geomloss bug
py_install("numpy", pip = TRUE)
py_install("torch", pip = TRUE)

# install autoprognosis
py_install(".", pip = TRUE)

pathlib <- import("pathlib", convert=FALSE)
warnings <- import("warnings", convert=FALSE)
autoprognosis <- import("autoprognosis", convert=FALSE)
np <- import("numpy", convert=FALSE)

warnings$filterwarnings('ignore')

Path = pathlib$Path
RiskEstimationStudy = autoprognosis$studies$risk_estimation$RiskEstimationStudy
load_model_from_file = autoprognosis$utils$serialization$load_model_from_file
evaluate_survival_estimator = autoprognosis$utils$tester$evaluate_survival_estimator

workspace <- Path("workspace")
study_name <- "example_risk_estimation"

# Load the data
data(cancer, package="survival")

targets <- c("dtime", "death")
df <- rotterdam

X <- df[ , !(names(df) %in% targets)]
Y <- df[, "death"]
T <- df[, "dtime"]

eval_time_horizons <- list(2000)

# Create the AutoPrognosis Study
study <- RiskEstimationStudy(
    dataset = df,
    target = "death",
    time_to_event="dtime",
    time_horizons = eval_time_horizons,
    study_name=study_name,
    num_iter=as.integer(10),
    num_study_iter=as.integer(2),
    timeout=as.integer(60),
risk_estimators=list("cox_ph", "survival_xgboost"), 49 | workspace=workspace 50 | ) 51 | 52 | study$run() 53 | 54 | # Load the optimal model - if exists 55 | output <- sprintf("%s/%s/model.p", workspace, study_name) 56 | 57 | model <- load_model_from_file(output) 58 | # The model is not fitted yet here 59 | 60 | # Benchmark the model 61 | metrics <- evaluate_survival_estimator(model, X, T, Y, eval_time_horizons) 62 | 63 | # Fit the model 64 | model$fit(X, T, Y) 65 | 66 | sprintf("Performance metrics %s", metrics["str"]) 67 | 68 | # Predict using the model 69 | model$predict(X) 70 | -------------------------------------------------------------------------------- /tests/bugfixing/test_ensemble_crash.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | import random 3 | from pathlib import Path 4 | 5 | # third party 6 | import numpy as np 7 | from sklearn.datasets import load_breast_cancer 8 | 9 | # autoprognosis absolute 10 | from autoprognosis.studies.classifiers import ClassifierStudy 11 | 12 | 13 | def test_ensemble_crash() -> None: 14 | X, Y = load_breast_cancer(return_X_y=True, as_frame=True) 15 | 16 | # Simulate missingness 17 | total_len = len(X) 18 | 19 | for col in ["mean texture", "mean compactness"]: 20 | indices = random.sample(range(0, total_len), 10) 21 | X.loc[indices, col] = np.nan 22 | 23 | dataset = X.copy() 24 | dataset["target"] = Y 25 | 26 | workspace = Path("workspace") 27 | workspace.mkdir(parents=True, exist_ok=True) 28 | 29 | study_name = "classification_example_imputation" 30 | 31 | study = ClassifierStudy( 32 | study_name=study_name, 33 | dataset=dataset, 34 | target="target", 35 | num_iter=1, 36 | num_study_iter=1, 37 | timeout=1, 38 | imputers=["mean", "ice", "median"], 39 | classifiers=["logistic_regression", "lda"], 40 | feature_scaling=[], # feature preprocessing is disabled 41 | score_threshold=0.4, 42 | workspace=workspace, 43 | ) 44 | 45 | study.run() 46 | 
# --- tests/bugfixing/test_not_fitted_error.py ---

# third party
import pandas as pd

# autoprognosis absolute
from autoprognosis.plugins.prediction import Predictions


def test_train_error() -> None:
    """Predicting before fit must raise with the canonical message."""
    model = Predictions().get("logistic_regression")

    err = ""
    try:
        model.predict_proba(pd.DataFrame([]))
    except BaseException as e:
        err = str(e)

    assert "Fit the model first" == err


# --- tests/conftest.py ---

# stdlib
import sys
import warnings

# autoprognosis absolute
import autoprognosis.logger as log

warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

log.add(sink=sys.stderr, level="ERROR")


# --- tests/explorers/explorers_mocks.py ---

# stdlib
import datetime
from typing import Any

# autoprognosis absolute
from autoprognosis.hooks import Hooks


class MockHook(Hooks):
    """Hook that requests cancellation shortly after construction, used to
    exercise the explorers' early-stop path."""

    def __init__(self) -> None:
        self._started_at = datetime.datetime.utcnow()

    def cancel(self) -> bool:
        # BUGFIX(comment): the stale comment claimed "cancel after 10
        # seconds", but the threshold below is 2 seconds.
        # cancel after 2 seconds
        time_passed = datetime.datetime.utcnow() - self._started_at

        return time_passed.total_seconds() > 2

    def heartbeat(
        self, topic: str, subtopic: str, event_type: str, **kwargs: Any
    ) -> None:
        pass

    def finish(self) -> None:
        pass
/tests/explorers/test_selector.py: -------------------------------------------------------------------------------- 1 | # autoprognosis absolute 2 | from autoprognosis.explorers.core.selector import PipelineSelector 3 | 4 | 5 | def test_sanity() -> None: 6 | clf = PipelineSelector("lda") 7 | 8 | assert len(clf.imputers) == 0 9 | assert len(clf.feature_scaling) > 0 10 | 11 | assert clf.classifier.name() == "lda" 12 | assert clf.name() == "lda" 13 | 14 | assert len(clf.hyperparameter_space()) > 0 15 | -------------------------------------------------------------------------------- /tests/plugins/explainers/test_kernel_shap.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Tuple 3 | 4 | import numpy as np 5 | import pytest 6 | 7 | # third party 8 | from lifelines.datasets import load_rossi 9 | from sklearn.datasets import load_breast_cancer 10 | from sklearn.model_selection import train_test_split 11 | 12 | # autoprognosis absolute 13 | from autoprognosis.plugins.explainers.plugin_kernel_shap import plugin 14 | from autoprognosis.plugins.pipeline import Pipeline 15 | from autoprognosis.plugins.prediction.classifiers import Classifiers 16 | from autoprognosis.plugins.prediction.risk_estimation.plugin_cox_ph import ( 17 | plugin as CoxPH, 18 | ) 19 | from autoprognosis.plugins.preprocessors import Preprocessors 20 | 21 | 22 | def dataset() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: 23 | X, y = load_breast_cancer(return_X_y=True) 24 | return train_test_split(X, y, test_size=0.05) 25 | 26 | 27 | @pytest.mark.slow 28 | @pytest.mark.parametrize("classifier", ["logistic_regression", "xgboost"]) 29 | def test_plugin_sanity(classifier: str) -> None: 30 | X_train, X_test, y_train, y_test = dataset() 31 | 32 | template = Pipeline( 33 | [ 34 | Preprocessors().get_type("minmax_scaler").fqdn(), 35 | Classifiers().get_type(classifier).fqdn(), 36 | ] 37 | ) 38 | 39 | pipeline = template() 40 | 41 | 
explainer = plugin( 42 | pipeline, X_train, y_train, subsample=100, task_type="classification" 43 | ) 44 | 45 | result = explainer.explain(X_test) 46 | 47 | assert result.shape == X_test.shape 48 | 49 | 50 | def test_plugin_name() -> None: 51 | assert plugin.name() == "kernel_shap" 52 | 53 | 54 | @pytest.mark.slow 55 | def test_plugin_kernel_shap_survival_prediction() -> None: 56 | rossi = load_rossi() 57 | 58 | X = rossi.drop(["week", "arrest"], axis=1) 59 | Y = rossi["arrest"] 60 | T = rossi["week"] 61 | 62 | surv = CoxPH().fit(X, T, Y) 63 | 64 | explainer = plugin( 65 | surv, 66 | X, 67 | Y, 68 | time_to_event=T, 69 | eval_times=[ 70 | int(T[Y.iloc[:] == 1].quantile(0.50)), 71 | int(T[Y.iloc[:] == 1].quantile(0.75)), 72 | ], 73 | task_type="risk_estimation", 74 | ) 75 | 76 | result = explainer.explain(X[:3]) 77 | 78 | assert result.shape == (3, X.shape[1]) 79 | -------------------------------------------------------------------------------- /tests/plugins/explainers/test_lime.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Tuple 3 | 4 | import numpy as np 5 | import pytest 6 | 7 | # third party 8 | from lifelines.datasets import load_rossi 9 | from sklearn.datasets import load_breast_cancer 10 | from sklearn.model_selection import train_test_split 11 | 12 | # autoprognosis absolute 13 | from autoprognosis.plugins.explainers.plugin_lime import plugin 14 | from autoprognosis.plugins.pipeline import Pipeline 15 | from autoprognosis.plugins.prediction.classifiers import Classifiers 16 | from autoprognosis.plugins.prediction.risk_estimation.plugin_cox_ph import ( 17 | plugin as CoxPH, 18 | ) 19 | from autoprognosis.plugins.preprocessors import Preprocessors 20 | 21 | 22 | def dataset() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: 23 | X, y = load_breast_cancer(return_X_y=True) 24 | return train_test_split(X, y, test_size=0.05) 25 | 26 | 27 | @pytest.mark.slow 28 | 
@pytest.mark.parametrize("classifier", ["logistic_regression", "xgboost"]) 29 | def test_plugin_sanity(classifier: str) -> None: 30 | X_train, X_test, y_train, y_test = dataset() 31 | 32 | template = Pipeline( 33 | [ 34 | Preprocessors().get_type("minmax_scaler").fqdn(), 35 | Classifiers().get_type(classifier).fqdn(), 36 | ] 37 | ) 38 | 39 | pipeline = template() 40 | 41 | explainer = plugin(pipeline, X_train, y_train, task_type="classification") 42 | 43 | result = explainer.explain(X_test[:2]) 44 | 45 | assert len(result) == 2 46 | 47 | 48 | def test_plugin_name() -> None: 49 | assert plugin.name() == "lime" 50 | 51 | 52 | @pytest.mark.slow 53 | def test_plugin_lime_survival_prediction() -> None: 54 | rossi = load_rossi() 55 | 56 | X = rossi.drop(["week", "arrest"], axis=1) 57 | Y = rossi["arrest"] 58 | T = rossi["week"] 59 | 60 | surv = CoxPH().fit(X, T, Y) 61 | 62 | explainer = plugin( 63 | surv, 64 | X, 65 | Y, 66 | time_to_event=T, 67 | eval_times=[ 68 | int(T[Y.iloc[:] == 1].quantile(0.50)), 69 | int(T[Y.iloc[:] == 1].quantile(0.75)), 70 | ], 71 | task_type="risk_estimation", 72 | ) 73 | 74 | result = explainer.explain(X.head(1)) 75 | 76 | assert result.shape == (1, X.shape[1]) 77 | -------------------------------------------------------------------------------- /tests/plugins/explainers/test_shap_permutation_sampler.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Tuple 3 | 4 | import numpy as np 5 | import pytest 6 | 7 | # third party 8 | from lifelines.datasets import load_rossi 9 | from sklearn.datasets import load_breast_cancer 10 | from sklearn.model_selection import train_test_split 11 | 12 | # autoprognosis absolute 13 | from autoprognosis.plugins.explainers.plugin_shap_permutation_sampler import plugin 14 | from autoprognosis.plugins.pipeline import Pipeline 15 | from autoprognosis.plugins.prediction.classifiers import Classifiers 16 | from 
autoprognosis.plugins.prediction.risk_estimation.plugin_cox_ph import ( 17 | plugin as CoxPH, 18 | ) 19 | from autoprognosis.plugins.preprocessors import Preprocessors 20 | 21 | 22 | def dataset() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: 23 | X, y = load_breast_cancer(return_X_y=True) 24 | return train_test_split(X, y, test_size=0.05) 25 | 26 | 27 | @pytest.mark.slow 28 | @pytest.mark.parametrize("classifier", ["logistic_regression", "xgboost"]) 29 | def test_plugin_sanity(classifier: str) -> None: 30 | X_train, X_test, y_train, y_test = dataset() 31 | 32 | template = Pipeline( 33 | [ 34 | Preprocessors().get_type("minmax_scaler").fqdn(), 35 | Classifiers().get_type(classifier).fqdn(), 36 | ] 37 | ) 38 | 39 | pipeline = template() 40 | 41 | explainer = plugin(pipeline, X_train, y_train, task_type="classification") 42 | 43 | result = explainer.explain(X_test) 44 | 45 | assert len(result) == len(X_test) 46 | 47 | 48 | def test_plugin_name() -> None: 49 | assert plugin.name() == "shap_permutation_sampler" 50 | 51 | 52 | @pytest.mark.slow 53 | def test_plugin_survival_prediction() -> None: 54 | rossi = load_rossi() 55 | 56 | X = rossi.drop(["week", "arrest"], axis=1) 57 | Y = rossi["arrest"] 58 | T = rossi["week"] 59 | 60 | surv = CoxPH().fit(X, T, Y) 61 | 62 | explainer = plugin( 63 | surv, 64 | X, 65 | Y, 66 | time_to_event=T, 67 | eval_times=[ 68 | int(T[Y.iloc[:] == 1].quantile(0.50)), 69 | int(T[Y.iloc[:] == 1].quantile(0.75)), 70 | ], 71 | task_type="risk_estimation", 72 | ) 73 | 74 | result = explainer.explain(X[:3]) 75 | 76 | assert result.shape == (3, X.shape[1], 2) 77 | -------------------------------------------------------------------------------- /tests/plugins/imputers/test_api.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Any, List 3 | 4 | # third party 5 | import pytest 6 | 7 | # autoprognosis absolute 8 | import autoprognosis.plugins.core.params as params 9 | 
from autoprognosis.plugins.imputers import ImputerPlugin, Imputers 10 | from autoprognosis.plugins.imputers.plugin_mean import plugin as mock_model 11 | 12 | 13 | @pytest.fixture 14 | def ctx() -> Imputers: 15 | return Imputers() 16 | 17 | 18 | class Mock(ImputerPlugin): 19 | def __init__(self, **kwargs: Any) -> None: 20 | model = mock_model(**kwargs) 21 | 22 | super().__init__(model) 23 | 24 | @staticmethod 25 | def name() -> str: 26 | return "test" 27 | 28 | @staticmethod 29 | def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]: 30 | return [] 31 | 32 | 33 | class Invalid: 34 | def __init__(self) -> None: 35 | pass 36 | 37 | 38 | def test_load(ctx: Imputers) -> None: 39 | assert len(ctx._plugins) == 0 40 | ctx.get("mean") 41 | ctx.get("median") 42 | assert len(ctx._plugins) == 2 43 | assert len(ctx._plugins) == len(ctx) 44 | 45 | 46 | def test_list(ctx: Imputers) -> None: 47 | ctx.get("mean") 48 | assert "mean" in ctx.list() 49 | 50 | 51 | def test_add_get(ctx: Imputers) -> None: 52 | ctx.add("mock", Mock) 53 | 54 | assert "mock" in ctx.list() 55 | 56 | mock = ctx.get("mock") 57 | 58 | assert mock.name() == "test" 59 | 60 | ctx.reload() 61 | assert "mock" not in ctx.list() 62 | 63 | 64 | def test_add_get_invalid(ctx: Imputers) -> None: 65 | with pytest.raises(ValueError): 66 | ctx.add("invalid", Invalid) 67 | 68 | assert "mock" not in ctx.list() 69 | 70 | with pytest.raises(ValueError): 71 | ctx.get("mock") 72 | 73 | 74 | def test_iter(ctx: Imputers) -> None: 75 | for v in ctx: 76 | assert ctx[v].name() != "" 77 | -------------------------------------------------------------------------------- /tests/plugins/imputers/test_imputation_serde.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Tuple 3 | 4 | # third party 5 | import numpy as np 6 | import pytest 7 | 8 | # autoprognosis absolute 9 | from autoprognosis.plugins import Imputers 10 | from 
autoprognosis.plugins.utils.simulate import simulate_nan 11 | from autoprognosis.utils.serialization import load_model, save_model 12 | 13 | 14 | def dataset(mechanism: str, p_miss: float) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: 15 | np.random.seed(0) 16 | 17 | n = 20 18 | p = 4 19 | 20 | mean = np.repeat(0, p) 21 | cov = 0.5 * (np.ones((p, p)) + np.eye(p)) 22 | 23 | x = np.random.multivariate_normal(mean, cov, size=n) 24 | x_simulated = simulate_nan(x, p_miss, mechanism) 25 | 26 | mask = x_simulated["mask"] 27 | x_miss = x_simulated["X_incomp"] 28 | 29 | return x, x_miss, mask 30 | 31 | 32 | @pytest.mark.slow 33 | @pytest.mark.parametrize("plugin", Imputers().list()) 34 | def test_serialization(plugin: str) -> None: 35 | x, x_miss, mask = dataset("MAR", 0.3) 36 | 37 | estimator = Imputers().get(plugin) 38 | 39 | estimator.fit_transform(x_miss) 40 | 41 | buff = estimator.save() 42 | estimator_new = Imputers().get_type(plugin).load(buff) 43 | 44 | estimator_new.transform(x_miss) 45 | 46 | 47 | @pytest.mark.slow 48 | @pytest.mark.parametrize("plugin", Imputers().list()) 49 | def test_pickle(plugin: str) -> None: 50 | x, x_miss, mask = dataset("MAR", 0.3) 51 | 52 | estimator = Imputers().get(plugin) 53 | 54 | estimator.fit_transform(x_miss) 55 | 56 | buff = save_model(estimator) 57 | estimator_new = load_model(buff) 58 | 59 | estimator_new.transform(x_miss) 60 | -------------------------------------------------------------------------------- /tests/plugins/imputers/test_imputers_api.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Any, List 3 | 4 | # third party 5 | import pytest 6 | 7 | # autoprognosis absolute 8 | import autoprognosis.plugins.core.params as params 9 | from autoprognosis.plugins.imputers import ImputerPlugin, Imputers 10 | from autoprognosis.plugins.imputers.plugin_mean import plugin as mock_model 11 | 12 | 13 | @pytest.fixture 14 | def ctx() -> Imputers: 15 | return 
Imputers() 16 | 17 | 18 | class Mock(ImputerPlugin): 19 | def __init__(self, **kwargs: Any) -> None: 20 | model = mock_model(**kwargs) 21 | 22 | super().__init__(model) 23 | 24 | @staticmethod 25 | def name() -> str: 26 | return "test" 27 | 28 | @staticmethod 29 | def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]: 30 | return [] 31 | 32 | 33 | class Invalid: 34 | def __init__(self) -> None: 35 | pass 36 | 37 | 38 | def test_load(ctx: Imputers) -> None: 39 | assert len(ctx._plugins) == 0 40 | ctx.get("mean") 41 | ctx.get("median") 42 | assert len(ctx._plugins) == 2 43 | 44 | 45 | def test_list(ctx: Imputers) -> None: 46 | ctx.get("mean") 47 | assert "mean" in ctx.list() 48 | 49 | 50 | def test_add_get(ctx: Imputers) -> None: 51 | ctx.add("mock", Mock) 52 | 53 | assert "mock" in ctx.list() 54 | 55 | mock = ctx.get("mock") 56 | 57 | assert mock.name() == "test" 58 | 59 | ctx.reload() 60 | assert "mock" not in ctx.list() 61 | 62 | 63 | def test_add_get_invalid(ctx: Imputers) -> None: 64 | with pytest.raises(ValueError): 65 | ctx.add("invalid", Invalid) 66 | 67 | assert "mock" not in ctx.list() 68 | 69 | with pytest.raises(ValueError): 70 | ctx.get("mock") 71 | 72 | 73 | def test_iter(ctx: Imputers) -> None: 74 | for v in ctx: 75 | assert ctx[v].name() != "" 76 | -------------------------------------------------------------------------------- /tests/plugins/imputers/test_mean.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | 6 | # autoprognosis absolute 7 | from autoprognosis.plugins.imputers import ImputerPlugin, Imputers 8 | from autoprognosis.plugins.imputers.plugin_mean import plugin 9 | 10 | 11 | def from_api() -> ImputerPlugin: 12 | return Imputers().get("mean") 13 | 14 | 15 | def from_module() -> ImputerPlugin: 16 | return plugin() 17 | 18 | 19 | def from_serde() -> ImputerPlugin: 20 | buff = plugin().save() 21 | 
return plugin().load(buff) 22 | 23 | 24 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 25 | def test_mean_plugin_sanity(test_plugin: ImputerPlugin) -> None: 26 | assert test_plugin is not None 27 | 28 | 29 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 30 | def test_mean_plugin_name(test_plugin: ImputerPlugin) -> None: 31 | assert test_plugin.name() == "mean" 32 | 33 | 34 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 35 | def test_mean_plugin_type(test_plugin: ImputerPlugin) -> None: 36 | assert test_plugin.type() == "imputer" 37 | 38 | 39 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 40 | def test_mean_plugin_hyperparams(test_plugin: ImputerPlugin) -> None: 41 | assert test_plugin.hyperparameter_space() == [] 42 | 43 | 44 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 45 | def test_mean_plugin_fit_transform(test_plugin: ImputerPlugin) -> None: 46 | res = test_plugin.fit_transform( 47 | pd.DataFrame( 48 | [[1, 1, 1, 1], [np.nan, np.nan, np.nan, np.nan], [3, 3, 9, 9], [2, 2, 2, 2]] 49 | ) 50 | ) 51 | 52 | np.testing.assert_array_equal( 53 | res, [[1, 1, 1, 1], [2, 2, 4, 4], [3, 3, 9, 9], [2, 2, 2, 2]] 54 | ) 55 | -------------------------------------------------------------------------------- /tests/plugins/imputers/test_median.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | 6 | # autoprognosis absolute 7 | from autoprognosis.plugins.imputers import ImputerPlugin, Imputers 8 | from autoprognosis.plugins.imputers.plugin_median import plugin 9 | 10 | 11 | def from_api() -> ImputerPlugin: 12 | return Imputers().get("median") 13 | 14 | 15 | def from_module() -> ImputerPlugin: 16 | return plugin() 17 | 18 | 19 | def from_serde() -> ImputerPlugin: 20 | buff = 
plugin().save() 21 | return plugin().load(buff) 22 | 23 | 24 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 25 | def test_median_plugin_sanity(test_plugin: ImputerPlugin) -> None: 26 | assert test_plugin is not None 27 | 28 | 29 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 30 | def test_median_plugin_name(test_plugin: ImputerPlugin) -> None: 31 | assert test_plugin.name() == "median" 32 | 33 | 34 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 35 | def test_median_plugin_type(test_plugin: ImputerPlugin) -> None: 36 | assert test_plugin.type() == "imputer" 37 | 38 | 39 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 40 | def test_median_plugin_hyperparams(test_plugin: ImputerPlugin) -> None: 41 | assert test_plugin.hyperparameter_space() == [] 42 | 43 | 44 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 45 | def test_median_plugin_fit_transform(test_plugin: ImputerPlugin) -> None: 46 | res = test_plugin.fit_transform( 47 | pd.DataFrame( 48 | [[1, 1, 1, 1], [np.nan, np.nan, np.nan, np.nan], [3, 3, 9, 9], [2, 2, 2, 2]] 49 | ) 50 | ) 51 | 52 | np.testing.assert_array_equal( 53 | res, [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 9, 9], [2, 2, 2, 2]] 54 | ) 55 | -------------------------------------------------------------------------------- /tests/plugins/imputers/test_most_freq.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | 6 | # autoprognosis absolute 7 | from autoprognosis.plugins.imputers import ImputerPlugin, Imputers 8 | from autoprognosis.plugins.imputers.plugin_most_frequent import plugin 9 | 10 | 11 | def from_api() -> ImputerPlugin: 12 | return Imputers().get("most_frequent") 13 | 14 | 15 | def from_module() -> ImputerPlugin: 16 | return plugin() 17 | 18 | 19 | 
def from_serde() -> ImputerPlugin:
    """Round-trip the plugin through its own save()/load() serialization."""
    buff = plugin().save()
    return plugin().load(buff)


@pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()])
def test_most_freq_plugin_sanity(test_plugin: ImputerPlugin) -> None:
    """Every construction path must yield a live plugin instance."""
    assert test_plugin is not None


@pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()])
def test_most_freq_plugin_name(test_plugin: ImputerPlugin) -> None:
    """The plugin must report its registry name."""
    assert test_plugin.name() == "most_frequent"


@pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()])
def test_most_freq_plugin_type(test_plugin: ImputerPlugin) -> None:
    """The plugin must identify itself as an imputer."""
    assert test_plugin.type() == "imputer"


@pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()])
def test_most_freq_plugin_hyperparams(test_plugin: ImputerPlugin) -> None:
    """The most-frequent imputer exposes no tunable hyperparameters."""
    assert test_plugin.hyperparameter_space() == []


@pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()])
def test_most_freq_plugin_fit_transform(test_plugin: ImputerPlugin) -> None:
    """A fully-missing row is replaced by the per-column modes: [1, 2, 1, 2]."""
    res = test_plugin.fit_transform(
        pd.DataFrame(
            [[1, 1, 1, 1], [np.nan, np.nan, np.nan, np.nan], [1, 2, 1, 2], [2, 2, 2, 2]]
        )
    )

    np.testing.assert_array_equal(
        res, [[1, 1, 1, 1], [1, 2, 1, 2], [1, 2, 1, 2], [2, 2, 2, 2]]
    )
# --------------------------------------------------------------------------------
# /tests/plugins/prediction/classifiers/test_calibration.py:
# --------------------------------------------------------------------------------
# stdlib
from typing import Tuple

# third party
import numpy as np
import pandas as pd
import pytest
from sklearn.datasets import make_blobs
from sklearn.metrics import brier_score_loss
from sklearn.model_selection import train_test_split

# autoprognosis absolute
from autoprognosis.plugins.prediction import Predictions


def gen_dataset() -> Tuple[
    pd.DataFrame,
    pd.DataFrame,
    pd.DataFrame,
    pd.DataFrame,
    pd.DataFrame,
    pd.DataFrame,
]:
    """Generate a weighted binary-classification train/test split.

    Returns (X_train, X_test, y_train, y_test, sw_train, sw_test).
    """
    n_samples = 5000
    centers = [(-5, -5), (0, 0), (5, 5)]
    X, y = make_blobs(
        n_samples=n_samples, centers=centers, shuffle=False, random_state=42
    )

    # Collapse the three blobs into two classes, split down the middle.
    y[: n_samples // 2] = 0
    y[n_samples // 2 :] = 1
    sample_weight = np.random.RandomState(42).rand(y.shape[0])

    X_train, X_test, y_train, y_test, sw_train, sw_test = train_test_split(
        X, y, sample_weight, test_size=0.9, random_state=42
    )

    return X_train, X_test, y_train, y_test, sw_train, sw_test


def supports_calibration(plugin: str) -> bool:
    """Return True when *plugin* exposes a 'calibration' hyperparameter."""
    test_plugin = Predictions().get(plugin)

    return any(
        hp.name == "calibration" for hp in test_plugin.hyperparameter_space()
    )


@pytest.mark.parametrize("plugin", Predictions().list())
def test_plugin_calibration(plugin: str) -> None:
    """Every calibration method must produce a valid Brier score."""
    if not supports_calibration(plugin):
        return

    X_train, X_test, y_train, y_test, sw_train, sw_test = gen_dataset()

    test_plugin = Predictions().get(plugin, calibration=0)
    test_plugin.fit(X_train, y_train)

    prob_no_cal = test_plugin.predict_proba(X_test).to_numpy()[:, 1]

    score_no_cal = brier_score_loss(y_test, prob_no_cal, sample_weight=sw_test)
    # A Brier score over probabilities in [0, 1] is itself bounded in [0, 1].
    assert 0 <= score_no_cal <= 1

    for method in [0, 1, 2]:
        test_plugin = Predictions().get(plugin, calibration=method)
        test_plugin.fit(X_train, y_train)

        probs = test_plugin.predict_proba(X_test).to_numpy()[:, 1]
        score = brier_score_loss(y_test, probs, sample_weight=sw_test)

        # BUGFIX: the loop previously only printed the scores and asserted
        # nothing, so a broken calibration method could never fail this test.
        assert np.isfinite(score)
        assert 0 <= score <= 1

        print(
            f"score without calibration {score_no_cal} score with calibration {score}"
        )
-------------------------------------------------------------------------------- /tests/plugins/prediction/classifiers/test_clf_serde.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Tuple 3 | 4 | # third party 5 | import numpy as np 6 | import pytest 7 | from sklearn.model_selection import train_test_split 8 | 9 | # autoprognosis absolute 10 | from autoprognosis.plugins import Predictions 11 | from autoprognosis.utils.serialization import load_model, save_model 12 | 13 | 14 | def dataset() -> Tuple[np.ndarray, np.ndarray]: 15 | rng = np.random.RandomState(1) 16 | 17 | N = 1000 18 | X = rng.randint(N, size=(N, 3)) 19 | y = rng.randint(2, size=(N)) 20 | 21 | return X, y 22 | 23 | 24 | @pytest.mark.parametrize("plugin", Predictions().list()) 25 | def test_serialization(plugin: str) -> None: 26 | X, y = dataset() 27 | 28 | X_train, X_test, y_train, y_test = train_test_split( 29 | X, y, test_size=0.2, random_state=42 30 | ) 31 | 32 | estimator = Predictions().get(plugin) 33 | 34 | estimator.fit(X_train, y_train) 35 | estimator.predict(X_test) 36 | 37 | buff = estimator.save() 38 | estimator_new = Predictions().get(plugin).load(buff) 39 | 40 | estimator_new.predict(X_test) 41 | 42 | 43 | @pytest.mark.parametrize("plugin", Predictions().list()) 44 | def test_pickle(plugin: str) -> None: 45 | X, y = dataset() 46 | 47 | X_train, X_test, y_train, y_test = train_test_split( 48 | X, y, test_size=0.2, random_state=42 49 | ) 50 | 51 | estimator = Predictions().get(plugin) 52 | 53 | estimator.fit(X_train, y_train) 54 | estimator.predict(X_test) 55 | 56 | buff = save_model(estimator) 57 | estimator_new = load_model(buff) 58 | 59 | estimator_new.predict(X_test) 60 | -------------------------------------------------------------------------------- /tests/plugins/prediction/classifiers/test_prediction_api.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from 
typing import Any, List 3 | 4 | # third party 5 | import pandas as pd 6 | import pytest 7 | 8 | # autoprognosis absolute 9 | from autoprognosis.plugins.prediction import Predictions 10 | from autoprognosis.plugins.prediction.classifiers import ClassifierPlugin 11 | 12 | 13 | @pytest.fixture 14 | def ctx() -> Predictions: 15 | return Predictions() 16 | 17 | 18 | class Mock(ClassifierPlugin): 19 | def __init__(self) -> None: 20 | super().__init__() 21 | 22 | @staticmethod 23 | def name() -> str: 24 | return "test" 25 | 26 | @staticmethod 27 | def hyperparameter_space(*args: Any, **kwargs: Any) -> List: 28 | return [] 29 | 30 | def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "Mock": 31 | return self 32 | 33 | def _predict(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame: 34 | return {} 35 | 36 | def _predict_proba( 37 | self, X: pd.DataFrame, *args: Any, **kwargs: Any 38 | ) -> pd.DataFrame: 39 | return {} 40 | 41 | def save(self) -> bytes: 42 | return b"" 43 | 44 | @classmethod 45 | def load(cls, buff: bytes) -> "Mock": 46 | return cls() 47 | 48 | 49 | class Invalid: 50 | def __init__(self) -> None: 51 | pass 52 | 53 | 54 | def test_load(ctx: Predictions) -> None: 55 | assert len(ctx._plugins) == 0 56 | ctx.get("xgboost") 57 | assert len(ctx._plugins) == 1 58 | 59 | 60 | def test_list(ctx: Predictions) -> None: 61 | ctx.get("bagging") 62 | assert "bagging" in ctx.list() 63 | assert "catboost" not in ctx.list() 64 | 65 | 66 | def test_add_get(ctx: Predictions) -> None: 67 | ctx.add("mock", Mock) 68 | 69 | assert "mock" in ctx.list() 70 | 71 | mock = ctx.get("mock") 72 | 73 | assert mock.name() == "test" 74 | 75 | ctx.reload() 76 | assert "mock" not in ctx.list() 77 | 78 | 79 | def test_add_get_invalid(ctx: Predictions) -> None: 80 | with pytest.raises(ValueError): 81 | ctx.add("invalid", Invalid) 82 | 83 | assert "mock" not in ctx.list() 84 | 85 | with pytest.raises(ValueError): 86 | ctx.get("mock") 87 | 88 | 89 | def test_iter(ctx: 
Predictions) -> None: 90 | for v in ctx: 91 | assert ctx[v].name() != "" 92 | -------------------------------------------------------------------------------- /tests/plugins/prediction/risk_estimation/benchmarks/cvd/test_aha.py: -------------------------------------------------------------------------------- 1 | # autoprognosis absolute 2 | from autoprognosis.plugins.prediction.risk_estimation.benchmarks.cvd.aha.model import ( 3 | inference, 4 | ) 5 | 6 | 7 | def test_sanity() -> None: 8 | score = inference( 9 | gender="M", 10 | age=40, 11 | tchol=160, 12 | hdlc=40, 13 | sbp=180, 14 | smoking=0, 15 | diab=0, 16 | ht_treat=1, 17 | race="W", 18 | ) 19 | assert score < 1 20 | -------------------------------------------------------------------------------- /tests/plugins/prediction/risk_estimation/benchmarks/cvd/test_fram.py: -------------------------------------------------------------------------------- 1 | # autoprognosis absolute 2 | from autoprognosis.plugins.prediction.risk_estimation.benchmarks.cvd.framingham.model import ( 3 | inference, 4 | ) 5 | 6 | 7 | def test_sanity() -> None: 8 | score = inference( 9 | sex="F", 10 | age=60, # age value 11 | total_cholesterol=204, 12 | hdl_cholesterol=38.67, 13 | systolic_blood_pressure=160, # Systolic blood pressure 14 | smoker=True, 15 | blood_pressure_med_treatment=True, 16 | ) 17 | 18 | assert score < 1 19 | -------------------------------------------------------------------------------- /tests/plugins/prediction/risk_estimation/benchmarks/cvd/test_qrisk3.py: -------------------------------------------------------------------------------- 1 | # autoprognosis absolute 2 | from autoprognosis.plugins.prediction.risk_estimation.benchmarks.cvd.qrisk3.model import ( 3 | inference, 4 | ) 5 | 6 | 7 | def test_sanity() -> None: 8 | score = inference( 9 | gender="F", 10 | age=44, # age value 11 | b_AF=1, # bool, Atrial fibrillation 12 | b_atypicalantipsy=1, # bool, On atypical antipsychotic medication 13 | b_corticosteroids=1, # 
Are you on regular steroid tablets? 14 | b_impotence2=False, 15 | b_migraine=1, # bool, Do you have migraines? 16 | b_ra=0, # Rheumatoid arthritis? 17 | b_renal=0, # Chronic kidney disease (stage 3, 4 or 5)? 18 | b_semi=0, # Severe mental illness? 19 | b_sle=1, # bool, Systemic lupus erythematosus 20 | b_treatedhyp=1, # bool, On blood pressure treatment? 21 | b_type1=0, # Diabetes status: type 1 22 | b_type2=0, # Diabetes status: type 2 23 | bmi=25, # Body mass index = kg/m^2 24 | ethrisk=0, # ethnic risk 25 | fh_cvd=0, # Angina or heart attack in a 1st degree relative < 60? 26 | rati=5, # Cholesterol/HDL ratio 27 | sbp=180, # Systolic blood pressure 28 | sbps5=20, # Standard deviation of at least two most recent systolic blood pressure readings (mmHg) 29 | smoke_cat=0, # smoking category: non-smoker, ex-smoker, light-smoker(less than 10/), moderate smoker(10- 19), heavy smoker(20 or over) 30 | town=0, # Townsend deprivation score 31 | ) 32 | 33 | assert score < 1 34 | -------------------------------------------------------------------------------- /tests/plugins/prediction/risk_estimation/benchmarks/diabetes/test_ada.py: -------------------------------------------------------------------------------- 1 | # autoprognosis absolute 2 | from autoprognosis.plugins.prediction.risk_estimation.benchmarks.diabetes.ada.model import ( 3 | inference, 4 | ) 5 | 6 | 7 | def test_sanity() -> None: 8 | score = inference( 9 | gender="F", 10 | age=64, # age value 11 | fh_diab=0, # Do immediate family (mother, father, brothers or sisters) have diabetes? 12 | b_treatedhyp=1, # Do you have high blood pressure requiring treatment? 
13 | b_daily_exercise=1, 14 | bmi=24, # Body mass index = kg/m^2 15 | ) 16 | 17 | assert score < 1 18 | -------------------------------------------------------------------------------- /tests/plugins/prediction/risk_estimation/benchmarks/diabetes/test_diabetesuk.py: -------------------------------------------------------------------------------- 1 | # autoprognosis absolute 2 | from autoprognosis.plugins.prediction.risk_estimation.benchmarks.diabetes.diabetes_uk.model import ( 3 | inference, 4 | ) 5 | 6 | 7 | def test_sanity() -> None: 8 | score = inference( 9 | gender="F", 10 | age=64, # age value 11 | ethrisk=0, # ethnic risk 12 | fh_diab=0, # Do immediate family (mother, father, brothers or sisters) have diabetes? 13 | waist=80, 14 | bmi=24, # Body mass index = kg/m^2 15 | b_treatedhyp=1, # Do you have high blood pressure requiring treatment? 16 | ) 17 | 18 | assert score < 1 19 | -------------------------------------------------------------------------------- /tests/plugins/prediction/risk_estimation/benchmarks/diabetes/test_finrisk.py: -------------------------------------------------------------------------------- 1 | # autoprognosis absolute 2 | from autoprognosis.plugins.prediction.risk_estimation.benchmarks.diabetes.finrisk.model import ( 3 | inference, 4 | ) 5 | 6 | 7 | def test_sanity() -> None: 8 | score = inference( 9 | gender="F", 10 | age=64, # age value 11 | bmi=24, # Body mass index = kg/m^2 12 | waist=80, 13 | b_daily_exercise=1, 14 | b_daily_vegs=1, 15 | b_treatedhyp=1, # Do you have high blood pressure requiring treatment? 16 | b_ever_had_high_glucose=1, 17 | fh_diab=0, # Do immediate family (mother, father, brothers or sisters) have diabetes? 
18 | ) 19 | 20 | assert score < 1 21 | -------------------------------------------------------------------------------- /tests/plugins/prediction/risk_estimation/benchmarks/diabetes/test_qdiab.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import pytest 3 | 4 | # autoprognosis absolute 5 | from autoprognosis.plugins.prediction.risk_estimation.benchmarks.diabetes.qdiabetes.model import ( 6 | inference, 7 | ) 8 | 9 | 10 | @pytest.mark.parametrize("model", ["A", "B", "C"]) 11 | def test_sanity(model) -> None: 12 | score = inference( 13 | model, 14 | gender="M", 15 | age=84, # age value 16 | b_atypicalantipsy=1, # bool, On atypical antipsychotic medication 17 | b_corticosteroids=1, # Are you on regular steroid tablets? 18 | b_cvd=1, # Have you had a heart attack, angina, stroke or TIA? 19 | b_gestdiab=0, # Women: Do you have gestational diabetes ? 20 | b_learning=0, # Learning disabilities? 21 | b_manicschiz=0, # Manic depression or schizophrenia? 22 | b_pos=0, # Do you have polycystic ovaries? 23 | b_statin=0, # Are you on statins? 24 | b_treatedhyp=1, # Do you have high blood pressure requiring treatment? 25 | bmi=34, # Body mass index = kg/m^2 26 | ethrisk=1, # ethnic risk 27 | fh_diab=1, # Do immediate family (mother, father, brothers or sisters) have diabetes? 
28 | hba1c=40, # HBA1c (mmol/mol) 29 | smoke_cat=4, # smoking category: non-smoker, ex-smoker, light-smoker(less than 10/), moderate smoker(10- 19), heavy smoker(20 or over) 30 | fbs=0.01, 31 | town=0, # Townsend deprivation score 32 | ) 33 | 34 | assert score <= 1 35 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/dimensionality_reduction/test_data_cleanup.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import pytest 3 | 4 | # autoprognosis absolute 5 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 6 | from autoprognosis.plugins.preprocessors.dimensionality_reduction.plugin_data_cleanup import ( 7 | plugin, 8 | ) 9 | from autoprognosis.utils.serialization import load_model, save_model 10 | 11 | 12 | def from_api() -> PreprocessorPlugin: 13 | return Preprocessors(category="dimensionality_reduction").get("data_cleanup") 14 | 15 | 16 | def from_module() -> PreprocessorPlugin: 17 | return plugin() 18 | 19 | 20 | def from_serde() -> PreprocessorPlugin: 21 | buff = plugin().save() 22 | return plugin.load(buff) 23 | 24 | 25 | def from_pickle() -> PreprocessorPlugin: 26 | buff = save_model(plugin()) 27 | return load_model(buff) 28 | 29 | 30 | @pytest.mark.parametrize( 31 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 32 | ) 33 | def test_variance_threshold_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 34 | assert test_plugin is not None 35 | 36 | 37 | @pytest.mark.parametrize( 38 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 39 | ) 40 | def test_variance_threshold_plugin_name(test_plugin: PreprocessorPlugin) -> None: 41 | assert test_plugin.name() == "data_cleanup" 42 | 43 | 44 | @pytest.mark.parametrize( 45 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 46 | ) 47 | def test_variance_threshold_plugin_type(test_plugin: 
PreprocessorPlugin) -> None: 48 | assert test_plugin.type() == "preprocessor" 49 | assert test_plugin.subtype() == "dimensionality_reduction" 50 | 51 | 52 | @pytest.mark.parametrize( 53 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 54 | ) 55 | def test_variance_threshold_plugin_hyperparams( 56 | test_plugin: PreprocessorPlugin, 57 | ) -> None: 58 | assert test_plugin.hyperparameter_space() == [] 59 | 60 | 61 | @pytest.mark.parametrize( 62 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 63 | ) 64 | def test_variance_threshold_plugin_fit_transform( 65 | test_plugin: PreprocessorPlugin, 66 | ) -> None: 67 | res = test_plugin.fit_transform( 68 | [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 9, 9], [2, 2, 2, 2]], [1, 2, 3, 4] 69 | ) 70 | 71 | assert res.shape == (4, 2) 72 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/dimensionality_reduction/test_dr_nop.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | 6 | # autoprognosis absolute 7 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 8 | from autoprognosis.plugins.preprocessors.dimensionality_reduction.plugin_nop import ( 9 | plugin, 10 | ) 11 | 12 | 13 | def from_api() -> PreprocessorPlugin: 14 | return Preprocessors(category="dimensionality_reduction").get("nop") 15 | 16 | 17 | def from_module() -> PreprocessorPlugin: 18 | return plugin() 19 | 20 | 21 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 22 | def test_nop_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 23 | assert test_plugin is not None 24 | 25 | 26 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 27 | def test_nop_plugin_name(test_plugin: PreprocessorPlugin) -> None: 28 | assert test_plugin.name() == "nop" 29 | 30 | 31 | 
@pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 32 | def test_nop_plugin_type(test_plugin: PreprocessorPlugin) -> None: 33 | assert test_plugin.type() == "preprocessor" 34 | assert test_plugin.subtype() == "dimensionality_reduction" 35 | 36 | 37 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 38 | def test_nop_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 39 | assert test_plugin.hyperparameter_space() == [] 40 | 41 | 42 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 43 | def test_nop_plugin_fit_transform(test_plugin: PreprocessorPlugin) -> None: 44 | res = test_plugin.fit_transform(pd.DataFrame([[1, 1, 1, 1], [2, 2, 2, 2]])) 45 | 46 | np.testing.assert_array_equal(res, [[1, 1, 1, 1], [2, 2, 2, 2]]) 47 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/dimensionality_reduction/test_fast_ica.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import pytest 3 | from sklearn.datasets import load_iris 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 7 | from autoprognosis.plugins.preprocessors.dimensionality_reduction.plugin_fast_ica import ( 8 | plugin, 9 | ) 10 | 11 | n_components = 3 12 | 13 | 14 | def from_api() -> PreprocessorPlugin: 15 | return Preprocessors(category="dimensionality_reduction").get( 16 | "fast_ica", n_components=n_components 17 | ) 18 | 19 | 20 | def from_module() -> PreprocessorPlugin: 21 | return plugin(n_components=n_components) 22 | 23 | 24 | def from_serde() -> PreprocessorPlugin: 25 | buff = plugin(n_components=n_components).save() 26 | return plugin().load(buff) 27 | 28 | 29 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 30 | def test_fast_ica_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 31 | assert test_plugin is not None 32 | 
33 | 34 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 35 | def test_fast_ica_plugin_name(test_plugin: PreprocessorPlugin) -> None: 36 | assert test_plugin.name() == "fast_ica" 37 | 38 | 39 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 40 | def test_fast_ica_plugin_type(test_plugin: PreprocessorPlugin) -> None: 41 | assert test_plugin.type() == "preprocessor" 42 | assert test_plugin.subtype() == "dimensionality_reduction" 43 | 44 | 45 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 46 | def test_fast_ica_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 47 | kwargs = {"features_count": 2} 48 | assert len(test_plugin.hyperparameter_space(**kwargs)) == 1 49 | assert test_plugin.hyperparameter_space(**kwargs)[0].name == "n_components" 50 | 51 | 52 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 53 | def test_fast_ica_plugin_fit_transform(test_plugin: PreprocessorPlugin) -> None: 54 | X, y = load_iris(return_X_y=True) 55 | res = test_plugin.fit_transform(X, y) 56 | print(X.shape, n_components) 57 | 58 | assert res.shape == (len(X), n_components) 59 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/dimensionality_reduction/test_feature_agglomeration.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import pytest 3 | from sklearn.datasets import load_iris 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 7 | from autoprognosis.plugins.preprocessors.dimensionality_reduction.plugin_feature_agglomeration import ( 8 | plugin, 9 | ) 10 | 11 | n_clusters = 2 12 | 13 | 14 | def from_api() -> PreprocessorPlugin: 15 | return Preprocessors(category="dimensionality_reduction").get( 16 | "feature_agglomeration", n_clusters=n_clusters 17 | 
) 18 | 19 | 20 | def from_module() -> PreprocessorPlugin: 21 | return plugin(n_clusters=n_clusters) 22 | 23 | 24 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 25 | def test_feature_agglomeration_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 26 | assert test_plugin is not None 27 | 28 | 29 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 30 | def test_feature_agglomeration_plugin_name(test_plugin: PreprocessorPlugin) -> None: 31 | assert test_plugin.name() == "feature_agglomeration" 32 | 33 | 34 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 35 | def test_feature_agglomeration_plugin_type(test_plugin: PreprocessorPlugin) -> None: 36 | assert test_plugin.type() == "preprocessor" 37 | assert test_plugin.subtype() == "dimensionality_reduction" 38 | 39 | 40 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 41 | def test_feature_agglomeration_plugin_hyperparams( 42 | test_plugin: PreprocessorPlugin, 43 | ) -> None: 44 | kwargs = {"features_count": 2} 45 | assert len(test_plugin.hyperparameter_space(**kwargs)) == 1 46 | assert test_plugin.hyperparameter_space(**kwargs)[0].name == "n_clusters" 47 | 48 | 49 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 50 | def test_feature_agglomeration_plugin_fit_transform( 51 | test_plugin: PreprocessorPlugin, 52 | ) -> None: 53 | X, y = load_iris(return_X_y=True) 54 | res = test_plugin.fit_transform(X, y) 55 | 56 | assert res.shape == (len(X), n_clusters) 57 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/dimensionality_reduction/test_gauss_projection.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import pytest 3 | 4 | # autoprognosis absolute 5 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 6 | from 
autoprognosis.plugins.preprocessors.dimensionality_reduction.plugin_gauss_projection import ( 7 | plugin, 8 | ) 9 | 10 | n_components = 3 11 | 12 | 13 | def from_api() -> PreprocessorPlugin: 14 | return Preprocessors(category="dimensionality_reduction").get( 15 | "gauss_projection", n_components=n_components 16 | ) 17 | 18 | 19 | def from_module() -> PreprocessorPlugin: 20 | return plugin(n_components=n_components) 21 | 22 | 23 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 24 | def test_gauss_projection_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 25 | assert test_plugin is not None 26 | 27 | 28 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 29 | def test_gauss_projection_plugin_name(test_plugin: PreprocessorPlugin) -> None: 30 | assert test_plugin.name() == "gauss_projection" 31 | 32 | 33 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 34 | def test_gauss_projection_plugin_type(test_plugin: PreprocessorPlugin) -> None: 35 | assert test_plugin.type() == "preprocessor" 36 | assert test_plugin.subtype() == "dimensionality_reduction" 37 | 38 | 39 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 40 | def test_gauss_projection_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 41 | kwargs = {"features_count": 2} 42 | assert len(test_plugin.hyperparameter_space(**kwargs)) == 1 43 | assert test_plugin.hyperparameter_space(**kwargs)[0].name == "n_components" 44 | 45 | 46 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 47 | def test_gauss_projection_plugin_fit_transform(test_plugin: PreprocessorPlugin) -> None: 48 | res = test_plugin.fit_transform( 49 | [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 9, 9], [2, 2, 2, 2]] 50 | ) 51 | 52 | assert res.shape == (4, n_components) 53 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/dimensionality_reduction/test_pca.py: 
-------------------------------------------------------------------------------- 1 | # third party 2 | import pytest 3 | 4 | # autoprognosis absolute 5 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 6 | from autoprognosis.plugins.preprocessors.dimensionality_reduction.plugin_pca import ( 7 | plugin, 8 | ) 9 | 10 | n_components = 3 11 | 12 | 13 | def from_api() -> PreprocessorPlugin: 14 | return Preprocessors(category="dimensionality_reduction").get( 15 | "pca", n_components=n_components 16 | ) 17 | 18 | 19 | def from_module() -> PreprocessorPlugin: 20 | return plugin(n_components=n_components) 21 | 22 | 23 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 24 | def test_pca_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 25 | assert test_plugin is not None 26 | 27 | 28 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 29 | def test_pca_plugin_name(test_plugin: PreprocessorPlugin) -> None: 30 | assert test_plugin.name() == "pca" 31 | 32 | 33 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 34 | def test_pca_plugin_type(test_plugin: PreprocessorPlugin) -> None: 35 | assert test_plugin.type() == "preprocessor" 36 | assert test_plugin.subtype() == "dimensionality_reduction" 37 | 38 | 39 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 40 | def test_pca_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 41 | kwargs = {"features_count": 2} 42 | assert len(test_plugin.hyperparameter_space(**kwargs)) == 1 43 | assert test_plugin.hyperparameter_space(**kwargs)[0].name == "n_components" 44 | 45 | 46 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 47 | def test_pca_plugin_fit_transform(test_plugin: PreprocessorPlugin) -> None: 48 | res = test_plugin.fit_transform( 49 | [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 9, 9], [2, 2, 2, 2]] 50 | ) 51 | 52 | assert res.shape == (4, n_components) 53 | 
-------------------------------------------------------------------------------- /tests/plugins/preprocessors/dimensionality_reduction/test_variance_threshold.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import pytest 3 | 4 | # autoprognosis absolute 5 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 6 | from autoprognosis.plugins.preprocessors.dimensionality_reduction.plugin_variance_threshold import ( 7 | plugin, 8 | ) 9 | from autoprognosis.utils.serialization import load_model, save_model 10 | 11 | 12 | def from_api() -> PreprocessorPlugin: 13 | return Preprocessors(category="dimensionality_reduction").get("variance_threshold") 14 | 15 | 16 | def from_module() -> PreprocessorPlugin: 17 | return plugin() 18 | 19 | 20 | def from_serde() -> PreprocessorPlugin: 21 | buff = plugin().save() 22 | return plugin.load(buff) 23 | 24 | 25 | def from_pickle() -> PreprocessorPlugin: 26 | buff = save_model(plugin()) 27 | return load_model(buff) 28 | 29 | 30 | @pytest.mark.parametrize( 31 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 32 | ) 33 | def test_variance_threshold_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 34 | assert test_plugin is not None 35 | 36 | 37 | @pytest.mark.parametrize( 38 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 39 | ) 40 | def test_variance_threshold_plugin_name(test_plugin: PreprocessorPlugin) -> None: 41 | assert test_plugin.name() == "variance_threshold" 42 | 43 | 44 | @pytest.mark.parametrize( 45 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 46 | ) 47 | def test_variance_threshold_plugin_type(test_plugin: PreprocessorPlugin) -> None: 48 | assert test_plugin.type() == "preprocessor" 49 | assert test_plugin.subtype() == "dimensionality_reduction" 50 | 51 | 52 | @pytest.mark.parametrize( 53 | "test_plugin", [from_api(), from_module(), from_serde(), 
from_pickle()] 54 | ) 55 | def test_variance_threshold_plugin_hyperparams( 56 | test_plugin: PreprocessorPlugin, 57 | ) -> None: 58 | assert test_plugin.hyperparameter_space() == [] 59 | 60 | 61 | @pytest.mark.parametrize( 62 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 63 | ) 64 | def test_variance_threshold_plugin_fit_transform( 65 | test_plugin: PreprocessorPlugin, 66 | ) -> None: 67 | res = test_plugin.fit_transform( 68 | [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 9, 9], [2, 2, 2, 2]], [1, 2, 3, 4] 69 | ) 70 | 71 | assert res.shape == (4, 4) 72 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/feature_scaling/test_feature_normalizer.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pytest 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 7 | from autoprognosis.plugins.preprocessors.feature_scaling.plugin_feature_normalizer import ( 8 | plugin, 9 | ) 10 | from autoprognosis.utils.serialization import load_model, save_model 11 | 12 | 13 | def from_api() -> PreprocessorPlugin: 14 | return Preprocessors().get("feature_normalizer") 15 | 16 | 17 | def from_module() -> PreprocessorPlugin: 18 | return plugin() 19 | 20 | 21 | def from_serde() -> PreprocessorPlugin: 22 | buff = plugin().save() 23 | return plugin().load(buff) 24 | 25 | 26 | def from_pickle() -> PreprocessorPlugin: 27 | buff = save_model(plugin()) 28 | return load_model(buff) 29 | 30 | 31 | @pytest.mark.parametrize( 32 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 33 | ) 34 | def test_feature_normalizer_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 35 | assert test_plugin is not None 36 | 37 | 38 | @pytest.mark.parametrize( 39 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 40 | ) 41 | def 
test_feature_normalizer_plugin_name(test_plugin: PreprocessorPlugin) -> None: 42 | assert test_plugin.name() == "feature_normalizer" 43 | 44 | 45 | @pytest.mark.parametrize( 46 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 47 | ) 48 | def test_feature_normalizer_plugin_type(test_plugin: PreprocessorPlugin) -> None: 49 | assert test_plugin.type() == "preprocessor" 50 | assert test_plugin.subtype() == "feature_scaling" 51 | 52 | 53 | @pytest.mark.parametrize( 54 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 55 | ) 56 | def test_feature_normalizer_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 57 | assert test_plugin.hyperparameter_space() == [] 58 | 59 | 60 | @pytest.mark.parametrize( 61 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 62 | ) 63 | def test_feature_normalizer_plugin_fit_transform( 64 | test_plugin: PreprocessorPlugin, 65 | ) -> None: 66 | res = test_plugin.fit_transform([[4, 1, 2, 2], [1, 3, 9, 3], [5, 7, 5, 1]]) 67 | 68 | np.testing.assert_array_equal( 69 | res, [[0.8, 0.2, 0.4, 0.4], [0.1, 0.3, 0.9, 0.3], [0.5, 0.7, 0.5, 0.1]] 70 | ) 71 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/feature_scaling/test_fs_nop.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | 6 | # autoprognosis absolute 7 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 8 | from autoprognosis.plugins.preprocessors.feature_scaling.plugin_nop import plugin 9 | 10 | 11 | def from_api() -> PreprocessorPlugin: 12 | return Preprocessors(category="feature_scaling").get("nop") 13 | 14 | 15 | def from_module() -> PreprocessorPlugin: 16 | return plugin() 17 | 18 | 19 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 20 | def test_nop_plugin_sanity(test_plugin: 
PreprocessorPlugin) -> None: 21 | assert test_plugin is not None 22 | 23 | 24 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 25 | def test_nop_plugin_name(test_plugin: PreprocessorPlugin) -> None: 26 | assert test_plugin.name() == "nop" 27 | 28 | 29 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 30 | def test_nop_plugin_type(test_plugin: PreprocessorPlugin) -> None: 31 | assert test_plugin.type() == "preprocessor" 32 | assert test_plugin.subtype() == "feature_scaling" 33 | 34 | 35 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 36 | def test_nop_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 37 | assert test_plugin.hyperparameter_space() == [] 38 | 39 | 40 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 41 | def test_nop_plugin_fit_transform(test_plugin: PreprocessorPlugin) -> None: 42 | res = test_plugin.fit_transform(pd.DataFrame([[1, 1, 1, 1], [2, 2, 2, 2]])) 43 | 44 | np.testing.assert_array_equal(res, [[1, 1, 1, 1], [2, 2, 2, 2]]) 45 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/feature_scaling/test_maxabs_scaler.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pytest 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 7 | from autoprognosis.plugins.preprocessors.feature_scaling.plugin_maxabs_scaler import ( 8 | plugin, 9 | ) 10 | 11 | 12 | def from_api() -> PreprocessorPlugin: 13 | return Preprocessors().get("maxabs_scaler") 14 | 15 | 16 | def from_module() -> PreprocessorPlugin: 17 | return plugin() 18 | 19 | 20 | def from_serde() -> PreprocessorPlugin: 21 | buff = plugin().save() 22 | return plugin().load(buff) 23 | 24 | 25 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 26 | def 
test_maxabs_scaler_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 27 | assert test_plugin is not None 28 | 29 | 30 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 31 | def test_maxabs_scaler_plugin_name(test_plugin: PreprocessorPlugin) -> None: 32 | assert test_plugin.name() == "maxabs_scaler" 33 | 34 | 35 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 36 | def test_maxabs_scaler_plugin_type(test_plugin: PreprocessorPlugin) -> None: 37 | assert test_plugin.type() == "preprocessor" 38 | assert test_plugin.subtype() == "feature_scaling" 39 | 40 | 41 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 42 | def test_maxabs_scaler_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 43 | assert test_plugin.hyperparameter_space() == [] 44 | 45 | 46 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 47 | def test_maxabs_scaler_plugin_fit_transform(test_plugin: PreprocessorPlugin) -> None: 48 | res = test_plugin.fit_transform( 49 | [[1.0, -1.0, 2.0], [2.0, 0.0, 0.0], [0.0, 1.0, -1.0]] 50 | ) 51 | 52 | np.testing.assert_array_equal( 53 | res, [[0.5, -1.0, 1.0], [1.0, 0.0, 0.0], [0.0, 1.0, -0.5]] 54 | ) 55 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/feature_scaling/test_minmax_scaler.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pytest 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 7 | from autoprognosis.plugins.preprocessors.feature_scaling.plugin_minmax_scaler import ( 8 | plugin, 9 | ) 10 | 11 | 12 | def from_api() -> PreprocessorPlugin: 13 | return Preprocessors().get("minmax_scaler") 14 | 15 | 16 | def from_module() -> PreprocessorPlugin: 17 | return plugin() 18 | 19 | 20 | def 
from_serde() -> PreprocessorPlugin: 21 | buff = plugin().save() 22 | return plugin().load(buff) 23 | 24 | 25 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 26 | def test_minmax_scaler_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 27 | assert test_plugin is not None 28 | 29 | 30 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 31 | def test_minmax_scaler_plugin_name(test_plugin: PreprocessorPlugin) -> None: 32 | assert test_plugin.name() == "minmax_scaler" 33 | 34 | 35 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 36 | def test_minmax_scaler_plugin_type(test_plugin: PreprocessorPlugin) -> None: 37 | assert test_plugin.type() == "preprocessor" 38 | assert test_plugin.subtype() == "feature_scaling" 39 | 40 | 41 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 42 | def test_minmax_scaler_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 43 | assert test_plugin.hyperparameter_space() == [] 44 | 45 | 46 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 47 | def test_minmax_scaler_plugin_fit_transform(test_plugin: PreprocessorPlugin) -> None: 48 | res = test_plugin.fit_transform([[-1, 2], [-0.5, 6], [0, 10], [1, 18]]) 49 | 50 | np.testing.assert_array_equal( 51 | res, [[0.0, 0.0], [0.25, 0.25], [0.5, 0.5], [1.0, 1.0]] 52 | ) 53 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/feature_scaling/test_normal_transform.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import pytest 3 | 4 | # autoprognosis absolute 5 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 6 | from autoprognosis.plugins.preprocessors.feature_scaling.plugin_normal_transform import ( 7 | plugin, 8 | ) 9 | 10 | 11 | def from_api() -> PreprocessorPlugin: 
12 | return Preprocessors().get("normal_transform") 13 | 14 | 15 | def from_module() -> PreprocessorPlugin: 16 | return plugin() 17 | 18 | 19 | def from_serde() -> PreprocessorPlugin: 20 | buff = plugin().save() 21 | return plugin().load(buff) 22 | 23 | 24 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 25 | def test_normal_transform_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 26 | assert test_plugin is not None 27 | 28 | 29 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 30 | def test_normal_transform_plugin_name(test_plugin: PreprocessorPlugin) -> None: 31 | assert test_plugin.name() == "normal_transform" 32 | 33 | 34 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 35 | def test_normal_transform_plugin_type(test_plugin: PreprocessorPlugin) -> None: 36 | assert test_plugin.type() == "preprocessor" 37 | assert test_plugin.subtype() == "feature_scaling" 38 | 39 | 40 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 41 | def test_normal_transform_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 42 | assert test_plugin.hyperparameter_space() == [] 43 | 44 | 45 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 46 | def test_normal_transform_plugin_fit_transform(test_plugin: PreprocessorPlugin) -> None: 47 | res = test_plugin.fit_transform([[-1, 2], [-0.5, 6], [0, 10], [1, 18]]) 48 | 49 | assert res.shape == (4, 2) 50 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/feature_scaling/test_scaler.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pytest 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 7 | from 
autoprognosis.plugins.preprocessors.feature_scaling.plugin_scaler import plugin 8 | 9 | 10 | def from_api() -> PreprocessorPlugin: 11 | return Preprocessors().get("scaler") 12 | 13 | 14 | def from_module() -> PreprocessorPlugin: 15 | return plugin() 16 | 17 | 18 | def from_serde() -> PreprocessorPlugin: 19 | buff = plugin().save() 20 | return plugin().load(buff) 21 | 22 | 23 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 24 | def test_scaler_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 25 | assert test_plugin is not None 26 | 27 | 28 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 29 | def test_scaler_plugin_name(test_plugin: PreprocessorPlugin) -> None: 30 | assert test_plugin.name() == "scaler" 31 | 32 | 33 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 34 | def test_scaler_plugin_type(test_plugin: PreprocessorPlugin) -> None: 35 | assert test_plugin.type() == "preprocessor" 36 | assert test_plugin.subtype() == "feature_scaling" 37 | 38 | 39 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 40 | def test_scaler_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 41 | assert test_plugin.hyperparameter_space() == [] 42 | 43 | 44 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 45 | def test_scaler_plugin_fit_transform(test_plugin: PreprocessorPlugin) -> None: 46 | res = test_plugin.fit_transform([[0, 0], [0, 0], [1, 1], [1, 1]]) 47 | 48 | np.testing.assert_array_equal( 49 | res, [[-1.0, -1.0], [-1.0, -1.0], [1.0, 1.0], [1.0, 1.0]] 50 | ) 51 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/feature_scaling/test_uniform_transform.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import pytest 3 | 4 | # autoprognosis absolute 5 | from 
autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 6 | from autoprognosis.plugins.preprocessors.feature_scaling.plugin_uniform_transform import ( 7 | plugin, 8 | ) 9 | 10 | 11 | def from_api() -> PreprocessorPlugin: 12 | return Preprocessors().get("uniform_transform") 13 | 14 | 15 | def from_module() -> PreprocessorPlugin: 16 | return plugin() 17 | 18 | 19 | def from_serde() -> PreprocessorPlugin: 20 | buff = plugin().save() 21 | return plugin().load(buff) 22 | 23 | 24 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 25 | def test_uniform_transform_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 26 | assert test_plugin is not None 27 | 28 | 29 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 30 | def test_uniform_transform_plugin_name(test_plugin: PreprocessorPlugin) -> None: 31 | assert test_plugin.name() == "uniform_transform" 32 | 33 | 34 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 35 | def test_uniform_transform_plugin_type(test_plugin: PreprocessorPlugin) -> None: 36 | assert test_plugin.type() == "preprocessor" 37 | assert test_plugin.subtype() == "feature_scaling" 38 | 39 | 40 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 41 | def test_uniform_transform_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 42 | assert test_plugin.hyperparameter_space() == [] 43 | 44 | 45 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 46 | def test_uniform_transform_plugin_fit_transform( 47 | test_plugin: PreprocessorPlugin, 48 | ) -> None: 49 | res = test_plugin.fit_transform([[-1, 2], [-0.5, 6], [0, 10], [1, 18]]) 50 | 51 | assert res.shape == (4, 2) 52 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/test_preprocessing_api.py: 
-------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Any, List 3 | 4 | # third party 5 | import pandas as pd 6 | import pytest 7 | 8 | # autoprognosis absolute 9 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 10 | 11 | 12 | @pytest.fixture 13 | def ctx() -> Preprocessors: 14 | return Preprocessors() 15 | 16 | 17 | class Mock(PreprocessorPlugin): 18 | def __init__(self) -> None: 19 | super().__init__() 20 | 21 | @staticmethod 22 | def name() -> str: 23 | return "test" 24 | 25 | @staticmethod 26 | def subtype() -> str: 27 | return "feature_scaling" 28 | 29 | @staticmethod 30 | def hyperparameter_space(*args: Any, **kwargs: Any) -> List[Any]: 31 | return [] 32 | 33 | def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "Mock": 34 | return self 35 | 36 | def _transform(self, X: pd.DataFrame) -> pd.DataFrame: 37 | return {} 38 | 39 | def save(self) -> bytes: 40 | return b"" 41 | 42 | @classmethod 43 | def load(cls, buff: bytes) -> "Mock": 44 | return cls() 45 | 46 | 47 | class Invalid: 48 | def __init__(self) -> None: 49 | pass 50 | 51 | 52 | def test_load(ctx: Preprocessors) -> None: 53 | assert len(ctx._plugins) == 0 54 | ctx.get("feature_normalizer") 55 | assert len(ctx._plugins) == 1 56 | 57 | 58 | def test_list(ctx: Preprocessors) -> None: 59 | ctx.get("nop") 60 | assert "nop" in ctx.list() 61 | 62 | 63 | def test_add_get(ctx: Preprocessors) -> None: 64 | ctx.add("mock", Mock) 65 | 66 | assert "mock" in ctx.list() 67 | 68 | mock = ctx.get("mock") 69 | 70 | assert mock.name() == "test" 71 | 72 | 73 | def test_add_get_invalid(ctx: Preprocessors) -> None: 74 | with pytest.raises(ValueError): 75 | ctx.add("invalid", Invalid) 76 | 77 | assert "mock" not in ctx.list() 78 | 79 | with pytest.raises(ValueError): 80 | ctx.get("mock") 81 | 82 | 83 | def test_iter(ctx: Preprocessors) -> None: 84 | for v in ctx: 85 | assert ctx[v].name() != "" 86 | 
-------------------------------------------------------------------------------- /tests/plugins/utils/test_cast.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pandas as pd 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.plugins.utils.cast import to_dataframe 7 | 8 | 9 | def test_cast_to_dataframe() -> None: 10 | simple_list = [[1, 2, 3]] 11 | 12 | cast = to_dataframe(simple_list) 13 | assert isinstance(cast, pd.DataFrame) 14 | 15 | cast = to_dataframe(pd.DataFrame(simple_list)) 16 | assert isinstance(cast, pd.DataFrame) 17 | 18 | cast = to_dataframe(np.array(simple_list)) 19 | assert isinstance(cast, pd.DataFrame) 20 | -------------------------------------------------------------------------------- /tests/plugins/utils/test_imputation_metrics.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | 4 | # autoprognosis absolute 5 | from autoprognosis.plugins.utils.metrics import MAE, RMSE 6 | 7 | 8 | def test_MAE() -> None: 9 | data = np.array([1, 2, 3]) 10 | data_truth = np.array([1, 2, 4]) 11 | mask = np.array([False, True, True]) 12 | assert MAE(data, data_truth, mask) == 0.5 13 | 14 | 15 | def test_RMSE() -> None: 16 | data = np.array([1, 2, 3]) 17 | data_truth = np.array([1, 2, 5]) 18 | mask = np.array([False, False, True]) 19 | assert RMSE(data, data_truth, mask) == 2 20 | -------------------------------------------------------------------------------- /tests/plugins/utils/test_simulate.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Tuple 3 | 4 | # third party 5 | import numpy as np 6 | import pytest 7 | 8 | # autoprognosis absolute 9 | from autoprognosis.plugins.utils.simulate import simulate_nan 10 | 11 | 12 | def dataset( 13 | mechanism: str, p_miss: float, n: int = 1000, opt: str = "logistic" 14 | ) -> 
Tuple[np.ndarray, np.ndarray, np.ndarray]: 15 | np.random.seed(0) 16 | 17 | p = 4 18 | 19 | mean = np.repeat(0, p) 20 | cov = 0.5 * (np.ones((p, p)) + np.eye(p)) 21 | 22 | x = np.random.multivariate_normal(mean, cov, size=n) 23 | x_simulated = simulate_nan(x, p_miss, mechanism, opt=opt) 24 | 25 | mask = x_simulated["mask"] 26 | x_miss = x_simulated["X_incomp"] 27 | 28 | return x, x_miss, mask 29 | 30 | 31 | @pytest.mark.parametrize("mechanism", ["MAR", "MNAR", "MCAR"]) 32 | @pytest.mark.parametrize("p_miss", [0.1, 0.3, 0.5]) 33 | def test_simulate_nan(mechanism: str, p_miss: float) -> None: 34 | orig, miss, mask = dataset(mechanism, p_miss) 35 | 36 | np.testing.assert_array_equal((orig != miss), mask) 37 | np.testing.assert_array_equal(np.isnan(miss), mask) 38 | 39 | 40 | @pytest.mark.parametrize("opt", ["logistic", "quantile", "selfmasked"]) 41 | def test_simulate_simulate_mnar(opt: str) -> None: 42 | orig, miss, mask = dataset("MNAR", 0.5, opt=opt) 43 | 44 | np.testing.assert_array_equal((orig != miss), mask) 45 | np.testing.assert_array_equal(np.isnan(miss), mask) 46 | -------------------------------------------------------------------------------- /tests/studies/helpers.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | import datetime 3 | from typing import Any 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.hooks import Hooks 7 | 8 | 9 | class MockHook(Hooks): 10 | def __init__(self) -> None: 11 | self._started_at = datetime.datetime.utcnow() 12 | 13 | def cancel(self) -> bool: 14 | # cancel after 10 seconds 15 | time_passed = datetime.datetime.utcnow() - self._started_at 16 | 17 | return time_passed.total_seconds() > 10 18 | 19 | def heartbeat( 20 | self, topic: str, subtopic: str, event_type: str, **kwargs: Any 21 | ) -> None: 22 | pass 23 | 24 | def finish(self) -> None: 25 | pass 26 | -------------------------------------------------------------------------------- /tests/utils/test_parallel.py: 
-------------------------------------------------------------------------------- 1 | # stdlib 2 | import multiprocessing 3 | import os 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.utils.parallel import n_learner_jobs, n_opt_jobs 7 | 8 | 9 | def test_n_opt_jobs() -> None: 10 | os.environ["N_OPT_JOBS"] = "1" 11 | 12 | assert n_opt_jobs() == 1 13 | 14 | del os.environ["N_OPT_JOBS"] 15 | 16 | assert n_opt_jobs() == 2 17 | 18 | 19 | def test_n_learner_jobs() -> None: 20 | os.environ["N_LEARNER_JOBS"] = "1" 21 | 22 | assert n_learner_jobs() == 1 23 | 24 | del os.environ["N_LEARNER_JOBS"] 25 | 26 | assert n_learner_jobs() == multiprocessing.cpu_count() 27 | -------------------------------------------------------------------------------- /third_party/image_template/streamlit/.gitattributes: -------------------------------------------------------------------------------- 1 | *.7z filter=lfs diff=lfs merge=lfs -text 2 | *.arrow filter=lfs diff=lfs merge=lfs -text 3 | *.bin filter=lfs diff=lfs merge=lfs -text 4 | *.bin.* filter=lfs diff=lfs merge=lfs -text 5 | *.bz2 filter=lfs diff=lfs merge=lfs -text 6 | *.ftz filter=lfs diff=lfs merge=lfs -text 7 | *.gz filter=lfs diff=lfs merge=lfs -text 8 | *.h5 filter=lfs diff=lfs merge=lfs -text 9 | *.joblib filter=lfs diff=lfs merge=lfs -text 10 | *.lfs.* filter=lfs diff=lfs merge=lfs -text 11 | *.model filter=lfs diff=lfs merge=lfs -text 12 | *.msgpack filter=lfs diff=lfs merge=lfs -text 13 | *.onnx filter=lfs diff=lfs merge=lfs -text 14 | *.ot filter=lfs diff=lfs merge=lfs -text 15 | *.parquet filter=lfs diff=lfs merge=lfs -text 16 | *.pb filter=lfs diff=lfs merge=lfs -text 17 | *.pt filter=lfs diff=lfs merge=lfs -text 18 | *.pth filter=lfs diff=lfs merge=lfs -text 19 | *.rar filter=lfs diff=lfs merge=lfs -text 20 | saved_model/**/* filter=lfs diff=lfs merge=lfs -text 21 | *.tar.* filter=lfs diff=lfs merge=lfs -text 22 | *.tflite filter=lfs diff=lfs merge=lfs -text 23 | *.tgz filter=lfs diff=lfs merge=lfs -text 24 | *.xz 
filter=lfs diff=lfs merge=lfs -text 25 | *.zip filter=lfs diff=lfs merge=lfs -text 26 | *.zstandard filter=lfs diff=lfs merge=lfs -text 27 | *tfevents* filter=lfs diff=lfs merge=lfs -text 28 | -------------------------------------------------------------------------------- /third_party/image_template/streamlit/.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [theme] 2 | base = "dark" 3 | -------------------------------------------------------------------------------- /third_party/image_template/streamlit/Procfile: -------------------------------------------------------------------------------- 1 | web: streamlit run app.py --server.port=${PORT:=8000} 2 | -------------------------------------------------------------------------------- /third_party/image_template/streamlit/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Test4 3 | emoji: 📈 4 | colorFrom: red 5 | colorTo: red 6 | sdk: streamlit 7 | app_file: app.py 8 | pinned: false 9 | --- 10 | 11 | # Configuration 12 | 13 | `title`: _string_ 14 | Display title for the Space 15 | 16 | `emoji`: _string_ 17 | Space emoji (emoji-only character allowed) 18 | 19 | `colorFrom`: _string_ 20 | Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray) 21 | 22 | `colorTo`: _string_ 23 | Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray) 24 | 25 | `sdk`: _string_ 26 | Can be either `gradio`, `streamlit`, or `static` 27 | 28 | `sdk_version` : _string_ 29 | Only applicable for `streamlit` SDK. 30 | See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions. 31 | 32 | `app_file`: _string_ 33 | Path to your main application file (which contains either `gradio` or `streamlit` Python code, or `static` html code). 34 | Path is relative to the root of the repository. 
35 | 36 | `models`: _List[string]_ 37 | HF model IDs (like "gpt2" or "deepset/roberta-base-squad2") used in the Space. 38 | Will be parsed automatically from your code if not specified here. 39 | 40 | `datasets`: _List[string]_ 41 | HF dataset IDs (like "common_voice" or "oscar-corpus/OSCAR-2109") used in the Space. 42 | Will be parsed automatically from your code if not specified here. 43 | 44 | `pinned`: _boolean_ 45 | Whether the Space stays on top of your list. 46 | -------------------------------------------------------------------------------- /third_party/image_template/streamlit/app.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | import subprocess 3 | import sys 4 | 5 | 6 | def install(install_pack: str) -> None: 7 | print(f"Installing {install_pack}") 8 | 9 | subprocess.check_call( 10 | [sys.executable, "-m", "pip", "install", install_pack], 11 | stdout=subprocess.DEVNULL, 12 | stderr=subprocess.DEVNULL, 13 | ) 14 | 15 | 16 | if __name__ == "__main__": 17 | # install("third_party/autoprognosis-0.1.1-py2.py3-none-any.whl") 18 | # third party 19 | from run_demonstrator import run 20 | 21 | run("app.p") 22 | -------------------------------------------------------------------------------- /third_party/image_template/streamlit/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/bcebere/geomloss 2 | hyperimpute==0.1.5 3 | matplotlib 4 | numpy==1.20.3 5 | pandas>=1.3 6 | scikit-learn>=1.0.2 7 | seaborn 8 | shap 9 | streamlit 10 | third_party/autoprognosis-0.1.1-py2.py3-none-any.whl 11 | torch>=1.10 12 | xgboost>=1.6.1 13 | xgbse>=0.2.3 14 | -------------------------------------------------------------------------------- /third_party/image_template/streamlit/runtime.txt: -------------------------------------------------------------------------------- 1 | python-3.8.11 2 | 
-------------------------------------------------------------------------------- /tutorials/bindings/R/tutorial_classification.R: -------------------------------------------------------------------------------- 1 | library(reticulate) 2 | py_install("autoprognosis", pip = TRUE) 3 | 4 | pathlib <- import("pathlib", convert=FALSE) 5 | warnings <- import("warnings", convert=FALSE) 6 | autoprognosis <- import("autoprognosis", convert=FALSE) 7 | 8 | warnings$filterwarnings('ignore') 9 | 10 | Path = pathlib$Path 11 | ClassifierStudy = autoprognosis$studies$classifiers$ClassifierStudy 12 | load_model_from_file = autoprognosis$utils$serialization$load_model_from_file 13 | evaluate_estimator = autoprognosis$utils$tester$evaluate_estimator 14 | workspace <- Path("workspace") 15 | study_name <- "example_classifier" 16 | 17 | # Load the data 18 | data("iris") 19 | target <- "Species" 20 | 21 | # Create the AutoPrognosis Study 22 | study <- ClassifierStudy( 23 | dataset = iris, 24 | target = target, 25 | study_name=study_name, 26 | num_iter=as.integer(10), 27 | num_study_iter=as.integer(2), 28 | timeout=as.integer(60), 29 | classifiers=list("logistic_regression", "lda", "qda"), 30 | workspace=workspace 31 | ) 32 | 33 | study$run() 34 | 35 | # Load the optimal model - if exists 36 | output <- sprintf("%s/%s/model.p", workspace, study_name) 37 | 38 | model <- load_model_from_file(output) 39 | # The model is not fitted yet here 40 | 41 | # Benchmark the model 42 | targets <- c(target) 43 | X <- iris[ , !(names(iris) %in% targets)] 44 | Y = iris[, target] 45 | 46 | metrics <- evaluate_estimator(model, X, Y) 47 | 48 | # Fit the model 49 | model$fit(X, Y) 50 | 51 | sprintf("Performance metrics %s", metrics["str"]) 52 | 53 | # Predict using the model 54 | model$predict_proba(X) 55 | -------------------------------------------------------------------------------- /tutorials/bindings/R/tutorial_classification_with_missing_data.R: 
-------------------------------------------------------------------------------- 1 | library(reticulate) 2 | py_install("autoprognosis", pip = TRUE) 3 | 4 | pathlib <- import("pathlib", convert=FALSE) 5 | warnings <- import("warnings", convert=FALSE) 6 | autoprognosis <- import("autoprognosis", convert=FALSE) 7 | 8 | warnings$filterwarnings('ignore') 9 | 10 | Path = pathlib$Path 11 | ClassifierStudy = autoprognosis$studies$classifiers$ClassifierStudy 12 | load_model_from_file = autoprognosis$utils$serialization$load_model_from_file 13 | evaluate_estimator = autoprognosis$utils$tester$evaluate_estimator 14 | workspace <- Path("workspace") 15 | study_name <- "example_classifier_with_miss" 16 | 17 | # Load the data 18 | adult <- read.table('https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data', 19 | sep = ',', fill = F, strip.white = T) 20 | colnames(adult) <- c('age', 'workclass', 'fnlwgt', 'educatoin', 21 | 'educatoin_num', 'marital_status', 'occupation', 'relationship', 'race', 'sex', 22 | 'capital_gain', 'capital_loss', 'hours_per_week', 'native_country', 'income') 23 | adult[adult == "?"] <- NA 24 | adult <- adult[ , !(names(adult) %in% c("native_country"))] 25 | 26 | df <- adult 27 | 28 | target <- "income" 29 | 30 | # Create the AutoPrognosis Study 31 | study <- ClassifierStudy( 32 | dataset = df, 33 | target = target, 34 | study_name=study_name, 35 | num_iter=as.integer(10), 36 | num_study_iter=as.integer(2), 37 | timeout=as.integer(60), 38 | classifiers=list("logistic_regression", "lda", "qda"), 39 | workspace=workspace, 40 | nan_placeholder='NA' 41 | ) 42 | 43 | study$run() 44 | 45 | # Load the optimal model - if exists 46 | output <- sprintf("%s/%s/model.p", workspace, study_name) 47 | 48 | model <- load_model_from_file(output) 49 | # The model is not fitted yet here 50 | 51 | # Benchmark the model 52 | targets <- c(target) 53 | X <- df[ , !(names(df) %in% targets)] 54 | Y = df[, target] 55 | 56 | metrics <- evaluate_estimator(model, 
X, Y) 57 | 58 | # Fit the model 59 | model$fit(X, Y) 60 | 61 | sprintf("Performance metrics %s", metrics["str"]) 62 | 63 | # Predict using the model 64 | model$predict_proba(X) 65 | -------------------------------------------------------------------------------- /tutorials/bindings/R/tutorial_regression.R: -------------------------------------------------------------------------------- 1 | library(reticulate) 2 | py_install("autoprognosis", pip = TRUE) 3 | 4 | pathlib <- import("pathlib", convert=FALSE) 5 | warnings <- import("warnings", convert=FALSE) 6 | autoprognosis <- import("autoprognosis", convert=FALSE) 7 | 8 | warnings$filterwarnings('ignore') 9 | 10 | Path = pathlib$Path 11 | RegressionStudy = autoprognosis$studies$regression$RegressionStudy 12 | load_model_from_file = autoprognosis$utils$serialization$load_model_from_file 13 | evaluate_regression = autoprognosis$utils$tester$evaluate_regression 14 | 15 | workspace <- Path("workspace") 16 | study_name <- "example_regression" 17 | 18 | # Load dataset 19 | airfoil <- read.csv( 20 | url("https://archive.ics.uci.edu/ml/machine-learning-databases/00291/airfoil_self_noise.dat"), 21 | sep = "\t", 22 | header = FALSE, 23 | ) 24 | 25 | target <- "V6" 26 | 27 | # Create AutoPrognosis Study 28 | study <- RegressionStudy( 29 | dataset = airfoil, 30 | target = target, 31 | study_name=study_name, 32 | num_iter=as.integer(10), 33 | num_study_iter=as.integer(2), 34 | timeout=as.integer(60), 35 | regressors=list("linear_regression", "kneighbors_regressor"), 36 | workspace=workspace 37 | ) 38 | 39 | study$run() 40 | 41 | # Load the optimal model - if exists 42 | output <- sprintf("%s/%s/model.p", workspace, study_name) 43 | 44 | model <- load_model_from_file(output) 45 | # The model is not fitted yet here 46 | 47 | # Benchmark the model 48 | targets <- c(target) 49 | X <- airfoil[ , !(names(iris) %in% targets)] 50 | Y = airfoil[, target] 51 | 52 | metrics <- evaluate_regression(model, X, Y) 53 | 54 | sprintf("Performance 
metrics %s", metrics["str"]) 55 | 56 | # Fit the model 57 | model$fit(X, Y) 58 | 59 | # Predict 60 | model$predict(X) 61 | -------------------------------------------------------------------------------- /tutorials/bindings/R/tutorial_survival_analysis.R: -------------------------------------------------------------------------------- 1 | library(reticulate) 2 | library(survival) 3 | 4 | py_install("autoprognosis", pip = TRUE) 5 | 6 | pathlib <- import("pathlib", convert=FALSE) 7 | warnings <- import("warnings", convert=FALSE) 8 | autoprognosis <- import("autoprognosis", convert=FALSE) 9 | np <- import("numpy", convert=FALSE) 10 | 11 | warnings$filterwarnings('ignore') 12 | 13 | Path = pathlib$Path 14 | RiskEstimationStudy = autoprognosis$studies$risk_estimation$RiskEstimationStudy 15 | load_model_from_file = autoprognosis$utils$serialization$load_model_from_file 16 | evaluate_survival_estimator = autoprognosis$utils$tester$evaluate_survival_estimator 17 | 18 | workspace <- Path("workspace") 19 | study_name <- "example_risk_estimation" 20 | 21 | # Load the data 22 | data(cancer, package="survival") 23 | 24 | targets <- c("dtime", "death") 25 | df <- rotterdam 26 | 27 | X <- df[ , !(names(df) %in% targets)] 28 | Y <- df[, "death"] 29 | T <- df[, "dtime"] 30 | 31 | eval_time_horizons <- list(2000) 32 | 33 | # Create the AutoPrognosis Study 34 | study <- RiskEstimationStudy( 35 | dataset = df, 36 | target = "death", 37 | time_to_event="dtime", 38 | time_horizons = eval_time_horizons, 39 | study_name=study_name, 40 | num_iter=as.integer(10), 41 | num_study_iter=as.integer(2), 42 | timeout=as.integer(60), 43 | risk_estimators=list("cox_ph", "survival_xgboost"), 44 | workspace=workspace 45 | ) 46 | 47 | study$run() 48 | 49 | # Load the optimal model - if exists 50 | output <- sprintf("%s/%s/model.p", workspace, study_name) 51 | 52 | model <- load_model_from_file(output) 53 | # The model is not fitted yet here 54 | 55 | # Benchmark the model 56 | metrics <- 
evaluate_survival_estimator(model, X, T, Y, eval_time_horizons) 57 | 58 | # Fit the model 59 | model$fit(X, T, Y) 60 | 61 | sprintf("Performance metrics %s", metrics["str"]) 62 | 63 | # Predict using the model 64 | model$predict(X) 65 | --------------------------------------------------------------------------------