├── .github └── workflows │ ├── release.yml │ ├── scripts │ ├── release_linux.sh │ ├── release_osx.sh │ └── release_windows.bat │ ├── test_R.yml │ ├── test_full.yml │ ├── test_pr.yml │ └── test_tutorials.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── LICENSE ├── README.md ├── docs ├── Makefile ├── README.md ├── arch.png ├── automl.rst ├── classifiers.rst ├── conf.py ├── examples.rst ├── explainers.rst ├── imputers.rst ├── index.rst ├── make.bat ├── prediction.rst ├── preprocessing.rst ├── regression.rst ├── risk_estimation.rst └── tutorials ├── pyproject.toml ├── scripts ├── __init__.py ├── build_demonstrator.py ├── nb_test.py ├── run_demonstrator.py └── studies │ ├── build_adj_biobank_cvd.sh │ └── build_adj_biobank_diabetes.sh ├── setup.cfg ├── setup.py ├── src └── autoprognosis │ ├── __init__.py │ ├── apps │ ├── __init__.py │ ├── classification │ │ └── classification_template_streamlit.py │ ├── common │ │ ├── __init__.py │ │ ├── login.py │ │ └── pandas_to_streamlit.py │ ├── extras │ │ ├── __init__.py │ │ ├── biobank_cvd.py │ │ └── biobank_diabetes.py │ └── survival_analysis │ │ └── survival_analysis_template_streamlit.py │ ├── deploy │ ├── __init__.py │ ├── build.py │ ├── proto.py │ ├── run.py │ └── utils.py │ ├── exceptions │ └── __init__.py │ ├── explorers │ ├── __init__.py │ ├── classifiers.py │ ├── classifiers_combos.py │ ├── core │ │ ├── __init__.py │ │ ├── defaults.py │ │ ├── optimizer.py │ │ ├── optimizers │ │ │ ├── __init__.py │ │ │ ├── bayesian.py │ │ │ └── hyperband.py │ │ └── selector.py │ ├── regression.py │ ├── regression_combos.py │ ├── risk_estimation.py │ └── risk_estimation_combos.py │ ├── hooks │ ├── __init__.py │ ├── base.py │ └── default.py │ ├── logger.py │ ├── plugins │ ├── __init__.py │ ├── core │ │ ├── __init__.py │ │ ├── base_plugin.py │ │ └── params.py │ ├── ensemble │ │ ├── __init__.py │ │ ├── classifiers.py │ │ ├── combos.py │ │ ├── regression.py │ │ └── risk_estimation.py │ ├── explainers │ │ ├── __init__.py │ 
│ ├── base.py │ │ ├── plugin_invase.py │ │ ├── plugin_kernel_shap.py │ │ ├── plugin_lime.py │ │ ├── plugin_risk_effect_size.py │ │ ├── plugin_shap_permutation_sampler.py │ │ └── plugin_symbolic_pursuit.py │ ├── imputers │ │ ├── README.md │ │ ├── __init__.py │ │ ├── base.py │ │ ├── plugin_EM.py │ │ ├── plugin_gain.py │ │ ├── plugin_hyperimpute.py │ │ ├── plugin_ice.py │ │ ├── plugin_mean.py │ │ ├── plugin_median.py │ │ ├── plugin_mice.py │ │ ├── plugin_missforest.py │ │ ├── plugin_most_frequent.py │ │ ├── plugin_nop.py │ │ ├── plugin_sinkhorn.py │ │ └── plugin_softimpute.py │ ├── pipeline │ │ ├── __init__.py │ │ └── generators.py │ ├── prediction │ │ ├── __init__.py │ │ ├── base.py │ │ ├── classifiers │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── helper_calibration.py │ │ │ ├── plugin_adaboost.py │ │ │ ├── plugin_bagging.py │ │ │ ├── plugin_bernoulli_naive_bayes.py │ │ │ ├── plugin_catboost.py │ │ │ ├── plugin_decision_trees.py │ │ │ ├── plugin_extra_tree_classifier.py │ │ │ ├── plugin_gaussian_naive_bayes.py │ │ │ ├── plugin_gaussian_process.py │ │ │ ├── plugin_gradient_boosting.py │ │ │ ├── plugin_hist_gradient_boosting.py │ │ │ ├── plugin_knn.py │ │ │ ├── plugin_lda.py │ │ │ ├── plugin_lgbm.py │ │ │ ├── plugin_linear_svm.py │ │ │ ├── plugin_logistic_regression.py │ │ │ ├── plugin_multinomial_naive_bayes.py │ │ │ ├── plugin_neural_nets.py │ │ │ ├── plugin_perceptron.py │ │ │ ├── plugin_qda.py │ │ │ ├── plugin_random_forest.py │ │ │ ├── plugin_ridge_classifier.py │ │ │ ├── plugin_tabnet.py │ │ │ └── plugin_xgboost.py │ │ ├── regression │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── plugin_bayesian_ridge.py │ │ │ ├── plugin_catboost_regressor.py │ │ │ ├── plugin_kneighbors_regressor.py │ │ │ ├── plugin_linear_regression.py │ │ │ ├── plugin_mlp_regressor.py │ │ │ ├── plugin_neural_nets_regression.py │ │ │ ├── plugin_random_forest_regressor.py │ │ │ ├── plugin_tabnet_regressor.py │ │ │ └── plugin_xgboost_regressor.py │ │ └── risk_estimation │ │ │ ├── 
__init__.py │ │ │ ├── base.py │ │ │ ├── benchmarks │ │ │ ├── __init__.py │ │ │ ├── cvd │ │ │ │ ├── __init__.py │ │ │ │ ├── aha │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── model.py │ │ │ │ ├── framingham │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── model.py │ │ │ │ └── qrisk3 │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── model.py │ │ │ ├── diabetes │ │ │ │ ├── __init__.py │ │ │ │ ├── ada │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── model.py │ │ │ │ ├── diabetes_uk │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── model.py │ │ │ │ ├── finrisk │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── model.py │ │ │ │ └── qdiabetes │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── model.py │ │ │ └── prostate_cancer │ │ │ │ ├── __init__.py │ │ │ │ └── predict.py │ │ │ ├── helper_lifelines.py │ │ │ ├── plugin_cox_ph.py │ │ │ ├── plugin_coxnet.py │ │ │ ├── plugin_deephit.py │ │ │ ├── plugin_loglogistic_aft.py │ │ │ ├── plugin_lognormal_aft.py │ │ │ ├── plugin_survival_xgboost.py │ │ │ └── plugin_weibull_aft.py │ ├── preprocessors │ │ ├── README.md │ │ ├── __init__.py │ │ ├── base.py │ │ ├── dimensionality_reduction │ │ │ ├── __init__.py │ │ │ ├── plugin_data_cleanup.py │ │ │ ├── plugin_fast_ica.py │ │ │ ├── plugin_feature_agglomeration.py │ │ │ ├── plugin_gauss_projection.py │ │ │ ├── plugin_nop.py │ │ │ ├── plugin_pca.py │ │ │ └── plugin_variance_threshold.py │ │ └── feature_scaling │ │ │ ├── __init__.py │ │ │ ├── plugin_feature_normalizer.py │ │ │ ├── plugin_maxabs_scaler.py │ │ │ ├── plugin_minmax_scaler.py │ │ │ ├── plugin_nop.py │ │ │ ├── plugin_normal_transform.py │ │ │ ├── plugin_scaler.py │ │ │ └── plugin_uniform_transform.py │ ├── uncertainty │ │ ├── __init__.py │ │ ├── base.py │ │ ├── plugin_cohort_explainer.py │ │ ├── plugin_conformal_prediction.py │ │ └── plugin_jackknife.py │ └── utils │ │ ├── __init__.py │ │ ├── cast.py │ │ ├── decorators.py │ │ ├── metrics.py │ │ └── simulate.py │ ├── studies │ ├── __init__.py │ ├── _base.py │ ├── classifiers.py │ ├── regression.py │ └── risk_estimation.py │ ├── utils │ ├── 
__init__.py │ ├── data_encoder.py │ ├── distributions.py │ ├── encoder.py │ ├── metrics.py │ ├── pandas.py │ ├── parallel.py │ ├── redis.py │ ├── risk_estimation.py │ ├── serialization.py │ ├── tester.py │ ├── third_party │ │ ├── __init__.py │ │ ├── metrics.py │ │ ├── nonparametric.py │ │ └── util.py │ └── torch.py │ └── version.py ├── tests ├── apps │ ├── test_classifiers_app.py │ └── test_survival_app.py ├── bindings │ └── R │ │ ├── test_classification.R │ │ ├── test_classification_with_missing_data.R │ │ ├── test_regression.R │ │ └── test_survival_analysis.R ├── bugfixing │ ├── test_ensemble_crash.py │ └── test_not_fitted_error.py ├── conftest.py ├── explorers │ ├── explorers_mocks.py │ ├── test_classifiers.py │ ├── test_classifiers_combos.py │ ├── test_regression.py │ ├── test_regression_combos.py │ ├── test_risk_estimation.py │ ├── test_risk_estimation_combos.py │ └── test_selector.py ├── plugins │ ├── ensemble │ │ ├── test_classifier.py │ │ └── test_risk_estimation_ensemble.py │ ├── explainers │ │ ├── test_invase.py │ │ ├── test_kernel_shap.py │ │ ├── test_lime.py │ │ ├── test_risk_effect_size.py │ │ ├── test_shap_permutation_sampler.py │ │ └── test_symbolic_pursuit.py │ ├── imputers │ │ ├── test_api.py │ │ ├── test_em.py │ │ ├── test_gain.py │ │ ├── test_hyperimpute.py │ │ ├── test_ice.py │ │ ├── test_imputation_serde.py │ │ ├── test_imputers_api.py │ │ ├── test_mean.py │ │ ├── test_median.py │ │ ├── test_mice.py │ │ ├── test_missforest.py │ │ ├── test_most_freq.py │ │ ├── test_sinkhorn.py │ │ └── test_softimpute.py │ ├── pipeline │ │ └── test_pipeline.py │ ├── prediction │ │ ├── classifiers │ │ │ ├── test_bagging.py │ │ │ ├── test_bernoulli_naive_bayes.py │ │ │ ├── test_calibration.py │ │ │ ├── test_catboost.py │ │ │ ├── test_classifiers_linear_svm.py │ │ │ ├── test_clf_serde.py │ │ │ ├── test_decision_trees.py │ │ │ ├── test_extra_tree_classifier.py │ │ │ ├── test_gaussian_naive_bayes.py │ │ │ ├── test_gaussian_process.py │ │ │ ├── 
test_gradient_boosting.py │ │ │ ├── test_hist_gradient_boosting.py │ │ │ ├── test_knn.py │ │ │ ├── test_lda.py │ │ │ ├── test_lgbm.py │ │ │ ├── test_logistic_regression.py │ │ │ ├── test_neural_nets.py │ │ │ ├── test_perceptron.py │ │ │ ├── test_prediction_api.py │ │ │ ├── test_qda.py │ │ │ ├── test_random_forest.py │ │ │ ├── test_ridge_classifier.py │ │ │ ├── test_tabnet.py │ │ │ └── test_xgboost.py │ │ ├── regression │ │ │ ├── test_kneighbors_regressor.py │ │ │ ├── test_linear_regression.py │ │ │ ├── test_neural_nets_regression.py │ │ │ ├── test_random_forest_regressor.py │ │ │ ├── test_tabnet_regressor.py │ │ │ └── test_xgboost_regression.py │ │ └── risk_estimation │ │ │ ├── benchmarks │ │ │ ├── cvd │ │ │ │ ├── test_aha.py │ │ │ │ ├── test_fram.py │ │ │ │ └── test_qrisk3.py │ │ │ └── diabetes │ │ │ │ ├── test_ada.py │ │ │ │ ├── test_diabetesuk.py │ │ │ │ ├── test_finrisk.py │ │ │ │ └── test_qdiab.py │ │ │ ├── test_cox_ph.py │ │ │ ├── test_coxnet.py │ │ │ ├── test_deephit.py │ │ │ ├── test_loglogistic_aft.py │ │ │ ├── test_lognormal_aft.py │ │ │ ├── test_survival_xgboost.py │ │ │ └── test_weibull_aft.py │ ├── preprocessors │ │ ├── dimensionality_reduction │ │ │ ├── test_data_cleanup.py │ │ │ ├── test_dr_nop.py │ │ │ ├── test_fast_ica.py │ │ │ ├── test_feature_agglomeration.py │ │ │ ├── test_gauss_projection.py │ │ │ ├── test_pca.py │ │ │ └── test_variance_threshold.py │ │ ├── feature_scaling │ │ │ ├── test_feature_normalizer.py │ │ │ ├── test_fs_nop.py │ │ │ ├── test_maxabs_scaler.py │ │ │ ├── test_minmax_scaler.py │ │ │ ├── test_normal_transform.py │ │ │ ├── test_scaler.py │ │ │ └── test_uniform_transform.py │ │ └── test_preprocessing_api.py │ ├── uncertainty │ │ ├── test_cohort_explainer.py │ │ ├── test_conformal_prediction.py │ │ └── test_jackknife.py │ └── utils │ │ ├── test_cast.py │ │ ├── test_imputation_metrics.py │ │ └── test_simulate.py ├── studies │ ├── helpers.py │ ├── test_classifiers_studies.py │ ├── test_regression_studies.py │ └── 
test_risk_studies.py └── utils │ ├── test_metrics.py │ └── test_parallel.py ├── third_party └── image_template │ └── streamlit │ ├── .gitattributes │ ├── .streamlit │ └── config.toml │ ├── Procfile │ ├── README.md │ ├── app.py │ ├── requirements.txt │ └── runtime.txt └── tutorials ├── automl ├── tutorial_00_classification_study.ipynb ├── tutorial_01_automl_classification_with_imputation.ipynb ├── tutorial_02_survival_analysis_study.ipynb ├── tutorial_03_automl_survival_analysis_with_imputation.ipynb ├── tutorial_04_regression.ipynb ├── tutorial_05_classification_with_explainers.ipynb └── tutorial_06_automl_multiple_imputation_example.ipynb ├── bindings └── R │ ├── tutorial_classification.R │ ├── tutorial_classification_with_missing_data.R │ ├── tutorial_regression.R │ └── tutorial_survival_analysis.R ├── demonstrators ├── tutorial_00_build_a_demonstrator_classification.ipynb └── tutorial_01_build_a_demonstrator_survival_analysis.ipynb └── plugins ├── tutorial_00_imputation_plugins.ipynb ├── tutorial_01_preprocessing_plugins.ipynb ├── tutorial_02_classification_plugins.ipynb ├── tutorial_03_pipelines.ipynb ├── tutorial_04_interpretability.ipynb ├── tutorial_05_survival_analysis_plugins.ipynb └── tutorial_06_regression_plugins.ipynb /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Package release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | workflow_dispatch: 7 | 8 | 9 | jobs: 10 | # Build for OSX and publish, see scripts/release_osx.sh. 
11 | deploy_osx: 12 | runs-on: macos-latest 13 | strategy: 14 | matrix: 15 | python-version: ["3.9", "3.10", "3.11", "3.12"] 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | with: 20 | submodules: true 21 | - name: Set up Python 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: __token__ 28 | TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 29 | run: ${GITHUB_WORKSPACE}/.github/workflows/scripts/release_osx.sh 30 | 31 | # Build for Linux and publish, see scripts/release_linux.sh. 32 | deploy_linux: 33 | strategy: 34 | matrix: 35 | python-version: 36 | - cp39-cp39 37 | - cp310-cp310 38 | - cp311-cp311 39 | - cp312-cp312 40 | 41 | runs-on: ubuntu-latest 42 | container: 43 | image: node:20-bullseye # Use the official Node.js 20 image based on Debian 44 | steps: 45 | - uses: actions/checkout@v3 46 | with: 47 | submodules: true 48 | - name: Set target Python version PATH 49 | run: | 50 | echo "/opt/python/${{ matrix.python-version }}/bin" >> $GITHUB_PATH 51 | - name: Install Python Build Dependencies 52 | run: | 53 | apt-get update 54 | apt-get install -y python3 python3-pip python3-dev build-essential 55 | - name: Build and publish 56 | env: 57 | TWINE_USERNAME: __token__ 58 | TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 59 | run: ${GITHUB_WORKSPACE}/.github/workflows/scripts/release_linux.sh 60 | 61 | # Build for Windows and publish, see scripts/release_windows.bat. 
62 | deploy_windows: 63 | runs-on: windows-latest 64 | strategy: 65 | matrix: 66 | python-version: ["3.9", "3.10", "3.11", "3.12"] 67 | 68 | steps: 69 | - uses: actions/checkout@v3 70 | with: 71 | submodules: true 72 | - name: Set up Python ${{ matrix.python-version }} 73 | uses: actions/setup-python@v5 74 | with: 75 | python-version: ${{ matrix.python-version }} 76 | - name: Build and publish 77 | env: 78 | TWINE_USERNAME: __token__ 79 | TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 80 | run: | 81 | ../../.github/workflows/scripts/release_windows.bat 82 | -------------------------------------------------------------------------------- /.github/workflows/scripts/release_linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # Update the package list 6 | apt-get update 7 | 8 | # Install necessary packages and build tools 9 | apt-get install -y \ 10 | software-properties-common \ 11 | python3 \ 12 | python3-dev \ 13 | python3-pip \ 14 | build-essential \ 15 | llvm \ 16 | clang \ 17 | lsb-release 18 | 19 | # Add the LLVM repository to get the latest version of LLVM (if needed) 20 | wget https://apt.llvm.org/llvm.sh 21 | chmod +x llvm.sh 22 | ./llvm.sh 14 # Replace 14 with the required version if necessary 23 | 24 | # Upgrade pip to the latest version 25 | python3 -m pip install --upgrade pip 26 | 27 | # Install Python packaging tools 28 | python3 -m pip install setuptools wheel twine auditwheel 29 | 30 | # Build Python wheels 31 | python3 -m pip wheel . 
-w dist/ --no-deps 32 | 33 | # Publish the built wheels to PyPI 34 | twine upload --verbose --skip-existing dist/* 35 | -------------------------------------------------------------------------------- /.github/workflows/scripts/release_osx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | export MACOSX_DEPLOYMENT_TARGET=10.14 4 | 5 | python -m pip install --upgrade pip 6 | pip install setuptools wheel twine auditwheel 7 | 8 | python3 setup.py build bdist_wheel --plat-name macosx_10_14_x86_64 --dist-dir wheel 9 | twine upload --skip-existing wheel/* 10 | -------------------------------------------------------------------------------- /.github/workflows/scripts/release_windows.bat: -------------------------------------------------------------------------------- 1 | echo on 2 | 3 | python -m pip install --upgrade pip 4 | pip install setuptools wheel twine auditwheel 5 | 6 | pip wheel . -w wheel/ --no-deps 7 | twine upload --skip-existing wheel/* 8 | -------------------------------------------------------------------------------- /.github/workflows/test_R.yml: -------------------------------------------------------------------------------- 1 | name: Tests R 2 | 3 | on: 4 | workflow_dispatch: 5 | # push: 6 | # branches: [main, release] 7 | # schedule: 8 | # - cron: '2 3 * * 4' 9 | 10 | 11 | jobs: 12 | Library: 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | matrix: 16 | r-version: ['4.2'] 17 | python-version: ['3.8'] 18 | os: [macos-latest, ubuntu-latest] 19 | steps: 20 | - uses: actions/checkout@v2 21 | with: 22 | submodules: true 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v5 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Set up R ${{ matrix.r-version }} 28 | uses: r-lib/actions/setup-r@v2 29 | with: 30 | r-version: ${{ matrix.r-version }} 31 | - name: Install depends 32 | run: | 33 | Rscript -e 
"install.packages(c('remotes','reticulate'))" 34 | - name: Test R 35 | run: | 36 | Rscript tests/bindings/R/test_classification.R 37 | Rscript tests/bindings/R/test_classification_with_missing_data.R 38 | Rscript tests/bindings/R/test_regression.R 39 | Rscript tests/bindings/R/test_survival_analysis.R 40 | -------------------------------------------------------------------------------- /.github/workflows/test_full.yml: -------------------------------------------------------------------------------- 1 | name: Tests Full Python 2 | 3 | on: 4 | schedule: 5 | - cron: '0 1 * * 1' 6 | workflow_dispatch: 7 | 8 | 9 | jobs: 10 | Library: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | python-version: ["3.9", "3.10", "3.11", "3.12"] 15 | os: [macos-latest, ubuntu-latest, windows-latest] 16 | steps: 17 | - uses: actions/checkout@v2 18 | with: 19 | submodules: true 20 | - uses: gautamkrishnar/keepalive-workflow@v1 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - name: Install MacOS dependencies 26 | run: | 27 | brew install libomp 28 | if: ${{ matrix.os == 'macos-latest' }} 29 | - name: Install dependencies 30 | run: | 31 | pip install --upgrade pip 32 | pip install .[dev] 33 | - name: Test with pytest 34 | run: pytest -vvsx --durations=50 35 | -------------------------------------------------------------------------------- /.github/workflows/test_pr.yml: -------------------------------------------------------------------------------- 1 | name: Tests Fast Python 2 | 3 | on: 4 | push: 5 | branches: [main, release] 6 | pull_request: 7 | types: [opened, synchronize, reopened] 8 | workflow_dispatch: 9 | 10 | 11 | jobs: 12 | Linter: 13 | # GH runners: 14 | runs-on: ${{ matrix.os }} 15 | # # Self-hosted runners: 16 | # runs-on: 17 | # - self-hosted 18 | # - ${{ matrix.os }} 19 | strategy: 20 | matrix: 21 | python-version: ["3.10"] 22 | # GH runners: 23 | os: 
[ubuntu-latest] 24 | # # Self-hosted runners: 25 | # os: [Linux] 26 | steps: 27 | - uses: actions/checkout@v2 28 | with: 29 | submodules: true 30 | - name: Set up Python ${{ matrix.python-version }} 31 | uses: actions/setup-python@v5 32 | with: 33 | python-version: ${{ matrix.python-version }} 34 | - name: Install dependencies 35 | run: | 36 | pip install bandit pre-commit 37 | - name: pre-commit validation 38 | run: pre-commit run --all 39 | - name: Security checks 40 | run: | 41 | bandit -r src/autoprognosis/plugins/* 42 | bandit -r src/autoprognosis/studies/* 43 | 44 | Library: 45 | needs: [Linter] 46 | # GH runners: 47 | runs-on: ${{ matrix.os }} 48 | # # Self-hosted runners: 49 | # runs-on: 50 | # - self-hosted 51 | # - ${{ matrix.os }} 52 | strategy: 53 | matrix: 54 | python-version: ["3.9", "3.10", "3.11", "3.12"] 55 | # GH runners: 56 | os: [macos-latest, ubuntu-latest, windows-latest] 57 | # # Self-hosted runners: 58 | # os: [Linux, Windows, macOS] 59 | steps: 60 | - uses: actions/checkout@v2 61 | with: 62 | submodules: true 63 | - uses: gautamkrishnar/keepalive-workflow@v1 64 | - name: Set up Python ${{ matrix.python-version }} 65 | uses: actions/setup-python@v5 66 | with: 67 | python-version: ${{ matrix.python-version }} 68 | - name: Install MacOS dependencies 69 | run: | 70 | brew install libomp 71 | if: ${{ matrix.os == 'macos-latest' }} 72 | - name: Install dependencies 73 | run: | 74 | pip install --upgrade pip 75 | pip install .[dev] 76 | - name: Test with pytest 77 | run: pytest -vvvsx -m "not slow" --durations=50 78 | -------------------------------------------------------------------------------- /.github/workflows/test_tutorials.yml: -------------------------------------------------------------------------------- 1 | name: Tutorials 2 | 3 | on: 4 | push: 5 | branches: [main, release] 6 | pull_request: 7 | types: [opened, synchronize, reopened] 8 | schedule: 9 | - cron: '1 3 * * 0' 10 | workflow_dispatch: 11 | 12 | jobs: 13 | Tutorials: 14 | 
runs-on: ${{ matrix.os }} 15 | strategy: 16 | matrix: 17 | python-version: ["3.9", "3.10", "3.11", "3.12"] 18 | os: [ubuntu-latest] 19 | steps: 20 | - uses: actions/checkout@v2 21 | with: 22 | submodules: true 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v5 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install MacOS dependencies 28 | run: | 29 | brew install rajivshah3/libomp-tap/libomp@11.1.0 30 | if: ${{ matrix.os == 'macos-latest' }} 31 | - name: Install dependencies 32 | run: | 33 | pip install --upgrade pip 34 | pip install .[dev] 35 | 36 | python -m pip install ipykernel 37 | python -m ipykernel install --user 38 | - name: Run the tutorials 39 | run: python scripts/nb_test.py --nb_dir tutorials/ 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Temporary and binary files 2 | *~ 3 | workspace 4 | workspace* 5 | weight_checkpoint* 6 | *.py[cod] 7 | *.json 8 | *.so 9 | *.cfg 10 | !.isort.cfg 11 | !setup.cfg 12 | *.orig 13 | *.log 14 | *.pot 15 | __pycache__/* 16 | .cache/* 17 | .*.swp 18 | */.ipynb_checkpoints/* 19 | .DS_Store 20 | .ipynb_checkpoints 21 | tmp 22 | runs 23 | logs 24 | catboost_info 25 | *.p 26 | *.rdb 27 | *.gz 28 | *.gz.* 29 | *.dat 30 | 31 | # Project files 32 | .ropeproject 33 | .project 34 | .pydevproject 35 | .settings 36 | .idea 37 | .vscode 38 | tags 39 | 40 | # Package files 41 | *.egg 42 | *.eggs/ 43 | .installed.cfg 44 | *.egg-info 45 | *.csv 46 | *.gz 47 | 48 | # Unittest and coverage 49 | htmlcov/* 50 | .coverage 51 | .coverage.* 52 | .tox 53 | junit*.xml 54 | coverage.xml 55 | .pytest_cache/ 56 | 57 | # Build and docs folder/files 58 | build/* 59 | dist/* 60 | sdist/* 61 | docs/api/* 62 | docs/_rst/* 63 | docs/_build/* 64 | cover/* 65 | MANIFEST 66 | 67 | # Per-project virtualenvs 68 | .venv*/ 69 | .conda*/ 70 | datasets 71 | 
generated 72 | image_bin 73 | release 74 | 75 | # Other 76 | .dev 77 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: 'setup.py' 2 | 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v5.0.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: check-added-large-files 9 | - id: check-ast 10 | - id: check-json 11 | - id: check-merge-conflict 12 | - id: check-xml 13 | - id: check-yaml 14 | - id: debug-statements 15 | - id: check-executables-have-shebangs 16 | - id: end-of-file-fixer 17 | - id: requirements-txt-fixer 18 | - id: mixed-line-ending 19 | args: ['--fix=auto'] # replace 'auto' with 'lf' to enforce Linux/Mac line endings or 'crlf' for Windows 20 | 21 | - repo: https://github.com/astral-sh/ruff-pre-commit 22 | rev: v0.11.2 23 | hooks: 24 | - id: ruff 25 | types_or: [ python, pyi ] 26 | args: ["check", "--select", "I", "--fix"] 27 | files: "^src/" 28 | - id: ruff-format 29 | types_or: [ python, pyi ] 30 | files: "^src/" 31 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the version of Python and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.9" 12 | apt_packages: 13 | - pandoc 14 | # ^ pandoc required by nbsphinx. 15 | 16 | # Build documentation in the docs/ directory with Sphinx 17 | sphinx: 18 | configuration: docs/conf.py 19 | 20 | # Optionally build your docs in additional formats such as PDF 21 | formats: 22 | - pdf 23 | 24 | python: 25 | install: 26 | - method: pip 27 | path: . 
28 | extra_requirements: 29 | - docs 30 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/docs/arch.png -------------------------------------------------------------------------------- /docs/automl.rst: -------------------------------------------------------------------------------- 1 | AutoML studies 2 | ========================= 3 | 4 | .. toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | Classification studies 9 | Regression studies 10 | Risk estimation studies 11 | -------------------------------------------------------------------------------- /docs/classifiers.rst: -------------------------------------------------------------------------------- 1 | Classifiers 2 | ========================= 3 | 4 | .. 
toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | AdaBoost 9 | Bagging 10 | Naive Bayes 11 | CatBoost 12 | Decision Trees 13 | ExtraTree classifier 14 | Gaussian Naive Bayes 15 | Gradient Boosting 16 | KNN 17 | LDA 18 | LGBM 19 | Linear SVM 20 | Logistic Regression 21 | Multinomial Naive Bayes 22 | Neural nets 23 | Perceptron 24 | QDA 25 | Random forest 26 | Ridge classifier 27 | TabNet 28 | XGBoost 29 | -------------------------------------------------------------------------------- /docs/examples.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========================= 3 | 4 | .. toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | Classification studies 9 | Classification studies with imputation 10 | Classification studies with explainers 11 | Survival analysis studies 12 | Survival analysis studies with imputation 13 | Regression studies 14 | Multiple imputation studies 15 | -------------------------------------------------------------------------------- /docs/explainers.rst: -------------------------------------------------------------------------------- 1 | Explainability plugins 2 | ========================= 3 | 4 | .. toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | IVNASE 9 | Kernel SHAP 10 | LIME 11 | Risk Effect Size 12 | SHAP Permutation sampler 13 | Symbolic Pursuit 14 | -------------------------------------------------------------------------------- /docs/imputers.rst: -------------------------------------------------------------------------------- 1 | Imputation plugins 2 | ========================= 3 | 4 | .. 
toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | HyperImpute 9 | EM imputation 10 | GAIN imputation 11 | ICE imputation 12 | MICE imputation 13 | missForest 14 | SinkHorn imputation 15 | SoftImpute 16 | Mean imputation 17 | Median imputation 18 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. autoprognosis documentation master file, created by 2 | sphinx-quickstart on Thu Dec 15 13:02:37 2022. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | AutoPrognosis documentation! 7 | ========================================= 8 | 9 | .. mdinclude:: README.md 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | :caption: Contents: 14 | 15 | Examples 16 | ========== 17 | .. toctree:: 18 | :glob: 19 | :maxdepth: 3 20 | 21 | examples.rst 22 | 23 | 24 | AutoML studies 25 | =============== 26 | .. toctree:: 27 | :glob: 28 | :maxdepth: 2 29 | 30 | automl.rst 31 | 32 | Imputation plugins 33 | =================== 34 | .. toctree:: 35 | :glob: 36 | :maxdepth: 2 37 | 38 | imputers.rst 39 | 40 | Preprocessing plugins 41 | ====================== 42 | .. toctree:: 43 | :glob: 44 | :maxdepth: 2 45 | 46 | preprocessing.rst 47 | 48 | Prediction plugins 49 | =================== 50 | .. toctree:: 51 | :glob: 52 | :maxdepth: 3 53 | 54 | prediction.rst 55 | 56 | Explainability plugins 57 | ======================= 58 | .. toctree:: 59 | :glob: 60 | :maxdepth: 3 61 | 62 | explainers.rst 63 | 64 | Benchmarks 65 | ============== 66 | .. 
toctree:: 67 | :glob: 68 | :maxdepth: 3 69 | 70 | Evaluation 71 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/prediction.rst: -------------------------------------------------------------------------------- 1 | Prediction plugins 2 | ========================= 3 | 4 | .. toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | Classifiers 9 | Risk estimation 10 | Regression 11 | -------------------------------------------------------------------------------- /docs/preprocessing.rst: -------------------------------------------------------------------------------- 1 | Preprocessing plugins 2 | ========================= 3 | 4 | .. 
toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | Data cleanup 9 | FastICA 10 | Feature agglomeration 11 | Gaussian Projection 12 | PCA 13 | Variance threshold 14 | Feature normalizer 15 | MaxAbs scaler 16 | MinMax scaler 17 | Standard scaler 18 | Normal transform 19 | Uniform transform 20 | -------------------------------------------------------------------------------- /docs/regression.rst: -------------------------------------------------------------------------------- 1 | Regression 2 | ========================= 3 | 4 | .. toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | Bayesian Ridge 9 | Catboost regressor 10 | k-neighbors regressor 11 | Linear regression 12 | Neural nets regressor 13 | Random forest regressor 14 | TabNet regressor 15 | XGBoost regressor 16 | -------------------------------------------------------------------------------- /docs/risk_estimation.rst: -------------------------------------------------------------------------------- 1 | Risk estimation 2 | ========================= 3 | 4 | .. toctree:: 5 | :glob: 6 | :maxdepth: 2 7 | 8 | CoxNet 9 | DeepHit 10 | LogLogistic AFT 11 | LogNormal AFT 12 | Survival XGBoost 13 | Weibull AFT 14 | -------------------------------------------------------------------------------- /docs/tutorials: -------------------------------------------------------------------------------- 1 | ../tutorials/ -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=46.1.0", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.pytest.ini_options] 6 | markers = 'slow: mark a test as slow.' 
# stdlib
from pathlib import Path

# third party
import click
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

workspace = Path(__file__).parents[0] / "workspace"
workspace.mkdir(parents=True, exist_ok=True)


def run_notebook(notebook_path: Path) -> None:
    """Execute a notebook end-to-end inside the shared workspace directory."""
    with open(notebook_path) as f:
        nb = nbformat.read(f, as_version=4)

    proc = ExecutePreprocessor(timeout=1800)
    # Will raise on cell error
    proc.preprocess(nb, {"metadata": {"path": workspace}})


@click.command()
@click.option("--nb_dir", type=str, default=".")
def main(nb_dir: str) -> None:
    """Run every notebook under nb_dir, skipping demonstrators and checkpoints.

    Note: click delivers the option as str (the previous ``nb_dir: Path``
    annotation was misleading); it is converted to a Path locally.
    """
    nb_root = Path(nb_dir)

    for p in nb_root.rglob("*"):
        if "demonstrator" in str(p):
            print("Ignoring", p)
            continue

        if p.suffix != ".ipynb":
            continue
        if "checkpoint" in p.name:
            continue

        print("Testing ", p.name)
        try:
            run_notebook(p)
        except BaseException as e:
            print("FAIL", p.name, e)
            # Bare raise keeps the original traceback intact.
            raise


if __name__ == "__main__":
    main()
# Build the AutoPrognosis Streamlit demonstrator for the UK Biobank CVD study.
# Requires a trained risk-estimation model (model.p) and the study dataset
# (CSV) under ./workspace/biobank_cvd/.
# The horizons appear to be yearly multiples of 365 (i.e. days, 1-12 years)
# — confirm against the study setup. Kernel SHAP explanations and the demo
# login (--auth=True) are enabled.
python ./scripts/build_demonstrator.py \
       --name "AutoPrognosis: UK Biobank CVD study" \
       --model_path=./workspace/biobank_cvd/model.p \
       --dataset_path=./workspace/biobank_cvd/biobank_cvd.csv \
       --time_column=time_to_event \
       --target_column=event \
       --horizons="365, 730, 1095, 1460, 1825, 2190, 2555, 2920, 3285, 3650, 4015, 4380" \
       --task_type=risk_estimation \
       --explainers="kernel_shap" \
       --extras=biobank_cvd \
       --auth=True
def read(fname: str) -> str:
    """Return the contents of *fname*, resolved relative to this file.

    Uses a context manager so the file handle is closed deterministically
    (the previous version leaked the handle).
    """
    with open(os.path.join(os.path.dirname(__file__), fname)) as fh:
        return fh.read()


def find_version() -> str:
    """Extract ``__version__`` from src/autoprognosis/version.py.

    Returns:
        The declared version string, or "0.0.1" when it cannot be located.
    """
    version_file = read("src/autoprognosis/version.py")
    version_re = r"__version__ = \"(?P<version>.+)\""
    # re.search, not re.match: the assignment need not sit on the very first
    # line of version.py (a leading comment previously forced the fallback).
    version_raw = re.search(version_re, version_file)

    if version_raw is None:
        return "0.0.1"

    return version_raw.group("version")
def is_authenticated(dummy: str) -> bool:
    """Return True when the supplied password matches the demo password."""
    return dummy == "autoprognosis"


def generate_login_block() -> tuple:
    """Create two empty Streamlit placeholders used to render the login form."""
    block1 = st.empty()
    block2 = st.empty()

    return block1, block2


def clean_blocks(blocks) -> None:
    """Clear every placeholder produced by generate_login_block()."""
    for block in blocks:
        block.empty()
def get_ports(pid: int) -> list:
    """Return the local ports on which process *pid* is listening."""
    proc = psutil.Process(pid)
    return [
        conn.laddr.port
        for conn in proc.connections()
        if conn.status == "LISTEN"
    ]
def file_md5(fname: Path) -> str:
    """Return the hex MD5 digest of *fname*, streamed in 4 KiB chunks."""
    digest = hashlib.md5()
    with open(fname, "rb") as fh:
        while True:
            chunk = fh.read(4096)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
"xgboost_regressor", 14 | "linear_regression", 15 | "catboost_regressor", 16 | ] 17 | 18 | default_imputers_names = ["mean", "ice", "missforest", "hyperimpute"] 19 | default_feature_scaling_names = Preprocessors( 20 | category="feature_scaling" 21 | ).list_available() 22 | default_feature_selection_names = ["nop", "pca", "fast_ica"] 23 | default_risk_estimation_names = [ 24 | "survival_xgboost", 25 | "loglogistic_aft", 26 | "deephit", 27 | "cox_ph", 28 | "weibull_aft", 29 | "lognormal_aft", 30 | "coxnet", 31 | ] 32 | 33 | percentile_val = 1.96 34 | -------------------------------------------------------------------------------- /src/autoprognosis/explorers/core/optimizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/explorers/core/optimizers/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | # autoprognosis relative 2 | from .base import Hooks # noqa: F401 3 | from .default import DefaultHooks # noqa: F401 4 | -------------------------------------------------------------------------------- /src/autoprognosis/hooks/base.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from abc import ABCMeta, abstractmethod 3 | from typing import Any 4 | 5 | 6 | class Hooks(metaclass=ABCMeta): 7 | """AutoML hooks interface. 8 | 9 | Methods: 10 | - cancel: True/False if to stop the current AutoML search. 11 | - heartbeat: Metrics/logs sink from the AutoML search 12 | 13 | """ 14 | 15 | @abstractmethod 16 | def cancel(self) -> bool: ... 17 | 18 | @abstractmethod 19 | def heartbeat( 20 | self, topic: str, subtopic: str, event_type: str, **kwargs: Any 21 | ) -> None: ... 
class DefaultHooks(Hooks):
    """Default no-op implementation of the AutoML Hooks interface.

    Never requests cancellation and forwards heartbeat events to the
    debug log.
    """

    def cancel(self) -> bool:
        # Never ask the AutoML search to stop.
        return False

    def heartbeat(
        self, topic: str, subtopic: str, event_type: str, **kwargs: Any
    ) -> None:
        # kwargs are accepted for interface compatibility but not logged.
        log.debug(f"[{topic}][{subtopic}] {event_type}")

    def finish(self) -> None:
        # Nothing to clean up for the default hooks.
        pass
class Explainers(PluginLoader):
    """Loader/registry for explainer plugins.

    Exposes every ``plugin*.py`` module discovered next to this file
    through the common PluginLoader interface.
    """

    def __init__(self) -> None:
        super().__init__(plugins, ExplainerPlugin)
    def plot(
        self,
        importances: pd.DataFrame,
        feature_names: Optional[list] = None,
    ) -> None:
        """Render a bar chart of per-feature importances via matplotlib.

        Args:
            importances: one importance value per feature; converted with
                ``np.asarray``, so any array-like is accepted.
            feature_names: x-axis labels. Falls back to the names supplied
                at construction time when None or empty.

        Note: draws on a fresh figure but does not call ``plt.show()`` or
        close it — display/cleanup is left to the caller.
        """
        importances = np.asarray(importances)

        title = f"{self.name()} importance"
        axis_title = "Features"

        if not feature_names:
            feature_names = self.feature_names

        # NOTE(review): assumes len(importances) == len(feature_names);
        # a mismatch would misalign or fail inside plt.bar — confirm callers.
        x_pos = np.arange(len(feature_names))

        plt.figure(figsize=(20, 6))
        plt.bar(x_pos, importances, align="center")
        plt.xticks(x_pos, feature_names, wrap=True)
        plt.xlabel(axis_title)
        plt.title(title)
    def __init__(self, model: Any) -> None:
        """Wrap a concrete imputation implementation.

        Args:
            model: underlying imputer; must expose ``fit`` and ``transform``
                (duck-typed — e.g. a hyperimpute plugin).

        Raises:
            RuntimeError: if the model lacks ``fit`` or ``transform``.
        """
        super().__init__()

        if not hasattr(model, "fit") or not hasattr(model, "transform"):
            raise RuntimeError("Invalid instance model type")

        self._model = model
Minimum ratio difference between iterations before stopping.
- The generator imputes the missing components conditioned on what is actually observed, and outputs a completed vector.
    def __init__(self, random_state: int = 0, **kwargs: Any) -> None:
        # Instantiate the wrapped hyperimpute GAIN plugin; extra kwargs are
        # forwarded verbatim.
        model = base_model(random_state=random_state, **kwargs)

        super().__init__(model)

    @staticmethod
    def name() -> str:
        """Registry name, delegated to the wrapped hyperimpute plugin."""
        return base_model.name()

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        """Tunable hyperparameters, delegated to the wrapped plugin."""
        return base_model.hyperparameter_space()


# Module-level export consumed by the plugin loader.
plugin = GainPlugin
    def __init__(self, random_state: int = 0, **kwargs: Any) -> None:
        # Instantiate the wrapped hyperimpute ICE plugin; extra kwargs are
        # forwarded verbatim.
        model = base_model(random_state=random_state, **kwargs)

        super().__init__(model)

    @staticmethod
    def name() -> str:
        # Deliberate override: expose as "ice" rather than the wrapped
        # plugin's own name (presumably "sklearn_ice" — confirm upstream).
        return "ice"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        """Tunable hyperparameters, delegated to the wrapped plugin."""
        return base_model.hyperparameter_space()


# Module-level export consumed by the plugin loader.
plugin = IterativeChainedEquationsPlugin
    def __init__(self, random_state: int = 0, **kwargs: Any) -> None:
        # Instantiate the wrapped hyperimpute mean-imputation plugin.
        model = base_model(random_state=random_state, **kwargs)

        super().__init__(model)

    @staticmethod
    def name() -> str:
        """Registry name, delegated to the wrapped hyperimpute plugin."""
        return base_model.name()

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        """Tunable hyperparameters, delegated to the wrapped plugin."""
        return base_model.hyperparameter_space()


# Module-level export consumed by the plugin loader.
plugin = MeanPlugin
    def __init__(self, random_state: int = 0, **kwargs: Any) -> None:
        # Instantiate the wrapped hyperimpute median-imputation plugin.
        model = base_model(random_state=random_state, **kwargs)

        super().__init__(model)

    @staticmethod
    def name() -> str:
        """Registry name, delegated to the wrapped hyperimpute plugin."""
        return base_model.name()

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        """Tunable hyperparameters, delegated to the wrapped plugin."""
        return base_model.hyperparameter_space()


# Module-level export consumed by the plugin loader.
plugin = MedianPlugin
    def __init__(self, random_state: int = 0, **kwargs: Any) -> None:
        # Instantiate the wrapped hyperimpute MissForest plugin; extra
        # kwargs are forwarded verbatim.
        model = base_model(random_state=random_state, **kwargs)

        super().__init__(model)

    @staticmethod
    def name() -> str:
        # Deliberate override: expose as "missforest" rather than the
        # wrapped plugin's own name (presumably "sklearn_missforest" —
        # confirm upstream).
        return "missforest"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        """Tunable hyperparameters, delegated to the wrapped plugin."""
        return base_model.hyperparameter_space()


# Module-level export consumed by the plugin loader.
plugin = MissForestPlugin
17 | 18 | Example: 19 | >>> import numpy as np 20 | >>> from autoprognosis.plugins.imputers import Imputers 21 | >>> plugin = Imputers().get("most_frequent") 22 | >>> plugin.fit_transform([[1, 1, 1, 1], [np.nan, np.nan, np.nan, np.nan], [1, 2, 2, 1], [2, 2, 2, 2]]) 23 | 0 1 2 3 24 | 0 1.0 1.0 1.0 1.0 25 | 1 1.0 2.0 2.0 1.0 26 | 2 1.0 2.0 2.0 1.0 27 | 3 2.0 2.0 2.0 2.0 28 | """ 29 | 30 | def __init__(self, random_state: int = 0, **kwargs: Any) -> None: 31 | model = base_model(random_state=random_state, **kwargs) 32 | 33 | super().__init__(model) 34 | 35 | @staticmethod 36 | def name() -> str: 37 | return base_model.name() 38 | 39 | @staticmethod 40 | def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]: 41 | return base_model.hyperparameter_space() 42 | 43 | 44 | plugin = MostFrequentPlugin 45 | -------------------------------------------------------------------------------- /src/autoprognosis/plugins/imputers/plugin_nop.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Any, List 3 | 4 | # third party 5 | from hyperimpute.plugins.imputers.plugin_nop import plugin as base_model 6 | 7 | # autoprognosis absolute 8 | import autoprognosis.plugins.core.params as params 9 | import autoprognosis.plugins.imputers.base as base 10 | 11 | 12 | class NopPlugin(base.ImputerPlugin): 13 | """Imputer plugin that doesn't alter the dataset.""" 14 | 15 | def __init__(self, random_state: int = 0, **kwargs: Any) -> None: 16 | model = base_model(random_state=random_state, **kwargs) 17 | 18 | super().__init__(model) 19 | 20 | @staticmethod 21 | def name() -> str: 22 | return base_model.name() 23 | 24 | @staticmethod 25 | def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]: 26 | return base_model.hyperparameter_space() 27 | 28 | 29 | plugin = NopPlugin 30 | -------------------------------------------------------------------------------- 
class SinkhornPlugin(base.ImputerPlugin):
    """Imputer for quantitative data based on Sinkhorn divergences.

    Relies on the idea that two batches extracted randomly from the same
    dataset should share the same distribution, and imputes missing values by
    minimizing optimal transport distances between batches.

    Args:
        eps: float, default=0.01
            Sinkhorn regularization parameter.
        lr: float, default=0.01
            Learning rate.
        opt: torch.nn.optim.Optimizer, default=torch.optim.Adam
            Optimizer class to use for fitting.
        n_epochs: int, default=15
            Number of gradient updates for each model within a cycle.
        batch_size: int, default=256
            Size of the batches on which the sinkhorn divergence is evaluated.
        n_pairs: int, default=10
            Number of batch pairs used per gradient update.
        noise: float, default=0.1
            Noise used for the missing values initialization.
        scaling: float, default=0.9
            Scaling parameter in Sinkhorn iterations.

    Example:
        >>> import numpy as np
        >>> from autoprognosis.plugins.imputers import Imputers
        >>> plugin = Imputers().get("sinkhorn")
        >>> plugin.fit_transform([[1, 1, 1, 1], [np.nan, np.nan, np.nan, np.nan], [1, 2, 2, 1], [2, 2, 2, 2]])

    Reference:
        "Missing Data Imputation using Optimal Transport",
        Boris Muzellec, Julie Josse, Claire Boyer, Marco Cuturi.
        Original code: https://github.com/BorisMuzellec/MissingDataOT
    """

    def __init__(self, random_state: int = 0, **kwargs: Any) -> None:
        # The optimal-transport machinery lives in the hyperimpute plugin.
        wrapped = base_model(random_state=random_state, **kwargs)

        super().__init__(wrapped)

    @staticmethod
    def name() -> str:
        return base_model.name()

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        return base_model.hyperparameter_space()


plugin = SinkhornPlugin
13 | To calibrate the the nuclear-norm regularization parameter(shrink_lambda), we perform cross- validation(_cv_softimpute) 14 | 15 | Args: 16 | maxit: int, default=500 17 | maximum number of imputation rounds to perform. 18 | convergence_threshold : float, default=1e-5 19 | Minimum ration difference between iterations before stopping. 20 | max_rank : int, default=2 21 | Perform a truncated SVD on each iteration with this value as its rank. 22 | shrink_lambda: float, default=0 23 | Value by which we shrink singular values on each iteration. If it's missing, it is calibrated using cross validation. 24 | cv_len: int, default=15 25 | the length of the grid on which the cross-validation is performed. 26 | 27 | Example: 28 | >>> import numpy as np 29 | >>> from autoprognosis.plugins.imputers import Imputers 30 | >>> plugin = Imputers().get("softimpute") 31 | >>> plugin.fit_transform([[1, 1, 1, 1], [np.nan, np.nan, np.nan, np.nan], [1, 2, 2, 1], [2, 2, 2, 2]]) 32 | 0 1 2 3 33 | 0 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 34 | 1 3.820605e-16 1.708249e-16 1.708249e-16 3.820605e-16 35 | 2 1.000000e+00 2.000000e+00 2.000000e+00 1.000000e+00 36 | 3 2.000000e+00 2.000000e+00 2.000000e+00 2.000000e+00 37 | 38 | Reference: "Spectral Regularization Algorithms for Learning Large Incomplete Matrices", by Mazumder, Hastie, and Tibshirani. 
class Predictions:
    """Facade over one category of prediction plugins.

    Args:
        category: one of "classifier", "risk_estimation" or "regression".
            Selects which plugin loader backs this instance.
    """

    def __init__(self, category: str = "classifier") -> None:
        self._category = category

        # Populated by reload() below.
        self._plugins: Union[Classifiers, RiskEstimation, Regression]

        self.reload()

    def list(self) -> List[str]:
        """Return the names of all registered plugins."""
        return self._plugins.list()

    def list_available(self) -> List[str]:
        """Return the names of the plugins that can be loaded."""
        return self._plugins.list_available()

    def add(self, name: str, cls: Type) -> "Predictions":
        """Register a new plugin class under `name`."""
        self._plugins.add(name, cls)

        return self

    def get(self, name: str, *args: Any, **kwargs: Any) -> PredictionPlugin:
        """Instantiate the plugin called `name` with the given arguments."""
        return self._plugins.get(name, *args, **kwargs)

    def get_type(self, name: str) -> Type:
        """Return the class (not an instance) of the plugin called `name`."""
        return self._plugins.get_type(name)

    def __iter__(self) -> Generator:
        yield from self._plugins

    def __len__(self) -> int:
        return len(self.list())

    def __getitem__(self, key: str) -> PredictionPlugin:
        return self.get(key)

    def reload(self) -> "Predictions":
        """(Re)create the underlying plugin loader for the current category."""
        loaders = {
            "classifier": Classifiers,
            "risk_estimation": RiskEstimation,
            "regression": Regression,
        }
        if self._category not in loaders:
            raise ValueError(f"unsupported category {self._category}")

        self._plugins = loaders[self._category]()

        return self
26 | """ 27 | 28 | def __init__(self) -> None: 29 | super().__init__() 30 | 31 | @staticmethod 32 | def type() -> str: 33 | return "prediction" 34 | 35 | def _transform(self, X: pd.DataFrame) -> pd.DataFrame: 36 | raise NotImplementedError( 37 | "Prediction plugins do not implement the 'transform' method" 38 | ) 39 | 40 | def score(self, X: pd.DataFrame, y: pd.DataFrame, metric: str = "aucroc") -> float: 41 | raise NotImplementedError(f"Score not implemented for {self.name()}") 42 | 43 | def explain(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame: 44 | raise NotImplementedError(f"Explainer not implemented for {self.name()}") 45 | 46 | def predict_proba(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame: 47 | if not self.is_fitted(): 48 | raise RuntimeError("Fit the model first") 49 | 50 | log.debug(f"Predicting using {self.fqdn()}, input shape = {X.shape}") 51 | X = self._preprocess_inference_data(X) 52 | result = pd.DataFrame(self._predict_proba(X, *args, **kwargs)) 53 | 54 | return result 55 | 56 | @abstractmethod 57 | def _predict_proba( 58 | self, X: pd.DataFrame, *args: Any, **kwargs: Any 59 | ) -> pd.DataFrame: ... 
60 | -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/classifiers/__init__.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | import glob 3 | from os.path import basename, dirname, isfile, join 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.plugins.core.base_plugin import PluginLoader 7 | from autoprognosis.plugins.prediction.classifiers.base import ( # noqa: F401,E402 8 | ClassifierPlugin, 9 | ) 10 | 11 | plugins = glob.glob(join(dirname(__file__), "plugin*.py")) 12 | 13 | 14 | class Classifiers(PluginLoader): 15 | def __init__(self) -> None: 16 | super().__init__(plugins, ClassifierPlugin) 17 | 18 | 19 | __all__ = [basename(f)[:-3] for f in plugins if isfile(f)] + [ 20 | "Classifiers", 21 | "ClassifierPlugin", 22 | ] 23 | -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/classifiers/base.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Any 3 | 4 | # third party 5 | import pandas as pd 6 | 7 | # autoprognosis absolute 8 | import autoprognosis.logger as log 9 | import autoprognosis.plugins.core.base_plugin as plugin 10 | import autoprognosis.plugins.prediction.base as prediction_base 11 | import autoprognosis.plugins.utils.cast as cast 12 | from autoprognosis.utils.tester import classifier_metrics 13 | 14 | 15 | class ClassifierPlugin(prediction_base.PredictionPlugin): 16 | """Base class for the classifier plugins. 17 | 18 | It provides the implementation for plugin.Plugin's subtype, _fit and _predict methods. 19 | 20 | Each derived class must implement the following methods(inherited from plugin.Plugin): 21 | name() - a static method that returns the name of the plugin. 22 | hyperparameter_space() - a static method that returns the hyperparameters that can be tuned during the optimization. 
# Supported calibration strategies, indexed by the `calibration` argument.
calibrations = ["none", "sigmoid", "isotonic"]


def calibrated_model(model: Any, calibration: int = 1, **kwargs: Any) -> Any:
    """Optionally wrap a classifier in sklearn probability calibration.

    Args:
        model: the estimator to calibrate.
        calibration: index into `calibrations`: 0 = none,
            1 = sigmoid (Platt scaling), 2 = isotonic regression.
        **kwargs: unused; accepted for call-site compatibility.

    Returns:
        `model` unchanged when calibration is disabled and the model exposes
        `predict_proba`; otherwise a `CalibratedClassifierCV` wrapper.

    Raises:
        RuntimeError: if `calibration` is not a valid index into `calibrations`.
    """
    # Reject out-of-range values, including negatives: previously only the
    # upper bound was checked, so e.g. -1 silently selected "isotonic" via
    # Python's negative indexing.
    if not 0 <= calibration < len(calibrations):
        raise RuntimeError("invalid calibration value")

    # sklearn 1.2 renamed CalibratedClassifierCV's `base_estimator` argument
    # to `estimator`; build the kwargs accordingly.
    if version.parse(sklearn.__version__) >= version.parse("1.2"):
        est_kwargs = {
            "estimator": model,
        }
    else:
        est_kwargs = {
            "base_estimator": model,
        }

    # Models without predict_proba must always be wrapped so they can emit
    # probabilities, regardless of the requested calibration method.
    if not hasattr(model, "predict_proba"):
        return CalibratedClassifierCV(**est_kwargs, n_jobs=n_learner_jobs())

    if calibration != 0:
        return CalibratedClassifierCV(
            **est_kwargs,
            method=calibrations[calibration],
            n_jobs=n_learner_jobs(),
        )

    return model
class LinearRegressionPlugin(base.RegressionPlugin):
    """Regression plugin based on sklearn's ordinary least squares LinearRegression.

    Example:
        >>> from autoprognosis.plugins.prediction import Predictions
        >>> plugin = Predictions(category="regression").get("linear_regression")
        >>> from sklearn.datasets import load_iris
        >>> X, y = load_iris(return_X_y=True)
        >>> plugin.fit_predict(X, y)  # returns the regression estimates
    """

    # NOTE(review): kept only for backward compatibility. These solver names
    # belong to Ridge regression, not to sklearn's LinearRegression, and were
    # never forwarded to the underlying model.
    solvers = ["auto", "cholesky", "lsqr", "sparse_cg", "sag", "saga"]

    def __init__(self, model: Any = None, random_state: int = 0, **kwargs: Any) -> None:
        super().__init__(**kwargs)
        if model is not None:
            # Restore path, used by load().
            self.model = model
            return

        self.model = LinearRegression(
            n_jobs=n_learner_jobs(),
        )

    @staticmethod
    def name() -> str:
        return "linear_regression"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        # Ordinary least squares has no tunable hyperparameters. The
        # previously advertised "max_iter"/"solver" knobs are Ridge
        # parameters: sklearn's LinearRegression does not accept them and
        # __init__ never forwarded them to the model, so the AutoML search
        # was tuning no-ops.
        return []

    def _fit(
        self, X: pd.DataFrame, *args: Any, **kwargs: Any
    ) -> "LinearRegressionPlugin":
        self.model.fit(X, *args, **kwargs)
        return self

    def _predict(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame:
        return self.model.predict(X, *args, **kwargs)

    def save(self) -> bytes:
        return serialization.save_model(self.model)

    @classmethod
    def load(cls, buff: bytes) -> "LinearRegressionPlugin":
        model = serialization.load_model(buff)

        return cls(model=model)


plugin = LinearRegressionPlugin
class MLPRegressionPlugin(base.RegressionPlugin):
    """Regression plugin based on sklearn's MLPRegressor neural network.

    Example:
        >>> from autoprognosis.plugins.prediction import Predictions
        >>> plugin = Predictions(category="regression").get("mlp_regressor")
        >>> from sklearn.datasets import load_iris
        >>> X, y = load_iris(return_X_y=True)
        >>> plugin.fit_predict(X, y)
    """

    def __init__(self, model: Any = None, random_state: int = 0, **kwargs: Any) -> None:
        super().__init__(**kwargs)
        if model is not None:
            # Restore path, used by load().
            self.model = model
            return

        self.model = MLPRegressor(max_iter=500, random_state=random_state)

    @staticmethod
    def name() -> str:
        return "mlp_regressor"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        # No hyperparameters are exposed to the AutoML search for this model.
        return []

    def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "MLPRegressionPlugin":
        self.model.fit(X, *args, **kwargs)
        return self

    def _predict(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame:
        return self.model.predict(X, *args, **kwargs)

    def save(self) -> bytes:
        return serialization.save_model(self.model)

    @classmethod
    def load(cls, buff: bytes) -> "MLPRegressionPlugin":
        restored = serialization.load_model(buff)

        return cls(model=restored)


plugin = MLPRegressionPlugin
/src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/cvd/aha/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/cvd/aha/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/cvd/framingham/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/cvd/framingham/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/cvd/qrisk3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/cvd/qrisk3/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/ada/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/ada/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/diabetes_uk/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/diabetes_uk/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/finrisk/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/finrisk/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/qdiabetes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/diabetes/qdiabetes/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/plugins/prediction/risk_estimation/benchmarks/prostate_cancer/__init__.py: -------------------------------------------------------------------------------- 
class LifelinesWrapper:
    """Adapter exposing a lifelines survival model through a sklearn-like API.

    Args:
        model: a lifelines-style estimator supporting
            ``fit(df, duration_col=..., event_col=...)`` and
            ``predict_survival_function``.
    """

    def __init__(self, model: Any, **kwargs: Any) -> None:
        self.model = model

    def fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "LifelinesWrapper":
        """Fit the wrapped model. Call as ``fit(X, T, Y)``: covariates, durations, event labels."""
        if len(args) < 2:
            raise ValueError("Invalid input for fit. Expecting X, T and Y.")

        T = args[0]
        Y = args[1]

        # Align indexes so the column-wise concat below lines the rows up.
        X = X.reset_index(drop=True)
        T = T.reset_index(drop=True)
        Y = Y.reset_index(drop=True)

        df = pd.concat([X, T, Y], axis=1)
        df.columns = [x for x in X.columns] + ["time", "label"]

        self.model.fit(df, duration_col="time", event_col="label", **kwargs)

        return self

    def predict(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> np.ndarray:
        """Predict risk (1 - survival probability) at the given time horizons.

        Call as ``predict(X, time_horizons)``; returns an array of shape
        ``(len(X), len(time_horizons))``.
        """
        if len(args) < 1:
            raise ValueError("Invalid input for predict. Expecting X and time horizon.")

        time_horizons = args[0]

        # Evaluate in chunks of ~1024 rows to bound memory usage.
        chunks = int(len(X) / 1024) + 1

        preds_ = []
        for chunk in np.array_split(X, chunks):
            local_preds_ = np.zeros([len(chunk), len(time_horizons)])
            # `surv`: rows are time points (index), columns are samples.
            surv = self.model.predict_survival_function(chunk)
            surv_times = np.asarray(surv.index).astype(int)
            surv = np.asarray(surv.T)  # (samples, times)

            for t, eval_time in enumerate(time_horizons):
                tmp_time = np.where(eval_time <= surv_times)[0]
                if len(tmp_time) == 0:
                    # Horizon beyond the last observed time point: use the
                    # *latest* survival estimate. The previous code used the
                    # first column (earliest time point, survival ~1), which
                    # grossly underestimated risk at late horizons.
                    local_preds_[:, t] = 1.0 - surv[:, -1]
                else:
                    local_preds_[:, t] = 1.0 - surv[:, tmp_time[0]]

            preds_.append(local_preds_)

        return np.concatenate(preds_, axis=0)
class PreprocessorPlugin(plugin.Plugin):
    """Base class for the preprocessing plugins.

    It provides the implementation for plugin.Plugin.type() static method.

    Each derived class must implement the following methods(inherited from plugin.Plugin):
        name() - a static method that returns the name of the plugin.
        hyperparameter_space() - a static method that returns the hyperparameters that can be tuned during the optimization. The method will return a list of `params.Params` derived objects.
        _fit() - internal implementation, called by the `fit` method.
        _transform() - internal implementation, called by the `transform` method.

    If any method implementation is missing, the class constructor will fail.
    """

    def __init__(self) -> None:
        super().__init__()

    @staticmethod
    def type() -> str:
        return "preprocessor"

    @staticmethod
    def components_interval(*args: Any, **kwargs: Any) -> Tuple[int, int]:
        """Return the valid (min, max) range for component-count hyperparameters.

        Args:
            **kwargs: must contain "features_count", the number of input features.

        Returns:
            Tuple[int, int]: the inclusive interval (1, features_count).

        Raises:
            ValueError: if "features_count" is missing or not a positive integer.
        """
        if "features_count" not in kwargs:
            raise ValueError(
                "invalid arguments for hyperparameter_space. Expecting 'features_count' value"
            )

        # Presence was checked above, so no .get() default is needed.
        feature_count = kwargs["features_count"]

        # Reject zero AND negative counts: either would yield an invalid interval.
        if feature_count <= 0:
            raise ValueError("invalid value for 'features_count'")

        return (1, feature_count)

    def _predict(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame:
        raise NotImplementedError(
            "Preprocessing plugins do not implement the 'predict' method"
        )

    def _predict_proba(self, X: pd.DataFrame) -> pd.DataFrame:
        raise NotImplementedError(
            "Preprocessing plugins do not implement the 'predict_proba' method"
        )
class FeatureAgglomerationPlugin(base.PreprocessorPlugin):
    """Dimensionality-reduction plugin built on sklearn's FeatureAgglomeration.

    Method:
        Similar-looking features are merged via agglomerative clustering,
        shrinking the feature space down to ``n_clusters`` groups.

    Reference:
        https://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html

    Args:
        n_clusters: int
            Number of clusters to find.

    Example:
        >>> from autoprognosis.plugins.preprocessors import Preprocessors
        >>> plugin = Preprocessors(category="dimensionality_reduction").get("feature_agglomeration")
        >>> from sklearn.datasets import load_iris
        >>> X, y = load_iris(return_X_y=True)
        >>> plugin.fit_transform(X, y)
    """

    def __init__(
        self, model: Any = None, random_state: int = 0, n_clusters: int = 2
    ) -> None:
        super().__init__()
        # Reuse a deserialized model when provided, otherwise build a fresh one.
        self.model = model if model else FeatureAgglomeration(n_clusters=n_clusters)

    @staticmethod
    def name() -> str:
        return "feature_agglomeration"

    @staticmethod
    def subtype() -> str:
        return "dimensionality_reduction"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        low, high = base.PreprocessorPlugin.components_interval(*args, **kwargs)
        return [params.Integer("n_clusters", low, high)]

    def _fit(
        self, X: pd.DataFrame, *args: Any, **kwargs: Any
    ) -> "FeatureAgglomerationPlugin":
        self.model.fit(X, *args, **kwargs)
        return self

    def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
        return self.model.transform(X)

    def save(self) -> bytes:
        return serialization.save_model(self.model)

    @classmethod
    def load(cls, buff: bytes) -> "FeatureAgglomerationPlugin":
        return cls(model=serialization.load_model(buff))


plugin = FeatureAgglomerationPlugin
class NopPlugin(base.PreprocessorPlugin):
    """Pass-through dimensionality-reduction plugin: leaves the data untouched."""

    def __init__(self, random_state: int = 0) -> None:
        super().__init__()

    @staticmethod
    def name() -> str:
        return "nop"

    @staticmethod
    def subtype() -> str:
        return "dimensionality_reduction"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        # Nothing to tune for a no-op transform.
        return []

    def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "NopPlugin":
        # Stateless: nothing to learn.
        return self

    def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
        return X

    def save(self) -> bytes:
        # No internal state to serialize.
        return b""

    @classmethod
    def load(cls, buff: bytes) -> "NopPlugin":
        return cls()


plugin = NopPlugin
class FeatureNormalizerPlugin(base.PreprocessorPlugin):
    """Preprocessing plugin rescaling each sample to unit (L2) norm.

    Method:
        Every row is divided by its Euclidean norm so all samples end up
        with unit length.

    Reference:
        https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.Normalizer.html

    Example:
        >>> from autoprognosis.plugins.preprocessors import Preprocessors
        >>> plugin = Preprocessors().get("feature_normalizer")
        >>> from sklearn.datasets import load_iris
        >>> X, y = load_iris(return_X_y=True)
        >>> plugin.fit_transform(X, y)
    """

    def __init__(self, random_state: int = 0, model: Any = None) -> None:
        super().__init__()
        # A deserialized model takes precedence over building a new one.
        self.model = model if model else Normalizer()

    @staticmethod
    def name() -> str:
        return "feature_normalizer"

    @staticmethod
    def subtype() -> str:
        return "feature_scaling"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        # Plain L2 normalization has no tunable hyperparameters.
        return []

    def _fit(
        self, X: pd.DataFrame, *args: Any, **kwargs: Any
    ) -> "FeatureNormalizerPlugin":
        self.model.fit(X, *args, **kwargs)
        return self

    def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
        return self.model.transform(X)

    def save(self) -> bytes:
        return serialization.save_model(self.model)

    @classmethod
    def load(cls, buff: bytes) -> "FeatureNormalizerPlugin":
        return cls(model=serialization.load_model(buff))


plugin = FeatureNormalizerPlugin
class MaxAbsScalerPlugin(base.PreprocessorPlugin):
    """Preprocessing plugin scaling each feature by its maximum absolute value.

    Method:
        Every feature is scaled so that its maximal absolute value over the
        training set becomes 1.0; no shifting/centering is applied.

    Reference:
        https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MaxAbsScaler.html

    Example:
        >>> from autoprognosis.plugins.preprocessors import Preprocessors
        >>> plugin = Preprocessors().get("maxabs_scaler")
        >>> from sklearn.datasets import load_iris
        >>> X, y = load_iris(return_X_y=True)
        >>> plugin.fit_transform(X, y)
    """

    def __init__(self, random_state: int = 0, model: Any = None) -> None:
        super().__init__()
        # Prefer the deserialized model when one is supplied.
        self.model = model if model else MaxAbsScaler()

    @staticmethod
    def name() -> str:
        return "maxabs_scaler"

    @staticmethod
    def subtype() -> str:
        return "feature_scaling"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        # No tunable hyperparameters for max-abs scaling.
        return []

    def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "MaxAbsScalerPlugin":
        self.model.fit(X)
        return self

    def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
        return self.model.transform(X)

    def save(self) -> bytes:
        return serialization.save_model(self.model)

    @classmethod
    def load(cls, buff: bytes) -> "MaxAbsScalerPlugin":
        return cls(model=serialization.load_model(buff))


plugin = MaxAbsScalerPlugin
class MinMaxScalerPlugin(base.PreprocessorPlugin):
    """Preprocessing plugin scaling each feature into a fixed range.

    Method:
        Each feature is shifted and rescaled individually so that, on the
        training set, it lies inside the target range (zero to one by default).

    Reference:
        https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html

    Example:
        >>> from autoprognosis.plugins.preprocessors import Preprocessors
        >>> plugin = Preprocessors().get("minmax_scaler")
        >>> from sklearn.datasets import load_iris
        >>> X, y = load_iris(return_X_y=True)
        >>> plugin.fit_transform(X, y)
    """

    def __init__(self, random_state: int = 0, model: Any = None) -> None:
        super().__init__()
        # A provided (deserialized) model wins over constructing a new one.
        self.model = model if model else MinMaxScaler()

    @staticmethod
    def name() -> str:
        return "minmax_scaler"

    @staticmethod
    def subtype() -> str:
        return "feature_scaling"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        # No tunable hyperparameters exposed for min-max scaling.
        return []

    def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "MinMaxScalerPlugin":
        self.model.fit(X)
        return self

    def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
        return self.model.transform(X)

    def save(self) -> bytes:
        return serialization.save_model(self.model)

    @classmethod
    def load(cls, buff: bytes) -> "MinMaxScalerPlugin":
        return cls(model=serialization.load_model(buff))


plugin = MinMaxScalerPlugin
class NopPlugin(base.PreprocessorPlugin):
    """Pass-through feature-scaling plugin: returns the data unchanged."""

    def __init__(self, random_state: int = 0) -> None:
        super().__init__()

    @staticmethod
    def name() -> str:
        return "nop"

    @staticmethod
    def subtype() -> str:
        return "feature_scaling"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        # A no-op has nothing to tune.
        return []

    def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "NopPlugin":
        # Stateless: fitting is a no-op.
        return self

    def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
        return X

    def save(self) -> bytes:
        # No state worth serializing.
        return b""

    @classmethod
    def load(cls, buff: bytes) -> "NopPlugin":
        return cls()


plugin = NopPlugin
class ScalerPlugin(base.PreprocessorPlugin):
    """Preprocessing plugin for standardization via StandardScaler.

    Method:
        Features are centered by removing the mean and scaled to unit
        variance, computed on the training set.

    Reference:
        https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html

    Example:
        >>> from autoprognosis.plugins.preprocessors import Preprocessors
        >>> plugin = Preprocessors().get("scaler")
        >>> from sklearn.datasets import load_iris
        >>> X, y = load_iris(return_X_y=True)
        >>> plugin.fit_transform(X, y)
    """

    def __init__(self, random_state: int = 0, model: Any = None) -> None:
        super().__init__()
        # Keep a deserialized model when given; otherwise create a fresh scaler.
        self.model = model if model else StandardScaler()

    @staticmethod
    def name() -> str:
        return "scaler"

    @staticmethod
    def subtype() -> str:
        return "feature_scaling"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        # Standardization exposes no tunable hyperparameters here.
        return []

    def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "ScalerPlugin":
        self.model.fit(X, *args, **kwargs)
        return self

    def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
        return self.model.transform(X)

    def save(self) -> bytes:
        return serialization.save_model(self.model)

    @classmethod
    def load(cls, buff: bytes) -> "ScalerPlugin":
        return cls(model=serialization.load_model(buff))


plugin = ScalerPlugin
class UniformTransformPlugin(base.PreprocessorPlugin):
    """Preprocessing plugin mapping features onto a uniform distribution.

    Method:
        A quantile transform is estimated per feature and used to map values
        to a uniform distribution, spreading out the most frequent values.

    Reference:
        https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.QuantileTransformer.html

    Example:
        >>> from autoprognosis.plugins.preprocessors import Preprocessors
        >>> plugin = Preprocessors().get("uniform_transform")
        >>> from sklearn.datasets import load_iris
        >>> X, y = load_iris(return_X_y=True)
        >>> plugin.fit_transform(X, y)
    """

    def __init__(
        self, random_state: int = 0, n_quantiles: int = 100, model: Any = None
    ) -> None:
        super().__init__()
        if model:
            # Restore a deserialized model.
            self.model = model
        else:
            self.model = QuantileTransformer(
                n_quantiles=n_quantiles, random_state=random_state
            )

    @staticmethod
    def name() -> str:
        return "uniform_transform"

    @staticmethod
    def subtype() -> str:
        return "feature_scaling"

    @staticmethod
    def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]:
        # n_quantiles is fixed at construction time; nothing exposed for tuning.
        return []

    def _fit(
        self, X: pd.DataFrame, *args: Any, **kwargs: Any
    ) -> "UniformTransformPlugin":
        self.model.fit(X)
        return self

    def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
        return self.model.transform(X)

    def save(self) -> bytes:
        return serialization.save_model(self.model)

    @classmethod
    def load(cls, buff: bytes) -> "UniformTransformPlugin":
        return cls(model=serialization.load_model(buff))


plugin = UniformTransformPlugin
class UncertaintyPlugin(metaclass=ABCMeta):
    """Abstract interface for uncertainty-quantification plugins.

    A concrete plugin wraps an existing predictive model (stored as
    ``self.model``) and must provide ``fit``, ``predict`` and
    ``predict_proba`` implementations, plus a static ``name``.
    """

    def __init__(self, model: Any) -> None:
        # The wrapped predictive model that the plugin operates on.
        self.model = model

    @staticmethod
    @abstractmethod
    def name() -> str: ...

    @staticmethod
    def type() -> str:
        # Shared type tag used by the plugin loader for this family.
        return "uncertainty_quantification"

    @abstractmethod
    def fit(self, *args: Any, **kwargs: Any) -> "UncertaintyPlugin": ...

    @abstractmethod
    def predict(self, *args: Any, **kwargs: Any) -> pd.DataFrame: ...

    @abstractmethod
    def predict_proba(self, *args: Any, **kwargs: Any) -> pd.DataFrame: ...
def to_dataframe(X: Any) -> pd.DataFrame:
    """Helper for casting arguments to `pandas.DataFrame`.

    Args:
        X: the object to cast.

    Returns:
        pd.DataFrame: the converted DataFrame.

    Raises:
        ValueError: if the argument cannot be converted to a DataFrame.
    """
    if isinstance(X, pd.DataFrame):
        return X
    # Use the public pd.Series alias rather than the private pd.core.series path.
    elif isinstance(X, (list, np.ndarray, pd.Series)):
        return pd.DataFrame(X)

    raise ValueError(
        f"unsupported data type {type(X)}. Try list, pandas.DataFrame or numpy.ndarray"
    )


def to_ndarray(X: Any) -> np.ndarray:
    """Helper for casting arguments to `numpy.ndarray`.

    Args:
        X: the object to cast.

    Returns:
        np.ndarray: the converted ndarray.

    Raises:
        ValueError: if the argument cannot be converted to a ndarray.
    """
    if isinstance(X, np.ndarray):
        return X
    elif isinstance(X, (list, pd.DataFrame, pd.Series)):
        return np.array(X)

    raise ValueError(
        f"unsupported data type {type(X)}. Try list, pandas.DataFrame or numpy.ndarray"
    )


__all__ = [
    "to_dataframe",
    "to_ndarray",
]
# stdlib
import time
from functools import wraps
from typing import Any, Callable, Type

# third party
import numpy as np
import pandas as pd

# autoprognosis absolute
import autoprognosis.logger as log


def expect_type_for(idx: int, dtype: Type) -> Callable:
    """Decorator used for argument type checking.

    Args:
        idx: which positional argument should be validated.
        dtype: expected data type.

    Returns:
        Callable: the decorator

    Raises:
        ValueError: at call time, if args[idx] is missing or not of type dtype.
    """

    def expect_type(func: Callable) -> Callable:
        # wraps() keeps the decorated function's __name__/__qualname__/docstring,
        # so introspection and stacked decorators (e.g. benchmark) report the
        # real function instead of "wrapper".
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            if len(args) <= idx:
                raise ValueError("expected parameter out of range.")
            if not isinstance(args[idx], dtype):
                err = f"unsupported data type {type(args[idx])} for args[{idx}]. Expecting {dtype}"
                log.critical(err)
                raise ValueError(err)

            return func(*args, **kwargs)

        return wrapper

    return expect_type


def expect_ndarray_for(idx: int) -> Callable:
    """Shortcut: require args[idx] to be a numpy.ndarray."""
    return expect_type_for(idx, np.ndarray)


def expect_dataframe_for(idx: int) -> Callable:
    """Shortcut: require args[idx] to be a pandas.DataFrame."""
    return expect_type_for(idx, pd.DataFrame)


def benchmark(func: Callable) -> Callable:
    """Decorator used for function duration benchmarking. It is active only with DEBUG loglevel.

    Args:
        func: the function to be benchmarked.

    Returns:
        Callable: the decorator

    """

    @wraps(func)  # preserve metadata so the logged qualname stays meaningful
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        start = time.time()
        res = func(*args, **kwargs)
        end = time.time()

        log.debug(f"{func.__qualname__} took {round(end - start, 4)} seconds")
        return res

    return wrapper


__all__ = [
    "expect_type_for",
    "expect_ndarray_for",
    "expect_dataframe_for",
    "benchmark",
]
def MAE(X: np.ndarray, X_true: np.ndarray, mask: np.ndarray) -> np.ndarray:
    """
    Mean Absolute Error (MAE) between imputed variables and ground truth.

    Args:
        X : Data with imputed variables.
        X_true : Ground truth.
        mask : Missing value mask (missing if True)

    Returns:
        MAE : np.ndarray
    """
    # Restrict the comparison to entries flagged as missing, then average.
    missing = mask.astype(bool)
    return np.abs(X[missing] - X_true[missing]).mean()
def RMSE(X: np.ndarray, X_true: np.ndarray, mask: np.ndarray) -> np.ndarray:
    """
    Root Mean Squared Error (RMSE) between imputed variables and ground truth.

    Args:
        X : Data with imputed variables.
        X_true : Ground truth.
        mask : Missing value mask (missing if True)

    Returns:
        RMSE : np.ndarray

    """
    # Mean of squared errors over the masked (missing) entries, then sqrt.
    missing = mask.astype(bool)
    squared_err = (X[missing] - X_true[missing]) ** 2
    return np.sqrt(squared_err.mean())
class numpy_encoder(json.JSONEncoder):
    """JSON encoder that understands numpy scalar and array types.

    Converts numpy integers/floats/bools to their native Python equivalents
    and ndarrays to (nested) lists; anything else falls back to the default
    encoder, which raises TypeError for unsupported objects.
    """

    def default(self, obj: Any) -> Any:
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.bool_):
            # np.bool_ is not a Python bool subclass, so json would otherwise
            # raise TypeError on it.
            return bool(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super(numpy_encoder, self).default(obj)
| 14 | def optimize_ints(df: pd.DataFrame) -> pd.DataFrame: 15 | ints = df.select_dtypes(include=["int64"]).columns.tolist() 16 | df[ints] = df[ints].apply(pd.to_numeric, downcast="integer") 17 | return df 18 | 19 | 20 | def optimize_objects(df: pd.DataFrame, datetime_features: List[str]) -> pd.DataFrame: 21 | for col in df.select_dtypes(include=["object"]): 22 | if col not in datetime_features: 23 | num_unique_values = len(df[col].unique()) 24 | num_total_values = len(df[col]) 25 | if float(num_unique_values) / num_total_values < 0.5: 26 | df[col] = df[col].astype("category") 27 | else: 28 | df[col] = pd.to_datetime(df[col]) 29 | return df 30 | 31 | 32 | def compress_df(df: pd.DataFrame) -> pd.DataFrame: 33 | df = optimize_floats(df) 34 | df = optimize_ints(df) 35 | df = optimize_objects(df, []) 36 | 37 | return df 38 | 39 | 40 | def read_csv_compressed(path: str) -> pd.DataFrame: 41 | df = pd.read_csv(path) 42 | 43 | return compress_df(df) 44 | -------------------------------------------------------------------------------- /src/autoprognosis/utils/parallel.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | import multiprocessing 3 | import os 4 | 5 | # autoprognosis absolute 6 | import autoprognosis.logger as log 7 | 8 | 9 | def n_opt_jobs() -> int: 10 | try: 11 | n_jobs = int(os.environ["N_OPT_JOBS"]) 12 | except BaseException as e: 13 | log.debug(f"failed to get N_JOBS {e}") 14 | n_jobs = 2 15 | log.debug(f"Using {n_jobs} cores for HP") 16 | return n_jobs 17 | 18 | 19 | def n_learner_jobs() -> int: 20 | try: 21 | n_jobs = int(os.environ["N_LEARNER_JOBS"]) 22 | except BaseException as e: 23 | n_jobs = multiprocessing.cpu_count() 24 | log.debug(f"failed to get N_LEARNER_JOBS {e}") 25 | log.debug(f"Using {n_jobs} cores for learners") 26 | return n_jobs 27 | -------------------------------------------------------------------------------- /src/autoprognosis/utils/redis.py: 
class RedisBackend:
    """Thin wrapper bundling a redis client and an Optuna journal storage.

    Both are created against the same ``redis://{host}:{port}/`` URL, whose
    defaults come from the ``REDIS_HOST``/``REDIS_PORT`` environment variables.
    """

    def __init__(
        self,
        host: str = REDIS_HOST,
        port: str = REDIS_PORT,
        auth: bool = False,  # NOTE(review): unused — confirm whether credentials were meant to be wired into the URL
    ):
        self.url = f"redis://{host}:{port}/"

        # Optuna storage backed by a Redis journal at the same endpoint.
        self._optuna_storage = JournalStorage(JournalRedisStorage(url=self.url))
        self._client = redis.Redis.from_url(self.url)

    def optuna(self) -> JournalStorage:
        """Return the Optuna-compatible storage backend."""
        return self._optuna_storage

    def client(self) -> redis.Redis:
        """Return the raw redis client."""
        return self._client
load_from_file(path) 42 | 43 | 44 | def dataframe_hash(df: pd.DataFrame) -> str: 45 | """Dataframe hashing, used for caching/backups""" 46 | df.columns = df.columns.astype(str) 47 | cols = sorted(list(df.columns)) 48 | return str(abs(pd.util.hash_pandas_object(df[cols].fillna(0)).sum())) 49 | -------------------------------------------------------------------------------- /src/autoprognosis/utils/third_party/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vanderschaarlab/autoprognosis/4be977d13174d46bdc88086cbcdec8fadc37880e/src/autoprognosis/utils/third_party/__init__.py -------------------------------------------------------------------------------- /src/autoprognosis/utils/torch.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import torch 4 | 5 | 6 | def one_hot_encoder(arr: np.ndarray) -> torch.Tensor: 7 | arr = np.asarray(arr) 8 | n_values = np.max(arr) + 1 9 | 10 | result = np.eye(n_values)[arr] 11 | return torch.from_numpy(result).long() 12 | -------------------------------------------------------------------------------- /src/autoprognosis/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.22" 2 | -------------------------------------------------------------------------------- /tests/apps/test_classifiers_app.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from pathlib import Path 3 | 4 | # third party 5 | import numpy as np 6 | from sklearn.datasets import load_iris 7 | 8 | # autoprognosis absolute 9 | from autoprognosis.deploy.build import Builder 10 | from autoprognosis.deploy.proto import NewClassificationAppProto 11 | from autoprognosis.studies.classifiers import ClassifierStudy 12 | from autoprognosis.utils.serialization import load_from_file 13 | 14 | 15 | def test_sanity(): 
16 | X, Y = load_iris(return_X_y=True, as_frame=True) 17 | 18 | df = X.copy() 19 | df["target"] = Y 20 | 21 | df.loc[:2, "sepal length (cm)"] = np.nan 22 | 23 | workspace = Path("workspace") 24 | workspace.mkdir(parents=True, exist_ok=True) 25 | 26 | study_name = "test_demonstrator_classification" 27 | 28 | study = ClassifierStudy( 29 | study_name=study_name, 30 | dataset=df, # pandas DataFrame 31 | target="target", # the label column in the dataset 32 | timeout=60, # timeout for optimization for each classfier. Default: 600 seconds 33 | num_iter=5, 34 | num_study_iter=1, 35 | classifiers=["logistic_regression"], 36 | workspace=workspace, 37 | ) 38 | 39 | study.run() 40 | 41 | dataset_path = workspace / "demo_dataset_classification.csv" 42 | df.to_csv(dataset_path, index=None) 43 | 44 | name = "AutoPrognosis demo: Classification" 45 | model_path = workspace / study_name / "model.p" 46 | 47 | target_column = "target" 48 | task_type = "classification" 49 | 50 | task = Builder( 51 | NewClassificationAppProto( 52 | **{ 53 | "name": name, 54 | "type": task_type, 55 | "dataset_path": str(dataset_path), 56 | "model_path": str(model_path), 57 | "target_column": target_column, 58 | "explainers": ["kernel_shap"], 59 | "imputers": [], 60 | "plot_alternatives": [], 61 | "comparative_models": [ 62 | ( 63 | "Logistic regression", # display name 64 | "logistic_regression", # autoprognosis plugin name 65 | {}, # plugin args 66 | ), 67 | ], 68 | "auth": False, 69 | } 70 | ), 71 | ) 72 | 73 | app_path = task.run() 74 | 75 | app = load_from_file(app_path) 76 | 77 | assert app["title"] == name 78 | assert app["type"] == "classification" 79 | assert app["banner_title"] == f"{name} study" 80 | assert len(app["models"]) > 0 81 | assert "encoders" in app 82 | assert "menu_components" in app 83 | assert "column_types" in app 84 | -------------------------------------------------------------------------------- /tests/bindings/R/test_classification.R: 
-------------------------------------------------------------------------------- 1 | library(reticulate) 2 | 3 | # geomloss bug 4 | py_install("numpy", pip = TRUE) 5 | py_install("torch", pip = TRUE) 6 | 7 | # install autoprognosis 8 | py_install(".", pip = TRUE) 9 | 10 | pathlib <- import("pathlib", convert=FALSE) 11 | warnings <- import("warnings", convert=FALSE) 12 | autoprognosis <- import("autoprognosis", convert=FALSE) 13 | 14 | warnings$filterwarnings('ignore') 15 | 16 | Path = pathlib$Path 17 | ClassifierStudy = autoprognosis$studies$classifiers$ClassifierStudy 18 | load_model_from_file = autoprognosis$utils$serialization$load_model_from_file 19 | evaluate_estimator = autoprognosis$utils$tester$evaluate_estimator 20 | workspace <- Path("workspace") 21 | study_name <- "example_classifier" 22 | 23 | # Load the data 24 | data("iris") 25 | target <- "Species" 26 | 27 | # Create the AutoPrognosis Study 28 | study <- ClassifierStudy( 29 | dataset = iris, 30 | target = target, 31 | study_name=study_name, 32 | num_iter=as.integer(10), 33 | num_study_iter=as.integer(2), 34 | timeout=as.integer(60), 35 | classifiers=list("logistic_regression", "catboost"), 36 | workspace=workspace 37 | ) 38 | 39 | study$run() 40 | 41 | # Load the optimal model - if exists 42 | output <- sprintf("%s/%s/model.p", workspace, study_name) 43 | 44 | model <- load_model_from_file(output) 45 | # The model is not fitted yet here 46 | 47 | # Benchmark the model 48 | targets <- c(target) 49 | X <- iris[ , !(names(iris) %in% targets)] 50 | Y = iris[, target] 51 | 52 | metrics <- evaluate_estimator(model, X, Y) 53 | 54 | # Fit the model 55 | model$fit(X, Y) 56 | 57 | sprintf("Performance metrics %s", metrics["str"]) 58 | 59 | # Predict using the model 60 | model$predict_proba(X) 61 | -------------------------------------------------------------------------------- /tests/bindings/R/test_classification_with_missing_data.R: 
-------------------------------------------------------------------------------- 1 | library(reticulate) 2 | 3 | # geomloss bug 4 | py_install("numpy", pip = TRUE) 5 | py_install("torch", pip = TRUE) 6 | 7 | # autoprognosis install 8 | py_install(".", pip = TRUE) 9 | 10 | pathlib <- import("pathlib", convert=FALSE) 11 | warnings <- import("warnings", convert=FALSE) 12 | autoprognosis <- import("autoprognosis", convert=FALSE) 13 | 14 | warnings$filterwarnings('ignore') 15 | 16 | Path = pathlib$Path 17 | ClassifierStudy = autoprognosis$studies$classifiers$ClassifierStudy 18 | load_model_from_file = autoprognosis$utils$serialization$load_model_from_file 19 | evaluate_estimator = autoprognosis$utils$tester$evaluate_estimator 20 | workspace <- Path("workspace") 21 | study_name <- "example_classifier_with_miss" 22 | 23 | # Load the data 24 | adult <- read.table('https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data', 25 | sep = ',', fill = F, strip.white = T) 26 | colnames(adult) <- c('age', 'workclass', 'fnlwgt', 'educatoin', 27 | 'educatoin_num', 'marital_status', 'occupation', 'relationship', 'race', 'sex', 28 | 'capital_gain', 'capital_loss', 'hours_per_week', 'native_country', 'income') 29 | adult[adult == "?"] <- NA 30 | adult <- adult[ , !(names(adult) %in% c("native_country"))] 31 | 32 | df <- adult 33 | 34 | target <- "income" 35 | 36 | # Create the AutoPrognosis Study 37 | study <- ClassifierStudy( 38 | dataset = df, 39 | target = target, 40 | study_name=study_name, 41 | num_iter=as.integer(10), 42 | num_study_iter=as.integer(2), 43 | timeout=as.integer(60), 44 | classifiers=list("logistic_regression", "catboost"), 45 | workspace=workspace, 46 | nan_placeholder='NA' 47 | ) 48 | 49 | study$run() 50 | 51 | # Load the optimal model - if exists 52 | output <- sprintf("%s/%s/model.p", workspace, study_name) 53 | 54 | model <- load_model_from_file(output) 55 | # The model is not fitted yet here 56 | 57 | # Benchmark the model 58 | targets <- 
c(target) 59 | X <- df[ , !(names(df) %in% targets)] 60 | Y = df[, target] 61 | 62 | metrics <- evaluate_estimator(model, X, Y) 63 | 64 | # Fit the model 65 | model$fit(X, Y) 66 | 67 | sprintf("Performance metrics %s", metrics["str"]) 68 | 69 | # Predict using the model 70 | model$predict_proba(X) 71 | -------------------------------------------------------------------------------- /tests/bindings/R/test_regression.R: -------------------------------------------------------------------------------- 1 | library(reticulate) 2 | 3 | # geomloss bug 4 | py_install("numpy", pip = TRUE) 5 | py_install("torch", pip = TRUE) 6 | 7 | # autoprognosis install 8 | py_install(".", pip = TRUE) 9 | 10 | pathlib <- import("pathlib", convert=FALSE) 11 | warnings <- import("warnings", convert=FALSE) 12 | autoprognosis <- import("autoprognosis", convert=FALSE) 13 | 14 | warnings$filterwarnings('ignore') 15 | 16 | Path = pathlib$Path 17 | RegressionStudy = autoprognosis$studies$regression$RegressionStudy 18 | load_model_from_file = autoprognosis$utils$serialization$load_model_from_file 19 | evaluate_regression = autoprognosis$utils$tester$evaluate_regression 20 | 21 | workspace <- Path("workspace") 22 | study_name <- "example_regression" 23 | 24 | # Load dataset 25 | airfoil <- read.csv( 26 | url("https://archive.ics.uci.edu/ml/machine-learning-databases/00291/airfoil_self_noise.dat"), 27 | sep = "\t", 28 | header = FALSE, 29 | ) 30 | 31 | target <- "V6" 32 | 33 | # Create AutoPrognosis Study 34 | study <- RegressionStudy( 35 | dataset = airfoil, 36 | target = target, 37 | study_name=study_name, 38 | num_iter=as.integer(10), 39 | num_study_iter=as.integer(2), 40 | timeout=as.integer(60), 41 | regressors=list("linear_regression", "kneighbors_regressor"), 42 | workspace=workspace 43 | ) 44 | 45 | study$run() 46 | 47 | # Load the optimal model - if exists 48 | output <- sprintf("%s/%s/model.p", workspace, study_name) 49 | 50 | model <- load_model_from_file(output) 51 | # The model is not 
fitted yet here

# Benchmark the model
targets <- c(target)
# BUGFIX: was `names(iris)` — copy-paste from the iris classification example.
# This script operates on the airfoil dataset, so the columns to drop must
# come from `names(airfoil)`.
X <- airfoil[ , !(names(airfoil) %in% targets)]
Y = airfoil[, target]

metrics <- evaluate_regression(model, X, Y)

sprintf("Performance metrics %s", metrics["str"])

# Fit the model
model$fit(X, Y)

# Predict
model$predict(X)

# ---- tests/bindings/R/test_survival_analysis.R ----
library(reticulate)
library(survival)

# geomloss bug
py_install("numpy", pip = TRUE)
py_install("torch", pip = TRUE)

# install autoprognosis
py_install(".", pip = TRUE)

pathlib <- import("pathlib", convert=FALSE)
warnings <- import("warnings", convert=FALSE)
autoprognosis <- import("autoprognosis", convert=FALSE)
np <- import("numpy", convert=FALSE)

warnings$filterwarnings('ignore')

Path = pathlib$Path
RiskEstimationStudy = autoprognosis$studies$risk_estimation$RiskEstimationStudy
load_model_from_file = autoprognosis$utils$serialization$load_model_from_file
evaluate_survival_estimator = autoprognosis$utils$tester$evaluate_survival_estimator

workspace <- Path("workspace")
study_name <- "example_risk_estimation"

# Load the data
data(cancer, package="survival")

targets <- c("dtime", "death")
df <- rotterdam

X <- df[ , !(names(df) %in% targets)]
Y <- df[, "death"]
T <- df[, "dtime"]

eval_time_horizons <- list(2000)

# Create the AutoPrognosis Study
study <- RiskEstimationStudy(
    dataset = df,
    target = "death",
    time_to_event="dtime",
    time_horizons = eval_time_horizons,
    study_name=study_name,
    num_iter=as.integer(10),
    num_study_iter=as.integer(2),
    timeout=as.integer(60),
risk_estimators=list("cox_ph", "survival_xgboost"), 49 | workspace=workspace 50 | ) 51 | 52 | study$run() 53 | 54 | # Load the optimal model - if exists 55 | output <- sprintf("%s/%s/model.p", workspace, study_name) 56 | 57 | model <- load_model_from_file(output) 58 | # The model is not fitted yet here 59 | 60 | # Benchmark the model 61 | metrics <- evaluate_survival_estimator(model, X, T, Y, eval_time_horizons) 62 | 63 | # Fit the model 64 | model$fit(X, T, Y) 65 | 66 | sprintf("Performance metrics %s", metrics["str"]) 67 | 68 | # Predict using the model 69 | model$predict(X) 70 | -------------------------------------------------------------------------------- /tests/bugfixing/test_ensemble_crash.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | import random 3 | from pathlib import Path 4 | 5 | # third party 6 | import numpy as np 7 | from sklearn.datasets import load_breast_cancer 8 | 9 | # autoprognosis absolute 10 | from autoprognosis.studies.classifiers import ClassifierStudy 11 | 12 | 13 | def test_ensemble_crash() -> None: 14 | X, Y = load_breast_cancer(return_X_y=True, as_frame=True) 15 | 16 | # Simulate missingness 17 | total_len = len(X) 18 | 19 | for col in ["mean texture", "mean compactness"]: 20 | indices = random.sample(range(0, total_len), 10) 21 | X.loc[indices, col] = np.nan 22 | 23 | dataset = X.copy() 24 | dataset["target"] = Y 25 | 26 | workspace = Path("workspace") 27 | workspace.mkdir(parents=True, exist_ok=True) 28 | 29 | study_name = "classification_example_imputation" 30 | 31 | study = ClassifierStudy( 32 | study_name=study_name, 33 | dataset=dataset, 34 | target="target", 35 | num_iter=1, 36 | num_study_iter=1, 37 | timeout=1, 38 | imputers=["mean", "ice", "median"], 39 | classifiers=["logistic_regression", "lda"], 40 | feature_scaling=[], # feature preprocessing is disabled 41 | score_threshold=0.4, 42 | workspace=workspace, 43 | ) 44 | 45 | study.run() 46 | 
# --- tests/bugfixing/test_not_fitted_error.py ---

# third party
import pandas as pd

# autoprognosis absolute
from autoprognosis.plugins.prediction import Predictions


def test_train_error() -> None:
    """Predicting before fit must raise with the canonical message."""
    model = Predictions().get("logistic_regression")

    err = ""
    try:
        model.predict_proba(pd.DataFrame([]))
    except BaseException as e:
        err = str(e)

    assert "Fit the model first" == err


# --- tests/conftest.py ---

# stdlib
import sys
import warnings

# autoprognosis absolute
import autoprognosis.logger as log

warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

log.add(sink=sys.stderr, level="ERROR")


# --- tests/explorers/explorers_mocks.py ---

# stdlib
import datetime
from typing import Any

# autoprognosis absolute
from autoprognosis.hooks import Hooks


class MockHook(Hooks):
    """Hook that requests cancellation shortly after construction, used to
    exercise the explorers' early-stop path."""

    def __init__(self) -> None:
        self._started_at = datetime.datetime.utcnow()

    def cancel(self) -> bool:
        # BUGFIX(comment): the stale comment claimed "cancel after 10
        # seconds", but the threshold below is 2 seconds.
        # cancel after 2 seconds
        time_passed = datetime.datetime.utcnow() - self._started_at

        return time_passed.total_seconds() > 2

    def heartbeat(
        self, topic: str, subtopic: str, event_type: str, **kwargs: Any
    ) -> None:
        pass

    def finish(self) -> None:
        pass
/tests/explorers/test_selector.py: -------------------------------------------------------------------------------- 1 | # autoprognosis absolute 2 | from autoprognosis.explorers.core.selector import PipelineSelector 3 | 4 | 5 | def test_sanity() -> None: 6 | clf = PipelineSelector("lda") 7 | 8 | assert len(clf.imputers) == 0 9 | assert len(clf.feature_scaling) > 0 10 | 11 | assert clf.classifier.name() == "lda" 12 | assert clf.name() == "lda" 13 | 14 | assert len(clf.hyperparameter_space()) > 0 15 | -------------------------------------------------------------------------------- /tests/plugins/explainers/test_kernel_shap.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Tuple 3 | 4 | import numpy as np 5 | import pytest 6 | 7 | # third party 8 | from lifelines.datasets import load_rossi 9 | from sklearn.datasets import load_breast_cancer 10 | from sklearn.model_selection import train_test_split 11 | 12 | # autoprognosis absolute 13 | from autoprognosis.plugins.explainers.plugin_kernel_shap import plugin 14 | from autoprognosis.plugins.pipeline import Pipeline 15 | from autoprognosis.plugins.prediction.classifiers import Classifiers 16 | from autoprognosis.plugins.prediction.risk_estimation.plugin_cox_ph import ( 17 | plugin as CoxPH, 18 | ) 19 | from autoprognosis.plugins.preprocessors import Preprocessors 20 | 21 | 22 | def dataset() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: 23 | X, y = load_breast_cancer(return_X_y=True) 24 | return train_test_split(X, y, test_size=0.05) 25 | 26 | 27 | @pytest.mark.slow 28 | @pytest.mark.parametrize("classifier", ["logistic_regression", "xgboost"]) 29 | def test_plugin_sanity(classifier: str) -> None: 30 | X_train, X_test, y_train, y_test = dataset() 31 | 32 | template = Pipeline( 33 | [ 34 | Preprocessors().get_type("minmax_scaler").fqdn(), 35 | Classifiers().get_type(classifier).fqdn(), 36 | ] 37 | ) 38 | 39 | pipeline = template() 40 | 41 | 
explainer = plugin( 42 | pipeline, X_train, y_train, subsample=100, task_type="classification" 43 | ) 44 | 45 | result = explainer.explain(X_test) 46 | 47 | assert result.shape == X_test.shape 48 | 49 | 50 | def test_plugin_name() -> None: 51 | assert plugin.name() == "kernel_shap" 52 | 53 | 54 | @pytest.mark.slow 55 | def test_plugin_kernel_shap_survival_prediction() -> None: 56 | rossi = load_rossi() 57 | 58 | X = rossi.drop(["week", "arrest"], axis=1) 59 | Y = rossi["arrest"] 60 | T = rossi["week"] 61 | 62 | surv = CoxPH().fit(X, T, Y) 63 | 64 | explainer = plugin( 65 | surv, 66 | X, 67 | Y, 68 | time_to_event=T, 69 | eval_times=[ 70 | int(T[Y.iloc[:] == 1].quantile(0.50)), 71 | int(T[Y.iloc[:] == 1].quantile(0.75)), 72 | ], 73 | task_type="risk_estimation", 74 | ) 75 | 76 | result = explainer.explain(X[:3]) 77 | 78 | assert result.shape == (3, X.shape[1]) 79 | -------------------------------------------------------------------------------- /tests/plugins/explainers/test_lime.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Tuple 3 | 4 | import numpy as np 5 | import pytest 6 | 7 | # third party 8 | from lifelines.datasets import load_rossi 9 | from sklearn.datasets import load_breast_cancer 10 | from sklearn.model_selection import train_test_split 11 | 12 | # autoprognosis absolute 13 | from autoprognosis.plugins.explainers.plugin_lime import plugin 14 | from autoprognosis.plugins.pipeline import Pipeline 15 | from autoprognosis.plugins.prediction.classifiers import Classifiers 16 | from autoprognosis.plugins.prediction.risk_estimation.plugin_cox_ph import ( 17 | plugin as CoxPH, 18 | ) 19 | from autoprognosis.plugins.preprocessors import Preprocessors 20 | 21 | 22 | def dataset() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: 23 | X, y = load_breast_cancer(return_X_y=True) 24 | return train_test_split(X, y, test_size=0.05) 25 | 26 | 27 | @pytest.mark.slow 28 | 
@pytest.mark.parametrize("classifier", ["logistic_regression", "xgboost"]) 29 | def test_plugin_sanity(classifier: str) -> None: 30 | X_train, X_test, y_train, y_test = dataset() 31 | 32 | template = Pipeline( 33 | [ 34 | Preprocessors().get_type("minmax_scaler").fqdn(), 35 | Classifiers().get_type(classifier).fqdn(), 36 | ] 37 | ) 38 | 39 | pipeline = template() 40 | 41 | explainer = plugin(pipeline, X_train, y_train, task_type="classification") 42 | 43 | result = explainer.explain(X_test[:2]) 44 | 45 | assert len(result) == 2 46 | 47 | 48 | def test_plugin_name() -> None: 49 | assert plugin.name() == "lime" 50 | 51 | 52 | @pytest.mark.slow 53 | def test_plugin_lime_survival_prediction() -> None: 54 | rossi = load_rossi() 55 | 56 | X = rossi.drop(["week", "arrest"], axis=1) 57 | Y = rossi["arrest"] 58 | T = rossi["week"] 59 | 60 | surv = CoxPH().fit(X, T, Y) 61 | 62 | explainer = plugin( 63 | surv, 64 | X, 65 | Y, 66 | time_to_event=T, 67 | eval_times=[ 68 | int(T[Y.iloc[:] == 1].quantile(0.50)), 69 | int(T[Y.iloc[:] == 1].quantile(0.75)), 70 | ], 71 | task_type="risk_estimation", 72 | ) 73 | 74 | result = explainer.explain(X.head(1)) 75 | 76 | assert result.shape == (1, X.shape[1]) 77 | -------------------------------------------------------------------------------- /tests/plugins/explainers/test_shap_permutation_sampler.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Tuple 3 | 4 | import numpy as np 5 | import pytest 6 | 7 | # third party 8 | from lifelines.datasets import load_rossi 9 | from sklearn.datasets import load_breast_cancer 10 | from sklearn.model_selection import train_test_split 11 | 12 | # autoprognosis absolute 13 | from autoprognosis.plugins.explainers.plugin_shap_permutation_sampler import plugin 14 | from autoprognosis.plugins.pipeline import Pipeline 15 | from autoprognosis.plugins.prediction.classifiers import Classifiers 16 | from 
autoprognosis.plugins.prediction.risk_estimation.plugin_cox_ph import ( 17 | plugin as CoxPH, 18 | ) 19 | from autoprognosis.plugins.preprocessors import Preprocessors 20 | 21 | 22 | def dataset() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: 23 | X, y = load_breast_cancer(return_X_y=True) 24 | return train_test_split(X, y, test_size=0.05) 25 | 26 | 27 | @pytest.mark.slow 28 | @pytest.mark.parametrize("classifier", ["logistic_regression", "xgboost"]) 29 | def test_plugin_sanity(classifier: str) -> None: 30 | X_train, X_test, y_train, y_test = dataset() 31 | 32 | template = Pipeline( 33 | [ 34 | Preprocessors().get_type("minmax_scaler").fqdn(), 35 | Classifiers().get_type(classifier).fqdn(), 36 | ] 37 | ) 38 | 39 | pipeline = template() 40 | 41 | explainer = plugin(pipeline, X_train, y_train, task_type="classification") 42 | 43 | result = explainer.explain(X_test) 44 | 45 | assert len(result) == len(X_test) 46 | 47 | 48 | def test_plugin_name() -> None: 49 | assert plugin.name() == "shap_permutation_sampler" 50 | 51 | 52 | @pytest.mark.slow 53 | def test_plugin_survival_prediction() -> None: 54 | rossi = load_rossi() 55 | 56 | X = rossi.drop(["week", "arrest"], axis=1) 57 | Y = rossi["arrest"] 58 | T = rossi["week"] 59 | 60 | surv = CoxPH().fit(X, T, Y) 61 | 62 | explainer = plugin( 63 | surv, 64 | X, 65 | Y, 66 | time_to_event=T, 67 | eval_times=[ 68 | int(T[Y.iloc[:] == 1].quantile(0.50)), 69 | int(T[Y.iloc[:] == 1].quantile(0.75)), 70 | ], 71 | task_type="risk_estimation", 72 | ) 73 | 74 | result = explainer.explain(X[:3]) 75 | 76 | assert result.shape == (3, X.shape[1], 2) 77 | -------------------------------------------------------------------------------- /tests/plugins/imputers/test_api.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Any, List 3 | 4 | # third party 5 | import pytest 6 | 7 | # autoprognosis absolute 8 | import autoprognosis.plugins.core.params as params 9 | 
from autoprognosis.plugins.imputers import ImputerPlugin, Imputers 10 | from autoprognosis.plugins.imputers.plugin_mean import plugin as mock_model 11 | 12 | 13 | @pytest.fixture 14 | def ctx() -> Imputers: 15 | return Imputers() 16 | 17 | 18 | class Mock(ImputerPlugin): 19 | def __init__(self, **kwargs: Any) -> None: 20 | model = mock_model(**kwargs) 21 | 22 | super().__init__(model) 23 | 24 | @staticmethod 25 | def name() -> str: 26 | return "test" 27 | 28 | @staticmethod 29 | def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]: 30 | return [] 31 | 32 | 33 | class Invalid: 34 | def __init__(self) -> None: 35 | pass 36 | 37 | 38 | def test_load(ctx: Imputers) -> None: 39 | assert len(ctx._plugins) == 0 40 | ctx.get("mean") 41 | ctx.get("median") 42 | assert len(ctx._plugins) == 2 43 | assert len(ctx._plugins) == len(ctx) 44 | 45 | 46 | def test_list(ctx: Imputers) -> None: 47 | ctx.get("mean") 48 | assert "mean" in ctx.list() 49 | 50 | 51 | def test_add_get(ctx: Imputers) -> None: 52 | ctx.add("mock", Mock) 53 | 54 | assert "mock" in ctx.list() 55 | 56 | mock = ctx.get("mock") 57 | 58 | assert mock.name() == "test" 59 | 60 | ctx.reload() 61 | assert "mock" not in ctx.list() 62 | 63 | 64 | def test_add_get_invalid(ctx: Imputers) -> None: 65 | with pytest.raises(ValueError): 66 | ctx.add("invalid", Invalid) 67 | 68 | assert "mock" not in ctx.list() 69 | 70 | with pytest.raises(ValueError): 71 | ctx.get("mock") 72 | 73 | 74 | def test_iter(ctx: Imputers) -> None: 75 | for v in ctx: 76 | assert ctx[v].name() != "" 77 | -------------------------------------------------------------------------------- /tests/plugins/imputers/test_imputation_serde.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Tuple 3 | 4 | # third party 5 | import numpy as np 6 | import pytest 7 | 8 | # autoprognosis absolute 9 | from autoprognosis.plugins import Imputers 10 | from 
autoprognosis.plugins.utils.simulate import simulate_nan 11 | from autoprognosis.utils.serialization import load_model, save_model 12 | 13 | 14 | def dataset(mechanism: str, p_miss: float) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: 15 | np.random.seed(0) 16 | 17 | n = 20 18 | p = 4 19 | 20 | mean = np.repeat(0, p) 21 | cov = 0.5 * (np.ones((p, p)) + np.eye(p)) 22 | 23 | x = np.random.multivariate_normal(mean, cov, size=n) 24 | x_simulated = simulate_nan(x, p_miss, mechanism) 25 | 26 | mask = x_simulated["mask"] 27 | x_miss = x_simulated["X_incomp"] 28 | 29 | return x, x_miss, mask 30 | 31 | 32 | @pytest.mark.slow 33 | @pytest.mark.parametrize("plugin", Imputers().list()) 34 | def test_serialization(plugin: str) -> None: 35 | x, x_miss, mask = dataset("MAR", 0.3) 36 | 37 | estimator = Imputers().get(plugin) 38 | 39 | estimator.fit_transform(x_miss) 40 | 41 | buff = estimator.save() 42 | estimator_new = Imputers().get_type(plugin).load(buff) 43 | 44 | estimator_new.transform(x_miss) 45 | 46 | 47 | @pytest.mark.slow 48 | @pytest.mark.parametrize("plugin", Imputers().list()) 49 | def test_pickle(plugin: str) -> None: 50 | x, x_miss, mask = dataset("MAR", 0.3) 51 | 52 | estimator = Imputers().get(plugin) 53 | 54 | estimator.fit_transform(x_miss) 55 | 56 | buff = save_model(estimator) 57 | estimator_new = load_model(buff) 58 | 59 | estimator_new.transform(x_miss) 60 | -------------------------------------------------------------------------------- /tests/plugins/imputers/test_imputers_api.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Any, List 3 | 4 | # third party 5 | import pytest 6 | 7 | # autoprognosis absolute 8 | import autoprognosis.plugins.core.params as params 9 | from autoprognosis.plugins.imputers import ImputerPlugin, Imputers 10 | from autoprognosis.plugins.imputers.plugin_mean import plugin as mock_model 11 | 12 | 13 | @pytest.fixture 14 | def ctx() -> Imputers: 15 | return 
Imputers() 16 | 17 | 18 | class Mock(ImputerPlugin): 19 | def __init__(self, **kwargs: Any) -> None: 20 | model = mock_model(**kwargs) 21 | 22 | super().__init__(model) 23 | 24 | @staticmethod 25 | def name() -> str: 26 | return "test" 27 | 28 | @staticmethod 29 | def hyperparameter_space(*args: Any, **kwargs: Any) -> List[params.Params]: 30 | return [] 31 | 32 | 33 | class Invalid: 34 | def __init__(self) -> None: 35 | pass 36 | 37 | 38 | def test_load(ctx: Imputers) -> None: 39 | assert len(ctx._plugins) == 0 40 | ctx.get("mean") 41 | ctx.get("median") 42 | assert len(ctx._plugins) == 2 43 | 44 | 45 | def test_list(ctx: Imputers) -> None: 46 | ctx.get("mean") 47 | assert "mean" in ctx.list() 48 | 49 | 50 | def test_add_get(ctx: Imputers) -> None: 51 | ctx.add("mock", Mock) 52 | 53 | assert "mock" in ctx.list() 54 | 55 | mock = ctx.get("mock") 56 | 57 | assert mock.name() == "test" 58 | 59 | ctx.reload() 60 | assert "mock" not in ctx.list() 61 | 62 | 63 | def test_add_get_invalid(ctx: Imputers) -> None: 64 | with pytest.raises(ValueError): 65 | ctx.add("invalid", Invalid) 66 | 67 | assert "mock" not in ctx.list() 68 | 69 | with pytest.raises(ValueError): 70 | ctx.get("mock") 71 | 72 | 73 | def test_iter(ctx: Imputers) -> None: 74 | for v in ctx: 75 | assert ctx[v].name() != "" 76 | -------------------------------------------------------------------------------- /tests/plugins/imputers/test_mean.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | 6 | # autoprognosis absolute 7 | from autoprognosis.plugins.imputers import ImputerPlugin, Imputers 8 | from autoprognosis.plugins.imputers.plugin_mean import plugin 9 | 10 | 11 | def from_api() -> ImputerPlugin: 12 | return Imputers().get("mean") 13 | 14 | 15 | def from_module() -> ImputerPlugin: 16 | return plugin() 17 | 18 | 19 | def from_serde() -> ImputerPlugin: 20 | buff = plugin().save() 21 | 
return plugin().load(buff) 22 | 23 | 24 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 25 | def test_mean_plugin_sanity(test_plugin: ImputerPlugin) -> None: 26 | assert test_plugin is not None 27 | 28 | 29 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 30 | def test_mean_plugin_name(test_plugin: ImputerPlugin) -> None: 31 | assert test_plugin.name() == "mean" 32 | 33 | 34 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 35 | def test_mean_plugin_type(test_plugin: ImputerPlugin) -> None: 36 | assert test_plugin.type() == "imputer" 37 | 38 | 39 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 40 | def test_mean_plugin_hyperparams(test_plugin: ImputerPlugin) -> None: 41 | assert test_plugin.hyperparameter_space() == [] 42 | 43 | 44 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 45 | def test_mean_plugin_fit_transform(test_plugin: ImputerPlugin) -> None: 46 | res = test_plugin.fit_transform( 47 | pd.DataFrame( 48 | [[1, 1, 1, 1], [np.nan, np.nan, np.nan, np.nan], [3, 3, 9, 9], [2, 2, 2, 2]] 49 | ) 50 | ) 51 | 52 | np.testing.assert_array_equal( 53 | res, [[1, 1, 1, 1], [2, 2, 4, 4], [3, 3, 9, 9], [2, 2, 2, 2]] 54 | ) 55 | -------------------------------------------------------------------------------- /tests/plugins/imputers/test_median.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | 6 | # autoprognosis absolute 7 | from autoprognosis.plugins.imputers import ImputerPlugin, Imputers 8 | from autoprognosis.plugins.imputers.plugin_median import plugin 9 | 10 | 11 | def from_api() -> ImputerPlugin: 12 | return Imputers().get("median") 13 | 14 | 15 | def from_module() -> ImputerPlugin: 16 | return plugin() 17 | 18 | 19 | def from_serde() -> ImputerPlugin: 20 | buff = 
plugin().save() 21 | return plugin().load(buff) 22 | 23 | 24 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 25 | def test_median_plugin_sanity(test_plugin: ImputerPlugin) -> None: 26 | assert test_plugin is not None 27 | 28 | 29 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 30 | def test_median_plugin_name(test_plugin: ImputerPlugin) -> None: 31 | assert test_plugin.name() == "median" 32 | 33 | 34 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 35 | def test_median_plugin_type(test_plugin: ImputerPlugin) -> None: 36 | assert test_plugin.type() == "imputer" 37 | 38 | 39 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 40 | def test_median_plugin_hyperparams(test_plugin: ImputerPlugin) -> None: 41 | assert test_plugin.hyperparameter_space() == [] 42 | 43 | 44 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 45 | def test_median_plugin_fit_transform(test_plugin: ImputerPlugin) -> None: 46 | res = test_plugin.fit_transform( 47 | pd.DataFrame( 48 | [[1, 1, 1, 1], [np.nan, np.nan, np.nan, np.nan], [3, 3, 9, 9], [2, 2, 2, 2]] 49 | ) 50 | ) 51 | 52 | np.testing.assert_array_equal( 53 | res, [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 9, 9], [2, 2, 2, 2]] 54 | ) 55 | -------------------------------------------------------------------------------- /tests/plugins/imputers/test_most_freq.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | 6 | # autoprognosis absolute 7 | from autoprognosis.plugins.imputers import ImputerPlugin, Imputers 8 | from autoprognosis.plugins.imputers.plugin_most_frequent import plugin 9 | 10 | 11 | def from_api() -> ImputerPlugin: 12 | return Imputers().get("most_frequent") 13 | 14 | 15 | def from_module() -> ImputerPlugin: 16 | return plugin() 17 | 18 | 19 | 
def from_serde() -> ImputerPlugin:
    """Round-trip the plugin through its own save()/load() serialization."""
    buff = plugin().save()
    return plugin().load(buff)


@pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()])
def test_most_freq_plugin_sanity(test_plugin: ImputerPlugin) -> None:
    """Every construction path must yield a live plugin instance."""
    assert test_plugin is not None


@pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()])
def test_most_freq_plugin_name(test_plugin: ImputerPlugin) -> None:
    """The plugin must report its registry name."""
    assert test_plugin.name() == "most_frequent"


@pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()])
def test_most_freq_plugin_type(test_plugin: ImputerPlugin) -> None:
    """The plugin must identify itself as an imputer."""
    assert test_plugin.type() == "imputer"


@pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()])
def test_most_freq_plugin_hyperparams(test_plugin: ImputerPlugin) -> None:
    """The most-frequent imputer exposes no tunable hyperparameters."""
    assert test_plugin.hyperparameter_space() == []


@pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()])
def test_most_freq_plugin_fit_transform(test_plugin: ImputerPlugin) -> None:
    """A fully-missing row is replaced by the per-column modes: [1, 2, 1, 2]."""
    res = test_plugin.fit_transform(
        pd.DataFrame(
            [[1, 1, 1, 1], [np.nan, np.nan, np.nan, np.nan], [1, 2, 1, 2], [2, 2, 2, 2]]
        )
    )

    np.testing.assert_array_equal(
        res, [[1, 1, 1, 1], [1, 2, 1, 2], [1, 2, 1, 2], [2, 2, 2, 2]]
    )
# --------------------------------------------------------------------------------
# /tests/plugins/prediction/classifiers/test_calibration.py:
# --------------------------------------------------------------------------------
# stdlib
from typing import Tuple

# third party
import numpy as np
import pandas as pd
import pytest
from sklearn.datasets import make_blobs
from sklearn.metrics import brier_score_loss
from sklearn.model_selection import train_test_split

# autoprognosis absolute
from autoprognosis.plugins.prediction import Predictions


def gen_dataset() -> Tuple[
    pd.DataFrame,
    pd.DataFrame,
    pd.DataFrame,
    pd.DataFrame,
    pd.DataFrame,
    pd.DataFrame,
]:
    """Generate a weighted binary-classification train/test split.

    Returns (X_train, X_test, y_train, y_test, sw_train, sw_test).
    """
    n_samples = 5000
    centers = [(-5, -5), (0, 0), (5, 5)]
    X, y = make_blobs(
        n_samples=n_samples, centers=centers, shuffle=False, random_state=42
    )

    # Collapse the three blobs into two classes, split down the middle.
    y[: n_samples // 2] = 0
    y[n_samples // 2 :] = 1
    sample_weight = np.random.RandomState(42).rand(y.shape[0])

    X_train, X_test, y_train, y_test, sw_train, sw_test = train_test_split(
        X, y, sample_weight, test_size=0.9, random_state=42
    )

    return X_train, X_test, y_train, y_test, sw_train, sw_test


def supports_calibration(plugin: str) -> bool:
    """Return True when *plugin* exposes a 'calibration' hyperparameter."""
    test_plugin = Predictions().get(plugin)

    return any(
        hp.name == "calibration" for hp in test_plugin.hyperparameter_space()
    )


@pytest.mark.parametrize("plugin", Predictions().list())
def test_plugin_calibration(plugin: str) -> None:
    """Every calibration method must produce a valid Brier score."""
    if not supports_calibration(plugin):
        return

    X_train, X_test, y_train, y_test, sw_train, sw_test = gen_dataset()

    test_plugin = Predictions().get(plugin, calibration=0)
    test_plugin.fit(X_train, y_train)

    prob_no_cal = test_plugin.predict_proba(X_test).to_numpy()[:, 1]

    score_no_cal = brier_score_loss(y_test, prob_no_cal, sample_weight=sw_test)
    # A Brier score over probabilities in [0, 1] is itself bounded in [0, 1].
    assert 0 <= score_no_cal <= 1

    for method in [0, 1, 2]:
        test_plugin = Predictions().get(plugin, calibration=method)
        test_plugin.fit(X_train, y_train)

        probs = test_plugin.predict_proba(X_test).to_numpy()[:, 1]
        score = brier_score_loss(y_test, probs, sample_weight=sw_test)

        # BUGFIX: the loop previously only printed the scores and asserted
        # nothing, so a broken calibration method could never fail this test.
        assert np.isfinite(score)
        assert 0 <= score <= 1

        print(
            f"score without calibration {score_no_cal} score with calibration {score}"
        )
-------------------------------------------------------------------------------- /tests/plugins/prediction/classifiers/test_clf_serde.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Tuple 3 | 4 | # third party 5 | import numpy as np 6 | import pytest 7 | from sklearn.model_selection import train_test_split 8 | 9 | # autoprognosis absolute 10 | from autoprognosis.plugins import Predictions 11 | from autoprognosis.utils.serialization import load_model, save_model 12 | 13 | 14 | def dataset() -> Tuple[np.ndarray, np.ndarray]: 15 | rng = np.random.RandomState(1) 16 | 17 | N = 1000 18 | X = rng.randint(N, size=(N, 3)) 19 | y = rng.randint(2, size=(N)) 20 | 21 | return X, y 22 | 23 | 24 | @pytest.mark.parametrize("plugin", Predictions().list()) 25 | def test_serialization(plugin: str) -> None: 26 | X, y = dataset() 27 | 28 | X_train, X_test, y_train, y_test = train_test_split( 29 | X, y, test_size=0.2, random_state=42 30 | ) 31 | 32 | estimator = Predictions().get(plugin) 33 | 34 | estimator.fit(X_train, y_train) 35 | estimator.predict(X_test) 36 | 37 | buff = estimator.save() 38 | estimator_new = Predictions().get(plugin).load(buff) 39 | 40 | estimator_new.predict(X_test) 41 | 42 | 43 | @pytest.mark.parametrize("plugin", Predictions().list()) 44 | def test_pickle(plugin: str) -> None: 45 | X, y = dataset() 46 | 47 | X_train, X_test, y_train, y_test = train_test_split( 48 | X, y, test_size=0.2, random_state=42 49 | ) 50 | 51 | estimator = Predictions().get(plugin) 52 | 53 | estimator.fit(X_train, y_train) 54 | estimator.predict(X_test) 55 | 56 | buff = save_model(estimator) 57 | estimator_new = load_model(buff) 58 | 59 | estimator_new.predict(X_test) 60 | -------------------------------------------------------------------------------- /tests/plugins/prediction/classifiers/test_prediction_api.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from 
typing import Any, List 3 | 4 | # third party 5 | import pandas as pd 6 | import pytest 7 | 8 | # autoprognosis absolute 9 | from autoprognosis.plugins.prediction import Predictions 10 | from autoprognosis.plugins.prediction.classifiers import ClassifierPlugin 11 | 12 | 13 | @pytest.fixture 14 | def ctx() -> Predictions: 15 | return Predictions() 16 | 17 | 18 | class Mock(ClassifierPlugin): 19 | def __init__(self) -> None: 20 | super().__init__() 21 | 22 | @staticmethod 23 | def name() -> str: 24 | return "test" 25 | 26 | @staticmethod 27 | def hyperparameter_space(*args: Any, **kwargs: Any) -> List: 28 | return [] 29 | 30 | def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "Mock": 31 | return self 32 | 33 | def _predict(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> pd.DataFrame: 34 | return {} 35 | 36 | def _predict_proba( 37 | self, X: pd.DataFrame, *args: Any, **kwargs: Any 38 | ) -> pd.DataFrame: 39 | return {} 40 | 41 | def save(self) -> bytes: 42 | return b"" 43 | 44 | @classmethod 45 | def load(cls, buff: bytes) -> "Mock": 46 | return cls() 47 | 48 | 49 | class Invalid: 50 | def __init__(self) -> None: 51 | pass 52 | 53 | 54 | def test_load(ctx: Predictions) -> None: 55 | assert len(ctx._plugins) == 0 56 | ctx.get("xgboost") 57 | assert len(ctx._plugins) == 1 58 | 59 | 60 | def test_list(ctx: Predictions) -> None: 61 | ctx.get("bagging") 62 | assert "bagging" in ctx.list() 63 | assert "catboost" not in ctx.list() 64 | 65 | 66 | def test_add_get(ctx: Predictions) -> None: 67 | ctx.add("mock", Mock) 68 | 69 | assert "mock" in ctx.list() 70 | 71 | mock = ctx.get("mock") 72 | 73 | assert mock.name() == "test" 74 | 75 | ctx.reload() 76 | assert "mock" not in ctx.list() 77 | 78 | 79 | def test_add_get_invalid(ctx: Predictions) -> None: 80 | with pytest.raises(ValueError): 81 | ctx.add("invalid", Invalid) 82 | 83 | assert "mock" not in ctx.list() 84 | 85 | with pytest.raises(ValueError): 86 | ctx.get("mock") 87 | 88 | 89 | def test_iter(ctx: 
Predictions) -> None: 90 | for v in ctx: 91 | assert ctx[v].name() != "" 92 | -------------------------------------------------------------------------------- /tests/plugins/prediction/risk_estimation/benchmarks/cvd/test_aha.py: -------------------------------------------------------------------------------- 1 | # autoprognosis absolute 2 | from autoprognosis.plugins.prediction.risk_estimation.benchmarks.cvd.aha.model import ( 3 | inference, 4 | ) 5 | 6 | 7 | def test_sanity() -> None: 8 | score = inference( 9 | gender="M", 10 | age=40, 11 | tchol=160, 12 | hdlc=40, 13 | sbp=180, 14 | smoking=0, 15 | diab=0, 16 | ht_treat=1, 17 | race="W", 18 | ) 19 | assert score < 1 20 | -------------------------------------------------------------------------------- /tests/plugins/prediction/risk_estimation/benchmarks/cvd/test_fram.py: -------------------------------------------------------------------------------- 1 | # autoprognosis absolute 2 | from autoprognosis.plugins.prediction.risk_estimation.benchmarks.cvd.framingham.model import ( 3 | inference, 4 | ) 5 | 6 | 7 | def test_sanity() -> None: 8 | score = inference( 9 | sex="F", 10 | age=60, # age value 11 | total_cholesterol=204, 12 | hdl_cholesterol=38.67, 13 | systolic_blood_pressure=160, # Systolic blood pressure 14 | smoker=True, 15 | blood_pressure_med_treatment=True, 16 | ) 17 | 18 | assert score < 1 19 | -------------------------------------------------------------------------------- /tests/plugins/prediction/risk_estimation/benchmarks/cvd/test_qrisk3.py: -------------------------------------------------------------------------------- 1 | # autoprognosis absolute 2 | from autoprognosis.plugins.prediction.risk_estimation.benchmarks.cvd.qrisk3.model import ( 3 | inference, 4 | ) 5 | 6 | 7 | def test_sanity() -> None: 8 | score = inference( 9 | gender="F", 10 | age=44, # age value 11 | b_AF=1, # bool, Atrial fibrillation 12 | b_atypicalantipsy=1, # bool, On atypical antipsychotic medication 13 | b_corticosteroids=1, # 
Are you on regular steroid tablets? 14 | b_impotence2=False, 15 | b_migraine=1, # bool, Do you have migraines? 16 | b_ra=0, # Rheumatoid arthritis? 17 | b_renal=0, # Chronic kidney disease (stage 3, 4 or 5)? 18 | b_semi=0, # Severe mental illness? 19 | b_sle=1, # bool, Systemic lupus erythematosus 20 | b_treatedhyp=1, # bool, On blood pressure treatment? 21 | b_type1=0, # Diabetes status: type 1 22 | b_type2=0, # Diabetes status: type 2 23 | bmi=25, # Body mass index = kg/m^2 24 | ethrisk=0, # ethnic risk 25 | fh_cvd=0, # Angina or heart attack in a 1st degree relative < 60? 26 | rati=5, # Cholesterol/HDL ratio 27 | sbp=180, # Systolic blood pressure 28 | sbps5=20, # Standard deviation of at least two most recent systolic blood pressure readings (mmHg) 29 | smoke_cat=0, # smoking category: non-smoker, ex-smoker, light-smoker(less than 10/), moderate smoker(10- 19), heavy smoker(20 or over) 30 | town=0, # Townsend deprivation score 31 | ) 32 | 33 | assert score < 1 34 | -------------------------------------------------------------------------------- /tests/plugins/prediction/risk_estimation/benchmarks/diabetes/test_ada.py: -------------------------------------------------------------------------------- 1 | # autoprognosis absolute 2 | from autoprognosis.plugins.prediction.risk_estimation.benchmarks.diabetes.ada.model import ( 3 | inference, 4 | ) 5 | 6 | 7 | def test_sanity() -> None: 8 | score = inference( 9 | gender="F", 10 | age=64, # age value 11 | fh_diab=0, # Do immediate family (mother, father, brothers or sisters) have diabetes? 12 | b_treatedhyp=1, # Do you have high blood pressure requiring treatment? 
13 | b_daily_exercise=1, 14 | bmi=24, # Body mass index = kg/m^2 15 | ) 16 | 17 | assert score < 1 18 | -------------------------------------------------------------------------------- /tests/plugins/prediction/risk_estimation/benchmarks/diabetes/test_diabetesuk.py: -------------------------------------------------------------------------------- 1 | # autoprognosis absolute 2 | from autoprognosis.plugins.prediction.risk_estimation.benchmarks.diabetes.diabetes_uk.model import ( 3 | inference, 4 | ) 5 | 6 | 7 | def test_sanity() -> None: 8 | score = inference( 9 | gender="F", 10 | age=64, # age value 11 | ethrisk=0, # ethnic risk 12 | fh_diab=0, # Do immediate family (mother, father, brothers or sisters) have diabetes? 13 | waist=80, 14 | bmi=24, # Body mass index = kg/m^2 15 | b_treatedhyp=1, # Do you have high blood pressure requiring treatment? 16 | ) 17 | 18 | assert score < 1 19 | -------------------------------------------------------------------------------- /tests/plugins/prediction/risk_estimation/benchmarks/diabetes/test_finrisk.py: -------------------------------------------------------------------------------- 1 | # autoprognosis absolute 2 | from autoprognosis.plugins.prediction.risk_estimation.benchmarks.diabetes.finrisk.model import ( 3 | inference, 4 | ) 5 | 6 | 7 | def test_sanity() -> None: 8 | score = inference( 9 | gender="F", 10 | age=64, # age value 11 | bmi=24, # Body mass index = kg/m^2 12 | waist=80, 13 | b_daily_exercise=1, 14 | b_daily_vegs=1, 15 | b_treatedhyp=1, # Do you have high blood pressure requiring treatment? 16 | b_ever_had_high_glucose=1, 17 | fh_diab=0, # Do immediate family (mother, father, brothers or sisters) have diabetes? 
18 | ) 19 | 20 | assert score < 1 21 | -------------------------------------------------------------------------------- /tests/plugins/prediction/risk_estimation/benchmarks/diabetes/test_qdiab.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import pytest 3 | 4 | # autoprognosis absolute 5 | from autoprognosis.plugins.prediction.risk_estimation.benchmarks.diabetes.qdiabetes.model import ( 6 | inference, 7 | ) 8 | 9 | 10 | @pytest.mark.parametrize("model", ["A", "B", "C"]) 11 | def test_sanity(model) -> None: 12 | score = inference( 13 | model, 14 | gender="M", 15 | age=84, # age value 16 | b_atypicalantipsy=1, # bool, On atypical antipsychotic medication 17 | b_corticosteroids=1, # Are you on regular steroid tablets? 18 | b_cvd=1, # Have you had a heart attack, angina, stroke or TIA? 19 | b_gestdiab=0, # Women: Do you have gestational diabetes ? 20 | b_learning=0, # Learning disabilities? 21 | b_manicschiz=0, # Manic depression or schizophrenia? 22 | b_pos=0, # Do you have polycystic ovaries? 23 | b_statin=0, # Are you on statins? 24 | b_treatedhyp=1, # Do you have high blood pressure requiring treatment? 25 | bmi=34, # Body mass index = kg/m^2 26 | ethrisk=1, # ethnic risk 27 | fh_diab=1, # Do immediate family (mother, father, brothers or sisters) have diabetes? 
28 | hba1c=40, # HBA1c (mmol/mol) 29 | smoke_cat=4, # smoking category: non-smoker, ex-smoker, light-smoker(less than 10/), moderate smoker(10- 19), heavy smoker(20 or over) 30 | fbs=0.01, 31 | town=0, # Townsend deprivation score 32 | ) 33 | 34 | assert score <= 1 35 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/dimensionality_reduction/test_data_cleanup.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import pytest 3 | 4 | # autoprognosis absolute 5 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 6 | from autoprognosis.plugins.preprocessors.dimensionality_reduction.plugin_data_cleanup import ( 7 | plugin, 8 | ) 9 | from autoprognosis.utils.serialization import load_model, save_model 10 | 11 | 12 | def from_api() -> PreprocessorPlugin: 13 | return Preprocessors(category="dimensionality_reduction").get("data_cleanup") 14 | 15 | 16 | def from_module() -> PreprocessorPlugin: 17 | return plugin() 18 | 19 | 20 | def from_serde() -> PreprocessorPlugin: 21 | buff = plugin().save() 22 | return plugin.load(buff) 23 | 24 | 25 | def from_pickle() -> PreprocessorPlugin: 26 | buff = save_model(plugin()) 27 | return load_model(buff) 28 | 29 | 30 | @pytest.mark.parametrize( 31 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 32 | ) 33 | def test_variance_threshold_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 34 | assert test_plugin is not None 35 | 36 | 37 | @pytest.mark.parametrize( 38 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 39 | ) 40 | def test_variance_threshold_plugin_name(test_plugin: PreprocessorPlugin) -> None: 41 | assert test_plugin.name() == "data_cleanup" 42 | 43 | 44 | @pytest.mark.parametrize( 45 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 46 | ) 47 | def test_variance_threshold_plugin_type(test_plugin: 
PreprocessorPlugin) -> None: 48 | assert test_plugin.type() == "preprocessor" 49 | assert test_plugin.subtype() == "dimensionality_reduction" 50 | 51 | 52 | @pytest.mark.parametrize( 53 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 54 | ) 55 | def test_variance_threshold_plugin_hyperparams( 56 | test_plugin: PreprocessorPlugin, 57 | ) -> None: 58 | assert test_plugin.hyperparameter_space() == [] 59 | 60 | 61 | @pytest.mark.parametrize( 62 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 63 | ) 64 | def test_variance_threshold_plugin_fit_transform( 65 | test_plugin: PreprocessorPlugin, 66 | ) -> None: 67 | res = test_plugin.fit_transform( 68 | [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 9, 9], [2, 2, 2, 2]], [1, 2, 3, 4] 69 | ) 70 | 71 | assert res.shape == (4, 2) 72 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/dimensionality_reduction/test_dr_nop.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | 6 | # autoprognosis absolute 7 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 8 | from autoprognosis.plugins.preprocessors.dimensionality_reduction.plugin_nop import ( 9 | plugin, 10 | ) 11 | 12 | 13 | def from_api() -> PreprocessorPlugin: 14 | return Preprocessors(category="dimensionality_reduction").get("nop") 15 | 16 | 17 | def from_module() -> PreprocessorPlugin: 18 | return plugin() 19 | 20 | 21 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 22 | def test_nop_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 23 | assert test_plugin is not None 24 | 25 | 26 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 27 | def test_nop_plugin_name(test_plugin: PreprocessorPlugin) -> None: 28 | assert test_plugin.name() == "nop" 29 | 30 | 31 | 
@pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 32 | def test_nop_plugin_type(test_plugin: PreprocessorPlugin) -> None: 33 | assert test_plugin.type() == "preprocessor" 34 | assert test_plugin.subtype() == "dimensionality_reduction" 35 | 36 | 37 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 38 | def test_nop_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 39 | assert test_plugin.hyperparameter_space() == [] 40 | 41 | 42 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 43 | def test_nop_plugin_fit_transform(test_plugin: PreprocessorPlugin) -> None: 44 | res = test_plugin.fit_transform(pd.DataFrame([[1, 1, 1, 1], [2, 2, 2, 2]])) 45 | 46 | np.testing.assert_array_equal(res, [[1, 1, 1, 1], [2, 2, 2, 2]]) 47 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/dimensionality_reduction/test_fast_ica.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import pytest 3 | from sklearn.datasets import load_iris 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 7 | from autoprognosis.plugins.preprocessors.dimensionality_reduction.plugin_fast_ica import ( 8 | plugin, 9 | ) 10 | 11 | n_components = 3 12 | 13 | 14 | def from_api() -> PreprocessorPlugin: 15 | return Preprocessors(category="dimensionality_reduction").get( 16 | "fast_ica", n_components=n_components 17 | ) 18 | 19 | 20 | def from_module() -> PreprocessorPlugin: 21 | return plugin(n_components=n_components) 22 | 23 | 24 | def from_serde() -> PreprocessorPlugin: 25 | buff = plugin(n_components=n_components).save() 26 | return plugin().load(buff) 27 | 28 | 29 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 30 | def test_fast_ica_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 31 | assert test_plugin is not None 32 | 
33 | 34 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 35 | def test_fast_ica_plugin_name(test_plugin: PreprocessorPlugin) -> None: 36 | assert test_plugin.name() == "fast_ica" 37 | 38 | 39 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 40 | def test_fast_ica_plugin_type(test_plugin: PreprocessorPlugin) -> None: 41 | assert test_plugin.type() == "preprocessor" 42 | assert test_plugin.subtype() == "dimensionality_reduction" 43 | 44 | 45 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 46 | def test_fast_ica_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 47 | kwargs = {"features_count": 2} 48 | assert len(test_plugin.hyperparameter_space(**kwargs)) == 1 49 | assert test_plugin.hyperparameter_space(**kwargs)[0].name == "n_components" 50 | 51 | 52 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 53 | def test_fast_ica_plugin_fit_transform(test_plugin: PreprocessorPlugin) -> None: 54 | X, y = load_iris(return_X_y=True) 55 | res = test_plugin.fit_transform(X, y) 56 | print(X.shape, n_components) 57 | 58 | assert res.shape == (len(X), n_components) 59 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/dimensionality_reduction/test_feature_agglomeration.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import pytest 3 | from sklearn.datasets import load_iris 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 7 | from autoprognosis.plugins.preprocessors.dimensionality_reduction.plugin_feature_agglomeration import ( 8 | plugin, 9 | ) 10 | 11 | n_clusters = 2 12 | 13 | 14 | def from_api() -> PreprocessorPlugin: 15 | return Preprocessors(category="dimensionality_reduction").get( 16 | "feature_agglomeration", n_clusters=n_clusters 17 | 
) 18 | 19 | 20 | def from_module() -> PreprocessorPlugin: 21 | return plugin(n_clusters=n_clusters) 22 | 23 | 24 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 25 | def test_feature_agglomeration_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 26 | assert test_plugin is not None 27 | 28 | 29 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 30 | def test_feature_agglomeration_plugin_name(test_plugin: PreprocessorPlugin) -> None: 31 | assert test_plugin.name() == "feature_agglomeration" 32 | 33 | 34 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 35 | def test_feature_agglomeration_plugin_type(test_plugin: PreprocessorPlugin) -> None: 36 | assert test_plugin.type() == "preprocessor" 37 | assert test_plugin.subtype() == "dimensionality_reduction" 38 | 39 | 40 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 41 | def test_feature_agglomeration_plugin_hyperparams( 42 | test_plugin: PreprocessorPlugin, 43 | ) -> None: 44 | kwargs = {"features_count": 2} 45 | assert len(test_plugin.hyperparameter_space(**kwargs)) == 1 46 | assert test_plugin.hyperparameter_space(**kwargs)[0].name == "n_clusters" 47 | 48 | 49 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 50 | def test_feature_agglomeration_plugin_fit_transform( 51 | test_plugin: PreprocessorPlugin, 52 | ) -> None: 53 | X, y = load_iris(return_X_y=True) 54 | res = test_plugin.fit_transform(X, y) 55 | 56 | assert res.shape == (len(X), n_clusters) 57 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/dimensionality_reduction/test_gauss_projection.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import pytest 3 | 4 | # autoprognosis absolute 5 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 6 | from 
autoprognosis.plugins.preprocessors.dimensionality_reduction.plugin_gauss_projection import ( 7 | plugin, 8 | ) 9 | 10 | n_components = 3 11 | 12 | 13 | def from_api() -> PreprocessorPlugin: 14 | return Preprocessors(category="dimensionality_reduction").get( 15 | "gauss_projection", n_components=n_components 16 | ) 17 | 18 | 19 | def from_module() -> PreprocessorPlugin: 20 | return plugin(n_components=n_components) 21 | 22 | 23 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 24 | def test_gauss_projection_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 25 | assert test_plugin is not None 26 | 27 | 28 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 29 | def test_gauss_projection_plugin_name(test_plugin: PreprocessorPlugin) -> None: 30 | assert test_plugin.name() == "gauss_projection" 31 | 32 | 33 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 34 | def test_gauss_projection_plugin_type(test_plugin: PreprocessorPlugin) -> None: 35 | assert test_plugin.type() == "preprocessor" 36 | assert test_plugin.subtype() == "dimensionality_reduction" 37 | 38 | 39 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 40 | def test_gauss_projection_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 41 | kwargs = {"features_count": 2} 42 | assert len(test_plugin.hyperparameter_space(**kwargs)) == 1 43 | assert test_plugin.hyperparameter_space(**kwargs)[0].name == "n_components" 44 | 45 | 46 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 47 | def test_gauss_projection_plugin_fit_transform(test_plugin: PreprocessorPlugin) -> None: 48 | res = test_plugin.fit_transform( 49 | [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 9, 9], [2, 2, 2, 2]] 50 | ) 51 | 52 | assert res.shape == (4, n_components) 53 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/dimensionality_reduction/test_pca.py: 
-------------------------------------------------------------------------------- 1 | # third party 2 | import pytest 3 | 4 | # autoprognosis absolute 5 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 6 | from autoprognosis.plugins.preprocessors.dimensionality_reduction.plugin_pca import ( 7 | plugin, 8 | ) 9 | 10 | n_components = 3 11 | 12 | 13 | def from_api() -> PreprocessorPlugin: 14 | return Preprocessors(category="dimensionality_reduction").get( 15 | "pca", n_components=n_components 16 | ) 17 | 18 | 19 | def from_module() -> PreprocessorPlugin: 20 | return plugin(n_components=n_components) 21 | 22 | 23 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 24 | def test_pca_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 25 | assert test_plugin is not None 26 | 27 | 28 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 29 | def test_pca_plugin_name(test_plugin: PreprocessorPlugin) -> None: 30 | assert test_plugin.name() == "pca" 31 | 32 | 33 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 34 | def test_pca_plugin_type(test_plugin: PreprocessorPlugin) -> None: 35 | assert test_plugin.type() == "preprocessor" 36 | assert test_plugin.subtype() == "dimensionality_reduction" 37 | 38 | 39 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 40 | def test_pca_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 41 | kwargs = {"features_count": 2} 42 | assert len(test_plugin.hyperparameter_space(**kwargs)) == 1 43 | assert test_plugin.hyperparameter_space(**kwargs)[0].name == "n_components" 44 | 45 | 46 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 47 | def test_pca_plugin_fit_transform(test_plugin: PreprocessorPlugin) -> None: 48 | res = test_plugin.fit_transform( 49 | [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 9, 9], [2, 2, 2, 2]] 50 | ) 51 | 52 | assert res.shape == (4, n_components) 53 | 
-------------------------------------------------------------------------------- /tests/plugins/preprocessors/dimensionality_reduction/test_variance_threshold.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import pytest 3 | 4 | # autoprognosis absolute 5 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 6 | from autoprognosis.plugins.preprocessors.dimensionality_reduction.plugin_variance_threshold import ( 7 | plugin, 8 | ) 9 | from autoprognosis.utils.serialization import load_model, save_model 10 | 11 | 12 | def from_api() -> PreprocessorPlugin: 13 | return Preprocessors(category="dimensionality_reduction").get("variance_threshold") 14 | 15 | 16 | def from_module() -> PreprocessorPlugin: 17 | return plugin() 18 | 19 | 20 | def from_serde() -> PreprocessorPlugin: 21 | buff = plugin().save() 22 | return plugin.load(buff) 23 | 24 | 25 | def from_pickle() -> PreprocessorPlugin: 26 | buff = save_model(plugin()) 27 | return load_model(buff) 28 | 29 | 30 | @pytest.mark.parametrize( 31 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 32 | ) 33 | def test_variance_threshold_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 34 | assert test_plugin is not None 35 | 36 | 37 | @pytest.mark.parametrize( 38 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 39 | ) 40 | def test_variance_threshold_plugin_name(test_plugin: PreprocessorPlugin) -> None: 41 | assert test_plugin.name() == "variance_threshold" 42 | 43 | 44 | @pytest.mark.parametrize( 45 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 46 | ) 47 | def test_variance_threshold_plugin_type(test_plugin: PreprocessorPlugin) -> None: 48 | assert test_plugin.type() == "preprocessor" 49 | assert test_plugin.subtype() == "dimensionality_reduction" 50 | 51 | 52 | @pytest.mark.parametrize( 53 | "test_plugin", [from_api(), from_module(), from_serde(), 
from_pickle()] 54 | ) 55 | def test_variance_threshold_plugin_hyperparams( 56 | test_plugin: PreprocessorPlugin, 57 | ) -> None: 58 | assert test_plugin.hyperparameter_space() == [] 59 | 60 | 61 | @pytest.mark.parametrize( 62 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 63 | ) 64 | def test_variance_threshold_plugin_fit_transform( 65 | test_plugin: PreprocessorPlugin, 66 | ) -> None: 67 | res = test_plugin.fit_transform( 68 | [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 9, 9], [2, 2, 2, 2]], [1, 2, 3, 4] 69 | ) 70 | 71 | assert res.shape == (4, 4) 72 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/feature_scaling/test_feature_normalizer.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pytest 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 7 | from autoprognosis.plugins.preprocessors.feature_scaling.plugin_feature_normalizer import ( 8 | plugin, 9 | ) 10 | from autoprognosis.utils.serialization import load_model, save_model 11 | 12 | 13 | def from_api() -> PreprocessorPlugin: 14 | return Preprocessors().get("feature_normalizer") 15 | 16 | 17 | def from_module() -> PreprocessorPlugin: 18 | return plugin() 19 | 20 | 21 | def from_serde() -> PreprocessorPlugin: 22 | buff = plugin().save() 23 | return plugin().load(buff) 24 | 25 | 26 | def from_pickle() -> PreprocessorPlugin: 27 | buff = save_model(plugin()) 28 | return load_model(buff) 29 | 30 | 31 | @pytest.mark.parametrize( 32 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 33 | ) 34 | def test_feature_normalizer_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 35 | assert test_plugin is not None 36 | 37 | 38 | @pytest.mark.parametrize( 39 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 40 | ) 41 | def 
test_feature_normalizer_plugin_name(test_plugin: PreprocessorPlugin) -> None: 42 | assert test_plugin.name() == "feature_normalizer" 43 | 44 | 45 | @pytest.mark.parametrize( 46 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 47 | ) 48 | def test_feature_normalizer_plugin_type(test_plugin: PreprocessorPlugin) -> None: 49 | assert test_plugin.type() == "preprocessor" 50 | assert test_plugin.subtype() == "feature_scaling" 51 | 52 | 53 | @pytest.mark.parametrize( 54 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 55 | ) 56 | def test_feature_normalizer_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 57 | assert test_plugin.hyperparameter_space() == [] 58 | 59 | 60 | @pytest.mark.parametrize( 61 | "test_plugin", [from_api(), from_module(), from_serde(), from_pickle()] 62 | ) 63 | def test_feature_normalizer_plugin_fit_transform( 64 | test_plugin: PreprocessorPlugin, 65 | ) -> None: 66 | res = test_plugin.fit_transform([[4, 1, 2, 2], [1, 3, 9, 3], [5, 7, 5, 1]]) 67 | 68 | np.testing.assert_array_equal( 69 | res, [[0.8, 0.2, 0.4, 0.4], [0.1, 0.3, 0.9, 0.3], [0.5, 0.7, 0.5, 0.1]] 70 | ) 71 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/feature_scaling/test_fs_nop.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pandas as pd 4 | import pytest 5 | 6 | # autoprognosis absolute 7 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 8 | from autoprognosis.plugins.preprocessors.feature_scaling.plugin_nop import plugin 9 | 10 | 11 | def from_api() -> PreprocessorPlugin: 12 | return Preprocessors(category="feature_scaling").get("nop") 13 | 14 | 15 | def from_module() -> PreprocessorPlugin: 16 | return plugin() 17 | 18 | 19 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 20 | def test_nop_plugin_sanity(test_plugin: 
PreprocessorPlugin) -> None: 21 | assert test_plugin is not None 22 | 23 | 24 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 25 | def test_nop_plugin_name(test_plugin: PreprocessorPlugin) -> None: 26 | assert test_plugin.name() == "nop" 27 | 28 | 29 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 30 | def test_nop_plugin_type(test_plugin: PreprocessorPlugin) -> None: 31 | assert test_plugin.type() == "preprocessor" 32 | assert test_plugin.subtype() == "feature_scaling" 33 | 34 | 35 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 36 | def test_nop_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 37 | assert test_plugin.hyperparameter_space() == [] 38 | 39 | 40 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module()]) 41 | def test_nop_plugin_fit_transform(test_plugin: PreprocessorPlugin) -> None: 42 | res = test_plugin.fit_transform(pd.DataFrame([[1, 1, 1, 1], [2, 2, 2, 2]])) 43 | 44 | np.testing.assert_array_equal(res, [[1, 1, 1, 1], [2, 2, 2, 2]]) 45 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/feature_scaling/test_maxabs_scaler.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pytest 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 7 | from autoprognosis.plugins.preprocessors.feature_scaling.plugin_maxabs_scaler import ( 8 | plugin, 9 | ) 10 | 11 | 12 | def from_api() -> PreprocessorPlugin: 13 | return Preprocessors().get("maxabs_scaler") 14 | 15 | 16 | def from_module() -> PreprocessorPlugin: 17 | return plugin() 18 | 19 | 20 | def from_serde() -> PreprocessorPlugin: 21 | buff = plugin().save() 22 | return plugin().load(buff) 23 | 24 | 25 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 26 | def 
test_maxabs_scaler_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 27 | assert test_plugin is not None 28 | 29 | 30 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 31 | def test_maxabs_scaler_plugin_name(test_plugin: PreprocessorPlugin) -> None: 32 | assert test_plugin.name() == "maxabs_scaler" 33 | 34 | 35 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 36 | def test_maxabs_scaler_plugin_type(test_plugin: PreprocessorPlugin) -> None: 37 | assert test_plugin.type() == "preprocessor" 38 | assert test_plugin.subtype() == "feature_scaling" 39 | 40 | 41 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 42 | def test_maxabs_scaler_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 43 | assert test_plugin.hyperparameter_space() == [] 44 | 45 | 46 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 47 | def test_maxabs_scaler_plugin_fit_transform(test_plugin: PreprocessorPlugin) -> None: 48 | res = test_plugin.fit_transform( 49 | [[1.0, -1.0, 2.0], [2.0, 0.0, 0.0], [0.0, 1.0, -1.0]] 50 | ) 51 | 52 | np.testing.assert_array_equal( 53 | res, [[0.5, -1.0, 1.0], [1.0, 0.0, 0.0], [0.0, 1.0, -0.5]] 54 | ) 55 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/feature_scaling/test_minmax_scaler.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pytest 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 7 | from autoprognosis.plugins.preprocessors.feature_scaling.plugin_minmax_scaler import ( 8 | plugin, 9 | ) 10 | 11 | 12 | def from_api() -> PreprocessorPlugin: 13 | return Preprocessors().get("minmax_scaler") 14 | 15 | 16 | def from_module() -> PreprocessorPlugin: 17 | return plugin() 18 | 19 | 20 | def 
from_serde() -> PreprocessorPlugin: 21 | buff = plugin().save() 22 | return plugin().load(buff) 23 | 24 | 25 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 26 | def test_minmax_scaler_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 27 | assert test_plugin is not None 28 | 29 | 30 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 31 | def test_minmax_scaler_plugin_name(test_plugin: PreprocessorPlugin) -> None: 32 | assert test_plugin.name() == "minmax_scaler" 33 | 34 | 35 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 36 | def test_minmax_scaler_plugin_type(test_plugin: PreprocessorPlugin) -> None: 37 | assert test_plugin.type() == "preprocessor" 38 | assert test_plugin.subtype() == "feature_scaling" 39 | 40 | 41 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 42 | def test_minmax_scaler_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 43 | assert test_plugin.hyperparameter_space() == [] 44 | 45 | 46 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 47 | def test_minmax_scaler_plugin_fit_transform(test_plugin: PreprocessorPlugin) -> None: 48 | res = test_plugin.fit_transform([[-1, 2], [-0.5, 6], [0, 10], [1, 18]]) 49 | 50 | np.testing.assert_array_equal( 51 | res, [[0.0, 0.0], [0.25, 0.25], [0.5, 0.5], [1.0, 1.0]] 52 | ) 53 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/feature_scaling/test_normal_transform.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import pytest 3 | 4 | # autoprognosis absolute 5 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 6 | from autoprognosis.plugins.preprocessors.feature_scaling.plugin_normal_transform import ( 7 | plugin, 8 | ) 9 | 10 | 11 | def from_api() -> PreprocessorPlugin: 
12 | return Preprocessors().get("normal_transform") 13 | 14 | 15 | def from_module() -> PreprocessorPlugin: 16 | return plugin() 17 | 18 | 19 | def from_serde() -> PreprocessorPlugin: 20 | buff = plugin().save() 21 | return plugin().load(buff) 22 | 23 | 24 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 25 | def test_normal_transform_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 26 | assert test_plugin is not None 27 | 28 | 29 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 30 | def test_normal_transform_plugin_name(test_plugin: PreprocessorPlugin) -> None: 31 | assert test_plugin.name() == "normal_transform" 32 | 33 | 34 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 35 | def test_normal_transform_plugin_type(test_plugin: PreprocessorPlugin) -> None: 36 | assert test_plugin.type() == "preprocessor" 37 | assert test_plugin.subtype() == "feature_scaling" 38 | 39 | 40 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 41 | def test_normal_transform_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 42 | assert test_plugin.hyperparameter_space() == [] 43 | 44 | 45 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 46 | def test_normal_transform_plugin_fit_transform(test_plugin: PreprocessorPlugin) -> None: 47 | res = test_plugin.fit_transform([[-1, 2], [-0.5, 6], [0, 10], [1, 18]]) 48 | 49 | assert res.shape == (4, 2) 50 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/feature_scaling/test_scaler.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pytest 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 7 | from 
autoprognosis.plugins.preprocessors.feature_scaling.plugin_scaler import plugin 8 | 9 | 10 | def from_api() -> PreprocessorPlugin: 11 | return Preprocessors().get("scaler") 12 | 13 | 14 | def from_module() -> PreprocessorPlugin: 15 | return plugin() 16 | 17 | 18 | def from_serde() -> PreprocessorPlugin: 19 | buff = plugin().save() 20 | return plugin().load(buff) 21 | 22 | 23 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 24 | def test_scaler_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 25 | assert test_plugin is not None 26 | 27 | 28 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 29 | def test_scaler_plugin_name(test_plugin: PreprocessorPlugin) -> None: 30 | assert test_plugin.name() == "scaler" 31 | 32 | 33 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 34 | def test_scaler_plugin_type(test_plugin: PreprocessorPlugin) -> None: 35 | assert test_plugin.type() == "preprocessor" 36 | assert test_plugin.subtype() == "feature_scaling" 37 | 38 | 39 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 40 | def test_scaler_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 41 | assert test_plugin.hyperparameter_space() == [] 42 | 43 | 44 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 45 | def test_scaler_plugin_fit_transform(test_plugin: PreprocessorPlugin) -> None: 46 | res = test_plugin.fit_transform([[0, 0], [0, 0], [1, 1], [1, 1]]) 47 | 48 | np.testing.assert_array_equal( 49 | res, [[-1.0, -1.0], [-1.0, -1.0], [1.0, 1.0], [1.0, 1.0]] 50 | ) 51 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/feature_scaling/test_uniform_transform.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import pytest 3 | 4 | # autoprognosis absolute 5 | from 
autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 6 | from autoprognosis.plugins.preprocessors.feature_scaling.plugin_uniform_transform import ( 7 | plugin, 8 | ) 9 | 10 | 11 | def from_api() -> PreprocessorPlugin: 12 | return Preprocessors().get("uniform_transform") 13 | 14 | 15 | def from_module() -> PreprocessorPlugin: 16 | return plugin() 17 | 18 | 19 | def from_serde() -> PreprocessorPlugin: 20 | buff = plugin().save() 21 | return plugin().load(buff) 22 | 23 | 24 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 25 | def test_uniform_transform_plugin_sanity(test_plugin: PreprocessorPlugin) -> None: 26 | assert test_plugin is not None 27 | 28 | 29 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 30 | def test_uniform_transform_plugin_name(test_plugin: PreprocessorPlugin) -> None: 31 | assert test_plugin.name() == "uniform_transform" 32 | 33 | 34 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 35 | def test_uniform_transform_plugin_type(test_plugin: PreprocessorPlugin) -> None: 36 | assert test_plugin.type() == "preprocessor" 37 | assert test_plugin.subtype() == "feature_scaling" 38 | 39 | 40 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 41 | def test_uniform_transform_plugin_hyperparams(test_plugin: PreprocessorPlugin) -> None: 42 | assert test_plugin.hyperparameter_space() == [] 43 | 44 | 45 | @pytest.mark.parametrize("test_plugin", [from_api(), from_module(), from_serde()]) 46 | def test_uniform_transform_plugin_fit_transform( 47 | test_plugin: PreprocessorPlugin, 48 | ) -> None: 49 | res = test_plugin.fit_transform([[-1, 2], [-0.5, 6], [0, 10], [1, 18]]) 50 | 51 | assert res.shape == (4, 2) 52 | -------------------------------------------------------------------------------- /tests/plugins/preprocessors/test_preprocessing_api.py: 
-------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Any, List 3 | 4 | # third party 5 | import pandas as pd 6 | import pytest 7 | 8 | # autoprognosis absolute 9 | from autoprognosis.plugins.preprocessors import PreprocessorPlugin, Preprocessors 10 | 11 | 12 | @pytest.fixture 13 | def ctx() -> Preprocessors: 14 | return Preprocessors() 15 | 16 | 17 | class Mock(PreprocessorPlugin): 18 | def __init__(self) -> None: 19 | super().__init__() 20 | 21 | @staticmethod 22 | def name() -> str: 23 | return "test" 24 | 25 | @staticmethod 26 | def subtype() -> str: 27 | return "feature_scaling" 28 | 29 | @staticmethod 30 | def hyperparameter_space(*args: Any, **kwargs: Any) -> List[Any]: 31 | return [] 32 | 33 | def _fit(self, X: pd.DataFrame, *args: Any, **kwargs: Any) -> "Mock": 34 | return self 35 | 36 | def _transform(self, X: pd.DataFrame) -> pd.DataFrame: 37 | return {} 38 | 39 | def save(self) -> bytes: 40 | return b"" 41 | 42 | @classmethod 43 | def load(cls, buff: bytes) -> "Mock": 44 | return cls() 45 | 46 | 47 | class Invalid: 48 | def __init__(self) -> None: 49 | pass 50 | 51 | 52 | def test_load(ctx: Preprocessors) -> None: 53 | assert len(ctx._plugins) == 0 54 | ctx.get("feature_normalizer") 55 | assert len(ctx._plugins) == 1 56 | 57 | 58 | def test_list(ctx: Preprocessors) -> None: 59 | ctx.get("nop") 60 | assert "nop" in ctx.list() 61 | 62 | 63 | def test_add_get(ctx: Preprocessors) -> None: 64 | ctx.add("mock", Mock) 65 | 66 | assert "mock" in ctx.list() 67 | 68 | mock = ctx.get("mock") 69 | 70 | assert mock.name() == "test" 71 | 72 | 73 | def test_add_get_invalid(ctx: Preprocessors) -> None: 74 | with pytest.raises(ValueError): 75 | ctx.add("invalid", Invalid) 76 | 77 | assert "mock" not in ctx.list() 78 | 79 | with pytest.raises(ValueError): 80 | ctx.get("mock") 81 | 82 | 83 | def test_iter(ctx: Preprocessors) -> None: 84 | for v in ctx: 85 | assert ctx[v].name() != "" 86 | 
-------------------------------------------------------------------------------- /tests/plugins/utils/test_cast.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | import pandas as pd 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.plugins.utils.cast import to_dataframe 7 | 8 | 9 | def test_cast_to_dataframe() -> None: 10 | simple_list = [[1, 2, 3]] 11 | 12 | cast = to_dataframe(simple_list) 13 | assert isinstance(cast, pd.DataFrame) 14 | 15 | cast = to_dataframe(pd.DataFrame(simple_list)) 16 | assert isinstance(cast, pd.DataFrame) 17 | 18 | cast = to_dataframe(np.array(simple_list)) 19 | assert isinstance(cast, pd.DataFrame) 20 | -------------------------------------------------------------------------------- /tests/plugins/utils/test_imputation_metrics.py: -------------------------------------------------------------------------------- 1 | # third party 2 | import numpy as np 3 | 4 | # autoprognosis absolute 5 | from autoprognosis.plugins.utils.metrics import MAE, RMSE 6 | 7 | 8 | def test_MAE() -> None: 9 | data = np.array([1, 2, 3]) 10 | data_truth = np.array([1, 2, 4]) 11 | mask = np.array([False, True, True]) 12 | assert MAE(data, data_truth, mask) == 0.5 13 | 14 | 15 | def test_RMSE() -> None: 16 | data = np.array([1, 2, 3]) 17 | data_truth = np.array([1, 2, 5]) 18 | mask = np.array([False, False, True]) 19 | assert RMSE(data, data_truth, mask) == 2 20 | -------------------------------------------------------------------------------- /tests/plugins/utils/test_simulate.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | from typing import Tuple 3 | 4 | # third party 5 | import numpy as np 6 | import pytest 7 | 8 | # autoprognosis absolute 9 | from autoprognosis.plugins.utils.simulate import simulate_nan 10 | 11 | 12 | def dataset( 13 | mechanism: str, p_miss: float, n: int = 1000, opt: str = "logistic" 14 | ) -> 
Tuple[np.ndarray, np.ndarray, np.ndarray]: 15 | np.random.seed(0) 16 | 17 | p = 4 18 | 19 | mean = np.repeat(0, p) 20 | cov = 0.5 * (np.ones((p, p)) + np.eye(p)) 21 | 22 | x = np.random.multivariate_normal(mean, cov, size=n) 23 | x_simulated = simulate_nan(x, p_miss, mechanism, opt=opt) 24 | 25 | mask = x_simulated["mask"] 26 | x_miss = x_simulated["X_incomp"] 27 | 28 | return x, x_miss, mask 29 | 30 | 31 | @pytest.mark.parametrize("mechanism", ["MAR", "MNAR", "MCAR"]) 32 | @pytest.mark.parametrize("p_miss", [0.1, 0.3, 0.5]) 33 | def test_simulate_nan(mechanism: str, p_miss: float) -> None: 34 | orig, miss, mask = dataset(mechanism, p_miss) 35 | 36 | np.testing.assert_array_equal((orig != miss), mask) 37 | np.testing.assert_array_equal(np.isnan(miss), mask) 38 | 39 | 40 | @pytest.mark.parametrize("opt", ["logistic", "quantile", "selfmasked"]) 41 | def test_simulate_simulate_mnar(opt: str) -> None: 42 | orig, miss, mask = dataset("MNAR", 0.5, opt=opt) 43 | 44 | np.testing.assert_array_equal((orig != miss), mask) 45 | np.testing.assert_array_equal(np.isnan(miss), mask) 46 | -------------------------------------------------------------------------------- /tests/studies/helpers.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | import datetime 3 | from typing import Any 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.hooks import Hooks 7 | 8 | 9 | class MockHook(Hooks): 10 | def __init__(self) -> None: 11 | self._started_at = datetime.datetime.utcnow() 12 | 13 | def cancel(self) -> bool: 14 | # cancel after 10 seconds 15 | time_passed = datetime.datetime.utcnow() - self._started_at 16 | 17 | return time_passed.total_seconds() > 10 18 | 19 | def heartbeat( 20 | self, topic: str, subtopic: str, event_type: str, **kwargs: Any 21 | ) -> None: 22 | pass 23 | 24 | def finish(self) -> None: 25 | pass 26 | -------------------------------------------------------------------------------- /tests/utils/test_parallel.py: 
-------------------------------------------------------------------------------- 1 | # stdlib 2 | import multiprocessing 3 | import os 4 | 5 | # autoprognosis absolute 6 | from autoprognosis.utils.parallel import n_learner_jobs, n_opt_jobs 7 | 8 | 9 | def test_n_opt_jobs() -> None: 10 | os.environ["N_OPT_JOBS"] = "1" 11 | 12 | assert n_opt_jobs() == 1 13 | 14 | del os.environ["N_OPT_JOBS"] 15 | 16 | assert n_opt_jobs() == 2 17 | 18 | 19 | def test_n_learner_jobs() -> None: 20 | os.environ["N_LEARNER_JOBS"] = "1" 21 | 22 | assert n_learner_jobs() == 1 23 | 24 | del os.environ["N_LEARNER_JOBS"] 25 | 26 | assert n_learner_jobs() == multiprocessing.cpu_count() 27 | -------------------------------------------------------------------------------- /third_party/image_template/streamlit/.gitattributes: -------------------------------------------------------------------------------- 1 | *.7z filter=lfs diff=lfs merge=lfs -text 2 | *.arrow filter=lfs diff=lfs merge=lfs -text 3 | *.bin filter=lfs diff=lfs merge=lfs -text 4 | *.bin.* filter=lfs diff=lfs merge=lfs -text 5 | *.bz2 filter=lfs diff=lfs merge=lfs -text 6 | *.ftz filter=lfs diff=lfs merge=lfs -text 7 | *.gz filter=lfs diff=lfs merge=lfs -text 8 | *.h5 filter=lfs diff=lfs merge=lfs -text 9 | *.joblib filter=lfs diff=lfs merge=lfs -text 10 | *.lfs.* filter=lfs diff=lfs merge=lfs -text 11 | *.model filter=lfs diff=lfs merge=lfs -text 12 | *.msgpack filter=lfs diff=lfs merge=lfs -text 13 | *.onnx filter=lfs diff=lfs merge=lfs -text 14 | *.ot filter=lfs diff=lfs merge=lfs -text 15 | *.parquet filter=lfs diff=lfs merge=lfs -text 16 | *.pb filter=lfs diff=lfs merge=lfs -text 17 | *.pt filter=lfs diff=lfs merge=lfs -text 18 | *.pth filter=lfs diff=lfs merge=lfs -text 19 | *.rar filter=lfs diff=lfs merge=lfs -text 20 | saved_model/**/* filter=lfs diff=lfs merge=lfs -text 21 | *.tar.* filter=lfs diff=lfs merge=lfs -text 22 | *.tflite filter=lfs diff=lfs merge=lfs -text 23 | *.tgz filter=lfs diff=lfs merge=lfs -text 24 | *.xz 
filter=lfs diff=lfs merge=lfs -text 25 | *.zip filter=lfs diff=lfs merge=lfs -text 26 | *.zstandard filter=lfs diff=lfs merge=lfs -text 27 | *tfevents* filter=lfs diff=lfs merge=lfs -text 28 | -------------------------------------------------------------------------------- /third_party/image_template/streamlit/.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [theme] 2 | base = "dark" 3 | -------------------------------------------------------------------------------- /third_party/image_template/streamlit/Procfile: -------------------------------------------------------------------------------- 1 | web: streamlit run app.py --server.port=${PORT:=8000} 2 | -------------------------------------------------------------------------------- /third_party/image_template/streamlit/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Test4 3 | emoji: 📈 4 | colorFrom: red 5 | colorTo: red 6 | sdk: streamlit 7 | app_file: app.py 8 | pinned: false 9 | --- 10 | 11 | # Configuration 12 | 13 | `title`: _string_ 14 | Display title for the Space 15 | 16 | `emoji`: _string_ 17 | Space emoji (emoji-only character allowed) 18 | 19 | `colorFrom`: _string_ 20 | Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray) 21 | 22 | `colorTo`: _string_ 23 | Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray) 24 | 25 | `sdk`: _string_ 26 | Can be either `gradio`, `streamlit`, or `static` 27 | 28 | `sdk_version` : _string_ 29 | Only applicable for `streamlit` SDK. 30 | See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions. 31 | 32 | `app_file`: _string_ 33 | Path to your main application file (which contains either `gradio` or `streamlit` Python code, or `static` html code). 34 | Path is relative to the root of the repository. 
35 | 36 | `models`: _List[string]_ 37 | HF model IDs (like "gpt2" or "deepset/roberta-base-squad2") used in the Space. 38 | Will be parsed automatically from your code if not specified here. 39 | 40 | `datasets`: _List[string]_ 41 | HF dataset IDs (like "common_voice" or "oscar-corpus/OSCAR-2109") used in the Space. 42 | Will be parsed automatically from your code if not specified here. 43 | 44 | `pinned`: _boolean_ 45 | Whether the Space stays on top of your list. 46 | -------------------------------------------------------------------------------- /third_party/image_template/streamlit/app.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | import subprocess 3 | import sys 4 | 5 | 6 | def install(install_pack: str) -> None: 7 | print(f"Installing {install_pack}") 8 | 9 | subprocess.check_call( 10 | [sys.executable, "-m", "pip", "install", install_pack], 11 | stdout=subprocess.DEVNULL, 12 | stderr=subprocess.DEVNULL, 13 | ) 14 | 15 | 16 | if __name__ == "__main__": 17 | # install("third_party/autoprognosis-0.1.1-py2.py3-none-any.whl") 18 | # third party 19 | from run_demonstrator import run 20 | 21 | run("app.p") 22 | -------------------------------------------------------------------------------- /third_party/image_template/streamlit/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/bcebere/geomloss 2 | hyperimpute==0.1.5 3 | matplotlib 4 | numpy==1.20.3 5 | pandas>=1.3 6 | scikit-learn>=1.0.2 7 | seaborn 8 | shap 9 | streamlit 10 | third_party/autoprognosis-0.1.1-py2.py3-none-any.whl 11 | torch>=1.10 12 | xgboost>=1.6.1 13 | xgbse>=0.2.3 14 | -------------------------------------------------------------------------------- /third_party/image_template/streamlit/runtime.txt: -------------------------------------------------------------------------------- 1 | python-3.8.11 2 | 
-------------------------------------------------------------------------------- /tutorials/bindings/R/tutorial_classification.R: -------------------------------------------------------------------------------- 1 | library(reticulate) 2 | py_install("autoprognosis", pip = TRUE) 3 | 4 | pathlib <- import("pathlib", convert=FALSE) 5 | warnings <- import("warnings", convert=FALSE) 6 | autoprognosis <- import("autoprognosis", convert=FALSE) 7 | 8 | warnings$filterwarnings('ignore') 9 | 10 | Path = pathlib$Path 11 | ClassifierStudy = autoprognosis$studies$classifiers$ClassifierStudy 12 | load_model_from_file = autoprognosis$utils$serialization$load_model_from_file 13 | evaluate_estimator = autoprognosis$utils$tester$evaluate_estimator 14 | workspace <- Path("workspace") 15 | study_name <- "example_classifier" 16 | 17 | # Load the data 18 | data("iris") 19 | target <- "Species" 20 | 21 | # Create the AutoPrognosis Study 22 | study <- ClassifierStudy( 23 | dataset = iris, 24 | target = target, 25 | study_name=study_name, 26 | num_iter=as.integer(10), 27 | num_study_iter=as.integer(2), 28 | timeout=as.integer(60), 29 | classifiers=list("logistic_regression", "lda", "qda"), 30 | workspace=workspace 31 | ) 32 | 33 | study$run() 34 | 35 | # Load the optimal model - if exists 36 | output <- sprintf("%s/%s/model.p", workspace, study_name) 37 | 38 | model <- load_model_from_file(output) 39 | # The model is not fitted yet here 40 | 41 | # Benchmark the model 42 | targets <- c(target) 43 | X <- iris[ , !(names(iris) %in% targets)] 44 | Y = iris[, target] 45 | 46 | metrics <- evaluate_estimator(model, X, Y) 47 | 48 | # Fit the model 49 | model$fit(X, Y) 50 | 51 | sprintf("Performance metrics %s", metrics["str"]) 52 | 53 | # Predict using the model 54 | model$predict_proba(X) 55 | -------------------------------------------------------------------------------- /tutorials/bindings/R/tutorial_classification_with_missing_data.R: 
-------------------------------------------------------------------------------- 1 | library(reticulate) 2 | py_install("autoprognosis", pip = TRUE) 3 | 4 | pathlib <- import("pathlib", convert=FALSE) 5 | warnings <- import("warnings", convert=FALSE) 6 | autoprognosis <- import("autoprognosis", convert=FALSE) 7 | 8 | warnings$filterwarnings('ignore') 9 | 10 | Path = pathlib$Path 11 | ClassifierStudy = autoprognosis$studies$classifiers$ClassifierStudy 12 | load_model_from_file = autoprognosis$utils$serialization$load_model_from_file 13 | evaluate_estimator = autoprognosis$utils$tester$evaluate_estimator 14 | workspace <- Path("workspace") 15 | study_name <- "example_classifier_with_miss" 16 | 17 | # Load the data 18 | adult <- read.table('https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data', 19 | sep = ',', fill = F, strip.white = T) 20 | colnames(adult) <- c('age', 'workclass', 'fnlwgt', 'educatoin', 21 | 'educatoin_num', 'marital_status', 'occupation', 'relationship', 'race', 'sex', 22 | 'capital_gain', 'capital_loss', 'hours_per_week', 'native_country', 'income') 23 | adult[adult == "?"] <- NA 24 | adult <- adult[ , !(names(adult) %in% c("native_country"))] 25 | 26 | df <- adult 27 | 28 | target <- "income" 29 | 30 | # Create the AutoPrognosis Study 31 | study <- ClassifierStudy( 32 | dataset = df, 33 | target = target, 34 | study_name=study_name, 35 | num_iter=as.integer(10), 36 | num_study_iter=as.integer(2), 37 | timeout=as.integer(60), 38 | classifiers=list("logistic_regression", "lda", "qda"), 39 | workspace=workspace, 40 | nan_placeholder='NA' 41 | ) 42 | 43 | study$run() 44 | 45 | # Load the optimal model - if exists 46 | output <- sprintf("%s/%s/model.p", workspace, study_name) 47 | 48 | model <- load_model_from_file(output) 49 | # The model is not fitted yet here 50 | 51 | # Benchmark the model 52 | targets <- c(target) 53 | X <- df[ , !(names(df) %in% targets)] 54 | Y = df[, target] 55 | 56 | metrics <- evaluate_estimator(model, 
X, Y) 57 | 58 | # Fit the model 59 | model$fit(X, Y) 60 | 61 | sprintf("Performance metrics %s", metrics["str"]) 62 | 63 | # Predict using the model 64 | model$predict_proba(X) 65 | -------------------------------------------------------------------------------- /tutorials/bindings/R/tutorial_regression.R: -------------------------------------------------------------------------------- 1 | library(reticulate) 2 | py_install("autoprognosis", pip = TRUE) 3 | 4 | pathlib <- import("pathlib", convert=FALSE) 5 | warnings <- import("warnings", convert=FALSE) 6 | autoprognosis <- import("autoprognosis", convert=FALSE) 7 | 8 | warnings$filterwarnings('ignore') 9 | 10 | Path = pathlib$Path 11 | RegressionStudy = autoprognosis$studies$regression$RegressionStudy 12 | load_model_from_file = autoprognosis$utils$serialization$load_model_from_file 13 | evaluate_regression = autoprognosis$utils$tester$evaluate_regression 14 | 15 | workspace <- Path("workspace") 16 | study_name <- "example_regression" 17 | 18 | # Load dataset 19 | airfoil <- read.csv( 20 | url("https://archive.ics.uci.edu/ml/machine-learning-databases/00291/airfoil_self_noise.dat"), 21 | sep = "\t", 22 | header = FALSE, 23 | ) 24 | 25 | target <- "V6" 26 | 27 | # Create AutoPrognosis Study 28 | study <- RegressionStudy( 29 | dataset = airfoil, 30 | target = target, 31 | study_name=study_name, 32 | num_iter=as.integer(10), 33 | num_study_iter=as.integer(2), 34 | timeout=as.integer(60), 35 | regressors=list("linear_regression", "kneighbors_regressor"), 36 | workspace=workspace 37 | ) 38 | 39 | study$run() 40 | 41 | # Load the optimal model - if exists 42 | output <- sprintf("%s/%s/model.p", workspace, study_name) 43 | 44 | model <- load_model_from_file(output) 45 | # The model is not fitted yet here 46 | 47 | # Benchmark the model 48 | targets <- c(target) 49 | X <- airfoil[ , !(names(iris) %in% targets)] 50 | Y = airfoil[, target] 51 | 52 | metrics <- evaluate_regression(model, X, Y) 53 | 54 | sprintf("Performance 
metrics %s", metrics["str"]) 55 | 56 | # Fit the model 57 | model$fit(X, Y) 58 | 59 | # Predict 60 | model$predict(X) 61 | -------------------------------------------------------------------------------- /tutorials/bindings/R/tutorial_survival_analysis.R: -------------------------------------------------------------------------------- 1 | library(reticulate) 2 | library(survival) 3 | 4 | py_install("autoprognosis", pip = TRUE) 5 | 6 | pathlib <- import("pathlib", convert=FALSE) 7 | warnings <- import("warnings", convert=FALSE) 8 | autoprognosis <- import("autoprognosis", convert=FALSE) 9 | np <- import("numpy", convert=FALSE) 10 | 11 | warnings$filterwarnings('ignore') 12 | 13 | Path = pathlib$Path 14 | RiskEstimationStudy = autoprognosis$studies$risk_estimation$RiskEstimationStudy 15 | load_model_from_file = autoprognosis$utils$serialization$load_model_from_file 16 | evaluate_survival_estimator = autoprognosis$utils$tester$evaluate_survival_estimator 17 | 18 | workspace <- Path("workspace") 19 | study_name <- "example_risk_estimation" 20 | 21 | # Load the data 22 | data(cancer, package="survival") 23 | 24 | targets <- c("dtime", "death") 25 | df <- rotterdam 26 | 27 | X <- df[ , !(names(df) %in% targets)] 28 | Y <- df[, "death"] 29 | T <- df[, "dtime"] 30 | 31 | eval_time_horizons <- list(2000) 32 | 33 | # Create the AutoPrognosis Study 34 | study <- RiskEstimationStudy( 35 | dataset = df, 36 | target = "death", 37 | time_to_event="dtime", 38 | time_horizons = eval_time_horizons, 39 | study_name=study_name, 40 | num_iter=as.integer(10), 41 | num_study_iter=as.integer(2), 42 | timeout=as.integer(60), 43 | risk_estimators=list("cox_ph", "survival_xgboost"), 44 | workspace=workspace 45 | ) 46 | 47 | study$run() 48 | 49 | # Load the optimal model - if exists 50 | output <- sprintf("%s/%s/model.p", workspace, study_name) 51 | 52 | model <- load_model_from_file(output) 53 | # The model is not fitted yet here 54 | 55 | # Benchmark the model 56 | metrics <- 
evaluate_survival_estimator(model, X, T, Y, eval_time_horizons) 57 | 58 | # Fit the model 59 | model$fit(X, T, Y) 60 | 61 | sprintf("Performance metrics %s", metrics["str"]) 62 | 63 | # Predict using the model 64 | model$predict(X) 65 | --------------------------------------------------------------------------------