├── .binder ├── apt.txt ├── postBuild └── requirements.txt ├── .codecov.yml ├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── question.md ├── dependabot.yml └── workflows │ ├── benchmarking-files │ ├── benchmarks │ │ ├── classification.yaml │ │ └── regression.yaml │ ├── config.yaml │ ├── constraints.yaml │ ├── frameworks.yaml │ └── regressions-util.py │ ├── citation_cff.yml │ ├── dist.yml │ ├── docker-publish.yml │ ├── docs.yml │ ├── generate-baselines.yml │ ├── pre-commit-update.yml │ ├── pre-commit.yaml │ ├── pytest.yml │ ├── regressions.yml │ └── stale.yaml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CITATION.cff ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE.txt ├── MANIFEST.in ├── Makefile ├── README.md ├── autosklearn ├── __init__.py ├── __version__.py ├── askl_typing.py ├── automl.py ├── classification.py ├── constants.py ├── data │ ├── __init__.py │ ├── abstract_data_manager.py │ ├── feature_validator.py │ ├── target_validator.py │ ├── validation.py │ └── xy_data_manager.py ├── ensemble_building │ ├── __init__.py │ ├── builder.py │ ├── manager.py │ └── run.py ├── ensembles │ ├── __init__.py │ ├── abstract_ensemble.py │ ├── ensemble_selection.py │ ├── multiobjective_dummy_ensemble.py │ └── singlebest_ensemble.py ├── estimators.py ├── evaluation │ ├── __init__.py │ ├── abstract_evaluator.py │ ├── splitter.py │ ├── test_evaluator.py │ ├── train_evaluator.py │ └── util.py ├── experimental │ ├── __init__.py │ ├── askl2.py │ ├── balanced_accuracy │ │ ├── askl2_portfolios │ │ │ ├── RF_None_10CV_iterative_es_if.json │ │ │ ├── RF_None_3CV_iterative_es_if.json │ │ │ ├── RF_None_5CV_iterative_es_if.json │ │ │ ├── RF_None_holdout_iterative_es_if.json │ │ │ ├── RF_SH-eta4-i_10CV_iterative_es_if.json │ │ │ ├── RF_SH-eta4-i_3CV_iterative_es_if.json │ │ │ ├── RF_SH-eta4-i_5CV_iterative_es_if.json │ │ │ └── RF_SH-eta4-i_holdout_iterative_es_if.json │ │ └── askl2_training_data.json │ ├── log_loss │ │ ├── askl2_portfolios │ │ │ ├── 
RF_None_10CV_iterative_es_if.json │ │ │ ├── RF_None_3CV_iterative_es_if.json │ │ │ ├── RF_None_5CV_iterative_es_if.json │ │ │ ├── RF_None_holdout_iterative_es_if.json │ │ │ ├── RF_SH-eta4-i_10CV_iterative_es_if.json │ │ │ ├── RF_SH-eta4-i_3CV_iterative_es_if.json │ │ │ ├── RF_SH-eta4-i_5CV_iterative_es_if.json │ │ │ └── RF_SH-eta4-i_holdout_iterative_es_if.json │ │ └── askl2_training_data.json │ ├── roc_auc │ │ ├── askl2_portfolios │ │ │ ├── RF_None_10CV_iterative_es_if.json │ │ │ ├── RF_None_3CV_iterative_es_if.json │ │ │ ├── RF_None_5CV_iterative_es_if.json │ │ │ ├── RF_None_holdout_iterative_es_if.json │ │ │ ├── RF_SH-eta4-i_10CV_iterative_es_if.json │ │ │ ├── RF_SH-eta4-i_3CV_iterative_es_if.json │ │ │ ├── RF_SH-eta4-i_5CV_iterative_es_if.json │ │ │ └── RF_SH-eta4-i_holdout_iterative_es_if.json │ │ └── askl2_training_data.json │ └── selector.py ├── metalearning │ ├── __init__.py │ ├── files │ │ ├── accuracy_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── accuracy_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── accuracy_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── accuracy_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── average_precision_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── 
configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── average_precision_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── average_precision_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── average_precision_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── balanced_accuracy_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── balanced_accuracy_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── balanced_accuracy_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── balanced_accuracy_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_binary.classification_dense │ │ │ ├── 
algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_macro_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_macro_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_macro_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_macro_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_micro_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_micro_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_micro_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── 
configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_micro_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_samples_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_samples_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_samples_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_samples_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_weighted_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── 
description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_weighted_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_weighted_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── f1_weighted_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── log_loss_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── log_loss_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── log_loss_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── log_loss_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── mean_absolute_error_regression_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── 
feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── mean_absolute_error_regression_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── mean_squared_error_regression_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── mean_squared_error_regression_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── mean_squared_log_error_regression_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── mean_squared_log_error_regression_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── median_absolute_error_regression_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── median_absolute_error_regression_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── 
feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_macro_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_macro_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_macro_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_macro_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_micro_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_micro_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_micro_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff 
│ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_micro_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_samples_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_samples_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_samples_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_samples_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_weighted_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ 
├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_weighted_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_weighted_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── precision_weighted_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── r2_regression_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── r2_regression_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_macro_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── 
feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_macro_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_macro_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_macro_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_micro_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_micro_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_micro_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_micro_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── 
feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_samples_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_samples_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_samples_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_samples_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_weighted_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_weighted_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_weighted_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ 
│ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── recall_weighted_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── roc_auc_binary.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── roc_auc_binary.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── roc_auc_multiclass.classification_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── roc_auc_multiclass.classification_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ ├── root_mean_squared_error_regression_dense │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ │ └── root_mean_squared_error_regression_sparse │ │ │ ├── algorithm_runs.arff │ │ │ ├── configurations.csv │ │ │ ├── description.txt │ │ │ ├── feature_costs.arff │ │ │ ├── feature_runstatus.arff │ │ │ ├── feature_values.arff │ │ │ └── readme.txt │ ├── input │ │ ├── __init__.py │ │ └── aslib_simple.py │ ├── metafeatures │ │ ├── __init__.py │ │ ├── metafeature.py │ │ └── metafeatures.py │ ├── metalearning │ │ ├── __init__.py │ │ ├── 
clustering │ │ │ ├── __init__.py │ │ │ └── gmeans.py │ │ ├── create_datasets.py │ │ ├── kNearestDatasets │ │ │ ├── __init__.py │ │ │ └── kND.py │ │ ├── meta_base.py │ │ └── metrics │ │ │ ├── __init__.py │ │ │ └── misc.py │ ├── mismbo.py │ └── optimizers │ │ ├── __init__.py │ │ ├── metalearn_optimizer │ │ ├── __init__.py │ │ ├── metalearn_optimizerDefault.cfg │ │ ├── metalearn_optimizer_parser.py │ │ └── metalearner.py │ │ └── optimizer_base.py ├── metrics │ ├── __init__.py │ └── util.py ├── pipeline │ ├── __init__.py │ ├── base.py │ ├── classification.py │ ├── components │ │ ├── __init__.py │ │ ├── base.py │ │ ├── classification │ │ │ ├── __init__.py │ │ │ ├── adaboost.py │ │ │ ├── bernoulli_nb.py │ │ │ ├── decision_tree.py │ │ │ ├── extra_trees.py │ │ │ ├── gaussian_nb.py │ │ │ ├── gradient_boosting.py │ │ │ ├── k_nearest_neighbors.py │ │ │ ├── lda.py │ │ │ ├── liblinear_svc.py │ │ │ ├── libsvm_svc.py │ │ │ ├── mlp.py │ │ │ ├── multinomial_nb.py │ │ │ ├── passive_aggressive.py │ │ │ ├── qda.py │ │ │ ├── random_forest.py │ │ │ └── sgd.py │ │ ├── data_preprocessing │ │ │ ├── __init__.py │ │ │ ├── balancing │ │ │ │ ├── __init__.py │ │ │ │ └── balancing.py │ │ │ ├── categorical_encoding │ │ │ │ ├── __init__.py │ │ │ │ ├── encoding.py │ │ │ │ ├── no_encoding.py │ │ │ │ └── one_hot_encoding.py │ │ │ ├── category_shift │ │ │ │ ├── __init__.py │ │ │ │ └── category_shift.py │ │ │ ├── feature_type.py │ │ │ ├── feature_type_categorical.py │ │ │ ├── feature_type_numerical.py │ │ │ ├── feature_type_text.py │ │ │ ├── imputation │ │ │ │ ├── __init__.py │ │ │ │ ├── categorical_imputation.py │ │ │ │ └── numerical_imputation.py │ │ │ ├── minority_coalescense │ │ │ │ ├── __init__.py │ │ │ │ ├── minority_coalescer.py │ │ │ │ └── no_coalescense.py │ │ │ ├── rescaling │ │ │ │ ├── __init__.py │ │ │ │ ├── abstract_rescaling.py │ │ │ │ ├── minmax.py │ │ │ │ ├── none.py │ │ │ │ ├── normalize.py │ │ │ │ ├── power_transformer.py │ │ │ │ ├── quantile_transformer.py │ │ │ │ ├── 
robust_scaler.py │ │ │ │ └── standardize.py │ │ │ ├── text_encoding │ │ │ │ ├── __init__.py │ │ │ │ └── tfidf_encoding.py │ │ │ ├── text_feature_reduction │ │ │ │ ├── __init__.py │ │ │ │ └── truncated_svd.py │ │ │ └── variance_threshold │ │ │ │ ├── __init__.py │ │ │ │ └── variance_threshold.py │ │ ├── feature_preprocessing │ │ │ ├── __init__.py │ │ │ ├── densifier.py │ │ │ ├── extra_trees_preproc_for_classification.py │ │ │ ├── extra_trees_preproc_for_regression.py │ │ │ ├── fast_ica.py │ │ │ ├── feature_agglomeration.py │ │ │ ├── kernel_pca.py │ │ │ ├── kitchen_sinks.py │ │ │ ├── liblinear_svc_preprocessor.py │ │ │ ├── no_preprocessing.py │ │ │ ├── nystroem_sampler.py │ │ │ ├── pca.py │ │ │ ├── polynomial.py │ │ │ ├── random_trees_embedding.py │ │ │ ├── select_percentile.py │ │ │ ├── select_percentile_classification.py │ │ │ ├── select_percentile_regression.py │ │ │ ├── select_rates_classification.py │ │ │ ├── select_rates_regression.py │ │ │ └── truncatedSVD.py │ │ └── regression │ │ │ ├── __init__.py │ │ │ ├── adaboost.py │ │ │ ├── ard_regression.py │ │ │ ├── decision_tree.py │ │ │ ├── extra_trees.py │ │ │ ├── gaussian_process.py │ │ │ ├── gradient_boosting.py │ │ │ ├── k_nearest_neighbors.py │ │ │ ├── liblinear_svr.py │ │ │ ├── libsvm_svr.py │ │ │ ├── mlp.py │ │ │ ├── random_forest.py │ │ │ └── sgd.py │ ├── constants.py │ ├── create_searchspace_util.py │ ├── implementations │ │ ├── CategoryShift.py │ │ ├── MinorityCoalescer.py │ │ ├── SparseOneHotEncoder.py │ │ ├── __init__.py │ │ └── util.py │ ├── regression.py │ └── util.py ├── py.typed ├── regression.py ├── requirements.txt ├── smbo.py └── util │ ├── __init__.py │ ├── common.py │ ├── dask.py │ ├── data.py │ ├── dependencies.py │ ├── disk.py │ ├── functional.py │ ├── logging.yaml │ ├── logging_.py │ ├── multiobjective.py │ ├── parallel.py │ ├── pipeline.py │ ├── progress_bar.py │ ├── single_thread_client.py │ ├── smac_wrap.py │ └── stopwatch.py ├── doc ├── Makefile ├── _templates │ ├── class.rst │ ├── 
class_without_init.rst │ ├── function.rst │ └── layout.html ├── api.rst ├── conf.py ├── extending.rst ├── faq.rst ├── images │ └── askl_pipeline.png ├── index.rst ├── installation.rst ├── manual.rst └── releases.rst ├── examples ├── 20_basic │ ├── README.txt │ ├── example_classification.py │ ├── example_multilabel_classification.py │ ├── example_multioutput_regression.py │ └── example_regression.py ├── 40_advanced │ ├── README.txt │ ├── __init__.py │ ├── custom_metrics.py │ ├── example_calc_multiple_metrics.py │ ├── example_debug_logging.py │ ├── example_early_stopping_and_callbacks.py │ ├── example_feature_types.py │ ├── example_get_pipeline_components.py │ ├── example_inspect_predictions.py │ ├── example_interpretable_models.py │ ├── example_metrics.py │ ├── example_multi_objective.py │ ├── example_pandas_train_test.py │ ├── example_resampling.py │ ├── example_single_configuration.py │ └── example_text_preprocessing.py ├── 60_search │ ├── README.txt │ ├── example_parallel_manual_spawning_cli.py │ ├── example_parallel_manual_spawning_python.py │ ├── example_parallel_n_jobs.py │ ├── example_random_search.py │ ├── example_sequential.py │ └── example_successive_halving.py ├── 80_extending │ ├── README.txt │ ├── example_extending_classification.py │ ├── example_extending_data_preprocessor.py │ ├── example_extending_preprocessor.py │ ├── example_extending_regression.py │ └── example_restrict_number_of_hyperparameters.py └── README.txt ├── misc ├── classifiers.csv ├── create_hyperparameter_table.py ├── create_list_of_potential_models.py ├── regressors.csv ├── support_for_imbalanced_classes.txt └── transformers.csv ├── pyproject.toml ├── requirements.txt ├── scripts ├── 01_create_commands.py ├── 02_retrieve_metadata.py ├── 03_calculate_metafeatures.py ├── 04_create_aslib_files.py ├── 2015_nips_paper │ ├── Readme.md │ ├── plot │ │ └── plot_ranks.py │ ├── run │ │ ├── remove_dataset_from_metadata.py │ │ ├── run_auto_sklearn.py │ │ ├── run_commands.sh │ │ └── 
score_ensemble.py │ └── setup │ │ ├── create_commands.sh │ │ └── get_tasks.py ├── readme.md ├── run_auto-sklearn_for_metadata_generation.py └── update_metadata_util.py ├── setup.py └── test ├── __init__.py ├── conftest.py ├── fixtures ├── __init__.py ├── automl.py ├── backend.py ├── caching.py ├── dask.py ├── datasets.py ├── ensemble_building.py ├── ensembles.py ├── logging.py └── metrics.py ├── mocks ├── __init__.py └── logging.py ├── test_automl ├── __init__.py ├── automl_utils.py ├── cases.py ├── test_construction.py ├── test_dataset_compression.py ├── test_dummy_predictions.py ├── test_early_stopping.py ├── test_fit.py ├── test_fit_pipeline.py ├── test_model_predict.py ├── test_pareto_front.py ├── test_performance.py ├── test_performance_over_time.py ├── test_post_fit.py ├── test_predict.py ├── test_refit.py ├── test_show_models.py └── test_sklearn_compliance.py ├── test_data ├── __init__.py ├── test_feature_validator.py ├── test_target_validator.py └── test_validation.py ├── test_ensemble_builder ├── __init__.py ├── test.test_ensemble_builder.test_ensemble_test_run_end_at │ └── .auto-sklearn │ │ └── runs │ │ ├── 0_1_0.0 │ │ └── 0.1.0.0.model │ │ ├── 0_2_0.0 │ │ ├── 0.2.0.0.model │ │ └── predictions_test_0_2_0.0.np │ │ └── 0_3_100.0 │ │ ├── 0.3.0.0.model │ │ └── 0.3.100.0.model ├── test_ensemble_builder.py ├── test_ensemble_builder_real.py ├── test_ensemble_selection.py ├── test_manager.py ├── test_multiobjective_dummy_ensemble.py ├── test_run.py └── test_singlebest_ensemble.py ├── test_estimators ├── __init__.py ├── cases.py └── test_estimators.py ├── test_evaluation ├── .datasets │ └── abalone │ │ ├── abalone_feat.type │ │ ├── abalone_public.info │ │ ├── abalone_test.data │ │ ├── abalone_test.solution │ │ ├── abalone_train.data │ │ ├── abalone_train.solution │ │ ├── abalone_valid.data │ │ └── abalone_valid.solution ├── __init__.py ├── evaluation_util.py ├── test_abstract_evaluator.py ├── test_custom_splitters.py ├── test_dummy_pipelines.py ├── 
test_evaluation.py ├── test_test_evaluator.py └── test_train_evaluator.py ├── test_metalearning ├── __init__.py ├── pyMetaLearn │ ├── __init__.py │ ├── datasets │ │ └── dataset.arff │ ├── metalearning │ │ ├── test_kND.py │ │ └── test_metrics.py │ ├── test_meta_base.py │ ├── test_meta_base_data │ │ ├── algorithm_runs.arff │ │ ├── configurations.csv │ │ ├── description.txt │ │ ├── feature_costs.arff │ │ ├── feature_runstatus.arff │ │ ├── feature_values.arff │ │ └── readme.txt │ ├── test_meta_features.py │ ├── test_meta_features_sparse.py │ ├── test_metalearner.py │ ├── test_metalearning_configuration.py │ └── test_optimizer_base.py └── test_metalearning.py ├── test_metric ├── __init__.py ├── test_metrics.py └── test_util.py ├── test_optimizer └── test_smbo.py ├── test_pipeline ├── __init__.py ├── components │ ├── __init__.py │ ├── classification │ │ ├── __init__.py │ │ ├── test_adaboost.py │ │ ├── test_base.py │ │ ├── test_bernoulli_nb.py │ │ ├── test_decision_tree.py │ │ ├── test_extra_trees.py │ │ ├── test_gaussian_nb.py │ │ ├── test_gradient_boosting.py │ │ ├── test_k_nearest_neighbor.py │ │ ├── test_lda.py │ │ ├── test_liblinear.py │ │ ├── test_libsvm_svc.py │ │ ├── test_mlp.py │ │ ├── test_multinomial_nb.py │ │ ├── test_passive_aggressive.py │ │ ├── test_qda.py │ │ ├── test_random_forest.py │ │ └── test_sgd.py │ ├── data_preprocessing │ │ ├── __init__.py │ │ ├── dataset.pkl │ │ ├── test_balancing.py │ │ ├── test_categorical_imputation.py │ │ ├── test_category_shift.py │ │ ├── test_data_preprocessing.py │ │ ├── test_data_preprocessing_categorical.py │ │ ├── test_data_preprocessing_feat_type.py │ │ ├── test_data_preprocessing_numerical.py │ │ ├── test_data_preprocessing_text.py │ │ ├── test_minority_coalescence.py │ │ ├── test_numerical_imputation.py │ │ ├── test_one_hot_encoding.py │ │ ├── test_scaling.py │ │ └── test_variance_threshold.py │ ├── dummy_components │ │ ├── __init__.py │ │ ├── dummy_component_1.py │ │ ├── dummy_component_2.py │ │ └── 
dummy_component_import.py │ ├── feature_preprocessing │ │ ├── __init__.py │ │ ├── test_NoPreprocessing.py │ │ ├── test_choice.py │ │ ├── test_densifier.py │ │ ├── test_extra_trees_classification.py │ │ ├── test_extra_trees_regression.py │ │ ├── test_fast_ica.py │ │ ├── test_feature_agglomeration.py │ │ ├── test_kernel_pca.py │ │ ├── test_kitchen_sinks.py │ │ ├── test_liblinear.py │ │ ├── test_nystroem_sampler.py │ │ ├── test_pca.py │ │ ├── test_polynomial.py │ │ ├── test_random_trees_embedding.py │ │ ├── test_select_percentile_classification.py │ │ ├── test_select_percentile_regression.py │ │ ├── test_select_rates_classification.py │ │ ├── test_select_rates_regression.py │ │ └── test_truncatedSVD.py │ ├── regression │ │ ├── __init__.py │ │ ├── test_adaboost.py │ │ ├── test_ard_regression.py │ │ ├── test_base.py │ │ ├── test_decision_tree.py │ │ ├── test_extra_trees.py │ │ ├── test_gaussian_process.py │ │ ├── test_gradient_boosting.py │ │ ├── test_k_nearest_neighbors.py │ │ ├── test_liblinear_svr.py │ │ ├── test_mlp.py │ │ ├── test_random_forests.py │ │ ├── test_sgd.py │ │ └── test_support_vector_regression.py │ └── test_base.py ├── ignored_warnings.py ├── implementations │ ├── __init__.py │ ├── test_CategoryShift.py │ ├── test_MinorityCoalescer.py │ ├── test_SparseOneHotEncoder.py │ └── test_util.py ├── test_base.py ├── test_classification.py ├── test_create_searchspace_util_classification.py └── test_regression.py ├── test_scripts ├── __init__.py └── test_metadata_generation.py ├── test_util ├── __init__.py ├── example_config.yaml ├── test_backend.py ├── test_common.py ├── test_dask.py ├── test_data.py ├── test_dependencies.py ├── test_logging.py ├── test_single_thread_client.py ├── test_stopwatch.py └── test_trials_callback.py └── util.py /.binder/apt.txt: -------------------------------------------------------------------------------- 1 | build-essential 2 | swig 3 | -------------------------------------------------------------------------------- 
/.binder/postBuild: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | python -m pip install .[docs,examples] 6 | 7 | # Taken from https://github.com/scikit-learn/scikit-learn/blob/22cd233e1932457947e9994285dc7fd4e93881e4/.binder/postBuild 8 | # under BSD3 license, copyright the scikit-learn contributors 9 | 10 | # This script is called in a binder context. When this script is called, we are 11 | # inside a git checkout of the automl/auto-sklearn repo. This script 12 | # generates notebooks from the auto-sklearn python examples. 13 | 14 | if [[ ! -f /.dockerenv ]]; then 15 | echo "This script was written for repo2docker and is supposed to run inside a docker container." 16 | echo "Exiting because this script can delete data if run outside of a docker container." 17 | exit 1 18 | fi 19 | 20 | # Copy content we need from the auto-sklearn repo 21 | TMP_CONTENT_DIR=/tmp/auto-sklearn 22 | mkdir -p $TMP_CONTENT_DIR 23 | cp -r examples .binder $TMP_CONTENT_DIR 24 | # delete everything in current directory including dot files and dot folders 25 | find . -delete 26 | 27 | # Generate notebooks and remove other files from examples folder 28 | GENERATED_NOTEBOOKS_DIR=examples 29 | cp -r $TMP_CONTENT_DIR/examples $GENERATED_NOTEBOOKS_DIR 30 | 31 | find $GENERATED_NOTEBOOKS_DIR -name 'example_*.py' -exec sphx_glr_python_to_jupyter.py '{}' + 32 | # Keep __init__.py and custom_metrics.py 33 | NON_NOTEBOOKS=$(find $GENERATED_NOTEBOOKS_DIR -type f | grep -v '\.ipynb' | grep -v 'init' | grep -v 'custom_metrics') 34 | rm -f $NON_NOTEBOOKS 35 | 36 | # Modify path to be consistent by the path given by sphinx-gallery 37 | mkdir notebooks 38 | mv $GENERATED_NOTEBOOKS_DIR notebooks/ 39 | 40 | # Put the .binder folder back (may be useful for debugging purposes) 41 | mv $TMP_CONTENT_DIR/.binder . 
42 | # Final clean up 43 | rm -rf $TMP_CONTENT_DIR 44 | -------------------------------------------------------------------------------- /.binder/requirements.txt: -------------------------------------------------------------------------------- 1 | -r ../requirements.txt 2 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | #see https://github.com/codecov/support/wiki/Codecov-Yaml 2 | codecov: 3 | require_ci_to_pass: yes 4 | 5 | coverage: 6 | 7 | # 2 = xx.xx%, 0 = xx% 8 | precision: 2 9 | 10 | # https://docs.codecov.com/docs/commit-status 11 | status: 12 | 13 | # We want our total main project to always remain above 84% coverage, a 14 | # drop of 0.20% is allowed. It should fail if coverage couldn't be uploaded 15 | # of the CI fails otherwise 16 | project: 17 | default: 18 | target: 84% 19 | threshold: 0.20% 20 | if_not_found: failure 21 | if_ci_failed: error 22 | 23 | # The code changed by a PR should have 90% coverage. This is different from the 24 | # overall number shown above. 25 | # This encourages small PR's as they are easier to test. 
26 | patch: 27 | default: 28 | target: 90% 29 | if_not_found: failure 30 | if_ci_failed: error 31 | 32 | # We upload additional information on branching with pytest-cov `--cov-branch` 33 | # This information can be used by codecov.com to increase analysis of code 34 | parsers: 35 | gcov: 36 | branch_detection: 37 | conditional: true 38 | loop: true 39 | method: true 40 | macro: false 41 | 42 | 43 | comment: 44 | layout: diff, reach 45 | behavior: default 46 | require_changes: false 47 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | show-source = True 3 | max-line-length = 88 4 | extend-exclude = 5 | venv 6 | .venv 7 | build 8 | autosklearn/automl_common 9 | extend-ignore = 10 | # No whitespace before ':' in [x : y] 11 | E203 12 | # No lambdas — too strict 13 | E731 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Describe the bug ## 11 | Please describe the bug you're experiencing is precise as possible. 12 | 13 | ## To Reproduce ## 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | ## Expected behavior ## 21 | A clear and concise description of what you expected to happen. 22 | 23 | ## Actual behavior, stacktrace or logfile ## 24 | Please describe the expected behavior here. If there is a stacktrace, please paste it here. 
If there is no stacktrace printed, please upload the logfile which was stored in the `tmp_folder` 25 | 26 | ## Environment and installation: ## 27 | 28 | Please give details about your installation: 29 | 30 | * OS 31 | * Is your installation in a virtual environment or conda environment? 32 | * Python version 33 | * Auto-sklearn version 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question 3 | about: Ask a question! 4 | title: "[Question] My Question?" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # Short Question Description 11 | A clear single sentence question we can try to help with? 12 | 13 | With some extra context to follow it up. This way the question is clear for both you and us without it being lost in the paragraph. 14 | Some useful information to help us with your question: 15 | * How did this question come about? 16 | * Would a small code snippet help? 17 | * What have you already looked at? 18 | 19 | Before you ask, please have a look at the 20 | * [Documentation](https://automl.github.io/auto-sklearn/master/manual.html) 21 | * If it's related but not clear, please include it in your question with a link, we'll try to make it better! 22 | * [Examples](https://automl.github.io/auto-sklearn/master/examples/index.html) 23 | * Likewise, an example can answer many questions! However we can't cover all question with examples but if you think your question would benefit from an example, let us know! 24 | * [Issues](https://github.com/automl/auto-sklearn/issues?q=label%3Aquestion+) 25 | * We try to label all questions with the label `Question`, maybe someone has already asked. If the question is about a feature, try searching more of the issues. If you find something related but doesn't directly answer your question, please link to it with #(issue number)! 
26 | 27 | # System Details (if relevant) 28 | * Which version of `auto-sklearn` are you using? 29 | * Are you running this on Linux / Mac / ... ? 30 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | updates: 4 | # This will check for updates to github actions every day 5 | # https://docs.github.com/en/enterprise-server@3.4/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot 6 | - package-ecosystem: "github-actions" 7 | directory: "/" 8 | schedule: 9 | interval: "daily" 10 | -------------------------------------------------------------------------------- /.github/workflows/benchmarking-files/benchmarks/classification.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | # binary, 1000 instances, 23 features, 2 labels 4 | - name: credit-g 5 | openml_task_id: 31 6 | 7 | # multi-label, 1080 instances, 857 features, 9 labels 8 | - name: cnae-9 9 | openml_task_id: 9981 10 | 11 | # binary, highly imbalanced, 2109 instances, 22 features, 2 labels 12 | - name: kc1 13 | openml_task_id: 3917 14 | -------------------------------------------------------------------------------- /.github/workflows/benchmarking-files/benchmarks/regression.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | # Regression, 303 instances, 14 features, 1 target 4 | - name: cholesterol 5 | openml_task_id: 2295 6 | 7 | # Regression, 345 instances, 6 features, 1 target 8 | - name: liver-disorders 9 | openml_task_id: 52948 10 | 11 | # Regression, 1460 instances, 80 features, 1 target 12 | - name: house-prices-nominal 13 | openml_task_id: 359951 14 | -------------------------------------------------------------------------------- /.github/workflows/benchmarking-files/config.yaml: 
-------------------------------------------------------------------------------- 1 | --- 2 | frameworks: 3 | 4 | definition_file: # this allows to add custom framework definitions (in {user}/frameworks.yaml) on top of the default ones. 5 | - '{root}/resources/frameworks.yaml' 6 | - '{user}/frameworks.yaml' 7 | 8 | benchmarks: 9 | 10 | definition_dir: # this allows to add custom benchmark definitions (under {user}/benchmarks) to the default ones. 11 | - '{root}/resources/benchmarks' 12 | - '{user}/benchmarks' 13 | 14 | constraints_file: # this allows to add custom constraint definitions (in {user}/constraints.yaml) on top of the default ones. 15 | - '{root}/resources/constraints.yaml' 16 | - '{user}/constraints.yaml' 17 | 18 | -------------------------------------------------------------------------------- /.github/workflows/benchmarking-files/constraints.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 10fold10min: 3 | folds: 10 4 | max_runtime_seconds: 600 5 | -------------------------------------------------------------------------------- /.github/workflows/benchmarking-files/frameworks.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # This file is unused but just marks the structure of what is created 3 | # in github action runners. 
4 | # For generating baselines we require no additional files 5 | # For creating regression tests we dynamically create this file 6 | -------------------------------------------------------------------------------- /.github/workflows/citation_cff.yml: -------------------------------------------------------------------------------- 1 | name: cffconvert 2 | 3 | on: 4 | push: 5 | paths: 6 | - CITATION.cff 7 | 8 | jobs: 9 | validate: 10 | name: "validate" 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Check out a copy of the repository 14 | uses: actions/checkout@v3.1.0 15 | 16 | - name: Check whether the citation metadata from CITATION.cff is valid 17 | uses: citation-file-format/cffconvert-github-action@2.0.0 18 | with: 19 | args: "--validate" 20 | -------------------------------------------------------------------------------- /.github/workflows/dist.yml: -------------------------------------------------------------------------------- 1 | name: dist-check 2 | 3 | on: 4 | # Manually triggerable in github 5 | workflow_dispatch: 6 | 7 | # When a push occurs on either of these branches 8 | push: 9 | branches: 10 | - master 11 | - development 12 | 13 | # When a push occurs on a PR that targets these branches 14 | pull_request: 15 | branches: 16 | - master 17 | - development 18 | 19 | jobs: 20 | dist: 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - name: Check out the repo 25 | uses: actions/checkout@v3.1.0 26 | with: 27 | submodules: recursive 28 | 29 | - name: Setup Python 30 | uses: actions/setup-python@v4 31 | with: 32 | python-version: 3.8 33 | 34 | - name: Build dist 35 | run: | 36 | python setup.py sdist 37 | 38 | - name: Twine check 39 | run: | 40 | pip install twine 41 | last_dist=$(ls -t dist/auto-sklearn-*.tar.gz | head -n 1) 42 | twine check "$last_dist" --strict 43 | 44 | - name: Install dist 45 | run: | 46 | last_dist=$(ls -t dist/auto-sklearn-*.tar.gz | head -n 1) 47 | pip install $last_dist 48 | 49 | - name: PEP 561 Compliance 50 | run: | 51 | 
pip install mypy 52 | 53 | cd .. # required to use the installed version of autosklearn 54 | 55 | # Note this doesnt perform mypy checks, only 56 | # that the types are exported 57 | if ! mypy -c "import autosklearn"; then exit 1; fi 58 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit-update.yml: -------------------------------------------------------------------------------- 1 | name: Pre-commit auto-update 2 | 3 | on: 4 | # every day at midnight 5 | schedule: 6 | - cron: "0 0 * * *" 7 | # on demand 8 | workflow_dispatch: 9 | 10 | jobs: 11 | auto-update: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3.1.0 15 | 16 | - uses: actions/setup-python@v2 17 | 18 | - uses: browniebroke/pre-commit-autoupdate-action@main 19 | 20 | - uses: peter-evans/create-pull-request@v4 21 | with: 22 | token: ${{ secrets.GITHUB_TOKEN }} 23 | branch: update/pre-commit-hooks 24 | title: Update pre-commit hooks 25 | commit-message: "chore: update pre-commit hooks" 26 | body: Update versions of pre-commit hooks to latest version. 
27 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yaml: -------------------------------------------------------------------------------- 1 | name: pre-commit 2 | 3 | on: 4 | # Manually triggerable in github 5 | workflow_dispatch: 6 | 7 | # When a push occurs on either of these branches 8 | push: 9 | branches: 10 | - master 11 | - development 12 | 13 | # When a push occurs on a PR that targets these branches 14 | pull_request: 15 | branches: 16 | - master 17 | - development 18 | 19 | jobs: 20 | run-all-files: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v3.1.0 24 | with: 25 | submodules: recursive 26 | 27 | - name: Setup Python 3.7 28 | uses: actions/setup-python@v4 29 | with: 30 | python-version: 3.7 31 | 32 | - name: Install pre-commit 33 | run: | 34 | pip install pre-commit 35 | pre-commit install 36 | 37 | - name: Run pre-commit 38 | run: | 39 | pre-commit run --all-files 40 | -------------------------------------------------------------------------------- /.github/workflows/stale.yaml: -------------------------------------------------------------------------------- 1 | name: 'Close stale issues' 2 | 3 | on: 4 | schedule: 5 | - cron: '0 7 * * *' 6 | 7 | jobs: 8 | 9 | stale: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/stale@v6 13 | with: 14 | days-before-stale: 60 15 | days-before-close: 7 16 | stale-issue-label: 'stale' 17 | only-issue-labels: 'Answered,Feedback-Required,invalid,wontfix' 18 | exempt-all-milestones: true 19 | 20 | stale-issue-message: > 21 | This issue has been automatically marked as stale because it has not had 22 | recent activity. It will be closed if no further activity occurs for the 23 | next 7 days. Thank you for your contributions. 24 | 25 | close-issue-message: > 26 | This issue has been automatically closed due to inactivity. 
27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Documentation 2 | doc/build/* 3 | doc/examples 4 | 5 | *.py[cod] 6 | 7 | # Exmaples 8 | # examples 40_advanced generate a tmp_folder 9 | examples/40_advanced/tmp_folder 10 | 11 | # C extensions 12 | *.c 13 | *.so 14 | 15 | # Packages 16 | *.egg 17 | *.egg-info 18 | dist 19 | build 20 | eggs 21 | parts 22 | bin 23 | var 24 | sdist 25 | develop-eggs 26 | .installed.cfg 27 | lib 28 | lib64 29 | __pycache__ 30 | 31 | # Installer logs 32 | pip-log.txt 33 | 34 | # Unit test / coverage reports 35 | .noseids 36 | nosetests.xml 37 | htmlcov 38 | .coverage 39 | .tox 40 | 41 | # Pytest cache 42 | .pytest_cache 43 | 44 | # pycharm 45 | .idea 46 | 47 | # VS code 48 | .vscode/ 49 | 50 | # Others 51 | *~ 52 | *.dat 53 | *.pstats 54 | *.stats 55 | *.npy 56 | *.zip 57 | download 58 | *.predict 59 | *.pkl 60 | num_run 61 | number_submission 62 | .pypirc 63 | dmypy.json 64 | *.log 65 | 66 | # Dask created work space 67 | dask-worker-space 68 | 69 | # Python distribution generated files 70 | .eggs 71 | 72 | # Unit test / coverage reports 73 | htmlcov/ 74 | cover 75 | coverage 76 | htmlcov 77 | .tox/ 78 | .coverage 79 | .coverage.* 80 | .cache 81 | nosetests.xml 82 | coverage.xml 83 | *,cover 84 | .hypothesis/ 85 | prof/ 86 | 87 | # Mypy 88 | .mypy_cache/ 89 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "autosklearn/automl_common"] 2 | path = autosklearn/automl_common 3 | url = https://github.com/automl/automl_common 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # If you see me, please update my `rev` field 
using the provided links 2 | # Click the repo and update to latest tags. 3 | # If things break on update, raise an issue 4 | repos: 5 | 6 | - repo: https://github.com/pycqa/isort 7 | rev: 5.11.5 8 | hooks: 9 | - id: isort 10 | name: isort imports autosklearn 11 | files: autosklearn/.* 12 | 13 | - id: isort 14 | name: isort imports test 15 | files: test/.* 16 | 17 | - repo: https://github.com/psf/black 18 | rev: 23.3.0 19 | hooks: 20 | - id: black 21 | name: black formatter autosklearn 22 | files: autosklearn/.* 23 | 24 | - id: black 25 | name: black formatter test 26 | files: test/.* 27 | 28 | - id: black 29 | name: black formatter examples 30 | files: examples/.* 31 | 32 | # This is disabled as most modules fail this 33 | - repo: https://github.com/pycqa/pydocstyle 34 | rev: 6.3.0 35 | hooks: 36 | - id: pydocstyle 37 | files: DISABLED # autosklearn/.* 38 | always_run: false 39 | additional_dependencies: ["toml"] # Needed to parse pyproject.toml 40 | 41 | - repo: https://github.com/pre-commit/mirrors-mypy 42 | rev: v1.2.0 43 | hooks: 44 | - id: mypy 45 | name: mypy auto-sklearn 46 | files: autosklearn/.* 47 | 48 | - repo: https://github.com/PyCQA/flake8 49 | rev: 5.0.4 50 | hooks: 51 | - id: flake8 52 | name: flake8 autosklearn 53 | files: autosklearn/.* 54 | 55 | - id: flake8 56 | name: flake8 test 57 | files: test/.* 58 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | WORKDIR /auto-sklearn 4 | 5 | # install linux packages 6 | RUN apt-get update 7 | 8 | # Set the locale 9 | # workaround for https://github.com/automl/auto-sklearn/issues/867 10 | RUN apt-get -y install locales 11 | RUN touch /usr/share/locale/locale.alias 12 | RUN sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && locale-gen 13 | ENV LANG en_US.UTF-8 14 | ENV LANGUAGE en_US:en 15 | ENV LC_ALL en_US.UTF-8 16 | 17 | # set 
environment variables to only use one core 18 | RUN export OPENBLAS_NUM_THREADS=1 19 | RUN export MKL_NUM_THREADS=1 20 | RUN export BLAS_NUM_THREADS=1 21 | RUN export OMP_NUM_THREADS=1 22 | 23 | # install build requirements 24 | RUN apt install -y python3-dev python3-pip 25 | RUN pip3 install --upgrade setuptools 26 | RUN apt install -y build-essential 27 | 28 | RUN apt install -y swig 29 | 30 | # Copy the checkout autosklearn version for installation 31 | ADD . /auto-sklearn/ 32 | 33 | # Upgrade pip then install dependencies 34 | RUN pip3 install --upgrade pip 35 | 36 | # Install 37 | RUN pip3 install "/auto-sklearn[test, examples]" 38 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2014-2021, AutoML Freiburg 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | include requirements.txt 3 | include autosklearn/util/logging.yaml 4 | include autosklearn/requirements.txt 5 | include autosklearn/py.typed 6 | 7 | # Meta-data 8 | recursive-include autosklearn/metalearning/files *.arff *.csv *.txt 9 | recursive-include autosklearn/experimental *.json 10 | 11 | # Remove tests from automl_common 12 | prune autosklearn/automl_common/test 13 | exclude autosklearn/automl_common/setup.py 14 | 15 | # Include automl_common LICENSE and README 16 | include autosklearn/automl_common/LICENSE 17 | include autosklearn/automl_common/README.md 18 | 19 | -------------------------------------------------------------------------------- /autosklearn/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | import os 3 | import sys 4 | 5 | import pkg_resources 6 | 7 | from autosklearn.__version__ import __version__ # noqa (imported but unused) 8 | from autosklearn.util import dependencies 9 | 10 | requirements = pkg_resources.resource_string("autosklearn", "requirements.txt") 11 | requirements = requirements.decode("utf-8") 12 | 13 | dependencies.verify_packages(requirements) 14 | 15 | if os.name != "posix": 16 | raise ValueError( 17 | 
"Detected unsupported operating system: %s. Please check " 18 | "the compability information of auto-sklearn: https://automl.github.io" 19 | "/auto-sklearn/stable/installation.html#windows-osx-compability" % sys.platform 20 | ) 21 | 22 | if sys.version_info < (3, 6): 23 | raise ValueError( 24 | "Unsupported python version %s found. Auto-sklearn requires Python " 25 | "3.6 or higher." % sys.version_info 26 | ) 27 | -------------------------------------------------------------------------------- /autosklearn/__version__.py: -------------------------------------------------------------------------------- 1 | """Version information.""" 2 | 3 | # The following line *must* be the last in the module, exactly as formatted: 4 | __version__ = "0.16.0dev" 5 | -------------------------------------------------------------------------------- /autosklearn/askl_typing.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Union 2 | 3 | FEAT_TYPE_TYPE = Dict[Union[str, int], str] 4 | -------------------------------------------------------------------------------- /autosklearn/classification.py: -------------------------------------------------------------------------------- 1 | from autosklearn.estimators import AutoSklearnClassifier # noqa (imported but unused) 2 | -------------------------------------------------------------------------------- /autosklearn/constants.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | BINARY_CLASSIFICATION = 1 4 | MULTICLASS_CLASSIFICATION = 2 5 | MULTILABEL_CLASSIFICATION = 3 6 | REGRESSION = 4 7 | MULTIOUTPUT_REGRESSION = 5 8 | 9 | REGRESSION_TASKS = [REGRESSION, MULTIOUTPUT_REGRESSION] 10 | CLASSIFICATION_TASKS = [ 11 | BINARY_CLASSIFICATION, 12 | MULTICLASS_CLASSIFICATION, 13 | MULTILABEL_CLASSIFICATION, 14 | ] 15 | 16 | TASK_TYPES = REGRESSION_TASKS + CLASSIFICATION_TASKS 17 | 18 | TASK_TYPES_TO_STRING = { 19 | 
BINARY_CLASSIFICATION: "binary.classification", 20 | MULTICLASS_CLASSIFICATION: "multiclass.classification", 21 | MULTILABEL_CLASSIFICATION: "multilabel.classification", 22 | REGRESSION: "regression", 23 | MULTIOUTPUT_REGRESSION: "multioutput.regression", 24 | } 25 | 26 | STRING_TO_TASK_TYPES = { 27 | "binary.classification": BINARY_CLASSIFICATION, 28 | "multiclass.classification": MULTICLASS_CLASSIFICATION, 29 | "multilabel.classification": MULTILABEL_CLASSIFICATION, 30 | "regression": REGRESSION, 31 | "multioutput.regression": MULTIOUTPUT_REGRESSION, 32 | } 33 | -------------------------------------------------------------------------------- /autosklearn/data/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /autosklearn/ensemble_building/__init__.py: -------------------------------------------------------------------------------- 1 | from autosklearn.ensemble_building.builder import EnsembleBuilder 2 | from autosklearn.ensemble_building.manager import EnsembleBuilderManager 3 | from autosklearn.ensemble_building.run import Run 4 | 5 | __all__ = ["EnsembleBuilder", "EnsembleBuilderManager", "Run"] 6 | -------------------------------------------------------------------------------- /autosklearn/ensembles/__init__.py: -------------------------------------------------------------------------------- 1 | from .abstract_ensemble import AbstractEnsemble, AbstractMultiObjectiveEnsemble 2 | from .ensemble_selection import EnsembleSelection 3 | from .multiobjective_dummy_ensemble import MultiObjectiveDummyEnsemble 4 | from .singlebest_ensemble import ( 5 | SingleBest, 6 | SingleBestFromRunhistory, 7 | SingleModelEnsemble, 8 | ) 9 | 10 | __all__ = [ 11 | "AbstractEnsemble", 12 | "AbstractMultiObjectiveEnsemble", 13 | "EnsembleSelection", 14 | "SingleBestFromRunhistory", 15 | "SingleBest", 16 | "SingleModelEnsemble", 
17 | "MultiObjectiveDummyEnsemble", 18 | ] 19 | -------------------------------------------------------------------------------- /autosklearn/evaluation/util.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional, Tuple, Union 2 | 3 | import multiprocessing 4 | import queue 5 | 6 | __all__ = ["read_queue"] 7 | 8 | 9 | def read_queue( 10 | queue_: multiprocessing.Queue, 11 | ) -> List[Dict[str, Union[str, bool, int, float, List, Dict, Tuple]]]: 12 | stack = [] 13 | while True: 14 | try: 15 | return_value = queue_.get(timeout=1) 16 | except queue.Empty: 17 | break 18 | 19 | # Check if there is a special placeholder value which tells us that 20 | # we don't have to wait until the queue times out in order to 21 | # retrieve the final value! 22 | if "final_queue_element" in return_value: 23 | del return_value["final_queue_element"] 24 | do_break = True 25 | else: 26 | do_break = False 27 | stack.append(return_value) 28 | if do_break: 29 | break 30 | 31 | if len(stack) == 0: 32 | raise queue.Empty 33 | else: 34 | return stack 35 | 36 | 37 | def empty_queue(queue_: multiprocessing.Queue) -> None: 38 | while True: 39 | try: 40 | queue_.get(block=False) 41 | except queue.Empty: 42 | break 43 | 44 | queue_.close() 45 | 46 | 47 | def extract_learning_curve( 48 | stack: List[Dict[str, Any]], key: Optional[str] = None 49 | ) -> List[float]: 50 | learning_curve = [] 51 | for entry in stack: 52 | if key: 53 | learning_curve.append(entry["additional_run_info"][key]) 54 | else: 55 | learning_curve.append(entry["loss"]) 56 | return list(learning_curve) 57 | -------------------------------------------------------------------------------- /autosklearn/experimental/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/experimental/__init__.py 
-------------------------------------------------------------------------------- /autosklearn/metalearning/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | __author__ = "feurerm" 3 | -------------------------------------------------------------------------------- /autosklearn/metalearning/files/accuracy_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/accuracy_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/accuracy_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/accuracy_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/accuracy_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/accuracy_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/accuracy_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/accuracy_multiclass.classification_sparse/readme.txt 
-------------------------------------------------------------------------------- /autosklearn/metalearning/files/average_precision_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/average_precision_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/average_precision_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/average_precision_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/average_precision_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/average_precision_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/average_precision_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/average_precision_multiclass.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/balanced_accuracy_binary.classification_dense/readme.txt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/balanced_accuracy_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/balanced_accuracy_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/balanced_accuracy_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/balanced_accuracy_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/balanced_accuracy_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/balanced_accuracy_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/balanced_accuracy_multiclass.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/f1_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_binary.classification_dense/readme.txt 
-------------------------------------------------------------------------------- /autosklearn/metalearning/files/f1_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/f1_macro_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_macro_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/f1_macro_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_macro_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/f1_macro_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_macro_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/f1_macro_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_macro_multiclass.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/f1_micro_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_micro_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/f1_micro_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_micro_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/f1_micro_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_micro_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/f1_micro_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_micro_multiclass.classification_sparse/readme.txt -------------------------------------------------------------------------------- 
/autosklearn/metalearning/files/f1_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/f1_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_multiclass.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/f1_samples_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_samples_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/f1_samples_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_samples_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/f1_samples_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_samples_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/f1_samples_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_samples_multiclass.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/f1_weighted_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_weighted_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/f1_weighted_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_weighted_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/f1_weighted_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_weighted_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- 
/autosklearn/metalearning/files/f1_weighted_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/f1_weighted_multiclass.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/log_loss_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/log_loss_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/log_loss_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/log_loss_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/log_loss_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/log_loss_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/log_loss_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/log_loss_multiclass.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/mean_absolute_error_regression_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/mean_absolute_error_regression_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/mean_absolute_error_regression_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/mean_absolute_error_regression_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/mean_squared_error_regression_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/mean_squared_error_regression_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/mean_squared_error_regression_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/mean_squared_error_regression_sparse/readme.txt -------------------------------------------------------------------------------- 
/autosklearn/metalearning/files/mean_squared_log_error_regression_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/mean_squared_log_error_regression_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/mean_squared_log_error_regression_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/mean_squared_log_error_regression_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/median_absolute_error_regression_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/median_absolute_error_regression_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/median_absolute_error_regression_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/median_absolute_error_regression_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/precision_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/precision_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/precision_macro_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_macro_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/precision_macro_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_macro_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/precision_macro_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_macro_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- 
/autosklearn/metalearning/files/precision_macro_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_macro_multiclass.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/precision_micro_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_micro_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/precision_micro_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_micro_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/precision_micro_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_micro_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/precision_micro_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_micro_multiclass.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/precision_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/precision_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_multiclass.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/precision_samples_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_samples_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/precision_samples_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_samples_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- 
/autosklearn/metalearning/files/precision_samples_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_samples_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/precision_samples_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_samples_multiclass.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/precision_weighted_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_weighted_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/precision_weighted_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_weighted_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/precision_weighted_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_weighted_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/precision_weighted_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/precision_weighted_multiclass.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/r2_regression_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/r2_regression_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/r2_regression_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/r2_regression_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_binary.classification_sparse/readme.txt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_macro_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_macro_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_macro_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_macro_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_macro_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_macro_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_macro_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_macro_multiclass.classification_sparse/readme.txt 
-------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_micro_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_micro_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_micro_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_micro_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_micro_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_micro_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_micro_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_micro_multiclass.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_multiclass.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_samples_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_samples_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_samples_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_samples_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_samples_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_samples_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- 
/autosklearn/metalearning/files/recall_samples_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_samples_multiclass.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_weighted_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_weighted_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_weighted_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_weighted_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_weighted_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_weighted_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/recall_weighted_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/recall_weighted_multiclass.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/roc_auc_binary.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/roc_auc_binary.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/roc_auc_binary.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/roc_auc_binary.classification_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/roc_auc_multiclass.classification_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/roc_auc_multiclass.classification_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/roc_auc_multiclass.classification_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/roc_auc_multiclass.classification_sparse/readme.txt -------------------------------------------------------------------------------- 
/autosklearn/metalearning/files/root_mean_squared_error_regression_dense/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/root_mean_squared_error_regression_dense/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/files/root_mean_squared_error_regression_sparse/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/files/root_mean_squared_error_regression_sparse/readme.txt -------------------------------------------------------------------------------- /autosklearn/metalearning/input/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/input/__init__.py -------------------------------------------------------------------------------- /autosklearn/metalearning/metafeatures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/metalearning/metafeatures/__init__.py -------------------------------------------------------------------------------- /autosklearn/metalearning/metalearning/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "feurerm" 2 | -------------------------------------------------------------------------------- /autosklearn/metalearning/metalearning/clustering/__init__.py: -------------------------------------------------------------------------------- 
# -*- encoding: utf-8 -*-

import time

from autosklearn.constants import (
    MULTICLASS_CLASSIFICATION,
    MULTILABEL_CLASSIFICATION,
    TASK_TYPES_TO_STRING,
)
from autosklearn.metalearning.optimizers.metalearn_optimizer.metalearner import (
    MetaLearningOptimizer,
)


def suggest_via_metalearning(
    meta_base, dataset_name, metric, task, sparse, num_initial_configurations, logger
):
    """Suggest initial configurations for SMBO via meta-learning.

    Multi-label tasks are mapped onto multi-class for the meta-data lookup.
    Returns at most ``num_initial_configurations`` suggested runs.
    """
    # Meta-data only exists for multiclass problems, so treat multilabel as such.
    effective_task = (
        MULTICLASS_CLASSIFICATION if task == MULTILABEL_CLASSIFICATION else task
    )
    task_string = TASK_TYPES_TO_STRING[effective_task]
    logger.info(task_string)

    read_start = time.time()
    optimizer = MetaLearningOptimizer(
        dataset_name=dataset_name,
        configuration_space=meta_base.configuration_space,
        meta_base=meta_base,
        distance="l1",
        seed=1,
        logger=logger,
    )
    logger.info("Reading meta-data took %5.2f seconds", time.time() - read_start)

    suggestions = optimizer.metalearning_suggest_all(exclude_double_configurations=True)
    return suggestions[:num_initial_configurations]
Each line contains one or more absolute paths to experiment 11 | # pickles, each seperated with a whitespace 12 | experiments = 13 | 14 | optimizer_version = 0.0.1dev 15 | path_to_optimizer = ./metalearner.py 16 | -------------------------------------------------------------------------------- /autosklearn/metalearning/optimizers/metalearn_optimizer/metalearn_optimizer_parser.py: -------------------------------------------------------------------------------- 1 | def manipulate_config(config): 2 | return config 3 | -------------------------------------------------------------------------------- /autosklearn/metrics/util.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | import numpy as np 3 | 4 | 5 | def sanitize_array(array: np.ndarray) -> np.ndarray: 6 | """ 7 | Replace NaN and Inf (there should not be any!) 8 | :param array: 9 | :return: 10 | """ 11 | a = np.ravel(array) 12 | maxi = np.nanmax(a[np.isfinite(a)]) 13 | mini = np.nanmin(a[np.isfinite(a)]) 14 | array[array == float("inf")] = maxi 15 | array[array == float("-inf")] = mini 16 | mid = (maxi + mini) / 2 17 | array[np.isnan(array)] = mid 18 | return array 19 | -------------------------------------------------------------------------------- /autosklearn/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/pipeline/__init__.py -------------------------------------------------------------------------------- /autosklearn/pipeline/components/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/pipeline/components/__init__.py -------------------------------------------------------------------------------- 
from typing import Dict, Optional, Tuple, Union

import numpy as np
from ConfigSpace.configuration_space import ConfigurationSpace

from autosklearn.askl_typing import FEAT_TYPE_TYPE
from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA


class NoEncoding(AutoSklearnPreprocessingAlgorithm):
    """Identity component: leaves categorical variables unencoded."""

    def __init__(
        self, random_state: Optional[Union[int, np.random.RandomState]] = None
    ) -> None:
        # Stateless; random_state is accepted only for API uniformity.
        pass

    def fit(
        self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None
    ) -> "NoEncoding":
        """Nothing to learn; mark the component as fitted."""
        self.preprocessor = "passthrough"
        self.fitted_ = True
        return self

    def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE:
        """Return the input unchanged."""
        return X

    @staticmethod
    def get_properties(
        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
    ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]:
        """Describe which task/data types this component supports."""
        properties: Dict[str, Optional[Union[str, int, bool, Tuple]]] = {
            "shortname": "no encoding",
            "name": "No categorical variable encoding",
            "handles_regression": True,
            "handles_classification": True,
            "handles_multiclass": True,
            "handles_multilabel": True,
            "handles_multioutput": True,
            "handles_sparse": True,
            "handles_dense": True,
            "input": (DENSE, SPARSE, UNSIGNED_DATA),
            "output": (INPUT,),
        }
        return properties

    @staticmethod
    def get_hyperparameter_search_space(
        feat_type: Optional[FEAT_TYPE_TYPE] = None,
        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
    ) -> ConfigurationSpace:
        """No hyperparameters: an empty configuration space."""
        return ConfigurationSpace()
from typing import Optional, Union

import numpy as np
from ConfigSpace.configuration_space import ConfigurationSpace
from sklearn.base import BaseEstimator
from sklearn.exceptions import NotFittedError

from autosklearn.askl_typing import FEAT_TYPE_TYPE
from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm


class Rescaling(object):
    """Mixin wrapping a scaler held in ``self.preprocessor``.

    Subclasses are expected to assign an sklearn scaler in their
    constructor; fit_transform is deliberately not offered
    (not supported by the scalers as of sklearn 0.19.1).
    """

    def __init__(
        self, random_state: Optional[Union[int, np.random.RandomState]] = None
    ) -> None:
        # Subclasses replace this with a concrete scaler instance.
        self.preprocessor: Optional[BaseEstimator] = None

    def fit(
        self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None
    ) -> "AutoSklearnPreprocessingAlgorithm":
        """Fit the wrapped scaler on X; y is ignored."""
        if self.preprocessor is None:
            raise NotFittedError()
        self.preprocessor.fit(X)
        return self

    def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE:
        """Apply the wrapped scaler to X."""
        if self.preprocessor is None:
            raise NotFittedError()
        return self.preprocessor.transform(X)

    @staticmethod
    def get_hyperparameter_search_space(
        feat_type: Optional[FEAT_TYPE_TYPE] = None,
        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
    ) -> ConfigurationSpace:
        """No hyperparameters: an empty configuration space."""
        return ConfigurationSpace()
from typing import Dict, Optional, Tuple, Union

from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling import (  # noqa: E501
    Rescaling,
)
from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA


class NoRescalingComponent(Rescaling, AutoSklearnPreprocessingAlgorithm):
    """Identity rescaler: passes the data through untouched."""

    def fit(
        self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None
    ) -> "AutoSklearnPreprocessingAlgorithm":
        """Nothing to learn; mark the component as fitted."""
        self.preprocessor = "passthrough"
        return self

    def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE:
        """Return the input unchanged."""
        return X

    @staticmethod
    def get_properties(
        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
    ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]:
        """Describe which task/data types this component supports."""
        properties: Dict[str, Optional[Union[str, int, bool, Tuple]]] = {
            "shortname": "NoRescaling",
            "name": "NoRescaling",
            "handles_missing_values": False,
            "handles_nominal_values": False,
            "handles_numerical_features": True,
            "prefers_data_scaled": False,
            "prefers_data_normalized": False,
            "handles_regression": True,
            "handles_classification": True,
            "handles_multiclass": True,
            "handles_multilabel": True,
            "handles_multioutput": True,
            "is_deterministic": True,
            # TODO find out if this is right!
            "handles_sparse": True,
            "handles_dense": True,
            "input": (SPARSE, DENSE, UNSIGNED_DATA),
            "output": (INPUT,),
            "preferred_dtype": None,
        }
        return properties
from typing import Dict, Optional, Tuple, Union

import numpy as np

from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE
from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling import (  # noqa: E501
    Rescaling,
)
from autosklearn.pipeline.constants import DENSE, INPUT, UNSIGNED_DATA


class PowerTransformerComponent(Rescaling, AutoSklearnPreprocessingAlgorithm):
    """Rescaling component backed by sklearn's ``PowerTransformer``."""

    def __init__(
        self,
        random_state: Optional[Union[int, np.random.RandomState]] = None,
    ) -> None:
        # Imported lazily so sklearn is only loaded when the component is used.
        from sklearn.preprocessing import PowerTransformer

        self.preprocessor = PowerTransformer(copy=False)

    @staticmethod
    def get_properties(
        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
    ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]:
        """Describe which task/data types this component supports."""
        properties: Dict[str, Optional[Union[str, int, bool, Tuple]]] = {
            "shortname": "PowerTransformer",
            "name": "PowerTransformer",
            "handles_missing_values": False,
            "handles_nominal_values": False,
            "handles_numerical_features": True,
            "prefers_data_scaled": False,
            "prefers_data_normalized": False,
            "handles_regression": True,
            "handles_classification": True,
            "handles_multiclass": True,
            "handles_multilabel": True,
            "handles_multioutput": True,
            "is_deterministic": True,
            # TODO find out if this is right!
            "handles_sparse": False,
            "handles_dense": True,
            "input": (DENSE, UNSIGNED_DATA),
            "output": (INPUT,),
            "preferred_dtype": None,
        }
        return properties
class Densifier(AutoSklearnPreprocessingAlgorithm):
    """Convert a sparse feature matrix into a dense ndarray.

    Adapter so that components which only accept dense input can follow
    sparse-output preprocessors in the pipeline; dense input is passed
    through untouched.
    """

    def __init__(self, random_state=None):
        # Stateless component; random_state is accepted only for API uniformity.
        pass

    def fit(self, X, y=None):
        """No-op fit; only records that fit() was called."""
        self.fitted_ = True
        return self

    def transform(self, X):
        """Return a dense ndarray for X (copies only when X is sparse)."""
        from scipy import sparse

        if sparse.issparse(X):
            # toarray() yields an ndarray directly; the previous
            # todense().getA() went through the deprecated np.matrix type.
            return X.toarray()
        else:
            return X

    @staticmethod
    def get_properties(dataset_properties=None):
        """Return the static capability flags used for pipeline matching."""
        # Fixed copy-paste error: shortname/name previously said
        # "RandomTreesEmbedding", which is a different component.
        return {
            "shortname": "Densifier",
            "name": "Densifier",
            "handles_regression": True,
            "handles_classification": True,
            "handles_multiclass": True,
            "handles_multilabel": True,
            "handles_multioutput": True,
            "is_deterministic": True,
            "input": (SPARSE, UNSIGNED_DATA),
            "output": (DENSE, INPUT),
        }

    @staticmethod
    def get_hyperparameter_search_space(
        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
    ):
        """Densifier has no hyperparameters: return an empty space."""
        cs = ConfigurationSpace()
        return cs
class SelectPercentileBase(object):
    """Shared fit/transform logic for percentile-based feature selectors.

    Subclasses are expected to provide ``self.score_func`` and
    ``self.percentile`` before ``fit`` is called.
    """

    def fit(self, X, y):
        """Fit a SelectPercentile selector with the subclass's settings."""
        import sklearn.feature_selection

        selector = sklearn.feature_selection.SelectPercentile(
            score_func=self.score_func, percentile=self.percentile
        )
        selector.fit(X, y)
        self.preprocessor = selector
        return self

    def transform(self, X):
        """Apply the fitted selector; refuse to return an empty matrix."""
        if self.preprocessor is None:
            raise NotImplementedError()
        reduced = self.preprocessor.transform(X)
        if reduced.shape[1] == 0:
            raise ValueError("%s removed all features." % self.__class__.__name__)
        return reduced
2 | """ 3 | BINARY_CLASSIFICATION = 1 4 | MULTICLASS_CLASSIFICATION = 2 5 | MULTILABEL_CLASSIFICATION = 3 6 | REGRESSION = 4 7 | MULTIOUTPUT_REGRESSION = 5 8 | 9 | REGRESSION_TASKS = [REGRESSION, MULTIOUTPUT_REGRESSION] 10 | CLASSIFICATION_TASKS = [ 11 | BINARY_CLASSIFICATION, 12 | MULTICLASS_CLASSIFICATION, 13 | MULTILABEL_CLASSIFICATION, 14 | ] 15 | 16 | TASK_TYPES = REGRESSION_TASKS + CLASSIFICATION_TASKS 17 | 18 | TASK_TYPES_TO_STRING = { 19 | BINARY_CLASSIFICATION: "binary.classification", 20 | MULTICLASS_CLASSIFICATION: "multiclass.classification", 21 | MULTILABEL_CLASSIFICATION: "multilabel.classification", 22 | REGRESSION: "regression", 23 | MULTIOUTPUT_REGRESSION: "multioutput.regression", 24 | } 25 | 26 | STRING_TO_TASK_TYPES = { 27 | "binary.classification": BINARY_CLASSIFICATION, 28 | "multiclass.classification": MULTICLASS_CLASSIFICATION, 29 | "multilabel.classification": MULTILABEL_CLASSIFICATION, 30 | "regression": REGRESSION, 31 | "multioutput.regression": MULTIOUTPUT_REGRESSION, 32 | } 33 | 34 | DENSE = 6 35 | SPARSE = 7 36 | PREDICTIONS = 8 37 | INPUT = 9 38 | 39 | SIGNED_DATA = 10 40 | UNSIGNED_DATA = 11 41 | 42 | DATASET_PROPERTIES_TO_STRING = { 43 | DENSE: "dense", 44 | SPARSE: "sparse", 45 | PREDICTIONS: "predictions", 46 | INPUT: "input", 47 | SIGNED_DATA: "signed data", 48 | UNSIGNED_DATA: "unsigned data", 49 | } 50 | -------------------------------------------------------------------------------- /autosklearn/pipeline/implementations/CategoryShift.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import sparse 3 | from sklearn.base import BaseEstimator, TransformerMixin 4 | from sklearn.utils import check_array 5 | 6 | 7 | class CategoryShift(BaseEstimator, TransformerMixin): 8 | """Add 3 to every category.""" 9 | 10 | def __init__(self, random_state=None): 11 | self.random_state = random_state 12 | 13 | def _convert_and_check_X(self, X): 14 | X_data = X.data if 
sparse.issparse(X) else X 15 | 16 | # Check if data is numeric and positive 17 | if X_data.dtype.kind not in set("buif") or np.nanmin(X_data) < 0: 18 | raise ValueError( 19 | "Categories should be non-negative numbers. " 20 | "NOTE: floats will be casted to integers." 21 | ) 22 | 23 | # Use check_array to make sure we are using the right kind of sparse array 24 | # Notice that we cannot convert the array to integer right now. That would get 25 | # rid of the np.nans and we need them later on for the imputation. 26 | X = check_array(X, accept_sparse="csc", force_all_finite=False, copy=True) 27 | return X 28 | 29 | def fit(self, X, y=None): 30 | self._convert_and_check_X(X) 31 | return self 32 | 33 | def transform(self, X): 34 | X = self._convert_and_check_X(X) 35 | # Increment everything by three to account for the fact that 36 | # np.NaN will get an index of two, and coalesced values will get index of 37 | # one, index of zero is not assigned to also work with sparse data 38 | X_data = X.data if sparse.issparse(X) else X 39 | X_data += 3 40 | return X 41 | -------------------------------------------------------------------------------- /autosklearn/pipeline/implementations/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "feurerm" 2 | -------------------------------------------------------------------------------- /autosklearn/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/autosklearn/py.typed -------------------------------------------------------------------------------- /autosklearn/regression.py: -------------------------------------------------------------------------------- 1 | from autosklearn.estimators import AutoSklearnRegressor # noqa (imported but unused) 2 | -------------------------------------------------------------------------------- 
# -*- encoding: utf-8 -*-
import re

# One version constraint such as ">=1.2.3". The %d in the group names is
# filled in below so the two constraint slots of RE_PATTERN get distinct
# group names (operation1/version1 and operation2/version2). The named
# groups were lost in a previous edit, which made "SUBPATTERN % (1, 1)"
# fail: the string no longer contained any format placeholder.
SUBPATTERN = r"((?P<operation%d>==|>=|>|<)(?P<version%d>(\d+)?(\.[a-zA-Z0-9]+)?(\.[a-zA-Z0-9]+)?))"  # noqa: E501

# Matches requirement strings like "package", "package>=1.0" or
# "package>=1.0,<2.0" and exposes name/operation/version as named groups.
RE_PATTERN = re.compile(
    r"^(?P<name>[\w\-]+)%s?(,%s)?$" % (SUBPATTERN % (1, 1), SUBPATTERN % (2, 2))
)
15 | """ 16 | if not isinstance(estimator, str): 17 | estimator = estimator.__class__.__name__ 18 | if X.dtype.kind != "f": 19 | warnings.warn( 20 | "%s assumes floating point values as input, " 21 | "got %s" % (estimator, X.dtype) 22 | ) 23 | return True 24 | return False 25 | 26 | 27 | def check_pid(pid: int) -> bool: 28 | """Check For the existence of a unix pid.""" 29 | try: 30 | os.kill(pid, 0) 31 | except OSError: 32 | return False 33 | else: 34 | return True 35 | 36 | 37 | def check_true(p: str) -> bool: 38 | if p in ("True", "true", 1, True): 39 | return True 40 | return False 41 | 42 | 43 | def check_false(p: str) -> bool: 44 | if p in ("False", "false", 0, False): 45 | return True 46 | return False 47 | 48 | 49 | def check_none(p: str) -> bool: 50 | if p in ("None", "none", None): 51 | return True 52 | return False 53 | 54 | 55 | def check_for_bool(p: str) -> bool: 56 | if check_false(p): 57 | return False 58 | elif check_true(p): 59 | return True 60 | else: 61 | raise ValueError("%s is not a bool" % str(p)) 62 | -------------------------------------------------------------------------------- /autosklearn/util/logging.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 1 3 | disable_existing_loggers: False 4 | formatters: 5 | simple: 6 | format: '[%(levelname)s] [%(asctime)s:%(name)s] %(message)s' 7 | 8 | handlers: 9 | console: 10 | class: logging.StreamHandler 11 | level: WARNING 12 | formatter: simple 13 | stream: ext://sys.stdout 14 | 15 | file_handler: 16 | class: logging.FileHandler 17 | level: DEBUG 18 | formatter: simple 19 | filename: autosklearn.log 20 | 21 | distributed_logfile: 22 | class: logging.FileHandler 23 | level: DEBUG 24 | formatter: simple 25 | filename: distributed.log 26 | 27 | root: 28 | level: DEBUG 29 | handlers: [console, file_handler] 30 | 31 | loggers: 32 | autosklearn.metalearning: 33 | level: DEBUG 34 | handlers: [file_handler] 35 | 36 | 
def pareto_front(values: np.ndarray, *, is_loss: bool = True) -> np.ndarray:
    """Calculate the pareto front.

    source from: https://stackoverflow.com/a/40239615

    Note
    ----
    Works on the assumption that every value is either something to minimize or
    something to maximize, based on ``is_loss``.

    Parameters
    ----------
    values: np.ndarray [n_models, n_metrics]
        The value for each of the metrics

    is_loss: bool = True
        Whether the metrics are a loss or a score

    Returns
    -------
    np.ndarray
        A boolean mask where true indicates if the model on the pareto front
    """
    # "Better" means strictly smaller for losses, strictly larger for scores.
    better = operator.lt if is_loss else operator.gt

    on_front = np.ones(values.shape[0], dtype=bool)
    for idx, candidate in enumerate(values):
        if not on_front[idx]:
            # Already dominated by an earlier candidate; nothing to do.
            continue
        # Among the still-efficient points, keep only those that beat the
        # candidate in at least one metric ...
        on_front[on_front] = np.any(better(values[on_front], candidate), axis=1)
        # ... and the candidate itself, which the line above just cleared.
        on_front[idx] = True

    return on_front
class SmacRunCallback(IncorporateRunResultCallback):
    """Adapter wrapping a plain callable as a SMAC run-result callback.

    Lets callers pass a simple function (see ``SMACCallback``) instead of
    subclassing ``IncorporateRunResultCallback`` themselves.
    """

    def __init__(self, f: SMACCallback):
        # The wrapped callable; invoked once per completed SMAC run.
        self.f = f

    def __call__(
        self,
        smbo: SMBO,
        run_info: RunInfo,
        result: RunValue,
        time_left: float,
    ) -> bool | None:
        """Forward the completed run to the wrapped callable.

        Parameters
        ----------
        smbo: SMBO
            The SMAC SMBO object

        run_info: RunInfo
            Information about the run completed

        result: RunValue
            The results of the run

        time_left: float
            How much time is left for the remaining runs

        Returns
        -------
        bool | None
            If False is returned, the optimization loop will stop
        """
        return self.f(smbo, run_info, result, time_left)
13 | -------------------------------------------------------------------------------- /doc/_templates/function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. raw:: html 9 | 10 |
# -*- encoding: utf-8 -*-
"""
==============
Classification
==============

The following example shows how to fit a simple classification model with
*auto-sklearn*.
"""
from pprint import pprint

import sklearn.datasets
import sklearn.metrics

# Imported explicitly: train_test_split below lives in this submodule and
# previously resolved only because other sklearn imports pull it in as a
# side effect.
import sklearn.model_selection

import autosklearn.classification


############################################################################
# Data Loading
# ============

X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1
)

############################################################################
# Build and fit a classifier
# ==========================

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=120,
    per_run_time_limit=30,
    tmp_folder="/tmp/autosklearn_classification_example_tmp",
)
automl.fit(X_train, y_train, dataset_name="breast_cancer")

############################################################################
# View the models found by auto-sklearn
# =====================================

print(automl.leaderboard())

############################################################################
# Print the final ensemble constructed by auto-sklearn
# ====================================================

pprint(automl.show_models(), indent=4)

###########################################################################
# Get the Score of the final ensemble
# ===================================

predictions = automl.predict(X_test)
print("Accuracy score:", sklearn.metrics.accuracy_score(y_test, predictions))
def accuracy(solution, prediction):
    """Custom accuracy: fraction of predictions matching the solution."""
    matches = solution == prediction
    return np.mean(matches)


def error(solution, prediction):
    """Custom error: fraction of predictions differing from the solution."""
    mismatches = solution != prediction
    return np.mean(mismatches)


def accuracy_wk(solution, prediction, dummy):
    """Accuracy variant that accepts (and asserts) an extra unused argument."""
    assert dummy is None
    matches = solution == prediction
    return np.mean(matches)


def error_wk(solution, prediction, dummy):
    """Error variant that accepts (and asserts) an extra unused argument."""
    assert dummy is None
    mismatches = solution != prediction
    return np.mean(mismatches)
# -*- encoding: utf-8 -*-
"""
===================================
Parallel Usage on a single machine
===================================

*Auto-sklearn* uses
`dask.distributed <https://distributed.dask.org/>`_
for parallel optimization.

This example shows how to start *Auto-sklearn* to use multiple cores on a
single machine. Using this mode, *Auto-sklearn* starts a dask cluster,
manages the workers and takes care of shutting down the cluster once the
computation is done.
To run *Auto-sklearn* on multiple machines check the example
:ref:`sphx_glr_examples_60_search_example_parallel_manual_spawning_cli.py`.
"""

import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics

import autosklearn.classification


############################################################################
# Data Loading
# ============
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1
)

############################################################################
# Build and fit a classifier
# ==========================
#
# To use ``n_jobs`` we must guard the code with a main check
if __name__ == "__main__":

    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=120,
        per_run_time_limit=30,
        tmp_folder="/tmp/autosklearn_parallel_1_example_tmp",
        n_jobs=4,
        # Each one of the 4 jobs is allocated 3GB
        memory_limit=3072,
        seed=5,
    )
    automl.fit(X_train, y_train, dataset_name="breast_cancer")

    # Print statistics about the auto-sklearn run such as number of
    # iterations, number of models failed with a time out.
    print(automl.sprint_statistics())
import os
import glob
import inspect
import importlib

import sklearn.base

# Every python source file shipped with the installed sklearn package.
files = glob.glob(
    os.path.join(os.path.dirname(sklearn.__file__), "**/*.py"), recursive=True
)


def find_all(cls):
    """Import every sklearn module and collect all subclasses of ``cls``.

    Prints a separator followed by the classes in sorted order, and
    returns the sorted list.
    """
    found = set()
    for path in files:
        parts = path.split("/")
        parts[-1] = parts[-1].replace(".py", "")
        # Module path relative to the sklearn package root.
        start = parts.index("sklearn")
        module_name = ".".join(parts[start:])
        module = importlib.import_module(module_name)
        for member in module.__dict__.values():
            if inspect.isclass(member) and issubclass(member, cls):
                found.add(member)
    print("#####")
    ordered = sorted(found, key=str)
    for klass in ordered:
        print(klass)
    return ordered


# classifiers = find_all(sklearn.base.ClassifierMixin)
# regressors = find_all(sklearn.base.RegressorMixin)
preprocs = find_all(sklearn.base.TransformerMixin)
Splits that would create child nodes with net zero or negative weight are ignored while searching for a split in each node. In the case of classification, splits are also ignored if they would result in any single class carrying a negative weight in either child node. 5 | GaussianNB: - 6 | GB: - 7 | kNN: - 8 | LDA: priors : array, optional, shape = [n_classes] ? 9 | LibLinear: class_weight : {dict, ‘auto’}, optional 10 | SVC: class_weight : {dict, ‘auto’}, optional; Per-sample weights. Rescale C per sample. Higher weights force the classifier to put more emphasis on these points. 11 | MultinomialNB: - 12 | PA: sample_weight : array-like, shape = [n_samples], optional 13 | QDA: - 14 | RF: sample_weight : array-like, shape = [n_samples] or None 15 | RidgeClassifier:class_weight : dict, optional 16 | SGD :class_weight : dict, {class_label 17 | 18 | 19 | 20 | 21 | Preprocessors: 22 | 23 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools 2 | typing_extensions 3 | distro 4 | 5 | numpy>=1.9.0 6 | scipy>=1.7.0 7 | 8 | joblib 9 | scikit-learn>=0.24.0,<0.25.0 10 | 11 | dask>=2021.12 12 | distributed>=2012.12 13 | pyyaml 14 | pandas>=1.0 15 | liac-arff 16 | threadpoolctl 17 | tqdm 18 | 19 | ConfigSpace>=0.4.21,<0.5 20 | pynisher>=0.6.3,<0.7 21 | pyrfr>=0.8.1,<0.9 22 | smac>=1.2,<1.3 -------------------------------------------------------------------------------- /scripts/01_create_commands.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import itertools 3 | import os 4 | import sys 5 | 6 | import openml 7 | 8 | sys.path.append(".") 9 | from update_metadata_util import classification_tasks, regression_tasks 10 | 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument("--working-directory", type=str, required=True) 14 | parser.add_argument("--test", 
action="store_true") 15 | args = parser.parse_args() 16 | working_directory = args.working_directory 17 | test = args.test 18 | 19 | command_file_name = os.path.join(working_directory, "metadata_commands.txt") 20 | 21 | this_directory = os.path.dirname(os.path.abspath(__file__)) 22 | script_name = "run_auto-sklearn_for_metadata_generation.py" 23 | absolute_script_name = os.path.join(this_directory, script_name) 24 | 25 | commands = [] 26 | for task_id in classification_tasks if not test else (233, 245, 258): 27 | for metric in ("accuracy", "balanced_accuracy", "roc_auc", "logloss"): 28 | 29 | if ( 30 | len(openml.tasks.get_task(task_id, download_data=False).class_labels) > 2 31 | and metric == "roc_auc" 32 | ): 33 | continue 34 | 35 | command = ( 36 | "python3 %s --working-directory %s --time-limit 86400 " 37 | "--per-run-time-limit 1800 --task-id %d -s 1 --metric %s" 38 | % (absolute_script_name, working_directory, task_id, metric) 39 | ) 40 | commands.append(command) 41 | for task_id in regression_tasks if not test else (360029, 360033): 42 | for metric in ("r2", "root_mean_squared_error", "mean_absolute_error"): 43 | command = ( 44 | "python3 %s --working-directory %s --time-limit 86400 " 45 | "--per-run-time-limit 1800 --task-id %d -s 1 --metric %s" 46 | % (absolute_script_name, working_directory, task_id, metric) 47 | ) 48 | commands.append(command) 49 | 50 | with open(command_file_name, "w") as fh: 51 | for command in commands: 52 | fh.writelines(command) 53 | fh.write("\n") 54 | -------------------------------------------------------------------------------- /scripts/2015_nips_paper/Readme.md: -------------------------------------------------------------------------------- 1 | ## Reproduce results of Efficient and Robust Automated Machine Learning (Feurer et al.) 2 | This folder contains all necessary scripts in order to reproduce the results shown in 3 | Figure 3 of Efficient and Robust Automated Machine Learning (Feurer et al.). 
The scripts 4 | can be modified to include different datasets, change the runtime, etc. The scripts only 5 | only handles classification tasks, and balanced accuracy is used as the score metric. 6 | 7 | ### 1. Creating commands.txt 8 | To run the experiment, first create commands.txt by running: 9 | ```bash 10 | cd setup 11 | bash create_commands.sh 12 | ``` 13 | The script can be modified to run experiments with different settings, i.e. 14 | different runtime and/or different tasks. 15 | 16 | ### 2. Executing commands.txt 17 | Run each commands in commands.txt: 18 | ```bash 19 | cd run 20 | bash run_commands.sh 21 | ``` 22 | Each command line in commands.txt first executes model fitting, and then creating the 23 | single best and ensemble trajectories. Therefore, the commands can be run in parallel 24 | on a cluster by modifying run_commands.sh. 25 | 26 | ### 3. Plotting the results 27 | To plot the results, run: 28 | ```bash 29 | cd plot 30 | bash plot_ranks.py 31 | ``` 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /scripts/2015_nips_paper/run/run_commands.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Run all commands in commands.txt. Each command line executes first the model fitting, 4 | # and then creates the trajectory of a single model and the ensemble. Therefore, each 5 | # line can be executed separately and in parallel, for example, on a cluster environment. 6 | cat "../commands.txt" | while read line; do eval "$line"; done -------------------------------------------------------------------------------- /scripts/readme.md: -------------------------------------------------------------------------------- 1 | # How to update metadata 2 | 3 | (to be moved to the documentation) 4 | 5 | ## 1. Create a working directory and set the task type 6 | 7 | The working directory will be used to save all temporary and final output. 
8 | 9 | working_directory=~/auto-sklearn-metadata/001 10 | mkdir -p $working_directory 11 | 12 | The task type defines whether you want update classification or regression 13 | metadata: 14 | 15 | task_type=classification 16 | 17 | or 18 | 19 | task_type=regression 20 | 21 | ## 2. Install the OpenML package and create an OpenML account 22 | 23 | Read the [OpenML python package manual](https://openml.github.io/openml-python) for this. 24 | 25 | ## 3. Create configuration commands 26 | 27 | python3 01_create_commands.py --working-directory $working_directory --task-type $task_type 28 | 29 | This will create a file with all commands necessary to run auto-sklearn on a 30 | large number of datasets from OpenML. You can change the task IDs or the way 31 | how the datasets are loaded in the file `update_metadata_util.py`. To change 32 | the time used for configuration, you can alter the commands file which will 33 | be written to disk. 34 | 35 | ## 4. Run all configuration runs 36 | 37 | On hardware of your choice. It is recommended to run all runs in parallel in 38 | order to get the results in a reasonable amount of time. 39 | 40 | ## 5. Get the test performance of these configurations 41 | 42 | python3 02_retrieve_metadata.py --working-directory $working_directory --task-type $task_type 43 | 44 | ## 6. Calculate metafeatures 45 | 46 | python3 03_calculate_metafeatures.py --working-directory $working_directory --task-type $task_type 47 | 48 | ## 7. 
Create aslib files 49 | 50 | python3 04_create_aslib_files.py --working-directory $working_directory --task-type $task_type 51 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/test/__init__.py -------------------------------------------------------------------------------- /test/fixtures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/test/fixtures/__init__.py -------------------------------------------------------------------------------- /test/fixtures/logging.py: -------------------------------------------------------------------------------- 1 | from pytest_cases import fixture 2 | 3 | from test.mocks.logging import MockLogger 4 | 5 | 6 | @fixture 7 | def mock_logger() -> MockLogger: 8 | """A mock logger with some mock defaults""" 9 | return MockLogger() 10 | -------------------------------------------------------------------------------- /test/fixtures/metrics.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import numpy as np 4 | 5 | from autosklearn.metrics import accuracy, make_scorer 6 | 7 | 8 | def _accuracy_requiring_X_data( 9 | y_true: np.ndarray, 10 | y_pred: np.ndarray, 11 | X_data: Any, 12 | ) -> float: 13 | """Dummy metric that needs X Data""" 14 | if X_data is None: 15 | raise ValueError() 16 | return accuracy(y_true, y_pred) 17 | 18 | 19 | acc_with_X_data = make_scorer( 20 | name="acc_with_X_data", 21 | score_func=_accuracy_requiring_X_data, 22 | needs_X=True, 23 | optimum=1, 24 | worst_possible_result=0, 25 | greater_is_better=True, 26 | ) 27 | 
-------------------------------------------------------------------------------- /test/mocks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/test/mocks/__init__.py -------------------------------------------------------------------------------- /test/mocks/logging.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from autosklearn.util.logging_ import PicklableClientLogger 4 | 5 | from unittest.mock import Mock 6 | 7 | MOCKNAME = "mock" 8 | MOCKHOST = "mockhost" 9 | MOCKPORT = 9020 10 | 11 | 12 | class MockLogger(PicklableClientLogger): 13 | """Should not be used for testing the actual loggers functionality 14 | 15 | Overwrites all methods with mock objects that can be queries 16 | * All logging methods do nothing 17 | * isEnabledFor returns True for everything as it's part of the logging config we 18 | don't have access to 19 | * __setstate__ and __getstate__ remain the same and are not mocked 20 | """ 21 | 22 | def __init__( 23 | self, 24 | name: Optional[str] = None, 25 | host: Optional[str] = None, 26 | port: Optional[int] = None, 27 | ): 28 | self.name = name or MOCKNAME 29 | self.host = host or MOCKHOST 30 | self.port = port or MOCKPORT 31 | 32 | # Overwrite the logging implementations with mocks 33 | self.debug = Mock(return_value=None) # type: ignore 34 | self.info = Mock(return_value=None) # type: ignore 35 | self.warning = Mock(return_value=None) # type: ignore 36 | self.error = Mock(return_value=None) # type: ignore 37 | self.exception = Mock(return_value=None) # type: ignore 38 | self.critical = Mock(return_value=None) # type: ignore 39 | self.log = Mock(return_value=None) # type: ignore 40 | self.isEnabledFor = Mock(return_value=True) # type: ignore 41 | -------------------------------------------------------------------------------- 
/test/test_automl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/test/test_automl/__init__.py -------------------------------------------------------------------------------- /test/test_automl/test_early_stopping.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Callable 4 | 5 | if TYPE_CHECKING: 6 | import numpy as np 7 | from smac.optimizer.smbo import SMBO 8 | from smac.runhistory.runhistory import RunInfo, RunValue 9 | 10 | from autosklearn.automl import AutoMLClassifier 11 | 12 | 13 | def test_early_stopping( 14 | make_automl_classifier: Callable[..., AutoMLClassifier], 15 | make_sklearn_dataset: Callable[..., tuple[np.ndarray, ...]], 16 | ) -> None: 17 | """ 18 | Expects 19 | ------- 20 | * Should early after fitting 2 models 21 | """ 22 | 23 | def callback( 24 | smbo: SMBO, 25 | run_info: RunInfo, 26 | result: RunValue, 27 | time_left: float, 28 | ) -> bool | None: 29 | if int(result.additional_info["num_run"]) >= 2: 30 | return False 31 | 32 | automl = make_automl_classifier(get_trials_callback=callback) 33 | 34 | X_train, Y_train, X_test, Y_test = make_sklearn_dataset("iris") 35 | automl.fit(X_train, Y_train) 36 | 37 | assert len(automl.runhistory_.data) == 2 38 | -------------------------------------------------------------------------------- /test/test_automl/test_fit_pipeline.py: -------------------------------------------------------------------------------- 1 | """Test specific ways of calling `fit_pipeline`""" 2 | -------------------------------------------------------------------------------- /test/test_automl/test_pareto_front.py: -------------------------------------------------------------------------------- 1 | """Test the output of loading the pareto set from an automl instance""" 2 | from 
autosklearn.automl import AutoML 3 | 4 | from pytest_cases import parametrize_with_cases 5 | from pytest_cases.filters import has_tag 6 | 7 | import test.test_automl.cases as cases 8 | 9 | has_ensemble = has_tag("fitted") & ~has_tag("no_ensemble") 10 | 11 | single_objective = has_ensemble & ~has_tag("multiobjective") 12 | multi_objective = has_ensemble & has_tag("multiobjective") 13 | 14 | 15 | @parametrize_with_cases("automl", cases=cases, filter=single_objective) 16 | def test_can_output_pareto_front_singleobjective(automl: AutoML) -> None: 17 | """ 18 | Expects 19 | ------- 20 | * Non-multiobjective instances should have a pareto set of size 1 21 | """ 22 | pareto_set = automl._load_pareto_set() 23 | 24 | assert len(pareto_set) == 1 25 | 26 | 27 | @parametrize_with_cases("automl", cases=cases, filter=multi_objective) 28 | def test_can_output_pareto_front_multiobjective(automl: AutoML) -> None: 29 | """ 30 | Expects 31 | ------- 32 | * Multiobjective ensembles should return >= 1, #TODO should test it's pareto optimal 33 | """ 34 | pareto_set = automl._load_pareto_set() 35 | 36 | assert len(pareto_set) >= 1 37 | -------------------------------------------------------------------------------- /test/test_automl/test_performance.py: -------------------------------------------------------------------------------- 1 | """Test the performance of automl instances after fitting""" 2 | 3 | import numpy as np 4 | from sklearn.ensemble import VotingClassifier, VotingRegressor 5 | 6 | from autosklearn.automl import AutoML 7 | 8 | from pytest_cases import parametrize_with_cases 9 | 10 | import test.test_automl.cases as cases 11 | 12 | 13 | @parametrize_with_cases("automl", cases.case_classifier_fitted_holdout_multiobjective) 14 | def test_performance_with_multiobjective(automl: AutoML) -> None: 15 | """ 16 | Expects 17 | ------- 18 | * Auto-sklearn can predict/predict_proba and has a model 19 | * Each ensemble in the pareto_set can predict/predict_proba 20 | """ 21 | # TODO: 
This test is hyperspecific to this one case 22 | # 23 | # Long term we probably want to return additional info about the case so we can 24 | # test things for other than this case 25 | 26 | # Check that the predict function works 27 | X = np.array([[1.0, 1.0, 1.0, 1.0]]) 28 | 29 | assert automl.predict_proba(X).shape == (1, 3) 30 | assert automl.predict(X).shape == (1,) 31 | 32 | pareto_front = automl._load_pareto_set() 33 | for ensemble in pareto_front: 34 | 35 | assert isinstance(ensemble, (VotingClassifier, VotingRegressor)) 36 | 37 | y_pred = ensemble.predict_proba(X) 38 | assert y_pred.shape == (1, 3) 39 | 40 | y_pred = ensemble.predict(X) 41 | assert y_pred in ["setosa", "versicolor", "virginica"] 42 | 43 | statistics = automl.sprint_statistics() 44 | assert "Metrics" in statistics 45 | assert ("Best validation score: 0.9" in statistics) or ( 46 | "Best validation score: 1.0" in statistics 47 | ), statistics 48 | -------------------------------------------------------------------------------- /test/test_automl/test_performance_over_time.py: -------------------------------------------------------------------------------- 1 | """Test the performance over time functionality of automl instances""" 2 | from autosklearn.automl import AutoML 3 | 4 | from pytest_cases import parametrize_with_cases 5 | from pytest_cases.filters import has_tag 6 | 7 | import test.test_automl.cases as cases 8 | 9 | 10 | @parametrize_with_cases( 11 | "automl", 12 | cases=cases, 13 | filter=has_tag("fitted") & ~has_tag("no_ensemble"), 14 | ) 15 | def test_performance_over_time_with_ensemble(automl: AutoML) -> None: 16 | """ 17 | Parameters 18 | ---------- 19 | automl: AutoMLClassifier 20 | The fitted automl instance with an ensemble 21 | 22 | Expects 23 | ------- 24 | * Performance over time should include only the given columns 25 | * The performance over time should have at least one entry that isn't NaN 26 | * The timestamps should be monotonic 27 | """ 28 | 
expected_performance_columns = { 29 | "single_best_train_score", 30 | "single_best_optimization_score", 31 | "ensemble_optimization_score", 32 | "Timestamp", 33 | } 34 | columns = automl.performance_over_time_.columns 35 | assert set(columns) == set(expected_performance_columns) 36 | 37 | perf_over_time = automl.performance_over_time_ 38 | assert len(perf_over_time.drop(columns="Timestamp").dropna()) != 0 39 | assert perf_over_time["Timestamp"].is_monotonic 40 | -------------------------------------------------------------------------------- /test/test_automl/test_predict.py: -------------------------------------------------------------------------------- 1 | """Test predictions of an automl instance""" 2 | -------------------------------------------------------------------------------- /test/test_automl/test_refit.py: -------------------------------------------------------------------------------- 1 | """Test the refitting functionality of an automl instance""" 2 | from typing import Callable, Union 3 | 4 | from itertools import repeat 5 | 6 | import numpy as np 7 | 8 | from autosklearn.automl import AutoML 9 | from autosklearn.data.validation import InputValidator 10 | 11 | from pytest_cases import parametrize 12 | from unittest.mock import Mock 13 | 14 | 15 | @parametrize("budget_type", [None, "iterations"]) 16 | def test_shuffle_on_fail( 17 | budget_type: Union[None, str], 18 | make_automl: Callable[..., AutoML], 19 | ) -> None: 20 | """ 21 | Parameters 22 | ---------- 23 | budget_type : Union[None, str] 24 | The budget type to use 25 | 26 | Fixtures 27 | -------- 28 | make_automl : Callable[..., AutoML] 29 | Factory to make an AutoML instance 30 | 31 | Expects 32 | ------- 33 | * The automl should not be able to fit before `refit` 34 | * The model should be attempted to fitted `n_fails` times before successing once 35 | after 36 | * The automl should be able to fit after `refit` 37 | """ 38 | n_fails = 3 39 | failing_model = Mock() 40 | 
failing_model.fit.side_effect = [ValueError()] * n_fails + [None] # type: ignore 41 | failing_model.estimator_supports_iterative_fit.side_effect = repeat(False) 42 | 43 | ensemble_mock = Mock() 44 | ensemble_mock.get_selected_model_identifiers.return_value = [(1, 1, 50.0)] 45 | 46 | X = np.ones((3, 2)) 47 | y = np.ones((3,)) 48 | 49 | input_validator = InputValidator() 50 | input_validator.fit(X, y) 51 | 52 | auto = make_automl() 53 | auto.ensemble_ = ensemble_mock # type: ignore 54 | auto.models_ = {(1, 1, 50.0): failing_model} 55 | auto._budget_type = budget_type 56 | auto.InputValidator = input_validator 57 | 58 | assert not auto._can_predict 59 | auto.refit(X, y) 60 | 61 | assert failing_model.fit.call_count == n_fails + 1 62 | assert auto._can_predict 63 | -------------------------------------------------------------------------------- /test/test_automl/test_show_models.py: -------------------------------------------------------------------------------- 1 | """Test the show models functinality of an automl instance""" 2 | from autosklearn.automl import AutoML 3 | 4 | from pytest_cases import parametrize_with_cases 5 | 6 | import test.test_automl.cases as cases 7 | 8 | 9 | @parametrize_with_cases("automl", cases=cases, has_tag=["fitted", "no_ensemble"]) 10 | def test_no_ensemble_produces_empty_show_models(automl: AutoML) -> None: 11 | """ 12 | Parameters 13 | ---------- 14 | automl : AutoML 15 | The automl object with no ensemble size to test 16 | 17 | Expects 18 | ------- 19 | * Show models should return an empty dict 20 | """ 21 | assert automl.show_models() == {} 22 | -------------------------------------------------------------------------------- /test/test_data/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | __author__ = "feurerm" 3 | -------------------------------------------------------------------------------- /test/test_ensemble_builder/__init__.py: 
-------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | __author__ = "mlindauer" 3 | -------------------------------------------------------------------------------- /test/test_ensemble_builder/test.test_ensemble_builder.test_ensemble_test_run_end_at/.auto-sklearn/runs/0_1_0.0/0.1.0.0.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/test/test_ensemble_builder/test.test_ensemble_builder.test_ensemble_test_run_end_at/.auto-sklearn/runs/0_1_0.0/0.1.0.0.model -------------------------------------------------------------------------------- /test/test_ensemble_builder/test.test_ensemble_builder.test_ensemble_test_run_end_at/.auto-sklearn/runs/0_2_0.0/0.2.0.0.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/test/test_ensemble_builder/test.test_ensemble_builder.test_ensemble_test_run_end_at/.auto-sklearn/runs/0_2_0.0/0.2.0.0.model -------------------------------------------------------------------------------- /test/test_ensemble_builder/test.test_ensemble_builder.test_ensemble_test_run_end_at/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.np: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/test/test_ensemble_builder/test.test_ensemble_builder.test_ensemble_test_run_end_at/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.np -------------------------------------------------------------------------------- /test/test_ensemble_builder/test.test_ensemble_builder.test_ensemble_test_run_end_at/.auto-sklearn/runs/0_3_100.0/0.3.0.0.model: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/test/test_ensemble_builder/test.test_ensemble_builder.test_ensemble_test_run_end_at/.auto-sklearn/runs/0_3_100.0/0.3.0.0.model -------------------------------------------------------------------------------- /test/test_ensemble_builder/test.test_ensemble_builder.test_ensemble_test_run_end_at/.auto-sklearn/runs/0_3_100.0/0.3.100.0.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/test/test_ensemble_builder/test.test_ensemble_builder.test_ensemble_test_run_end_at/.auto-sklearn/runs/0_3_100.0/0.3.100.0.model -------------------------------------------------------------------------------- /test/test_ensemble_builder/test_manager.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Callable 4 | 5 | from autosklearn.automl import AutoML 6 | from autosklearn.ensemble_building import EnsembleBuilderManager 7 | 8 | from pytest_cases import parametrize_with_cases 9 | 10 | import test.test_automl.cases as cases 11 | from test.conftest import DEFAULT_SEED 12 | 13 | 14 | @parametrize_with_cases("automl", cases=cases, has_tag="fitted") 15 | def case_real_runs( 16 | automl: AutoML, 17 | make_ensemble_builder_manager: Callable[..., EnsembleBuilderManager], 18 | ) -> EnsembleBuilderManager: 19 | """Uses real runs from a fitted automl instance""" 20 | manager = make_ensemble_builder_manager( 21 | backend=automl._backend, 22 | metric=automl._metrics[0], 23 | task=automl._task, 24 | dataset_name=automl._dataset_name, 25 | seed=automl._seed, 26 | logger_port=automl._logger_port, 27 | random_state=DEFAULT_SEED, 28 | ) 29 | return manager 30 | -------------------------------------------------------------------------------- /test/test_estimators/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/test/test_estimators/__init__.py -------------------------------------------------------------------------------- /test/test_estimators/cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/test/test_estimators/cases.py -------------------------------------------------------------------------------- /test/test_evaluation/.datasets/abalone/abalone_feat.type: -------------------------------------------------------------------------------- 1 | Categorical 2 | Numerical 3 | Numerical 4 | Numerical 5 | Numerical 6 | Numerical 7 | Numerical 8 | Numerical 9 | -------------------------------------------------------------------------------- /test/test_evaluation/.datasets/abalone/abalone_public.info: -------------------------------------------------------------------------------- 1 | task = 'multiclass.classification' 2 | has_categorical = 1 3 | feat_type = 'Mixed' 4 | train_num = 2924 5 | metric = 'acc_metric' 6 | test_num = 1253 7 | target_type = 'Categorical' 8 | label_num = 28 9 | target_num = 28 10 | is_sparse = 0 11 | has_missing = 0 12 | usage = 'ParamSklearn Configuration for AutoML challenge 2015' 13 | feat_num = 8 14 | valid_num = 10 15 | name = 'abalone' 16 | -------------------------------------------------------------------------------- /test/test_evaluation/.datasets/abalone/abalone_valid.data: -------------------------------------------------------------------------------- 1 | 0.0 0.43 0.35 0.11 0.406 0.1675 0.081 0.135 2 | 0.0 0.49 0.38 0.135 0.5415 0.2175 0.095 0.19 3 | 1.0 0.535 0.405 0.145 0.6845 0.2725 0.171 0.205 4 | 1.0 0.44 0.34 0.1 0.451 0.188 0.087 0.13 5 | 0.0 0.45 0.32 0.1 0.381 0.1705 0.075 0.115 6 | 0.0 0.355 0.28 0.095 0.2455 0.0955 0.062 
0.075 7 | 2.0 0.38 0.275 0.1 0.2255 0.08 0.049 0.085 8 | 1.0 0.565 0.44 0.155 0.9395 0.4275 0.214 0.27 9 | 1.0 0.55 0.415 0.135 0.7635 0.318 0.21 0.2 10 | 1.0 0.56 0.44 0.14 0.9285 0.3825 0.188 0.3 11 | -------------------------------------------------------------------------------- /test/test_evaluation/.datasets/abalone/abalone_valid.solution: -------------------------------------------------------------------------------- 1 | 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 | 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 | 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 | 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 | 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 | 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 | 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 | 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 | 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10 | 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 | -------------------------------------------------------------------------------- /test/test_evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | __author__ = "feurerm" 3 | -------------------------------------------------------------------------------- /test/test_evaluation/test_custom_splitters.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from autosklearn.constants import ( 4 | BINARY_CLASSIFICATION, 5 | MULTICLASS_CLASSIFICATION, 6 | MULTILABEL_CLASSIFICATION, 7 | ) 8 | from autosklearn.evaluation.splitter import CustomStratifiedShuffleSplit 9 | 10 | import pytest 11 | 12 | 13 | @pytest.mark.parametrize( 14 | "task, X, y", 15 | [ 16 | ( 17 | BINARY_CLASSIFICATION, 18 | np.asarray(10000 * [[1, 1, 1, 1, 1]]), 19 | np.asarray(9999 * [0] + 1 * [1]), 20 | ), 21 | ( 22 | MULTICLASS_CLASSIFICATION, 23 
| np.asarray(10000 * [[1, 1, 1, 1, 1]]), 24 | np.asarray(4999 * [1] + 4999 * [2] + 1 * [3] + 1 * [4]), 25 | ), 26 | ( 27 | MULTILABEL_CLASSIFICATION, 28 | np.asarray(10000 * [[1, 1, 1, 1, 1]]), 29 | np.asarray( 30 | 4999 * [[0, 1, 1]] 31 | + 4999 * [[1, 1, 0]] 32 | + 1 * [[1, 0, 1]] 33 | + 1 * [[0, 0, 0]] 34 | ), 35 | ), 36 | ], 37 | ) 38 | @pytest.mark.parametrize("train_size", [100, 0.5, 200, 0.75]) 39 | def test_custom_stratified_shuffle_split_returns_unique_labels_and_maintains_size( 40 | task, X, y, train_size 41 | ): 42 | splitter = CustomStratifiedShuffleSplit(train_size=train_size, random_state=1) 43 | left_idxs, _ = next(splitter.split(X=X, y=y)) 44 | y_sampled = y[left_idxs] 45 | X_sampled = X[left_idxs] 46 | 47 | # Assert the train_size param is respected 48 | if isinstance(train_size, float): 49 | n_samples = int(train_size * len(y)) 50 | else: 51 | n_samples = train_size 52 | 53 | assert len(y_sampled) == n_samples 54 | assert len(X_sampled) == n_samples 55 | 56 | # Assert all the unique labels are present in the training set 57 | assert all( 58 | label in np.unique(y_sampled) for label in np.unique(y) 59 | ), f"{task} failed, {np.unique(y)} != {np.unique(y_sampled)}" 60 | -------------------------------------------------------------------------------- /test/test_evaluation/test_dummy_pipelines.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.base import clone 3 | from sklearn.datasets import make_classification, make_regression 4 | from sklearn.utils.validation import check_is_fitted 5 | 6 | from autosklearn.evaluation.abstract_evaluator import ( 7 | MyDummyClassifier, 8 | MyDummyRegressor, 9 | ) 10 | 11 | import pytest 12 | 13 | 14 | @pytest.mark.parametrize("task_type", ["classification", "regression"]) 15 | def test_dummy_pipeline(task_type: str) -> None: 16 | if task_type == "classification": 17 | estimator_class = MyDummyClassifier 18 | data_maker = make_classification 19 
| elif task_type == "regression": 20 | estimator_class = MyDummyRegressor 21 | data_maker = make_regression 22 | else: 23 | pytest.fail(task_type) 24 | return 25 | 26 | X, y = data_maker(random_state=0) 27 | estimator = estimator_class( 28 | feat_type={i: "numerical" for i in range(X.shape[1])}, config=1, random_state=0 29 | ) 30 | estimator.fit(X, y) 31 | check_is_fitted(estimator) 32 | 33 | assert np.shape(X)[0] == np.shape(estimator.predict(X))[0] 34 | 35 | # make sure we comply with scikit-learn estimator API 36 | clone(estimator) 37 | -------------------------------------------------------------------------------- /test/test_metalearning/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | __author__ = "feurerm" 3 | -------------------------------------------------------------------------------- /test/test_metalearning/pyMetaLearn/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "feurerm" 2 | -------------------------------------------------------------------------------- /test/test_metalearning/pyMetaLearn/metalearning/test_metrics.py: -------------------------------------------------------------------------------- 1 | """ 2 | def test_learned(self): 3 | kND = KNearestDatasets(metric='learned') 4 | rf = kND.fit(pd.DataFrame([self.krvskp, self.labor]), 5 | {233: self.runs[233], 234: self.runs[234]}) 6 | 7 | self.assertEqual(kND._learned(self.anneal, self.krvskp), 1.5) 8 | self.assertEqual(kND._learned(self.anneal, self.labor), 1.5) 9 | 10 | 11 | def test_learned_sparse(self): 12 | kND = KNearestDatasets(metric='learned') 13 | rf = kND.fit(pd.DataFrame([self.krvskp, self.labor]), 14 | {233: self.runs[233][0:2], 234: self.runs[234][1:3]}) 15 | 16 | self.assertEqual(kND._learned(self.anneal, self.krvskp), 1.5) 17 | self.assertEqual(kND._learned(self.anneal, self.labor), 1.5) 18 | 19 | 20 | def test_feature_selection(self): 21 | 
kND = KNearestDatasets(metric='mfs_l1', 22 | metric_kwargs={'max_features': 1.0, 23 | 'mode': 'select'}) 24 | self.krvskp.name = 'kr-vs-kp' 25 | selection = kND.fit(pd.DataFrame([self.krvskp, self.labor, self.anneal]), 26 | {'kr-vs-kp': self.runs['krvskp'], 27 | 'labor': self.runs['labor'], 28 | 'anneal': self.runs['anneal']}) 29 | self.assertEqual(1, selection.loc['number_of_classes']) 30 | self.assertEqual(1, selection.loc['number_of_features']) 31 | self.assertEqual(0, selection.loc['number_of_instances']) 32 | 33 | def test_feature_weighting(self): 34 | kND = KNearestDatasets(metric='mfs_l1', 35 | metric_kwargs={'max_features': 1.0, 36 | 'mode': 'weight'}) 37 | self.krvskp.name = 'kr-vs-kp' 38 | selection = kND.fit(pd.DataFrame([self.krvskp, self.labor, self.anneal]), 39 | {'kr-vs-kp': self.runs['krvskp'], 40 | 'labor': self.runs['labor'], 41 | 'anneal': self.runs['anneal']}) 42 | self.assertEqual(type(selection), pd.Series) 43 | self.assertEqual(len(selection), 3) 44 | """ 45 | -------------------------------------------------------------------------------- /test/test_metalearning/pyMetaLearn/test_meta_base_data/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/test/test_metalearning/pyMetaLearn/test_meta_base_data/readme.txt -------------------------------------------------------------------------------- /test/test_metalearning/pyMetaLearn/test_metalearning_configuration.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | import autosklearn.metalearning.optimizers.metalearn_optimizer.metalearner as metalearner # noqa: E501 5 | import autosklearn.pipeline.classification 6 | from autosklearn.metalearning.metalearning.meta_base import MetaBase 7 | 8 | import unittest 9 | 10 | logging.basicConfig() 11 | 12 | 13 | class 
MetalearningConfiguration(unittest.TestCase): 14 | def test_metalearning_cs_size(self): 15 | self.cwd = os.getcwd() 16 | data_dir = os.path.dirname(__file__) 17 | data_dir = os.path.join(data_dir, "test_meta_base_data") 18 | os.chdir(data_dir) 19 | 20 | # Total: 176, categorical: 3, numerical: 7, string: 7 21 | total = 179 22 | num_numerical = 6 23 | num_string = 11 24 | num_categorical = 3 25 | for feat_type, cs_size in [ 26 | ({"A": "numerical"}, total - num_string - num_categorical), 27 | ({"A": "categorical"}, total - num_string - num_numerical), 28 | ({"A": "string"}, total - num_categorical - num_numerical), 29 | ({"A": "numerical", "B": "categorical"}, total - num_string), 30 | ({"A": "numerical", "B": "string"}, total - num_categorical), 31 | ({"A": "categorical", "B": "string"}, total - num_numerical), 32 | ({"A": "categorical", "B": "string", "C": "numerical"}, total), 33 | ]: 34 | pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline( 35 | feat_type=feat_type 36 | ) 37 | self.cs = pipeline.get_hyperparameter_search_space(feat_type=feat_type) 38 | 39 | self.logger = logging.getLogger() 40 | meta_base = MetaBase(self.cs, data_dir, logger=self.logger) 41 | self.meta_optimizer = metalearner.MetaLearningOptimizer( 42 | "233", self.cs, meta_base, logger=self.logger 43 | ) 44 | self.assertEqual( 45 | len(self.meta_optimizer.configuration_space), cs_size, feat_type 46 | ) 47 | -------------------------------------------------------------------------------- /test/test_metalearning/pyMetaLearn/test_optimizer_base.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from autosklearn.metalearning.optimizers import optimizer_base 4 | 5 | import unittest 6 | 7 | 8 | class OptimizerBaseTest(unittest.TestCase): 9 | _multiprocess_can_split_ = True 10 | 11 | def setUp(self): 12 | self.hyperparameters = OrderedDict() 13 | self.hyperparameters["x"] = [-5, 0, 5, 10] 14 | 
self.hyperparameters["y"] = [0, 5, 10, 15] 15 | 16 | def test_parse_hyperopt_string(self): 17 | hyperparameter_string = "x {-5, 0, 5, 10}\ny {0, 5, 10, 15}" 18 | expected = OrderedDict( 19 | [["x", ["-5", "0", "5", "10"]], ["y", ["0", "5", "10", "15"]]] 20 | ) 21 | ret = optimizer_base.parse_hyperparameter_string(hyperparameter_string) 22 | self.assertEqual(ret, expected) 23 | 24 | hyperparameter_string = "x {-5, 0, 5, 10} [5]\ny {0, 5, 10, 15}" 25 | ret = optimizer_base.parse_hyperparameter_string(hyperparameter_string) 26 | self.assertEqual(ret, expected) 27 | 28 | hyperparameter_string = "x {-5, 0, 5, 10}\ny {0, 5, 10, 15} [5]" 29 | ret = optimizer_base.parse_hyperparameter_string(hyperparameter_string) 30 | self.assertEqual(ret, expected) 31 | 32 | hyperparameter_string = "x {-5, 0, 5, 10}\ny 0, 5, 10, 15} [5]" 33 | self.assertRaises( 34 | ValueError, 35 | optimizer_base.parse_hyperparameter_string, 36 | hyperparameter_string, 37 | ) 38 | 39 | def test_construct_cli_call(self): 40 | cli_call = optimizer_base.construct_cli_call("cv.py", {"x": -5, "y": 0}) 41 | self.assertEqual(cli_call, "cv.py -x \"'-5'\" -y \"'0'\"") 42 | -------------------------------------------------------------------------------- /test/test_metric/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | __author__ = "feurerm" 3 | -------------------------------------------------------------------------------- /test/test_metric/test_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/test/test_metric/test_util.py -------------------------------------------------------------------------------- /test/test_pipeline/__init__.py: -------------------------------------------------------------------------------- 
import sklearn.ensemble

from autosklearn.pipeline.components.classification.adaboost import AdaboostClassifier

from .test_base import BaseClassificationComponentTest


class AdaBoostComponentTest(BaseClassificationComponentTest):
    """Expected reference scores for the AdaBoost classification component."""

    __test__ = True

    # Reference results per dataset; -1 marks an unsupported setting.
    res = {
        "default_iris": 0.93999999999999995,
        "default_iris_iterative": -1,
        "default_iris_proba": 0.22452300738472031,
        "default_iris_sparse": 0.85999999999999999,
        "default_digits": 0.6879174256223437,
        "default_digits_iterative": -1,
        "default_digits_binary": 0.98299939283545845,
        "default_digits_multilabel": -1,
        "default_digits_multilabel_proba": -1,
    }

    sk_mod = sklearn.ensemble.AdaBoostClassifier

    module = AdaboostClassifier
import sklearn.naive_bayes

from autosklearn.pipeline.components.classification.bernoulli_nb import BernoulliNB

from .test_base import BaseClassificationComponentTest


class BernoulliNBComponentTest(BaseClassificationComponentTest):
    """Expected reference scores for the Bernoulli naive Bayes component."""

    __test__ = True

    # Reference results per dataset; None disables the call-count check.
    res = {
        "default_iris": 0.26,
        "iris_n_calls": None,
        "default_iris_iterative": 0.26,
        "default_iris_proba": 1.1157508543538652,
        "default_iris_sparse": 0.38,
        "default_digits": 0.81238615664845171,
        "digits_n_calls": None,
        "default_digits_iterative": 0.81238615664845171,
        "default_digits_binary": 0.99392835458409234,
        "default_digits_multilabel": 0.67651391068802913,
        "default_digits_multilabel_proba": 0.5,
    }

    sk_mod = sklearn.naive_bayes.BernoulliNB
    module = BernoulliNB
import sklearn.ensemble

from autosklearn.pipeline.components.classification.extra_trees import (
    ExtraTreesClassifier,
)

from .test_base import BaseClassificationComponentTest


class ExtraTreesComponentTest(BaseClassificationComponentTest):
    """Expected reference scores for the extra-trees classification component."""

    __test__ = True

    # Iterative-fit scores equal the default scores for this component.
    res = {
        "default_iris": 0.96,
        "iris_n_calls": 9,
        "default_iris_iterative": 0.96,
        "default_iris_proba": 0.10053485167017469,
        "default_iris_sparse": 0.74,
        "default_digits": 0.9216757741347905,
        "digits_n_calls": 9,
        "default_digits_iterative": 0.9216757741347905,
        "default_digits_iterative_places": 3,
        "default_digits_binary": 0.994535519125683,
        "default_digits_multilabel": 0.9983621593291405,
        "default_digits_multilabel_proba": 0.997710730679746,
    }

    sk_mod = sklearn.ensemble.ExtraTreesClassifier
    module = ExtraTreesClassifier
    # The component grows trees iteratively up to its maximum budget.
    step_hyperparameter = {
        "name": "n_estimators",
        "value": module.get_max_iter(),
    }
import sklearn.ensemble

from autosklearn.pipeline.components.classification.gradient_boosting import (
    GradientBoostingClassifier,
)

from .test_base import BaseClassificationComponentTest


class GradientBoostingComponentTest(BaseClassificationComponentTest):
    """Expected reference scores for the gradient-boosting classification component."""

    __test__ = True

    # Reference results per dataset; -1 marks an unsupported setting.
    res = {
        "default_iris": 0.92,
        "default_iris_iterative": 0.92,
        "default_iris_proba": 1.1099521844626845,
        "default_iris_sparse": -1,
        "default_digits": 0.8652094717668488,
        "default_digits_iterative": 0.8652094717668488,
        "default_digits_binary": 0.9933211900425015,
        "default_digits_multilabel": -1,
        "default_digits_multilabel_proba": -1,
    }

    # FIX: was sklearn.ensemble.ExtraTreesClassifier (copy-paste error from the
    # extra-trees test). The auto-sklearn gradient-boosting component wraps the
    # histogram-based implementation, whose iteration budget is `max_iter` —
    # matching `step_hyperparameter` below.
    sk_mod = sklearn.ensemble.HistGradientBoostingClassifier
    module = GradientBoostingClassifier
    step_hyperparameter = {
        "name": "max_iter",
        "value": module.get_max_iter(),
    }
import sklearn.neighbors

from autosklearn.pipeline.components.classification.k_nearest_neighbors import (
    KNearestNeighborsClassifier,
)

from .test_base import BaseClassificationComponentTest


class KNearestNeighborsComponentTest(BaseClassificationComponentTest):
    """Expected reference scores for the k-nearest-neighbors component."""

    __test__ = True

    # Reference results per dataset; -1 marks an unsupported setting.
    # NOTE(review): "default_iris" looks like a truncated 0.95999999999999996 —
    # confirm against the generating run before changing it.
    res = {
        "default_iris": 0.959999999999999,
        "default_iris_iterative": -1,
        "default_iris_proba": 1.381551055796429,
        "default_iris_sparse": 0.82,
        "default_digits": 0.93321190042501523,
        "default_digits_iterative": -1,
        "default_digits_binary": 0.99574984820886459,
        "default_digits_multilabel": 0.93433756191199024,
        "default_digits_multilabel_proba": 0.9713841334968244,
    }

    sk_mod = sklearn.neighbors.KNeighborsClassifier
    module = KNearestNeighborsClassifier
import sklearn.svm

from autosklearn.pipeline.components.classification.liblinear_svc import LibLinear_SVC

from .test_base import BaseClassificationComponentTest


class LibLinearComponentTest(BaseClassificationComponentTest):
    """Expected reference scores for the liblinear SVC component."""

    __test__ = True

    # "*_places" entries relax the almost-equal comparison for that dataset.
    res = {
        "default_iris": 1,
        "default_iris_iterative": -1,
        "default_iris_proba": 0.3350793047400861,
        "default_iris_sparse": 0.56,
        "default_digits": 0.914996964177292,
        "default_digits_places": 2,
        "default_digits_iterative": -1,
        "default_digits_binary": 0.98907103825136611,
        "default_digits_multilabel": 0.89889188078944637,
        "default_digits_multilabel_places": 2,
        "default_digits_multilabel_proba": 0.99999999999999989,
    }

    sk_mod = sklearn.svm.LinearSVC
    module = LibLinear_SVC
import sklearn.neural_network

from autosklearn.pipeline.components.classification.mlp import MLPClassifier

from .test_base import BaseClassificationComponentTest


class MLPComponentTest(BaseClassificationComponentTest):
    # NOTE: `default_iris_proba_places`
    #
    # Github runners seem to non-deterministically fail `test_default_iris_proba`,
    # which is why `default_iris_proba_places` is set. Known platform-dependent
    # differences exist:
    # https://github.com/scikit-learn/scikit-learn/issues/13108#issuecomment-461696681
    #
    # Results appear deterministic on any single platform: locally, performing
    # the same test 100 times yields the same predictions 100 times.
    #
    # Github runners run on Microsoft Azure DS2-v2 machines:
    # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#cloud-hosts-for-github-hosted-runners
    #
    # Those have consistent CPUs, so the underlying reason why this fails only
    # sometimes and only on Github runners remains unclear.
    __test__ = True

    res = {
        "default_iris": 0.98,
        "iris_n_calls": 7,
        "iris_iterative_n_iter": 94,
        "default_iris_iterative": 0.98,
        "default_iris_proba": 0.647786774635315,
        "default_iris_proba_places": 6,
        "default_iris_sparse": 0.42,
        "default_digits": 0.8099574984820886,
        "digits_n_calls": 7,
        "digits_iterative_n_iter": 124,
        "default_digits_iterative": 0.8099574984820886,
        "default_digits_binary": 0.99210686095932,
        "default_digits_multilabel": 0.8083000396415946,
        "default_digits_multilabel_proba": 0.8096624850657109,
    }

    sk_mod = sklearn.neural_network.MLPClassifier
    module = MLPClassifier
    # The fitted attribute `n_iter_` must reach the component's iteration budget.
    step_hyperparameter = {
        "name": "n_iter_",
        "value": module.get_max_iter(),
    }
import numpy as np
import sklearn.naive_bayes
import sklearn.preprocessing

from autosklearn.pipeline.components.classification.multinomial_nb import MultinomialNB
from autosklearn.pipeline.util import get_dataset

from .test_base import BaseClassificationComponentTest


class MultinomialNBComponentTest(BaseClassificationComponentTest):
    """Expected reference scores for the multinomial naive Bayes component."""

    __test__ = True

    res = {
        "default_iris": 0.97999999999999998,
        "iris_n_calls": None,
        "default_iris_iterative": 0.97999999999999998,
        "default_iris_proba": 0.5865733413579101,
        "default_iris_sparse": 0.54,
        "default_digits": 0.89496053430479661,
        "digits_n_calls": None,
        "default_digits_iterative": 0.89496053430479661,
        "default_digits_binary": 0.98967820279295693,
        "default_digits_multilabel": 0.70484946987667163,
        "default_digits_multilabel_proba": 0.80324074074074081,
    }

    sk_mod = sklearn.naive_bayes.MultinomialNB
    module = MultinomialNB

    def test_default_configuration_negative_values(self):
        """Custom preprocessing test: standard-scaled data contains negative
        values, so the component's clipping to zero must work."""
        X_train, Y_train, X_test, Y_test = get_dataset(dataset="digits")
        scaler = sklearn.preprocessing.StandardScaler()
        X_train = scaler.fit_transform(X_train)

        configuration_space = MultinomialNB.get_hyperparameter_search_space()
        default = configuration_space.get_default_configuration()
        params = {name: default[name] for name in default if default[name] is not None}

        cls = MultinomialNB(random_state=1, **params)
        cls = cls.fit(X_train, Y_train)
        prediction = cls.predict(X_test)
        self.assertAlmostEqual(np.nanmean(prediction == Y_test), 0.88888888888888884)
import sklearn.discriminant_analysis

from autosklearn.pipeline.components.classification.qda import QDA

from .test_base import BaseClassificationComponentTest


class QDAComponentTest(BaseClassificationComponentTest):
    """Expected reference scores for the QDA classification component."""

    __test__ = True

    # Reference results per dataset; -1 marks an unsupported setting.
    res = {
        "default_iris": 1.0,
        "default_iris_iterative": -1,
        "default_iris_proba": 0.56124476634783993,
        "default_iris_sparse": -1,
        "default_digits": 0.18882817243472982,
        "default_digits_iterative": -1,
        "default_digits_binary": 0.89071038251366119,
        "default_digits_multilabel": 0.17011293429111091,
        "default_digits_multilabel_places": 1,
        "default_digits_multilabel_proba": 0.99999999999999989,
    }

    sk_mod = sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis
    module = QDA
import sklearn.ensemble

from autosklearn.pipeline.components.classification.random_forest import RandomForest

from .test_base import BaseClassificationComponentTest


class RandomForestComponentTest(BaseClassificationComponentTest):
    """Expected reference scores for the random-forest classification component."""

    __test__ = True

    # Iterative-fit scores equal the default scores for this component.
    res = {
        "default_iris": 0.96,
        "iris_n_calls": 9,
        "default_iris_iterative": 0.96,
        "default_iris_proba": 0.0996785324703419,
        "default_iris_sparse": 0.85999999999999999,
        "default_digits": 0.8998178506375227,
        "digits_n_calls": 9,
        "default_digits_iterative": 0.8998178506375227,
        "default_digits_binary": 0.9896782027929569,
        "default_digits_multilabel": 0.9973653110879388,
        "default_digits_multilabel_proba": 0.9965660960196189,
    }

    sk_mod = sklearn.ensemble.RandomForestClassifier
    module = RandomForest
    # The component grows trees iteratively up to its maximum budget.
    step_hyperparameter = {
        "name": "n_estimators",
        "value": module.get_max_iter(),
    }
import numpy as np
import scipy.sparse

from autosklearn.pipeline.components.data_preprocessing.category_shift.category_shift import (  # noqa: E501
    CategoryShift,
)

import unittest


class CategoryShiftTest(unittest.TestCase):
    """CategoryShift must preserve the input's dense/sparse representation."""

    def test_data_type_consistency(self):
        # Dense input stays dense.
        dense = np.random.randint(0, 255, (3, 4))
        transformed = CategoryShift().fit_transform(dense)
        self.assertFalse(scipy.sparse.issparse(transformed))

        # Sparse input stays sparse.
        sparse = scipy.sparse.csc_matrix(
            ([1, 2, 0, 4], ([0, 1, 2, 1], [3, 2, 1, 0])), shape=(3, 4)
        )
        transformed = CategoryShift().fit_transform(sparse)
        self.assertTrue(scipy.sparse.issparse(transformed))
import numpy as np
import scipy.sparse

from autosklearn.pipeline.components.data_preprocessing.minority_coalescense.minority_coalescer import (  # noqa: E501
    MinorityCoalescer,
)
from autosklearn.pipeline.components.data_preprocessing.minority_coalescense.no_coalescense import (  # noqa: E501
    NoCoalescence,
)

import unittest


class MinorityCoalescerTest(unittest.TestCase):
    """Tests for the minority-coalescing data preprocessors."""

    def test_data_type_consistency(self):
        # Dense input stays dense.
        dense = np.random.randint(3, 6, (3, 4))
        transformed = MinorityCoalescer().fit_transform(dense)
        self.assertFalse(scipy.sparse.issparse(transformed))

        # Sparse input stays sparse.
        sparse = scipy.sparse.csc_matrix(
            ([3, 6, 4, 5], ([0, 1, 2, 1], [3, 2, 1, 0])), shape=(3, 4)
        )
        transformed = MinorityCoalescer().fit_transform(sparse)
        self.assertTrue(scipy.sparse.issparse(transformed))

    def test_no_coalescence(self):
        X = np.random.randint(0, 255, (3, 4))
        Y = NoCoalescence().fit_transform(X)
        np.testing.assert_array_almost_equal(Y, X)
        # The no-op preprocessor must return the very same object, not a copy.
        self.assertEqual(id(X), id(Y))
from scipy import sparse

from autosklearn.pipeline.components.data_preprocessing.variance_threshold.variance_threshold import (  # noqa: E501
    VarianceThreshold,
)
from autosklearn.pipeline.util import PreprocessingTestCase, _test_preprocessing


class VarianceThresholdTest(PreprocessingTestCase):
    """Tests for the variance-threshold data preprocessor."""

    def test_default_configuration(self):
        # Run twice and check the transformation is deterministic.
        transformations = []
        for _ in range(2):
            transformation, original = _test_preprocessing(VarianceThreshold)
            self.assertEqual(transformation.shape, original.shape)
            self.assertTrue((transformation == original).all())
            transformations.append(transformation)
            if len(transformations) > 1:
                self.assertTrue((transformations[-1] == transformations[-2]).all())

    def test_default_configuration_sparse_data(self):
        transformations = []
        transformation, original = _test_preprocessing(
            VarianceThreshold, make_sparse=True
        )
        # The zero-variance first column is dropped from the sparse input.
        self.assertEqual(transformation.shape, (100, 3))
        self.assertTrue((transformation.toarray() == original.toarray()[:, 1:]).all())
        self.assertIsInstance(transformation, sparse.csr_matrix)
        transformations.append(transformation)

    def test_preprocessing_dtype(self):
        super()._test_preprocessing_dtype(VarianceThreshold, add_NaNs=False)
import os
import sys

from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm

# Add the parent directory to sys.path so the parent component package can be
# imported by the component-discovery tests.
_this_directory = os.path.dirname(os.path.abspath(__file__))
_parent_directory = os.path.abspath(os.path.join(_this_directory, ".."))
sys.path.append(_parent_directory)


class DummyComponent1(AutoSklearnClassificationAlgorithm):
    """Minimal stand-in classification component used by discovery tests."""

    pass
import numpy as np

from autosklearn.pipeline.components.feature_preprocessing.no_preprocessing import (
    NoPreprocessing,
)
from autosklearn.pipeline.util import PreprocessingTestCase, _test_preprocessing


class NoneComponentTest(PreprocessingTestCase):
    """NoPreprocessing must pass the data through unchanged."""

    def test_default_configuration(self):
        transformation, original = _test_preprocessing(NoPreprocessing)
        self.assertEqual(transformation.shape[0], original.shape[0])
        self.assertEqual(transformation.shape[1], original.shape[1])
        self.assertFalse((transformation == 0).all())
        # All summary statistics of the data must be preserved exactly.
        for stat in (np.sum, np.min, np.max, np.std, np.mean):
            self.assertEqual(stat(original), stat(transformation))

    def test_preprocessing_dtype(self):
        super()._test_preprocessing_dtype(NoPreprocessing)
import autosklearn.pipeline.components.feature_preprocessing as fp

import unittest


class FeatureProcessingTest(unittest.TestCase):
    """Check how many feature preprocessors are available per task type."""

    @staticmethod
    def _n_available(data_properties):
        # One-line helper: number of components compatible with the task.
        return len(
            fp.FeaturePreprocessorChoice(data_properties).get_available_components(
                data_properties
            )
        )

    def test_get_available_components(self):
        # Target type
        for target_type, num_values in [("classification", 15), ("regression", 14)]:
            self.assertEqual(
                self._n_available({"target_type": target_type}), num_values
            )

        # Multiclass
        self.assertEqual(
            self._n_available({"target_type": "classification", "multiclass": True}),
            15,
        )

        # Multilabel
        self.assertEqual(
            self._n_available({"target_type": "classification", "multilabel": True}),
            12,
        )
import sklearn.metrics
from sklearn.linear_model import Ridge

from autosklearn.pipeline.components.feature_preprocessing.fast_ica import FastICA
from autosklearn.pipeline.util import (
    PreprocessingTestCase,
    _test_preprocessing,
    get_dataset,
)

import unittest


class FastICAComponentTest(PreprocessingTestCase):
    """Tests for the FastICA feature-preprocessing component."""

    def test_default_configuration(self):
        transformation, original = _test_preprocessing(FastICA, dataset="diabetes")
        self.assertEqual(transformation.shape[0], original.shape[0])
        self.assertFalse((transformation == 0).all())

    def test_default_configuration_regression(self):
        # Repeat to guard against run-to-run nondeterminism.
        for _ in range(5):
            X_train, Y_train, X_test, Y_test = get_dataset(dataset="diabetes")
            configuration_space = FastICA.get_hyperparameter_search_space()
            default = configuration_space.get_default_configuration()
            preprocessor = FastICA(
                random_state=1, **{hp_name: default[hp_name] for hp_name in default}
            )
            preprocessor.fit(X_train, Y_train)
            X_train_trans = preprocessor.transform(X_train)
            X_test_trans = preprocessor.transform(X_test)

            # Fit a regressor on the transformed features and score it.
            predictions = Ridge().fit(X_train_trans, Y_train).predict(X_test_trans)
            accuracy = sklearn.metrics.r2_score(Y_test, predictions)
            self.assertAlmostEqual(accuracy, 0.32614416980439365)

    @unittest.skip("Always returns float64")
    def test_preprocessing_dtype(self):
        super()._test_preprocessing_dtype(FastICA, dataset="diabetes")
FeatureAgglomeration, test_sparse=False 44 | ) 45 | -------------------------------------------------------------------------------- /test/test_pipeline/components/feature_preprocessing/test_kitchen_sinks.py: -------------------------------------------------------------------------------- 1 | from autosklearn.pipeline.components.feature_preprocessing.kitchen_sinks import ( 2 | RandomKitchenSinks, 3 | ) 4 | from autosklearn.pipeline.util import PreprocessingTestCase, _test_preprocessing 5 | 6 | import unittest 7 | 8 | 9 | class KitchenSinkComponent(PreprocessingTestCase): 10 | def test_default_configuration(self): 11 | transformation, original = _test_preprocessing(RandomKitchenSinks) 12 | self.assertEqual(transformation.shape[0], original.shape[0]) 13 | self.assertEqual(transformation.shape[1], 100) 14 | self.assertFalse((transformation == 0).all()) 15 | 16 | @unittest.skip("Right now, the RBFSampler returns a float64 array!") 17 | def test_preprocessing_dtype(self): 18 | super(KitchenSinkComponent, self)._test_preprocessing_dtype(RandomKitchenSinks) 19 | -------------------------------------------------------------------------------- /test/test_pipeline/components/feature_preprocessing/test_pca.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from autosklearn.pipeline.components.feature_preprocessing.pca import PCA 4 | from autosklearn.pipeline.util import PreprocessingTestCase, _test_preprocessing 5 | 6 | 7 | class PCAComponentTest(PreprocessingTestCase): 8 | def test_default_configuration(self): 9 | transformations = [] 10 | for i in range(2): 11 | transformation, original = _test_preprocessing(PCA) 12 | self.assertEqual(transformation.shape, original.shape) 13 | self.assertFalse((transformation == original).all()) 14 | transformations.append(transformation) 15 | if len(transformations) > 1: 16 | np.testing.assert_allclose( 17 | transformations[-1], transformations[-2], rtol=1e-4 18 | ) 19 | 20 | 
def test_preprocessing_dtype(self): 21 | super(PCAComponentTest, self)._test_preprocessing_dtype(PCA, test_sparse=False) 22 | -------------------------------------------------------------------------------- /test/test_pipeline/components/feature_preprocessing/test_random_trees_embedding.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse 3 | 4 | from autosklearn.pipeline.components.feature_preprocessing.random_trees_embedding import ( # noqa: E501 5 | RandomTreesEmbedding, 6 | ) 7 | from autosklearn.pipeline.util import _test_preprocessing, get_dataset 8 | 9 | import unittest 10 | 11 | 12 | class RandomTreesEmbeddingComponentTest(unittest.TestCase): 13 | def test_default_configuration(self): 14 | transformation, original = _test_preprocessing(RandomTreesEmbedding) 15 | self.assertEqual(transformation.shape[0], original.shape[0]) 16 | self.assertEqual(transformation.shape[1], 218) 17 | self.assertIsInstance(original, np.ndarray) 18 | self.assertTrue(scipy.sparse.issparse(transformation)) 19 | self.assertTrue(all(transformation.data == 1)) 20 | 21 | @unittest.skip("Right now, the RTE returns a float64 array!") 22 | def test_preprocessing_dtype(self): 23 | # Dense 24 | # np.float32 25 | X_train, Y_train, X_test, Y_test = get_dataset("iris") 26 | self.assertEqual(X_train.dtype, np.float32) 27 | 28 | configuration_space = RandomTreesEmbedding.get_hyperparameter_search_space() 29 | default = configuration_space.get_default_configuration() 30 | preprocessor = RandomTreesEmbedding( 31 | random_state=1, **{hp_name: default[hp_name] for hp_name in default} 32 | ) 33 | preprocessor.fit(X_train) 34 | Xt = preprocessor.transform(X_train) 35 | 36 | self.assertEqual(Xt.dtype, np.float32) 37 | 38 | # np.float64 39 | X_train, Y_train, X_test, Y_test = get_dataset("iris") 40 | X_train = X_train.astype(np.float64) 41 | configuration_space = RandomTreesEmbedding.get_hyperparameter_search_space() 42 | 
default = configuration_space.get_default_configuration() 43 | preprocessor = RandomTreesEmbedding( 44 | random_state=1, **{hp_name: default[hp_name] for hp_name in default} 45 | ) 46 | preprocessor.fit(X_train, Y_train) 47 | Xt = preprocessor.transform(X_train) 48 | self.assertEqual(Xt.dtype, np.float64) 49 | -------------------------------------------------------------------------------- /test/test_pipeline/components/feature_preprocessing/test_select_percentile_regression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from autosklearn.pipeline.components.feature_preprocessing.select_percentile_regression import ( # noqa: E501 4 | SelectPercentileRegression, 5 | ) 6 | from autosklearn.pipeline.util import _test_preprocessing, get_dataset 7 | 8 | import unittest 9 | 10 | 11 | class SelectPercentileRegressionTest(unittest.TestCase): 12 | def test_default_configuration(self): 13 | transformation, original = _test_preprocessing( 14 | dataset="boston", 15 | Preprocessor=SelectPercentileRegression, 16 | ) 17 | self.assertEqual(transformation.shape[0], original.shape[0]) 18 | self.assertEqual(transformation.shape[1], int(original.shape[1] / 2)) 19 | self.assertFalse((transformation == 0).all()) 20 | 21 | def test_preprocessing_dtype(self): 22 | # Dense 23 | # np.float32 24 | X_train, Y_train, X_test, Y_test = get_dataset("iris") 25 | self.assertEqual(X_train.dtype, np.float32) 26 | 27 | configuration_space = ( 28 | SelectPercentileRegression.get_hyperparameter_search_space() 29 | ) 30 | default = configuration_space.get_default_configuration() 31 | preprocessor = SelectPercentileRegression( 32 | random_state=1, **{hp_name: default[hp_name] for hp_name in default} 33 | ) 34 | preprocessor.fit(X_train, Y_train) 35 | Xt = preprocessor.transform(X_train) 36 | self.assertEqual(Xt.dtype, np.float32) 37 | 38 | # np.float64 39 | X_train, Y_train, X_test, Y_test = get_dataset("iris") 40 | X_train = 
X_train.astype(np.float64) 41 | configuration_space = ( 42 | SelectPercentileRegression.get_hyperparameter_search_space() 43 | ) 44 | default = configuration_space.get_default_configuration() 45 | preprocessor = SelectPercentileRegression( 46 | random_state=1, **{hp_name: default[hp_name] for hp_name in default} 47 | ) 48 | preprocessor.fit(X_train, Y_train) 49 | Xt = preprocessor.transform(X_train) 50 | self.assertEqual(Xt.dtype, np.float64) 51 | -------------------------------------------------------------------------------- /test/test_pipeline/components/feature_preprocessing/test_truncatedSVD.py: -------------------------------------------------------------------------------- 1 | import sklearn.metrics 2 | from sklearn.linear_model import RidgeClassifier 3 | 4 | from autosklearn.pipeline.components.feature_preprocessing.truncatedSVD import ( 5 | TruncatedSVD, 6 | ) 7 | from autosklearn.pipeline.util import ( 8 | PreprocessingTestCase, 9 | _test_preprocessing, 10 | get_dataset, 11 | ) 12 | 13 | import unittest 14 | 15 | 16 | class TruncatedSVDComponentTest(PreprocessingTestCase): 17 | def test_default_configuration(self): 18 | transformation, original = _test_preprocessing(TruncatedSVD) 19 | self.assertEqual(transformation.shape[0], original.shape[0]) 20 | self.assertFalse((transformation == 0).all()) 21 | 22 | def test_default_configuration_classify(self): 23 | for i in range(2): 24 | X_train, Y_train, X_test, Y_test = get_dataset( 25 | dataset="digits", make_sparse=True 26 | ) 27 | configuration_space = TruncatedSVD.get_hyperparameter_search_space() 28 | default = configuration_space.get_default_configuration() 29 | preprocessor = TruncatedSVD( 30 | random_state=1, 31 | **{ 32 | hp_name: default[hp_name] 33 | for hp_name in default 34 | if default[hp_name] is not None 35 | }, 36 | ) 37 | preprocessor.fit(X_train, Y_train) 38 | X_train_trans = preprocessor.transform(X_train) 39 | X_test_trans = preprocessor.transform(X_test) 40 | 41 | # fit a classifier on top 
42 | classifier = RidgeClassifier() 43 | predictor = classifier.fit(X_train_trans, Y_train) 44 | predictions = predictor.predict(X_test_trans) 45 | accuracy = sklearn.metrics.accuracy_score(predictions, Y_test) 46 | self.assertAlmostEqual(accuracy, 0.44201578627808136, places=2) 47 | 48 | @unittest.skip("Truncated SVD returns np.float64.") 49 | def test_preprocessing_dtype(self): 50 | super(TruncatedSVDComponentTest, self)._test_preprocessing_dtype( 51 | TruncatedSVD, test_sparse=False 52 | ) 53 | -------------------------------------------------------------------------------- /test/test_pipeline/components/regression/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/test/test_pipeline/components/regression/__init__.py -------------------------------------------------------------------------------- /test/test_pipeline/components/regression/test_adaboost.py: -------------------------------------------------------------------------------- 1 | import sklearn.ensemble 2 | 3 | from autosklearn.pipeline.components.regression.adaboost import AdaboostRegressor 4 | 5 | from .test_base import BaseRegressionComponentTest 6 | 7 | 8 | class AdaBoostComponentTest(BaseRegressionComponentTest): 9 | 10 | __test__ = True 11 | 12 | res = dict() 13 | res["default_boston"] = 0.5951486466070626 14 | res["default_boston_iterative"] = None 15 | res["default_boston_sparse"] = 0.18067558132702222 16 | res["default_boston_iterative_sparse"] = None 17 | res["default_diabetes"] = 0.250565253614339 18 | res["default_diabetes_iterative"] = None 19 | res["default_diabetes_sparse"] = 0.09126705185668416 20 | res["default_diabetes_iterative_sparse"] = None 21 | 22 | sk_mod = sklearn.ensemble.AdaBoostRegressor 23 | 24 | module = AdaboostRegressor 25 | -------------------------------------------------------------------------------- 
/test/test_pipeline/components/regression/test_ard_regression.py: -------------------------------------------------------------------------------- 1 | import sklearn.linear_model 2 | 3 | from autosklearn.pipeline.components.regression.ard_regression import ARDRegression 4 | 5 | from .test_base import BaseRegressionComponentTest 6 | 7 | 8 | class ARDRegressionComponentTest(BaseRegressionComponentTest): 9 | 10 | __test__ = True 11 | 12 | res = dict() 13 | res["default_boston"] = 0.7033160711079323 14 | res["default_boston_iterative"] = None 15 | res["default_boston_sparse"] = None 16 | res["default_boston_iterative_sparse"] = None 17 | res["default_diabetes"] = 0.4172008418124077 18 | res["default_diabetes_iterative"] = None 19 | res["default_diabetes_sparse"] = None 20 | res["default_diabetes_iterative_sparse"] = None 21 | 22 | sk_mod = sklearn.linear_model.ARDRegression 23 | 24 | module = ARDRegression 25 | -------------------------------------------------------------------------------- /test/test_pipeline/components/regression/test_decision_tree.py: -------------------------------------------------------------------------------- 1 | import sklearn.tree 2 | 3 | from autosklearn.pipeline.components.regression.decision_tree import DecisionTree 4 | 5 | from .test_base import BaseRegressionComponentTest 6 | 7 | 8 | class DecisionTreeComponentTest(BaseRegressionComponentTest): 9 | 10 | __test__ = True 11 | 12 | res = dict() 13 | res["default_boston"] = 0.35616796434879905 14 | res["default_boston_iterative"] = None 15 | res["default_boston_sparse"] = 0.18031669797027394 16 | res["default_boston_iterative_sparse"] = None 17 | res["default_diabetes"] = 0.1564592449511697 18 | res["default_diabetes_iterative"] = None 19 | res["default_diabetes_sparse"] = -0.020818312539637507 20 | res["default_diabetes_iterative_sparse"] = None 21 | 22 | sk_mod = sklearn.tree.DecisionTreeRegressor 23 | 24 | module = DecisionTree 25 | 
-------------------------------------------------------------------------------- /test/test_pipeline/components/regression/test_extra_trees.py: -------------------------------------------------------------------------------- 1 | import sklearn.ensemble 2 | 3 | from autosklearn.pipeline.components.regression.extra_trees import ExtraTreesRegressor 4 | 5 | from .test_base import BaseRegressionComponentTest 6 | 7 | 8 | class ExtraTreesComponentTest(BaseRegressionComponentTest): 9 | 10 | __test__ = True 11 | 12 | res = dict() 13 | res["default_boston"] = 0.8539264243687228 14 | res["boston_n_calls"] = 9 15 | res["default_boston_iterative"] = res["default_boston"] 16 | res["default_boston_sparse"] = 0.411211701806908 17 | res["default_boston_iterative_sparse"] = res["default_boston_sparse"] 18 | res["default_diabetes"] = 0.3885150255877827 19 | res["diabetes_n_calls"] = 9 20 | res["default_diabetes_iterative"] = res["default_diabetes"] 21 | res["default_diabetes_sparse"] = 0.2422804139169642 22 | res["default_diabetes_iterative_sparse"] = res["default_diabetes_sparse"] 23 | 24 | sk_mod = sklearn.ensemble.ExtraTreesRegressor 25 | module = ExtraTreesRegressor 26 | step_hyperparameter = { 27 | "name": "n_estimators", 28 | "value": module.get_max_iter(), 29 | } 30 | -------------------------------------------------------------------------------- /test/test_pipeline/components/regression/test_gaussian_process.py: -------------------------------------------------------------------------------- 1 | import sklearn.gaussian_process 2 | 3 | from autosklearn.pipeline.components.regression.gaussian_process import GaussianProcess 4 | 5 | from .test_base import BaseRegressionComponentTest 6 | 7 | 8 | class GaussianProcessComponentTest(BaseRegressionComponentTest): 9 | 10 | __test__ = True 11 | 12 | res = dict() 13 | res["default_boston_le_ge"] = [0.6, 0.2] 14 | res["default_boston_places"] = 1 15 | res["default_boston_iterative"] = None 16 | res["default_boston_sparse"] = None 17 | 
res["default_boston_iterative_sparse"] = None 18 | res["default_diabetes"] = -0.017256687184589836 19 | res["default_diabetes_iterative"] = None 20 | res["default_diabetes_sparse"] = None 21 | res["default_diabetes_iterative_sparse"] = None 22 | 23 | sk_mod = sklearn.gaussian_process.GaussianProcessRegressor 24 | 25 | module = GaussianProcess 26 | 27 | """ 28 | # Leave this here for future reference 29 | # My machine: 0.574913739659292 30 | # travis-ci: 0.49562471963524557 31 | self.assertLessEqual( 32 | sklearn.metrics.r2_score(y_true=targets, y_pred=predictions), 33 | 0.6) 34 | self.assertGreaterEqual( 35 | sklearn.metrics.r2_score(y_true=targets, y_pred=predictions), 36 | 0.4) 37 | """ 38 | -------------------------------------------------------------------------------- /test/test_pipeline/components/regression/test_gradient_boosting.py: -------------------------------------------------------------------------------- 1 | import sklearn.ensemble 2 | 3 | from autosklearn.pipeline.components.regression.gradient_boosting import ( 4 | GradientBoosting, 5 | ) 6 | 7 | from .test_base import BaseRegressionComponentTest 8 | 9 | 10 | class GradientBoostingComponentTest(BaseRegressionComponentTest): 11 | 12 | __test__ = True 13 | 14 | res = dict() 15 | res["default_boston"] = 0.7491382574462079 16 | res["default_boston_iterative"] = 0.7491382574462079 17 | res["default_boston_sparse"] = None 18 | res["boston_n_calls"] = 9 19 | res["default_diabetes"] = 0.2872735632261877 20 | res["default_diabetes_iterative"] = 0.2872735632261877 21 | res["default_diabetes_sparse"] = None 22 | res["diabetes_n_call"] = 11 23 | 24 | sk_mod = sklearn.ensemble.GradientBoostingRegressor 25 | module = GradientBoosting 26 | -------------------------------------------------------------------------------- /test/test_pipeline/components/regression/test_k_nearest_neighbors.py: -------------------------------------------------------------------------------- 1 | import sklearn.neighbors 2 | 3 | from 
autosklearn.pipeline.components.regression.k_nearest_neighbors import ( 4 | KNearestNeighborsRegressor, 5 | ) 6 | 7 | from .test_base import BaseRegressionComponentTest 8 | 9 | 10 | class KNearestNeighborsComponentTest(BaseRegressionComponentTest): 11 | 12 | __test__ = True 13 | 14 | res = dict() 15 | res["default_boston"] = 0.18393287980040374 16 | res["default_boston_iterative"] = None 17 | res["default_boston_sparse"] = -0.23029229186279609 18 | res["default_boston_iterative_sparse"] = None 19 | res["default_diabetes"] = 0.068600456340847438 20 | res["default_diabetes_iterative"] = None 21 | res["default_diabetes_sparse"] = -0.16321841460809972 22 | res["default_diabetes_iterative_sparse"] = None 23 | 24 | sk_mod = sklearn.neighbors.KNeighborsRegressor 25 | 26 | module = KNearestNeighborsRegressor 27 | -------------------------------------------------------------------------------- /test/test_pipeline/components/regression/test_liblinear_svr.py: -------------------------------------------------------------------------------- 1 | import sklearn.svm 2 | 3 | from autosklearn.pipeline.components.regression.liblinear_svr import LibLinear_SVR 4 | 5 | from .test_base import BaseRegressionComponentTest 6 | 7 | 8 | class SupportVectorComponentTest(BaseRegressionComponentTest): 9 | __test__ = True 10 | 11 | res = dict() 12 | res["default_boston"] = 0.6768297818275556 13 | res["default_boston_places"] = 2 14 | res["default_boston_iterative"] = None 15 | res["default_boston_sparse"] = 0.12626519114138912 16 | res["default_boston_sparse_places"] = 2 17 | res["default_boston_iterative_sparse"] = None 18 | res["default_diabetes"] = 0.39152218711865661 19 | res["default_diabetes_iterative"] = None 20 | res["default_diabetes_sparse"] = 0.18704323088631891 21 | res["default_diabetes_iterative_sparse"] = None 22 | 23 | sk_mod = sklearn.svm.LinearSVR 24 | 25 | module = LibLinear_SVR 26 | -------------------------------------------------------------------------------- 
/test/test_pipeline/components/regression/test_random_forests.py: -------------------------------------------------------------------------------- 1 | import sklearn.ensemble 2 | 3 | from autosklearn.pipeline.components.regression.random_forest import RandomForest 4 | 5 | from .test_base import BaseRegressionComponentTest 6 | 7 | 8 | class RandomForestComponentTest(BaseRegressionComponentTest): 9 | __test__ = True 10 | 11 | res = dict() 12 | res["default_boston"] = 0.8410063895401654 13 | res["boston_n_calls"] = 9 14 | res["default_boston_iterative"] = res["default_boston"] 15 | res["default_boston_sparse"] = 0.4194462097407078 16 | res["default_boston_iterative_sparse"] = res["default_boston_sparse"] 17 | res["default_diabetes"] = 0.3496051170409269 18 | res["diabetes_n_calls"] = 9 19 | res["default_diabetes_iterative"] = res["default_diabetes"] 20 | res["default_diabetes_sparse"] = 0.2383300978781976 21 | res["default_diabetes_iterative_sparse"] = res["default_diabetes_sparse"] 22 | 23 | sk_mod = sklearn.ensemble.RandomForestRegressor 24 | module = RandomForest 25 | step_hyperparameter = { 26 | "name": "n_estimators", 27 | "value": module.get_max_iter(), 28 | } 29 | -------------------------------------------------------------------------------- /test/test_pipeline/components/regression/test_sgd.py: -------------------------------------------------------------------------------- 1 | import sklearn.linear_model 2 | 3 | from autosklearn.pipeline.components.regression.sgd import SGD 4 | 5 | from .test_base import BaseRegressionComponentTest 6 | 7 | 8 | class SGDComponentTest(BaseRegressionComponentTest): 9 | __test__ = True 10 | 11 | # Values are extremely bad because the invscaling does not drop the 12 | # learning rate aggressively enough! 
13 | res = dict() 14 | res["default_boston"] = -1.1811672998629865e28 15 | res["boston_n_calls"] = 6 16 | res["default_boston_iterative"] = res["default_boston"] 17 | res["default_boston_sparse"] = -1.1518512489347601e28 18 | res["default_boston_iterative_sparse"] = res["default_boston_sparse"] 19 | res["default_diabetes"] = 0.27420813549185374 20 | res["diabetes_n_calls"] = 10 21 | res["default_diabetes_iterative"] = res["default_diabetes"] 22 | res["default_diabetes_sparse"] = 0.034801785011824404 23 | res["default_diabetes_iterative_sparse"] = res["default_diabetes_sparse"] 24 | 25 | sk_mod = sklearn.linear_model.SGDRegressor 26 | module = SGD 27 | -------------------------------------------------------------------------------- /test/test_pipeline/components/regression/test_support_vector_regression.py: -------------------------------------------------------------------------------- 1 | import sklearn.linear_model 2 | 3 | from autosklearn.pipeline.components.regression.libsvm_svr import LibSVM_SVR 4 | 5 | from .test_base import BaseRegressionComponentTest 6 | 7 | 8 | class SupportVectorComponentTest(BaseRegressionComponentTest): 9 | __test__ = True 10 | 11 | res = dict() 12 | res["default_boston"] = -0.030006883949312613 13 | res["default_boston_iterative"] = None 14 | res["default_boston_sparse"] = -0.062749211736050192 15 | res["default_boston_iterative_sparse"] = None 16 | res["default_diabetes"] = 0.12849591861430087 17 | res["default_diabetes_iterative"] = None 18 | res["default_diabetes_sparse"] = 0.0098877566961463881 19 | res["default_diabetes_iterative_sparse"] = None 20 | 21 | sk_mod = sklearn.svm.SVR 22 | 23 | module = LibSVM_SVR 24 | -------------------------------------------------------------------------------- /test/test_pipeline/components/test_base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from autosklearn.pipeline.components.base import ( 5 | 
AutoSklearnClassificationAlgorithm, 6 | find_components, 7 | ) 8 | 9 | import unittest 10 | 11 | this_dir = os.path.dirname(os.path.abspath(__file__)) 12 | sys.path.append(this_dir) 13 | 14 | 15 | class TestBase(unittest.TestCase): 16 | def test_find_components(self): 17 | c = find_components( 18 | "dummy_components", 19 | os.path.join(this_dir, "dummy_components"), 20 | AutoSklearnClassificationAlgorithm, 21 | ) 22 | print("COMPONENTS: %s" % repr(c)) 23 | self.assertEqual(len(c), 2) 24 | self.assertEqual(c["dummy_component_1"].__name__, "DummyComponent1") 25 | self.assertEqual(c["dummy_component_2"].__name__, "DummyComponent2") 26 | -------------------------------------------------------------------------------- /test/test_pipeline/implementations/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "feurerm" 2 | -------------------------------------------------------------------------------- /test/test_pipeline/implementations/test_CategoryShift.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse 3 | 4 | from autosklearn.pipeline.implementations.CategoryShift import CategoryShift 5 | 6 | import unittest 7 | 8 | 9 | class CategoryShiftTest(unittest.TestCase): 10 | def test_dense(self): 11 | X = np.random.randint(0, 255, (3, 4)) 12 | Y = CategoryShift().fit_transform(X) 13 | self.assertTrue((Y == X + 3).all()) 14 | 15 | def test_sparse(self): 16 | X = scipy.sparse.csc_matrix( 17 | ([1, 2, 0, 4], ([0, 1, 2, 1], [3, 2, 1, 0])), shape=(3, 4) 18 | ) 19 | Y = CategoryShift().fit_transform(X) 20 | X.data += 3 21 | self.assertTrue((Y.todense() == X.todense()).all()) 22 | # Check if the sparsity stays the same before and after the transformation 23 | self.assertEqual(X.data.shape, Y.data.shape) 24 | self.assertTrue((X.indices == Y.indices).all()) 25 | self.assertTrue((X.indptr == Y.indptr).all()) 26 | 27 | def test_negative(self): 28 | X 
= np.array([[-1, 2], [3, 4]]) 29 | with self.assertRaises(ValueError): 30 | CategoryShift().fit_transform(X) 31 | 32 | def test_string(self): 33 | X = np.array([["a", "b"], ["c", "d"]]) 34 | with self.assertRaises(ValueError): 35 | CategoryShift().fit_transform(X) 36 | -------------------------------------------------------------------------------- /test/test_pipeline/implementations/test_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from autosklearn.pipeline.implementations.util import softmax 4 | 5 | import unittest 6 | 7 | 8 | class UtilTest(unittest.TestCase): 9 | def test_softmax_binary(self): 10 | df = np.array( 11 | [ 12 | -40.00643897, 13 | 34.69754581, 14 | 23.71181359, 15 | -29.89724287, 16 | 27.06071791, 17 | -37.78334103, 18 | -40.15812461, 19 | 40.16139229, 20 | -27.85887801, 21 | 42.67404756, 22 | -36.89753589, 23 | -36.45148009, 24 | 54.68976306, 25 | 19.47886562, 26 | -49.99821027, 27 | -35.70205302, 28 | -40.59639267, 29 | 32.96343916, 30 | -39.23777841, 31 | -37.86535019, 32 | -33.10196906, 33 | 26.84144377, 34 | -36.8569686, 35 | ] 36 | ) 37 | probas = softmax(df) 38 | expected = [[1.0, 0.0] if d < 0.0 else [0.0, 1.0] for d in df] 39 | np.testing.assert_array_almost_equal(expected, probas) 40 | 41 | def test_softmax(self): 42 | df = np.array( 43 | [ 44 | [2.75021367e10, -8.83772371e-01, -2.20516715e27], 45 | [-2.10848072e11, 2.35024444e-01, 5.20106536e25], 46 | ] 47 | ) 48 | # With a numerically unstable softmax, the output would be something 49 | # like this: 50 | # [[ 0. 0. nan] 51 | # [nan 0. 
0.]] 52 | probas = softmax(df) 53 | expected = np.array([[1, 0, 0], [0, 0, 1]]) 54 | self.assertTrue((expected == probas).all()) 55 | 56 | df = np.array([[0.1, 0.6, 0.3], [0.2, 0.3, 0.5]]) 57 | probas = softmax(df) 58 | expected = np.array( 59 | [[0.25838965, 0.42601251, 0.31559783], [0.28943311, 0.31987306, 0.39069383]] 60 | ) 61 | np.testing.assert_array_almost_equal(expected, probas) 62 | -------------------------------------------------------------------------------- /test/test_scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/auto-sklearn/673211252ca508b6f5bb92cf5fa87c6455bbad99/test/test_scripts/__init__.py -------------------------------------------------------------------------------- /test/test_util/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | __author__ = "feurerm" 3 | -------------------------------------------------------------------------------- /test/test_util/example_config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 1 3 | disable_existing_loggers: False 4 | formatters: 5 | simple: 6 | format: '[%(levelname)s] [%(asctime)s:%(name)s] %(message)s' 7 | 8 | handlers: 9 | console: 10 | class: logging.StreamHandler 11 | level: WARNING 12 | formatter: simple 13 | stream: ext://sys.stdout 14 | 15 | file_handler: 16 | class: logging.FileHandler 17 | level: DEBUG 18 | formatter: simple 19 | filename: autosklearn.log 20 | 21 | distributed_logfile: 22 | class: logging.FileHandler 23 | level: DEBUG 24 | formatter: simple 25 | filename: distributed.log 26 | 27 | root: 28 | level: CRITICAL 29 | handlers: [console, file_handler] 30 | 31 | loggers: 32 | autosklearn.metalearning: 33 | level: NOTSET 34 | handlers: [file_handler] 35 | propagate: no 36 | 37 | autosklearn.automl_common.common.utils.backend: 38 | level: DEBUG 39 | 
handlers: [file_handler] 40 | propagate: no 41 | 42 | smac.intensification.intensification.Intensifier: 43 | level: INFO 44 | handlers: [file_handler, console] 45 | 46 | smac.optimizer.local_search.LocalSearch: 47 | level: INFO 48 | handlers: [file_handler, console] 49 | 50 | smac.optimizer.smbo.SMBO: 51 | level: INFO 52 | handlers: [file_handler, console] 53 | -------------------------------------------------------------------------------- /test/test_util/test_common.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | import os 3 | 4 | from autosklearn.util.common import check_pid 5 | 6 | import unittest 7 | 8 | 9 | class TestUtilsCommon(unittest.TestCase): 10 | _multiprocess_can_split_ = True 11 | 12 | def test_check_pid(self): 13 | our_pid = os.getpid() 14 | 15 | exists = check_pid(our_pid) 16 | self.assertTrue(exists) 17 | our_pid = -11000 # We hope this pid does not exist 18 | exists = check_pid(our_pid) 19 | self.assertFalse(exists) 20 | 21 | 22 | if __name__ == "__main__": 23 | unittest.main() 24 | -------------------------------------------------------------------------------- /test/test_util/test_dask.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from dask.distributed import Client, LocalCluster 4 | 5 | from autosklearn.util.dask import LocalDask, UserDask 6 | 7 | import pytest 8 | 9 | 10 | @pytest.mark.parametrize("n_jobs", [1, 2]) 11 | def test_user_dask(tmp_path: Path, n_jobs: int) -> None: 12 | """ 13 | Expects 14 | ------- 15 | * A UserDask should not close the client after exiting context 16 | """ 17 | cluster = LocalCluster( 18 | n_workers=n_jobs, 19 | processes=False, 20 | threads_per_worker=1, 21 | local_directory=tmp_path, 22 | ) 23 | client = Client(cluster, heartbeat_interval=10000) 24 | 25 | # Active at creation 26 | dask = UserDask(client) 27 | 28 | client_1 = None 29 | with dask as user_client: 30 | 
client_1 = user_client 31 | assert user_client.status == "running" 32 | 33 | client_2 = None 34 | with dask as user_client: 35 | assert user_client.status == "running" 36 | client_2 = user_client 37 | 38 | # Make sure they are the same client 39 | assert id(client_1) == id(client_2) 40 | 41 | # Remains running after context 42 | assert client_1.status == "running" 43 | 44 | cluster.close() 45 | client.close() 46 | 47 | assert client.status == "closed" 48 | 49 | 50 | def test_local_dask_creates_new_clients(tmp_path: Path) -> None: 51 | """ 52 | Expects 53 | ------- 54 | * A LocalDask should create new dask clusters at each context usage 55 | """ 56 | # We need 2 to use an actual dask client and not a SingleThreadedClient 57 | local_dask = LocalDask(n_jobs=2) 58 | 59 | client_1 = None 60 | with local_dask as client: 61 | client_1 = client 62 | assert client_1.status == "running" 63 | 64 | assert client_1.status == "closed" 65 | 66 | client_2 = None 67 | with local_dask as client: 68 | client_2 = client 69 | assert client_2.status == "running" 70 | 71 | # Make sure they were different clients 72 | assert id(client_1) != id(client_2) 73 | 74 | assert client_2.status == "closed" 75 | assert client_1.status == "closed" 76 | -------------------------------------------------------------------------------- /test/test_util/test_logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import logging.config 3 | import os 4 | import tempfile 5 | 6 | import yaml 7 | 8 | from autosklearn.util import logging_ 9 | 10 | import unittest 11 | 12 | 13 | class LoggingTest(unittest.TestCase): 14 | def test_setup_logger(self): 15 | # Test that setup_logger function correctly configures the logger 16 | # according to the given dictionary, and uses the default 17 | # logging.yaml file if logging_config is not specified. 
18 | 19 | with open( 20 | os.path.join(os.path.dirname(__file__), "example_config.yaml"), "r" 21 | ) as fh: 22 | example_config = yaml.safe_load(fh) 23 | 24 | # Configure logger with example_config.yaml. 25 | logging_.setup_logger( 26 | logging_config=example_config, output_dir=tempfile.gettempdir() 27 | ) 28 | 29 | # example_config sets the root logger's level to CRITICAL, 30 | # which corresponds to 50. 31 | self.assertEqual(logging.getLogger().getEffectiveLevel(), 50) 32 | 33 | # This time use the default configuration. 34 | logging_.setup_logger(logging_config=None, output_dir=tempfile.gettempdir()) 35 | 36 | # default config sets the root logger's level to DEBUG, 37 | # which corresponds to 10. 38 | self.assertEqual(logging.getLogger().getEffectiveLevel(), 10) 39 | 40 | # Make sure we log to the desired directory 41 | logging_.setup_logger(output_dir=os.path.dirname(__file__), filename="test.log") 42 | logger = logging.getLogger() 43 | logger.info("test_setup_logger") 44 | 45 | with open(os.path.join(os.path.dirname(__file__), "test.log")) as fh: 46 | self.assertIn("test_setup_logger", "".join(fh.readlines())) 47 | os.remove(os.path.join(os.path.dirname(__file__), "test.log")) 48 | -------------------------------------------------------------------------------- /test/test_util/test_single_thread_client.py: -------------------------------------------------------------------------------- 1 | import dask.distributed 2 | from distributed.utils_test import inc 3 | 4 | from autosklearn.util.single_thread_client import SingleThreadedClient 5 | 6 | import pytest 7 | 8 | 9 | def test_single_thread_client_like_dask_client(): 10 | single_thread_client = SingleThreadedClient() 11 | assert isinstance(single_thread_client, dask.distributed.Client) 12 | future = single_thread_client.submit(inc, 1) 13 | assert isinstance(future, dask.distributed.Future) 14 | assert future.done() 15 | assert future.result() == 2 16 | assert sum(single_thread_client.nthreads().values()) == 1 17 
| single_thread_client.close() 18 | single_thread_client.shutdown() 19 | 20 | # Client/Futures are printed, so make sure str works 21 | # str calls __repr__ which is the purpose of below check 22 | assert str(future) != "" 23 | assert str(single_thread_client) != "" 24 | 25 | with pytest.raises(NotImplementedError): 26 | single_thread_client.get_scheduler_logs() 27 | -------------------------------------------------------------------------------- /test/test_util/test_stopwatch.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from autosklearn.util.stopwatch import StopWatch 4 | 5 | 6 | def test_stopwatch_overhead() -> None: 7 | wall_start = time.time() 8 | cpu_start = time.process_time() 9 | 10 | watch = StopWatch() 11 | for i in range(1, 1000): 12 | watch.start("task_%d" % i) 13 | watch.stop("task_%d" % i) 14 | 15 | cpu_end = time.process_time() 16 | wall_end = time.time() 17 | 18 | wall_duration = wall_end - wall_start 19 | cpu_duration = cpu_end - cpu_start 20 | 21 | cpu_overhead = cpu_duration - watch.total_cpu() 22 | wall_overhead = wall_duration - watch.total_wall() 23 | 24 | assert cpu_overhead < 1 25 | assert wall_overhead < 1 26 | assert watch.total_cpu() < 2 * watch.total_wall() 27 | 28 | 29 | def test_contextmanager() -> None: 30 | watch = StopWatch() 31 | 32 | with watch.time("task"): 33 | assert watch["task"].started() 34 | 35 | assert "task" in watch 36 | assert watch["task"].finished() 37 | 38 | assert watch["task"].cpu_duration is not None 39 | assert watch["task"].wall_duration is not None 40 | -------------------------------------------------------------------------------- /test/util.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from pytest import mark, param 4 | 5 | 6 | def fails(arg: Any, reason: str = "No reason given") -> Any: 7 | """Mark a parameter for pytest parametrize as expected to fail 8 | 9 | .. code::
python 10 | 11 | @parametrize("number", [2, 3, fails(5, "some reason")]) 12 | 13 | Parameters 14 | ---------- 15 | arg : Any 16 | The arg that should fail 17 | 18 | reason : str = "No reason given" 19 | The reason for the expected fail 20 | 21 | Returns 22 | ------- 23 | Any 24 | The param object 25 | """ 26 | return param(arg, marks=mark.xfail(reason=reason)) 27 | 28 | 29 | def skip(arg: Any, reason: str = "No reason given") -> Any: 30 | """Mark a parameter for pytest parametrize that should be skipped 31 | 32 | .. code:: python 33 | 34 | @parametrize("number", [2, 3, skip(5, "some reason")]) 35 | 36 | Parameters 37 | ---------- 38 | arg : Any 39 | The arg that should be skipped 40 | 41 | reason : str = "No reason given" 42 | The reason for skipping it 43 | 44 | Returns 45 | ------- 46 | Any 47 | The param object 48 | """ 49 | return param(arg, marks=mark.skip(reason=reason)) 50 | --------------------------------------------------------------------------------