├── test ├── __init__.py ├── test_utils │ ├── __init__.py │ ├── test_single_thread_client.py │ ├── test_common.py │ └── test_parallel_model_runner.py ├── test_pipeline │ ├── __init__.py │ ├── components │ │ ├── __init__.py │ │ ├── setup │ │ │ ├── __init__.py │ │ │ ├── forecasting │ │ │ │ ├── __init__.py │ │ │ │ └── forecasting_networks │ │ │ │ │ └── __init__.py │ │ │ └── test_setup_image_augmenter.py │ │ ├── training │ │ │ ├── __init__.py │ │ │ ├── test_forecasting_training.py │ │ │ ├── test_image_data_loader.py │ │ │ └── test_feature_data_loader.py │ │ └── preprocessing │ │ │ ├── __init__.py │ │ │ ├── forecasting │ │ │ ├── __init__.py │ │ │ ├── test_encoder_choice.py │ │ │ └── base.py │ │ │ ├── test_normalizers.py │ │ │ ├── test_variance_thresholding.py │ │ │ ├── test_normalizer_choice.py │ │ │ ├── base.py │ │ │ ├── test_encoder_choice.py │ │ │ └── test_scaler_choice.py │ └── test_traditional_pipeline.py ├── test_ensemble │ ├── .autoPyTorch │ │ ├── runs │ │ │ ├── 0_1_0.0 │ │ │ │ ├── 0.1.0.0.model │ │ │ │ ├── predictions_test_0_1_0.0.npy │ │ │ │ ├── predictions_valid_0_1_0.0.npy │ │ │ │ └── predictions_ensemble_0_1_0.0.npy │ │ │ ├── 0_2_0.0 │ │ │ │ ├── 0.2.0.0.model │ │ │ │ ├── predictions_test_0_2_0.0.np │ │ │ │ ├── predictions_test_0_2_0.0.npy │ │ │ │ ├── predictions_valid_0_2_0.0.npy │ │ │ │ └── predictions_ensemble_0_2_0.0.npy │ │ │ └── 0_3_100.0 │ │ │ │ ├── 0.3.0.0.model │ │ │ │ ├── 0.3.100.0.model │ │ │ │ ├── predictions_test_0_3_100.0.npy │ │ │ │ ├── predictions_valid_0_3_100.0.npy │ │ │ │ └── predictions_ensemble_0_3_100.0.npy │ │ └── predictions_ensemble_true.npy │ └── data │ │ └── .autoPyTorch │ │ ├── runs │ │ ├── 0_1_0.0 │ │ │ ├── 0.1.0.0.model │ │ │ ├── predictions_test_0_1_0.0.npy │ │ │ ├── predictions_valid_0_1_0.0.npy │ │ │ └── predictions_ensemble_0_1_0.0.npy │ │ ├── 0_2_0.0 │ │ │ ├── 0.2.0.0.model │ │ │ ├── predictions_test_0_2_0.0.np │ │ │ ├── predictions_test_0_2_0.0.npy │ │ │ ├── predictions_valid_0_2_0.0.npy │ │ │ └── predictions_ensemble_0_2_0.0.npy │ │ └── 0_3_100.0 │ │ │ ├── 0.3.0.0.model │ │ │ ├── 0.3.100.0.model │ │ │ ├── predictions_test_0_3_100.0.npy │ │ │ ├── predictions_valid_0_3_100.0.npy │ │ │ └── predictions_ensemble_0_3_100.0.npy │ │ ├── .auto-sklearn │ │ ├── runs │ │ │ ├── 0_1_0.0 │ │ │ │ ├── 0.1.0.0.model │ │ │ │ ├── predictions_test_0_1_0.0.npy │ │ │ │ ├── predictions_valid_0_1_0.0.npy │ │ │ │ └── predictions_ensemble_0_1_0.0.npy │ │ │ ├── 0_2_0.0 │ │ │ │ ├── 0.2.0.0.model │ │ │ │ ├── predictions_test_0_2_0.0.np │ │ │ │ ├── predictions_test_0_2_0.0.npy │ │ │ │ ├── predictions_valid_0_2_0.0.npy │ │ │ │ └── predictions_ensemble_0_2_0.0.npy │ │ │ └── 0_3_100.0 │ │ │ │ ├── 0.3.0.0.model │ │ │ │ ├── 0.3.100.0.model │ │ │ │ ├── predictions_test_0_3_100.0.npy │ │ │ │ ├── predictions_valid_0_3_100.0.npy │ │ │ │ └── predictions_ensemble_0_3_100.0.npy │ │ └── predictions_ensemble_true.npy │ │ └── predictions_ensemble_true.npy ├── test_data │ ├── __init__.py │ ├── utils.py │ └── test_forecasting_target_validator.py ├── test_evaluation │ ├── __init__.py │ └── test_utils.py └── test_datasets │ ├── test_base_dataset.py │ └── test_image_dataset.py ├── autoPyTorch ├── api │ └── __init__.py ├── py.typed ├── configs │ ├── __init__.py │ └── default_pipeline_options.json ├── datasets │ └── __init__.py ├── ensemble │ └── __init__.py ├── optimizer │ └── __init__.py ├── pipeline │ ├── __init__.py │ └── components │ │ ├── __init__.py │ │ ├── setup │ │ ├── __init__.py │ │ ├── network │ │ │ └── __init__.py │ │ ├── augmentation │ │ │ ├── __init__.py │ │ │ └── image │ │ │ │ ├── 
__init__.py │ │ │ │ ├── VerticalFlip.py │ │ │ │ ├── HorizontalFlip.py │ │ │ │ ├── base_image_augmenter.py │ │ │ │ ├── Resize.py │ │ │ │ ├── ZeroPadAndCrop.py │ │ │ │ └── GaussianNoise.py │ │ ├── early_preprocessor │ │ │ ├── __init__.py │ │ │ ├── EarlyPreprocessing.py │ │ │ └── utils.py │ │ ├── forecasting_target_scaling │ │ │ └── __init__.py │ │ ├── network_head │ │ │ ├── forecasting_network_head │ │ │ │ └── __init__.py │ │ │ └── utils.py │ │ ├── traditional_ml │ │ │ ├── estimator_configs │ │ │ │ ├── rotation_forest.json │ │ │ │ ├── knn.json │ │ │ │ ├── extra_trees.json │ │ │ │ ├── random_forest.json │ │ │ │ ├── svm.json │ │ │ │ ├── catboost.json │ │ │ │ └── lgb.json │ │ │ └── traditional_learner │ │ │ │ ├── utils.py │ │ │ │ └── __init__.py │ │ ├── network_backbone │ │ │ └── forecasting_backbone │ │ │ │ ├── other_components │ │ │ │ └── __init__.py │ │ │ │ ├── forecasting_decoder │ │ │ │ ├── __init__.py │ │ │ │ └── components.py │ │ │ │ └── forecasting_encoder │ │ │ │ └── flat_encoder │ │ │ │ └── __init__.py │ │ ├── lr_scheduler │ │ │ ├── constants.py │ │ │ ├── NoScheduler.py │ │ │ ├── base_scheduler.py │ │ │ └── CosineAnnealingLR.py │ │ ├── base_setup.py │ │ ├── network_initializer │ │ │ ├── NoInit.py │ │ │ ├── SparseInit.py │ │ │ ├── XavierInit.py │ │ │ ├── OrthogonalInit.py │ │ │ └── KaimingInit.py │ │ ├── forecasting_training_loss │ │ │ ├── base_forecasting_loss.py │ │ │ └── RegressionLoss.py │ │ ├── optimizer │ │ │ └── base_optimizer.py │ │ └── network_embedding │ │ │ └── NoEmbedding.py │ │ ├── training │ │ ├── __init__.py │ │ ├── metrics │ │ │ └── __init__.py │ │ ├── data_loader │ │ │ ├── __init__.py │ │ │ └── image_data_loader.py │ │ ├── trainer │ │ │ └── forecasting_trainer │ │ │ │ ├── ForecastingMixUpTrainer.py │ │ │ │ └── ForecastingStandardTrainer.py │ │ └── base_training.py │ │ └── preprocessing │ │ ├── __init__.py │ │ ├── image_preprocessing │ │ ├── __init__.py │ │ ├── base_image_preprocessor.py │ │ └── normalise │ │ │ ├── base_normalizer.py │ │ │ ├── NoNormalizer.py │ │ │ └── ImageNormalizer.py │ │ ├── tabular_preprocessing │ │ ├── __init__.py │ │ ├── imputation │ │ │ ├── __init__.py │ │ │ └── base_imputer.py │ │ ├── variance_thresholding │ │ │ ├── __init__.py │ │ │ └── VarianceThreshold.py │ │ ├── coalescer │ │ │ ├── NoCoalescer.py │ │ │ ├── base_coalescer.py │ │ │ └── MinorityCoalescer.py │ │ ├── scaling │ │ │ ├── base_scaler.py │ │ │ ├── MinMaxScaler.py │ │ │ ├── PowerTransformer.py │ │ │ ├── StandardScaler.py │ │ │ ├── NoScaler.py │ │ │ └── Normalizer.py │ │ ├── encoding │ │ │ ├── base_encoder.py │ │ │ ├── OneHotEncoder.py │ │ │ └── NoEncoder.py │ │ ├── utils.py │ │ ├── feature_preprocessing │ │ │ ├── base_feature_preprocessor.py │ │ │ └── NoFeaturePreprocessor.py │ │ └── base_tabular_preprocessing.py │ │ └── time_series_preprocessing │ │ ├── __init__.py │ │ ├── scaling │ │ └── __init__.py │ │ ├── imputation │ │ └── __init__.py │ │ ├── encoding │ │ ├── __init__.py │ │ ├── time_series_base_encoder.py │ │ ├── NoEncoder.py │ │ └── OneHotEncoder.py │ │ ├── base_time_series_preprocessing.py │ │ └── utils.py ├── utils │ ├── __init__.py │ ├── logging.yaml │ └── parallel.py ├── evaluation │ └── __init__.py ├── data │ └── __init__.py ├── __init__.py ├── metrics │ └── __init__.py └── __version__.py ├── .binder ├── apt.txt ├── requirements.txt └── postBuild ├── codecov.yml ├── figs └── apt_workflow.png ├── examples ├── README.txt ├── 20_basics │ ├── README.txt │ └── example_image_classification.py └── 40_advanced │ ├── README.txt │ ├── example_run_with_portfolio.py │ └── 
example_parallel_n_jobs.py ├── .gitmodules ├── docs ├── extending.rst ├── _templates │ ├── class.rst │ ├── function.rst │ ├── class_without_init.rst │ └── layout.html ├── api.rst ├── index.rst └── installation.rst ├── .flake8 ├── mypy.ini ├── requirements.txt ├── setup.cfg ├── cicd └── README.md ├── .coveragerc ├── CITATION.cff ├── .pre-commit-config.yaml ├── MANIFEST.in ├── .github ├── workflows │ ├── long_regression_test.yml │ ├── pre-commit.yaml │ ├── release.yml │ ├── dist.yml │ ├── docs.yml │ └── docker-publish.yml ├── ISSUE_TEMPLATE.md └── PULL_REQUEST_TEMPLATE.md ├── Dockerfile ├── .codecov.yml └── .gitignore /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/py.typed: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /test/test_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/ensemble/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_pipeline/components/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.binder/apt.txt: -------------------------------------------------------------------------------- 1 | build-essential 2 | swig 3 | 
-------------------------------------------------------------------------------- /test/test_pipeline/components/setup/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.binder/requirements.txt: -------------------------------------------------------------------------------- 1 | -r ../requirements.txt 2 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/training/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_pipeline/components/training/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/data/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_1_0.0/0.1.0.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_2_0.0/0.2.0.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_pipeline/components/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/augmentation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/training/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_3_100.0/0.3.0.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_3_100.0/0.3.100.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/0.1.0.0.model: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/0.2.0.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_pipeline/components/setup/forecasting/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/augmentation/image/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/early_preprocessor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/training/data_loader/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/0.3.0.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/0.3.100.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_pipeline/components/preprocessing/forecasting/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/image_preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/forecasting_target_scaling/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | token: 667dbd23-97e1-4ef7-9b80-a87c5ec8cb79 3 | -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/0.1.0.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/0.2.0.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_data/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | __author__ = 'feurerm' 3 | -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/0.3.0.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/0.3.100.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_pipeline/components/setup/forecasting/forecasting_networks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/network_head/forecasting_network_head/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/scaling/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | __author__ = 'feurerm' 3 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/imputation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /figs/apt_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/figs/apt_workflow.png -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/variance_thresholding/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/rotation_forest.json: -------------------------------------------------------------------------------- 1 | { 2 | } 3 | -------------------------------------------------------------------------------- /autoPyTorch/__init__.py: -------------------------------------------------------------------------------- 1 | from autoPyTorch.__version__ import __version__ # noqa (imported but unused) 2 | 
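# the import above re-exports the version string, so `autoPyTorch.__version__` resolves to it at the package root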
-------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/network_backbone/forecasting_backbone/other_components/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from autoPyTorch.pipeline.components.training.metrics.metrics import * # noqa 2 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/knn.json: -------------------------------------------------------------------------------- 1 | { 2 | "weights" : "uniform" 3 | } 4 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/extra_trees.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_estimators" : 300 3 | } 4 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/random_forest.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_estimators" : 300 3 | } 4 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/svm.json: -------------------------------------------------------------------------------- 1 | { 2 | "C" : 1.0, 3 | "degree" : 3 4 | } 5 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | .. _examples: 2 | 3 | ======== 4 | Examples 5 | ======== 6 | 7 | Practical examples for using *Auto-PyTorch*. 8 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "autoPyTorch/automl_common"] 2 | path = autoPyTorch/automl_common 3 | url = https://github.com/automl/automl_common.git 4 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/catboost.json: -------------------------------------------------------------------------------- 1 | { 2 | "iterations" : 10000, 3 | "learning_rate" : 0.1 4 | } 5 | -------------------------------------------------------------------------------- /docs/extending.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | .. 
_extending: 4 | 5 | ====================== 6 | Extending Auto-PyTorch 7 | ====================== 8 | 9 | TODO -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | show-source = True 4 | application-import-names = autoPyTorch 5 | exclude = 6 | venv 7 | build 8 | -------------------------------------------------------------------------------- /autoPyTorch/__version__.py: -------------------------------------------------------------------------------- 1 | """Version information.""" 2 | 3 | # The following line *must* be the last in the module, exactly as formatted: 4 | __version__ = "0.2.1" 5 | -------------------------------------------------------------------------------- /docs/_templates/class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/predictions_ensemble_true.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/predictions_ensemble_true.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/predictions_ensemble_true.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/predictions_ensemble_true.npy -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_test_0_1_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_test_0_1_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_valid_0_1_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_valid_0_1_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.np: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.np -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_valid_0_2_0.0.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_valid_0_2_0.0.npy -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/network_head/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | _activations = { 4 | "relu": torch.nn.ReLU, 5 | "tanh": torch.nn.Tanh, 6 | "sigmoid": torch.nn.Sigmoid 7 | } 8 | -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_test_0_3_100.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_test_0_3_100.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_valid_0_3_100.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_valid_0_3_100.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_test_0_1_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_test_0_1_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_valid_0_1_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_valid_0_1_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.np: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.np -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.npy 
-------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_valid_0_2_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_valid_0_2_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/predictions_ensemble_true.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/predictions_ensemble_true.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_test_0_3_100.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_test_0_3_100.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_valid_0_3_100.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_valid_0_3_100.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.np: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.np 
-------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_test_0_1_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_test_0_1_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_valid_0_1_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_valid_0_1_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_valid_0_2_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_valid_0_2_0.0.npy -------------------------------------------------------------------------------- /examples/20_basics/README.txt: -------------------------------------------------------------------------------- 1 | .. 
_examples_tabular_basics: 2 | 3 | 4 | ============================== 5 | Basic Tabular Dataset Examples 6 | ============================== 7 | 8 | Basic examples for using *Auto-PyTorch* on tabular datasets 9 | -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_test_0_3_100.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_test_0_3_100.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_valid_0_3_100.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_valid_0_3_100.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy -------------------------------------------------------------------------------- /docs/_templates/function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. raw:: html 9 | 10 |
11 | -------------------------------------------------------------------------------- /examples/40_advanced/README.txt: -------------------------------------------------------------------------------- 1 | .. _examples_tabular_basics: 2 | 3 | 4 | ================================= 5 | Advanced Tabular Dataset Examples 6 | ================================= 7 | 8 | Advanced examples for using *Auto-PyTorch* on tabular datasets. 9 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | # Reports any config lines that are not recognized 3 | warn_unused_configs=True 4 | ignore_missing_imports=True 5 | follow_imports=skip 6 | disallow_untyped_defs=True 7 | disallow_incomplete_defs=True 8 | disallow_untyped_decorators=True 9 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/lgb.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_rounds" : 10000, 3 | "num_leaves" : 128, 4 | "two_round" : "True", 5 | "min_data_in_leaf" : 3, 6 | "feature_fraction" : 0.9, 7 | "boosting_type" : "gbdt", 8 | "learning_rate" : 0.03 9 | } 10 | -------------------------------------------------------------------------------- /docs/_templates/class_without_init.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /autoPyTorch/configs/default_pipeline_options.json: -------------------------------------------------------------------------------- 1 | { 2 | "device": "cpu", 3 | "budget_type": "epochs", 4 | "epochs": 50, 5 | "runtime": 3600, 6 | "torch_num_threads": 1, 7 | "early_stopping": 20, 8 | "use_tensorboard_logger": "False", 9 | "metrics_during_training": "True" 10 | } 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | torch>=1.10.1 3 | torchvision 4 | tensorboard 5 | scikit-learn>=0.24.0,<0.25.0 6 | numpy 7 | scipy>=1.7 8 | lockfile 9 | imgaug>=0.4.0 10 | ConfigSpace>=0.5.0 11 | pynisher>=0.6.3 12 | pyrfr>=0.7,<0.9 13 | smac>=1.2 14 | dask 15 | distributed>=2.2.0 16 | catboost 17 | lightgbm 18 | flaky 19 | tabulate 20 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | 4 | [flake8] 5 | application-import-names = autoPyTorch 6 | max-line-length = 120 7 | ignore = W605,E402,W503 8 | show-source = True 9 | 10 | [mypy] 11 | ignore_missing_imports = True 12 | follow_imports=skip 13 | disallow_untyped_decorators = True 14 | disallow_incomplete_defs = True 15 | disallow_untyped_defs = True -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/traditional_ml/traditional_learner/utils.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class AutoPyTorchToCatboostMetrics(Enum): 5 | mean_absolute_error = "MAE" 6 | root_mean_squared_error = "RMSE" 7 | mean_squared_log_error = "MSLE" 8 | r2 = "R2" 9 | accuracy = "Accuracy" 10 | balanced_accuracy = "BalancedAccuracy" 11 | f1 = "F1" 12 | roc_auc = "AUC" 13 | precision = "Precision" 14 | recall = "Recall" 15 | log_loss = "Logloss" 16 | -------------------------------------------------------------------------------- /cicd/README.md: -------------------------------------------------------------------------------- 1 | ########################################################### 2 | # Continuous integration and continuous delivery/deployment 3 | ########################################################### 4 | 5 | This part of the codebase makes sure that we can perform reliable NAS. 6 | To this end, we rely on pytest to run some long-running configurations from both 7 | the greedy portfolio and the default configuration. 8 | 9 | ``` 10 | python -m pytest cicd/test_preselected_configs.py -vs 11 | ``` 12 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/lr_scheduler/constants.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class StepIntervalUnit(Enum): 5 | """ 6 | The interval at which we perform the step for learning rate schedulers.
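For example, a scheduler stepped per batch has its learning rate updated after every batch evaluation, whereas one stepped per epoch is updated only once an epoch has finished.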
7 | Attributes: 8 | batch (str): We update every batch evaluation 9 | epoch (str): We update every epoch 10 | valid (str): We update every validation 11 | """ 12 | batch = 'batch' 13 | epoch = 'epoch' 14 | valid = 'valid' 15 | 16 | 17 | StepIntervalUnitChoices = [step_interval.name for step_interval in StepIntervalUnit] 18 | -------------------------------------------------------------------------------- /test/test_datasets/test_base_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import pytest 4 | 5 | from autoPyTorch.datasets.base_dataset import _get_output_properties 6 | 7 | 8 | @pytest.mark.parametrize( 9 | "target_labels,dim,task_type", ( 10 | (np.arange(5), 5, "multiclass"), 11 | (np.linspace(0, 1, 3), 1, "continuous"), 12 | (np.linspace(0, 1, 3)[:, np.newaxis], 1, "continuous") 13 | ) 14 | ) 15 | def test_get_output_properties(target_labels, dim, task_type): 16 | train_tensors = np.array([np.empty_like(target_labels), target_labels]) 17 | output_dim, output_type = _get_output_properties(train_tensors) 18 | assert output_dim == dim 19 | assert output_type == task_type 20 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | # .coveragerc to control coverage.py 2 | [run] 3 | branch = True 4 | include = "autoPyTorch/*" 5 | 6 | [report] 7 | # Regexes for lines to exclude from consideration 8 | exclude_lines = 9 | # Have to re-enable the standard pragma 10 | pragma: no cover 11 | 12 | # Don't complain about missing debug-only code: 13 | def __repr__ 14 | if self\.debug 15 | 16 | # Don't complain if tests don't hit defensive assertion code: 17 | raise AssertionError 18 | raise NotImplementedError 19 | 20 | # Don't complain if non-runnable code isn't run: 21 | if 0: 22 | if __name__ == .__main__.: 23 | 24 | ignore_errors = True 25 | 26 | [html] 27 | directory = coverage_html_report 28 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | preferred-citation: 2 | type: article 3 | authors: 4 | - family-names: "Zimmer" 5 | given-names: "Lucas" 6 | affiliation: "University of Freiburg, Germany" 7 | - family-names: "Lindauer" 8 | given-names: "Marius" 9 | affiliation: "University of Freiburg, Germany" 10 | - family-names: "Hutter" 11 | given-names: "Frank" 12 | affiliation: "University of Freiburg, Germany" 13 | doi: "10.1109/TPAMI.2021.3067763" 14 | journal-title: "IEEE Transactions on Pattern Analysis and Machine Intelligence" 15 | title: "Auto-PyTorch Tabular: Multi-Fidelity MetaLearning for Efficient and Robust AutoDL" 16 | year: 2021 17 | note: "also available under https://arxiv.org/abs/2006.13799" 18 | start: 3079 19 | end: 3090 20 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/network_backbone/forecasting_backbone/forecasting_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from autoPyTorch.pipeline.components.base_component import ( 4 | ThirdPartyComponents, find_components) 5 | from autoPyTorch.pipeline.components.setup.network_backbone.forecasting_backbone.forecasting_decoder.\ 6 | base_forecasting_decoder import BaseForecastingDecoder 7 | 8 | directory = os.path.split(__file__)[0] 9 | decoders = 
find_components(__package__, 10 | directory, 11 | BaseForecastingDecoder) 12 | 13 | decoder_addons = ThirdPartyComponents(BaseForecastingDecoder) 14 | 15 | 16 | def add_decoder(decoder: BaseForecastingDecoder) -> None: 17 | decoder_addons.add_component(decoder) 18 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/base_setup.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent 4 | 5 | 6 | class autoPyTorchSetupComponent(autoPyTorchComponent): 7 | """Provide an abstract interface for schedulers 8 | in Auto-Pytorch""" 9 | 10 | def __init__(self) -> None: 11 | super(autoPyTorchSetupComponent, self).__init__() 12 | 13 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: 14 | """ 15 | Adds the fitted component into the fit dictionary 'X' and returns it. 16 | Args: 17 | X (Dict[str, Any]): 'X' dictionary 18 | Returns: 19 | (Dict[str, Any]): the updated 'X' dictionary 20 | """ 21 | raise NotImplementedError() 22 | -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | 3 | {# Custom CSS overrides #} 4 | {# set bootswatch_css_custom = ['_static/my-styles.css'] #} 5 | 6 | {# Add github banner (from: https://github.com/blog/273-github-ribbons). #} 7 | {% block header %} 8 | {{ super() }} 9 | 15 | 22 | {% endblock %} 23 | 24 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/mirrors-mypy 3 | rev: v0.761 4 | hooks: 5 | - id: mypy 6 | args: [--show-error-codes, 7 | --warn-redundant-casts, 8 | --warn-return-any, 9 | --warn-unreachable, 10 | ] 11 | files: autoPyTorch/.* 12 | exclude: autoPyTorch/ensemble/ 13 | - repo: https://gitlab.com/pycqa/flake8 14 | rev: 3.8.3 15 | hooks: 16 | - id: flake8 17 | additional_dependencies: 18 | - flake8-print==3.1.4 19 | - flake8-import-order 20 | name: flake8 autoPyTorch 21 | files: autoPyTorch/.* 22 | - id: flake8 23 | additional_dependencies: 24 | - flake8-print==3.1.4 25 | - flake8-import-order 26 | name: flake8 test 27 | files: test/.* -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/network_initializer/NoInit.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | import torch 4 | 5 | from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import ( 6 | BaseNetworkInitializerComponent 7 | ) 8 | 9 | 10 | class NoInit(BaseNetworkInitializerComponent): 11 | """ 12 | No initialization on the weights/bias 13 | """ 14 | 15 | def weights_init(self) -> Callable: 16 | """Returns the actual PyTorch model, that is dynamically created 17 | from a self.config object. 18 | 19 | self.config is a dictionary created form a given config in the config space. 20 | It contains the necessary information to build a network. 
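For NoInit, the initialization callable below is deliberately a no-op, so every module keeps PyTorch's default weight and bias initialization.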
21 | """ 22 | def initialization(m: torch.nn.Module) -> None: 23 | pass 24 | return initialization 25 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/training/trainer/forecasting_trainer/ForecastingMixUpTrainer.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional, Union 2 | 3 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 4 | from autoPyTorch.pipeline.components.training.trainer.MixUpTrainer import MixUpTrainer 5 | from autoPyTorch.pipeline.components.training.trainer.forecasting_trainer.forecasting_base_trainer import \ 6 | ForecastingBaseTrainerComponent 7 | 8 | 9 | class ForecastingMixUpTrainer(ForecastingBaseTrainerComponent, MixUpTrainer): 10 | @staticmethod 11 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 12 | ) -> Dict[str, Union[str, bool]]: 13 | return { 14 | 'shortname': 'ForecastingMixUpTrainer', 15 | 'name': 'MixUp Regularized Trainer', 16 | } 17 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/image_preprocessing/base_image_preprocessor.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional 2 | 3 | from autoPyTorch.pipeline.components.preprocessing.base_preprocessing import autoPyTorchPreprocessingComponent 4 | 5 | 6 | class autoPyTorchImagePreprocessingComponent(autoPyTorchPreprocessingComponent): 7 | """ 8 | Provides abstract interface for preprocessing algorithms in AutoPyTorch. 9 | """ 10 | 11 | def fit(self, X: Dict[str, Any], y: Optional[Any] = None) -> "autoPyTorchImagePreprocessingComponent": 12 | """ 13 | Initialises early_preprocessor and returns self. 
14 | Args: 15 | X (Dict[str, Any]): 'X' dictionary 16 | 17 | Returns: 18 | autoPyTorchImagePreprocessingComponent: self 19 | """ 20 | self.check_requirements(X, y) 21 | 22 | return self 23 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/training/trainer/forecasting_trainer/ForecastingStandardTrainer.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional, Union 2 | 3 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 4 | from autoPyTorch.pipeline.components.training.trainer.StandardTrainer import StandardTrainer 5 | from autoPyTorch.pipeline.components.training.trainer.forecasting_trainer.forecasting_base_trainer import \ 6 | ForecastingBaseTrainerComponent 7 | 8 | 9 | class ForecastingStandardTrainer(ForecastingBaseTrainerComponent, StandardTrainer): 10 | @staticmethod 11 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 12 | ) -> Dict[str, Union[str, bool]]: 13 | return { 14 | 'shortname': 'ForecastingStandardTrainer', 15 | 'name': 'Forecasting Standard Trainer', 16 | } 17 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | include autoPyTorch/py.typed 3 | include autoPyTorch/utils/logging.yaml 4 | include autoPyTorch/configs/default_pipeline_options.json 5 | include autoPyTorch/configs/greedy_portfolio.json 6 | include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/catboost.json 7 | include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/rotation_forest.json 8 | include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/random_forest.json 9 | include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/knn.json 10 | include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/svm.json 11 | include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/extra_trees.json 12 | include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/lgb.json 13 | -------------------------------------------------------------------------------- /test/test_data/utils.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import numpy as np 4 | 5 | import pandas as pd 6 | 7 | from scipy.sparse import spmatrix 8 | 9 | 10 | def convert(arr, objtype): 11 | if objtype == np.ndarray: 12 | return arr 13 | elif objtype == list: 14 | return arr.tolist() 15 | else: 16 | return objtype(arr) 17 | 18 | 19 | # Function to get the type of an obj 20 | def dtype(obj): 21 | if isinstance(obj, List): 22 | return type(obj[0][0]) if isinstance(obj[0], List) else type(obj[0]) 23 | elif isinstance(obj, pd.DataFrame): 24 | return obj.dtypes 25 | else: 26 | return obj.dtype 27 | 28 | 29 | # Function to get the size of an object 30 | def size(obj): 31 | if isinstance(obj, spmatrix): # spmatrix doesn't support __len__ 32 | return obj.shape[0] if obj.shape[0] > 1 else obj.shape[1] 33 | else: 34 | return len(obj) 35 | -------------------------------------------------------------------------------- /test/test_pipeline/components/preprocessing/forecasting/test_encoder_choice.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 
3 | from autoPyTorch.pipeline.components.preprocessing.time_series_preprocessing.encoding import TimeSeriesEncoderChoice 4 | 5 | 6 | class TestEncoderChoice(unittest.TestCase): 7 | def test_get_set_config_space(self): 8 | """Make sure that we can set up a valid choice in the encoder 9 | choice""" 10 | dataset_properties = {'numerical_columns': list(range(4)), 'categorical_columns': [5]} 11 | encoder_choice = TimeSeriesEncoderChoice(dataset_properties) 12 | cs = encoder_choice.get_hyperparameter_search_space() 13 | 14 | # Make sure that all hyperparameters are part of the search space 15 | self.assertListEqual( 16 | sorted(cs.get_hyperparameter('__choice__').choices), 17 | sorted(list(encoder_choice.get_components().keys())) 18 | ) 19 | 20 | 21 | if __name__ == '__main__': 22 | unittest.main() 23 | -------------------------------------------------------------------------------- /.github/workflows/long_regression_test.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | schedule: 5 | # Every Tuesday at 7AM UTC 6 | # TODO: temporarily set to every day just for the PR 7 | #- cron: '0 07 * * 2' 8 | - cron: '0 07 * * *' 9 | 10 | jobs: 11 | 12 | ubuntu: 13 | runs-on: ubuntu-latest 14 | 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.8] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | with: 23 | ref: development 24 | submodules: recursive 25 | - name: Setup Python ${{ matrix.python-version }} 26 | uses: actions/setup-python@v2 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | 30 | - name: Install test dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install -e .[forecasting,test] 34 | 35 | - name: Run tests 36 | run: | 37 | python -m pytest --durations=200 cicd/test_preselected_configs.py -vs 38 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yaml: -------------------------------------------------------------------------------- 1 | name: pre-commit 2 | 3 | on: 4 | # Allow manual triggering through the GitHub API 5 | workflow_dispatch: 6 | 7 | # Triggers with push to these branches 8 | push: 9 | branches: 10 | - master 11 | - development 12 | 13 | # Triggers with push to a PR aimed at these branches 14 | pull_request: 15 | branches: 16 | - master 17 | - development 18 | 19 | jobs: 20 | 21 | run-all-files: 22 | runs-on: ubuntu-latest 23 | 24 | steps: 25 | - name: Checkout 26 | uses: actions/checkout@v2 27 | 28 | - name: Setup Python 3.7 29 | uses: actions/setup-python@v2 30 | with: 31 | python-version: 3.7 32 | 33 | - name: Init Submodules 34 | run: | 35 | git submodule update --init --recursive 36 | 37 | - name: Install pre-commit 38 | run: | 39 | pip install pre-commit 40 | pre-commit install 41 | 42 | - name: Run pre-commit 43 | run: | 44 | pre-commit run --all-files 45 | -------------------------------------------------------------------------------- /test/test_pipeline/components/training/test_forecasting_training.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from autoPyTorch.constants import FORECASTING_BUDGET_TYPE 4 | from autoPyTorch.pipeline.components.training.trainer.forecasting_trainer import ForecastingTrainerChoice 5 | 6 | 7 | class TestGetBudgetTracker(unittest.TestCase): 8 | def test_get_budget_tracker(self): 9 | trainer = ForecastingTrainerChoice({}) 10 | max_epoch = 50 11 | 12 | X = {'budget_type': 'epochs',
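# minimal fit dictionary: with the 'epochs' budget type, the tracker below should cap training at these 5 epochs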
'epochs': 5, 14 | } 15 | budget_tracker = trainer.get_budget_tracker(X) 16 | self.assertEqual(budget_tracker.max_epochs, 5) 17 | 18 | for budget_type in FORECASTING_BUDGET_TYPE: 19 | budget_tracker = trainer.get_budget_tracker({'budget_type': budget_type}) 20 | self.assertEqual(budget_tracker.max_epochs, max_epoch) 21 | 22 | budget_tracker = trainer.get_budget_tracker({'budget_type': 'runtime'}) 23 | self.assertIsNone(budget_tracker.max_epochs) 24 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/augmentation/image/VerticalFlip.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | import imgaug.augmenters as iaa 4 | from imgaug.augmenters.meta import Augmenter 5 | 6 | import numpy as np 7 | 8 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 9 | from autoPyTorch.pipeline.components.setup.augmentation.image.base_image_augmenter import BaseImageAugmenter 10 | 11 | 12 | class VerticalFlip(BaseImageAugmenter): 13 | def __init__(self, random_state: Optional[Union[int, np.random.RandomState]] = None): 14 | super().__init__() 15 | self.random_state = random_state 16 | 17 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImageAugmenter: 18 | self.augmenter: Augmenter = iaa.Flipud(p=0.5, name=self.get_properties()['name']) 19 | 20 | return self 21 | 22 | @staticmethod 23 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 24 | ) -> Dict[str, Any]: 25 | return {'name': 'VerticalFlip'} 26 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/augmentation/image/HorizontalFlip.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | import imgaug.augmenters as iaa 4 | from imgaug.augmenters.meta import Augmenter 5 | 6 | import numpy as np 7 | 8 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 9 | from autoPyTorch.pipeline.components.setup.augmentation.image.base_image_augmenter import BaseImageAugmenter 10 | 11 | 12 | class HorizontalFlip(BaseImageAugmenter): 13 | def __init__(self, random_state: Optional[Union[int, np.random.RandomState]] = None): 14 | super().__init__() 15 | self.random_state = random_state 16 | 17 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImageAugmenter: 18 | self.augmenter: Augmenter = iaa.Fliplr(p=0.5, name=self.get_properties()['name']) 19 | 20 | return self 21 | 22 | @staticmethod 23 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 24 | ) -> Dict[str, Any]: 25 | return {'name': 'HorizontalFlip'} 26 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/forecasting_training_loss/base_forecasting_loss.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable, Dict, Optional 2 | 3 | from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent 4 | from autoPyTorch.utils.common import FitRequirement 5 | 6 | 7 | class ForecastingLossComponents(autoPyTorchComponent): 8 | _required_properties = ["name", "handles_tabular", "handles_image", "handles_time_series", 9 | 'handles_regression', 'handles_classification'] 10 | loss: Optional[Callable] = None 11 | net_output_type:
Optional[str] = None 12 | 13 | def __init__(self, 14 | **kwargs: Any): 15 | super().__init__() 16 | self.add_fit_requirements([ 17 | FitRequirement('task_type', (str,), user_defined=True, dataset_property=True), 18 | ]) 19 | 20 | def fit(self, X: Dict[str, Any], y: Any = None) -> "autoPyTorchComponent": 21 | self.check_requirements(X, y) 22 | return self 23 | 24 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: 25 | X.update({"loss": self.loss, 26 | 'net_output_type': self.net_output_type}) 27 | return X 28 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/network_initializer/SparseInit.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | import torch 4 | 5 | from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import ( 6 | BaseNetworkInitializerComponent 7 | ) 8 | 9 | 10 | class SparseInit(BaseNetworkInitializerComponent): 11 | """ 12 | Fills the 2D input Tensor as a sparse matrix 13 | """ 14 | def weights_init(self) -> Callable: 15 | """Returns the weight initialization function that is dynamically created 16 | from a self.config object. 17 | 18 | self.config is a dictionary created from a given config in the config space. 19 | It contains the necessary information to initialize the network. 20 | """ 21 | 22 | def initialization(m: torch.nn.Module) -> None: 23 | if isinstance(m, (torch.nn.Conv1d, 24 | torch.nn.Conv2d, 25 | torch.nn.Conv3d, 26 | torch.nn.Linear)): 27 | torch.nn.init.sparse_(m.weight.data, 0.9) 28 | if m.bias is not None and self.bias_strategy == 'Zero': 29 | torch.nn.init.constant_(m.bias.data, 0.0) 30 | return initialization 31 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | WORKDIR /auto-pytorch 4 | 5 | # install linux packages 6 | RUN apt-get update 7 | 8 | # Set the locale 9 | # workaround for https://github.com/automl/auto-sklearn/issues/867 10 | RUN apt-get -y install locales 11 | RUN touch /usr/share/locale/locale.alias 12 | RUN sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && locale-gen 13 | ENV LANG en_US.UTF-8 14 | ENV LANGUAGE en_US:en 15 | ENV LC_ALL en_US.UTF-8 16 | 17 | # set environment variables to only use one core (ENV rather than RUN export, so the setting persists across image layers) 18 | ENV OPENBLAS_NUM_THREADS=1 19 | ENV MKL_NUM_THREADS=1 20 | ENV BLAS_NUM_THREADS=1 21 | ENV OMP_NUM_THREADS=1 22 | 23 | # install build requirements 24 | RUN apt install -y python3-dev python3-pip 25 | RUN pip3 install --upgrade setuptools 26 | RUN apt install -y build-essential 27 | 28 | RUN apt install -y swig 29 | 30 | # Copy the checked-out auto-pytorch version for installation 31 | ADD .
/auto-pytorch/ 32 | 33 | # Upgrade pip then install dependencies 34 | RUN pip3 install --upgrade pip 35 | RUN pip3 install pytest==4.6.* pep8 codecov pytest-cov flake8 flaky openml 36 | RUN cat /auto-pytorch/requirements.txt | xargs -n 1 -L 1 pip3 install 37 | RUN pip3 install jupyter 38 | 39 | # Install 40 | RUN pip3 install /auto-pytorch/ -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/network_initializer/XavierInit.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | import torch 4 | 5 | from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import ( 6 | BaseNetworkInitializerComponent 7 | ) 8 | 9 | 10 | class XavierInit(BaseNetworkInitializerComponent): 11 | """ 12 | Fills the input Tensor with values drawn from the Xavier (Glorot) normal distribution 13 | """ 14 | 15 | def weights_init(self) -> Callable: 16 | """Returns the weight initialization function that is dynamically created 17 | from a self.config object. 18 | 19 | self.config is a dictionary created from a given config in the config space. 20 | It contains the necessary information to initialize the network. 21 | """ 22 | def initialization(m: torch.nn.Module) -> None: 23 | if isinstance(m, (torch.nn.Conv1d, 24 | torch.nn.Conv2d, 25 | torch.nn.Conv3d, 26 | torch.nn.Linear)): 27 | torch.nn.init.xavier_normal_(m.weight.data) 28 | if m.bias is not None and self.bias_strategy == 'Zero': 29 | torch.nn.init.constant_(m.bias.data, 0.0) 30 | return initialization 31 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/network_initializer/OrthogonalInit.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | import torch 4 | 5 | from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import ( 6 | BaseNetworkInitializerComponent 7 | ) 8 | 9 | 10 | class OrthogonalInit(BaseNetworkInitializerComponent): 11 | """ 12 | Fills the input Tensor with a (semi) orthogonal matrix 13 | """ 14 | 15 | def weights_init(self) -> Callable: 16 | """Returns the weight initialization function that is dynamically created 17 | from a self.config object. 18 | 19 | self.config is a dictionary created from a given config in the config space. 20 | It contains the necessary information to initialize the network.
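Illustrative use of the returned callable (a sketch only, assuming the caller applies it recursively via torch.nn.Module.apply, the usual PyTorch idiom; not a verbatim excerpt from this repository): network = torch.nn.Linear(4, 2); network.apply(self.weights_init()) re-initializes the weights orthogonally and, when bias_strategy == 'Zero', sets the biases to zero.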
21 | """ 22 | def initialization(m: torch.nn.Module) -> None: 23 | if isinstance(m, (torch.nn.Conv1d, 24 | torch.nn.Conv2d, 25 | torch.nn.Conv3d, 26 | torch.nn.Linear)): 27 | torch.nn.init.orthogonal_(m.weight.data) 28 | if m.bias is not None and self.bias_strategy == 'Zero': 29 | torch.nn.init.constant_(m.bias.data, 0.0) 30 | return initialization 31 | -------------------------------------------------------------------------------- /test/test_pipeline/components/training/test_image_data_loader.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import unittest.mock 3 | 4 | import torchvision 5 | 6 | from autoPyTorch.pipeline.components.training.data_loader.image_data_loader import ( 7 | ImageDataLoader 8 | ) 9 | 10 | 11 | def test_imageloader_build_transform(): 12 | """ 13 | Makes sure a proper composition is created 14 | """ 15 | loader = ImageDataLoader() 16 | 17 | fit_dictionary = dict() 18 | fit_dictionary['dataset_properties'] = dict() 19 | fit_dictionary['dataset_properties']['is_small_preprocess'] = unittest.mock.Mock(()) 20 | fit_dictionary['image_augmenter'] = unittest.mock.Mock() 21 | fit_dictionary['preprocess_transforms'] = unittest.mock.Mock() 22 | 23 | compose = loader.build_transform(fit_dictionary, mode='train') 24 | 25 | assert isinstance(compose, torchvision.transforms.Compose) 26 | 27 | # We expect to tensor and image augmenter 28 | assert len(compose.transforms) == 2 29 | 30 | compose = loader.build_transform(fit_dictionary, mode='test') 31 | assert isinstance(compose, torchvision.transforms.Compose) 32 | assert len(compose.transforms) == 2 33 | 34 | # Check the expected error msgs 35 | loader._check_transform_requirements(fit_dictionary) 36 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/coalescer/NoCoalescer.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | import numpy as np 4 | 5 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.coalescer.base_coalescer import BaseCoalescer 6 | 7 | 8 | class NoCoalescer(BaseCoalescer): 9 | def __init__(self, random_state: np.random.RandomState): 10 | super().__init__() 11 | self.random_state = random_state 12 | self._processing = False 13 | 14 | def fit(self, X: Dict[str, Any], y: Optional[Any] = None) -> BaseCoalescer: 15 | """ 16 | As no coalescing happens, only check the requirements. 17 | 18 | Args: 19 | X (Dict[str, Any]): 20 | fit dictionary 21 | y (Optional[Any]): 22 | Parameter to comply with scikit-learn API. Not used. 
23 | 24 | Returns: 25 | instance of self 26 | """ 27 | self.check_requirements(X, y) 28 | 29 | return self 30 | 31 | @staticmethod 32 | def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, Union[str, bool]]: 33 | return { 34 | 'shortname': 'NoCoalescer', 35 | 'name': 'NoCoalescer', 36 | 'handles_sparse': True 37 | } 38 | -------------------------------------------------------------------------------- /test/test_utils/test_single_thread_client.py: -------------------------------------------------------------------------------- 1 | import dask.distributed 2 | 3 | from distributed.utils_test import inc 4 | 5 | import pytest 6 | 7 | from autoPyTorch.utils.single_thread_client import SingleThreadedClient 8 | 9 | 10 | def test_single_thread_client_like_dask_client(): 11 | single_thread_client = SingleThreadedClient() 12 | assert isinstance(single_thread_client, dask.distributed.Client) 13 | future = single_thread_client.submit(inc, 1) 14 | assert isinstance(future, dask.distributed.Future) 15 | assert future.done() 16 | assert future.result() == 2 17 | assert sum(single_thread_client.nthreads().values()) == 1 18 | single_thread_client.close() 19 | single_thread_client.shutdown() 20 | 21 | # Client/Futures are printed, so make sure str works 22 | # str calls __repr__, which is the purpose of the check below 23 | assert str(future) != "" 24 | assert str(single_thread_client) != "" 25 | 26 | # Single thread client is an inherited version of dask client 27 | # so that futures run in the same thread as the main job. 28 | # We carefully selected what methods are inherited, and any other 29 | # method should raise a not implemented error to be safe against major 30 | # dask client API changes. 31 | with pytest.raises(NotImplementedError): 32 | single_thread_client.get_scheduler_logs() 33 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | #see https://github.com/codecov/support/wiki/Codecov-Yaml 2 | codecov: 3 | notify: 4 | require_ci_to_pass: yes 5 | 6 | coverage: 7 | precision: 2 # 2 = xx.xx%, 0 = xx% 8 | round: nearest # how coverage is rounded: down/up/nearest 9 | range: 10...90 # custom range of coverage colors from red -> yellow -> green 10 | status: 11 | # https://codecov.readme.io/v1.0/docs/commit-status 12 | project: 13 | default: 14 | against: auto 15 | target: 70% # specify the target coverage for each commit status 16 | threshold: 50% # allow this little decrease on project 17 | # https://github.com/codecov/support/wiki/Filtering-Branches 18 | # branches: master 19 | if_ci_failed: error 20 | # https://github.com/codecov/support/wiki/Patch-Status 21 | patch: 22 | default: 23 | against: auto 24 | target: 30% # specify the target "X%" coverage to hit 25 | threshold: 50% # allow this much decrease on patch 26 | changes: false 27 | 28 | parsers: 29 | gcov: 30 | branch_detection: 31 | conditional: true 32 | loop: true 33 | macro: false 34 | method: false 35 | javascript: 36 | enable_partials: false 37 | 38 | comment: 39 | layout: header, diff 40 | require_changes: false 41 | behavior: default # update if exists else create new 42 | branches: * 43 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List 2 | 3 |
from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import ( 4 | autoPyTorchTabularPreprocessingComponent 5 | ) 6 | from autoPyTorch.utils.common import FitRequirement 7 | 8 | 9 | class BaseScaler(autoPyTorchTabularPreprocessingComponent): 10 | """ 11 | Provides an abstract class interface for scalers in AutoPyTorch 12 | """ 13 | 14 | def __init__(self) -> None: 15 | super().__init__() 16 | self.add_fit_requirements([ 17 | FitRequirement('numerical_columns', (List,), user_defined=True, dataset_property=True)]) 18 | 19 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: 20 | """ 21 | Adds the fitted scaler into the 'X' dictionary and returns it. 22 | Args: 23 | X (Dict[str, Any]): 'X' dictionary 24 | 25 | Returns: 26 | (Dict[str, Any]): the updated 'X' dictionary 27 | """ 28 | if self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None: 29 | raise ValueError("can't call transform on {} without fitting first." 30 | .format(self.__class__.__name__)) 31 | X.update({'scaler': self.preprocessor}) 32 | return X 33 | -------------------------------------------------------------------------------- /autoPyTorch/utils/logging.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 1 3 | disable_existing_loggers: false 4 | formatters: 5 | simple: 6 | format: '[%(levelname)s] [%(asctime)s:%(name)s] %(message)s' 7 | 8 | handlers: 9 | console: 10 | class: logging.StreamHandler 11 | level: WARNING 12 | formatter: simple 13 | stream: ext://sys.stdout 14 | 15 | file_handler: 16 | class: logging.FileHandler 17 | level: DEBUG 18 | formatter: simple 19 | filename: autoPyTorch.log 20 | 21 | distributed_logfile: 22 | class: logging.FileHandler 23 | level: DEBUG 24 | formatter: simple 25 | filename: distributed.log 26 | 27 | root: 28 | level: DEBUG 29 | handlers: [console, file_handler] 30 | 31 | loggers: 32 | 33 | autoPyTorch.automl_common.common.utils.backend: 34 | level: DEBUG 35 | handlers: [file_handler] 36 | propagate: false 37 | 38 | smac.intensification.intensification.Intensifier: 39 | level: INFO 40 | handlers: [file_handler, console] 41 | 42 | smac.optimizer.local_search.LocalSearch: 43 | level: INFO 44 | handlers: [file_handler, console] 45 | 46 | smac.optimizer.smbo.SMBO: 47 | level: INFO 48 | handlers: [file_handler, console] 49 | 50 | EnsembleBuilder: 51 | level: DEBUG 52 | handlers: [file_handler, console] 53 | 54 | distributed: 55 | level: DEBUG 56 | handlers: [distributed_logfile] 57 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/base_imputer.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List 2 | 3 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import ( 4 | autoPyTorchTabularPreprocessingComponent 5 | ) 6 | from autoPyTorch.utils.common import FitRequirement 7 | 8 | 9 | class BaseImputer(autoPyTorchTabularPreprocessingComponent): 10 | """ 11 | Provides abstract class interface for Imputers in AutoPyTorch 12 | """ 13 | 14 | def __init__(self) -> None: 15 | super().__init__() 16 | self.add_fit_requirements([ 17 | FitRequirement('numerical_columns', (List,), user_defined=True, dataset_property=True)]) 18 | 19 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: 20 | """ 21 | Adds the fitted imputer into the 'X' dictionary and returns it.
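Illustrative message-passing flow (a sketch only; the surrounding pipeline normally drives these calls): X = imputer.fit(X).transform(X) leaves the fitted sklearn imputers under X['imputer'], e.g. {'numerical': <fitted imputer>, 'categorical': None}, for the column transformer built downstream.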
22 | Args: 23 | X (Dict[str, Any]): 'X' dictionary 24 | 25 | Returns: 26 | (Dict[str, Any]): the updated 'X' dictionary 27 | """ 28 | if self.preprocessor['numerical'] is None and len(X["dataset_properties"]["numerical_columns"]) != 0: 29 | raise ValueError("can't call transform on {} without fitting first." 30 | .format(self.__class__.__name__)) 31 | X.update({'imputer': self.preprocessor}) 32 | return X 33 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/network_initializer/KaimingInit.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | import torch 4 | 5 | from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import ( 6 | BaseNetworkInitializerComponent 7 | ) 8 | 9 | 10 | class KaimingInit(BaseNetworkInitializerComponent): 11 | """ 12 | Fills the input Tensor with values according to the method described in 13 | Delving deep into rectifiers: Surpassing human-level performance on 14 | ImageNet classification 15 | """ 16 | 17 | def weights_init(self) -> Callable: 18 | """Returns the weight initialization function that is dynamically created 19 | from a self.config object. 20 | 21 | self.config is a dictionary created from a given config in the config space. 22 | It contains the necessary information to initialize the network. 23 | """ 24 | def initialization(m: torch.nn.Module) -> None: 25 | if isinstance(m, (torch.nn.Conv1d, 26 | torch.nn.Conv2d, 27 | torch.nn.Conv3d, 28 | torch.nn.Linear)): 29 | torch.nn.init.kaiming_normal_(m.weight.data) 30 | if m.bias is not None and self.bias_strategy == 'Zero': 31 | torch.nn.init.constant_(m.bias.data, 0.0) 32 | return initialization 33 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List 2 | 3 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import ( 4 | autoPyTorchTabularPreprocessingComponent 5 | ) 6 | from autoPyTorch.utils.common import FitRequirement 7 | 8 | 9 | class BaseEncoder(autoPyTorchTabularPreprocessingComponent): 10 | """ 11 | Base class for encoder 12 | """ 13 | def __init__(self) -> None: 14 | super().__init__() 15 | self.add_fit_requirements([ 16 | FitRequirement('categorical_columns', (List,), user_defined=True, dataset_property=True), 17 | FitRequirement('categories', (List,), user_defined=True, dataset_property=True)]) 18 | 19 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: 20 | """ 21 | Adds the fitted encoder into the 'X' dictionary and returns it. 22 | Args: 23 | X (Dict[str, Any]): 'X' dictionary 24 | 25 | Returns: 26 | (Dict[str, Any]): the updated 'X' dictionary 27 | """ 28 | if self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None: 29 | raise ValueError("can't call transform on {} without fitting first."
30 | .format(self.__class__.__name__)) 31 | X.update({'encoder': self.preprocessor}) 32 | return X 33 | -------------------------------------------------------------------------------- /test/test_datasets/test_image_dataset.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | import torch 6 | 7 | import torchvision 8 | 9 | from autoPyTorch.datasets.image_dataset import ImageDataset 10 | 11 | 12 | @unittest.skip(reason="Image Dataset issue") 13 | class DatasetTest(unittest.TestCase): 14 | def runTest(self): 15 | dataset = torchvision.datasets.FashionMNIST(root='../../datasets/', 16 | transform=torchvision.transforms.ToTensor(), 17 | download=True) 18 | ds = ImageDataset(dataset) 19 | self.assertIsInstance(ds.mean, torch.Tensor) 20 | self.assertIsInstance(ds.std, torch.Tensor) 21 | for img, _ in ds.train_tensors: 22 | self.assertIsInstance(img, torch.Tensor) 23 | 24 | 25 | @unittest.skip(reason="Image Dataset issue") 26 | class NumpyArrayTest(unittest.TestCase): 27 | def runTest(self): 28 | matrix = np.random.randint(0, 255, (15, 3, 10, 10)).astype(float) 29 | target_df = np.random.randint(0, 5, (15, )).astype(float) 30 | ds = ImageDataset((matrix, target_df)) 31 | self.assertIsInstance(ds.mean, torch.Tensor) 32 | self.assertIsInstance(ds.std, torch.Tensor) 33 | for img, _ in ds.train_tensors: 34 | self.assertIsInstance(img, torch.Tensor) 35 | 36 | 37 | if __name__ == '__main__': 38 | unittest.main() 39 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Push to PyPi 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v*" 7 | branches: [master] 8 | workflow_dispatch: 9 | 10 | jobs: 11 | publish: 12 | runs-on: "ubuntu-latest" 13 | 14 | steps: 15 | - name: Checkout source 16 | uses: actions/checkout@master 17 | with: 18 | submodules: recursive 19 | 20 | - name: Set up Python 3.9 21 | uses: actions/setup-python@v2 22 | with: 23 | python-version: 3.9 24 | 25 | - name: Install pypa/build 26 | run: >- 27 | python -m 28 | pip install 29 | build 30 | --user 31 | - name: Build a binary wheel and a source tarball 32 | run: >- 33 | python -m 34 | build 35 | --sdist 36 | --wheel 37 | --outdir dist/ 38 | .
39 | 40 | - name: Publish distribution 📦 to Test PyPI 41 | uses: pypa/gh-action-pypi-publish@master 42 | with: 43 | skip_existing: true 44 | user: __token__ 45 | password: ${{ secrets.TEST_PYPI_TOKEN }} 46 | repository_url: https://test.pypi.org/legacy/ 47 | 48 | - name: Publish distribution 📦 to PyPI 49 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 50 | uses: pypa/gh-action-pypi-publish@master 51 | with: 52 | user: __token__ 53 | password: ${{ secrets.pypi_token }} 54 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/coalescer/base_coalescer.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List 2 | 3 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import ( 4 | autoPyTorchTabularPreprocessingComponent 5 | ) 6 | from autoPyTorch.utils.common import FitRequirement 7 | 8 | 9 | class BaseCoalescer(autoPyTorchTabularPreprocessingComponent): 10 | def __init__(self) -> None: 11 | super().__init__() 12 | self._processing = True 13 | self.add_fit_requirements([ 14 | FitRequirement('categorical_columns', (List,), user_defined=True, dataset_property=True), 15 | FitRequirement('categories', (List,), user_defined=True, dataset_property=True) 16 | ]) 17 | 18 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: 19 | """ 20 | Add the preprocessor to the provided fit dictionary `X`. 21 | 22 | Args: 23 | X (Dict[str, Any]): fit dictionary in sklearn 24 | 25 | Returns: 26 | X (Dict[str, Any]): the updated fit dictionary 27 | """ 28 | if self._processing and self.preprocessor['categorical'] is None: 29 | # If we apply minority coalescer, we must have categorical preprocessor! 30 | raise RuntimeError(f"fit() must be called before transform() on {self.__class__.__name__}") 31 | 32 | X.update({'coalescer': self.preprocessor}) 33 | return X 34 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List 2 | 3 | from sklearn.base import BaseEstimator 4 | 5 | 6 | def get_tabular_preprocessers(X: Dict[str, Any]) -> Dict[str, List[BaseEstimator]]: 7 | """ 8 | Expects fit_dictionary(X) to have numerical/categorical preprocessors 9 | (fitted numerical/categorical preprocessing nodes) that will build the 10 | column transformer in the TabularColumnTransformer. This function 11 | parses X and extracts such components.
12 | Creates a dictionary with two keys, 13 | numerical- containing list of numerical preprocessors 14 | categorical- containing list of categorical preprocessors 15 | Args: 16 | X: fit dictionary 17 | Returns: 18 | (Dict[str, List[BaseEstimator]]): dictionary with list of numerical and categorical preprocessors 19 | """ 20 | preprocessor: Dict[str, List[BaseEstimator]] = dict(numerical=list(), categorical=list()) 21 | for key, value in X.items(): 22 | if isinstance(value, dict): 23 | # as each preprocessor is child of BaseEstimator 24 | if 'numerical' in value and isinstance(value['numerical'], BaseEstimator): 25 | preprocessor['numerical'].append(value['numerical']) 26 | if 'categorical' in value and isinstance(value['categorical'], BaseEstimator): 27 | preprocessor['categorical'].append(value['categorical']) 28 | 29 | return preprocessor 30 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/MinMaxScaler.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Tuple, Union 2 | 3 | import numpy as np 4 | 5 | from sklearn.preprocessing import MinMaxScaler as SklearnMinMaxScaler 6 | 7 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 8 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler import BaseScaler 9 | 10 | 11 | class MinMaxScaler(BaseScaler): 12 | """ 13 | Scale numerical columns/features into feature_range 14 | """ 15 | def __init__(self, 16 | random_state: Optional[Union[np.random.RandomState, int]] = None, 17 | feature_range: Tuple[Union[int, float], Union[int, float]] = (0, 1)): 18 | super().__init__() 19 | self.random_state = random_state 20 | self.feature_range = feature_range 21 | 22 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseScaler: 23 | 24 | self.check_requirements(X, y) 25 | 26 | self.preprocessor['numerical'] = SklearnMinMaxScaler(feature_range=self.feature_range, copy=False) 27 | return self 28 | 29 | @staticmethod 30 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 31 | ) -> Dict[str, Union[str, bool]]: 32 | return { 33 | 'shortname': 'MinMaxScaler', 34 | 'name': 'MinMaxScaler', 35 | 'handles_sparse': True 36 | } 37 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/training/base_training.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional 2 | 3 | import numpy as np 4 | 5 | from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent 6 | 7 | 8 | class autoPyTorchTrainingComponent(autoPyTorchComponent): 9 | """Provide an abstract interface for training nodes 10 | in Auto-Pytorch""" 11 | 12 | def __init__(self, random_state: Optional[np.random.RandomState] = None) -> None: 13 | super(autoPyTorchTrainingComponent, self).__init__(random_state=random_state) 14 | 15 | def transform(self, X: Dict) -> Dict: 16 | """The transform function calls the transform function of the 17 | underlying model and returns the transformed array. 
18 | 19 | Args: 20 | X (Dict): input features 21 | 22 | Returns: 23 | Dict: Transformed features 24 | """ 25 | raise NotImplementedError() 26 | 27 | def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None: 28 | """ 29 | A mechanism in code to ensure the correctness of the fit dictionary. 30 | It recursively makes sure that the children and parent level requirements 31 | are honored before fit. 32 | 33 | Args: 34 | X (Dict[str, Any]): Dictionary with fitted parameters. It is a message passing 35 | mechanism, in which during a transform, a component adds relevant information 36 | so that further stages can be properly fitted 37 | """ 38 | pass 39 | -------------------------------------------------------------------------------- /autoPyTorch/utils/parallel.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import sys 3 | 4 | 5 | def preload_modules(context: multiprocessing.context.BaseContext) -> None: 6 | """ 7 | This function is meant to be used with the forkserver multiprocessing context. 8 | More details about it can be found here: 9 | https://docs.python.org/3/library/multiprocessing.html 10 | 11 | Forkserver is known to be slower than other contexts. We use it, because it helps 12 | reduce the probability of a deadlock. To make it fast, we pre-load modules so that 13 | forked children have the desired modules available. 14 | 15 | We do not inherit dead-lock problematic modules like logging. 16 | 17 | Args: 18 | context (multiprocessing.context.BaseContext): One of the three supported multiprocessing 19 | contexts being fork, forkserver or spawn. 20 | """ 21 | all_loaded_modules = sys.modules.keys() 22 | preload = [ 23 | loaded_module for loaded_module in all_loaded_modules 24 | if loaded_module.split('.')[0] in ( 25 | 'smac', 26 | 'autoPyTorch', 27 | 'numpy', 28 | 'scipy', 29 | 'pandas', 30 | 'pynisher', 31 | 'sklearn', 32 | 'ConfigSpace', 33 | 'torch', 34 | 'torchvision', 35 | 'tensorboard', 36 | 'imgaug', 37 | 'catboost', 38 | 'lightgbm', 39 | ) and 'logging' not in loaded_module 40 | ] 41 | context.set_forkserver_preload(preload) 42 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/PowerTransformer.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | import numpy as np 4 | 5 | from sklearn.preprocessing import PowerTransformer as SklearnPowerTransformer 6 | 7 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 8 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler import BaseScaler 9 | 10 | 11 | class PowerTransformer(BaseScaler): 12 | """ 13 | Map data to as close to a Gaussian distribution as possible 14 | in order to reduce variance and minimize skewness. 15 | 16 | Uses `yeo-johnson` power transform method. Also, data is normalised 17 | to zero mean and unit variance.
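Illustrative sketch of the underlying transformation (plain scikit-learn usage, independent of this pipeline component): from sklearn.preprocessing import PowerTransformer as PT; PT(method='yeo-johnson').fit_transform([[1.0], [2.0], [10.0]]) yields a column with approximately zero mean and unit variance.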
18 | """ 19 | def __init__(self, 20 | random_state: Optional[np.random.RandomState] = None): 21 | super().__init__() 22 | self.random_state = random_state 23 | 24 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseScaler: 25 | 26 | self.check_requirements(X, y) 27 | 28 | self.preprocessor['numerical'] = SklearnPowerTransformer(method='yeo-johnson', copy=False) 29 | return self 30 | 31 | @staticmethod 32 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 33 | ) -> Dict[str, Union[str, bool]]: 34 | return { 35 | 'shortname': 'PowerTransformer', 36 | 'name': 'PowerTransformer', 37 | 'handles_sparse': False 38 | } 39 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/OneHotEncoder.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | import numpy as np 4 | 5 | from sklearn.preprocessing import OneHotEncoder as OHE 6 | 7 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 8 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder import BaseEncoder 9 | 10 | 11 | class OneHotEncoder(BaseEncoder): 12 | """ 13 | Encode categorical features as a one-hot numerical array 14 | """ 15 | def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None): 16 | super().__init__() 17 | self.random_state = random_state 18 | 19 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEncoder: 20 | 21 | self.check_requirements(X, y) 22 | 23 | self.preprocessor['categorical'] = OHE( 24 | # It is safer to have the OHE produce a 0 array than to crash a good configuration 25 | categories=X['dataset_properties']['categories'] 26 | if len(X['dataset_properties']['categories']) > 0 else 'auto', 27 | sparse=False, 28 | handle_unknown='ignore') 29 | return self 30 | 31 | @staticmethod 32 | def get_properties( 33 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 34 | ) -> Dict[str, Union[str, bool]]: 35 | return { 36 | 'shortname': 'OneHotEncoder', 37 | 'name': 'One Hot Encoder', 38 | 'handles_sparse': False 39 | } 40 | -------------------------------------------------------------------------------- /.github/workflows/dist.yml: -------------------------------------------------------------------------------- 1 | name: dist-check 2 | 3 | on: 4 | # Manually triggerable in github 5 | workflow_dispatch: 6 | 7 | # When a push occurs on either of these branches 8 | push: 9 | branches: 10 | - master 11 | - development 12 | 13 | # When a push occurs on a PR that targets these branches 14 | pull_request: 15 | branches: 16 | - master 17 | - development 18 | 19 | schedule: 20 | # Every day at 7AM UTC 21 | - cron: '0 07 * * *' 22 | 23 | jobs: 24 | 25 | dist: 26 | runs-on: ubuntu-latest 27 | 28 | steps: 29 | - name: Checkout 30 | uses: actions/checkout@v2 31 | with: 32 | submodules: recursive 33 | - name: Setup Python 34 | uses: actions/setup-python@v2 35 | with: 36 | python-version: 3.8 37 | 38 | - name: Build dist 39 | run: | 40 | python setup.py sdist 41 | 42 | - name: Twine check 43 | run: | 44 | pip install twine 45 | last_dist=$(ls -t dist/autoPyTorch-*.tar.gz | head -n 1) 46 | twine check "$last_dist" --strict 47 | 48 | - name: Install dist 49 | run: | 50 | last_dist=$(ls -t dist/autoPyTorch-*.tar.gz | head -n 1) 51 | pip install $last_dist 52 | 53 | - name: PEP 561 Compliance 
54 | run: | 55 | pip install mypy 56 | 57 | cd .. # required to use the installed version of autoPyTorch 58 | 59 | # Note this doesn't perform mypy checks, those are handled in pre-commit.yaml 60 | # This only checks if autoPyTorch exports type information 61 | if ! mypy -c "import autoPyTorch"; then exit 1; fi 62 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/encoding/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | from typing import Dict 4 | 5 | from autoPyTorch.pipeline.components.base_component import ( 6 | ThirdPartyComponents, autoPyTorchComponent, find_components) 7 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding import \ 8 | EncoderChoice 9 | from autoPyTorch.pipeline.components.preprocessing.time_series_preprocessing.encoding.time_series_base_encoder import \ 10 | TimeSeriesBaseEncoder 11 | 12 | encoding_directory = os.path.split(__file__)[0] 13 | _encoders = find_components(__package__, 14 | encoding_directory, 15 | TimeSeriesBaseEncoder) 16 | _addons = ThirdPartyComponents(TimeSeriesBaseEncoder) 17 | 18 | 19 | def add_encoder(encoder: TimeSeriesBaseEncoder) -> None: 20 | _addons.add_component(encoder) 21 | 22 | 23 | class TimeSeriesEncoderChoice(EncoderChoice): 24 | """ 25 | Allows for dynamically choosing encoding component at runtime 26 | """ 27 | 28 | def get_components(self) -> Dict[str, autoPyTorchComponent]: 29 | """Returns the available encoder components 30 | 31 | Args: 32 | None 33 | 34 | Returns: 35 | Dict[str, autoPyTorchComponent]: all BaseEncoder components available 36 | as choices for encoding the categorical columns 37 | """ 38 | components = OrderedDict() 39 | components.update(_encoders) 40 | components.update(_addons.components) 41 | return components 42 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | NOTE: ISSUES ARE NOT FOR CODE HELP - Ask for Help at https://stackoverflow.com 2 | 3 | Your issue may already be reported! 4 | Also, please search on the [issue tracker](../) before creating one. 5 | 6 | * **I'm submitting a ...** 7 | - [ ] bug report 8 | - [ ] feature request 9 | - [ ] support request => Please do not submit support request here, see note at the top of this template. 10 | 11 | # Issue Description 12 | * When Issue Happens 13 | * Steps To Reproduce 14 | 1. 15 | 1. 16 | 1. 17 | 18 | ## Expected Behavior 19 | 20 | 21 | 22 | ## Current Behavior 23 | 24 | 25 | 26 | ## Possible Solution 27 | 28 | 29 | 30 | ## Your Code 31 | 32 | ``` 33 | If relevant, paste all of your challenge code here 34 | ``` 35 | 36 | ## Error message 37 | 38 | ``` 39 | If relevant, paste all of your error messages here 40 | ``` 41 | 42 | ## Your Local environment 43 | * Operating System, version 44 | * Python, version 45 | * Outputs of `pip freeze` or `conda list` 46 | 47 | Make sure to add **all the information needed to understand the bug** so that someone can help. 48 | If the info is missing, we'll add the 'Needs more information' label and close the issue until there is enough information. 
-------------------------------------------------------------------------------- /test/test_evaluation/test_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests the functionality in autoPyTorch.evaluation.utils 3 | """ 4 | import pytest 5 | 6 | from autoPyTorch.evaluation.utils import DisableFileOutputParameters 7 | 8 | 9 | @pytest.mark.parametrize('disable_file_output', 10 | [['pipeline', 'pipelines'], 11 | [DisableFileOutputParameters.pipelines, DisableFileOutputParameters.pipeline]]) 12 | def test_disable_file_output_no_error(disable_file_output): 13 | """ 14 | Checks that `DisableFileOutputParameters.check_compatibility` 15 | does not raise an error for the parameterized values of `disable_file_output`. 16 | 17 | Args: 18 | disable_file_output ([List[Union[str, DisableFileOutputParameters]]]): 19 | Options that should be compatible with the `DisableFileOutputParameters` 20 | defined in `autoPyTorch`. 21 | """ 22 | DisableFileOutputParameters.check_compatibility(disable_file_output=disable_file_output) 23 | 24 | 25 | def test_disable_file_output_error(): 26 | """ 27 | Checks that `DisableFileOutputParameters.check_compatibility` raises an error 28 | for a value not present in `DisableFileOutputParameters` and ensures that the 29 | expected error is raised. 30 | """ 31 | disable_file_output = ['model'] 32 | with pytest.raises(ValueError, match=r"Expected .*? to be in the members (.*?) of" 33 | r" DisableFileOutputParameters or as string value" 34 | r" of a member."): 35 | DisableFileOutputParameters.check_compatibility(disable_file_output=disable_file_output) 36 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | import numpy as np 4 | 5 | from sklearn.utils import check_random_state 6 | 7 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import ( 8 | autoPyTorchTabularPreprocessingComponent 9 | ) 10 | 11 | 12 | class autoPyTorchFeaturePreprocessingComponent(autoPyTorchTabularPreprocessingComponent): 13 | _required_properties: List[str] = [ 14 | 'handles_sparse', 'handles_classification', 'handles_regression'] 15 | 16 | def __init__(self, random_state: Optional[np.random.RandomState] = None): 17 | if random_state is None: 18 | # A trainer component needs a random state for 19 | # sampling -- for example in MixUp training 20 | self.random_state = check_random_state(1) 21 | else: 22 | self.random_state = random_state 23 | super().__init__() 24 | 25 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: 26 | """ 27 | Adds the fitted feature preprocessor into the 'X' dictionary and returns it.
28 | Args: 29 | X (Dict[str, Any]): 'X' dictionary 30 | 31 | Returns: 32 | (Dict[str, Any]): the updated 'X' dictionary 33 | """ 34 | if self.preprocessor['numerical'] is None: 35 | raise AttributeError("{} can't transform without fitting first" 36 | .format(self.__class__.__name__)) 37 | X.update({'feature_preprocessor': self.preprocessor}) 38 | return X 39 | -------------------------------------------------------------------------------- /test/test_pipeline/components/training/test_feature_data_loader.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import unittest.mock 3 | 4 | import torchvision 5 | 6 | from autoPyTorch.pipeline.components.training.data_loader.feature_data_loader import ( 7 | FeatureDataLoader 8 | ) 9 | 10 | 11 | class TestFeatureDataLoader(unittest.TestCase): 12 | def test_build_transform_small_preprocess_true(self): 13 | """ 14 | Makes sure a proper composition is created 15 | """ 16 | loader = FeatureDataLoader() 17 | 18 | fit_dictionary = {'dataset_properties': {'is_small_preprocess': True}} 19 | for thing in ['imputer', 'scaler', 'encoder']: 20 | fit_dictionary[thing] = [unittest.mock.Mock()] 21 | 22 | compose = loader.build_transform(fit_dictionary, mode='train') 23 | 24 | self.assertIsInstance(compose, torchvision.transforms.Compose) 25 | 26 | # No preprocessing needed here as it was done before 27 | self.assertEqual(len(compose.transforms), 1) 28 | 29 | def test_build_transform_small_preprocess_false(self): 30 | """ 31 | Makes sure a proper composition is created 32 | """ 33 | loader = FeatureDataLoader() 34 | 35 | fit_dictionary = {'dataset_properties': {'is_small_preprocess': False}, 36 | 'preprocess_transforms': [unittest.mock.Mock()]} 37 | 38 | compose = loader.build_transform(fit_dictionary, mode='train') 39 | 40 | self.assertIsInstance(compose, torchvision.transforms.Compose) 41 | 42 | # We expect the to-tensor, the preprocess transforms and the check_array 43 | self.assertEqual(len(compose.transforms), 4) 44 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/encoding/time_series_base_encoder.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Union 2 | 3 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder import \ 4 | BaseEncoder 5 | from autoPyTorch.pipeline.components.preprocessing.time_series_preprocessing.base_time_series_preprocessing import \ 6 | autoPyTorchTimeSeriesPreprocessingComponent 7 | from autoPyTorch.utils.common import FitRequirement 8 | 9 | 10 | class TimeSeriesBaseEncoder(autoPyTorchTimeSeriesPreprocessingComponent): 11 | """ 12 | Base class for encoder 13 | """ 14 | def __init__(self) -> None: 15 | super(TimeSeriesBaseEncoder, self).__init__() 16 | self.add_fit_requirements([ 17 | FitRequirement('categorical_columns', (List,), user_defined=True, dataset_property=True), 18 | FitRequirement('categories', (List,), user_defined=True, dataset_property=True), 19 | FitRequirement('feature_names', (tuple,), user_defined=True, dataset_property=True), 20 | FitRequirement('feature_shapes', (Dict, ), user_defined=True, dataset_property=True), 21 | ]) 22 | self.feature_shapes: Union[Dict[str, int]] = {} 23 | 24 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: 25 | """ 26 | Adds the fitted encoder into the 'X' dictionary and returns it.
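Illustrative sketch (hypothetical feature names and shapes): after fitting, self.feature_shapes might look like {'sensor': 1, 'category': 3}; transform copies it into X['dataset_properties']['feature_shapes'] before delegating to BaseEncoder.transform.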
27 | 28 | Args: 29 | X (Dict[str, Any]): 'X' dictionary 30 | 31 | Returns: 32 | (Dict[str, Any]): the updated 'X' dictionary 33 | """ 34 | X['dataset_properties'].update({'feature_shapes': self.feature_shapes}) 35 | return BaseEncoder.transform(self, X) 36 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/StandardScaler.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | import numpy as np 4 | 5 | from sklearn.preprocessing import StandardScaler as SklearnStandardScaler 6 | 7 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 8 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler import BaseScaler 9 | from autoPyTorch.utils.common import FitRequirement 10 | 11 | 12 | class StandardScaler(BaseScaler): 13 | """ 14 | Standardise numerical columns/features by removing the mean and scaling to unit variance 15 | """ 16 | def __init__(self, 17 | random_state: Optional[Union[np.random.RandomState, int]] = None 18 | ): 19 | super().__init__() 20 | self.random_state = random_state 21 | self.add_fit_requirements([ 22 | FitRequirement('issparse', (bool,), user_defined=True, dataset_property=True) 23 | ]) 24 | 25 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseScaler: 26 | 27 | self.check_requirements(X, y) 28 | 29 | with_mean, with_std = (False, False) if X['dataset_properties']['issparse'] else (True, True) 30 | self.preprocessor['numerical'] = SklearnStandardScaler(with_mean=with_mean, with_std=with_std, copy=False) 31 | return self 32 | 33 | @staticmethod 34 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 35 | ) -> Dict[str, Union[str, bool]]: 36 | return { 37 | 'shortname': 'StandardScaler', 38 | 'name': 'Standard Scaler', 39 | 'handles_sparse': True 40 | } 41 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/base_tabular_preprocessing.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional, Union 2 | 3 | from sklearn.base import BaseEstimator 4 | 5 | from autoPyTorch.pipeline.components.preprocessing.base_preprocessing import autoPyTorchPreprocessingComponent 6 | 7 | 8 | class autoPyTorchTabularPreprocessingComponent(autoPyTorchPreprocessingComponent): 9 | """ 10 | Provides abstract interface for preprocessing algorithms in AutoPyTorch. 11 | """ 12 | _required_properties: List[str] = ['handles_sparse'] 13 | 14 | def __init__(self) -> None: 15 | super().__init__() 16 | self.preprocessor: Union[Dict[str, Optional[BaseEstimator]], BaseEstimator] = dict( 17 | numerical=None, categorical=None) 18 | 19 | def get_preprocessor_dict(self) -> Dict[str, BaseEstimator]: 20 | """ 21 | Returns early_preprocessor dictionary containing the sklearn numerical 22 | and categorical early_preprocessor with "numerical" and "categorical" 23 | keys.
May contain None for a key if the early_preprocessor does not 24 | handle the datatype defined by the key 25 | 26 | Returns: 27 | Dict[str, BaseEstimator]: early_preprocessor dictionary 28 | """ 29 | if self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None: 30 | raise AttributeError("{} can't return early_preprocessor dict without fitting first" 31 | .format(self.__class__.__name__)) 32 | return self.preprocessor 33 | 34 | def __str__(self) -> str: 35 | """ Allow a nice understanding of what components were used """ 36 | string = self.__class__.__name__ 37 | return string 38 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/encoding/NoEncoder.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | import numpy as np 4 | 5 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 6 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.NoEncoder import \ 7 | NoEncoder 8 | from autoPyTorch.pipeline.components.preprocessing.time_series_preprocessing.encoding.time_series_base_encoder import \ 9 | TimeSeriesBaseEncoder 10 | 11 | 12 | class TimeSeriesNoEncoder(TimeSeriesBaseEncoder): 13 | def __init__(self, 14 | random_state: Optional[Union[np.random.RandomState, int]] = None 15 | ): 16 | super().__init__() 17 | self.random_state = random_state 18 | 19 | def fit(self, X: Dict[str, Any], y: Any = None) -> "TimeSeriesBaseEncoder": 20 | NoEncoder.fit(self, X, y) 21 | self.feature_shapes = X['dataset_properties']['feature_shapes'] 22 | return self 23 | 24 | @staticmethod 25 | def get_properties( 26 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 27 | ) -> Dict[str, Union[str, bool]]: 28 | return { 29 | 'shortname': 'TimeSeriesNoEncoder', 30 | 'name': 'Time Series No Encoder', 31 | 'handles_sparse': True 32 | } 33 | 34 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: 35 | """ 36 | Adds the fitted encoder into the 'X' dictionary and returns it. 37 | 38 | Args: 39 | X (Dict[str, Any]): 'X' dictionary 40 | 41 | Returns: 42 | (Dict[str, Any]): the updated 'X' dictionary 43 | """ 44 | return NoEncoder.transform(self, X) 45 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/optimizer/base_optimizer.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional 2 | 3 | import torch 4 | from torch.optim import Optimizer 5 | 6 | from autoPyTorch.pipeline.components.setup.base_setup import autoPyTorchSetupComponent 7 | from autoPyTorch.utils.common import FitRequirement 8 | 9 | 10 | class BaseOptimizerComponent(autoPyTorchSetupComponent): 11 | """Provide an abstract interface for Pytorch Optimizers 12 | in Auto-Pytorch""" 13 | 14 | def __init__(self) -> None: 15 | super().__init__() 16 | self.optimizer: Optional[Optimizer] = None 17 | self.add_fit_requirements([ 18 | FitRequirement('network', (torch.nn.Module,), user_defined=False, dataset_property=False)]) 19 | 20 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: 21 | """Adds the fitted optimizer into the 'X' dictionary 22 | and returns it.
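Illustrative downstream use (a sketch of the standard PyTorch optimizer protocol, not code from this repository): opt = X['optimizer']; opt.zero_grad(); loss.backward(); opt.step().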
23 | 24 | Args: 25 | X (Dict[str, Any]): 'X' dictionary 26 | 27 | Returns: 28 | (Dict[str, Any]): the updated 'X' dictionary 29 | """ 30 | X.update({'optimizer': self.optimizer}) 31 | return X 32 | 33 | def get_optimizer(self) -> Optimizer: 34 | """Return the underlying Optimizer object. 35 | Returns: 36 | model : the underlying Optimizer object 37 | """ 38 | assert self.optimizer is not None, "No optimizer was fitted" 39 | return self.optimizer 40 | 41 | def __str__(self) -> str: 42 | """ Allow a nice understanding of what components were used """ 43 | string = self.optimizer.__class__.__name__ 44 | info = vars(self) 45 | string += " (" + str(info) + ")" 46 | return string 47 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/traditional_ml/traditional_learner/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Type, Union 2 | 3 | from autoPyTorch.pipeline.components.base_component import ( 4 | ThirdPartyComponents, 5 | ) 6 | from autoPyTorch.pipeline.components.setup.traditional_ml.traditional_learner.base_traditional_learner import \ 7 | BaseTraditionalLearner 8 | from autoPyTorch.pipeline.components.setup.traditional_ml.traditional_learner.learners import ( 9 | CatboostModel, 10 | ExtraTreesModel, 11 | KNNModel, 12 | LGBModel, 13 | RFModel, 14 | SVMModel) 15 | 16 | _traditional_learners = { 17 | # Sorted by model robustness. 18 | # Depending on the allocated time budget, only the 19 | # top models from this dict are to be fitted. 20 | # LGBM is the most robust model, with 21 | # internal measures to prevent crashes and overfitting. 22 | # Additionally, it is one of the state-of-the-art 23 | # methods for tabular prediction. 24 | # Then follow with catboost for categorical-heavy 25 | # datasets. The other models are complementary and 26 | # their ordering is not critical 27 | 'lgb': LGBModel, 28 | 'catboost': CatboostModel, 29 | 'random_forest': RFModel, 30 | 'extra_trees': ExtraTreesModel, 31 | 'svm': SVMModel, 32 | 'knn': KNNModel, 33 | } 34 | _addons = ThirdPartyComponents(BaseTraditionalLearner) 35 | 36 | 37 | def add_traditional_learner(traditional_learner: BaseTraditionalLearner) -> None: 38 | _addons.add_component(traditional_learner) 39 | 40 | 41 | def get_available_traditional_learners() -> Dict[str, Union[Type[BaseTraditionalLearner], Any]]: 42 | traditional_learners = dict() 43 | traditional_learners.update(_traditional_learners) 44 | return traditional_learners 45 | -------------------------------------------------------------------------------- /.binder/postBuild: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | python -m pip install .[docs,examples] 6 | 7 | # Taken from https://github.com/scikit-learn/scikit-learn/blob/22cd233e1932457947e9994285dc7fd4e93881e4/.binder/postBuild 8 | # under BSD3 license, copyright the scikit-learn contributors 9 | 10 | # This script is called in a binder context. When this script is called, we are 11 | # inside a git checkout of the automl/Auto-PyTorch repo. This script 12 | # generates notebooks from the Auto-PyTorch python examples. 13 | 14 | if [[ ! -f /.dockerenv ]]; then 15 | echo "This script was written for repo2docker and is supposed to run inside a docker container." 16 | echo "Exiting because this script can delete data if run outside of a docker container."
17 | exit 1 18 | fi 19 | 20 | # Copy content we need from the Auto-PyTorch repo 21 | TMP_CONTENT_DIR=/tmp/Auto-PyTorch 22 | mkdir -p $TMP_CONTENT_DIR 23 | cp -r examples .binder $TMP_CONTENT_DIR 24 | # delete everything in current directory including dot files and dot folders 25 | find . -delete 26 | 27 | # Generate notebooks and remove other files from examples folder 28 | GENERATED_NOTEBOOKS_DIR=examples 29 | cp -r $TMP_CONTENT_DIR/examples $GENERATED_NOTEBOOKS_DIR 30 | 31 | find $GENERATED_NOTEBOOKS_DIR -name 'example_*.py' -exec sphx_glr_python_to_jupyter.py '{}' + 32 | # Keep __init__.py and custom_metrics.py 33 | NON_NOTEBOOKS=$(find $GENERATED_NOTEBOOKS_DIR -type f | grep -v '\.ipynb' | grep -v 'init' | grep -v 'custom_metrics') 34 | rm -f $NON_NOTEBOOKS 35 | 36 | # Modify path to be consistent by the path given by sphinx-gallery 37 | mkdir notebooks 38 | mv $GENERATED_NOTEBOOKS_DIR notebooks/ 39 | 40 | # Put the .binder folder back (may be useful for debugging purposes) 41 | mv $TMP_CONTENT_DIR/.binder . 42 | # Final clean up 43 | rm -rf $TMP_CONTENT_DIR 44 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/variance_thresholding/VarianceThreshold.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | import numpy as np 4 | 5 | from sklearn.feature_selection import VarianceThreshold as SklearnVarianceThreshold 6 | 7 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 8 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import \ 9 | autoPyTorchTabularPreprocessingComponent 10 | 11 | 12 | class VarianceThreshold(autoPyTorchTabularPreprocessingComponent): 13 | """ 14 | Removes features that have the same value in the training data. 15 | """ 16 | def __init__(self, random_state: Optional[np.random.RandomState] = None): 17 | super().__init__() 18 | 19 | def fit(self, X: Dict[str, Any], y: Optional[Any] = None) -> 'VarianceThreshold': 20 | 21 | self.check_requirements(X, y) 22 | 23 | self.preprocessor['numerical'] = SklearnVarianceThreshold( 24 | threshold=0.0 25 | ) 26 | return self 27 | 28 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: 29 | if self.preprocessor['numerical'] is None: 30 | raise ValueError("cannot call transform on {} without fitting first." 
31 |                              .format(self.__class__.__name__))
32 |         X.update({'variance_threshold': self.preprocessor})
33 |         return X
34 | 
35 |     @staticmethod
36 |     def get_properties(
37 |         dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
38 |     ) -> Dict[str, Union[str, bool]]:
39 | 
40 |         return {
41 |             'shortname': 'Variance Threshold',
42 |             'name': 'Variance Threshold (constant feature removal)',
43 |             'handles_sparse': True,
44 |         }
45 | 
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/base_time_series_preprocessing.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, Optional, Union
 2 | 
 3 | from sklearn.base import BaseEstimator
 4 | 
 5 | from autoPyTorch.pipeline.components.preprocessing.base_preprocessing import (
 6 |     autoPyTorchPreprocessingComponent, autoPyTorchTargetPreprocessingComponent)
 7 | 
 8 | 
 9 | class autoPyTorchTimeSeriesPreprocessingComponent(autoPyTorchPreprocessingComponent):
10 |     """
11 |     Provides abstract interface for time series preprocessing algorithms in AutoPyTorch.
12 |     """
13 | 
14 |     def __init__(self) -> None:
15 |         super().__init__()
16 |         self.preprocessor: Union[Dict[str, Optional[BaseEstimator]], BaseEstimator] = dict(
17 |             numerical=None, categorical=None)
18 | 
19 |     def __str__(self) -> str:
20 |         """ Allow a nice understanding of what components were used """
21 |         string = self.__class__.__name__
22 |         return string
23 | 
24 | 
25 | class autoPyTorchTimeSeriesTargetPreprocessingComponent(autoPyTorchTargetPreprocessingComponent):
26 |     """
27 |     Provides abstract interface for time series target preprocessing algorithms in AutoPyTorch.
28 |     Currently only numerical target preprocessing is supported.
29 |     # TODO add support for categorical targets!
30 |     # TODO define an inverse transformation for each invertible numerical transformation (log, deseasonalization, etc.)
31 |     """
32 |     def __init__(self) -> None:
33 |         super().__init__()
34 |         self.preprocessor: Union[Dict[str, Optional[BaseEstimator]], BaseEstimator] = dict(
35 |             numerical=None, categorical=None)
36 | 
37 |     def __str__(self) -> str:
38 |         """ Allow a nice understanding of what components were used """
39 |         string = self.__class__.__name__
40 |         return string
41 | 
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/base_normalizer.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict
 2 | 
 3 | import numpy as np
 4 | 
 5 | from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.base_image_preprocessor import \
 6 |     autoPyTorchImagePreprocessingComponent
 7 | from autoPyTorch.utils.common import FitRequirement
 8 | 
 9 | 
10 | class BaseNormalizer(autoPyTorchImagePreprocessingComponent):
11 | 
12 |     def __init__(self) -> None:
13 |         super(BaseNormalizer, self).__init__()
14 |         self.add_fit_requirements([
15 |             FitRequirement('mean', (np.ndarray,), user_defined=True, dataset_property=True),
16 |             FitRequirement('std', (np.ndarray,), user_defined=True, dataset_property=True)])
17 | 
18 |     def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
19 | 
20 |         X.update({'normalise': self})
21 |         return X
22 | 
23 |     def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None:
24 |         """
25 |         A mechanism in code to ensure the correctness of the fit dictionary.
26 |         It recursively makes sure that the children and parent level requirements
27 |         are honored before fit.
28 | 
29 |         Args:
30 |             X (Dict[str, Any]): Dictionary with fitted parameters. It is a message-passing
31 |                 mechanism, in which, during a transform, a component adds relevant information
32 |                 so that further stages can be properly fitted
33 |         """
34 |         super().check_requirements(X, y)
35 | 
36 |         if 0 in X['dataset_properties']['std']:
37 |             raise ZeroDivisionError("Can't normalise when std is zero")
38 | 
39 |     def __str__(self) -> str:
40 |         """ Allow a nice understanding of what components were used """
41 |         string = self.__class__.__name__
42 |         return string
43 | 
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/NoEncoder.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict, Optional, Union
 2 | 
 3 | import numpy as np
 4 | 
 5 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
 6 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder import BaseEncoder
 7 | 
 8 | 
 9 | class NoEncoder(BaseEncoder):
10 |     """
11 |     Don't perform encoding on categorical features
12 |     """
13 |     def __init__(self,
14 |                  random_state: Optional[Union[np.random.RandomState, int]] = None
15 |                  ):
16 |         super().__init__()
17 |         self.random_state = random_state
18 | 
19 |     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEncoder:
20 |         """
21 |         The fit function checks this component's requirements and returns self;
22 |         since no encoding is performed, no underlying model is fitted.
23 |         Args:
24 |             X (Dict[str, Any]): fit dictionary
25 |             y (Optional[np.ndarray]): input labels
26 | 
27 |         Returns:
28 |             instance of self
29 |         """
30 |         self.check_requirements(X, y)
31 | 
32 |         return self
33 | 
34 |     def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
35 |         """
36 |         Adds this component's (empty) preprocessor into the 'X' dictionary and returns it.
37 | Args: 38 | X (Dict[str, Any]): 'X' dictionary 39 | 40 | Returns: 41 | (Dict[str, Any]): the updated 'X' dictionary 42 | """ 43 | X.update({'encoder': self.preprocessor}) 44 | return X 45 | 46 | @staticmethod 47 | def get_properties( 48 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 49 | ) -> Dict[str, Union[str, bool]]: 50 | return { 51 | 'shortname': 'NoEncoder', 52 | 'name': 'No Encoder', 53 | 'handles_sparse': True 54 | } 55 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/network_embedding/NoEmbedding.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional, Tuple, Union 2 | 3 | from ConfigSpace.configuration_space import ConfigurationSpace 4 | 5 | import numpy as np 6 | 7 | import torch 8 | from torch import nn 9 | 10 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 11 | from autoPyTorch.pipeline.components.setup.network_embedding.base_network_embedding import NetworkEmbeddingComponent 12 | 13 | 14 | class _NoEmbedding(nn.Module): 15 | def get_partial_models(self, subset_features: List[int]) -> "_NoEmbedding": 16 | return self 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | return x 20 | 21 | 22 | class NoEmbedding(NetworkEmbeddingComponent): 23 | """ 24 | Class to learn an embedding for categorical hyperparameters. 25 | """ 26 | 27 | def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None): 28 | super().__init__(random_state=random_state) 29 | 30 | def build_embedding(self, 31 | num_input_features: np.ndarray, 32 | num_numerical_features: int) -> Tuple[nn.Module, Optional[List[int]]]: 33 | return _NoEmbedding(), None 34 | 35 | @staticmethod 36 | def get_hyperparameter_search_space( 37 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None, 38 | ) -> ConfigurationSpace: 39 | cs = ConfigurationSpace() 40 | return cs 41 | 42 | @staticmethod 43 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 44 | ) -> Dict[str, Union[str, bool]]: 45 | return { 46 | 'shortname': 'no embedding', 47 | 'name': 'NoEmbedding', 48 | 'handles_tabular': True, 49 | 'handles_image': False, 50 | 'handles_time_series': True, 51 | } 52 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/augmentation/image/base_image_augmenter.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional 2 | 3 | from ConfigSpace.configuration_space import ConfigurationSpace 4 | 5 | from imgaug.augmenters.meta import Augmenter 6 | 7 | import numpy as np 8 | 9 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 10 | from autoPyTorch.pipeline.components.setup.base_setup import autoPyTorchSetupComponent 11 | 12 | 13 | class BaseImageAugmenter(autoPyTorchSetupComponent): 14 | def __init__(self, use_augmenter: bool = True) -> None: 15 | super().__init__() 16 | self.use_augmenter = use_augmenter 17 | self.augmenter: Optional[Augmenter] = None 18 | 19 | def get_image_augmenter(self) -> Optional[Augmenter]: 20 | """ 21 | Get fitted augmenter. Can only be called if fit() 22 | has been called on the object. 
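        A rough usage sketch (assumes the component — here named ``component``
        for illustration — was fitted on a fit dictionary ``X`` first, as done
        in the unit tests):

            >>> augmenter = component.fit(X).get_image_augmenter()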
23 |         Returns:
24 |             Optional[Augmenter]: the fitted augmenter
25 |         """
26 |         if self.augmenter is None and self.use_augmenter:
27 |             raise AttributeError("Can't return augmenter for {}, as augmenter is "
28 |                                  "set to be used but it has not been fitted"
29 |                                  " yet".format(self.__class__.__name__))
30 |         return self.augmenter
31 | 
32 |     def __call__(self, X: np.ndarray) -> np.ndarray:
33 |         if self.augmenter is None:
34 |             raise ValueError("can't call {} without fitting first."
35 |                              .format(self.__class__.__name__))
36 |         # explicitly convert to an np.ndarray, as ZeroPadAndCrop currently returns a list
37 |         return np.array(self.augmenter(images=X))
38 | 
39 |     @staticmethod
40 |     def get_hyperparameter_search_space(
41 |         dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
42 |     ) -> ConfigurationSpace:
43 |         cs = ConfigurationSpace()
44 |         return cs
45 | 
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/NoScaler.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict, Optional, Union
 2 | 
 3 | import numpy as np
 4 | 
 5 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
 6 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler import BaseScaler
 7 | 
 8 | 
 9 | class NoScaler(BaseScaler):
10 |     """
11 |     No scaling performed
12 |     """
13 |     def __init__(self,
14 |                  random_state: Optional[Union[np.random.RandomState, int]] = None
15 |                  ):
16 |         super().__init__()
17 |         self.random_state = random_state
18 | 
19 |     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseScaler:
20 |         """
21 |         The fit function checks this component's requirements and returns self;
22 |         since no scaling is performed, no underlying model is fitted.
23 |         Args:
24 |             X (Dict[str, Any]): fit dictionary
25 |             y (Optional[np.ndarray]): input labels
26 | 
27 |         Returns:
28 |             instance of self
29 |         """
30 | 
31 |         self.check_requirements(X, y)
32 | 
33 |         return self
34 | 
35 |     def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
36 |         """
37 |         Adds this component's (empty) preprocessor into the 'X' dictionary
38 |         and returns it.
39 | 40 | Args: 41 | X (np.ndarray): input features 42 | 43 | Returns: 44 | np.ndarray: Transformed features 45 | """ 46 | X.update({'scaler': self.preprocessor}) 47 | return X 48 | 49 | @staticmethod 50 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 51 | ) -> Dict[str, Union[str, bool]]: 52 | return { 53 | 'shortname': 'NoScaler', 54 | 'name': 'No Scaler', 55 | 'handles_sparse': True 56 | } 57 | -------------------------------------------------------------------------------- /test/test_pipeline/components/preprocessing/test_normalizers.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | from numpy.testing import assert_allclose, assert_array_equal 5 | 6 | from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.ImageNormalizer import ImageNormalizer 7 | from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.NoNormalizer import NoNormalizer 8 | 9 | 10 | class TestNormalizers(unittest.TestCase): 11 | def initialise(self): 12 | self.train = np.random.randint(0, 255, (3, 2, 2, 3)) 13 | self.mean = np.array([np.mean(self.train[:, :, :, i]) for i in range(3)]) 14 | self.std = np.array([np.std(self.train[:, :, :, i]) for i in range(3)]) 15 | 16 | def test_image_normalizer(self): 17 | self.initialise() 18 | dataset_properties = {'mean': self.mean, 'std': self.std, } 19 | X = {'dataset_properties': dataset_properties, 'X_train': self.train} 20 | 21 | normalizer = ImageNormalizer() 22 | normalizer = normalizer.fit(X) 23 | X = normalizer.transform(X) 24 | 25 | # check if normalizer added to X is instance of self 26 | self.assertEqual(X['normalise'], normalizer) 27 | epsilon = 1e-8 28 | train = self.train - self.mean 29 | train *= 1.0 / (epsilon + self.std) 30 | 31 | assert_allclose(train, normalizer(self.train), rtol=1e-5) 32 | 33 | def test_no_normalizer(self): 34 | self.initialise() 35 | 36 | dataset_properties = {'mean': self.mean, 'std': self.std, } 37 | X = {'dataset_properties': dataset_properties, 'X_train': self.train} 38 | 39 | normalizer = NoNormalizer() 40 | normalizer = normalizer.fit(X) 41 | X = normalizer.transform(X) 42 | 43 | # check if normalizer added to X is instance of self 44 | self.assertEqual(X['normalise'], normalizer) 45 | 46 | assert_array_equal(self.train, normalizer(self.train)) 47 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/encoding/OneHotEncoder.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | import numpy as np 4 | 5 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 6 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.OneHotEncoder import \ 7 | OneHotEncoder 8 | from autoPyTorch.pipeline.components.preprocessing.time_series_preprocessing.encoding.time_series_base_encoder import \ 9 | TimeSeriesBaseEncoder 10 | 11 | 12 | class TimeSeriesOneHotEncoder(TimeSeriesBaseEncoder): 13 | def __init__(self, 14 | random_state: Optional[Union[np.random.RandomState, int]] = None 15 | ): 16 | super(TimeSeriesOneHotEncoder, self).__init__() 17 | self.random_state = random_state 18 | 19 | def fit(self, X: Dict[str, Any], y: Any = None) -> TimeSeriesBaseEncoder: 20 | OneHotEncoder.fit(self, X, y) 21 | categorical_columns = 
X['dataset_properties']['categorical_columns']
22 |         n_features_cat = X['dataset_properties']['categories']
23 |         feature_names = X['dataset_properties']['feature_names']
24 |         feature_shapes = X['dataset_properties']['feature_shapes']
25 | 
26 |         if len(n_features_cat) == 0:
27 |             n_features_cat = self.preprocessor['categorical'].categories  # type: ignore
28 |         for i, cat_column in enumerate(categorical_columns):
29 |             feature_shapes[feature_names[cat_column]] = len(n_features_cat[i])
30 |         self.feature_shapes = feature_shapes
31 |         return self
32 | 
33 |     @staticmethod
34 |     def get_properties(
35 |         dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
36 |     ) -> Dict[str, Union[str, bool]]:
37 |         return {
38 |             'shortname': 'TimeSeriesOneHotEncoder',
39 |             'name': 'Time Series One Hot Encoder',
40 |             'handles_sparse': False
41 |         }
42 | 
--------------------------------------------------------------------------------
/test/test_utils/test_common.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This tests the functionality in autoPyTorch/utils/common.
 3 | """
 4 | from enum import Enum
 5 | 
 6 | import pytest
 7 | 
 8 | from autoPyTorch.utils.common import autoPyTorchEnum
 9 | 
10 | 
11 | class SubEnum(autoPyTorchEnum):
12 |     x = "x"
13 |     y = "y"
14 | 
15 | 
16 | class DummyEnum(Enum):  # You need to move it on top
17 |     x = "x"
18 | 
19 | 
20 | @pytest.mark.parametrize('iter',
21 |                          ([SubEnum.x],
22 |                           ["x"],
23 |                           {SubEnum.x: "hello"},
24 |                           {'x': 'hello'},
25 |                           SubEnum,
26 |                           ["x", "y"]))
27 | def test_autopytorch_enum(iter):
28 |     """
29 |     This test ensures that a subclass of `autoPyTorchEnum`
30 |     can be used with strings.
31 | 
32 |     Args:
33 |         iter (Iterable):
34 |             iterable to check for compatibility
35 |     """
36 | 
37 |     e = SubEnum.x
38 | 
39 |     assert e in iter
40 | 
41 | 
42 | @pytest.mark.parametrize('iter',
43 |                          [[SubEnum.y],
44 |                           ["y"],
45 |                           {SubEnum.y: "hello"},
46 |                           {'y': 'hello'}])
47 | def test_autopytorch_enum_false(iter):
48 |     """
49 |     This test ensures that a subclass of `autoPyTorchEnum`
50 |     correctly reports non-membership for non-matching values.
51 |     Args:
52 |         iter (Iterable):
53 |             iterable to check for compatibility
54 |     """
55 | 
56 |     e = SubEnum.x
57 | 
58 |     assert e not in iter
59 | 
60 | 
61 | @pytest.mark.parametrize('others', (1, 2.0, SubEnum, DummyEnum.x))
62 | def test_raise_errors_autopytorch_enum(others):
63 |     """
64 |     This test ensures that a subclass of `autoPyTorchEnum`
65 |     raises an error properly.
66 |     Args:
67 |         others (Any):
68 |             Variable to compare with SubEnum.
69 |     """
70 | 
71 |     with pytest.raises(RuntimeError):
72 |         SubEnum.x == others
73 | 
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/setup/lr_scheduler/NoScheduler.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict, Optional, Union
 2 | 
 3 | from ConfigSpace.configuration_space import ConfigurationSpace
 4 | 
 5 | import numpy as np
 6 | 
 7 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
 8 | from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler import BaseLRComponent
 9 | from autoPyTorch.pipeline.components.setup.lr_scheduler.constants import StepIntervalUnit
10 | 
11 | 
12 | class NoScheduler(BaseLRComponent):
13 |     """
14 |     Performs no learning rate scheduling; the learning rate is left unchanged.
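    An illustrative sketch (assumes a fit dictionary ``X`` that already
    contains an optimizer):

        >>> scheduler = NoScheduler().fit(X)   # only verifies an optimizer exists
        >>> X = scheduler.transform(X)         # sets 'lr_scheduler' and 'step_interval'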
15 | 16 | """ 17 | def __init__( 18 | self, 19 | step_interval: Union[str, StepIntervalUnit] = StepIntervalUnit.epoch, 20 | random_state: Optional[np.random.RandomState] = None 21 | ): 22 | 23 | super().__init__(step_interval) 24 | self.random_state = random_state 25 | 26 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseLRComponent: 27 | """ 28 | Fits a component by using an input dictionary with pre-requisites 29 | 30 | Args: 31 | X (X: Dict[str, Any]): Dependencies needed by current component to perform fit 32 | y (Any): not used. To comply with sklearn API 33 | 34 | Returns: 35 | A instance of self 36 | """ 37 | 38 | # Make sure there is an optimizer 39 | self.check_requirements(X, y) 40 | return self 41 | 42 | @staticmethod 43 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 44 | ) -> Dict[str, Union[str, bool]]: 45 | return { 46 | 'shortname': 'NoScheduler', 47 | 'name': 'No LR Scheduling', 48 | } 49 | 50 | @staticmethod 51 | def get_hyperparameter_search_space(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 52 | ) -> ConfigurationSpace: 53 | cs = ConfigurationSpace() 54 | return cs 55 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/NoNormalizer.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | import numpy as np 4 | 5 | import torch 6 | 7 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 8 | from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.base_normalizer import ( 9 | BaseNormalizer 10 | ) 11 | 12 | 13 | class NoNormalizer(BaseNormalizer): 14 | def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None 15 | ): 16 | super().__init__() 17 | self.random_state = random_state 18 | 19 | def fit(self, X: Dict[str, Any], y: Optional[Any] = None) -> "NoNormalizer": 20 | """ 21 | Initialises early_preprocessor and returns self. 22 | Args: 23 | X (Dict[str, Any]): 'X' dictionary 24 | 25 | Returns: 26 | autoPyTorchImagePreprocessingComponent: self 27 | """ 28 | self.check_requirements(X, y) 29 | 30 | return self 31 | 32 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: 33 | 34 | X.update({'normalise': self}) 35 | return X 36 | 37 | def __call__(self, X: Union[np.ndarray, torch.Tensor]) -> Union[np.ndarray, torch.Tensor]: 38 | """ 39 | Makes the autoPyTorchPreprocessingComponent Callable. Calling the component 40 | calls the transform function of the underlying early_preprocessor and 41 | returns the transformed array. 
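        Since no normalisation is configured for this component, the input is
        returned unchanged.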
42 | Args: 43 | X (Union[np.ndarray, torch.Tensor]): input data tensor 44 | 45 | Returns: 46 | Union[np.ndarray, torch.Tensor]: Transformed data tensor 47 | """ 48 | return X 49 | 50 | @staticmethod 51 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 52 | ) -> Dict[str, Any]: 53 | return { 54 | 'shortname': 'no-normalize', 55 | 'name': 'No Normalizer Node', 56 | } 57 | -------------------------------------------------------------------------------- /test/test_pipeline/components/preprocessing/test_variance_thresholding.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.testing import assert_array_equal 3 | 4 | 5 | from sklearn.base import BaseEstimator 6 | from sklearn.compose import make_column_transformer 7 | 8 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.variance_thresholding. \ 9 | VarianceThreshold import VarianceThreshold 10 | 11 | 12 | def test_variance_threshold(): 13 | data = np.array([[1, 2, 1], 14 | [7, 8, 9], 15 | [4, 5, 1], 16 | [11, 12, 1], 17 | [17, 18, 19], 18 | [14, 15, 16]]) 19 | numerical_columns = [0, 1, 2] 20 | train_indices = np.array([0, 2, 3]) 21 | test_indices = np.array([1, 4, 5]) 22 | dataset_properties = { 23 | 'categorical_columns': [], 24 | 'numerical_columns': numerical_columns, 25 | } 26 | X = { 27 | 'X_train': data[train_indices], 28 | 'dataset_properties': dataset_properties 29 | } 30 | component = VarianceThreshold() 31 | 32 | component = component.fit(X) 33 | X = component.transform(X) 34 | variance_threshold = X['variance_threshold']['numerical'] 35 | 36 | # check if the fit dictionary X is modified as expected 37 | assert isinstance(X['variance_threshold'], dict) 38 | assert isinstance(variance_threshold, BaseEstimator) 39 | 40 | # make column transformer with returned encoder to fit on data 41 | column_transformer = make_column_transformer((variance_threshold, 42 | X['dataset_properties']['numerical_columns']), 43 | remainder='passthrough') 44 | column_transformer = column_transformer.fit(X['X_train']) 45 | transformed = column_transformer.transform(data[test_indices]) 46 | 47 | assert_array_equal(transformed, np.array([[7, 8], 48 | [17, 18], 49 | [14, 15]])) 50 | -------------------------------------------------------------------------------- /test/test_pipeline/components/preprocessing/test_normalizer_choice.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import unittest 3 | 4 | from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise import ( 5 | NormalizerChoice 6 | ) 7 | 8 | 9 | class TestNormalizerChoice(unittest.TestCase): 10 | 11 | def test_get_set_config_space(self): 12 | """Make sure that we can setup a valid choice in the encoder 13 | choice""" 14 | dataset_properties = {} 15 | normalizer_choice = NormalizerChoice(dataset_properties) 16 | cs = normalizer_choice.get_hyperparameter_search_space() 17 | 18 | # Make sure that all hyperparameters are part of the search space 19 | self.assertListEqual( 20 | sorted(cs.get_hyperparameter('__choice__').choices), 21 | sorted(list(normalizer_choice.get_components().keys())) 22 | ) 23 | 24 | # Make sure we can properly set some random configs 25 | # Whereas just one iteration will make sure the algorithm works, 26 | # doing five iterations increase the confidence. 
We will be able to 27 | # catch component specific crashes 28 | for i in range(5): 29 | config = cs.sample_configuration() 30 | config_dict = copy.deepcopy(config.get_dictionary()) 31 | normalizer_choice.set_hyperparameters(config) 32 | 33 | self.assertEqual(normalizer_choice.choice.__class__, 34 | normalizer_choice.get_components()[config_dict['__choice__']]) 35 | 36 | # Then check the choice configuration 37 | selected_choice = config_dict.pop('__choice__', None) 38 | for key, value in config_dict.items(): 39 | # Remove the selected_choice string from the parameter 40 | # so we can query in the object for it 41 | key = key.replace(selected_choice + ':', '') 42 | self.assertIn(key, vars(normalizer_choice.choice)) 43 | self.assertEqual(value, normalizer_choice.choice.__dict__[key]) 44 | 45 | 46 | if __name__ == '__main__': 47 | unittest.main() 48 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/ImageNormalizer.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | import numpy as np 4 | 5 | import torch 6 | 7 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 8 | from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.base_normalizer import BaseNormalizer 9 | 10 | 11 | class ImageNormalizer(BaseNormalizer): 12 | 13 | def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None 14 | ): 15 | super().__init__() 16 | self.random_state = random_state 17 | self.mean: Optional[np.ndarray] = None 18 | self.std: Optional[np.ndarray] = None 19 | 20 | def fit(self, X: Dict[str, Any], y: Optional[Any] = None) -> "ImageNormalizer": 21 | """ 22 | Initialises early_preprocessor and returns self. 23 | Args: 24 | X (Dict[str, Any]): 'X' dictionary 25 | 26 | Returns: 27 | autoPyTorchImagePreprocessingComponent: self 28 | """ 29 | self.check_requirements(X, y) 30 | self.mean = X['dataset_properties']['mean'] 31 | self.std = X['dataset_properties']['std'] 32 | return self 33 | 34 | def __call__(self, X: Union[np.ndarray, torch.Tensor]) -> Union[np.ndarray, torch.Tensor]: 35 | """ 36 | Makes the autoPyTorchPreprocessingComponent Callable. Calling the component 37 | calls the transform function of the underlying early_preprocessor and 38 | returns the transformed array. 
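        Concretely, the transform standardises the input with the statistics
        stored during fit, i.e. ``X = (X - self.mean) / self.std``.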
39 | Args: 40 | X (Union[np.ndarray, torch.Tensor]): input data tensor 41 | 42 | Returns: 43 | Union[np.ndarray, torch.Tensor]: Transformed data tensor 44 | """ 45 | X = (X - self.mean) / self.std 46 | return X 47 | 48 | @staticmethod 49 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 50 | ) -> Dict[str, Any]: 51 | return { 52 | 'shortname': 'normalize', 53 | 'name': 'Image Normalizer Node', 54 | } 55 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/coalescer/MinorityCoalescer.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | from ConfigSpace.configuration_space import ConfigurationSpace 4 | from ConfigSpace.hyperparameters import UniformFloatHyperparameter 5 | 6 | import numpy as np 7 | 8 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.coalescer.base_coalescer import BaseCoalescer 9 | from autoPyTorch.utils.common import HyperparameterSearchSpace, add_hyperparameter 10 | from autoPyTorch.utils.implementations import MinorityCoalesceTransformer 11 | 12 | 13 | class MinorityCoalescer(BaseCoalescer): 14 | """Group together categories whose occurence is less than a specified min_frac """ 15 | def __init__(self, min_frac: float, random_state: np.random.RandomState): 16 | super().__init__() 17 | self.min_frac = min_frac 18 | self.random_state = random_state 19 | 20 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseCoalescer: 21 | self.check_requirements(X, y) 22 | self.preprocessor['categorical'] = MinorityCoalesceTransformer(min_frac=self.min_frac) 23 | return self 24 | 25 | @staticmethod 26 | def get_hyperparameter_search_space( 27 | dataset_properties: Optional[Dict[str, Any]] = None, 28 | min_frac: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter='min_frac', 29 | value_range=(1e-4, 0.5), 30 | default_value=1e-2, 31 | ), 32 | ) -> ConfigurationSpace: 33 | 34 | cs = ConfigurationSpace() 35 | add_hyperparameter(cs, min_frac, UniformFloatHyperparameter) 36 | return cs 37 | 38 | @staticmethod 39 | def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, Union[str, bool]]: 40 | return { 41 | 'shortname': 'MinorityCoalescer', 42 | 'name': 'MinorityCoalescer', 43 | 'handles_sparse': False 44 | } 45 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | .. _api: 4 | 5 | APIs 6 | **** 7 | 8 | ============ 9 | Main modules 10 | ============ 11 | 12 | ~~~~~~~~~~~~~~ 13 | Classification 14 | ~~~~~~~~~~~~~~ 15 | 16 | .. autoclass:: autoPyTorch.api.tabular_classification.TabularClassificationTask 17 | :members: 18 | :inherited-members: search, refit, predict, score 19 | 20 | ~~~~~~~~~~~~~~ 21 | Regression 22 | ~~~~~~~~~~~~~~ 23 | 24 | .. autoclass:: autoPyTorch.api.tabular_regression.TabularRegressionTask 25 | :members: 26 | :inherited-members: search, refit, predict, score 27 | 28 | ~~~~~~~~~~~~~~ 29 | Time Series Forecasting 30 | ~~~~~~~~~~~~~~ 31 | 32 | .. 
autoclass:: autoPyTorch.api.time_series_forecasting.TimeSeriesForecastingTask 33 | :members: 34 | :inherited-members: search, refit, predict, score 35 | 36 | 37 | 38 | ========= 39 | Pipelines 40 | ========= 41 | 42 | ~~~~~~~~~~~~~~~~~~~~~~ 43 | Tabular Classification 44 | ~~~~~~~~~~~~~~~~~~~~~~ 45 | 46 | .. autoclass:: autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline 47 | :members: 48 | 49 | .. autoclass:: autoPyTorch.pipeline.traditional_tabular_classification.TraditionalTabularClassificationPipeline 50 | :members: 51 | 52 | ~~~~~~~~~~~~~~~~~~ 53 | Tabular Regression 54 | ~~~~~~~~~~~~~~~~~~ 55 | 56 | .. autoclass:: autoPyTorch.pipeline.tabular_regression.TabularRegressionPipeline 57 | :members: 58 | 59 | .. autoclass:: autoPyTorch.pipeline.traditional_tabular_regression.TraditionalTabularRegressionPipeline 60 | :members: 61 | 62 | ~~~~~~~~~~~~~~~~~~ 63 | Time Series Forecasting 64 | ~~~~~~~~~~~~~~~~~~ 65 | 66 | .. autoclass:: autoPyTorch.pipeline.time_series_forecasting.TimeSeriesForecastingPipeline 67 | :members: 68 | 69 | 70 | ================= 71 | Steps in Pipeline 72 | ================= 73 | 74 | 75 | ~~~~~~~~~~~~~~~~~~~~ 76 | autoPyTorchComponent 77 | ~~~~~~~~~~~~~~~~~~~~ 78 | 79 | .. autoclass:: autoPyTorch.pipeline.components.base_component.autoPyTorchComponent 80 | :members: 81 | 82 | ~~~~~~~~~~~~~~~~~ 83 | autoPyTorchChoice 84 | ~~~~~~~~~~~~~~~~~ 85 | 86 | .. autoclass:: autoPyTorch.pipeline.components.base_choice.autoPyTorchChoice 87 | :members: -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Types of changes 4 | 5 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) 6 | - [ ] Bug fix (non-breaking change which fixes an issue) 7 | - [ ] New feature (non-breaking change which adds functionality) 8 | 9 | Note that a Pull Request should only contain one of refactoring, new features or documentation changes. 10 | Please separate these changes and send us individual PRs for each. 11 | For more information on how to create a good pull request, please refer to [The anatomy of a perfect pull request](https://medium.com/@hugooodias/the-anatomy-of-a-perfect-pull-request-567382bb6067). 12 | 13 | ## Checklist: 14 | 15 | 16 | - [ ] My code follows the code style of this project. 17 | - [ ] My change requires a change to the documentation. 18 | - [ ] I have updated the documentation accordingly. 19 | * [ ] Have you checked to ensure there aren't other open [Pull Requests](../../../pulls) for the same update/change? 20 | * [ ] Have you added an explanation of what your changes do and why you'd like us to include them? 21 | * [ ] Have you written new tests for your core changes, as applicable? 22 | * [ ] Have you successfully ran tests with your changes locally? 23 | 26 | 27 | 28 | ## Description 29 | 30 | 31 | ## Motivation and Context 32 | 33 | 34 | 35 | ## How has this been tested? 
36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /examples/20_basics/example_image_classification.py: -------------------------------------------------------------------------------- 1 | """ 2 | ====================== 3 | Image Classification 4 | ====================== 5 | """ 6 | import numpy as np 7 | 8 | import sklearn.model_selection 9 | 10 | import torchvision.datasets 11 | 12 | from autoPyTorch.pipeline.image_classification import ImageClassificationPipeline 13 | 14 | # Get the training data for tabular classification 15 | trainset = torchvision.datasets.FashionMNIST(root='../datasets/', train=True, download=True) 16 | data = trainset.data.numpy() 17 | data = np.expand_dims(data, axis=3) 18 | # Create a proof of concept pipeline! 19 | dataset_properties = dict() 20 | pipeline = ImageClassificationPipeline(dataset_properties=dataset_properties) 21 | 22 | # Train and test split 23 | train_indices, val_indices = sklearn.model_selection.train_test_split( 24 | list(range(data.shape[0])), 25 | random_state=1, 26 | test_size=0.25, 27 | ) 28 | 29 | # Configuration space 30 | pipeline_cs = pipeline.get_hyperparameter_search_space() 31 | print("Pipeline CS:\n", '_' * 40, f"\n{pipeline_cs}") 32 | config = pipeline_cs.sample_configuration() 33 | print("Pipeline Random Config:\n", '_' * 40, f"\n{config}") 34 | pipeline.set_hyperparameters(config) 35 | 36 | # Fit the pipeline 37 | print("Fitting the pipeline...") 38 | 39 | pipeline.fit(X=dict(X_train=data, 40 | is_small_preprocess=True, 41 | dataset_properties=dict(mean=np.array([np.mean(data[:, :, :, i]) for i in range(1)]), 42 | std=np.array([np.std(data[:, :, :, i]) for i in range(1)]), 43 | num_classes=10, 44 | num_features=data.shape[1] * data.shape[2], 45 | image_height=data.shape[1], 46 | image_width=data.shape[2], 47 | is_small_preprocess=True), 48 | train_indices=train_indices, 49 | val_indices=val_indices, 50 | ) 51 | ) 52 | 53 | # Showcase some components of the pipeline 54 | print(pipeline) 55 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/network_backbone/forecasting_backbone/forecasting_encoder/flat_encoder/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | from typing import Dict, Optional, Type, Union 4 | 5 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 6 | from autoPyTorch.pipeline.components.base_component import ( 7 | ThirdPartyComponents, 8 | autoPyTorchComponent, 9 | find_components 10 | ) 11 | from autoPyTorch.pipeline.components.setup.network_backbone.forecasting_backbone.forecasting_encoder import \ 12 | AbstractForecastingEncoderChoice 13 | from autoPyTorch.pipeline.components.setup.network_backbone.forecasting_backbone.forecasting_encoder.\ 14 | base_forecasting_encoder import BaseForecastingEncoder 15 | 16 | directory = os.path.split(__file__)[0] 17 | _encoders = find_components(__package__, 18 | directory, 19 | BaseForecastingEncoder) 20 | _addons = ThirdPartyComponents(BaseForecastingEncoder) 21 | 22 | 23 | def add_encoder(encoder: BaseForecastingEncoder) -> None: 24 | _addons.add_component(encoder) 25 | 26 | 27 | class FlatForecastingEncoderChoice(AbstractForecastingEncoderChoice): 28 | def get_components(self) -> Dict[str, Type[autoPyTorchComponent]]: # type: ignore[override] 29 | """Returns the available backbone components 30 | 31 | Args: 32 | None 33 | 34 | 
Returns: 35 | Dict[str, autoPyTorchComponent]: all basebackbone components available 36 | as choices for learning rate scheduling 37 | """ 38 | components = OrderedDict() 39 | components.update(_encoders) 40 | components.update(_addons.components) 41 | return components 42 | 43 | @staticmethod 44 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 45 | ) -> Dict[str, Union[str, bool]]: 46 | return { 47 | 'shortname': 'FlatEncoder', 48 | 'name': 'FlatEncoder', 49 | 'handles_tabular': False, 50 | 'handles_image': False, 51 | 'handles_time_series': True, 52 | } 53 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/NoFeaturePreprocessor.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | import numpy as np 4 | 5 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 6 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing.\ 7 | base_feature_preprocessor import autoPyTorchFeaturePreprocessingComponent 8 | 9 | 10 | class NoFeaturePreprocessor(autoPyTorchFeaturePreprocessingComponent): 11 | """ 12 | Don't perform feature preprocessing on categorical features 13 | """ 14 | def __init__(self, 15 | random_state: Optional[Union[np.random.RandomState, int]] = None 16 | ): 17 | super().__init__() 18 | self.random_state = random_state 19 | 20 | def fit(self, X: Dict[str, Any], y: Any = None) -> autoPyTorchFeaturePreprocessingComponent: 21 | """ 22 | The fit function calls the fit function of the underlying model 23 | and returns the transformed array. 24 | Args: 25 | X (np.ndarray): input features 26 | y (Optional[np.ndarray]): input labels 27 | 28 | Returns: 29 | instance of self 30 | """ 31 | self.check_requirements(X, y) 32 | 33 | return self 34 | 35 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: 36 | """ 37 | Adds the self into the 'X' dictionary and returns it. 
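        A minimal sketch of the resulting update (no preprocessing model is
        attached for this no-op component):

            >>> X = NoFeaturePreprocessor().fit(X).transform(X)
            >>> 'feature_preprocessor' in X
            True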
38 | Args: 39 | X (Dict[str, Any]): 'X' dictionary 40 | 41 | Returns: 42 | (Dict[str, Any]): the updated 'X' dictionary 43 | """ 44 | X.update({'feature_preprocessor': self.preprocessor}) 45 | return X 46 | 47 | @staticmethod 48 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 49 | ) -> Dict[str, Union[str, bool]]: 50 | return {'shortname': 'NoFeaturePreprocessing', 51 | 'name': 'No Feature Preprocessing', 52 | 'handles_sparse': True, 53 | 'handles_classification': True, 54 | 'handles_regression': True 55 | } 56 | -------------------------------------------------------------------------------- /test/test_pipeline/components/preprocessing/base.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional, Tuple 2 | 3 | from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice 4 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.TabularColumnTransformer import \ 5 | TabularColumnTransformer 6 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.coalescer import CoalescerChoice 7 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding import EncoderChoice 8 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.imputation.SimpleImputer import SimpleImputer 9 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling import ScalerChoice 10 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.variance_thresholding. \ 11 | VarianceThreshold import VarianceThreshold 12 | from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline 13 | 14 | 15 | class TabularPipeline(TabularClassificationPipeline): 16 | def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]], 17 | ) -> List[Tuple[str, autoPyTorchChoice]]: 18 | """ 19 | Defines what steps a pipeline should follow. 20 | The step itself has choices given via autoPyTorchChoice. 21 | 22 | Returns: 23 | List[Tuple[str, autoPyTorchChoice]]: list of steps sequentially exercised 24 | by the pipeline. 
25 | """ 26 | steps: List[Tuple[str, autoPyTorchChoice]] = [] 27 | 28 | default_dataset_properties = {'target_type': 'tabular_classification'} 29 | if dataset_properties is not None: 30 | default_dataset_properties.update(dataset_properties) 31 | 32 | steps.extend([ 33 | ("imputer", SimpleImputer()), 34 | ("variance_threshold", VarianceThreshold()), 35 | ("coalescer", CoalescerChoice(default_dataset_properties)), 36 | ("encoder", EncoderChoice(default_dataset_properties)), 37 | ("scaler", ScalerChoice(default_dataset_properties)), 38 | ("tabular_transformer", TabularColumnTransformer()), 39 | ]) 40 | return steps 41 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Docs 2 | 3 | on: 4 | # Allow to manually trigger through github API 5 | # Wont trigger the push to github pages where the documentation is located 6 | workflow_dispatch: 7 | 8 | # Triggers with push to these branches 9 | push: 10 | branches: 11 | - master 12 | - development 13 | 14 | # Triggers with push to a pr aimed at these branches 15 | pull_request: 16 | branches: 17 | - master 18 | - development 19 | 20 | jobs: 21 | build-and-deploy: 22 | runs-on: ubuntu-latest 23 | 24 | steps: 25 | - name: Checkout 26 | uses: actions/checkout@v2 27 | with: 28 | submodules: recursive 29 | - name: Setup Python 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: 3.8 33 | 34 | - name: Install dependencies 35 | run: | 36 | pip install -e .[docs,examples,forecasting] 37 | 38 | - name: Make docs 39 | run: | 40 | cd docs 41 | make html 42 | 43 | - name: Pull latest gh-pages 44 | if: (contains(github.ref, 'develop') || contains(github.ref, 'master')) && github.event_name == 'push' 45 | run: | 46 | cd .. 
47 | git clone https://github.com/automl/Auto-PyTorch.git --branch gh-pages --single-branch gh-pages 48 | 49 | - name: Copy new doc into gh-pages 50 | if: (contains(github.ref, 'develop') || contains(github.ref, 'master')) && github.event_name == 'push' 51 | run: | 52 | branch_name=${GITHUB_REF##*/} 53 | cd ../gh-pages 54 | rm -rf $branch_name 55 | cp -r ../Auto-PyTorch/docs/build/html $branch_name 56 | 57 | - name: Push to gh-pages 58 | if: (contains(github.ref, 'develop') || contains(github.ref, 'master')) && github.event_name == 'push' 59 | run: | 60 | last_commit=$(git log --pretty=format:"%an: %s") 61 | cd ../gh-pages 62 | branch_name=${GITHUB_REF##*/} 63 | git add $branch_name/ 64 | git config --global user.name 'Github Actions' 65 | git config --global user.email 'not@mail.com' 66 | git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }} 67 | git commit -am "$last_commit" 68 | git push 69 | -------------------------------------------------------------------------------- /examples/40_advanced/example_run_with_portfolio.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================================ 3 | Tabular Classification with Greedy Portfolio 4 | ============================================ 5 | 6 | The following example shows how to fit a sample classification model 7 | with AutoPyTorch using the greedy portfolio 8 | """ 9 | import os 10 | import tempfile as tmp 11 | import warnings 12 | 13 | os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir() 14 | os.environ['OMP_NUM_THREADS'] = '1' 15 | os.environ['OPENBLAS_NUM_THREADS'] = '1' 16 | os.environ['MKL_NUM_THREADS'] = '1' 17 | 18 | warnings.simplefilter(action='ignore', category=UserWarning) 19 | warnings.simplefilter(action='ignore', category=FutureWarning) 20 | 21 | import sklearn.datasets 22 | import sklearn.model_selection 23 | 24 | from autoPyTorch.api.tabular_classification import TabularClassificationTask 25 | 26 | 27 | ############################################################################ 28 | # Data Loading 29 | # ============ 30 | X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) 31 | X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( 32 | X, 33 | y, 34 | random_state=42, 35 | ) 36 | 37 | ############################################################################ 38 | # Build and fit a classifier 39 | # ========================== 40 | api = TabularClassificationTask( 41 | seed=42, 42 | ) 43 | 44 | ############################################################################ 45 | # Search for an ensemble of machine learning algorithms 46 | # ===================================================== 47 | api.search( 48 | X_train=X_train, 49 | y_train=y_train, 50 | X_test=X_test.copy(), 51 | y_test=y_test.copy(), 52 | optimize_metric='accuracy', 53 | total_walltime_limit=300, 54 | func_eval_time_limit_secs=50, 55 | # Setting this option to "greedy" 56 | # will make smac run the configurations 57 | # present in 'autoPyTorch/configs/greedy_portfolio.json' 58 | portfolio_selection="greedy" 59 | ) 60 | 61 | ############################################################################ 62 | # Print the final ensemble performance 63 | # ==================================== 64 | y_pred = api.predict(X_test) 65 | score = api.score(y_pred, y_test) 66 | print(score) 67 | # Print the final ensemble built by AutoPyTorch 68 | print(api.show_models()) 69 | 70 | # 
Print statistics from search 71 | print(api.sprint_statistics()) 72 | -------------------------------------------------------------------------------- /test/test_pipeline/test_traditional_pipeline.py: -------------------------------------------------------------------------------- 1 | import ConfigSpace as CS 2 | 3 | import numpy as np 4 | 5 | import pytest 6 | 7 | from autoPyTorch.pipeline.components.setup.traditional_ml.traditional_learner import _traditional_learners 8 | from autoPyTorch.pipeline.traditional_tabular_classification import ( 9 | TraditionalTabularClassificationPipeline, 10 | ) 11 | 12 | 13 | @pytest.mark.parametrize("fit_dictionary_tabular", 14 | ['classification_numerical_and_categorical', 15 | 'regression_numerical_and_categorical'], indirect=True) 16 | def test_traditional_tabular_pipeline(fit_dictionary_tabular): 17 | pipeline = TraditionalTabularClassificationPipeline( 18 | dataset_properties=fit_dictionary_tabular['dataset_properties'] 19 | ) 20 | assert pipeline._get_estimator_hyperparameter_name() == "traditional_tabular_learner" 21 | cs = pipeline.get_hyperparameter_search_space() 22 | assert isinstance(cs, CS.ConfigurationSpace) 23 | config = cs.sample_configuration() 24 | assert config['model_trainer:tabular_traditional_model:traditional_learner'] in _traditional_learners 25 | assert pipeline.get_pipeline_representation() == { 26 | 'Preprocessing': 'None', 27 | 'Estimator': 'TabularTraditionalModel', 28 | } 29 | 30 | 31 | @pytest.mark.parametrize("fit_dictionary_tabular", 32 | ['classification_numerical_and_categorical'], indirect=True) 33 | def test_traditional_tabular_pipeline_predict(fit_dictionary_tabular): 34 | pipeline = TraditionalTabularClassificationPipeline( 35 | dataset_properties=fit_dictionary_tabular['dataset_properties'] 36 | ) 37 | assert pipeline._get_estimator_hyperparameter_name() == "traditional_tabular_learner" 38 | config = pipeline.get_hyperparameter_search_space().get_default_configuration() 39 | pipeline.set_hyperparameters(config) 40 | pipeline.fit(fit_dictionary_tabular) 41 | prediction = pipeline.predict(fit_dictionary_tabular['X_train']) 42 | assert np.shape(fit_dictionary_tabular['X_train'])[0] == prediction.shape[0] 43 | assert prediction.shape[1] == 1 44 | prediction = pipeline.predict(fit_dictionary_tabular['X_train'], batch_size=5) 45 | assert np.shape(fit_dictionary_tabular['X_train'])[0] == prediction.shape[0] 46 | prediction = pipeline.predict_proba(fit_dictionary_tabular['X_train'], batch_size=5) 47 | assert np.shape(fit_dictionary_tabular['X_train'])[0] == prediction.shape[0] 48 | -------------------------------------------------------------------------------- /examples/40_advanced/example_parallel_n_jobs.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================================ 3 | Tabular Classification with n parallel jobs 4 | ============================================ 5 | 6 | The following example shows how to fit a sample classification model parallely on 2 cores 7 | with AutoPyTorch 8 | 9 | """ 10 | import os 11 | import tempfile as tmp 12 | import warnings 13 | 14 | os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir() 15 | os.environ['OMP_NUM_THREADS'] = '1' 16 | os.environ['OPENBLAS_NUM_THREADS'] = '1' 17 | os.environ['MKL_NUM_THREADS'] = '1' 18 | 19 | warnings.simplefilter(action='ignore', category=UserWarning) 20 | warnings.simplefilter(action='ignore', category=FutureWarning) 21 | 22 | import sklearn.datasets 23 | import sklearn.model_selection 
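# Note: everything below sits under the ``if __name__ == '__main__':`` guard,
# which protects this module from re-executing the search when worker
# processes spawned for n_jobs=2 re-import it.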
24 | 25 | from autoPyTorch.api.tabular_classification import TabularClassificationTask 26 | 27 | if __name__ == '__main__': 28 | ############################################################################ 29 | # Data Loading 30 | # ============ 31 | X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) 32 | X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( 33 | X, 34 | y, 35 | random_state=1, 36 | ) 37 | 38 | ############################################################################ 39 | # Build and fit a classifier 40 | # ========================== 41 | api = TabularClassificationTask( 42 | n_jobs=2, 43 | seed=42, 44 | ) 45 | 46 | ############################################################################ 47 | # Search for an ensemble of machine learning algorithms 48 | # ===================================================== 49 | api.search( 50 | X_train=X_train, 51 | y_train=y_train, 52 | X_test=X_test.copy(), 53 | y_test=y_test.copy(), 54 | optimize_metric='accuracy', 55 | total_walltime_limit=300, 56 | func_eval_time_limit_secs=50, 57 | # Each one of the 2 jobs is allocated 3GB 58 | memory_limit=3072, 59 | ) 60 | 61 | ############################################################################ 62 | # Print the final ensemble performance 63 | # ==================================== 64 | y_pred = api.predict(X_test) 65 | score = api.score(y_pred, y_test) 66 | print(score) 67 | # Print the final ensemble built by AutoPyTorch 68 | print(api.sprint_statistics()) 69 | 70 | -------------------------------------------------------------------------------- /test/test_data/test_forecasting_target_validator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import pandas as pd 4 | 5 | import pytest 6 | 7 | from scipy import sparse 8 | 9 | from autoPyTorch.data.time_series_target_validator import TimeSeriesTargetValidator 10 | 11 | 12 | def test_forecasting_target_transform(): 13 | validator = TimeSeriesTargetValidator(is_classification=False) 14 | series_length = 10 15 | y = np.ones(series_length) 16 | validator.fit(y) 17 | y_transformed_0 = validator.transform(y) 18 | assert isinstance(y_transformed_0, pd.DataFrame) 19 | assert np.all(y_transformed_0.index.values == np.zeros(series_length, dtype=np.int64)) 20 | 21 | index_1 = np.full(series_length, 1) 22 | y_transformed_1 = validator.transform(y, index_1) 23 | assert np.all(y_transformed_1.index.values == index_1) 24 | 25 | index_2 = pd.Index([f"a{i}" for i in range(series_length)]) 26 | y_transformed_2 = validator.transform(y, index_2) 27 | assert np.all(y_transformed_2.index.values == index_2) 28 | 29 | index_3 = [('a', 'a')] * (series_length // 3) + \ 30 | [('a', 'b')] * (series_length // 3) + \ 31 | [('b', 'a')] * (series_length - series_length // 3 * 2) 32 | index_3 = pd.MultiIndex.from_tuples(index_3) 33 | y_transformed_3 = validator.transform(y, index_3) 34 | assert isinstance(y_transformed_3.index, pd.MultiIndex) 35 | assert np.all(y_transformed_3.index == index_3) 36 | 37 | 38 | def test_forecasting_target_handle_exception(): 39 | validator = TimeSeriesTargetValidator(is_classification=False) 40 | target_sparse = sparse.csr_matrix(np.array([1, 1, 1])) 41 | with pytest.raises(NotImplementedError, match=r"Sparse Target is unsupported for forecasting task!"): 42 | # sparse matrix is unsupported for nan filling 43 | validator.fit(target_sparse) 44 | 45 | series_length = 10 46 | y = np.ones(series_length) 47 | 
validator.fit(y) 48 | with pytest.raises(ValueError, match=r"Index must have length as the input targets!"): 49 | validator.transform(y, np.asarray([1, 2, 3])) 50 | 51 | 52 | def test_forecasting_target_missing_values(): 53 | """ 54 | Makes sure we raise a proper message to the user, 55 | when providing not supported data input 56 | """ 57 | validator1 = TimeSeriesTargetValidator(is_classification=False) 58 | target_1 = np.array([np.nan, 1, 2]) 59 | validator1.fit(target_1) 60 | assert validator1.transform(target_1).isnull().values.sum() == 1 61 | -------------------------------------------------------------------------------- /test/test_pipeline/components/preprocessing/test_encoder_choice.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import unittest 3 | 4 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding import ( 5 | EncoderChoice 6 | ) 7 | 8 | 9 | class TestEncoderChoice(unittest.TestCase): 10 | def test_get_set_config_space(self): 11 | """Make sure that we can setup a valid choice in the encoder 12 | choice""" 13 | dataset_properties = {'numerical_columns': list(range(4)), 'categorical_columns': [5]} 14 | encoder_choice = EncoderChoice(dataset_properties) 15 | cs = encoder_choice.get_hyperparameter_search_space() 16 | 17 | # Make sure that all hyperparameters are part of the search space 18 | self.assertListEqual( 19 | sorted(cs.get_hyperparameter('__choice__').choices), 20 | sorted(list(encoder_choice.get_components().keys())) 21 | ) 22 | 23 | # Make sure we can properly set some random configs 24 | # Whereas just one iteration will make sure the algorithm works, 25 | # doing five iterations increase the confidence. We will be able to 26 | # catch component specific crashes 27 | for i in range(5): 28 | config = cs.sample_configuration() 29 | config_dict = copy.deepcopy(config.get_dictionary()) 30 | encoder_choice.set_hyperparameters(config) 31 | 32 | self.assertEqual(encoder_choice.choice.__class__, 33 | encoder_choice.get_components()[config_dict['__choice__']]) 34 | 35 | # Then check the choice configuration 36 | selected_choice = config_dict.pop('__choice__', None) 37 | for key, value in config_dict.items(): 38 | # Remove the selected_choice string from the parameter 39 | # so we can query in the object for it 40 | key = key.replace(selected_choice + ':', '') 41 | self.assertIn(key, vars(encoder_choice.choice)) 42 | self.assertEqual(value, encoder_choice.choice.__dict__[key]) 43 | 44 | def test_only_numerical(self): 45 | dataset_properties = {'numerical_columns': list(range(4)), 'categorical_columns': []} 46 | 47 | chooser = EncoderChoice(dataset_properties) 48 | configspace = chooser.get_hyperparameter_search_space().sample_configuration().get_dictionary() 49 | self.assertEqual(configspace['__choice__'], 'NoEncoder') 50 | 51 | 52 | if __name__ == '__main__': 53 | unittest.main() 54 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/lr_scheduler/base_scheduler.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | from torch.optim import Optimizer 4 | from torch.optim.lr_scheduler import _LRScheduler 5 | 6 | from autoPyTorch.pipeline.components.setup.base_setup import autoPyTorchSetupComponent 7 | from autoPyTorch.pipeline.components.setup.lr_scheduler.constants import StepIntervalUnit, StepIntervalUnitChoices 8 | from 
autoPyTorch.utils.common import FitRequirement 9 | 10 | 11 | class BaseLRComponent(autoPyTorchSetupComponent): 12 | """Provide an abstract interface for schedulers 13 | in Auto-PyTorch""" 14 | 15 | def __init__(self, step_interval: Union[str, StepIntervalUnit]): 16 | super().__init__() 17 | self.scheduler: Optional[_LRScheduler] = None 18 | self._step_interval: StepIntervalUnit 19 | 20 | if isinstance(step_interval, str): 21 | if step_interval not in StepIntervalUnitChoices: 22 | raise ValueError('step_interval must be one of {}, but got {}.'.format( 23 | StepIntervalUnitChoices, 24 | step_interval 25 | )) 26 | self._step_interval = getattr(StepIntervalUnit, step_interval) 27 | else: 28 | self._step_interval = step_interval 29 | 30 | self.add_fit_requirements([ 31 | FitRequirement('optimizer', (Optimizer,), user_defined=False, dataset_property=False)]) 32 | 33 | @property 34 | def step_interval(self) -> StepIntervalUnit: 35 | return self._step_interval 36 | 37 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: 38 | """ 39 | Adds the scheduler into the fit dictionary 'X' and returns it. 40 | Args: 41 | X (Dict[str, Any]): 'X' dictionary 42 | Returns: 43 | (Dict[str, Any]): the updated 'X' dictionary 44 | """ 45 | 46 | X.update( 47 | lr_scheduler=self.scheduler, 48 | step_interval=self.step_interval 49 | ) 50 | return X 51 | 52 | def get_scheduler(self) -> _LRScheduler: 53 | """Return the underlying scheduler object. 54 | Returns: 55 | scheduler : the underlying scheduler object 56 | """ 57 | assert self.scheduler is not None, "No scheduler was fit" 58 | return self.scheduler 59 | 60 | def __str__(self) -> str: 61 | """ Allow a nice understanding of what components were used """ 62 | string = self.scheduler.__class__.__name__ 63 | return string 64 | -------------------------------------------------------------------------------- /test/test_pipeline/components/setup/test_setup_image_augmenter.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from imgaug.augmenters.meta import Augmenter, Sequential 4 | 5 | import numpy as np 6 | 7 | from autoPyTorch.pipeline.components.setup.augmentation.image.ImageAugmenter import ImageAugmenter 8 | 9 | 10 | class TestImageAugmenter(unittest.TestCase): 11 | def test_every_augmenter(self): 12 | image_augmenter = ImageAugmenter() 13 | # To test every augmenter, we set the configuration to the default, where each augmenter 14 | # has use_augmenter set to True 15 | configuration = image_augmenter.get_hyperparameter_search_space().get_default_configuration() 16 | image_augmenter = image_augmenter.set_hyperparameters(configuration=configuration) 17 | X = dict(X_train=np.random.randint(0, 255, (8, 3, 16, 16), dtype=np.uint8), 18 | dataset_properties=dict(image_height=16, image_width=16)) 19 | for name, augmenter in image_augmenter.available_augmenters.items(): 20 | augmenter = augmenter.fit(X) 21 | # check if the augmenter in the component has the correct name 22 | self.assertEqual(augmenter.get_image_augmenter().name, name) 23 | # test if the augmenter has an Augmenter attribute 24 | self.assertIsInstance(augmenter.get_image_augmenter(), Augmenter) 25 | 26 | # test if the augmenter works on a random image 27 | train_aug = augmenter(X['X_train']) 28 | self.assertIsInstance(train_aug, np.ndarray) 29 | # check if the data was changed 30 | self.assertIsNot(train_aug, X['X_train']) 31 | 32 | def test_get_set_config_space(self): 33 | X = dict(X_train=np.random.randint(0, 255, (8, 3, 16, 16), dtype=np.uint8), 34 |
dataset_properties=dict(image_height=16, image_width=16)) 35 | image_augmenter = ImageAugmenter() 36 | configuration = image_augmenter.get_hyperparameter_search_space().sample_configuration() 37 | image_augmenter = image_augmenter.set_hyperparameters(configuration=configuration) 38 | image_augmenter = image_augmenter.fit(X) 39 | X = image_augmenter.transform(X) 40 | 41 | image_augmenter = X['image_augmenter'] 42 | # test if a sequential augmenter was formed 43 | self.assertIsInstance(image_augmenter.augmenter, Sequential) 44 | 45 | # test if augmenter works on a random image 46 | train_aug = image_augmenter(X['X_train']) 47 | self.assertIsInstance(train_aug, np.ndarray) 48 | 49 | 50 | if __name__ == '__main__': 51 | unittest.main() 52 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/forecasting_training_loss/RegressionLoss.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional, Union 2 | 3 | from ConfigSpace import CategoricalHyperparameter, ConfigurationSpace 4 | 5 | import numpy as np 6 | 7 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 8 | from autoPyTorch.pipeline.components.setup.forecasting_training_loss.base_forecasting_loss import \ 9 | ForecastingLossComponents 10 | from autoPyTorch.pipeline.components.training.losses import ( 11 | L1Loss, 12 | MAPELoss, 13 | MASELoss, 14 | MSELoss 15 | ) 16 | from autoPyTorch.utils.common import HyperparameterSearchSpace, add_hyperparameter 17 | 18 | 19 | class RegressionLoss(ForecastingLossComponents): 20 | net_output_type = 'regression' 21 | 22 | def __init__(self, 23 | loss_name: str, 24 | random_state: Optional[np.random.RandomState] = None, 25 | ): 26 | super(RegressionLoss, self).__init__() 27 | if loss_name == "l1": 28 | self.loss = L1Loss 29 | elif loss_name == 'mse': 30 | self.loss = MSELoss 31 | elif loss_name == 'mase': 32 | self.loss = MASELoss 33 | elif loss_name == 'mape': 34 | self.loss = MAPELoss 35 | else: 36 | raise ValueError(f"Unsupported loss type {loss_name}!") 37 | self.random_state = random_state 38 | 39 | @staticmethod 40 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 41 | ) -> Dict[str, Union[str, bool]]: 42 | return { 43 | 'shortname': 'RegressionLoss', 44 | 'name': 'RegressionLoss', 45 | "handles_tabular": True, 46 | "handles_image": True, 47 | "handles_time_series": True, 48 | 'handles_regression': True, 49 | 'handles_classification': False 50 | } 51 | 52 | @staticmethod 53 | def get_hyperparameter_search_space( 54 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None, 55 | loss_name: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="loss_name", 56 | value_range=('l1', 'mse', 'mase', 'mape'), 57 | default_value='mse'), 58 | ) -> ConfigurationSpace: 59 | cs = ConfigurationSpace() 60 | add_hyperparameter(cs, loss_name, CategoricalHyperparameter) 61 | return cs 62 | -------------------------------------------------------------------------------- /test/test_pipeline/components/preprocessing/test_scaler_choice.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import unittest 3 | 4 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling import ScalerChoice 5 | 6 | 7 | class TestRescalerChoice(unittest.TestCase): 8 | 9 | def test_get_set_config_space(self): 10 | """Make sure that we can 
set up a valid choice in the scaler 11 | choice""" 12 | dataset_properties = {'categorical_columns': list(range(4)), 13 | 'numerical_columns': [5], 14 | 'issparse': False} 15 | rescaler_choice = ScalerChoice(dataset_properties) 16 | cs = rescaler_choice.get_hyperparameter_search_space() 17 | 18 | # Make sure that all hyperparameters are part of the search space 19 | self.assertListEqual( 20 | sorted(cs.get_hyperparameter('__choice__').choices), 21 | sorted(list(rescaler_choice.get_components().keys())) 22 | ) 23 | 24 | # Make sure we can properly set some random configs. 25 | # While just one iteration will make sure the algorithm works, 26 | # doing five iterations increases the confidence. We will be able to 27 | # catch component-specific crashes 28 | for i in range(5): 29 | config = cs.sample_configuration() 30 | config_dict = copy.deepcopy(config.get_dictionary()) 31 | rescaler_choice.set_hyperparameters(config) 32 | 33 | self.assertEqual(rescaler_choice.choice.__class__, 34 | rescaler_choice.get_components()[config_dict['__choice__']]) 35 | 36 | # Then check the choice configuration 37 | selected_choice = config_dict.pop('__choice__', None) 38 | for key, value in config_dict.items(): 39 | # Remove the selected_choice string from the parameter 40 | # so we can query the object for it 41 | key = key.replace(selected_choice + ':', '') 42 | self.assertIn(key, vars(rescaler_choice.choice)) 43 | self.assertEqual(value, rescaler_choice.choice.__dict__[key]) 44 | 45 | def test_only_categorical(self): 46 | dataset_properties = {'categorical_columns': list(range(4)), 'numerical_columns': []} 47 | chooser = ScalerChoice(dataset_properties) 48 | configspace = chooser.get_hyperparameter_search_space(dataset_properties).sample_configuration().\ 49 | get_dictionary() 50 | self.assertEqual(configspace['__choice__'], 'NoScaler') 51 | 52 | 53 | if __name__ == '__main__': 54 | unittest.main() 55 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | ************ 2 | Auto-PyTorch 3 | ************ 4 | 5 | .. role:: bash(code) 6 | :language: bash 7 | 8 | .. role:: python(code) 9 | :language: python 10 | 11 | *Auto-PyTorch* is an automated machine learning toolkit based on PyTorch: 12 | 13 | >>> import autoPyTorch 14 | >>> cls = autoPyTorch.api.tabular_classification.TabularClassificationTask() 15 | >>> cls.search(X_train, y_train) 16 | >>> predictions = cls.predict(X_test) 17 | 18 | *Auto-PyTorch* frees a machine learning user from algorithm selection and 19 | hyperparameter tuning. It leverages recent advances in *Bayesian 20 | optimization*, *meta-learning* and *ensemble construction*. 21 | Learn more about *Auto-PyTorch* by reading our paper 22 | `Auto-PyTorch Tabular: Multi-Fidelity MetaLearning for Efficient and Robust AutoDL `_ 23 | . 24 | 25 | Example 26 | ******* 27 | 28 | Manual 29 | ****** 30 | 31 | * :ref:`installation` 32 | * :ref:`manual` 33 | * :ref:`api` 34 | * :ref:`dev` 35 | * :ref:`extending` 36 | 37 | 38 | License 39 | ******* 40 | *Auto-PyTorch* is licensed the same way as *scikit-learn*, 41 | namely the 3-clause BSD license.
42 | 43 | Citing Auto-PyTorch 44 | ******************* 45 | 46 | If you use *Auto-PyTorch* in a scientific publication, we would appreciate a 47 | reference to the following paper: 48 | 49 | 50 | `Auto-PyTorch Tabular: Multi-Fidelity MetaLearning for Efficient and Robust AutoDL 51 | `_, 52 | 53 | Bibtex entry:: 54 | 55 | @article{zimmer2020auto, 56 | title={Auto-pytorch tabular: Multi-fidelity metalearning for efficient and robust autodl}, 57 | author={Zimmer, Lucas and Lindauer, Marius and Hutter, Frank}, 58 | journal={arXiv preprint arXiv:2006.13799}, 59 | year={2020} 60 | } 61 | 62 | Contributing 63 | ************ 64 | 65 | We appreciate all contributions to *Auto-PyTorch*, from bug reports and 66 | documentation to new features. If you want to contribute to the code, you can 67 | pick an issue from the `issue tracker `_ 68 | which is marked with `Needs contributer`. 69 | 70 | .. note:: 71 | 72 | To avoid spending time on duplicate work or features that are unlikely to 73 | get merged, it is highly advised that you contact the developers 74 | by opening a `GitHub issue `_ before starting to work. 76 | 77 | When developing new features, please create a new branch from the refactor_development 78 | branch. When submitting a pull request, make sure that all tests are 79 | still passing. 80 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/network_backbone/forecasting_backbone/forecasting_decoder/components.py: -------------------------------------------------------------------------------- 1 | from typing import NamedTuple, Optional, Tuple 2 | 3 | import torch 4 | from torch import nn 5 | 6 | 7 | class DecoderProperties(NamedTuple): 8 | """ 9 | Decoder properties 10 | 11 | Args: 12 | has_hidden_states (bool): 13 | if the decoder has hidden states. A decoder with hidden states might have additional output and requires 14 | additional inputs 15 | has_local_layer (bool): 16 | if the decoder has a local layer, in which case the output is also a 3D sequential feature 17 | recurrent (bool): 18 | if the decoder is recurrent. This determines if decoders can be auto-regressive 19 | lagged_input (bool): 20 | if the decoder accepts past targets as additional features 21 | multi_blocks (bool): 22 | if the decoder is stacked from multiple blocks (only for N-BEATS) 23 | """ 24 | has_hidden_states: bool = False 25 | has_local_layer: bool = True 26 | recurrent: bool = False 27 | lagged_input: bool = False 28 | multi_blocks: bool = False 29 | 30 | 31 | class DecoderBlockInfo(NamedTuple): 32 | """ 33 | Decoder block info 34 | 35 | Args: 36 | decoder (nn.Module): 37 | decoder network 38 | decoder_properties (DecoderProperties): 39 | decoder properties 40 | decoder_output_shape (Tuple[int, ...]): 41 | output shape that the decoder ought to output 42 | 43 | decoder_input_shape (Tuple[int, ...]): 44 | required input shape of the decoder 45 | 46 | """ 47 | decoder: nn.Module 48 | decoder_properties: DecoderProperties 49 | decoder_output_shape: Tuple[int, ...] 50 | decoder_input_shape: Tuple[int, ...]
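# Illustration only -- not part of the repository file above. A hypothetical way
# these NamedTuples could be filled in; the decoder module and shapes below are
# made-up values chosen purely to show the field semantics:
#
#     props = DecoderProperties(has_local_layer=True)      # e.g. a plain MLP decoder
#     block = DecoderBlockInfo(
#         decoder=nn.Linear(32, 16),                       # stand-in decoder network
#         decoder_properties=props,
#         decoder_output_shape=(10, 16),                   # (L_future, N_out)
#         decoder_input_shape=(10, 32),                    # (L_future, N_in)
#     )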
51 | 52 | 53 | class DecoderNetwork(nn.Module): 54 | def forward(self, x_future: torch.Tensor, 55 | encoder_output: torch.Tensor, 56 | pos_idx: Optional[Tuple[int]] = None) -> torch.Tensor: 57 | """ 58 | Base forecasting decoder network; its output needs to be a 3-d tensor. 59 | 60 | 61 | Args: 62 | x_future: torch.Tensor(B, L_future, N_out), the future features 63 | encoder_output: torch.Tensor(B, L_encoder, N), output of the encoder network, or the hidden states 64 | pos_idx: positional index, indicating the position of the forecasted tensor, used for transformers 65 | Returns: 66 | net_output: torch.Tensor with shape (B, L_future, N) 67 | 68 | """ 69 | raise NotImplementedError 70 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/augmentation/image/Resize.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | from ConfigSpace.configuration_space import ConfigurationSpace 4 | from ConfigSpace.hyperparameters import ( 5 | CategoricalHyperparameter, 6 | ) 7 | 8 | import imgaug.augmenters as iaa 9 | from imgaug.augmenters.meta import Augmenter 10 | 11 | import numpy as np 12 | 13 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 14 | from autoPyTorch.pipeline.components.setup.augmentation.image.base_image_augmenter import BaseImageAugmenter 15 | from autoPyTorch.utils.common import FitRequirement, HyperparameterSearchSpace, add_hyperparameter 16 | 17 | 18 | class Resize(BaseImageAugmenter): 19 | 20 | def __init__(self, use_augmenter: bool = True, 21 | random_state: Optional[Union[int, np.random.RandomState]] = None): 22 | super().__init__(use_augmenter=use_augmenter) 23 | self.random_state = random_state 24 | self.add_fit_requirements([ 25 | FitRequirement('image_height', (int,), user_defined=True, dataset_property=True), 26 | FitRequirement('image_width', (int,), user_defined=True, dataset_property=True)]) 27 | 28 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImageAugmenter: 29 | self.check_requirements(X, y) 30 | if self.use_augmenter: 31 | self.augmenter: Augmenter = iaa.Resize(size={'height': X['dataset_properties']['image_height'], 32 | 'width': X['dataset_properties']['image_width']}, 33 | interpolation='linear', name=self.get_properties()['name']) 34 | 35 | return self 36 | 37 | @staticmethod 38 | def get_hyperparameter_search_space( 39 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None, 40 | use_augmenter: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="use_augmenter", 41 | value_range=(True, False), 42 | default_value=True, 43 | ), 44 | ) -> ConfigurationSpace: 45 | cs = ConfigurationSpace() 46 | add_hyperparameter(cs, use_augmenter, CategoricalHyperparameter) 47 | 48 | return cs 49 | 50 | @staticmethod 51 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 52 | ) -> Dict[str, Any]: 53 | return {'name': 'Resize'} 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 |
pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Idea workspace and task 132 | **/.idea/workspace.xml 133 | **/.idea/tasks.xml 134 | 135 | # Dask 136 | dask-worker-space/ 137 | 138 | # Test output 139 | tmp/ 140 | .tmp_evaluation 141 | -------------------------------------------------------------------------------- /test/test_pipeline/components/preprocessing/forecasting/base.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional, Tuple, Union 2 | 3 | from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice 4 | from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent 5 | from autoPyTorch.pipeline.components.preprocessing.time_series_preprocessing.TimeSeriesTransformer import ( 6 | TimeSeriesFeatureTransformer, 7 | TimeSeriesTargetTransformer 8 | ) 9 | from autoPyTorch.pipeline.components.preprocessing.time_series_preprocessing.encoding import TimeSeriesEncoderChoice 10 | from autoPyTorch.pipeline.components.preprocessing.time_series_preprocessing.imputation.TimeSeriesImputer import ( 11 | TimeSeriesFeatureImputer, 12 | TimeSeriesTargetImputer 13 | ) 14 | from autoPyTorch.pipeline.components.preprocessing.time_series_preprocessing.scaling.base_scaler import BaseScaler 15 | from autoPyTorch.pipeline.time_series_forecasting import TimeSeriesForecastingPipeline 16 | 17 | 18 | class ForecastingPipeline(TimeSeriesForecastingPipeline): 19 | def 
_get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]], 20 | ) -> List[Tuple[str, autoPyTorchChoice]]: 21 | """ 22 | Defines what steps a pipeline should follow. 23 | The step itself has choices given via autoPyTorchChoice. 24 | 25 | Returns: 26 | List[Tuple[str, autoPyTorchChoice]]: list of steps sequentially exercised 27 | by the pipeline. 28 | """ 29 | steps: List[Tuple[str, Union[autoPyTorchChoice, autoPyTorchComponent]]] = [] 30 | 31 | default_dataset_properties = {'target_type': 'time_series_forecasting'} 32 | if dataset_properties is not None: 33 | default_dataset_properties.update(dataset_properties) 34 | if not default_dataset_properties['uni_variant']: 35 | 36 | steps.extend([("imputer", TimeSeriesFeatureImputer(random_state=self.random_state)), 37 | ("scaler", BaseScaler(random_state=self.random_state)), 38 | ('encoding', TimeSeriesEncoderChoice(default_dataset_properties, 39 | random_state=self.random_state)), 40 | ("time_series_transformer", TimeSeriesFeatureTransformer(random_state=self.random_state)), 41 | ]) 42 | 43 | steps.extend([("target_imputer", TimeSeriesTargetImputer(random_state=self.random_state)), 44 | ("time_series_target_transformer", TimeSeriesTargetTransformer(random_state=self.random_state)), 45 | ]) 46 | 47 | return steps 48 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/Normalizer.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | from ConfigSpace.configuration_space import ConfigurationSpace 4 | from ConfigSpace.hyperparameters import ( 5 | CategoricalHyperparameter 6 | ) 7 | 8 | import numpy as np 9 | 10 | from sklearn.preprocessing import Normalizer as SklearnNormalizer 11 | 12 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 13 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler import BaseScaler 14 | from autoPyTorch.utils.common import HyperparameterSearchSpace, add_hyperparameter 15 | 16 | 17 | class Normalizer(BaseScaler): 18 | """ 19 | Normalises samples individually according to norm {mean_abs, mean_squared, max} 20 | """ 21 | 22 | def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None, norm: str = 'mean_squared'): 23 | """ 24 | Args: 25 | random_state (Optional[Union[np.random.RandomState, int]]): not used by this stateless scaler; 26 | kept only for API consistency with the other components.
27 | norm (str): one of {mean_abs, mean_squared, max}; default: mean_squared 28 | """ 29 | super().__init__() 30 | self.random_state = random_state 31 | self.norm = norm 32 | 33 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseScaler: 34 | self.check_requirements(X, y) 35 | 36 | map_norm = {"mean_abs": "l1", "mean_squared": "l2", "max": "max"} 37 | self.preprocessor['numerical'] = SklearnNormalizer(norm=map_norm[self.norm], copy=False) 38 | return self 39 | 40 | @staticmethod 41 | def get_hyperparameter_search_space( 42 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None, 43 | norm: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="norm", 44 | value_range=("mean_abs", "mean_squared", "max"), 45 | default_value="mean_squared", 46 | ) 47 | ) -> ConfigurationSpace: 48 | cs = ConfigurationSpace() 49 | add_hyperparameter(cs, norm, CategoricalHyperparameter) 50 | return cs 51 | 52 | @staticmethod 53 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 54 | ) -> Dict[str, Union[str, bool]]: 55 | return { 56 | 'shortname': 'Normalizer', 57 | 'name': 'Normalizer', 58 | 'handles_sparse': True 59 | } 60 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/early_preprocessor/EarlyPreprocessing.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | from ConfigSpace.configuration_space import ConfigurationSpace 4 | 5 | import numpy as np 6 | 7 | import pandas as pd 8 | 9 | from scipy.sparse import spmatrix 10 | 11 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 12 | from autoPyTorch.pipeline.components.setup.base_setup import autoPyTorchSetupComponent 13 | from autoPyTorch.pipeline.components.setup.early_preprocessor.utils import get_preprocess_transforms, preprocess 14 | from autoPyTorch.utils.common import FitRequirement 15 | 16 | 17 | class EarlyPreprocessing(autoPyTorchSetupComponent): 18 | 19 | def __init__(self, random_state: Optional[np.random.RandomState] = None) -> None: 20 | super().__init__() 21 | self.random_state = random_state 22 | self.add_fit_requirements([ 23 | FitRequirement('is_small_preprocess', (bool,), user_defined=True, dataset_property=True), 24 | FitRequirement('X_train', (np.ndarray, pd.DataFrame, spmatrix), user_defined=True, 25 | dataset_property=False)]) 26 | 27 | def fit(self, X: Dict[str, Any], y: Any = None) -> "EarlyPreprocessing": 28 | self.check_requirements(X, y) 29 | 30 | return self 31 | 32 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: 33 | 34 | transforms = get_preprocess_transforms(X) 35 | if X['dataset_properties']['is_small_preprocess']: 36 | if 'X_train' in X: 37 | X_train = X['X_train'] 38 | else: 39 | # Fetch the data from the backend to incorporate the transform into the dataset 40 | X_train = X['backend'].load_datamanager().train_tensors[0] 41 | 42 | X['X_train'] = preprocess(dataset=X_train, transforms=transforms) 43 | 44 | # We need to also save the preprocess transforms for inference 45 | X.update({'preprocess_transforms': transforms}) 46 | return X 47 | 48 | @staticmethod 49 | def get_hyperparameter_search_space( 50 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None, 51 | **kwargs: Any 52 | ) -> ConfigurationSpace: 53 | return ConfigurationSpace() 54 | 55 | @staticmethod 56 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 57 | ) ->
Dict[str, Union[str, bool]]: 58 | return { 59 | 'shortname': 'EarlyPreprocessing', 60 | 'name': 'Early Preprocessing Node', 61 | } 62 | 63 | def __str__(self) -> str: 64 | """ Allow a nice understanding of what components were used """ 65 | string = self.__class__.__name__ 66 | return string 67 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/augmentation/image/ZeroPadAndCrop.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | from ConfigSpace.configuration_space import ConfigurationSpace 4 | from ConfigSpace.hyperparameters import ( 5 | UniformFloatHyperparameter 6 | ) 7 | 8 | import imgaug.augmenters as iaa 9 | from imgaug.augmenters.meta import Augmenter 10 | 11 | import numpy as np 12 | 13 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 14 | from autoPyTorch.pipeline.components.setup.augmentation.image.base_image_augmenter import BaseImageAugmenter 15 | from autoPyTorch.utils.common import FitRequirement, HyperparameterSearchSpace, add_hyperparameter 16 | 17 | 18 | class ZeroPadAndCrop(BaseImageAugmenter): 19 | 20 | def __init__(self, percent: float = 0.1, 21 | random_state: Optional[Union[int, np.random.RandomState]] = None): 22 | super().__init__() 23 | self.random_state = random_state 24 | self.percent = percent 25 | self.pad_augmenter: Optional[Augmenter] = None 26 | self.crop_augmenter: Optional[Augmenter] = None 27 | self.add_fit_requirements([ 28 | FitRequirement('image_height', (int,), user_defined=True, dataset_property=True), 29 | FitRequirement('image_width', (int,), user_defined=True, dataset_property=True)]) 30 | 31 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImageAugmenter: 32 | self.check_requirements(X, y) 33 | self.pad_augmenter = iaa.Pad(percent=self.percent, keep_size=False) 34 | self.crop_augmenter = iaa.CropToFixedSize(height=X['dataset_properties']['image_height'], 35 | width=X['dataset_properties']['image_width']) 36 | self.augmenter: Augmenter = iaa.Sequential([ 37 | self.pad_augmenter, 38 | self.crop_augmenter 39 | ], name=self.get_properties()['name']) 40 | 41 | return self 42 | 43 | @staticmethod 44 | def get_hyperparameter_search_space( 45 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None, 46 | percent: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter='percent', 47 | value_range=(0, 0.5), 48 | default_value=0.1, 49 | ) 50 | ) -> ConfigurationSpace: 51 | 52 | cs = ConfigurationSpace() 53 | add_hyperparameter(cs, percent, UniformFloatHyperparameter) 54 | return cs 55 | 56 | @staticmethod 57 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 58 | ) -> Dict[str, Any]: 59 | return {'name': 'ZeroPadAndCrop'} 60 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/training/data_loader/image_data_loader.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | import torchvision 4 | 5 | from autoPyTorch.pipeline.components.training.data_loader.base_data_loader import BaseDataLoaderComponent 6 | 7 | 8 | class ImageDataLoader(BaseDataLoaderComponent): 9 | """This class is an interface to the PyTorch DataLoader. 10 | 11 | Particularly, this data loader builds transformations for 12 | image data.
13 | 14 | """ 15 | 16 | def build_transform(self, X: Dict[str, Any], mode: str) -> torchvision.transforms.Compose: 17 | """ 18 | Method to build a transformation that can pre-process input data 19 | 20 | Args: 21 | X (Dict[str, Any]): Dependencies needed by the current component to perform fit 22 | mode (str): train/val/test 23 | 24 | Returns: 25 | A composition of transformations 26 | """ 27 | 28 | if mode not in ['train', 'val', 'test']: 29 | raise ValueError("Unsupported mode provided {}. ".format(mode)) 30 | 31 | transformations = [] 32 | 33 | if 'train' in mode: 34 | transformations.append(X['image_augmenter']) 35 | # In the case of image data, the options currently available 36 | # for preprocessors are: 37 | # + normalise 38 | # These can apply for both train/val/test, so no 39 | # distinction is performed 40 | 41 | # check if data set is small enough to be preprocessed. 42 | # If it is, then no need to add preprocess_transforms to 43 | # the data loader as the data is already preprocessed 44 | if 'test' in mode or not X['dataset_properties']['is_small_preprocess']: 45 | transformations.append(X['preprocess_transforms']) 46 | 47 | # Transform to tensor 48 | transformations.append(torchvision.transforms.ToTensor()) 49 | 50 | return torchvision.transforms.Compose(transformations) 51 | 52 | def _check_transform_requirements(self, X: Dict[str, Any], y: Any = None) -> None: 53 | """ 54 | 55 | Makes sure that the fit dictionary contains the required transformations 56 | that the dataset should go through 57 | 58 | Args: 59 | X (Dict[str, Any]): Dictionary with fitted parameters. It is a message passing 60 | mechanism, in which during a transform, a component adds relevant information 61 | so that further stages can be properly fitted 62 | """ 63 | if 'image_augmenter' not in X or not X['image_augmenter']: 64 | raise ValueError("Cannot find the image_augmenter in the fit dictionary") 65 | 66 | if not X['dataset_properties']['is_small_preprocess'] and 'preprocess_transforms' not in X: 67 | raise ValueError("Cannot find the preprocess_transforms in the fit dictionary") 68 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/early_preprocessor/utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from typing import Any, Dict, List, Optional, Type, Union 3 | 4 | import numpy as np 5 | 6 | import pandas as pd 7 | 8 | from sklearn.utils import check_array 9 | 10 | import torchvision.transforms 11 | 12 | from autoPyTorch.pipeline.components.preprocessing.base_preprocessing import ( 13 | autoPyTorchPreprocessingComponent as aPTPre, 14 | autoPyTorchTargetPreprocessingComponent as aPTTPre 15 | ) 16 | 17 | 18 | def get_preprocess_transforms(X: Dict[str, Any], 19 | preprocess_type: Union[Type[aPTPre], Type[aPTTPre]] = aPTPre) \ 20 | -> List[Union[Type[aPTPre], Type[aPTTPre]]]: 21 | candidate_transforms = [] 22 | for key, value in X.items(): 23 | if isinstance(value, preprocess_type): 24 | candidate_transforms.append(copy.deepcopy(value)) 25 | 26 | return candidate_transforms 27 | 28 | 29 | def preprocess(dataset: np.ndarray, transforms: torchvision.transforms.Compose, 30 | indices: Optional[List[int]] = None) -> np.ndarray: 31 | 32 | composite_transforms = torchvision.transforms.Compose(transforms) 33 | if indices is None: 34 | dataset = composite_transforms(dataset) 35 | else: 36 | dataset[indices, :] = composite_transforms(np.take(dataset, indices, axis=0)) 37 | # In case the
configuration space is such that no 38 | # sklearn transformation is proposed, we run 39 | # check_array to convert object dtype to float 40 | return check_array( 41 | dataset, 42 | force_all_finite=False, 43 | accept_sparse='csr', 44 | ensure_2d=False, 45 | allow_nd=True, 46 | ) 47 | 48 | 49 | def time_series_preprocess(dataset: pd.DataFrame, transforms: torchvision.transforms.Compose, 50 | indices: Optional[List[int]] = None) -> pd.DataFrame: 51 | """ 52 | Preprocess time series data (both features and targets). The dataset should be a pandas DataFrame whose index 53 | identifies which series the data belongs to. 54 | 55 | Args: 56 | dataset (pd.DataFrame): a dataset containing multiple series; its index identifies the series number 57 | transforms (torchvision.transforms.Compose): transformation applied to the dataset 58 | indices (Optional[List[int]]): the indices that the transformer needs to work with 59 | 60 | Returns: 61 | pd.DataFrame: the transformed dataset 62 | """ 63 | # TODO consider Numpy implementation 64 | composite_transforms = torchvision.transforms.Compose(transforms) 65 | if indices is None: 66 | index = dataset.index 67 | dataset = composite_transforms(dataset) 68 | dataset = pd.DataFrame(dataset, index=index) 69 | else: 70 | sub_dataset = dataset.iloc[:, indices] 71 | sub_dataset = composite_transforms(sub_dataset) 72 | dataset.iloc[:, indices] = sub_dataset 73 | return dataset 74 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List 2 | 3 | from sklearn.base import BaseEstimator 4 | 5 | 6 | def get_time_series_preprocessers(X: Dict[str, Any]) -> Dict[str, List[BaseEstimator]]: 7 | """ 8 | Expects fit_dictionary(X) to have numerical/categorical preprocessors 9 | (fitted numerical/categorical preprocessing nodes) that will build a pipeline in the TimeSeriesTransformer. 10 | This function parses X and extracts such components. 11 | Creates a dictionary with two keys, 12 | numerical - containing the list of numerical preprocessors 13 | categorical - containing the list of categorical preprocessors 14 | 15 | Args: 16 | X: fit dictionary 17 | 18 | Returns: 19 | (Dict[str, List[BaseEstimator]]): dictionary with lists of numerical and categorical preprocessors 20 | """ 21 | preprocessor = dict(numerical=list(), categorical=list())  # type: Dict[str, List[BaseEstimator]] 22 | for key, value in X.items(): 23 | if isinstance(value, dict): 24 | # as each preprocessor is a child of BaseEstimator 25 | if 'numerical' in value and isinstance(value['numerical'], BaseEstimator): 26 | preprocessor['numerical'].append(value['numerical']) 27 | if 'categorical' in value and isinstance(value['categorical'], BaseEstimator): 28 | preprocessor['categorical'].append(value['categorical']) 29 | 30 | return preprocessor 31 | 32 | 33 | def get_time_series_target_preprocessers(X: Dict[str, Any]) -> Dict[str, List[BaseEstimator]]: 34 | """ 35 | Expects fit_dictionary(X) to have target preprocessors 36 | (the interface for target categorical preprocessors is kept here for future use) 37 | (fitted numerical/categorical preprocessing nodes) that will build a pipeline in the TimeSeriesTransformer. 38 | This function parses X and extracts such components.
39 | Creates a dictionary with two keys, 40 | target_numerical - containing the list of numerical target preprocessors 41 | target_categorical - containing the list of categorical target preprocessors 42 | 43 | Args: 44 | X: fit dictionary 45 | 46 | Returns: 47 | (Dict[str, List[BaseEstimator]]): dictionary with lists of numerical and categorical target preprocessors 48 | """ 49 | preprocessor = dict(target_numerical=list(), target_categorical=list())  # type: Dict[str, List[BaseEstimator]] 50 | for key, value in X.items(): 51 | if isinstance(value, dict): 52 | # as each preprocessor is a child of BaseEstimator 53 | if 'target_numerical' in value and isinstance(value['target_numerical'], BaseEstimator): 54 | preprocessor['target_numerical'].append(value['target_numerical']) 55 | if 'target_categorical' in value and isinstance(value['target_categorical'], BaseEstimator): 56 | preprocessor['target_categorical'].append(value['target_categorical']) 57 | return preprocessor 58 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | .. _installation: 4 | 5 | ============ 6 | Installation 7 | ============ 8 | 9 | System requirements 10 | =================== 11 | 12 | Auto-PyTorch has the following system requirements: 13 | 14 | * Linux operating system (for example Ubuntu) `(get Linux here) `_, 15 | * Python (>=3.7) `(get Python here) `_. 16 | * C++ compiler (with C++11 support) `(get GCC here) `_ and 17 | * SWIG (version 3.0.* is required; >=4.0.0 is not supported) `(get SWIG here) `_. 18 | 19 | Installing Auto-PyTorch 20 | ======================= 21 | 22 | PyPI Installation 23 | ----------------- 24 | 25 | .. code:: bash 26 | pip install autoPyTorch 27 | 28 | Auto-PyTorch for Time Series Forecasting requires additional dependencies: 29 | 30 | .. code:: bash 31 | pip install autoPyTorch[forecasting] 32 | 33 | 34 | Manual Installation 35 | ------------------- 36 | 37 | .. code:: bash 38 | 39 | # The following commands assume the user is in a cloned directory of Auto-PyTorch 40 | 41 | # We also need to initialize the automl_common repository as follows 42 | # You can find more information about this here: 43 | # https://github.com/automl/automl_common/ 44 | git submodule update --init --recursive 45 | 46 | # Create the environment 47 | conda create -n autopytorch python=3.8 48 | conda activate autopytorch 49 | conda install swig 50 | cat requirements.txt | xargs -n 1 -L 1 pip install 51 | python setup.py install 52 | 53 | Similarly, Auto-PyTorch for time series forecasting requires additional dependencies: 54 | 55 | .. code:: bash 56 | git submodule update --init --recursive 57 | 58 | conda create -n auto-pytorch python=3.8 59 | conda activate auto-pytorch 60 | conda install swig 61 | pip install -e ".[forecasting]" 62 | 63 | 64 | Docker Image 65 | ============ 66 | A Docker image is also provided on Docker Hub. To download from Docker Hub, 67 | use: 68 | 69 | .. code:: bash 70 | 71 | docker pull automlorg/autopytorch:master 72 | 73 | You can also verify that the image was downloaded via: 74 | 75 | .. code:: bash 76 | 77 | docker images # Verify that the image was downloaded 78 | 79 | This image can be used to start an interactive session as follows: 80 | 81 | .. code:: bash 82 | 83 | docker run -it automlorg/autopytorch:master 84 | 85 | To start a Jupyter notebook, you could instead run e.g.: 86 | 87 | ..
code:: bash 88 | 89 | docker run -it -v ${PWD}:/opt/nb -p 8888:8888 automlorg/autopytorch:master /bin/bash -c "mkdir -p /opt/nb && jupyter notebook --notebook-dir=/opt/nb --ip='0.0.0.0' --port=8888 --no-browser --allow-root" 90 | 91 | Alternatively, it is possible to use the development version of autoPyTorch by replacing all 92 | occurrences of ``master`` by ``development``. 93 | -------------------------------------------------------------------------------- /.github/workflows/docker-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | 6 | name: Publish Docker image 7 | 8 | on: 9 | push: 10 | # Push to `master` or `development` 11 | branches: 12 | - master 13 | - development 14 | - fixes_docker 15 | workflow_dispatch: 16 | 17 | jobs: 18 | push_to_registries: 19 | name: Push Docker image to multiple registries 20 | runs-on: ubuntu-latest 21 | permissions: 22 | packages: write 23 | contents: read 24 | steps: 25 | - name: Check out the repo 26 | uses: actions/checkout@v2 27 | 28 | - name: Extract branch name 29 | shell: bash 30 | run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" 31 | id: extract_branch 32 | 33 | - name: Log in to Docker Hub 34 | uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 35 | with: 36 | username: ${{ secrets.DOCKER_USERNAME }} 37 | password: ${{ secrets.DOCKER_PASSWORD }} 38 | 39 | - name: Log in to the Container registry 40 | uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 41 | with: 42 | registry: ghcr.io 43 | username: ${{ github.actor }} 44 | password: ${{ secrets.GITHUB_TOKEN }} 45 | 46 | - name: Extract metadata (tags, labels) for Docker 47 | id: meta 48 | uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 49 | with: 50 | images: | 51 | automlorg/autopytorch 52 | ghcr.io/${{ github.repository }} 53 | 54 | - name: Build and push Docker images 55 | uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc 56 | with: 57 | context: .
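# the 'meta' (docker/metadata-action) step above computes the image tags and labels consumed below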
58 | push: true 59 | tags: ${{ steps.meta.outputs.tags }} 60 | labels: ${{ steps.meta.outputs.labels }} 61 | - name: Docker Login 62 | run: docker login ghcr.io -u $GITHUB_ACTOR -p $GITHUB_TOKEN 63 | env: 64 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 65 | 66 | - name: Pull Docker image 67 | run: docker pull ghcr.io/$GITHUB_REPOSITORY/autoPyTorch:$BRANCH 68 | env: 69 | BRANCH: ${{ steps.extract_branch.outputs.branch }} 70 | 71 | - name: Run image 72 | run: docker run -i -d --name unittester -v $GITHUB_WORKSPACE:/workspace -w /workspace ghcr.io/$GITHUB_REPOSITORY/autoPyTorch:$BRANCH 73 | env: 74 | BRANCH: ${{ steps.extract_branch.outputs.branch }} 75 | 76 | - name: Auto-PyTorch loaded 77 | run: docker exec -i unittester python3 -c 'import autoPyTorch; print(f"Auto-PyTorch imported from {autoPyTorch.__file__}")' 78 | 79 | - name: Run unit testing 80 | run: docker exec -i unittester python3 -m pytest -v test -------------------------------------------------------------------------------- /test/test_utils/test_parallel_model_runner.py: -------------------------------------------------------------------------------- 1 | import unittest.mock 2 | from test.test_api.utils import dummy_eval_train_function 3 | from test.test_evaluation.evaluation_util import get_binary_classification_datamanager 4 | 5 | from ConfigSpace import Configuration 6 | 7 | from smac.tae import StatusType 8 | 9 | from autoPyTorch.pipeline.components.training.metrics.utils import get_metrics 10 | from autoPyTorch.utils.logging_ import PicklableClientLogger 11 | from autoPyTorch.utils.parallel_model_runner import run_models_on_dataset 12 | from autoPyTorch.utils.pipeline import get_configuration_space, get_dataset_requirements 13 | from autoPyTorch.utils.single_thread_client import SingleThreadedClient 14 | 15 | 16 | @unittest.mock.patch('autoPyTorch.evaluation.tae.eval_train_function', 17 | new=dummy_eval_train_function) 18 | def test_run_models_on_dataset(backend): 19 | dataset = get_binary_classification_datamanager() 20 | backend.save_datamanager(dataset) 21 | # Search for a good configuration 22 | dataset_requirements = get_dataset_requirements( 23 | info=dataset.get_required_dataset_info() 24 | ) 25 | dataset_properties = dataset.get_dataset_properties(dataset_requirements) 26 | search_space = get_configuration_space(info=dataset_properties) 27 | num_random_configs = 5 28 | model_configurations = [(search_space.sample_configuration(), 1) for _ in range(num_random_configs)] 29 | # Add a traditional model 30 | model_configurations.append(('lgb', 1)) 31 | 32 | metric = get_metrics(dataset_properties=dataset_properties, 33 | names=["accuracy"], 34 | all_supported_metrics=False).pop() 35 | logger = unittest.mock.Mock(spec=PicklableClientLogger) 36 | 37 | dask_client = SingleThreadedClient() 38 | 39 | runhistory = run_models_on_dataset( 40 | time_left=15, 41 | func_eval_time_limit_secs=5, 42 | model_configs=model_configurations, 43 | logger=logger, 44 | metric=metric, 45 | dask_client=dask_client, 46 | backend=backend, 47 | seed=1, 48 | multiprocessing_context="fork", 49 | current_search_space=search_space, 50 | ) 51 | 52 | has_successful_model = False 53 | has_matching_config = False 54 | # assert at least 1 successfully fitted model 55 | for run_key, run_value in runhistory.data.items(): 56 | if run_value.status == StatusType.SUCCESS: 57 | has_successful_model = True 58 | configuration = run_value.additional_info['configuration'] 59 | for (config, _) in model_configurations: 60 | if isinstance(config, Configuration): 61 | config = config.get_dictionary()
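# Configuration objects were converted to plain dicts above so they compare cleanly against the stored run configuration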
62 | if config == configuration: 63 | has_matching_config = True 64 | 65 | assert has_successful_model, "At least one model should be successfully trained" 66 | assert has_matching_config, "Configurations should match the passed model configurations" 67 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/lr_scheduler/CosineAnnealingLR.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | from ConfigSpace.configuration_space import ConfigurationSpace 4 | from ConfigSpace.hyperparameters import ( 5 | UniformIntegerHyperparameter, 6 | ) 7 | 8 | import numpy as np 9 | 10 | import torch.optim.lr_scheduler 11 | 12 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 13 | from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler import BaseLRComponent 14 | from autoPyTorch.pipeline.components.setup.lr_scheduler.constants import StepIntervalUnit 15 | from autoPyTorch.utils.common import HyperparameterSearchSpace, add_hyperparameter 16 | 17 | 18 | class CosineAnnealingLR(BaseLRComponent): 19 | """ 20 | Set the learning rate of each parameter group using a cosine annealing schedule 21 | 22 | Args: 23 | T_max (int): Maximum number of iterations. 24 | 25 | """ 26 | def __init__( 27 | self, 28 | T_max: int, 29 | step_interval: Union[str, StepIntervalUnit] = StepIntervalUnit.epoch, 30 | random_state: Optional[np.random.RandomState] = None 31 | ): 32 | 33 | super().__init__(step_interval) 34 | self.T_max = T_max 35 | self.random_state = random_state 36 | 37 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseLRComponent: 38 | """ 39 | Fits a component by using an input dictionary with pre-requisites 40 | 41 | Args: 42 | X (Dict[str, Any]): Dependencies needed by the current component to perform fit 43 | y (Any): not used.
To comply with the sklearn API 44 | 45 | Returns: 46 | An instance of self 47 | """ 48 | 49 | # Make sure there is an optimizer 50 | self.check_requirements(X, y) 51 | 52 | self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( 53 | optimizer=X['optimizer'], 54 | T_max=int(self.T_max) 55 | ) 56 | return self 57 | 58 | @staticmethod 59 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None 60 | ) -> Dict[str, Union[str, bool]]: 61 | return { 62 | 'shortname': 'CosineAnnealing', 63 | 'name': 'Cosine Annealing', 64 | } 65 | 66 | @staticmethod 67 | def get_hyperparameter_search_space( 68 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None, 69 | T_max: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter='T_max', 70 | value_range=(10, 500), 71 | default_value=200, 72 | ) 73 | ) -> ConfigurationSpace: 74 | 75 | cs = ConfigurationSpace() 76 | add_hyperparameter(cs, T_max, UniformIntegerHyperparameter) 77 | 78 | return cs 79 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/augmentation/image/GaussianNoise.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Union 2 | 3 | import ConfigSpace as CS 4 | from ConfigSpace.configuration_space import ConfigurationSpace 5 | from ConfigSpace.hyperparameters import ( 6 | CategoricalHyperparameter, 7 | UniformFloatHyperparameter, 8 | ) 9 | 10 | import imgaug.augmenters as iaa 11 | from imgaug.augmenters.meta import Augmenter 12 | 13 | import numpy as np 14 | 15 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType 16 | from autoPyTorch.pipeline.components.setup.augmentation.image.base_image_augmenter import BaseImageAugmenter 17 | from autoPyTorch.utils.common import HyperparameterSearchSpace, get_hyperparameter 18 | 19 | 20 | class GaussianNoise(BaseImageAugmenter): 21 | def __init__(self, use_augmenter: bool = True, sigma_offset: float = 0.3, 22 | random_state: Optional[Union[int, np.random.RandomState]] = None): 23 | super().__init__(use_augmenter=use_augmenter) 24 | self.random_state = random_state 25 | self.sigma = (0, sigma_offset) 26 | 27 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImageAugmenter: 28 | if self.use_augmenter: 29 | self.augmenter: Augmenter = iaa.AdditiveGaussianNoise(scale=self.sigma, name=self.get_properties()['name']) 30 | return self 31 | 32 | @staticmethod 33 | def get_hyperparameter_search_space( 34 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None, 35 | use_augmenter: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="use_augmenter", 36 | value_range=(True, False), 37 | default_value=True, 38 | ), 39 | sigma_offset: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="sigma_offset", 40 | value_range=(0.0, 3.0), 41 | default_value=0.3, 42 | ), 43 | ) -> ConfigurationSpace: 44 | 45 | cs = ConfigurationSpace() 46 | use_augmenter = get_hyperparameter(use_augmenter, CategoricalHyperparameter) 47 | sigma_offset = get_hyperparameter(sigma_offset, UniformFloatHyperparameter) 48 | cs.add_hyperparameters([use_augmenter, sigma_offset]) 49 | # sigma_offset is only active in the configuration space when the augmenter is used 50 | cs.add_condition(CS.EqualsCondition(sigma_offset, use_augmenter, True)) 51 | return cs 52 | 53 | @staticmethod 54 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]]
= None 55 | ) -> Dict[str, Any]: 56 | return {'name': 'GaussianNoise'} 57 | --------------------------------------------------------------------------------
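A minimal usage sketch for the augmenter components collected above (this is not part of the repository; it mirrors the pattern exercised in test_setup_image_augmenter.py, from which the fit-dictionary keys and the call on a fitted component are taken):

import numpy as np

from autoPyTorch.pipeline.components.setup.augmentation.image.GaussianNoise import GaussianNoise

# the fit dictionary carries the raw image batch plus dataset properties, as in the tests
X = dict(X_train=np.random.randint(0, 255, (8, 3, 16, 16), dtype=np.uint8),
         dataset_properties=dict(image_height=16, image_width=16))

augmenter = GaussianNoise(use_augmenter=True, sigma_offset=0.3).fit(X)
noisy = augmenter(X['X_train'])  # fitted augmenter components are callable on an image batch
assert isinstance(noisy, np.ndarray)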