├── test ├── __init__.py ├── test_utils │ ├── __init__.py │ ├── test_single_thread_client.py │ ├── test_common.py │ └── test_parallel_model_runner.py ├── test_pipeline │ ├── __init__.py │ ├── components │ │ ├── __init__.py │ │ ├── setup │ │ │ ├── __init__.py │ │ │ ├── forecasting │ │ │ │ ├── __init__.py │ │ │ │ └── forecasting_networks │ │ │ │ │ └── __init__.py │ │ │ └── test_setup_image_augmenter.py │ │ ├── training │ │ │ ├── __init__.py │ │ │ ├── test_forecasting_training.py │ │ │ ├── test_image_data_loader.py │ │ │ └── test_feature_data_loader.py │ │ └── preprocessing │ │ │ ├── __init__.py │ │ │ ├── forecasting │ │ │ ├── __init__.py │ │ │ ├── test_encoder_choice.py │ │ │ └── base.py │ │ │ ├── test_normalizers.py │ │ │ ├── test_variance_thresholding.py │ │ │ ├── test_normalizer_choice.py │ │ │ ├── base.py │ │ │ ├── test_encoder_choice.py │ │ │ └── test_scaler_choice.py │ └── test_traditional_pipeline.py ├── test_ensemble │ ├── .autoPyTorch │ │ ├── runs │ │ │ ├── 0_1_0.0 │ │ │ │ ├── 0.1.0.0.model │ │ │ │ ├── predictions_test_0_1_0.0.npy │ │ │ │ ├── predictions_valid_0_1_0.0.npy │ │ │ │ └── predictions_ensemble_0_1_0.0.npy │ │ │ ├── 0_2_0.0 │ │ │ │ ├── 0.2.0.0.model │ │ │ │ ├── predictions_test_0_2_0.0.np │ │ │ │ ├── predictions_test_0_2_0.0.npy │ │ │ │ ├── predictions_valid_0_2_0.0.npy │ │ │ │ └── predictions_ensemble_0_2_0.0.npy │ │ │ └── 0_3_100.0 │ │ │ │ ├── 0.3.0.0.model │ │ │ │ ├── 0.3.100.0.model │ │ │ │ ├── predictions_test_0_3_100.0.npy │ │ │ │ ├── predictions_valid_0_3_100.0.npy │ │ │ │ └── predictions_ensemble_0_3_100.0.npy │ │ └── predictions_ensemble_true.npy │ └── data │ │ └── .autoPyTorch │ │ ├── runs │ │ ├── 0_1_0.0 │ │ │ ├── 0.1.0.0.model │ │ │ ├── predictions_test_0_1_0.0.npy │ │ │ ├── predictions_valid_0_1_0.0.npy │ │ │ └── predictions_ensemble_0_1_0.0.npy │ │ ├── 0_2_0.0 │ │ │ ├── 0.2.0.0.model │ │ │ ├── predictions_test_0_2_0.0.np │ │ │ ├── predictions_test_0_2_0.0.npy │ │ │ ├── predictions_valid_0_2_0.0.npy │ │ │ └── predictions_ensemble_0_2_0.0.npy │ │ └── 0_3_100.0 │ │ │ ├── 0.3.0.0.model │ │ │ ├── 0.3.100.0.model │ │ │ ├── predictions_test_0_3_100.0.npy │ │ │ ├── predictions_valid_0_3_100.0.npy │ │ │ └── predictions_ensemble_0_3_100.0.npy │ │ ├── .auto-sklearn │ │ ├── runs │ │ │ ├── 0_1_0.0 │ │ │ │ ├── 0.1.0.0.model │ │ │ │ ├── predictions_test_0_1_0.0.npy │ │ │ │ ├── predictions_valid_0_1_0.0.npy │ │ │ │ └── predictions_ensemble_0_1_0.0.npy │ │ │ ├── 0_2_0.0 │ │ │ │ ├── 0.2.0.0.model │ │ │ │ ├── predictions_test_0_2_0.0.np │ │ │ │ ├── predictions_test_0_2_0.0.npy │ │ │ │ ├── predictions_valid_0_2_0.0.npy │ │ │ │ └── predictions_ensemble_0_2_0.0.npy │ │ │ └── 0_3_100.0 │ │ │ │ ├── 0.3.0.0.model │ │ │ │ ├── 0.3.100.0.model │ │ │ │ ├── predictions_test_0_3_100.0.npy │ │ │ │ ├── predictions_valid_0_3_100.0.npy │ │ │ │ └── predictions_ensemble_0_3_100.0.npy │ │ └── predictions_ensemble_true.npy │ │ └── predictions_ensemble_true.npy ├── test_data │ ├── __init__.py │ ├── utils.py │ └── test_forecasting_target_validator.py ├── test_evaluation │ ├── __init__.py │ └── test_utils.py └── test_datasets │ ├── test_base_dataset.py │ └── test_image_dataset.py ├── autoPyTorch ├── api │ └── __init__.py ├── py.typed ├── configs │ ├── __init__.py │ └── default_pipeline_options.json ├── datasets │ └── __init__.py ├── ensemble │ └── __init__.py ├── optimizer │ └── __init__.py ├── pipeline │ ├── __init__.py │ └── components │ │ ├── __init__.py │ │ ├── setup │ │ ├── __init__.py │ │ ├── network │ │ │ └── __init__.py │ │ ├── augmentation │ │ │ ├── __init__.py │ │ │ └── image │ │ │ │ ├── 
__init__.py │ │ │ │ ├── VerticalFlip.py │ │ │ │ ├── HorizontalFlip.py │ │ │ │ ├── base_image_augmenter.py │ │ │ │ ├── Resize.py │ │ │ │ ├── ZeroPadAndCrop.py │ │ │ │ └── GaussianNoise.py │ │ ├── early_preprocessor │ │ │ ├── __init__.py │ │ │ ├── EarlyPreprocessing.py │ │ │ └── utils.py │ │ ├── forecasting_target_scaling │ │ │ └── __init__.py │ │ ├── network_head │ │ │ ├── forecasting_network_head │ │ │ │ └── __init__.py │ │ │ └── utils.py │ │ ├── traditional_ml │ │ │ ├── estimator_configs │ │ │ │ ├── rotation_forest.json │ │ │ │ ├── knn.json │ │ │ │ ├── extra_trees.json │ │ │ │ ├── random_forest.json │ │ │ │ ├── svm.json │ │ │ │ ├── catboost.json │ │ │ │ └── lgb.json │ │ │ └── traditional_learner │ │ │ │ ├── utils.py │ │ │ │ └── __init__.py │ │ ├── network_backbone │ │ │ └── forecasting_backbone │ │ │ │ ├── other_components │ │ │ │ └── __init__.py │ │ │ │ ├── forecasting_decoder │ │ │ │ ├── __init__.py │ │ │ │ └── components.py │ │ │ │ └── forecasting_encoder │ │ │ │ └── flat_encoder │ │ │ │ └── __init__.py │ │ ├── lr_scheduler │ │ │ ├── constants.py │ │ │ ├── NoScheduler.py │ │ │ ├── base_scheduler.py │ │ │ └── CosineAnnealingLR.py │ │ ├── base_setup.py │ │ ├── network_initializer │ │ │ ├── NoInit.py │ │ │ ├── SparseInit.py │ │ │ ├── XavierInit.py │ │ │ ├── OrthogonalInit.py │ │ │ └── KaimingInit.py │ │ ├── forecasting_training_loss │ │ │ ├── base_forecasting_loss.py │ │ │ └── RegressionLoss.py │ │ ├── optimizer │ │ │ └── base_optimizer.py │ │ └── network_embedding │ │ │ └── NoEmbedding.py │ │ ├── training │ │ ├── __init__.py │ │ ├── metrics │ │ │ └── __init__.py │ │ ├── data_loader │ │ │ ├── __init__.py │ │ │ └── image_data_loader.py │ │ ├── trainer │ │ │ └── forecasting_trainer │ │ │ │ ├── ForecastingMixUpTrainer.py │ │ │ │ └── ForecastingStandardTrainer.py │ │ └── base_training.py │ │ └── preprocessing │ │ ├── __init__.py │ │ ├── image_preprocessing │ │ ├── __init__.py │ │ ├── base_image_preprocessor.py │ │ └── normalise │ │ │ ├── base_normalizer.py │ │ │ ├── NoNormalizer.py │ │ │ └── ImageNormalizer.py │ │ ├── tabular_preprocessing │ │ ├── __init__.py │ │ ├── imputation │ │ │ ├── __init__.py │ │ │ └── base_imputer.py │ │ ├── variance_thresholding │ │ │ ├── __init__.py │ │ │ └── VarianceThreshold.py │ │ ├── coalescer │ │ │ ├── NoCoalescer.py │ │ │ ├── base_coalescer.py │ │ │ └── MinorityCoalescer.py │ │ ├── scaling │ │ │ ├── base_scaler.py │ │ │ ├── MinMaxScaler.py │ │ │ ├── PowerTransformer.py │ │ │ ├── StandardScaler.py │ │ │ ├── NoScaler.py │ │ │ └── Normalizer.py │ │ ├── encoding │ │ │ ├── base_encoder.py │ │ │ ├── OneHotEncoder.py │ │ │ └── NoEncoder.py │ │ ├── utils.py │ │ ├── feature_preprocessing │ │ │ ├── base_feature_preprocessor.py │ │ │ └── NoFeaturePreprocessor.py │ │ └── base_tabular_preprocessing.py │ │ └── time_series_preprocessing │ │ ├── __init__.py │ │ ├── scaling │ │ └── __init__.py │ │ ├── imputation │ │ └── __init__.py │ │ ├── encoding │ │ ├── __init__.py │ │ ├── time_series_base_encoder.py │ │ ├── NoEncoder.py │ │ └── OneHotEncoder.py │ │ ├── base_time_series_preprocessing.py │ │ └── utils.py ├── utils │ ├── __init__.py │ ├── logging.yaml │ └── parallel.py ├── evaluation │ └── __init__.py ├── data │ └── __init__.py ├── __init__.py ├── metrics │ └── __init__.py └── __version__.py ├── .binder ├── apt.txt ├── requirements.txt └── postBuild ├── codecov.yml ├── figs └── apt_workflow.png ├── examples ├── README.txt ├── 20_basics │ ├── README.txt │ └── example_image_classification.py └── 40_advanced │ ├── README.txt │ ├── example_run_with_portfolio.py │ └── 
example_parallel_n_jobs.py ├── .gitmodules ├── docs ├── extending.rst ├── _templates │ ├── class.rst │ ├── function.rst │ ├── class_without_init.rst │ └── layout.html ├── api.rst ├── index.rst └── installation.rst ├── .flake8 ├── mypy.ini ├── requirements.txt ├── setup.cfg ├── cicd └── README.md ├── .coveragerc ├── CITATION.cff ├── .pre-commit-config.yaml ├── MANIFEST.in ├── .github ├── workflows │ ├── long_regression_test.yml │ ├── pre-commit.yaml │ ├── release.yml │ ├── dist.yml │ ├── docs.yml │ └── docker-publish.yml ├── ISSUE_TEMPLATE.md └── PULL_REQUEST_TEMPLATE.md ├── Dockerfile ├── .codecov.yml └── .gitignore /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/py.typed: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /test/test_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/ensemble/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_pipeline/components/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.binder/apt.txt: -------------------------------------------------------------------------------- 1 | build-essential 2 | swig 3 | 
-------------------------------------------------------------------------------- /test/test_pipeline/components/setup/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.binder/requirements.txt: -------------------------------------------------------------------------------- 1 | -r ../requirements.txt 2 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/training/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_pipeline/components/training/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/data/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_1_0.0/0.1.0.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_2_0.0/0.2.0.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_pipeline/components/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/augmentation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/training/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_3_100.0/0.3.0.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_3_100.0/0.3.100.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/0.1.0.0.model: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/0.2.0.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_pipeline/components/setup/forecasting/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/augmentation/image/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/early_preprocessor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/training/data_loader/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/0.3.0.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/0.3.100.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_pipeline/components/preprocessing/forecasting/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/image_preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/forecasting_target_scaling/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | token: 667dbd23-97e1-4ef7-9b80-a87c5ec8cb79 3 | -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/0.1.0.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/0.2.0.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_data/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | __author__ = 'feurerm' 3 | -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/0.3.0.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/0.3.100.0.model: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_pipeline/components/setup/forecasting/forecasting_networks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/network_head/forecasting_network_head/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/scaling/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | __author__ = 'feurerm' 3 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/imputation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /figs/apt_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/figs/apt_workflow.png -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/variance_thresholding/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/rotation_forest.json: -------------------------------------------------------------------------------- 1 | { 2 | } 3 | -------------------------------------------------------------------------------- /autoPyTorch/__init__.py: -------------------------------------------------------------------------------- 1 | from autoPyTorch.__version__ import __version__ # noqa (imported but unused) 2 | 
-------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/network_backbone/forecasting_backbone/other_components/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autoPyTorch/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from autoPyTorch.pipeline.components.training.metrics.metrics import * # noqa 2 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/knn.json: -------------------------------------------------------------------------------- 1 | { 2 | "weights" : "uniform" 3 | } 4 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/extra_trees.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_estimators" : 300 3 | } 4 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/random_forest.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_estimators" : 300 3 | } 4 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/svm.json: -------------------------------------------------------------------------------- 1 | { 2 | "C" : 1.0, 3 | "degree" : 3 4 | } 5 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | .. _examples: 2 | 3 | ======== 4 | Examples 5 | ======== 6 | 7 | Practical examples for using *Auto-PyTorch*. 8 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "autoPyTorch/automl_common"] 2 | path = autoPyTorch/automl_common 3 | url = https://github.com/automl/automl_common.git 4 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/catboost.json: -------------------------------------------------------------------------------- 1 | { 2 | "iterations" : 10000, 3 | "learning_rate" : 0.1 4 | } 5 | -------------------------------------------------------------------------------- /docs/extending.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | .. 
_extending: 4 | 5 | ====================== 6 | Extending Auto-PyTorch 7 | ====================== 8 | 9 | TODO -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | show-source = True 4 | application-import-names = autoPyTorch 5 | exclude = 6 | venv 7 | build 8 | -------------------------------------------------------------------------------- /autoPyTorch/__version__.py: -------------------------------------------------------------------------------- 1 | """Version information.""" 2 | 3 | # The following line *must* be the last in the module, exactly as formatted: 4 | __version__ = "0.2.1" 5 | -------------------------------------------------------------------------------- /docs/_templates/class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/predictions_ensemble_true.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/predictions_ensemble_true.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/predictions_ensemble_true.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/predictions_ensemble_true.npy -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_test_0_1_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_test_0_1_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_valid_0_1_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_valid_0_1_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.np: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.np -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_valid_0_2_0.0.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_valid_0_2_0.0.npy -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/network_head/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | _activations = { 4 | "relu": torch.nn.ReLU, 5 | "tanh": torch.nn.Tanh, 6 | "sigmoid": torch.nn.Sigmoid 7 | } 8 | -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_test_0_3_100.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_test_0_3_100.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_valid_0_3_100.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_valid_0_3_100.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_test_0_1_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_test_0_1_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_valid_0_1_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_valid_0_1_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.np: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.np -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.npy 
-------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_valid_0_2_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_valid_0_2_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/predictions_ensemble_true.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/predictions_ensemble_true.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_test_0_3_100.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_test_0_3_100.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_valid_0_3_100.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_valid_0_3_100.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.np: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.np 
-------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_test_0_1_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_test_0_1_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_valid_0_1_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_valid_0_1_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_valid_0_2_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_valid_0_2_0.0.npy -------------------------------------------------------------------------------- /examples/20_basics/README.txt: -------------------------------------------------------------------------------- 1 | .. 
_examples_tabular_basics: 2 | 3 | 4 | ============================== 5 | Basic Tabular Dataset Examples 6 | ============================== 7 | 8 | Basic examples for using *Auto-PyTorch* on tabular datasets 9 | -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_test_0_3_100.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_test_0_3_100.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_valid_0_3_100.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_valid_0_3_100.0.npy -------------------------------------------------------------------------------- /test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/Auto-PyTorch/HEAD/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy -------------------------------------------------------------------------------- /docs/_templates/function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. raw:: html 9 | 10 |
11 | -------------------------------------------------------------------------------- /examples/40_advanced/README.txt: -------------------------------------------------------------------------------- 1 | .. _examples_tabular_basics: 2 | 3 | 4 | ================================= 5 | Advanced Tabular Dataset Examples 6 | ================================= 7 | 8 | Advanced examples for using *Auto-PyTorch* on tabular datasets. 9 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | # Reports any config lines that are not recognized 3 | warn_unused_configs=True 4 | ignore_missing_imports=True 5 | follow_imports=skip 6 | disallow_untyped_defs=True 7 | disallow_incomplete_defs=True 8 | disallow_untyped_decorators=True 9 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/lgb.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_rounds" : 10000, 3 | "num_leaves" : 128, 4 | "two_round" : "True", 5 | "min_data_in_leaf" : 3, 6 | "feature_fraction" : 0.9, 7 | "boosting_type" : "gbdt", 8 | "learning_rate" : 0.03 9 | } 10 | -------------------------------------------------------------------------------- /docs/_templates/class_without_init.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 | 13 | -------------------------------------------------------------------------------- /autoPyTorch/configs/default_pipeline_options.json: -------------------------------------------------------------------------------- 1 | { 2 | "device": "cpu", 3 | "budget_type": "epochs", 4 | "epochs": 50, 5 | "runtime": 3600, 6 | "torch_num_threads": 1, 7 | "early_stopping": 20, 8 | "use_tensorboard_logger": "False", 9 | "metrics_during_training": "True" 10 | } 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | torch>=1.10.1 3 | torchvision 4 | tensorboard 5 | scikit-learn>=0.24.0,<0.25.0 6 | numpy 7 | scipy>=1.7 8 | lockfile 9 | imgaug>=0.4.0 10 | ConfigSpace>=0.5.0 11 | pynisher>=0.6.3 12 | pyrfr>=0.7,<0.9 13 | smac>=1.2 14 | dask 15 | distributed>=2.2.0 16 | catboost 17 | lightgbm 18 | flaky 19 | tabulate 20 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | 4 | [flake8] 5 | application-import-names = autoPyTorch 6 | max-line-length = 120 7 | ignore = W605,E402,W503 8 | show-source = True 9 | 10 | [mypy] 11 | ignore_missing_imports = True 12 | follow_imports=skip 13 | disallow_untyped_decorators = True 14 | disallow_incomplete_defs = True 15 | disallow_untyped_defs = True -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/traditional_ml/traditional_learner/utils.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class 
AutoPyTorchToCatboostMetrics(Enum): 5 | mean_absolute_error = "MAE" 6 | root_mean_squared_error = "RMSE" 7 | mean_squared_log_error = "MSLE" 8 | r2 = "R2" 9 | accuracy = "Accuracy" 10 | balanced_accuracy = "BalancedAccuracy" 11 | f1 = "F1" 12 | roc_auc = "AUC" 13 | precision = "Precision" 14 | recall = "Recall" 15 | log_loss = "Logloss" 16 | -------------------------------------------------------------------------------- /cicd/README.md: -------------------------------------------------------------------------------- 1 | ########################################################### 2 | # Continuous integration and continuous delivery/deployment 3 | ########################################################### 4 | 5 | This part of the code is tasked with making sure that we can perform reliable NAS. 6 | To this end, we rely on pytest to run some long-running configurations from both 7 | the greedy portfolio and the default configuration. 8 | 9 | ``` 10 | python -m pytest cicd/test_preselected_configs.py -vs 11 | ``` 12 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/lr_scheduler/constants.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class StepIntervalUnit(Enum): 5 | """ 6 | The interval at which we perform the step for learning rate schedulers. 7 | Attributes: 8 | batch (str): We update every batch evaluation 9 | epoch (str): We update every epoch 10 | valid (str): We update every validation 11 | """ 12 | batch = 'batch' 13 | epoch = 'epoch' 14 | valid = 'valid' 15 | 16 | 17 | StepIntervalUnitChoices = [step_interval.name for step_interval in StepIntervalUnit] 18 | -------------------------------------------------------------------------------- /test/test_datasets/test_base_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import pytest 4 | 5 | from autoPyTorch.datasets.base_dataset import _get_output_properties 6 | 7 | 8 | @pytest.mark.parametrize( 9 | "target_labels,dim,task_type", ( 10 | (np.arange(5), 5, "multiclass"), 11 | (np.linspace(0, 1, 3), 1, "continuous"), 12 | (np.linspace(0, 1, 3)[:, np.newaxis], 1, "continuous") 13 | ) 14 | ) 15 | def test_get_output_properties(target_labels, dim, task_type): 16 | train_tensors = np.array([np.empty_like(target_labels), target_labels]) 17 | output_dim, output_type = _get_output_properties(train_tensors) 18 | assert output_dim == dim 19 | assert output_type == task_type 20 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | # .coveragerc to control coverage.py 2 | [run] 3 | branch = True 4 | include = "autoPyTorch/*" 5 | 6 | [report] 7 | # Regexes for lines to exclude from consideration 8 | exclude_lines = 9 | # Have to re-enable the standard pragma 10 | pragma: no cover 11 | 12 | # Don't complain about missing debug-only code: 13 | def __repr__ 14 | if self\.debug 15 | 16 | # Don't complain if tests don't hit defensive assertion code: 17 | raise AssertionError 18 | raise NotImplementedError 19 | 20 | # Don't complain if non-runnable code isn't run: 21 | if 0: 22 | if __name__ == .__main__.: 23 | 24 | ignore_errors = True 25 | 26 | [html] 27 | directory = coverage_html_report 28 | -------------------------------------------------------------------------------- /CITATION.cff:
-------------------------------------------------------------------------------- 1 | preferred-citation: 2 | type: article 3 | authors: 4 | - family-names: "Zimmer" 5 | given-names: "Lucas" 6 | affiliation: "University of Freiburg, Germany" 7 | - family-names: "Lindauer" 8 | given-names: "Marius" 9 | affiliation: "University of Freiburg, Germany" 10 | - family-names: "Hutter" 11 | given-names: "Frank" 12 | affiliation: "University of Freiburg, Germany" 13 | doi: "10.1109/TPAMI.2021.3067763" 14 | journal-title: "IEEE Transactions on Pattern Analysis and Machine Intelligence" 15 | title: "Auto-PyTorch Tabular: Multi-Fidelity MetaLearning for Efficient and Robust AutoDL" 16 | year: 2021 17 | note: "also available under https://arxiv.org/abs/2006.13799" 18 | start: 3079 19 | end: 3090 20 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/network_backbone/forecasting_backbone/forecasting_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from autoPyTorch.pipeline.components.base_component import ( 4 | ThirdPartyComponents, find_components) 5 | from autoPyTorch.pipeline.components.setup.network_backbone.forecasting_backbone.forecasting_decoder.\ 6 | base_forecasting_decoder import BaseForecastingDecoder 7 | 8 | directory = os.path.split(__file__)[0] 9 | decoders = find_components(__package__, 10 | directory, 11 | BaseForecastingDecoder) 12 | 13 | decoder_addons = ThirdPartyComponents(BaseForecastingDecoder) 14 | 15 | 16 | def add_decoder(decoder: BaseForecastingDecoder) -> None: 17 | decoder_addons.add_component(decoder) 18 | -------------------------------------------------------------------------------- /autoPyTorch/pipeline/components/setup/base_setup.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent 4 | 5 | 6 | class autoPyTorchSetupComponent(autoPyTorchComponent): 7 | """Provides an abstract interface for setup components 8 | in Auto-PyTorch""" 9 | 10 | def __init__(self) -> None: 11 | super(autoPyTorchSetupComponent, self).__init__() 12 | 13 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: 14 | """ 15 | Adds the fitted component into the fit dictionary 'X' and returns it. 16 | Args: 17 | X (Dict[str, Any]): 'X' dictionary 18 | Returns: 19 | (Dict[str, Any]): the updated 'X' dictionary 20 | """ 21 | raise NotImplementedError() 22 | -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | 3 | {# Custom CSS overrides #} 4 | {# set bootswatch_css_custom = ['_static/my-styles.css'] #} 5 | 6 | {# Add github banner (from: https://github.com/blog/273-github-ribbons). #} 7 | {% block header %} 8 | {{ super() }} 9 |
15 |
22 | {% endblock %}
23 |
24 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/pre-commit/mirrors-mypy
3 | rev: v0.761
4 | hooks:
5 | - id: mypy
6 | args: [--show-error-codes,
7 | --warn-redundant-casts,
8 | --warn-return-any,
9 | --warn-unreachable,
10 | ]
11 | files: autoPyTorch/.*
12 | exclude: autoPyTorch/ensemble/
13 | - repo: https://gitlab.com/pycqa/flake8
14 | rev: 3.8.3
15 | hooks:
16 | - id: flake8
17 | additional_dependencies:
18 | - flake8-print==3.1.4
19 | - flake8-import-order
20 | name: flake8 autoPyTorch
21 | files: autoPyTorch/.*
22 | - id: flake8
23 | additional_dependencies:
24 | - flake8-print==3.1.4
25 | - flake8-import-order
26 | name: flake8 test
27 | files: test/.*
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/setup/network_initializer/NoInit.py:
--------------------------------------------------------------------------------
1 | from typing import Callable
2 |
3 | import torch
4 |
5 | from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import (
6 | BaseNetworkInitializerComponent
7 | )
8 |
9 |
10 | class NoInit(BaseNetworkInitializerComponent):
11 | """
12 | No initialization on the weights/bias
13 | """
14 |
15 | def weights_init(self) -> Callable:
16 | """Returns the actual PyTorch model, that is dynamically created
17 | from a self.config object.
18 |
19 | self.config is a dictionary created form a given config in the config space.
20 | It contains the necessary information to build a network.
21 | """
22 | def initialization(m: torch.nn.Module) -> None:
23 | pass
24 | return initialization
25 |
--------------------------------------------------------------------------------
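For context, the callable returned by `weights_init` is consumed through `torch.nn.Module.apply`, which visits every submodule of a network. A minimal, self-contained sketch of that pattern, assuming only `torch` is installed (the `Sequential` network below is an arbitrary stand-in, not an Auto-PyTorch model):

```python
import torch


# The same no-op initializer that NoInit.weights_init returns; applying it
# through Module.apply visits every submodule but changes nothing, so the
# network keeps PyTorch's default initialization.
def initialization(m: torch.nn.Module) -> None:
    pass


network = torch.nn.Sequential(
    torch.nn.Linear(8, 16),
    torch.nn.ReLU(),
    torch.nn.Linear(16, 1),
)
network.apply(initialization)
```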
/autoPyTorch/pipeline/components/training/trainer/forecasting_trainer/ForecastingMixUpTrainer.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Optional, Union
2 |
3 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
4 | from autoPyTorch.pipeline.components.training.trainer.MixUpTrainer import MixUpTrainer
5 | from autoPyTorch.pipeline.components.training.trainer.forecasting_trainer.forecasting_base_trainer import \
6 | ForecastingBaseTrainerComponent
7 |
8 |
9 | class ForecastingMixUpTrainer(ForecastingBaseTrainerComponent, MixUpTrainer):
10 | @staticmethod
11 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
12 | ) -> Dict[str, Union[str, bool]]:
13 | return {
14 | 'shortname': 'ForecastingMixUpTrainer',
15 | 'name': 'MixUp Regularized Trainer',
16 | }
17 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/base_image_preprocessor.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional
2 |
3 | from autoPyTorch.pipeline.components.preprocessing.base_preprocessing import autoPyTorchPreprocessingComponent
4 |
5 |
6 | class autoPyTorchImagePreprocessingComponent(autoPyTorchPreprocessingComponent):
7 | """
8 | Provides abstract interface for preprocessing algorithms in AutoPyTorch.
9 | """
10 |
11 | def fit(self, X: Dict[str, Any], y: Optional[Any] = None) -> "autoPyTorchImagePreprocessingComponent":
12 | """
13 | Checks that the requirements to fit this component are met and returns self.
14 | Args:
15 | X (Dict[str, Any]): 'X' dictionary
16 |
17 | Returns:
18 | autoPyTorchImagePreprocessingComponent: self
19 | """
20 | self.check_requirements(X, y)
21 |
22 | return self
23 |
--------------------------------------------------------------------------------
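Concrete image preprocessors build on this interface: `fit` (via `super().fit`) validates the fit dictionary, and the component then applies its transformation to image data. A simplified, hypothetical subclass sketch; the `ScaleTo01` name and its `__call__` contract are illustrative assumptions, not one of the library's actual components:

```python
from typing import Any, Dict, Optional

import numpy as np


class ScaleTo01(autoPyTorchImagePreprocessingComponent):
    """Hypothetical example component that scales uint8 images into [0, 1]."""

    def fit(self, X: Dict[str, Any], y: Optional[Any] = None) -> "ScaleTo01":
        # The base class only runs check_requirements(X, y); a stateless
        # preprocessor like this one needs nothing more.
        super().fit(X, y)
        return self

    def __call__(self, X: np.ndarray) -> np.ndarray:
        return X.astype(np.float32) / 255.0
```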
/autoPyTorch/pipeline/components/training/trainer/forecasting_trainer/ForecastingStandardTrainer.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Optional, Union
2 |
3 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
4 | from autoPyTorch.pipeline.components.training.trainer.StandardTrainer import StandardTrainer
5 | from autoPyTorch.pipeline.components.training.trainer.forecasting_trainer.forecasting_base_trainer import \
6 | ForecastingBaseTrainerComponent
7 |
8 |
9 | class ForecastingStandardTrainer(ForecastingBaseTrainerComponent, StandardTrainer):
10 | @staticmethod
11 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
12 | ) -> Dict[str, Union[str, bool]]:
13 | return {
14 | 'shortname': 'ForecastingStandardTrainer',
15 | 'name': 'Forecasting Standard Trainer',
16 | }
17 |
--------------------------------------------------------------------------------
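The two forecasting trainers above differ only in the regularization parent they mix in; their metadata comes from a `staticmethod`, so it can be queried without constructing a trainer. A small usage sketch (assuming Auto-PyTorch is installed with its forecasting dependencies):

```python
from autoPyTorch.pipeline.components.training.trainer.forecasting_trainer.ForecastingStandardTrainer import (
    ForecastingStandardTrainer,
)

# get_properties is a staticmethod returning the component's metadata.
props = ForecastingStandardTrainer.get_properties()
print(props['shortname'])  # 'ForecastingStandardTrainer'
```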
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include requirements.txt
2 | include autoPyTorch/py.typed
3 | include autoPyTorch/utils/logging.yaml
4 | include autoPyTorch/configs/default_pipeline_options.json
5 | include autoPyTorch/configs/greedy_portfolio.json
6 | include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/catboost.json
7 | include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/rotation_forest.json
8 | include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/random_forest.json
9 | include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/knn.json
10 | include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/svm.json
11 | include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/extra_trees.json
12 | include autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs/lgb.json
13 |
--------------------------------------------------------------------------------
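The `include` directives above ship non-Python files (the `py.typed` marker, the logging config, and the JSON defaults) as package data, so they can be resolved relative to the installed package at runtime. A hedged sketch of reading one of them; locating the file through `__file__` is an assumption about the on-disk layout, not a documented API:

```python
import json
import os

import autoPyTorch

# default_pipeline_options.json is bundled by MANIFEST.in, so it sits next to
# the installed package sources under configs/.
path = os.path.join(os.path.dirname(autoPyTorch.__file__),
                    'configs', 'default_pipeline_options.json')
with open(path) as f:
    options = json.load(f)
print(options['epochs'])  # 50, per the defaults shown earlier
```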
/test/test_data/utils.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import numpy as np
4 |
5 | import pandas as pd
6 |
7 | from scipy.sparse import spmatrix
8 |
9 |
10 | def convert(arr, objtype):
11 | if objtype == np.ndarray:
12 | return arr
13 | elif objtype == list:
14 | return arr.tolist()
15 | else:
16 | return objtype(arr)
17 |
18 |
19 | # Function to get the dtype (element type) of an object
20 | def dtype(obj):
21 | if isinstance(obj, List):
22 | return type(obj[0][0]) if isinstance(obj[0], List) else type(obj[0])
23 | elif isinstance(obj, pd.DataFrame):
24 | return obj.dtypes
25 | else:
26 | return obj.dtype
27 |
28 |
29 | # Function to get the size of an object
30 | def size(obj):
31 | if isinstance(obj, spmatrix): # spmatrix doesn't support __len__
32 | return obj.shape[0] if obj.shape[0] > 1 else obj.shape[1]
33 | else:
34 | return len(obj)
35 |
--------------------------------------------------------------------------------
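These three helpers let the data-validation tests treat lists, numpy arrays, DataFrames, and sparse matrices uniformly. A short usage sketch, assuming the helpers above are in scope together with numpy and scipy:

```python
import numpy as np
from scipy import sparse

arr = np.array([[1, 2], [3, 4]])
print(convert(arr, list))            # [[1, 2], [3, 4]]
print(dtype(arr))                    # int64 (platform dependent)
print(size(sparse.csr_matrix(arr)))  # 2 -- row count, since spmatrix has no __len__
```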
/test/test_pipeline/components/preprocessing/forecasting/test_encoder_choice.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from autoPyTorch.pipeline.components.preprocessing.time_series_preprocessing.encoding import TimeSeriesEncoderChoice
4 |
5 |
6 | class TestEncoderChoice(unittest.TestCase):
7 | def test_get_set_config_space(self):
8 | """Make sure that we can setup a valid choice in the encoder
9 | choice"""
10 | dataset_properties = {'numerical_columns': list(range(4)), 'categorical_columns': [5]}
11 | encoder_choice = TimeSeriesEncoderChoice(dataset_properties)
12 | cs = encoder_choice.get_hyperparameter_search_space()
13 |
14 | # Make sure that all hyperparameters are part of the search space
15 | self.assertListEqual(
16 | sorted(cs.get_hyperparameter('__choice__').choices),
17 | sorted(list(encoder_choice.get_components().keys()))
18 | )
19 |
20 |
21 | if __name__ == '__main__':
22 | unittest.main()
23 |
--------------------------------------------------------------------------------
/.github/workflows/long_regression_test.yml:
--------------------------------------------------------------------------------
1 | name: Tests
2 |
3 | on:
4 | schedule:
5 | # Every Tuesday at 7AM UTC
6 | # TODO: temporarily set to every day just for the PR
7 | #- cron: '0 07 * * 2'
8 | - cron: '0 07 * * *'
9 |
10 | jobs:
11 |
12 | ubuntu:
13 | runs-on: ubuntu-latest
14 |
15 | strategy:
16 | fail-fast: false
17 | matrix:
18 | python-version: [3.8]
19 |
20 | steps:
21 | - uses: actions/checkout@v2
22 | with:
23 | ref: development
24 | submodules: recursive
25 | - name: Setup Python ${{ matrix.python-version }}
26 | uses: actions/setup-python@v2
27 | with:
28 | python-version: ${{ matrix.python-version }}
29 |
30 | - name: Install test dependencies
31 | run: |
32 | python -m pip install --upgrade pip
33 | pip install -e .[forecasting,test]
34 |
35 | - name: Run tests
36 | run: |
37 | python -m pytest --durations=200 cicd/test_preselected_configs.py -vs
38 |
--------------------------------------------------------------------------------
/.github/workflows/pre-commit.yaml:
--------------------------------------------------------------------------------
1 | name: pre-commit
2 |
3 | on:
4 | # Allow to manually trigger through github API
5 | workflow_dispatch:
6 |
7 | # Triggers with push to these branches
8 | push:
9 | branches:
10 | - master
11 | - development
12 |
13 | # Triggers with push to a pr aimed at these branches
14 | pull_request:
15 | branches:
16 | - master
17 | - development
18 |
19 | jobs:
20 |
21 | run-all-files:
22 | runs-on: ubuntu-latest
23 |
24 | steps:
25 | - name: Checkout
26 | uses: actions/checkout@v2
27 |
28 | - name: Setup Python 3.7
29 | uses: actions/setup-python@v2
30 | with:
31 | python-version: 3.7
32 |
33 | - name: Init Submodules
34 | run: |
35 | git submodule update --init --recursive
36 |
37 | - name: Install pre-commit
38 | run: |
39 | pip install pre-commit
40 | pre-commit install
41 |
42 | - name: Run pre-commit
43 | run: |
44 | pre-commit run --all-files
45 |
--------------------------------------------------------------------------------
/test/test_pipeline/components/training/test_forecasting_training.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from autoPyTorch.constants import FORECASTING_BUDGET_TYPE
4 | from autoPyTorch.pipeline.components.training.trainer.forecasting_trainer import ForecastingTrainerChoice
5 |
6 |
7 | class TestGetBudgetTracker(unittest.TestCase):
8 | def test_get_budget_tracker(self):
9 | trainer = ForecastingTrainerChoice({})
10 | max_epoch = 50
11 |
12 | X = {'budget_type': 'epochs',
13 | 'epochs': 5,
14 | }
15 | budget_tracker = trainer.get_budget_tracker(X)
16 | self.assertEqual(budget_tracker.max_epochs, 5)
17 |
18 | for budget_type in FORECASTING_BUDGET_TYPE:
19 | budget_tracker = trainer.get_budget_tracker({'budget_type': budget_type})
20 | self.assertEqual(budget_tracker.max_epochs, max_epoch)
21 |
22 | budget_tracker = trainer.get_budget_tracker({'budget_type': 'runtime'})
23 | self.assertIsNone(budget_tracker.max_epochs)
24 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/setup/augmentation/image/VerticalFlip.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional, Union
2 |
3 | import imgaug.augmenters as iaa
4 | from imgaug.augmenters.meta import Augmenter
5 |
6 | import numpy as np
7 |
8 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
9 | from autoPyTorch.pipeline.components.setup.augmentation.image.base_image_augmenter import BaseImageAugmenter
10 |
11 |
12 | class VerticalFlip(BaseImageAugmenter):
13 | def __init__(self, random_state: Optional[Union[int, np.random.RandomState]] = None):
14 | super().__init__()
15 | self.random_state = random_state
16 |
17 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImageAugmenter:
18 | self.augmenter: Augmenter = iaa.Flipud(p=0.5, name=self.get_properties()['name'])
19 |
20 | return self
21 |
22 | @staticmethod
23 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
24 | ) -> Dict[str, Any]:
25 | return {'name': 'VerticalFlip'}
26 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/setup/augmentation/image/HorizontalFlip.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional, Union
2 |
3 | import imgaug.augmenters as iaa
4 | from imgaug.augmenters.meta import Augmenter
5 |
6 | import numpy as np
7 |
8 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
9 | from autoPyTorch.pipeline.components.setup.augmentation.image.base_image_augmenter import BaseImageAugmenter
10 |
11 |
12 | class HorizontalFlip(BaseImageAugmenter):
13 | def __init__(self, random_state: Optional[Union[int, np.random.RandomState]] = None):
14 | super().__init__()
15 | self.random_state = random_state
16 |
17 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImageAugmenter:
18 | self.augmenter: Augmenter = iaa.Fliplr(p=0.5, name=self.get_properties()['name'])
19 |
20 | return self
21 |
22 | @staticmethod
23 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
24 | ) -> Dict[str, Any]:
25 | return {'name': 'HorizontalFlip'}
26 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/setup/forecasting_training_loss/base_forecasting_loss.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Callable, Dict, Optional
2 |
3 | from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent
4 | from autoPyTorch.utils.common import FitRequirement
5 |
6 |
7 | class ForecastingLossComponents(autoPyTorchComponent):
8 | _required_properties = ["name", "handles_tabular", "handles_image", "handles_time_series",
9 | 'handles_regression', 'handles_classification']
10 | loss: Optional[Callable] = None
11 | net_output_type: Optional[str] = None
12 |
13 | def __init__(self,
14 | **kwargs: Any):
15 | super().__init__()
16 | self.add_fit_requirements([
17 | FitRequirement('task_type', (str,), user_defined=True, dataset_property=True),
18 | ])
19 |
20 | def fit(self, X: Dict[str, Any], y: Any = None) -> "autoPyTorchComponent":
21 | self.check_requirements(X, y)
22 | return self
23 |
24 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
25 | X.update({"loss": self.loss,
26 | 'net_output_type': self.net_output_type})
27 | return X
28 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/setup/network_initializer/SparseInit.py:
--------------------------------------------------------------------------------
1 | from typing import Callable
2 |
3 | import torch
4 |
5 | from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import (
6 | BaseNetworkInitializerComponent
7 | )
8 |
9 |
10 | class SparseInit(BaseNetworkInitializerComponent):
11 | """
12 | Fills the 2D input Tensor as a sparse matrix
13 | """
14 | def weights_init(self) -> Callable:
15 | """Returns the actual PyTorch model, that is dynamically created
16 | from a self.config object.
17 |
18 | self.config is a dictionary created form a given config in the config space.
19 | It contains the necessary information to build a network.
20 | """
21 |
22 | def initialization(m: torch.nn.Module) -> None:
23 | if isinstance(m, (torch.nn.Conv1d,
24 | torch.nn.Conv2d,
25 | torch.nn.Conv3d,
26 | torch.nn.Linear)):
27 | torch.nn.init.sparse_(m.weight.data, 0.9)
28 | if m.bias is not None and self.bias_strategy == 'Zero':
29 | torch.nn.init.constant_(m.bias.data, 0.0)
30 | return initialization
31 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:20.04
2 |
3 | WORKDIR /auto-pytorch
4 |
5 | # install linux packages
6 | RUN apt-get update
7 |
8 | # Set the locale
9 | # workaround for https://github.com/automl/auto-sklearn/issues/867
10 | RUN apt-get -y install locales
11 | RUN touch /usr/share/locale/locale.alias
12 | RUN sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && locale-gen
13 | ENV LANG en_US.UTF-8
14 | ENV LANGUAGE en_US:en
15 | ENV LC_ALL en_US.UTF-8
16 |
17 | # set environment variables to only use one core (ENV persists across layers; RUN export would not)
18 | ENV OPENBLAS_NUM_THREADS=1
19 | ENV MKL_NUM_THREADS=1
20 | ENV BLAS_NUM_THREADS=1
21 | ENV OMP_NUM_THREADS=1
22 |
23 | # install build requirements
24 | RUN apt install -y python3-dev python3-pip
25 | RUN pip3 install --upgrade setuptools
26 | RUN apt install -y build-essential
27 |
28 | RUN apt install -y swig
29 |
30 | # Copy the checked-out auto-pytorch version for installation
31 | ADD . /auto-pytorch/
32 |
33 | # Upgrade pip then install dependencies
34 | RUN pip3 install --upgrade pip
35 | RUN pip3 install pytest==4.6.* pep8 codecov pytest-cov flake8 flaky openml
36 | RUN cat /auto-pytorch/requirements.txt | xargs -n 1 -L 1 pip3 install
37 | RUN pip3 install jupyter
38 |
39 | # Install
40 | RUN pip3 install /auto-pytorch/
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/setup/network_initializer/XavierInit.py:
--------------------------------------------------------------------------------
1 | from typing import Callable
2 |
3 | import torch
4 |
5 | from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import (
6 | BaseNetworkInitializerComponent
7 | )
8 |
9 |
10 | class XavierInit(BaseNetworkInitializerComponent):
11 | """
12 | Fills the input Tensor with values using Xavier (Glorot) normal initialization
13 | """
14 |
15 | def weights_init(self) -> Callable:
16 | """Returns the actual PyTorch model, that is dynamically created
17 | from a self.config object.
18 |
19 | self.config is a dictionary created form a given config in the config space.
20 | It contains the necessary information to build a network.
21 | """
22 | def initialization(m: torch.nn.Module) -> None:
23 | if isinstance(m, (torch.nn.Conv1d,
24 | torch.nn.Conv2d,
25 | torch.nn.Conv3d,
26 | torch.nn.Linear)):
27 | torch.nn.init.xavier_normal_(m.weight.data)
28 | if m.bias is not None and self.bias_strategy == 'Zero':
29 | torch.nn.init.constant_(m.bias.data, 0.0)
30 | return initialization
31 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/setup/network_initializer/OrthogonalInit.py:
--------------------------------------------------------------------------------
1 | from typing import Callable
2 |
3 | import torch
4 |
5 | from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import (
6 | BaseNetworkInitializerComponent
7 | )
8 |
9 |
10 | class OrthogonalInit(BaseNetworkInitializerComponent):
11 | """
12 | Fills the input Tensor with a (semi) orthogonal matrix
13 | """
14 |
15 | def weights_init(self) -> Callable:
16 | """Returns the actual PyTorch model, that is dynamically created
17 | from a self.config object.
18 |
19 | self.config is a dictionary created form a given config in the config space.
20 | It contains the necessary information to build a network.
21 | """
22 | def initialization(m: torch.nn.Module) -> None:
23 | if isinstance(m, (torch.nn.Conv1d,
24 | torch.nn.Conv2d,
25 | torch.nn.Conv3d,
26 | torch.nn.Linear)):
27 | torch.nn.init.orthogonal_(m.weight.data)
28 | if m.bias is not None and self.bias_strategy == 'Zero':
29 | torch.nn.init.constant_(m.bias.data, 0.0)
30 | return initialization
31 |
--------------------------------------------------------------------------------
/test/test_pipeline/components/training/test_image_data_loader.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import unittest.mock
3 |
4 | import torchvision
5 |
6 | from autoPyTorch.pipeline.components.training.data_loader.image_data_loader import (
7 | ImageDataLoader
8 | )
9 |
10 |
11 | def test_imageloader_build_transform():
12 | """
13 | Makes sure a proper composition is created
14 | """
15 | loader = ImageDataLoader()
16 |
17 | fit_dictionary = dict()
18 | fit_dictionary['dataset_properties'] = dict()
19 | fit_dictionary['dataset_properties']['is_small_preprocess'] = unittest.mock.Mock()
20 | fit_dictionary['image_augmenter'] = unittest.mock.Mock()
21 | fit_dictionary['preprocess_transforms'] = unittest.mock.Mock()
22 |
23 | compose = loader.build_transform(fit_dictionary, mode='train')
24 |
25 | assert isinstance(compose, torchvision.transforms.Compose)
26 |
27 | # We expect the to-tensor and the image augmenter transforms
28 | assert len(compose.transforms) == 2
29 |
30 | compose = loader.build_transform(fit_dictionary, mode='test')
31 | assert isinstance(compose, torchvision.transforms.Compose)
32 | assert len(compose.transforms) == 2
33 |
34 | # Check the expected error msgs
35 | loader._check_transform_requirements(fit_dictionary)
36 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/coalescer/NoCoalescer.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional, Union
2 |
3 | import numpy as np
4 |
5 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.coalescer.base_coalescer import BaseCoalescer
6 |
7 |
8 | class NoCoalescer(BaseCoalescer):
9 | def __init__(self, random_state: np.random.RandomState):
10 | super().__init__()
11 | self.random_state = random_state
12 | self._processing = False
13 |
14 | def fit(self, X: Dict[str, Any], y: Optional[Any] = None) -> BaseCoalescer:
15 | """
16 | As no coalescing happens, only check the requirements.
17 |
18 | Args:
19 | X (Dict[str, Any]):
20 | fit dictionary
21 | y (Optional[Any]):
22 | Parameter to comply with scikit-learn API. Not used.
23 |
24 | Returns:
25 | instance of self
26 | """
27 | self.check_requirements(X, y)
28 |
29 | return self
30 |
31 | @staticmethod
32 | def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, Union[str, bool]]:
33 | return {
34 | 'shortname': 'NoCoalescer',
35 | 'name': 'NoCoalescer',
36 | 'handles_sparse': True
37 | }
38 |
--------------------------------------------------------------------------------
/test/test_utils/test_single_thread_client.py:
--------------------------------------------------------------------------------
1 | import dask.distributed
2 |
3 | from distributed.utils_test import inc
4 |
5 | import pytest
6 |
7 | from autoPyTorch.utils.single_thread_client import SingleThreadedClient
8 |
9 |
10 | def test_single_thread_client_like_dask_client():
11 | single_thread_client = SingleThreadedClient()
12 | assert isinstance(single_thread_client, dask.distributed.Client)
13 | future = single_thread_client.submit(inc, 1)
14 | assert isinstance(future, dask.distributed.Future)
15 | assert future.done()
16 | assert future.result() == 2
17 | assert sum(single_thread_client.nthreads().values()) == 1
18 | single_thread_client.close()
19 | single_thread_client.shutdown()
20 |
21 | # Client/Futures are printed, so make sure str works
22 | # str calls __repr__, which is the purpose of the check below
23 | assert str(future) != ""
24 | assert str(single_thread_client) != ""
25 |
26 | # Single thread client is an inherited version of dask client
27 | # so that futures run in the same thread as the main job.
28 | # We carefully selected what methods are inherited, and any other
29 | # method should raise a not implemented error to be safe of major
30 | # dask client api changes.
31 | with pytest.raises(NotImplementedError):
32 | single_thread_client.get_scheduler_logs()
33 |
--------------------------------------------------------------------------------
/.codecov.yml:
--------------------------------------------------------------------------------
1 | #see https://github.com/codecov/support/wiki/Codecov-Yaml
2 | codecov:
3 | notify:
4 | require_ci_to_pass: yes
5 |
6 | coverage:
7 | precision: 2 # 2 = xx.xx%, 0 = xx%
8 | round: nearest # how coverage is rounded: down/up/nearest
9 | range: 10...90 # custom range of coverage colors from red -> yellow -> green
10 | status:
11 | # https://codecov.readme.io/v1.0/docs/commit-status
12 | project:
13 | default:
14 | against: auto
15 | target: 70% # specify the target coverage for each commit status
16 | threshold: 50% # allow this much decrease on the project
17 | # https://github.com/codecov/support/wiki/Filtering-Branches
18 | # branches: master
19 | if_ci_failed: error
20 | # https://github.com/codecov/support/wiki/Patch-Status
21 | patch:
22 | default:
23 | against: auto
24 | target: 30% # specify the target "X%" coverage to hit
25 | threshold: 50% # allow this much decrease on patch
26 | changes: false
27 |
28 | parsers:
29 | gcov:
30 | branch_detection:
31 | conditional: true
32 | loop: true
33 | macro: false
34 | method: false
35 | javascript:
36 | enable_partials: false
37 |
38 | comment:
39 | layout: header, diff
40 | require_changes: false
41 | behavior: default # update if exists else create new
42 | branches: "*"
43 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, List
2 |
3 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import (
4 | autoPyTorchTabularPreprocessingComponent
5 | )
6 | from autoPyTorch.utils.common import FitRequirement
7 |
8 |
9 | class BaseScaler(autoPyTorchTabularPreprocessingComponent):
10 | """
11 | Provides abstract class interface for Scalers in AutoPytorch
12 | """
13 |
14 | def __init__(self) -> None:
15 | super().__init__()
16 | self.add_fit_requirements([
17 | FitRequirement('numerical_columns', (List,), user_defined=True, dataset_property=True)])
18 |
19 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
20 | """
21 | Adds the fitted scaler into the 'X' dictionary and returns it.
22 | Args:
23 | X (Dict[str, Any]): 'X' dictionary
24 |
25 | Returns:
26 | (Dict[str, Any]): the updated 'X' dictionary
27 | """
28 | if self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None:
29 | raise ValueError("cant call transform on {} without fitting first."
30 | .format(self.__class__.__name__))
31 | X.update({'scaler': self.preprocessor})
32 | return X
33 |
--------------------------------------------------------------------------------
/autoPyTorch/utils/logging.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | version: 1
3 | disable_existing_loggers: false
4 | formatters:
5 | simple:
6 | format: '[%(levelname)s] [%(asctime)s:%(name)s] %(message)s'
7 |
8 | handlers:
9 | console:
10 | class: logging.StreamHandler
11 | level: WARNING
12 | formatter: simple
13 | stream: ext://sys.stdout
14 |
15 | file_handler:
16 | class: logging.FileHandler
17 | level: DEBUG
18 | formatter: simple
19 | filename: autoPyTorch.log
20 |
21 | distributed_logfile:
22 | class: logging.FileHandler
23 | level: DEBUG
24 | formatter: simple
25 | filename: distributed.log
26 |
27 | root:
28 | level: DEBUG
29 | handlers: [console, file_handler]
30 |
31 | loggers:
32 |
33 | autoPyTorch.automl_common.common.utils.backend:
34 | level: DEBUG
35 | handlers: [file_handler]
36 | propagate: false
37 |
38 | smac.intensification.intensification.Intensifier:
39 | level: INFO
40 | handlers: [file_handler, console]
41 |
42 | smac.optimizer.local_search.LocalSearch:
43 | level: INFO
44 | handlers: [file_handler, console]
45 |
46 | smac.optimizer.smbo.SMBO:
47 | level: INFO
48 | handlers: [file_handler, console]
49 |
50 | EnsembleBuilder:
51 | level: DEBUG
52 | handlers: [file_handler, console]
53 |
54 | distributed:
55 | level: DEBUG
56 | handlers: [distributed_logfile]
57 |
--------------------------------------------------------------------------------
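For context, a dictConfig-style YAML file like the one above is typically consumed as follows; this is a minimal sketch, not the library's exact loading code:

```
import logging.config

import yaml

with open('autoPyTorch/utils/logging.yaml') as fh:
    logging.config.dictConfig(yaml.safe_load(fh))

# WARNING and above reach stdout; DEBUG and above land in autoPyTorch.log
logging.getLogger('autoPyTorch').debug('only visible in autoPyTorch.log')
```
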
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/base_imputer.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, List
2 |
3 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import (
4 | autoPyTorchTabularPreprocessingComponent
5 | )
6 | from autoPyTorch.utils.common import FitRequirement
7 |
8 |
9 | class BaseImputer(autoPyTorchTabularPreprocessingComponent):
10 | """
11 | Provides abstract class interface for Imputers in AutoPyTorch
12 | """
13 |
14 | def __init__(self) -> None:
15 | super().__init__()
16 | self.add_fit_requirements([
17 | FitRequirement('numerical_columns', (List,), user_defined=True, dataset_property=True)])
18 |
19 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
20 | """
21 | Adds self into the 'X' dictionary and returns it.
22 | Args:
23 | X (Dict[str, Any]): 'X' dictionary
24 |
25 | Returns:
26 | (Dict[str, Any]): the updated 'X' dictionary
27 | """
28 | if self.preprocessor['numerical'] is None and len(X["dataset_properties"]["numerical_columns"]) != 0:
29 | raise ValueError("cant call transform on {} without fitting first."
30 | .format(self.__class__.__name__))
31 | X.update({'imputer': self.preprocessor})
32 | return X
33 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/setup/network_initializer/KaimingInit.py:
--------------------------------------------------------------------------------
1 | from typing import Callable
2 |
3 | import torch
4 |
5 | from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import (
6 | BaseNetworkInitializerComponent
7 | )
8 |
9 |
10 | class KaimingInit(BaseNetworkInitializerComponent):
11 | """
12 | Fills the input Tensor with values according to the method described in
13 | Delving deep into rectifiers: Surpassing human-level performance on
14 | ImageNet classification
15 | """
16 |
17 | def weights_init(self) -> Callable:
18 | """Returns the actual PyTorch model, that is dynamically created
19 | from a self.config object.
20 |
21 | self.config is a dictionary created form a given config in the config space.
22 | It contains the necessary information to build a network.
23 | """
24 | def initialization(m: torch.nn.Module) -> None:
25 | if isinstance(m, (torch.nn.Conv1d,
26 | torch.nn.Conv2d,
27 | torch.nn.Conv3d,
28 | torch.nn.Linear)):
29 | torch.nn.init.kaiming_normal_(m.weight.data)
30 | if m.bias is not None and self.bias_strategy == 'Zero':
31 | torch.nn.init.constant_(m.bias.data, 0.0)
32 | return initialization
33 |
--------------------------------------------------------------------------------
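All four initializer components share the same pattern: `weights_init` returns a closure that is applied recursively via `torch.nn.Module.apply`. A self-contained sketch of that pattern (hypothetical network; the zero-bias strategy is hard-coded here instead of being read from `self.bias_strategy`):

```
import torch

net = torch.nn.Sequential(
    torch.nn.Linear(8, 4),
    torch.nn.ReLU(),
    torch.nn.Linear(4, 1),
)

def initialization(m: torch.nn.Module) -> None:
    # Only convolutional and linear modules carry weights to initialize here
    if isinstance(m, (torch.nn.Conv1d, torch.nn.Conv2d,
                      torch.nn.Conv3d, torch.nn.Linear)):
        torch.nn.init.kaiming_normal_(m.weight.data)
        if m.bias is not None:  # 'Zero' bias strategy
            torch.nn.init.constant_(m.bias.data, 0.0)

net.apply(initialization)  # .apply() visits every submodule recursively
```
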
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, List
2 |
3 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import (
4 | autoPyTorchTabularPreprocessingComponent
5 | )
6 | from autoPyTorch.utils.common import FitRequirement
7 |
8 |
9 | class BaseEncoder(autoPyTorchTabularPreprocessingComponent):
10 | """
11 | Base class for encoder
12 | """
13 | def __init__(self) -> None:
14 | super().__init__()
15 | self.add_fit_requirements([
16 | FitRequirement('categorical_columns', (List,), user_defined=True, dataset_property=True),
17 | FitRequirement('categories', (List,), user_defined=True, dataset_property=True)])
18 |
19 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
20 | """
21 | Adds the self into the 'X' dictionary and returns it.
22 | Args:
23 | X (Dict[str, Any]): 'X' dictionary
24 |
25 | Returns:
26 | (Dict[str, Any]): the updated 'X' dictionary
27 | """
28 | if self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None:
29 | raise ValueError("cant call transform on {} without fitting first."
30 | .format(self.__class__.__name__))
31 | X.update({'encoder': self.preprocessor})
32 | return X
33 |
--------------------------------------------------------------------------------
/test/test_datasets/test_image_dataset.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | import numpy as np
4 |
5 | import torch
6 |
7 | import torchvision
8 |
9 | from autoPyTorch.datasets.image_dataset import ImageDataset
10 |
11 |
12 | @unittest.skip(reason="Image Dataset issue")
13 | class DatasetTest(unittest.TestCase):
14 | def runTest(self):
15 | dataset = torchvision.datasets.FashionMNIST(root='../../datasets/',
16 | transform=torchvision.transforms.ToTensor(),
17 | download=True)
18 | ds = ImageDataset(dataset)
19 | self.assertIsInstance(ds.mean, torch.Tensor)
20 | self.assertIsInstance(ds.std, torch.Tensor)
21 | for img, _ in ds.train_tensors:
22 | self.assertIsInstance(img, torch.Tensor)
23 |
24 |
25 | @unittest.skip(reason="Image Dataset issue")
26 | class NumpyArrayTest(unittest.TestCase):
27 | def runTest(self):
28 | matrix = np.random.randint(0, 255, (15, 3, 10, 10)).astype(np.float64)
29 | target_df = np.random.randint(0, 5, (15, )).astype(np.float64)
30 | ds = ImageDataset((matrix, target_df))
31 | self.assertIsInstance(ds.mean, torch.Tensor)
32 | self.assertIsInstance(ds.std, torch.Tensor)
33 | for img, _ in ds.train_tensors:
34 | self.assertIsInstance(img, torch.Tensor)
35 |
36 |
37 | if __name__ == '__main__':
38 | unittest.main()
39 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Push to PyPi
2 |
3 | on:
4 | push:
5 | tags:
6 | - "v*"
7 | branches: [master]
8 | workflow_dispatch:
9 |
10 | jobs:
11 | publish:
12 | runs-on: "ubuntu-latest"
13 |
14 | steps:
15 | - name: Checkout source
16 | uses: actions/checkout@master
17 | with:
18 | submodules: recursive
19 |
20 | - name: Set up Python 3.9
21 | uses: actions/setup-python@v2
22 | with:
23 | python-version: 3.9
24 |
25 | - name: Install pypa/build
26 | run: >-
27 | python -m
28 | pip install
29 | build
30 | --user
31 | - name: Build a binary wheel and a source tarball
32 | run: >-
33 | python -m
34 | build
35 | --sdist
36 | --wheel
37 | --outdir dist/
38 | .
39 |
40 | - name: Publish distribution 📦 to Test PyPI
41 | uses: pypa/gh-action-pypi-publish@master
42 | with:
43 | skip_existing: true
44 | user: __token__
45 | password: ${{ secrets.TEST_PYPI_TOKEN }}
46 | repository_url: https://test.pypi.org/legacy/
47 |
48 | - name: Publish distribution 📦 to PyPI
49 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
50 | uses: pypa/gh-action-pypi-publish@master
51 | with:
52 | user: __token__
53 | password: ${{ secrets.pypi_token }}
54 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/coalescer/base_coalescer.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, List
2 |
3 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import (
4 | autoPyTorchTabularPreprocessingComponent
5 | )
6 | from autoPyTorch.utils.common import FitRequirement
7 |
8 |
9 | class BaseCoalescer(autoPyTorchTabularPreprocessingComponent):
10 | def __init__(self) -> None:
11 | super().__init__()
12 | self._processing = True
13 | self.add_fit_requirements([
14 | FitRequirement('categorical_columns', (List,), user_defined=True, dataset_property=True),
15 | FitRequirement('categories', (List,), user_defined=True, dataset_property=True)
16 | ])
17 |
18 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
19 | """
20 | Add the preprocessor to the provided fit dictionary `X`.
21 |
22 | Args:
23 | X (Dict[str, Any]): fit dictionary in sklearn
24 |
25 | Returns:
26 | X (Dict[str, Any]): the updated fit dictionary
27 | """
28 | if self._processing and self.preprocessor['categorical'] is None:
29 | # If we apply the minority coalescer, we must have a categorical preprocessor!
30 | raise RuntimeError(f"fit() must be called before transform() on {self.__class__.__name__}")
31 |
32 | X.update({'coalescer': self.preprocessor})
33 | return X
34 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/utils.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, List
2 |
3 | from sklearn.base import BaseEstimator
4 |
5 |
6 | def get_tabular_preprocessers(X: Dict[str, Any]) -> Dict[str, List[BaseEstimator]]:
7 | """
8 | Expects the fit dictionary (X) to contain numerical/categorical preprocessors
9 | (fitted numerical/categorical preprocessing nodes) that will build the
10 | column transformer in the TabularColumnTransformer. This function
11 | parses X and extracts such components.
12 | Creates a dictionary with two keys:
13 | numerical - containing the list of numerical preprocessors
14 | categorical - containing the list of categorical preprocessors
15 | Args:
16 | X: fit dictionary
17 | Returns:
18 | (Dict[str, List[BaseEstimator]]): dictionary with list of numerical and categorical preprocessors
19 | """
20 | preprocessor: Dict[str, List[BaseEstimator]] = dict(numerical=list(), categorical=list())
21 | for key, value in X.items():
22 | if isinstance(value, dict):
23 | # as each preprocessor is child of BaseEstimator
24 | if 'numerical' in value and isinstance(value['numerical'], BaseEstimator):
25 | preprocessor['numerical'].append(value['numerical'])
26 | if 'categorical' in value and isinstance(value['categorical'], BaseEstimator):
27 | preprocessor['categorical'].append(value['categorical'])
28 |
29 | return preprocessor
30 |
--------------------------------------------------------------------------------
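A small sketch of the parsing behaviour on a hypothetical fit dictionary; entries that are not dicts, or whose values are not `BaseEstimator` instances, are ignored:

```
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.utils import (
    get_tabular_preprocessers,
)

X = {
    'scaler': {'numerical': MinMaxScaler(), 'categorical': None},
    'encoder': {'numerical': None, 'categorical': OneHotEncoder()},
    'train_indices': [0, 1, 2],  # not a dict, so it is skipped
}
preprocessor = get_tabular_preprocessers(X)
# -> {'numerical': [MinMaxScaler()], 'categorical': [OneHotEncoder()]}
```
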
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/MinMaxScaler.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional, Tuple, Union
2 |
3 | import numpy as np
4 |
5 | from sklearn.preprocessing import MinMaxScaler as SklearnMinMaxScaler
6 |
7 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
8 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler import BaseScaler
9 |
10 |
11 | class MinMaxScaler(BaseScaler):
12 | """
13 | Scale numerical columns/features into feature_range
14 | """
15 | def __init__(self,
16 | random_state: Optional[Union[np.random.RandomState, int]] = None,
17 | feature_range: Tuple[Union[int, float], Union[int, float]] = (0, 1)):
18 | super().__init__()
19 | self.random_state = random_state
20 | self.feature_range = feature_range
21 |
22 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseScaler:
23 |
24 | self.check_requirements(X, y)
25 |
26 | self.preprocessor['numerical'] = SklearnMinMaxScaler(feature_range=self.feature_range, copy=False)
27 | return self
28 |
29 | @staticmethod
30 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
31 | ) -> Dict[str, Union[str, bool]]:
32 | return {
33 | 'shortname': 'MinMaxScaler',
34 | 'name': 'MinMaxScaler',
35 | 'handles_sparse': True
36 | }
37 |
--------------------------------------------------------------------------------
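To make the fit/transform contract of these scaler components concrete, a hedged usage sketch with a hand-built fit dictionary (the keys are chosen to satisfy the FitRequirements declared in `BaseScaler`; in practice the pipeline assembles this dictionary itself):

```
from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.MinMaxScaler import (
    MinMaxScaler,
)

X = {'dataset_properties': {'numerical_columns': [0, 1], 'categorical_columns': []}}
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(X)            # stores an sklearn MinMaxScaler under preprocessor['numerical']
X = scaler.transform(X)  # publishes the preprocessor dict under X['scaler']
```
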
/autoPyTorch/pipeline/components/training/base_training.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional
2 |
3 | import numpy as np
4 |
5 | from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent
6 |
7 |
8 | class autoPyTorchTrainingComponent(autoPyTorchComponent):
9 | """Provide an abstract interface for training nodes
10 | in Auto-Pytorch"""
11 |
12 | def __init__(self, random_state: Optional[np.random.RandomState] = None) -> None:
13 | super(autoPyTorchTrainingComponent, self).__init__(random_state=random_state)
14 |
15 | def transform(self, X: Dict) -> Dict:
16 | """The transform function calls the transform function of the
17 | underlying model and returns the transformed array.
18 |
19 | Args:
20 | X (Dict): input features
21 |
22 | Returns:
23 | Dict: Transformed features
24 | """
25 | raise NotImplementedError()
26 |
27 | def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None:
28 | """
29 | A mechanism in code to ensure the correctness of the fit dictionary.
30 | It recursively makes sure that the children and parent level requirements
31 | are honored before fit.
32 |
33 | Args:
34 | X (Dict[str, Any]): Dictionary with fitted parameters. It is a message passing
35 | mechanism, in which during a transform, a component adds relevant information
36 | so that further stages can be properly fitted
37 | """
38 | pass
39 |
--------------------------------------------------------------------------------
/autoPyTorch/utils/parallel.py:
--------------------------------------------------------------------------------
1 | import multiprocessing
2 | import sys
3 |
4 |
5 | def preload_modules(context: multiprocessing.context.BaseContext) -> None:
6 | """
7 | This function is meant to be used with the forkserver multiprocessing context.
8 | More details about it can be found here:
9 | https://docs.python.org/3/library/multiprocessing.html
10 |
11 | Forkserver is known to be slower than other contexts. We use it, because it helps
12 | reduce the probability of a deadlock. To make it fast, we pre-load modules so that
13 | forked children have the desired modules available.
14 |
15 | We do not preload deadlock-prone modules such as logging.
16 |
17 | Args:
18 | context (multiprocessing.context.BaseContext): One of the three supported multiprocessing
19 | contexts being fork, forkserver or spawn.
20 | """
21 | all_loaded_modules = sys.modules.keys()
22 | preload = [
23 | loaded_module for loaded_module in all_loaded_modules
24 | if loaded_module.split('.')[0] in (
25 | 'smac',
26 | 'autoPyTorch',
27 | 'numpy',
28 | 'scipy',
29 | 'pandas',
30 | 'pynisher',
31 | 'sklearn',
32 | 'ConfigSpace',
33 | 'torch',
34 | 'torchvision',
35 | 'tensorboard',
36 | 'imgaug',
37 | 'catboost',
38 | 'lightgbm',
39 | ) and 'logging' not in loaded_module
40 | ]
41 | context.set_forkserver_preload(preload)
42 |
--------------------------------------------------------------------------------
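A minimal sketch of the intended call site for `preload_modules` (hypothetical caller; the forkserver context is only available on POSIX platforms):

```
import multiprocessing

from autoPyTorch.utils.parallel import preload_modules

context = multiprocessing.get_context('forkserver')
preload_modules(context)  # children forked from the server start with these modules already imported
with context.Pool(processes=2) as pool:
    print(pool.map(abs, [-1, -2, -3]))  # [1, 2, 3]
```
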
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/PowerTransformer.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional, Union
2 |
3 | import numpy as np
4 |
5 | from sklearn.preprocessing import PowerTransformer as SklearnPowerTransformer
6 |
7 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
8 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler import BaseScaler
9 |
10 |
11 | class PowerTransformer(BaseScaler):
12 | """
13 | Map data to as close to a Gaussian distribution as possible
14 | in order to reduce variance and minimize skewness.
15 |
16 | Uses `yeo-johnson` power transform method. Also, data is normalised
17 | to zero mean and unit variance.
18 | """
19 | def __init__(self,
20 | random_state: Optional[np.random.RandomState] = None):
21 | super().__init__()
22 | self.random_state = random_state
23 |
24 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseScaler:
25 |
26 | self.check_requirements(X, y)
27 |
28 | self.preprocessor['numerical'] = SklearnPowerTransformer(method='yeo-johnson', copy=False)
29 | return self
30 |
31 | @staticmethod
32 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
33 | ) -> Dict[str, Union[str, bool]]:
34 | return {
35 | 'shortname': 'PowerTransformer',
36 | 'name': 'PowerTransformer',
37 | 'handles_sparse': False
38 | }
39 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/OneHotEncoder.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional, Union
2 |
3 | import numpy as np
4 |
5 | from sklearn.preprocessing import OneHotEncoder as OHE
6 |
7 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
8 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder import BaseEncoder
9 |
10 |
11 | class OneHotEncoder(BaseEncoder):
12 | """
13 | Encode categorical features as a one-hot numerical array
14 | """
15 | def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None):
16 | super().__init__()
17 | self.random_state = random_state
18 |
19 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEncoder:
20 |
21 | self.check_requirements(X, y)
22 |
23 | self.preprocessor['categorical'] = OHE(
24 | # It is safer to have the OHE produce a 0 array than to crash a good configuration
25 | categories=X['dataset_properties']['categories']
26 | if len(X['dataset_properties']['categories']) > 0 else 'auto',
27 | sparse=False,
28 | handle_unknown='ignore')
29 | return self
30 |
31 | @staticmethod
32 | def get_properties(
33 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
34 | ) -> Dict[str, Union[str, bool]]:
35 | return {
36 | 'shortname': 'OneHotEncoder',
37 | 'name': 'One Hot Encoder',
38 | 'handles_sparse': False
39 | }
40 |
--------------------------------------------------------------------------------
/.github/workflows/dist.yml:
--------------------------------------------------------------------------------
1 | name: dist-check
2 |
3 | on:
4 | # Manually triggerable in github
5 | workflow_dispatch:
6 |
7 | # When a push occurs on either of these branches
8 | push:
9 | branches:
10 | - master
11 | - development
12 |
13 | # When a push occurs on a PR that targets these branches
14 | pull_request:
15 | branches:
16 | - master
17 | - development
18 |
19 | schedule:
20 | # Every day at 7AM UTC
21 | - cron: '0 07 * * *'
22 |
23 | jobs:
24 |
25 | dist:
26 | runs-on: ubuntu-latest
27 |
28 | steps:
29 | - name: Checkout
30 | uses: actions/checkout@v2
31 | with:
32 | submodules: recursive
33 | - name: Setup Python
34 | uses: actions/setup-python@v2
35 | with:
36 | python-version: 3.8
37 |
38 | - name: Build dist
39 | run: |
40 | python setup.py sdist
41 |
42 | - name: Twine check
43 | run: |
44 | pip install twine
45 | last_dist=$(ls -t dist/autoPyTorch-*.tar.gz | head -n 1)
46 | twine check "$last_dist" --strict
47 |
48 | - name: Install dist
49 | run: |
50 | last_dist=$(ls -t dist/autoPyTorch-*.tar.gz | head -n 1)
51 | pip install $last_dist
52 |
53 | - name: PEP 561 Compliance
54 | run: |
55 | pip install mypy
56 |
57 | cd .. # required to use the installed version of autoPyTorch
58 |
59 | # Note this doesn't perform mypy checks, those are handled in pre-commit.yaml
60 | # This only checks if autoPyTorch exports type information
61 | if ! mypy -c "import autoPyTorch"; then exit 1; fi
62 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/encoding/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | from collections import OrderedDict
3 | from typing import Dict
4 |
5 | from autoPyTorch.pipeline.components.base_component import (
6 | ThirdPartyComponents, autoPyTorchComponent, find_components)
7 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding import \
8 | EncoderChoice
9 | from autoPyTorch.pipeline.components.preprocessing.time_series_preprocessing.encoding.time_series_base_encoder import \
10 | TimeSeriesBaseEncoder
11 |
12 | encoding_directory = os.path.split(__file__)[0]
13 | _encoders = find_components(__package__,
14 | encoding_directory,
15 | TimeSeriesBaseEncoder)
16 | _addons = ThirdPartyComponents(TimeSeriesBaseEncoder)
17 |
18 |
19 | def add_encoder(encoder: TimeSeriesBaseEncoder) -> None:
20 | _addons.add_component(encoder)
21 |
22 |
23 | class TimeSeriesEncoderChoice(EncoderChoice):
24 | """
25 | Allows for dynamically choosing encoding component at runtime
26 | """
27 |
28 | def get_components(self) -> Dict[str, autoPyTorchComponent]:
29 | """Returns the available encoder components
30 |
31 | Args:
32 | None
33 |
34 | Returns:
35 | Dict[str, autoPyTorchComponent]: all BaseEncoder components available
36 | as choices for encoding the categorical columns
37 | """
38 | components = OrderedDict()
39 | components.update(_encoders)
40 | components.update(_addons.components)
41 | return components
42 |
--------------------------------------------------------------------------------
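A short sketch of listing the dynamically discovered components, along the lines of the encoder-choice test earlier in this dump (the printed names are illustrative):

```
from autoPyTorch.pipeline.components.preprocessing.time_series_preprocessing.encoding import (
    TimeSeriesEncoderChoice,
)

dataset_properties = {'numerical_columns': [], 'categorical_columns': [0]}
encoder_choice = TimeSeriesEncoderChoice(dataset_properties)
print(list(encoder_choice.get_components().keys()))
# e.g. ['TimeSeriesNoEncoder', 'TimeSeriesOneHotEncoder']
```
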
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | NOTE: ISSUES ARE NOT FOR CODE HELP - Ask for Help at https://stackoverflow.com
2 |
3 | Your issue may already be reported!
4 | Also, please search on the [issue tracker](../) before creating one.
5 |
6 | * **I'm submitting a ...**
7 | - [ ] bug report
8 | - [ ] feature request
9 | - [ ] support request => Please do not submit support request here, see note at the top of this template.
10 |
11 | # Issue Description
12 | * When Issue Happens
13 | * Steps To Reproduce
14 | 1.
15 | 1.
16 | 1.
17 |
18 | ## Expected Behavior
19 |
20 |
21 |
22 | ## Current Behavior
23 |
24 |
25 |
26 | ## Possible Solution
27 |
28 |
29 |
30 | ## Your Code
31 |
32 | ```
33 | If relevant, paste your code here
34 | ```
35 |
36 | ## Error message
37 |
38 | ```
39 | If relevant, paste all of your error messages here
40 | ```
41 |
42 | ## Your Local environment
43 | * Operating System, version
44 | * Python, version
45 | * Outputs of `pip freeze` or `conda list`
46 |
47 | Make sure to add **all the information needed to understand the bug** so that someone can help.
48 | If the info is missing, we'll add the 'Needs more information' label and close the issue until there is enough information.
--------------------------------------------------------------------------------
/test/test_evaluation/test_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests the functionality in autoPyTorch.evaluation.utils
3 | """
4 | import pytest
5 |
6 | from autoPyTorch.evaluation.utils import DisableFileOutputParameters
7 |
8 |
9 | @pytest.mark.parametrize('disable_file_output',
10 | [['pipeline', 'pipelines'],
11 | [DisableFileOutputParameters.pipelines, DisableFileOutputParameters.pipeline]])
12 | def test_disable_file_output_no_error(disable_file_output):
13 | """
14 | Checks that `DisableFileOutputParameters.check_compatibility`
15 | does not raise an error for the parameterized values of `disable_file_output`.
16 |
17 | Args:
18 | disable_file_output ([List[Union[str, DisableFileOutputParameters]]]):
19 | Options that should be compatible with the `DisableFileOutputParameters`
20 | defined in `autoPyTorch`.
21 | """
22 | DisableFileOutputParameters.check_compatibility(disable_file_output=disable_file_output)
23 |
24 |
25 | def test_disable_file_output_error():
26 | """
27 | Checks that `DisableFileOutputParameters.check_compatibility` raises an error
28 | for a value not present in `DisableFileOutputParameters` and ensures that the
29 | expected error is raised.
30 | """
31 | disable_file_output = ['model']
32 | with pytest.raises(ValueError, match=r"Expected .*? to be in the members (.*?) of"
33 | r" DisableFileOutputParameters or as string value"
34 | r" of a member."):
35 | DisableFileOutputParameters.check_compatibility(disable_file_output=disable_file_output)
36 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, List, Optional
2 |
3 | import numpy as np
4 |
5 | from sklearn.utils import check_random_state
6 |
7 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import (
8 | autoPyTorchTabularPreprocessingComponent
9 | )
10 |
11 |
12 | class autoPyTorchFeaturePreprocessingComponent(autoPyTorchTabularPreprocessingComponent):
13 | _required_properties: List[str] = [
14 | 'handles_sparse', 'handles_classification', 'handles_regression']
15 |
16 | def __init__(self, random_state: Optional[np.random.RandomState] = None):
17 | if random_state is None:
18 | # Components need a random state for
19 | # sampling -- for example in MixUp training
20 | self.random_state = check_random_state(1)
21 | else:
22 | self.random_state = random_state
23 | super().__init__()
24 |
25 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
26 | """
27 | Adds the fitted feature preprocessor into the 'X' dictionary and returns it.
28 | Args:
29 | X (Dict[str, Any]): 'X' dictionary
30 |
31 | Returns:
32 | (Dict[str, Any]): the updated 'X' dictionary
33 | """
34 | if self.preprocessor['numerical'] is None:
35 | raise AttributeError("{} can't tranform without fitting first"
36 | .format(self.__class__.__name__))
37 | X.update({'feature_preprocessor': self.preprocessor})
38 | return X
39 |
--------------------------------------------------------------------------------
/test/test_pipeline/components/training/test_feature_data_loader.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import unittest.mock
3 |
4 | import torchvision
5 |
6 | from autoPyTorch.pipeline.components.training.data_loader.feature_data_loader import (
7 | FeatureDataLoader
8 | )
9 |
10 |
11 | class TestFeatureDataLoader(unittest.TestCase):
12 | def test_build_transform_small_preprocess_true(self):
13 | """
14 | Makes sure a proper composition is created
15 | """
16 | loader = FeatureDataLoader()
17 |
18 | fit_dictionary = {'dataset_properties': {'is_small_preprocess': True}}
19 | for thing in ['imputer', 'scaler', 'encoder']:
20 | fit_dictionary[thing] = [unittest.mock.Mock()]
21 |
22 | compose = loader.build_transform(fit_dictionary, mode='train')
23 |
24 | self.assertIsInstance(compose, torchvision.transforms.Compose)
25 |
26 | # No preprocessing needed here as it was done before
27 | self.assertEqual(len(compose.transforms), 1)
28 |
29 | def test_build_transform_small_preprocess_false(self):
30 | """
31 | Makes sure a proper composition is created
32 | """
33 | loader = FeatureDataLoader()
34 |
35 | fit_dictionary = {'dataset_properties': {'is_small_preprocess': False},
36 | 'preprocess_transforms': [unittest.mock.Mock()]}
37 |
38 | compose = loader.build_transform(fit_dictionary, mode='train')
39 |
40 | self.assertIsInstance(compose, torchvision.transforms.Compose)
41 |
42 | # We expect the to-tensor, the preprocess transforms and the check_array
43 | self.assertEqual(len(compose.transforms), 4)
44 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/encoding/time_series_base_encoder.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, List
2 |
3 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder import \
4 | BaseEncoder
5 | from autoPyTorch.pipeline.components.preprocessing.time_series_preprocessing.base_time_series_preprocessing import \
6 | autoPyTorchTimeSeriesPreprocessingComponent
7 | from autoPyTorch.utils.common import FitRequirement
8 |
9 |
10 | class TimeSeriesBaseEncoder(autoPyTorchTimeSeriesPreprocessingComponent):
11 | """
12 | Base class for encoder
13 | """
14 | def __init__(self) -> None:
15 | super(TimeSeriesBaseEncoder, self).__init__()
16 | self.add_fit_requirements([
17 | FitRequirement('categorical_columns', (List,), user_defined=True, dataset_property=True),
18 | FitRequirement('categories', (List,), user_defined=True, dataset_property=True),
19 | FitRequirement('feature_names', (tuple,), user_defined=True, dataset_property=True),
20 | FitRequirement('feature_shapes', (Dict, ), user_defined=True, dataset_property=True),
21 | ])
22 | self.feature_shapes: Dict[str, int] = {}
23 |
24 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
25 | """
26 | Adds the self into the 'X' dictionary and returns it.
27 |
28 | Args:
29 | X (Dict[str, Any]): 'X' dictionary
30 |
31 | Returns:
32 | (Dict[str, Any]): the updated 'X' dictionary
33 | """
34 | X['dataset_properties'].update({'feature_shapes': self.feature_shapes})
35 | return BaseEncoder.transform(self, X)
36 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/StandardScaler.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional, Union
2 |
3 | import numpy as np
4 |
5 | from sklearn.preprocessing import StandardScaler as SklearnStandardScaler
6 |
7 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
8 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler import BaseScaler
9 | from autoPyTorch.utils.common import FitRequirement
10 |
11 |
12 | class StandardScaler(BaseScaler):
13 | """
14 | Standardise numerical columns/features by removing the mean and scaling to unit variance
15 | """
16 | def __init__(self,
17 | random_state: Optional[Union[np.random.RandomState, int]] = None
18 | ):
19 | super().__init__()
20 | self.random_state = random_state
21 | self.add_fit_requirements([
22 | FitRequirement('issparse', (bool,), user_defined=True, dataset_property=True)
23 | ])
24 |
25 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseScaler:
26 |
27 | self.check_requirements(X, y)
28 |
29 | with_mean, with_std = (False, False) if X['dataset_properties']['issparse'] else (True, True)
30 | self.preprocessor['numerical'] = SklearnStandardScaler(with_mean=with_mean, with_std=with_std, copy=False)
31 | return self
32 |
33 | @staticmethod
34 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
35 | ) -> Dict[str, Union[str, bool]]:
36 | return {
37 | 'shortname': 'StandardScaler',
38 | 'name': 'Standard Scaler',
39 | 'handles_sparse': True
40 | }
41 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/base_tabular_preprocessing.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, List, Optional, Union
2 |
3 | from sklearn.base import BaseEstimator
4 |
5 | from autoPyTorch.pipeline.components.preprocessing.base_preprocessing import autoPyTorchPreprocessingComponent
6 |
7 |
8 | class autoPyTorchTabularPreprocessingComponent(autoPyTorchPreprocessingComponent):
9 | """
10 | Provides abstract interface for preprocessing algorithms in AutoPyTorch.
11 | """
12 | _required_properties: List[str] = ['handles_sparse']
13 |
14 | def __init__(self) -> None:
15 | super().__init__()
16 | self.preprocessor: Union[Dict[str, Optional[BaseEstimator]], BaseEstimator] = dict(
17 | numerical=None, categorical=None)
18 |
19 | def get_preprocessor_dict(self) -> Dict[str, BaseEstimator]:
20 | """
21 | Returns the preprocessor dictionary containing the sklearn numerical
22 | and categorical preprocessors under the "numerical" and "categorical"
23 | keys. A key may map to None if the preprocessor does not
24 | handle the datatype defined by that key.
25 |
26 | Returns:
27 | Dict[str, BaseEstimator]: early_preprocessor dictionary
28 | """
29 | if self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None:
30 | raise AttributeError("{} can't return the preprocessor dict without fitting first"
31 | .format(self.__class__.__name__))
32 | return self.preprocessor
33 |
34 | def __str__(self) -> str:
35 | """ Allow a nice understanding of what components where used """
36 | string = self.__class__.__name__
37 | return string
38 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/encoding/NoEncoder.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional, Union
2 |
3 | import numpy as np
4 |
5 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
6 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.NoEncoder import \
7 | NoEncoder
8 | from autoPyTorch.pipeline.components.preprocessing.time_series_preprocessing.encoding.time_series_base_encoder import \
9 | TimeSeriesBaseEncoder
10 |
11 |
12 | class TimeSeriesNoEncoder(TimeSeriesBaseEncoder):
13 | def __init__(self,
14 | random_state: Optional[Union[np.random.RandomState, int]] = None
15 | ):
16 | super().__init__()
17 | self.random_state = random_state
18 |
19 | def fit(self, X: Dict[str, Any], y: Any = None) -> "TimeSeriesBaseEncoder":
20 | NoEncoder.fit(self, X, y)
21 | self.feature_shapes = X['dataset_properties']['feature_shapes']
22 | return self
23 |
24 | @staticmethod
25 | def get_properties(
26 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
27 | ) -> Dict[str, Union[str, bool]]:
28 | return {
29 | 'shortname': 'TimeSeriesNoEncoder',
30 | 'name': 'Time Series No Encoder',
31 | 'handles_sparse': True
32 | }
33 |
34 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
35 | """
36 | Adds the self into the 'X' dictionary and returns it.
37 |
38 | Args:
39 | X (Dict[str, Any]): 'X' dictionary
40 |
41 | Returns:
42 | (Dict[str, Any]): the updated 'X' dictionary
43 | """
44 | return NoEncoder.transform(self, X)
45 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/setup/optimizer/base_optimizer.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional
2 |
3 | import torch
4 | from torch.optim import Optimizer
5 |
6 | from autoPyTorch.pipeline.components.setup.base_setup import autoPyTorchSetupComponent
7 | from autoPyTorch.utils.common import FitRequirement
8 |
9 |
10 | class BaseOptimizerComponent(autoPyTorchSetupComponent):
11 | """Provide an abstract interface for Pytorch Optimizers
12 | in Auto-Pytorch"""
13 |
14 | def __init__(self) -> None:
15 | super().__init__()
16 | self.optimizer: Optional[Optimizer] = None
17 | self.add_fit_requirements([
18 | FitRequirement('network', (torch.nn.Module,), user_defined=False, dataset_property=False)])
19 |
20 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
21 | """The transform function calls the transform function of the
22 | underlying model and returns the transformed array.
23 |
24 | Args:
25 | X (np.ndarray): input features
26 |
27 | Returns:
28 | np.ndarray: Transformed features
29 | """
30 | X.update({'optimizer': self.optimizer})
31 | return X
32 |
33 | def get_optimizer(self) -> Optimizer:
34 | """Return the underlying Optimizer object.
35 | Returns:
36 | model : the underlying Optimizer object
37 | """
38 | assert self.optimizer is not None, "No optimizer was fitted"
39 | return self.optimizer
40 |
41 | def __str__(self) -> str:
42 | """ Allow a nice understanding of what components where used """
43 | string = self.optimizer.__class__.__name__
44 | info = vars(self)
45 | string += " (" + str(info) + ")"
46 | return string
47 |
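A minimal concrete subclass might look like the following sketch (hypothetical, not a component shipped in this file): fit() instantiates a torch optimizer over the network supplied through the fit dictionary, and the inherited transform() then passes it on to later pipeline stages.

# Hypothetical sketch built on BaseOptimizerComponent; 'AdamWComponent'
# and its 'lr' parameter are illustrative only.
from typing import Any, Dict

from torch.optim import AdamW


class AdamWComponent(BaseOptimizerComponent):
    def __init__(self, lr: float = 1e-3) -> None:
        super().__init__()
        self.lr = lr

    def fit(self, X: Dict[str, Any], y: Any = None) -> "AdamWComponent":
        # 'network' is guaranteed to be present by the FitRequirement
        # declared in BaseOptimizerComponent.__init__
        self.check_requirements(X, y)
        self.optimizer = AdamW(X['network'].parameters(), lr=self.lr)
        return self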
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/setup/traditional_ml/traditional_learner/__init__.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Type, Union
2 |
3 | from autoPyTorch.pipeline.components.base_component import (
4 | ThirdPartyComponents,
5 | )
6 | from autoPyTorch.pipeline.components.setup.traditional_ml.traditional_learner.base_traditional_learner import \
7 | BaseTraditionalLearner
8 | from autoPyTorch.pipeline.components.setup.traditional_ml.traditional_learner.learners import (
9 | CatboostModel,
10 | ExtraTreesModel,
11 | KNNModel,
12 | LGBModel,
13 | RFModel,
14 | SVMModel)
15 |
16 | _traditional_learners = {
17 | # Sorted from most to least robust model.
18 | # Depending on the allocated time budget, only the
19 | # top models from this dict are to be fitted.
20 | # LGBM is the most robust model, with
21 | # internal measures to prevent crashes and overfitting.
22 | # Additionally, it is one of the state-of-the-art
23 | # methods for tabular prediction.
24 | # CatBoost follows, as it handles categorical-heavy
25 | # datasets well. The other models are complementary and
26 | # their ordering is not critical.
27 | 'lgb': LGBModel,
28 | 'catboost': CatboostModel,
29 | 'random_forest': RFModel,
30 | 'extra_trees': ExtraTreesModel,
31 | 'svm': SVMModel,
32 | 'knn': KNNModel,
33 | }
34 | _addons = ThirdPartyComponents(BaseTraditionalLearner)
35 |
36 |
37 | def add_traditional_learner(traditional_learner: BaseTraditionalLearner) -> None:
38 | _addons.add_component(traditional_learner)
39 |
40 |
41 | def get_available_traditional_learners() -> Dict[str, Union[Type[BaseTraditionalLearner], Any]]:
42 | traditional_learners = dict()
43 | traditional_learners.update(_traditional_learners)
44 | return traditional_learners
45 |
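Since the dict's insertion order encodes robustness, a budget-aware caller can simply take the first k entries; a short sketch (the budget variable is illustrative):

# Pick the top models by robustness under a tight time budget.
learners = get_available_traditional_learners()
num_models_budget_allows = 2  # illustrative value
chosen = list(learners.items())[:num_models_budget_allows]
print([name for name, _ in chosen])  # ['lgb', 'catboost']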
--------------------------------------------------------------------------------
/.binder/postBuild:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | python -m pip install .[docs,examples]
6 |
7 | # Taken from https://github.com/scikit-learn/scikit-learn/blob/22cd233e1932457947e9994285dc7fd4e93881e4/.binder/postBuild
8 | # under BSD3 license, copyright the scikit-learn contributors
9 |
10 | # This script is called in a binder context. When this script is called, we are
11 | # inside a git checkout of the automl/Auto-PyTorch repo. This script
12 | # generates notebooks from the Auto-PyTorch python examples.
13 |
14 | if [[ ! -f /.dockerenv ]]; then
15 | echo "This script was written for repo2docker and is supposed to run inside a docker container."
16 | echo "Exiting because this script can delete data if run outside of a docker container."
17 | exit 1
18 | fi
19 |
20 | # Copy content we need from the Auto-PyTorch repo
21 | TMP_CONTENT_DIR=/tmp/Auto-PyTorch
22 | mkdir -p $TMP_CONTENT_DIR
23 | cp -r examples .binder $TMP_CONTENT_DIR
24 | # delete everything in current directory including dot files and dot folders
25 | find . -delete
26 |
27 | # Generate notebooks and remove other files from examples folder
28 | GENERATED_NOTEBOOKS_DIR=examples
29 | cp -r $TMP_CONTENT_DIR/examples $GENERATED_NOTEBOOKS_DIR
30 |
31 | find $GENERATED_NOTEBOOKS_DIR -name 'example_*.py' -exec sphx_glr_python_to_jupyter.py '{}' +
32 | # Keep __init__.py and custom_metrics.py
33 | NON_NOTEBOOKS=$(find $GENERATED_NOTEBOOKS_DIR -type f | grep -v '\.ipynb' | grep -v 'init' | grep -v 'custom_metrics')
34 | rm -f $NON_NOTEBOOKS
35 |
36 | # Modify the path to be consistent with the path given by sphinx-gallery
37 | mkdir notebooks
38 | mv $GENERATED_NOTEBOOKS_DIR notebooks/
39 |
40 | # Put the .binder folder back (may be useful for debugging purposes)
41 | mv $TMP_CONTENT_DIR/.binder .
42 | # Final clean up
43 | rm -rf $TMP_CONTENT_DIR
44 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/variance_thresholding/VarianceThreshold.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional, Union
2 |
3 | import numpy as np
4 |
5 | from sklearn.feature_selection import VarianceThreshold as SklearnVarianceThreshold
6 |
7 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
8 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import \
9 | autoPyTorchTabularPreprocessingComponent
10 |
11 |
12 | class VarianceThreshold(autoPyTorchTabularPreprocessingComponent):
13 | """
14 | Removes features that are constant (i.e. have the same value for every sample) in the training data.
15 | """
16 | def __init__(self, random_state: Optional[np.random.RandomState] = None):
17 | super().__init__()
18 |
19 | def fit(self, X: Dict[str, Any], y: Optional[Any] = None) -> 'VarianceThreshold':
20 |
21 | self.check_requirements(X, y)
22 |
23 | self.preprocessor['numerical'] = SklearnVarianceThreshold(
24 | threshold=0.0
25 | )
26 | return self
27 |
28 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
29 | if self.preprocessor['numerical'] is None:
30 | raise ValueError("cannot call transform on {} without fitting first."
31 | .format(self.__class__.__name__))
32 | X.update({'variance_threshold': self.preprocessor})
33 | return X
34 |
35 | @staticmethod
36 | def get_properties(
37 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
38 | ) -> Dict[str, Union[str, bool]]:
39 |
40 | return {
41 | 'shortname': 'Variance Threshold',
42 | 'name': 'Variance Threshold (constant feature removal)',
43 | 'handles_sparse': True,
44 | }
45 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/base_time_series_preprocessing.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Optional, Union
2 |
3 | from sklearn.base import BaseEstimator
4 |
5 | from autoPyTorch.pipeline.components.preprocessing.base_preprocessing import (
6 | autoPyTorchPreprocessingComponent, autoPyTorchTargetPreprocessingComponent)
7 |
8 |
9 | class autoPyTorchTimeSeriesPreprocessingComponent(autoPyTorchPreprocessingComponent):
10 | """
11 | Provides abstract interface for time series preprocessing algorithms in AutoPyTorch.
12 | """
13 |
14 | def __init__(self) -> None:
15 | super().__init__()
16 | self.preprocessor: Union[Dict[str, Optional[BaseEstimator]], BaseEstimator] = dict(
17 | numerical=None, categorical=None)
18 |
19 | def __str__(self) -> str:
20 | """ Allow a nice understanding of what components where used """
21 | string = self.__class__.__name__
22 | return string
23 |
24 |
25 | class autoPyTorchTimeSeriesTargetPreprocessingComponent(autoPyTorchTargetPreprocessingComponent):
26 | """
27 | Provides abstract interface for time series target preprocessing algorithms in AutoPyTorch.
28 | Currently only numerical target preprocessing is supported.
29 | # TODO add support for categorical targets!
30 | # TODO define inverse transformation for each invertible numerical transformation (log, deseasonalization, etc.)
31 | """
32 | def __init__(self) -> None:
33 | super().__init__()
34 | self.preprocessor: Union[Dict[str, Optional[BaseEstimator]], BaseEstimator] = dict(
35 | numerical=None, categorical=None)
36 |
37 | def __str__(self) -> str:
38 | """ Allow a nice understanding of what components where used """
39 | string = self.__class__.__name__
40 | return string
41 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/base_normalizer.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict
2 |
3 | import numpy as np
4 |
5 | from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.base_image_preprocessor import \
6 | autoPyTorchImagePreprocessingComponent
7 | from autoPyTorch.utils.common import FitRequirement
8 |
9 |
10 | class BaseNormalizer(autoPyTorchImagePreprocessingComponent):
11 |
12 | def __init__(self) -> None:
13 | super(BaseNormalizer, self).__init__()
14 | self.add_fit_requirements([
15 | FitRequirement('mean', (np.ndarray,), user_defined=True, dataset_property=True),
16 | FitRequirement('std', (np.ndarray,), user_defined=True, dataset_property=True)])
17 |
18 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
19 |
20 | X.update({'normalise': self})
21 | return X
22 |
23 | def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None:
24 | """
25 | A mechanism in code to ensure the correctness of the fit dictionary
26 | It recursively makes sure that the children and parent level requirements
27 | are honored before fit.
28 |
29 | Args:
30 | X (Dict[str, Any]): Dictionary with fitted parameters. It is a message passing
31 | mechanism, in which during a transform, a component adds relevant information
32 | so that further stages can be properly fitted
33 | """
34 | super().check_requirements(X, y)
35 |
36 | if 0 in X['dataset_properties']['std']:
37 | raise ZeroDivisionError("Can't normalise when std is zero")
38 |
39 | def __str__(self) -> str:
40 | """ Allow a nice understanding of what components where used """
41 | string = self.__class__.__name__
42 | return string
43 |
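As a quick illustration of the zero-std guard (a sketch, assuming a concrete subclass such as ImageNormalizer from this repository; the real fit dictionary carries more entries):

import numpy as np

# One channel has zero standard deviation, so normalisation would divide by zero.
X = {'dataset_properties': {'mean': np.array([0.5, 0.5, 0.5]),
                            'std': np.array([0.2, 0.0, 0.3])}}
try:
    ImageNormalizer().fit(X)  # fit() calls check_requirements(X, y)
except ZeroDivisionError as error:
    print(error)  # "Can't normalise when std is zero"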
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/NoEncoder.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional, Union
2 |
3 | import numpy as np
4 |
5 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
6 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder import BaseEncoder
7 |
8 |
9 | class NoEncoder(BaseEncoder):
10 | """
11 | Don't perform encoding on categorical features
12 | """
13 | def __init__(self,
14 | random_state: Optional[Union[np.random.RandomState, int]] = None
15 | ):
16 | super().__init__()
17 | self.random_state = random_state
18 |
19 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEncoder:
20 | """
21 | Checks the fit requirements. Since no encoding is performed,
22 | there is no underlying model to fit.
23 | Args:
24 | X (Dict[str, Any]): fit dictionary
25 | y (Any): not used, kept for sklearn API compatibility
26 |
27 | Returns:
28 | instance of self
29 | """
30 | self.check_requirements(X, y)
31 |
32 | return self
33 |
34 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
35 | """
36 | Adds the preprocessor into the 'X' dictionary and returns it.
37 | Args:
38 | X (Dict[str, Any]): 'X' dictionary
39 |
40 | Returns:
41 | (Dict[str, Any]): the updated 'X' dictionary
42 | """
43 | X.update({'encoder': self.preprocessor})
44 | return X
45 |
46 | @staticmethod
47 | def get_properties(
48 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
49 | ) -> Dict[str, Union[str, bool]]:
50 | return {
51 | 'shortname': 'NoEncoder',
52 | 'name': 'No Encoder',
53 | 'handles_sparse': True
54 | }
55 |
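Taken together, fit() and transform() implement the message-passing pattern used throughout the pipeline: a component validates its requirements, then writes its (here unfitted) preprocessor into the fit dictionary for later stages. A condensed sketch, assuming the fit dictionary below carries everything check_requirements expects:

X = {'dataset_properties': {'categorical_columns': []}}  # simplified fit dictionary
encoder = NoEncoder()
X = encoder.fit(X).transform(X)
# 'encoder' now holds the component's preprocessor dict; both the
# 'numerical' and 'categorical' entries stay None since nothing was fitted.
print(X['encoder'])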
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/setup/network_embedding/NoEmbedding.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, List, Optional, Tuple, Union
2 |
3 | from ConfigSpace.configuration_space import ConfigurationSpace
4 |
5 | import numpy as np
6 |
7 | import torch
8 | from torch import nn
9 |
10 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
11 | from autoPyTorch.pipeline.components.setup.network_embedding.base_network_embedding import NetworkEmbeddingComponent
12 |
13 |
14 | class _NoEmbedding(nn.Module):
15 | def get_partial_models(self, subset_features: List[int]) -> "_NoEmbedding":
16 | return self
17 |
18 | def forward(self, x: torch.Tensor) -> torch.Tensor:
19 | return x
20 |
21 |
22 | class NoEmbedding(NetworkEmbeddingComponent):
23 | """
24 | Embedding component that performs no embedding; input features are passed through unchanged.
25 | """
26 |
27 | def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None):
28 | super().__init__(random_state=random_state)
29 |
30 | def build_embedding(self,
31 | num_input_features: np.ndarray,
32 | num_numerical_features: int) -> Tuple[nn.Module, Optional[List[int]]]:
33 | return _NoEmbedding(), None
34 |
35 | @staticmethod
36 | def get_hyperparameter_search_space(
37 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None,
38 | ) -> ConfigurationSpace:
39 | cs = ConfigurationSpace()
40 | return cs
41 |
42 | @staticmethod
43 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
44 | ) -> Dict[str, Union[str, bool]]:
45 | return {
46 | 'shortname': 'no embedding',
47 | 'name': 'NoEmbedding',
48 | 'handles_tabular': True,
49 | 'handles_image': False,
50 | 'handles_time_series': True,
51 | }
52 |
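The returned module is a pure pass-through, which a quick check confirms (a sketch; the shapes are illustrative):

import numpy as np
import torch

embedding, _ = NoEmbedding().build_embedding(
    num_input_features=np.array([]), num_numerical_features=4)
x = torch.randn(8, 4)
assert torch.equal(embedding(x), x)  # forward() returns its input unchanged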
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/setup/augmentation/image/base_image_augmenter.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Optional
2 |
3 | from ConfigSpace.configuration_space import ConfigurationSpace
4 |
5 | from imgaug.augmenters.meta import Augmenter
6 |
7 | import numpy as np
8 |
9 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
10 | from autoPyTorch.pipeline.components.setup.base_setup import autoPyTorchSetupComponent
11 |
12 |
13 | class BaseImageAugmenter(autoPyTorchSetupComponent):
14 | def __init__(self, use_augmenter: bool = True) -> None:
15 | super().__init__()
16 | self.use_augmenter = use_augmenter
17 | self.augmenter: Optional[Augmenter] = None
18 |
19 | def get_image_augmenter(self) -> Optional[Augmenter]:
20 | """
21 | Get fitted augmenter. Can only be called if fit()
22 | has been called on the object.
23 | Returns:
24 | Optional[Augmenter]: fitted augmenter (None if no augmenter is used)
25 | """
26 | if self.augmenter is None and self.use_augmenter:
27 | raise AttributeError("Can't return augmenter for {}, as augmenter is "
28 | "set to be used but it has not been fitted"
29 | " yet".format(self.__class__.__name__))
30 | return self.augmenter
31 |
32 | def __call__(self, X: np.ndarray) -> np.ndarray:
33 | if self.augmenter is None:
34 | raise ValueError("cant call {} without fitting first."
35 | .format(self.__class__.__name__))
36 | # explicitly converting to np array as currently zeropadandcrop gives a list
37 | return np.array(self.augmenter(images=X))
38 |
39 | @staticmethod
40 | def get_hyperparameter_search_space(
41 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
42 | ) -> ConfigurationSpace:
43 | cs = ConfigurationSpace()
44 | return cs
45 |
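A concrete subclass would populate self.augmenter during fit(), for example with an imgaug augmenter; the class below is a hypothetical sketch, not one of the augmenters shipped in this package:

from typing import Any, Dict

from imgaug import augmenters as iaa


class HorizontalFlipSketch(BaseImageAugmenter):
    def fit(self, X: Dict[str, Any], y: Any = None) -> "HorizontalFlipSketch":
        if self.use_augmenter:
            # Fliplr(p) flips each image horizontally with probability p
            self.augmenter = iaa.Fliplr(0.5, name=self.__class__.__name__)
        return self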
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/NoScaler.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional, Union
2 |
3 | import numpy as np
4 |
5 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
6 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler import BaseScaler
7 |
8 |
9 | class NoScaler(BaseScaler):
10 | """
11 | No scaling performed
12 | """
13 | def __init__(self,
14 | random_state: Optional[Union[np.random.RandomState, int]] = None
15 | ):
16 | super().__init__()
17 | self.random_state = random_state
18 |
19 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseScaler:
20 | """
21 | Checks the fit requirements. Since no scaling is performed,
22 | there is no underlying model to fit.
23 | Args:
24 | X (Dict[str, Any]): fit dictionary
25 | y (Any): not used, kept for sklearn API compatibility
26 |
27 | Returns:
28 | instance of self
29 | """
30 |
31 | self.check_requirements(X, y)
32 |
33 | return self
34 |
35 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
36 | """
37 | Adds the (unfitted) preprocessor into the 'X' dictionary
38 | and returns it.
39 |
40 | Args:
41 | X (Dict[str, Any]): 'X' dictionary
42 |
43 | Returns:
44 | (Dict[str, Any]): the updated 'X' dictionary
45 | """
46 | X.update({'scaler': self.preprocessor})
47 | return X
48 |
49 | @staticmethod
50 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
51 | ) -> Dict[str, Union[str, bool]]:
52 | return {
53 | 'shortname': 'NoScaler',
54 | 'name': 'No Scaler',
55 | 'handles_sparse': True
56 | }
57 |
--------------------------------------------------------------------------------
/test/test_pipeline/components/preprocessing/test_normalizers.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | import numpy as np
4 | from numpy.testing import assert_allclose, assert_array_equal
5 |
6 | from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.ImageNormalizer import ImageNormalizer
7 | from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.NoNormalizer import NoNormalizer
8 |
9 |
10 | class TestNormalizers(unittest.TestCase):
11 | def initialise(self):
12 | self.train = np.random.randint(0, 255, (3, 2, 2, 3))
13 | self.mean = np.array([np.mean(self.train[:, :, :, i]) for i in range(3)])
14 | self.std = np.array([np.std(self.train[:, :, :, i]) for i in range(3)])
15 |
16 | def test_image_normalizer(self):
17 | self.initialise()
18 | dataset_properties = {'mean': self.mean, 'std': self.std, }
19 | X = {'dataset_properties': dataset_properties, 'X_train': self.train}
20 |
21 | normalizer = ImageNormalizer()
22 | normalizer = normalizer.fit(X)
23 | X = normalizer.transform(X)
24 |
25 | # check if normalizer added to X is instance of self
26 | self.assertEqual(X['normalise'], normalizer)
27 | epsilon = 1e-8
28 | train = self.train - self.mean
29 | train *= 1.0 / (epsilon + self.std)
30 |
31 | assert_allclose(train, normalizer(self.train), rtol=1e-5)
32 |
33 | def test_no_normalizer(self):
34 | self.initialise()
35 |
36 | dataset_properties = {'mean': self.mean, 'std': self.std, }
37 | X = {'dataset_properties': dataset_properties, 'X_train': self.train}
38 |
39 | normalizer = NoNormalizer()
40 | normalizer = normalizer.fit(X)
41 | X = normalizer.transform(X)
42 |
43 | # check if normalizer added to X is instance of self
44 | self.assertEqual(X['normalise'], normalizer)
45 |
46 | assert_array_equal(self.train, normalizer(self.train))
47 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/time_series_preprocessing/encoding/OneHotEncoder.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional, Union
2 |
3 | import numpy as np
4 |
5 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
6 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.OneHotEncoder import \
7 | OneHotEncoder
8 | from autoPyTorch.pipeline.components.preprocessing.time_series_preprocessing.encoding.time_series_base_encoder import \
9 | TimeSeriesBaseEncoder
10 |
11 |
12 | class TimeSeriesOneHotEncoder(TimeSeriesBaseEncoder):
13 | def __init__(self,
14 | random_state: Optional[Union[np.random.RandomState, int]] = None
15 | ):
16 | super(TimeSeriesOneHotEncoder, self).__init__()
17 | self.random_state = random_state
18 |
19 | def fit(self, X: Dict[str, Any], y: Any = None) -> TimeSeriesBaseEncoder:
20 | OneHotEncoder.fit(self, X, y)
21 | categorical_columns = X['dataset_properties']['categorical_columns']
22 | n_features_cat = X['dataset_properties']['categories']
23 | feature_names = X['dataset_properties']['feature_names']
24 | feature_shapes = X['dataset_properties']['feature_shapes']
25 |
26 | if len(n_features_cat) == 0:
27 | n_features_cat = self.preprocessor['categorical'].categories # type: ignore
28 | for i, cat_column in enumerate(categorical_columns):
29 | feature_shapes[feature_names[cat_column]] = len(n_features_cat[i])
30 | self.feature_shapes = feature_shapes
31 | return self
32 |
33 | @staticmethod
34 | def get_properties(
35 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
36 | ) -> Dict[str, Union[str, bool]]:
37 | return {
38 | 'shortname': 'TimeSeriesOneHotEncoder',
39 | 'name': 'Time Series One Hot Encoder',
40 | 'handles_sparse': False
41 | }
42 |
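The loop above widens each categorical feature's entry in feature_shapes to its number of one-hot categories. A standalone sketch with hypothetical values:

# Hypothetical: feature 'weekday' (column 1) has 7 categories.
categorical_columns = [1]
feature_names = ['price', 'weekday']
feature_shapes = {'price': 1, 'weekday': 1}
n_features_cat = [list(range(7))]  # categories per categorical column

for i, cat_column in enumerate(categorical_columns):
    feature_shapes[feature_names[cat_column]] = len(n_features_cat[i])

print(feature_shapes)  # {'price': 1, 'weekday': 7}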
--------------------------------------------------------------------------------
/test/test_utils/test_common.py:
--------------------------------------------------------------------------------
1 | """
2 | This tests the functionality in autoPyTorch/utils/common.
3 | """
4 | from enum import Enum
5 |
6 | import pytest
7 |
8 | from autoPyTorch.utils.common import autoPyTorchEnum
9 |
10 |
11 | class SubEnum(autoPyTorchEnum):
12 | x = "x"
13 | y = "y"
14 |
15 |
16 | class DummyEnum(Enum):  # kept at module level so the parametrized tests below can reference it
17 | x = "x"
18 |
19 |
20 | @pytest.mark.parametrize('iter',
21 | ([SubEnum.x],
22 | ["x"],
23 | {SubEnum.x: "hello"},
24 | {'x': 'hello'},
25 | SubEnum,
26 | ["x", "y"]))
27 | def test_autopytorch_enum(iter):
28 | """
29 | This test ensures that a subclass of `autoPyTorchEnum`
30 | can be used with strings.
31 |
32 | Args:
33 | iter (Iterable):
34 | iterable to check for compatibility
35 | """
36 |
37 | e = SubEnum.x
38 |
39 | assert e in iter
40 |
41 |
42 | @pytest.mark.parametrize('iter',
43 | [[SubEnum.y],
44 | ["y"],
45 | {SubEnum.y: "hello"},
46 | {'y': 'hello'}])
47 | def test_autopytorch_enum_false(iter):
48 | """
49 | This test ensures that membership checks on a subclass of
50 | `autoPyTorchEnum` correctly return False for non-matching values.
51 | Args:
52 | iter (Iterable):
53 | iterable to check for compatibility
54 | """
55 |
56 | e = SubEnum.x
57 |
58 | assert e not in iter
59 |
60 |
61 | @pytest.mark.parametrize('others', (1, 2.0, SubEnum, DummyEnum.x))
62 | def test_raise_errors_autopytorch_enum(others):
63 | """
64 | This test ensures that a subclass of `autoPyTorchEnum`
65 | raises error properly.
66 | Args:
67 | others (Any):
68 | Variable to compare with SubEnum.
69 | """
70 |
71 | with pytest.raises(RuntimeError):
72 | SubEnum.x == others
73 |
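For reference, the behaviour exercised by these tests can be obtained with an __eq__ along the following lines (a sketch, not the actual autoPyTorchEnum implementation):

from enum import Enum
from typing import Any


class StringComparableEnum(Enum):
    """Sketch: members compare equal to their string values, raise otherwise."""

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, str):
            return bool(self.value == other)
        if isinstance(other, self.__class__):
            return self is other
        raise RuntimeError(f"Cannot compare {self.__class__.__name__} with {type(other)}")

    def __hash__(self) -> int:
        # Defining __eq__ clears the inherited __hash__, so restore it
        return hash(self.value)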
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/setup/lr_scheduler/NoScheduler.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional, Union
2 |
3 | from ConfigSpace.configuration_space import ConfigurationSpace
4 |
5 | import numpy as np
6 |
7 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
8 | from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler import BaseLRComponent
9 | from autoPyTorch.pipeline.components.setup.lr_scheduler.constants import StepIntervalUnit
10 |
11 |
12 | class NoScheduler(BaseLRComponent):
13 | """
14 | Performs no scheduling via a LambdaLR with lambda==1.
15 |
16 | """
17 | def __init__(
18 | self,
19 | step_interval: Union[str, StepIntervalUnit] = StepIntervalUnit.epoch,
20 | random_state: Optional[np.random.RandomState] = None
21 | ):
22 |
23 | super().__init__(step_interval)
24 | self.random_state = random_state
25 |
26 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseLRComponent:
27 | """
28 | Fits a component by using an input dictionary with pre-requisites
29 |
30 | Args:
31 | X (Dict[str, Any]): Dependencies needed by the current component to perform fit
32 | y (Any): not used. To comply with sklearn API
33 |
34 | Returns:
35 | An instance of self
36 | """
37 |
38 | # Make sure there is an optimizer
39 | self.check_requirements(X, y)
40 | return self
41 |
42 | @staticmethod
43 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
44 | ) -> Dict[str, Union[str, bool]]:
45 | return {
46 | 'shortname': 'NoScheduler',
47 | 'name': 'No LR Scheduling',
48 | }
49 |
50 | @staticmethod
51 | def get_hyperparameter_search_space(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
52 | ) -> ConfigurationSpace:
53 | cs = ConfigurationSpace()
54 | return cs
55 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/NoNormalizer.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional, Union
2 |
3 | import numpy as np
4 |
5 | import torch
6 |
7 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
8 | from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.base_normalizer import (
9 | BaseNormalizer
10 | )
11 |
12 |
13 | class NoNormalizer(BaseNormalizer):
14 | def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None
15 | ):
16 | super().__init__()
17 | self.random_state = random_state
18 |
19 | def fit(self, X: Dict[str, Any], y: Optional[Any] = None) -> "NoNormalizer":
20 | """
21 | Checks the fit requirements and returns self.
22 | Args:
23 | X (Dict[str, Any]): 'X' dictionary
24 |
25 | Returns:
26 | autoPyTorchImagePreprocessingComponent: self
27 | """
28 | self.check_requirements(X, y)
29 |
30 | return self
31 |
32 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
33 |
34 | X.update({'normalise': self})
35 | return X
36 |
37 | def __call__(self, X: Union[np.ndarray, torch.Tensor]) -> Union[np.ndarray, torch.Tensor]:
38 | """
39 | Makes the autoPyTorchPreprocessingComponent callable.
40 | NoNormalizer performs no transformation and returns the
41 | input unchanged.
42 | Args:
43 | X (Union[np.ndarray, torch.Tensor]): input data tensor
44 |
45 | Returns:
46 | Union[np.ndarray, torch.Tensor]: Transformed data tensor
47 | """
48 | return X
49 |
50 | @staticmethod
51 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
52 | ) -> Dict[str, Any]:
53 | return {
54 | 'shortname': 'no-normalize',
55 | 'name': 'No Normalizer Node',
56 | }
57 |
--------------------------------------------------------------------------------
/test/test_pipeline/components/preprocessing/test_variance_thresholding.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy.testing import assert_array_equal
3 |
4 |
5 | from sklearn.base import BaseEstimator
6 | from sklearn.compose import make_column_transformer
7 |
8 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.variance_thresholding. \
9 | VarianceThreshold import VarianceThreshold
10 |
11 |
12 | def test_variance_threshold():
13 | data = np.array([[1, 2, 1],
14 | [7, 8, 9],
15 | [4, 5, 1],
16 | [11, 12, 1],
17 | [17, 18, 19],
18 | [14, 15, 16]])
19 | numerical_columns = [0, 1, 2]
20 | train_indices = np.array([0, 2, 3])
21 | test_indices = np.array([1, 4, 5])
22 | dataset_properties = {
23 | 'categorical_columns': [],
24 | 'numerical_columns': numerical_columns,
25 | }
26 | X = {
27 | 'X_train': data[train_indices],
28 | 'dataset_properties': dataset_properties
29 | }
30 | component = VarianceThreshold()
31 |
32 | component = component.fit(X)
33 | X = component.transform(X)
34 | variance_threshold = X['variance_threshold']['numerical']
35 |
36 | # check if the fit dictionary X is modified as expected
37 | assert isinstance(X['variance_threshold'], dict)
38 | assert isinstance(variance_threshold, BaseEstimator)
39 |
40 | # make column transformer with the returned preprocessor to fit on data
41 | column_transformer = make_column_transformer((variance_threshold,
42 | X['dataset_properties']['numerical_columns']),
43 | remainder='passthrough')
44 | column_transformer = column_transformer.fit(X['X_train'])
45 | transformed = column_transformer.transform(data[test_indices])
46 |
47 | assert_array_equal(transformed, np.array([[7, 8],
48 | [17, 18],
49 | [14, 15]]))
50 |
--------------------------------------------------------------------------------
/test/test_pipeline/components/preprocessing/test_normalizer_choice.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import unittest
3 |
4 | from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise import (
5 | NormalizerChoice
6 | )
7 |
8 |
9 | class TestNormalizerChoice(unittest.TestCase):
10 |
11 | def test_get_set_config_space(self):
12 | """Make sure that we can setup a valid choice in the encoder
13 | choice"""
14 | dataset_properties = {}
15 | normalizer_choice = NormalizerChoice(dataset_properties)
16 | cs = normalizer_choice.get_hyperparameter_search_space()
17 |
18 | # Make sure that all hyperparameters are part of the search space
19 | self.assertListEqual(
20 | sorted(cs.get_hyperparameter('__choice__').choices),
21 | sorted(list(normalizer_choice.get_components().keys()))
22 | )
23 |
24 | # Make sure we can properly set some random configs
25 | # Whereas just one iteration will make sure the algorithm works,
26 | # doing five iterations increases the confidence. We will be able to
27 | # catch component specific crashes
28 | for i in range(5):
29 | config = cs.sample_configuration()
30 | config_dict = copy.deepcopy(config.get_dictionary())
31 | normalizer_choice.set_hyperparameters(config)
32 |
33 | self.assertEqual(normalizer_choice.choice.__class__,
34 | normalizer_choice.get_components()[config_dict['__choice__']])
35 |
36 | # Then check the choice configuration
37 | selected_choice = config_dict.pop('__choice__', None)
38 | for key, value in config_dict.items():
39 | # Remove the selected_choice string from the parameter
40 | # so we can query in the object for it
41 | key = key.replace(selected_choice + ':', '')
42 | self.assertIn(key, vars(normalizer_choice.choice))
43 | self.assertEqual(value, normalizer_choice.choice.__dict__[key])
44 |
45 |
46 | if __name__ == '__main__':
47 | unittest.main()
48 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/ImageNormalizer.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional, Union
2 |
3 | import numpy as np
4 |
5 | import torch
6 |
7 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
8 | from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.base_normalizer import BaseNormalizer
9 |
10 |
11 | class ImageNormalizer(BaseNormalizer):
12 |
13 | def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None
14 | ):
15 | super().__init__()
16 | self.random_state = random_state
17 | self.mean: Optional[np.ndarray] = None
18 | self.std: Optional[np.ndarray] = None
19 |
20 | def fit(self, X: Dict[str, Any], y: Optional[Any] = None) -> "ImageNormalizer":
21 | """
22 | Stores the dataset mean and std and returns self.
23 | Args:
24 | X (Dict[str, Any]): 'X' dictionary
25 |
26 | Returns:
27 | autoPyTorchImagePreprocessingComponent: self
28 | """
29 | self.check_requirements(X, y)
30 | self.mean = X['dataset_properties']['mean']
31 | self.std = X['dataset_properties']['std']
32 | return self
33 |
34 | def __call__(self, X: Union[np.ndarray, torch.Tensor]) -> Union[np.ndarray, torch.Tensor]:
35 | """
36 | Makes the autoPyTorchPreprocessingComponent Callable. Calling the component
37 | calls the transform function of the underlying early_preprocessor and
38 | returns the transformed array.
39 | Args:
40 | X (Union[np.ndarray, torch.Tensor]): input data tensor
41 |
42 | Returns:
43 | Union[np.ndarray, torch.Tensor]: Transformed data tensor
44 | """
45 | X = (X - self.mean) / self.std
46 | return X
47 |
48 | @staticmethod
49 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
50 | ) -> Dict[str, Any]:
51 | return {
52 | 'shortname': 'normalize',
53 | 'name': 'Image Normalizer Node',
54 | }
55 |
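Usage mirrors the test in test_normalizers.py; a condensed sketch:

import numpy as np

data = np.random.randint(0, 255, (3, 2, 2, 3)).astype(float)
props = {'mean': data.mean(axis=(0, 1, 2)), 'std': data.std(axis=(0, 1, 2))}

normalizer = ImageNormalizer().fit({'dataset_properties': props})
normalized = normalizer(data)  # (X - mean) / std, broadcast over channels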
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/coalescer/MinorityCoalescer.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional, Union
2 |
3 | from ConfigSpace.configuration_space import ConfigurationSpace
4 | from ConfigSpace.hyperparameters import UniformFloatHyperparameter
5 |
6 | import numpy as np
7 |
8 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.coalescer.base_coalescer import BaseCoalescer
9 | from autoPyTorch.utils.common import HyperparameterSearchSpace, add_hyperparameter
10 | from autoPyTorch.utils.implementations import MinorityCoalesceTransformer
11 |
12 |
13 | class MinorityCoalescer(BaseCoalescer):
14 | """Group together categories whose occurence is less than a specified min_frac """
15 | def __init__(self, min_frac: float, random_state: np.random.RandomState):
16 | super().__init__()
17 | self.min_frac = min_frac
18 | self.random_state = random_state
19 |
20 | def fit(self, X: Dict[str, Any], y: Any = None) -> BaseCoalescer:
21 | self.check_requirements(X, y)
22 | self.preprocessor['categorical'] = MinorityCoalesceTransformer(min_frac=self.min_frac)
23 | return self
24 |
25 | @staticmethod
26 | def get_hyperparameter_search_space(
27 | dataset_properties: Optional[Dict[str, Any]] = None,
28 | min_frac: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter='min_frac',
29 | value_range=(1e-4, 0.5),
30 | default_value=1e-2,
31 | ),
32 | ) -> ConfigurationSpace:
33 |
34 | cs = ConfigurationSpace()
35 | add_hyperparameter(cs, min_frac, UniformFloatHyperparameter)
36 | return cs
37 |
38 | @staticmethod
39 | def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, Union[str, bool]]:
40 | return {
41 | 'shortname': 'MinorityCoalescer',
42 | 'name': 'MinorityCoalescer',
43 | 'handles_sparse': False
44 | }
45 |
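The component exposes a single float hyperparameter; sampling the space and instantiating the component looks roughly like this sketch:

import numpy as np

cs = MinorityCoalescer.get_hyperparameter_search_space()
config = cs.sample_configuration()  # min_frac drawn from (1e-4, 0.5), default 1e-2

coalescer = MinorityCoalescer(min_frac=config['min_frac'],
                              random_state=np.random.RandomState(1))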
--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 | :orphan:
2 |
3 | .. _api:
4 |
5 | APIs
6 | ****
7 |
8 | ============
9 | Main modules
10 | ============
11 |
12 | ~~~~~~~~~~~~~~
13 | Classification
14 | ~~~~~~~~~~~~~~
15 |
16 | .. autoclass:: autoPyTorch.api.tabular_classification.TabularClassificationTask
17 | :members:
18 | :inherited-members: search, refit, predict, score
19 |
20 | ~~~~~~~~~~~~~~
21 | Regression
22 | ~~~~~~~~~~~~~~
23 |
24 | .. autoclass:: autoPyTorch.api.tabular_regression.TabularRegressionTask
25 | :members:
26 | :inherited-members: search, refit, predict, score
27 |
28 | ~~~~~~~~~~~~~~~~~~~~~~~
29 | Time Series Forecasting
30 | ~~~~~~~~~~~~~~~~~~~~~~~
31 |
32 | .. autoclass:: autoPyTorch.api.time_series_forecasting.TimeSeriesForecastingTask
33 | :members:
34 | :inherited-members: search, refit, predict, score
35 |
36 |
37 |
38 | =========
39 | Pipelines
40 | =========
41 |
42 | ~~~~~~~~~~~~~~~~~~~~~~
43 | Tabular Classification
44 | ~~~~~~~~~~~~~~~~~~~~~~
45 |
46 | .. autoclass:: autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline
47 | :members:
48 |
49 | .. autoclass:: autoPyTorch.pipeline.traditional_tabular_classification.TraditionalTabularClassificationPipeline
50 | :members:
51 |
52 | ~~~~~~~~~~~~~~~~~~
53 | Tabular Regression
54 | ~~~~~~~~~~~~~~~~~~
55 |
56 | .. autoclass:: autoPyTorch.pipeline.tabular_regression.TabularRegressionPipeline
57 | :members:
58 |
59 | .. autoclass:: autoPyTorch.pipeline.traditional_tabular_regression.TraditionalTabularRegressionPipeline
60 | :members:
61 |
62 | ~~~~~~~~~~~~~~~~~~~~~~~
63 | Time Series Forecasting
64 | ~~~~~~~~~~~~~~~~~~~~~~~
65 |
66 | .. autoclass:: autoPyTorch.pipeline.time_series_forecasting.TimeSeriesForecastingPipeline
67 | :members:
68 |
69 |
70 | =================
71 | Steps in Pipeline
72 | =================
73 |
74 |
75 | ~~~~~~~~~~~~~~~~~~~~
76 | autoPyTorchComponent
77 | ~~~~~~~~~~~~~~~~~~~~
78 |
79 | .. autoclass:: autoPyTorch.pipeline.components.base_component.autoPyTorchComponent
80 | :members:
81 |
82 | ~~~~~~~~~~~~~~~~~
83 | autoPyTorchChoice
84 | ~~~~~~~~~~~~~~~~~
85 |
86 | .. autoclass:: autoPyTorch.pipeline.components.base_choice.autoPyTorchChoice
87 | :members:
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ## Types of changes
4 |
5 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
6 | - [ ] Bug fix (non-breaking change which fixes an issue)
7 | - [ ] New feature (non-breaking change which adds functionality)
8 |
9 | Note that a Pull Request should only contain one of refactoring, new features or documentation changes.
10 | Please separate these changes and send us individual PRs for each.
11 | For more information on how to create a good pull request, please refer to [The anatomy of a perfect pull request](https://medium.com/@hugooodias/the-anatomy-of-a-perfect-pull-request-567382bb6067).
12 |
13 | ## Checklist:
14 |
15 |
16 | - [ ] My code follows the code style of this project.
17 | - [ ] My change requires a change to the documentation.
18 | - [ ] I have updated the documentation accordingly.
19 | - [ ] Have you checked to ensure there aren't other open [Pull Requests](../../../pulls) for the same update/change?
20 | - [ ] Have you added an explanation of what your changes do and why you'd like us to include them?
21 | - [ ] Have you written new tests for your core changes, as applicable?
22 | - [ ] Have you successfully run tests with your changes locally?
23 |
26 |
27 |
28 | ## Description
29 |
30 |
31 | ## Motivation and Context
32 |
33 |
34 |
35 | ## How has this been tested?
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/examples/20_basics/example_image_classification.py:
--------------------------------------------------------------------------------
1 | """
2 | ======================
3 | Image Classification
4 | ======================
5 | """
6 | import numpy as np
7 |
8 | import sklearn.model_selection
9 |
10 | import torchvision.datasets
11 |
12 | from autoPyTorch.pipeline.image_classification import ImageClassificationPipeline
13 |
14 | # Get the training data for image classification
15 | trainset = torchvision.datasets.FashionMNIST(root='../datasets/', train=True, download=True)
16 | data = trainset.data.numpy()
17 | data = np.expand_dims(data, axis=3)
18 | # Create a proof of concept pipeline!
19 | dataset_properties = dict()
20 | pipeline = ImageClassificationPipeline(dataset_properties=dataset_properties)
21 |
22 | # Train and test split
23 | train_indices, val_indices = sklearn.model_selection.train_test_split(
24 | list(range(data.shape[0])),
25 | random_state=1,
26 | test_size=0.25,
27 | )
28 |
29 | # Configuration space
30 | pipeline_cs = pipeline.get_hyperparameter_search_space()
31 | print("Pipeline CS:\n", '_' * 40, f"\n{pipeline_cs}")
32 | config = pipeline_cs.sample_configuration()
33 | print("Pipeline Random Config:\n", '_' * 40, f"\n{config}")
34 | pipeline.set_hyperparameters(config)
35 |
36 | # Fit the pipeline
37 | print("Fitting the pipeline...")
38 |
39 | pipeline.fit(X=dict(X_train=data,
40 | is_small_preprocess=True,
41 | dataset_properties=dict(mean=np.array([np.mean(data[:, :, :, i]) for i in range(1)]),
42 | std=np.array([np.std(data[:, :, :, i]) for i in range(1)]),
43 | num_classes=10,
44 | num_features=data.shape[1] * data.shape[2],
45 | image_height=data.shape[1],
46 | image_width=data.shape[2],
47 | is_small_preprocess=True),
48 | train_indices=train_indices,
49 | val_indices=val_indices,
50 | )
51 | )
52 |
53 | # Showcase some components of the pipeline
54 | print(pipeline)
55 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/setup/network_backbone/forecasting_backbone/forecasting_encoder/flat_encoder/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | from collections import OrderedDict
3 | from typing import Dict, Optional, Type, Union
4 |
5 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
6 | from autoPyTorch.pipeline.components.base_component import (
7 | ThirdPartyComponents,
8 | autoPyTorchComponent,
9 | find_components
10 | )
11 | from autoPyTorch.pipeline.components.setup.network_backbone.forecasting_backbone.forecasting_encoder import \
12 | AbstractForecastingEncoderChoice
13 | from autoPyTorch.pipeline.components.setup.network_backbone.forecasting_backbone.forecasting_encoder.\
14 | base_forecasting_encoder import BaseForecastingEncoder
15 |
16 | directory = os.path.split(__file__)[0]
17 | _encoders = find_components(__package__,
18 | directory,
19 | BaseForecastingEncoder)
20 | _addons = ThirdPartyComponents(BaseForecastingEncoder)
21 |
22 |
23 | def add_encoder(encoder: BaseForecastingEncoder) -> None:
24 | _addons.add_component(encoder)
25 |
26 |
27 | class FlatForecastingEncoderChoice(AbstractForecastingEncoderChoice):
28 | def get_components(self) -> Dict[str, Type[autoPyTorchComponent]]: # type: ignore[override]
29 | """Returns the available backbone components
30 |
31 | Args:
32 | None
33 |
34 | Returns:
35 | Dict[str, autoPyTorchComponent]: all flat encoder components
36 | available as choices for the forecasting backbone
37 | """
38 | components = OrderedDict()
39 | components.update(_encoders)
40 | components.update(_addons.components)
41 | return components
42 |
43 | @staticmethod
44 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
45 | ) -> Dict[str, Union[str, bool]]:
46 | return {
47 | 'shortname': 'FlatEncoder',
48 | 'name': 'FlatEncoder',
49 | 'handles_tabular': False,
50 | 'handles_image': False,
51 | 'handles_time_series': True,
52 | }
53 |
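Because discovery happens at import time, any concrete BaseForecastingEncoder module placed in this package is picked up automatically. Inspecting the result is a one-liner (a sketch; the printed names depend on the modules actually present, and the key convention is find_components' own):

from autoPyTorch.pipeline.components.setup.network_backbone.forecasting_backbone.forecasting_encoder.flat_encoder import _encoders

print(list(_encoders))  # auto-discovered flat encoders, before any third-party additions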
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/NoFeaturePreprocessor.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional, Union
2 |
3 | import numpy as np
4 |
5 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
6 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing.\
7 | base_feature_preprocessor import autoPyTorchFeaturePreprocessingComponent
8 |
9 |
10 | class NoFeaturePreprocessor(autoPyTorchFeaturePreprocessingComponent):
11 | """
12 | Don't perform any feature preprocessing
13 | """
14 | def __init__(self,
15 | random_state: Optional[Union[np.random.RandomState, int]] = None
16 | ):
17 | super().__init__()
18 | self.random_state = random_state
19 |
20 | def fit(self, X: Dict[str, Any], y: Any = None) -> autoPyTorchFeaturePreprocessingComponent:
21 | """
22 | The fit function calls the fit function of the underlying model
23 | and returns the transformed array.
24 | Args:
25 | X (np.ndarray): input features
26 | y (Optional[np.ndarray]): input labels
27 |
28 | Returns:
29 | instance of self
30 | """
31 | self.check_requirements(X, y)
32 |
33 | return self
34 |
35 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
36 | """
37 | Adds the preprocessor into the 'X' dictionary and returns it.
38 | Args:
39 | X (Dict[str, Any]): 'X' dictionary
40 |
41 | Returns:
42 | (Dict[str, Any]): the updated 'X' dictionary
43 | """
44 | X.update({'feature_preprocessor': self.preprocessor})
45 | return X
46 |
47 | @staticmethod
48 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
49 | ) -> Dict[str, Union[str, bool]]:
50 | return {'shortname': 'NoFeaturePreprocessing',
51 | 'name': 'No Feature Preprocessing',
52 | 'handles_sparse': True,
53 | 'handles_classification': True,
54 | 'handles_regression': True
55 | }
56 |
--------------------------------------------------------------------------------
/test/test_pipeline/components/preprocessing/base.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, List, Optional, Tuple
2 |
3 | from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice
4 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.TabularColumnTransformer import \
5 | TabularColumnTransformer
6 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.coalescer import CoalescerChoice
7 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding import EncoderChoice
8 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.imputation.SimpleImputer import SimpleImputer
9 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling import ScalerChoice
10 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.variance_thresholding. \
11 | VarianceThreshold import VarianceThreshold
12 | from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline
13 |
14 |
15 | class TabularPipeline(TabularClassificationPipeline):
16 | def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]],
17 | ) -> List[Tuple[str, autoPyTorchChoice]]:
18 | """
19 | Defines what steps a pipeline should follow.
20 | The step itself has choices given via autoPyTorchChoice.
21 |
22 | Returns:
23 | List[Tuple[str, autoPyTorchChoice]]: list of steps sequentially exercised
24 | by the pipeline.
25 | """
26 | steps: List[Tuple[str, autoPyTorchChoice]] = []
27 |
28 | default_dataset_properties = {'target_type': 'tabular_classification'}
29 | if dataset_properties is not None:
30 | default_dataset_properties.update(dataset_properties)
31 |
32 | steps.extend([
33 | ("imputer", SimpleImputer()),
34 | ("variance_threshold", VarianceThreshold()),
35 | ("coalescer", CoalescerChoice(default_dataset_properties)),
36 | ("encoder", EncoderChoice(default_dataset_properties)),
37 | ("scaler", ScalerChoice(default_dataset_properties)),
38 | ("tabular_transformer", TabularColumnTransformer()),
39 | ])
40 | return steps
41 |
--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
1 | name: Docs
2 |
3 | on:
4 | # Allow to manually trigger through github API
5 | # Won't trigger the push to GitHub pages where the documentation is located
6 | workflow_dispatch:
7 |
8 | # Triggers with push to these branches
9 | push:
10 | branches:
11 | - master
12 | - development
13 |
14 | # Triggers with push to a pr aimed at these branches
15 | pull_request:
16 | branches:
17 | - master
18 | - development
19 |
20 | jobs:
21 | build-and-deploy:
22 | runs-on: ubuntu-latest
23 |
24 | steps:
25 | - name: Checkout
26 | uses: actions/checkout@v2
27 | with:
28 | submodules: recursive
29 | - name: Setup Python
30 | uses: actions/setup-python@v2
31 | with:
32 | python-version: 3.8
33 |
34 | - name: Install dependencies
35 | run: |
36 | pip install -e .[docs,examples,forecasting]
37 |
38 | - name: Make docs
39 | run: |
40 | cd docs
41 | make html
42 |
43 | - name: Pull latest gh-pages
44 | if: (contains(github.ref, 'develop') || contains(github.ref, 'master')) && github.event_name == 'push'
45 | run: |
46 | cd ..
47 | git clone https://github.com/automl/Auto-PyTorch.git --branch gh-pages --single-branch gh-pages
48 |
49 | - name: Copy new doc into gh-pages
50 | if: (contains(github.ref, 'develop') || contains(github.ref, 'master')) && github.event_name == 'push'
51 | run: |
52 | branch_name=${GITHUB_REF##*/}
53 | cd ../gh-pages
54 | rm -rf $branch_name
55 | cp -r ../Auto-PyTorch/docs/build/html $branch_name
56 |
57 | - name: Push to gh-pages
58 | if: (contains(github.ref, 'develop') || contains(github.ref, 'master')) && github.event_name == 'push'
59 | run: |
60 | last_commit=$(git log -1 --pretty=format:"%an: %s")
61 | cd ../gh-pages
62 | branch_name=${GITHUB_REF##*/}
63 | git add $branch_name/
64 | git config --global user.name 'Github Actions'
65 | git config --global user.email 'not@mail.com'
66 | git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}
67 | git commit -am "$last_commit"
68 | git push
69 |
--------------------------------------------------------------------------------
/examples/40_advanced/example_run_with_portfolio.py:
--------------------------------------------------------------------------------
1 | """
2 | ============================================
3 | Tabular Classification with Greedy Portfolio
4 | ============================================
5 |
6 | The following example shows how to fit a sample classification model
7 | with AutoPyTorch using the greedy portfolio
8 | """
9 | import os
10 | import tempfile as tmp
11 | import warnings
12 |
13 | os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
14 | os.environ['OMP_NUM_THREADS'] = '1'
15 | os.environ['OPENBLAS_NUM_THREADS'] = '1'
16 | os.environ['MKL_NUM_THREADS'] = '1'
17 |
18 | warnings.simplefilter(action='ignore', category=UserWarning)
19 | warnings.simplefilter(action='ignore', category=FutureWarning)
20 |
21 | import sklearn.datasets
22 | import sklearn.model_selection
23 |
24 | from autoPyTorch.api.tabular_classification import TabularClassificationTask
25 |
26 |
27 | ############################################################################
28 | # Data Loading
29 | # ============
30 | X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
31 | X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
32 | X,
33 | y,
34 | random_state=42,
35 | )
36 |
37 | ############################################################################
38 | # Build and fit a classifier
39 | # ==========================
40 | api = TabularClassificationTask(
41 | seed=42,
42 | )
43 |
44 | ############################################################################
45 | # Search for an ensemble of machine learning algorithms
46 | # =====================================================
47 | api.search(
48 | X_train=X_train,
49 | y_train=y_train,
50 | X_test=X_test.copy(),
51 | y_test=y_test.copy(),
52 | optimize_metric='accuracy',
53 | total_walltime_limit=300,
54 | func_eval_time_limit_secs=50,
55 | # Setting this option to "greedy"
56 | # will make smac run the configurations
57 | # present in 'autoPyTorch/configs/greedy_portfolio.json'
58 | portfolio_selection="greedy"
59 | )
60 |
61 | ############################################################################
62 | # Print the final ensemble performance
63 | # ====================================
64 | y_pred = api.predict(X_test)
65 | score = api.score(y_pred, y_test)
66 | print(score)
67 | # Print the final ensemble built by AutoPyTorch
68 | print(api.show_models())
69 |
70 | # Print statistics from search
71 | print(api.sprint_statistics())
72 |
--------------------------------------------------------------------------------
/test/test_pipeline/test_traditional_pipeline.py:
--------------------------------------------------------------------------------
1 | import ConfigSpace as CS
2 |
3 | import numpy as np
4 |
5 | import pytest
6 |
7 | from autoPyTorch.pipeline.components.setup.traditional_ml.traditional_learner import _traditional_learners
8 | from autoPyTorch.pipeline.traditional_tabular_classification import (
9 | TraditionalTabularClassificationPipeline,
10 | )
11 |
12 |
13 | @pytest.mark.parametrize("fit_dictionary_tabular",
14 | ['classification_numerical_and_categorical',
15 | 'regression_numerical_and_categorical'], indirect=True)
16 | def test_traditional_tabular_pipeline(fit_dictionary_tabular):
17 | pipeline = TraditionalTabularClassificationPipeline(
18 | dataset_properties=fit_dictionary_tabular['dataset_properties']
19 | )
20 | assert pipeline._get_estimator_hyperparameter_name() == "traditional_tabular_learner"
21 | cs = pipeline.get_hyperparameter_search_space()
22 | assert isinstance(cs, CS.ConfigurationSpace)
23 | config = cs.sample_configuration()
24 | assert config['model_trainer:tabular_traditional_model:traditional_learner'] in _traditional_learners
25 | assert pipeline.get_pipeline_representation() == {
26 | 'Preprocessing': 'None',
27 | 'Estimator': 'TabularTraditionalModel',
28 | }
29 |
30 |
31 | @pytest.mark.parametrize("fit_dictionary_tabular",
32 | ['classification_numerical_and_categorical'], indirect=True)
33 | def test_traditional_tabular_pipeline_predict(fit_dictionary_tabular):
34 | pipeline = TraditionalTabularClassificationPipeline(
35 | dataset_properties=fit_dictionary_tabular['dataset_properties']
36 | )
37 | assert pipeline._get_estimator_hyperparameter_name() == "traditional_tabular_learner"
38 | config = pipeline.get_hyperparameter_search_space().get_default_configuration()
39 | pipeline.set_hyperparameters(config)
40 | pipeline.fit(fit_dictionary_tabular)
41 | prediction = pipeline.predict(fit_dictionary_tabular['X_train'])
42 | assert np.shape(fit_dictionary_tabular['X_train'])[0] == prediction.shape[0]
43 | assert prediction.shape[1] == 1
44 | prediction = pipeline.predict(fit_dictionary_tabular['X_train'], batch_size=5)
45 | assert np.shape(fit_dictionary_tabular['X_train'])[0] == prediction.shape[0]
46 | prediction = pipeline.predict_proba(fit_dictionary_tabular['X_train'], batch_size=5)
47 | assert np.shape(fit_dictionary_tabular['X_train'])[0] == prediction.shape[0]
48 |
--------------------------------------------------------------------------------
/examples/40_advanced/example_parallel_n_jobs.py:
--------------------------------------------------------------------------------
1 | """
2 | ============================================
3 | Tabular Classification with n parallel jobs
4 | ============================================
5 |
6 | The following example shows how to fit a sample classification model in parallel on 2 cores
7 | with AutoPyTorch
8 |
9 | """
10 | import os
11 | import tempfile as tmp
12 | import warnings
13 |
14 | os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
15 | os.environ['OMP_NUM_THREADS'] = '1'
16 | os.environ['OPENBLAS_NUM_THREADS'] = '1'
17 | os.environ['MKL_NUM_THREADS'] = '1'
18 |
19 | warnings.simplefilter(action='ignore', category=UserWarning)
20 | warnings.simplefilter(action='ignore', category=FutureWarning)
21 |
22 | import sklearn.datasets
23 | import sklearn.model_selection
24 |
25 | from autoPyTorch.api.tabular_classification import TabularClassificationTask
26 |
27 | if __name__ == '__main__':
28 | ############################################################################
29 | # Data Loading
30 | # ============
31 | X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
32 | X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
33 | X,
34 | y,
35 | random_state=1,
36 | )
37 |
38 | ############################################################################
39 | # Build and fit a classifier
40 | # ==========================
41 | api = TabularClassificationTask(
42 | n_jobs=2,
43 | seed=42,
44 | )
45 |
46 | ############################################################################
47 | # Search for an ensemble of machine learning algorithms
48 | # =====================================================
49 | api.search(
50 | X_train=X_train,
51 | y_train=y_train,
52 | X_test=X_test.copy(),
53 | y_test=y_test.copy(),
54 | optimize_metric='accuracy',
55 | total_walltime_limit=300,
56 | func_eval_time_limit_secs=50,
57 | # Each one of the 2 jobs is allocated 3GB
58 | memory_limit=3072,
59 | )
60 |
61 | ############################################################################
62 | # Print the final ensemble performance
63 | # ====================================
64 | y_pred = api.predict(X_test)
65 | score = api.score(y_pred, y_test)
66 | print(score)
67 | # Print the final ensemble built by AutoPyTorch
68 | print(api.sprint_statistics())
69 |
70 |
--------------------------------------------------------------------------------
/test/test_data/test_forecasting_target_validator.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import pandas as pd
4 |
5 | import pytest
6 |
7 | from scipy import sparse
8 |
9 | from autoPyTorch.data.time_series_target_validator import TimeSeriesTargetValidator
10 |
11 |
12 | def test_forecasting_target_transform():
13 | validator = TimeSeriesTargetValidator(is_classification=False)
14 | series_length = 10
15 | y = np.ones(series_length)
16 | validator.fit(y)
17 | y_transformed_0 = validator.transform(y)
18 | assert isinstance(y_transformed_0, pd.DataFrame)
19 | assert np.all(y_transformed_0.index.values == np.zeros(series_length, dtype=np.int64))
20 |
21 | index_1 = np.full(series_length, 1)
22 | y_transformed_1 = validator.transform(y, index_1)
23 | assert np.all(y_transformed_1.index.values == index_1)
24 |
25 | index_2 = pd.Index([f"a{i}" for i in range(series_length)])
26 | y_transformed_2 = validator.transform(y, index_2)
27 | assert np.all(y_transformed_2.index.values == index_2)
28 |
29 | index_3 = [('a', 'a')] * (series_length // 3) + \
30 | [('a', 'b')] * (series_length // 3) + \
31 | [('b', 'a')] * (series_length - series_length // 3 * 2)
32 | index_3 = pd.MultiIndex.from_tuples(index_3)
33 | y_transformed_3 = validator.transform(y, index_3)
34 | assert isinstance(y_transformed_3.index, pd.MultiIndex)
35 | assert np.all(y_transformed_3.index == index_3)
36 |
37 |
38 | def test_forecasting_target_handle_exception():
39 | validator = TimeSeriesTargetValidator(is_classification=False)
40 | target_sparse = sparse.csr_matrix(np.array([1, 1, 1]))
41 | with pytest.raises(NotImplementedError, match=r"Sparse Target is unsupported for forecasting task!"):
42 | # sparse matrices are unsupported for NaN filling
43 | validator.fit(target_sparse)
44 |
45 | series_length = 10
46 | y = np.ones(series_length)
47 | validator.fit(y)
48 | with pytest.raises(ValueError, match=r"Index must have length as the input targets!"):
49 | validator.transform(y, np.asarray([1, 2, 3]))
50 |
51 |
52 | def test_forecasting_target_missing_values():
53 | """
54 | Make sure that missing values in the target
55 | are preserved through fit and transform
56 | """
57 | validator1 = TimeSeriesTargetValidator(is_classification=False)
58 | target_1 = np.array([np.nan, 1, 2])
59 | validator1.fit(target_1)
60 | assert validator1.transform(target_1).isnull().values.sum() == 1
61 |
--------------------------------------------------------------------------------
/test/test_pipeline/components/preprocessing/test_encoder_choice.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import unittest
3 |
4 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding import (
5 | EncoderChoice
6 | )
7 |
8 |
9 | class TestEncoderChoice(unittest.TestCase):
10 | def test_get_set_config_space(self):
11 | """Make sure that we can setup a valid choice in the encoder
12 | choice"""
13 | dataset_properties = {'numerical_columns': list(range(4)), 'categorical_columns': [5]}
14 | encoder_choice = EncoderChoice(dataset_properties)
15 | cs = encoder_choice.get_hyperparameter_search_space()
16 |
17 | # Make sure that all hyperparameters are part of the search space
18 | self.assertListEqual(
19 | sorted(cs.get_hyperparameter('__choice__').choices),
20 | sorted(list(encoder_choice.get_components().keys()))
21 | )
22 |
23 | # Make sure we can properly set some random configs
24 | # While a single iteration would confirm that the algorithm works,
25 | # running five iterations increases the confidence and lets us
26 | # catch component-specific crashes
27 | for i in range(5):
28 | config = cs.sample_configuration()
29 | config_dict = copy.deepcopy(config.get_dictionary())
30 | encoder_choice.set_hyperparameters(config)
31 |
32 | self.assertEqual(encoder_choice.choice.__class__,
33 | encoder_choice.get_components()[config_dict['__choice__']])
34 |
35 | # Then check the choice configuration
36 | selected_choice = config_dict.pop('__choice__', None)
37 | for key, value in config_dict.items():
38 | # Remove the selected_choice string from the parameter
39 | # so we can query in the object for it
40 | key = key.replace(selected_choice + ':', '')
41 | self.assertIn(key, vars(encoder_choice.choice))
42 | self.assertEqual(value, encoder_choice.choice.__dict__[key])
43 |
44 | def test_only_numerical(self):
45 | dataset_properties = {'numerical_columns': list(range(4)), 'categorical_columns': []}
46 |
47 | chooser = EncoderChoice(dataset_properties)
48 | configspace = chooser.get_hyperparameter_search_space().sample_configuration().get_dictionary()
49 | self.assertEqual(configspace['__choice__'], 'NoEncoder')
50 |
51 |
52 | if __name__ == '__main__':
53 | unittest.main()
54 |
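The prefix-stripping loop above leans on ConfigSpace's flattening convention: every child hyperparameter of a choice appears under a key of the form '<choice>:<parameter>'. A minimal self-contained sketch of that convention (the parameter name and value below are hypothetical, not drawn from the real encoder search space):

    # Hypothetical flattened configuration, mimicking cs.sample_configuration().get_dictionary()
    config_dict = {
        '__choice__': 'OneHotEncoder',
        'OneHotEncoder:some_param': 'some_value',  # hypothetical child hyperparameter
    }
    selected_choice = config_dict.pop('__choice__', None)
    for key, value in config_dict.items():
        # Strip the '<choice>:' prefix so the key matches the attribute name
        # that set_hyperparameters assigns on the chosen component
        key = key.replace(selected_choice + ':', '')
        print(key, value)  # -> some_param some_value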
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/setup/lr_scheduler/base_scheduler.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Optional, Union
2 |
3 | from torch.optim import Optimizer
4 | from torch.optim.lr_scheduler import _LRScheduler
5 |
6 | from autoPyTorch.pipeline.components.setup.base_setup import autoPyTorchSetupComponent
7 | from autoPyTorch.pipeline.components.setup.lr_scheduler.constants import StepIntervalUnit, StepIntervalUnitChoices
8 | from autoPyTorch.utils.common import FitRequirement
9 |
10 |
11 | class BaseLRComponent(autoPyTorchSetupComponent):
12 | """Provide an abstract interface for schedulers
13 | in Auto-PyTorch"""
14 |
15 | def __init__(self, step_interval: Union[str, StepIntervalUnit]):
16 | super().__init__()
17 | self.scheduler: Optional[_LRScheduler] = None
18 | self._step_interval: StepIntervalUnit
19 |
20 | if isinstance(step_interval, str):
21 | if step_interval not in StepIntervalUnitChoices:
22 | raise ValueError('step_interval must be either {}, but got {}.'.format(
23 | StepIntervalUnitChoices,
24 | step_interval
25 | ))
26 | self._step_interval = getattr(StepIntervalUnit, step_interval)
27 | else:
28 | self._step_interval = step_interval
29 |
30 | self.add_fit_requirements([
31 | FitRequirement('optimizer', (Optimizer,), user_defined=False, dataset_property=False)])
32 |
33 | @property
34 | def step_interval(self) -> StepIntervalUnit:
35 | return self._step_interval
36 |
37 | def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
38 | """
39 | Adds the scheduler into the fit dictionary 'X' and returns it.
40 | Args:
41 | X (Dict[str, Any]): 'X' dictionary
42 | Returns:
43 | (Dict[str, Any]): the updated 'X' dictionary
44 | """
45 |
46 | X.update(
47 | lr_scheduler=self.scheduler,
48 | step_interval=self.step_interval
49 | )
50 | return X
51 |
52 | def get_scheduler(self) -> _LRScheduler:
53 | """Return the underlying scheduler object.
54 | Returns:
55 | scheduler : the underlying scheduler object
56 | """
57 | assert self.scheduler is not None, "No scheduler was fit"
58 | return self.scheduler
59 |
60 | def __str__(self) -> str:
61 | """ Allow a nice understanding of what components where used """
62 | string = self.scheduler.__class__.__name__
63 | return string
64 |
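BaseLRComponent itself never builds a scheduler; concrete components are expected to set self.scheduler during fit, at which point the FitRequirement declared above guarantees an optimizer is present in the fit dictionary. A minimal sketch of such a subclass, assuming the fit(X, y=None) signature used by the pipeline's setup components (MyStepLR is illustrative, not a shipped component):

    from typing import Any, Dict

    from torch.optim.lr_scheduler import StepLR

    from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler import BaseLRComponent


    class MyStepLR(BaseLRComponent):
        def __init__(self, step_size: int = 10, gamma: float = 0.1,
                     step_interval: str = 'epoch'):  # 'epoch' assumed to be a StepIntervalUnit choice
            super().__init__(step_interval)
            self.step_size = step_size
            self.gamma = gamma

        def fit(self, X: Dict[str, Any], y: Any = None) -> 'MyStepLR':
            # 'optimizer' is guaranteed by the FitRequirement in BaseLRComponent
            self.scheduler = StepLR(optimizer=X['optimizer'],
                                    step_size=self.step_size,
                                    gamma=self.gamma)
            return self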
--------------------------------------------------------------------------------
/test/test_pipeline/components/setup/test_setup_image_augmenter.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from imgaug.augmenters.meta import Augmenter, Sequential
4 |
5 | import numpy as np
6 |
7 | from autoPyTorch.pipeline.components.setup.augmentation.image.ImageAugmenter import ImageAugmenter
8 |
9 |
10 | class TestImageAugmenter(unittest.TestCase):
11 | def test_every_augmenter(self):
12 | image_augmenter = ImageAugmenter()
13 | # To test every augmenter, we use the default configuration, in which each augmenter
14 | # has use_augmenter set to True
15 | configuration = image_augmenter.get_hyperparameter_search_space().get_default_configuration()
16 | image_augmenter = image_augmenter.set_hyperparameters(configuration=configuration)
17 | X = dict(X_train=np.random.randint(0, 255, (8, 3, 16, 16), dtype=np.uint8),
18 | dataset_properties=dict(image_height=16, image_width=16))
19 | for name, augmenter in image_augmenter.available_augmenters.items():
20 | augmenter = augmenter.fit(X)
21 | # check if augmenter in the component has correct name
22 | self.assertEqual(augmenter.get_image_augmenter().name, name)
23 | # test if augmenter has an Augmenter attribute
24 | self.assertIsInstance(augmenter.get_image_augmenter(), Augmenter)
25 |
26 | # test if augmenter works on a random image
27 | train_aug = augmenter(X['X_train'])
28 | self.assertIsInstance(train_aug, np.ndarray)
29 | # check if data was changed
30 | self.assertIsNot(train_aug, X['X_train'])
31 |
32 | def test_get_set_config_space(self):
33 | X = dict(X_train=np.random.randint(0, 255, (8, 3, 16, 16), dtype=np.uint8),
34 | dataset_properties=dict(image_height=16, image_width=16))
35 | image_augmenter = ImageAugmenter()
36 | configuration = image_augmenter.get_hyperparameter_search_space().sample_configuration()
37 | image_augmenter = image_augmenter.set_hyperparameters(configuration=configuration)
38 | image_augmenter = image_augmenter.fit(X)
39 | X = image_augmenter.transform(X)
40 |
41 | image_augmenter = X['image_augmenter']
42 | # test if a sequential augmenter was formed
43 | self.assertIsInstance(image_augmenter.augmenter, Sequential)
44 |
45 | # test if augmenter works on a random image
46 | train_aug = image_augmenter(X['X_train'])
47 | self.assertIsInstance(train_aug, np.ndarray)
48 |
49 |
50 | if __name__ == '__main__':
51 | unittest.main()
52 |
--------------------------------------------------------------------------------
/autoPyTorch/pipeline/components/setup/forecasting_training_loss/RegressionLoss.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Optional, Union
2 |
3 | from ConfigSpace import CategoricalHyperparameter, ConfigurationSpace
4 |
5 | import numpy as np
6 |
7 | from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
8 | from autoPyTorch.pipeline.components.setup.forecasting_training_loss.base_forecasting_loss import \
9 | ForecastingLossComponents
10 | from autoPyTorch.pipeline.components.training.losses import (
11 | L1Loss,
12 | MAPELoss,
13 | MASELoss,
14 | MSELoss
15 | )
16 | from autoPyTorch.utils.common import HyperparameterSearchSpace, add_hyperparameter
17 |
18 |
19 | class RegressionLoss(ForecastingLossComponents):
20 | net_output_type = 'regression'
21 |
22 | def __init__(self,
23 | loss_name: str,
24 | random_state: Optional[np.random.RandomState] = None,
25 | ):
26 | super(RegressionLoss, self).__init__()
27 | if loss_name == "l1":
28 | self.loss = L1Loss
29 | elif loss_name == 'mse':
30 | self.loss = MSELoss
31 | elif loss_name == 'mase':
32 | self.loss = MASELoss
33 | elif loss_name == 'mape':
34 | self.loss = MAPELoss
35 | else:
36 | raise ValueError(f"Unsupported loss type {loss_name}!")
37 | self.random_state = random_state
38 |
39 | @staticmethod
40 | def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
41 | ) -> Dict[str, Union[str, bool]]:
42 | return {
43 | 'shortname': 'RegressionLoss',
44 | 'name': 'RegressionLoss',
45 | "handles_tabular": True,
46 | "handles_image": True,
47 | "handles_time_series": True,
48 | 'handles_regression': True,
49 | 'handles_classification': False
50 | }
51 |
52 | @staticmethod
53 | def get_hyperparameter_search_space(
54 | dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None,
55 | loss_name: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="loss_name",
56 | value_range=('l1', 'mse', 'mase', 'mape'),
57 | default_value='mse'),
58 | ) -> ConfigurationSpace:
59 | cs = ConfigurationSpace()
60 | add_hyperparameter(cs, loss_name, CategoricalHyperparameter)
61 | return cs
62 |
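A short usage sketch (illustrative, not library code) of how the search space and the constructor fit together: the sampled 'loss_name' value is passed straight to __init__, which maps it onto the corresponding loss class:

    from autoPyTorch.pipeline.components.setup.forecasting_training_loss.RegressionLoss import RegressionLoss

    cs = RegressionLoss.get_hyperparameter_search_space()
    config = cs.sample_configuration()                              # e.g. loss_name='mase'
    loss_component = RegressionLoss(loss_name=config['loss_name'])  # maps 'mase' -> MASELoss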
--------------------------------------------------------------------------------
/test/test_pipeline/components/preprocessing/test_scaler_choice.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import unittest
3 |
4 | from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling import ScalerChoice
5 |
6 |
7 | class TestRescalerChoice(unittest.TestCase):
8 |
9 | def test_get_set_config_space(self):
10 | """Make sure that we can setup a valid choice in the encoder
11 | choice"""
12 | dataset_properties = {'categorical_columns': list(range(4)),
13 | 'numerical_columns': [5],
14 | 'issparse': False}
15 | rescaler_choice = ScalerChoice(dataset_properties)
16 | cs = rescaler_choice.get_hyperparameter_search_space()
17 |
18 | # Make sure that all hyperparameters are part of the search space
19 | self.assertListEqual(
20 | sorted(cs.get_hyperparameter('__choice__').choices),
21 | sorted(list(rescaler_choice.get_components().keys()))
22 | )
23 |
24 | # Make sure we can properly set some random configs
25 | # While a single iteration would confirm that the algorithm works,
26 | # running five iterations increases the confidence and lets us
27 | # catch component-specific crashes
28 | for i in range(5):
29 | config = cs.sample_configuration()
30 | config_dict = copy.deepcopy(config.get_dictionary())
31 | rescaler_choice.set_hyperparameters(config)
32 |
33 | self.assertEqual(rescaler_choice.choice.__class__,
34 | rescaler_choice.get_components()[config_dict['__choice__']])
35 |
36 | # Then check the choice configuration
37 | selected_choice = config_dict.pop('__choice__', None)
38 | for key, value in config_dict.items():
39 | # Remove the selected_choice string from the parameter
40 | # so we can query in the object for it
41 | key = key.replace(selected_choice + ':', '')
42 | self.assertIn(key, vars(rescaler_choice.choice))
43 | self.assertEqual(value, rescaler_choice.choice.__dict__[key])
44 |
45 | def test_only_categorical(self):
46 | dataset_properties = {'categorical_columns': list(range(4)), 'numerical_columns': []}
47 | chooser = ScalerChoice(dataset_properties)
48 | configspace = chooser.get_hyperparameter_search_space(dataset_properties).sample_configuration().\
49 | get_dictionary()
50 | self.assertEqual(configspace['__choice__'], 'NoScaler')
51 |
52 |
53 | if __name__ == '__main__':
54 | unittest.main()
55 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | ************
2 | Auto-PyTorch
3 | ************
4 |
5 | .. role:: bash(code)
6 | :language: bash
7 |
8 | .. role:: python(code)
9 | :language: python
10 |
11 | *Auto-PyTorch* is an automated machine learning toolkit based on PyTorch:
12 |
13 | >>> import autoPyTorch
14 | >>> cls = autoPyTorch.api.tabular_classification.TabularClassificationTask()
15 | >>> cls.search(X_train, y_train)
16 | >>> predictions = cls.predict(X_test)
17 |
18 | *Auto-PyTorch* frees a machine learning user from algorithm selection and
19 | hyperparameter tuning. It leverages recent advances in *Bayesian
20 | optimization*, *meta-learning* and *ensemble construction*.
21 | Learn more about *Auto-PyTorch* by reading our paper
22 | `Auto-PyTorch Tabular: Multi-Fidelity MetaLearning for Efficient and Robust AutoDL