├── .gitignore ├── LICENSE ├── README.md ├── baselines ├── autogluon │ ├── main_experiment.py │ └── refit_experiment.py ├── autosklearn │ └── main_experiment.py ├── bohb │ ├── data │ │ ├── __init__.py │ │ └── loader.py │ ├── main_experiment.py │ ├── refit_experiment.py │ ├── utilities.py │ └── worker.py └── node │ └── node_experiment.py ├── cocktails ├── main_experiment.py └── refit_experiment.py ├── dataset_collection.py ├── figures └── all_baselines_diagram.png ├── results.py └── utilities.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # [NeurIPS 2021] Well-tuned Simple Nets Excel on Tabular Datasets
2 | 
3 | ## Introduction
4 | 
5 | This repo contains the source code accompanying the paper:
6 | 
7 | **Well-tuned Simple Nets Excel on Tabular Datasets**
8 | 
9 | Authors: Arlind Kadra, Marius Lindauer, Frank Hutter, Josif Grabocka
10 | 
11 | Tabular datasets are the last "unconquered castle" for deep learning, with traditional ML methods like Gradient-Boosted Decision Trees still performing strongly even against recent specialized neural architectures. In this paper, we hypothesize that the key to boosting the performance of neural networks lies in rethinking the joint and simultaneous application of a large set of modern regularization techniques. As a result, we propose regularizing plain Multilayer Perceptron (MLP) networks by searching for the optimal combination/cocktail of 13 regularization techniques for each dataset, using a joint optimization over the decision of which regularizers to apply and their subsidiary hyperparameters.
12 | 
13 | We empirically assess the impact of these **regularization cocktails** for MLPs in a large-scale empirical study comprising 40 tabular datasets and demonstrate that: (i) well-regularized plain MLPs significantly outperform recent state-of-the-art specialized neural network architectures, and (ii) they even outperform strong traditional ML methods, such as XGBoost.
14 | 
15 | 
16 | *News: Our work has been accepted at the Thirty-fifth Conference on Neural Information Processing Systems (NeurIPS 2021).*
17 | 
18 | 
19 | ## Setting up the virtual environment
20 | 
21 | Our work is built on top of AutoPyTorch. To inspect our implementation of the regularization cocktail ingredients, run the following:
22 | 
23 | 
24 | ```
25 | git clone https://github.com/automl/Auto-PyTorch.git
26 | cd Auto-PyTorch/
27 | git checkout regularization_cocktails
28 | ```
29 | To install the version of AutoPyTorch that features our work, use these additional commands:
30 | 
31 | ```
32 | # The following commands assume the user is in the cloned directory
33 | conda create -n reg_cocktails python=3.8
34 | conda activate reg_cocktails
35 | conda install gxx_linux-64 gcc_linux-64 swig
36 | cat requirements.txt | xargs -n 1 -L 1 pip install
37 | python setup.py install
38 | ```
39 | 
40 | ## Running the Regularization Cocktail code
41 | 
42 | The main files for running the regularization cocktails are `main_experiment.py` and
43 | `refit_experiment.py` in the `cocktails` folder. The first module starts a full
44 | HPO search, while the second refits the incumbent hyperparameter configuration on
45 | datasets where the time budget did not suffice to finish the full HPO search and the final refit.
46 | 
47 | The main arguments for `main_experiment.py`:
48 | 
49 | - `--task_id`: The OpenML task id,
which determines the dataset used in the experiment.
50 | - `--wall_time`: The total runtime budget; it covers both the HPO search and the final refit.
51 | - `--func_eval_time`: The maximal runtime for a single function evaluation, i.e. for evaluating one hyperparameter configuration.
52 | - `--epochs`: The number of epochs each hyperparameter configuration is trained for.
53 | - `--seed`: The seed used for the run.
54 | - `--tmp_dir`: The temporary directory in which results are stored.
55 | - `--output_dir`: The output directory in which results are stored.
56 | - `--nr_workers`: The number of workers, which corresponds to the number of hyperparameter configurations evaluated in parallel.
57 | - `--nr_threads`: The number of threads.
58 | - `--cash_cocktail`: An important flag that activates the regularization cocktail formulation.
59 | 
60 | **A minimal example of running the regularization cocktails**:
61 | 
62 | ```
63 | python main_experiment.py --task_id 233088 --wall_time 600 --func_eval_time 60 --epochs 10 --seed 42 --cash_cocktail True
64 | ```
65 | 
66 | The example above will run the regularization cocktails for 10 minutes, with a function evaluation limit of 60 seconds, for task 233088. Every
67 | hyperparameter configuration will be evaluated for 10 epochs, and seed 42 will be used for the experiment and the data splits.
68 | 
69 | **A minimal example of running only one regularization method:**
70 | ```
71 | python main_experiment.py --task_id 233088 --wall_time 600 --func_eval_time 60 --epochs 10 --seed 42 --use_weight_decay
72 | ```
73 | In case you would like to investigate individual regularization methods, the different arguments
74 | that control them are listed in `main_experiment.py`. Additionally, to remove
75 | the limit on the number of hyperparameter configurations, delete the following lines:
76 | 
77 | ```
78 | smac_scenario_args={
79 |     'runcount_limit': number_of_configurations_limit,
80 | }
81 | ```
82 | ## Plots
83 | 
84 | The plots included in our paper were generated by the functions in the module `results.py`.
85 | As mentioned in their docstrings, most of the functions that plot the baseline
86 | diagrams expect a folder structure as follows:
87 | 
88 | `common_result_folder/baseline/results.csv`
89 | 
90 | There are functions inside the module itself that generate the `results.csv` files.
91 | 
92 | ## Baselines
93 | 
94 | The code for running the baselines can be found in the `baselines` folder.
95 | 
96 | - TabNet, XGBoost, and CatBoost can be found in the `baselines/bohb` folder.
97 | - The other baselines (AutoGluon, auto-sklearn, and NODE) can be found in the folders with the corresponding names.
98 | 
99 | TabNet, XGBoost, CatBoost, and AutoGluon have the same two main files as our regularization cocktails: `main_experiment.py` and `refit_experiment.py`.
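
Each baseline run stores its final score in a `results.csv` file inside its output directory. As a convenience, here is a minimal sketch for gathering all baseline results into a single frame, assuming the `common_result_folder/baseline/results.csv` layout from the Plots section (the `collect_results` helper is hypothetical and not part of the repository):

```
import glob
import os

import pandas as pd


def collect_results(common_result_folder):
    """Gather every baseline's results.csv into one frame."""
    frames = []
    for csv_path in glob.glob(os.path.join(common_result_folder, '*', 'results.csv')):
        frame = pd.read_csv(csv_path)
        # The parent folder name identifies the baseline.
        frame['baseline'] = os.path.basename(os.path.dirname(csv_path))
        frames.append(frame)
    return pd.concat(frames, ignore_index=True)
```

Such a frame mirrors the on-disk folder structure that the plotting functions in `results.py` expect.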
100 | 101 | ## Figures 102 | 103 | ![alt text](https://github.com/releaunifreiburg/WellTunedSimpleNets/blob/main/figures/all_baselines_diagram.png "Critical Difference diagram of all the methods") 104 | 105 | ## Citation 106 | ``` 107 | @inproceedings{kadra2021well, 108 | title={Well-tuned Simple Nets Excel on Tabular Datasets}, 109 | author={Kadra, Arlind and Lindauer, Marius and Hutter, Frank and Grabocka, Josif}, 110 | booktitle={Thirty-Fifth Conference on Neural Information Processing Systems}, 111 | year={2021} 112 | } 113 | ``` 114 | -------------------------------------------------------------------------------- /baselines/autogluon/main_experiment.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import json 4 | import os 5 | import pickle 6 | import shutil 7 | import tempfile 8 | import warnings 9 | 10 | import matplotlib 11 | 12 | import numpy as np 13 | 14 | import openml 15 | 16 | import pandas as pd 17 | 18 | import psutil 19 | 20 | from sklearn.metrics import balanced_accuracy_score 21 | from sklearn.model_selection import train_test_split 22 | from sklearn.utils.multiclass import type_of_target 23 | 24 | from autogluon.tabular import TabularPredictor 25 | from autogluon.core.utils.savers import save_pd, save_pkl 26 | import autogluon.core.metrics as metrics 27 | from autogluon.tabular.version import __version__ 28 | from autogluon.tabular.configs.hyperparameter_configs import get_hyperparameter_config 29 | 30 | warnings.simplefilter("ignore") 31 | matplotlib.use('agg') # no need for tk 32 | log = logging.getLogger(__name__) 33 | log.setLevel(logging.DEBUG) 34 | 35 | 36 | # Credits to Francisco Rivera 37 | 38 | 39 | def get_data( 40 | task_id: int, 41 | test_size: float = 0.2, 42 | seed: int = 11, 43 | ): 44 | 45 | task = openml.tasks.get_task(task_id=task_id) 46 | dataset = task.get_dataset() 47 | train, y, categorical_indicator, _ = dataset.get_data( 48 | dataset_format='dataframe', 49 | target=dataset.default_target_attribute, 50 | ) 51 | 52 | # AutoGluon expects a frame with the label data inside 53 | train['target'] = y 54 | 55 | train, test = train_test_split( 56 | train, 57 | test_size=test_size, 58 | random_state=seed, 59 | stratify=y, 60 | ) 61 | 62 | return { 63 | 'train': train, 64 | 'test': test, 65 | 'label': 'target', 66 | } 67 | 68 | 69 | def run(config): 70 | 71 | log.info(f"\n**** AutoGluon [v{__version__}] ****\n") 72 | log.info(f"config:\n{pd.DataFrame([{a:b for a, b in config.items() if a not in ['train', 'test']}]).to_markdown()}") 73 | 74 | metrics_mapping = dict( 75 | acc=metrics.accuracy, 76 | auc=metrics.roc_auc, 77 | f1=metrics.f1, 78 | logloss=metrics.log_loss, 79 | mae=metrics.mean_absolute_error, 80 | mse=metrics.mean_squared_error, 81 | r2=metrics.r2, 82 | rmse=metrics.root_mean_squared_error, 83 | balacc=metrics.balanced_accuracy, 84 | ) 85 | 86 | perf_metric = metrics_mapping[config["metric"]] 87 | if perf_metric is None: 88 | raise ValueError(f"Need a valid metric, one from {metrics_mapping}") 89 | 90 | is_classification = config["type"] == 'classification' 91 | 92 | log.info(f"Columns dtypes:\n{config['train'].dtypes}") 93 | params = get_hyperparameter_config('default') 94 | if not config['traditional']: 95 | params.pop('GBM') 96 | params.pop('CAT') 97 | params.pop('XGB') 98 | params.pop('RF') 99 | params.pop('XT') 100 | params.pop('KNN') 101 | log.info(f"Models to use:\n{json.dumps(params, indent=4, sort_keys=True)}") 102 | 103 | predictor = TabularPredictor( 104 | 
        label=config['label'],
105 |         eval_metric=perf_metric.name,
106 |         path=config['output_dir'],
107 |     ).fit(
108 |         train_data=config['train'],
109 |         # Enable stacking!
110 |         presets='best_quality',
111 |         hyperparameter_tune_kwargs={
112 |             'searcher': 'bayesopt',
113 |             'scheduler': 'local',
114 |             'num_trials': 840,
115 |         },
116 |         holdout_frac=0.25,
117 |         refit_full=True,
118 |         time_limit=config["max_runtime_seconds"],
119 |         hyperparameters=params,
120 |     )
121 | 
122 |     y_test = config['test'][config['label']]
123 |     test = config['test'].drop(columns=config['label'])
124 | 
125 |     if is_classification:
126 |         probabilities = predictor.predict_proba(test, as_multiclass=True)
127 |         predictions = probabilities.idxmax(axis=1).to_numpy()
128 |     else:
129 |         predictions = predictor.predict(test, as_pandas=False)
130 |         probabilities = None
131 | 
132 |     leaderboard_kwargs = dict(silent=True, extra_info=True)
133 |     test[config['label']] = y_test
134 |     leaderboard_kwargs['data'] = test
135 | 
136 |     leaderboard = predictor.leaderboard(**leaderboard_kwargs)
137 |     with pd.option_context('display.max_rows', None, 'display.max_columns', None,
138 |                            'display.width', 1000):
139 |         log.info(leaderboard)
140 | 
141 |     log.info("\n\n\n")
142 |     leaderboard_kwargs['extra_info'] = False
143 |     leaderboard = predictor.leaderboard(**leaderboard_kwargs)
144 |     with pd.option_context('display.max_rows', None, 'display.max_columns', None,
145 |                            'display.width', 1000):
146 |         log.info(leaderboard)
147 | 
148 |     return predictions, probabilities, y_test, predictor
149 | 
150 | 
151 | if __name__ == '__main__':
152 |     parser = argparse.ArgumentParser(
153 |         description='Run autogluon on a benchmark'
154 |     )
155 |     # experiment setup arguments
156 |     parser.add_argument(
157 |         '--task_id',
158 |         type=int,
159 |         default=233088,
160 |     )
161 |     parser.add_argument(
162 |         '--max_runtime_seconds',
163 |         type=int,
164 |         default=345600,
165 |     )
166 |     parser.add_argument(
167 |         '--seed',
168 |         type=int,
169 |         default=11,
170 |     )
171 |     parser.add_argument(
172 |         '--test_size',
173 |         type=float,
174 |         default=0.2,
175 |     )
176 |     parser.add_argument(
177 |         '--output_dir',
178 |         type=str,
179 |         default='./autogluon_run/'
180 |     )
181 |     parser.add_argument(
182 |         '--traditional',
183 |         # argparse's type=bool would treat any non-empty string (even 'False') as True
184 |         action='store_true',
185 |     )
186 |     args = parser.parse_args()
187 | 
188 |     output_dir = os.path.join(
189 |         args.output_dir,
190 |         f'{args.seed}',
191 |         f'{args.task_id}',
192 |     )
193 |     os.makedirs(output_dir, exist_ok=True)
194 |     # Log to a file
195 |     logFormatter = logging.Formatter(
196 |         "%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s")
197 | 
198 |     fileHandler = logging.FileHandler(os.path.join(output_dir, 'info.log'))
199 |     fileHandler.setFormatter(logFormatter)
200 |     log.addHandler(fileHandler)
201 | 
202 |     # Build a configuration to run the experiments
203 |     config = {'task_id': args.task_id, 'output_dir': output_dir}
204 | 
205 |     # Add the train and test data
206 |     config.update(get_data(task_id=args.task_id, test_size=args.test_size, seed=args.seed))
207 | 
208 |     config.update({
209 |         'metric': 'balacc',
210 |         'type': 'classification',
211 |         'max_runtime_seconds': args.max_runtime_seconds,
212 |         'traditional': args.traditional,
213 |     })
214 | 
215 |     # Run the example -- and also warn the user about autogluon settings
216 |     log.warning(f"Autogluon does not accept a seed. 
Also, the cores are taken automatically "
217 |                 f"from the system, and in this case {psutil.cpu_count()} cores are used.")
218 |     predictions, probabilities, truth, predictor = run(config)
219 | 
220 |     # Store the predictions if things go south
221 |     with open(os.path.join(output_dir, f"predictions.{args.task_id}.pickle"), 'wb') as handle:
222 |         pickle.dump(predictions, handle, protocol=pickle.HIGHEST_PROTOCOL)
223 |     with open(os.path.join(output_dir, f"truth.{args.task_id}.pickle"), 'wb') as handle:
224 |         pickle.dump(truth, handle, protocol=pickle.HIGHEST_PROTOCOL)
225 | 
226 |     predictor.save()
227 | 
228 |     try:
229 |         score = balanced_accuracy_score(truth, predictions)
230 |     except ValueError:
231 |         # AutoGluon predictions have an unknown data type. Align to the dtype of the
232 |         # train data.
233 |         from sklearn import preprocessing
234 |         le = preprocessing.LabelEncoder()
235 |         if isinstance(truth, pd.Series):
236 |             truth = pd.Series(truth, dtype=config['train']['target'].dtype)
237 |         predictions = pd.Series(predictions, dtype=config['train']['target'].dtype)
238 |         le.fit(config['train']['target'])
239 |         score = balanced_accuracy_score(le.transform(truth), le.transform(predictions))
240 | 
241 |     log.info(f"Trained AutoGluon on task {args.task_id} resulted in score {score}")
242 | 
243 |     # save score to a file, just in case!
244 |     config.pop('train')
245 |     config.pop('test')
246 |     config['score'] = score
247 |     task_csv_dir = os.path.join(
248 |         output_dir,
249 |         'results.csv',
250 |     )
251 |     pd.DataFrame([config]).to_csv(
252 |         task_csv_dir,
253 |     )
254 | 
255 |     # Exit with a success status!
256 |     exit(0)
257 | 
--------------------------------------------------------------------------------
/baselines/autogluon/refit_experiment.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import json
4 | import os
5 | import pickle
6 | import warnings
7 | 
8 | import matplotlib
9 | import openml
10 | 
11 | import pandas as pd
12 | 
13 | import psutil
14 | 
15 | from sklearn.metrics import balanced_accuracy_score
16 | from sklearn.model_selection import train_test_split
17 | 
18 | from autogluon.tabular import TabularPredictor
19 | import autogluon.core.metrics as metrics
20 | from autogluon.tabular.version import __version__
21 | from autogluon.tabular.configs.hyperparameter_configs import get_hyperparameter_config
22 | 
23 | warnings.simplefilter("ignore")
24 | matplotlib.use('agg')  # no need for tk
25 | log = logging.getLogger(__name__)
26 | log.setLevel(logging.DEBUG)
27 | 
28 | 
29 | def get_data(
30 |     task_id: int,
31 |     test_size: float = 0.2,
32 |     seed: int = 11,
33 | ):
34 | 
35 |     task = openml.tasks.get_task(task_id=task_id)
36 |     dataset = task.get_dataset()
37 |     train, y, categorical_indicator, _ = dataset.get_data(
38 |         dataset_format='dataframe',
39 |         target=dataset.default_target_attribute,
40 |     )
41 | 
42 |     # AutoGluon expects a frame with the label data inside
43 |     train['target'] = y
44 | 
45 |     train, test = train_test_split(
46 |         train,
47 |         test_size=test_size,
48 |         random_state=seed,
49 |         stratify=y,
50 |     )
51 | 
52 |     return {
53 |         'train': train,
54 |         'test': test,
55 |         'label': 'target',
56 |     }
57 | 
58 | 
59 | def run(config):
60 | 
61 |     log.info(f"\n**** AutoGluon [v{__version__}] ****\n")
62 |     log.info(f"config:\n{pd.DataFrame([{a:b for a, b in config.items() if a not in ['train', 'test']}]).to_markdown()}")
63 | 
64 |     metrics_mapping = dict(
65 |         acc=metrics.accuracy,
66 |         auc=metrics.roc_auc,
67 |         f1=metrics.f1,
68 | 
        logloss=metrics.log_loss,
69 |         mae=metrics.mean_absolute_error,
70 |         mse=metrics.mean_squared_error,
71 |         r2=metrics.r2,
72 |         rmse=metrics.root_mean_squared_error,
73 |         balacc=metrics.balanced_accuracy,
74 |     )
75 | 
76 |     perf_metric = metrics_mapping[config["metric"]]
77 |     if perf_metric is None:
78 |         raise ValueError(f"Need a valid metric, one from {metrics_mapping}")
79 | 
80 |     is_classification = config["type"] == 'classification'
81 | 
82 |     log.info(f"Columns dtypes:\n{config['train'].dtypes}")
83 |     params = get_hyperparameter_config('default')
84 |     if not config['traditional']:
85 |         params.pop('GBM')
86 |         params.pop('CAT')
87 |         params.pop('XGB')
88 |         params.pop('RF')
89 |         params.pop('XT')
90 |         params.pop('KNN')
91 |     log.info(f"Models to use:\n{json.dumps(params, indent=4, sort_keys=True)}")
92 | 
93 |     predictor = TabularPredictor.load(config['output_dir'])
94 | 
95 |     y_test = config['test'][config['label']]
96 |     test = config['test'].drop(columns=config['label'])
97 | 
98 |     if is_classification:
99 |         probabilities = predictor.predict_proba(test, as_multiclass=True)
100 |         predictions = probabilities.idxmax(axis=1).to_numpy()
101 |     else:
102 |         predictions = predictor.predict(test, as_pandas=False)
103 |         probabilities = None
104 | 
105 |     leaderboard_kwargs = dict(silent=True, extra_info=True)
106 |     test[config['label']] = y_test
107 |     leaderboard_kwargs['data'] = test
108 | 
109 |     leaderboard = predictor.leaderboard(**leaderboard_kwargs)
110 |     with pd.option_context('display.max_rows', None, 'display.max_columns', None,
111 |                            'display.width', 1000):
112 |         log.info(leaderboard)
113 | 
114 |     log.info("\n\n\n")
115 |     leaderboard_kwargs['extra_info'] = False
116 |     leaderboard = predictor.leaderboard(**leaderboard_kwargs)
117 |     with pd.option_context('display.max_rows', None, 'display.max_columns', None,
118 |                            'display.width', 1000):
119 |         log.info(leaderboard)
120 | 
121 |     return predictions, probabilities, y_test, predictor
122 | 
123 | 
124 | if __name__ == '__main__':
125 |     parser = argparse.ArgumentParser(
126 |         description='Run autogluon on a benchmark'
127 |     )
128 |     # experiment setup arguments
129 |     parser.add_argument(
130 |         '--task_id',
131 |         type=int,
132 |         default=233088,
133 |     )
134 |     parser.add_argument(
135 |         '--max_runtime_seconds',
136 |         type=int,
137 |         default=600,
138 |     )
139 |     parser.add_argument(
140 |         '--seed',
141 |         type=int,
142 |         default=11,
143 |     )
144 |     parser.add_argument(
145 |         '--test_size',
146 |         type=float,
147 |         default=0.2,
148 |     )
149 |     parser.add_argument(
150 |         '--output_dir',
151 |         type=str,
152 |         default='./autogluon_run/'
153 |     )
154 |     parser.add_argument(
155 |         '--traditional',
156 |         # argparse's type=bool would treat any non-empty string (even 'False') as True
157 |         action='store_true',
158 |     )
159 |     args = parser.parse_args()
160 | 
161 |     output_dir = os.path.join(
162 |         args.output_dir,
163 |         f'{args.seed}',
164 |         f'{args.task_id}',
165 |     )
166 |     os.makedirs(output_dir, exist_ok=True)
167 |     # Log to a file
168 |     logFormatter = logging.Formatter(
169 |         "%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s")
170 | 
171 |     fileHandler = logging.FileHandler(os.path.join(output_dir, 'info.log'))
172 |     fileHandler.setFormatter(logFormatter)
173 |     log.addHandler(fileHandler)
174 | 
175 |     # Build a configuration to run the experiments
176 |     config = {'task_id': args.task_id, 'output_dir': output_dir}
177 | 
178 |     # Add the train and test data
179 |     config.update(get_data(task_id=args.task_id, test_size=args.test_size, seed=args.seed))
180 | 
181 |     config.update({
182 |         'metric': 'balacc',
183 |         'type': 'classification',
184 | 
        'max_runtime_seconds': args.max_runtime_seconds,
185 |         'traditional': args.traditional,
186 |     })
187 | 
188 |     # Run the example -- and also warn the user about autogluon settings
189 |     log.warning(f"Autogluon does not accept a seed. Also, the cores are taken automatically "
190 |                 f"from the system, and in this case {psutil.cpu_count()} cores are used.")
191 |     predictions, probabilities, truth, predictor = run(config)
192 | 
193 |     # Store the predictions if things go south
194 |     with open(os.path.join(output_dir, f"predictions.{args.task_id}.pickle"), 'wb') as handle:
195 |         pickle.dump(predictions, handle, protocol=pickle.HIGHEST_PROTOCOL)
196 |     with open(os.path.join(output_dir, f"truth.{args.task_id}.pickle"), 'wb') as handle:
197 |         pickle.dump(truth, handle, protocol=pickle.HIGHEST_PROTOCOL)
198 | 
199 |     predictor.save()
200 | 
201 |     try:
202 |         score = balanced_accuracy_score(truth, predictions)
203 |     except ValueError:
204 |         # AutoGluon predictions have an unknown data type. Align to the dtype of the
205 |         # train data.
206 |         from sklearn import preprocessing
207 |         le = preprocessing.LabelEncoder()
208 |         if isinstance(truth, pd.Series):
209 |             truth = pd.Series(truth, dtype=config['train']['target'].dtype)
210 |         predictions = pd.Series(predictions, dtype=config['train']['target'].dtype)
211 |         le.fit(config['train']['target'])
212 |         score = balanced_accuracy_score(le.transform(truth), le.transform(predictions))
213 | 
214 |     log.info(f"Trained AutoGluon on task {args.task_id} resulted in score {score}")
215 | 
216 |     # save score to a file, just in case!
217 |     config.pop('train')
218 |     config.pop('test')
219 |     config['score'] = score
220 |     task_csv_dir = os.path.join(
221 |         output_dir,
222 |         'results.csv',
223 |     )
224 |     pd.DataFrame([config]).to_csv(
225 |         task_csv_dir,
226 |     )
227 | 
228 |     # Exit with a success status!
229 |     exit(0)
230 | 
--------------------------------------------------------------------------------
/baselines/autosklearn/main_experiment.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import os
4 | import random
5 | 
6 | import autosklearn.classification
7 | from autosklearn.metrics import balanced_accuracy
8 | import numpy as np
9 | import openml
10 | from sklearn.metrics import balanced_accuracy_score
11 | from sklearn.model_selection import StratifiedShuffleSplit
12 | from sklearn.model_selection import train_test_split
13 | 
14 | 
15 | def create_dir(
16 |     path: str,
17 | ):
18 |     """Create the directory/subdirectories for the given path.
19 | 
20 |     Given a path, check the directory/subdirectories that are
21 |     part of the path and create the parts that are missing.
22 | 
23 |     Parameters:
24 |     -----------
25 |     path: str
26 |         The path to be created.
28 | """ 29 | if os.path.exists(path): 30 | if not os.path.isdir(path): 31 | os.makedirs(path) 32 | else: 33 | os.makedirs(path) 34 | 35 | 36 | parser = argparse.ArgumentParser( 37 | description='autosklearn_gb' 38 | ) 39 | parser.add_argument( 40 | '--run_id', 41 | help='Unique id to identify the AutoSklearn run.', 42 | default='autosklearn_gb', 43 | type=str, 44 | ) 45 | parser.add_argument( 46 | '--tmp_dir', 47 | help='Temporary node storage.', 48 | default='path/temporary_storage', 49 | type=str, 50 | ) 51 | parser.add_argument( 52 | '--working_dir', 53 | help='Working directory where to store the results.', 54 | default='path/working_dir', 55 | type=str, 56 | ) 57 | parser.add_argument( 58 | '--task_id', 59 | help='Task id so that the dataset can be retrieved from OpenML.', 60 | default=233088, 61 | type=int, 62 | ) 63 | parser.add_argument( 64 | '--nr_workers', 65 | help='Number of workers.', 66 | default=10, 67 | type=int, 68 | ) 69 | parser.add_argument( 70 | '--seed', 71 | help='Seed number.', 72 | default=11, 73 | type=int, 74 | ) 75 | 76 | args = parser.parse_args() 77 | np.random.seed(args.seed) 78 | random.seed(args.seed) 79 | 80 | task = openml.tasks.get_task(task_id=args.task_id) 81 | dataset = task.get_dataset() 82 | X, y, categorical_indicator, _ = dataset.get_data( 83 | dataset_format='array', 84 | target=dataset.default_target_attribute, 85 | ) 86 | X_train, X_test, y_train, y_test = train_test_split( 87 | X, 88 | y, 89 | test_size=0.2, 90 | random_state=args.seed, 91 | stratify=y, 92 | ) 93 | 94 | output_directory = os.path.join( 95 | args.working_dir, 96 | f'{args.seed}', 97 | f'{args.task_id}', 98 | 'output', 99 | ) 100 | result_directory = os.path.join( 101 | args.working_dir, 102 | f'{args.seed}', 103 | f'{args.task_id}', 104 | 'results', 105 | ) 106 | 107 | feat_types = ['Categorical' if feature else 'Numerical' for feature in categorical_indicator] 108 | resampling_strategy = StratifiedShuffleSplit 109 | resampling_strategy_arguments = {'test_size': 0.25, 'random_state': args.seed, 'n_splits': 1} 110 | # This is a stratified split, so this should work better. 
111 | # validation_policy = {'holdout': {'train_size': 0.75, 'shuffle': True}} 112 | 113 | if __name__ == '__main__': 114 | gb_autosklearn = autosklearn.classification.AutoSklearnClassifier( 115 | include_estimators=['gradient_boosting'], 116 | include_preprocessors=['no_preprocessing'], 117 | time_left_for_this_task=324000, 118 | ensemble_size=1, 119 | seed=args.seed, 120 | memory_limit=12000, 121 | output_folder=output_directory, 122 | tmp_folder=os.path.join(args.tmp_dir, 'autosklearn'), 123 | resampling_strategy=resampling_strategy, 124 | resampling_strategy_arguments=resampling_strategy_arguments, 125 | initial_configurations_via_metalearning=0, 126 | metric=balanced_accuracy, 127 | n_jobs=args.nr_workers, 128 | smac_scenario_args={'runcount_limit': 840}, 129 | ) 130 | gb_autosklearn.fit(X_train.copy(), y_train.copy(), dataset_name=dataset.name) 131 | print(gb_autosklearn.sprint_statistics()) 132 | gb_autosklearn.refit(X_train.copy(), y_train.copy()) 133 | y_test_pred = gb_autosklearn.predict(X_test) 134 | y_train_pred = gb_autosklearn.predict(X_train) 135 | 136 | train_acc = balanced_accuracy_score( 137 | y_train, 138 | y_train_pred, 139 | ) 140 | test_acc = balanced_accuracy_score( 141 | y_test, 142 | y_test_pred, 143 | ) 144 | 145 | information = { 146 | 'train_acc': train_acc, 147 | 'test_acc': test_acc, 148 | } 149 | create_dir(result_directory) 150 | with open(os.path.join(result_directory, 'refit_result.json'), 'w') as file: 151 | json.dump(information, file) 152 | -------------------------------------------------------------------------------- /baselines/bohb/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machinelearningnuremberg/WellTunedSimpleNets/54058460d5b587bc84107c200e6f1c44755a87e0/baselines/bohb/data/__init__.py -------------------------------------------------------------------------------- /baselines/bohb/data/loader.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | import numpy as np 4 | 5 | from utilities import get_dataset_openml, get_dataset_split 6 | 7 | 8 | # Loader class which provides the data splits 9 | class Loader: 10 | 11 | def __init__( 12 | self, 13 | task_id: int, 14 | val_fraction: float = 0.2, 15 | test_fraction: float = 0.2, 16 | seed: int = 11, 17 | apply_one_hot_encoding: bool = False, 18 | apply_imputation: bool = False, 19 | ): 20 | 21 | # download the dataset 22 | dataset = get_dataset_openml(task_id) 23 | # get the splits according to the given fractions and seed, 24 | # together with the categorical indicator 25 | self.categorical_information, self.splits = get_dataset_split( 26 | dataset, 27 | val_fraction=val_fraction, 28 | test_fraction=test_fraction, 29 | seed=seed, 30 | apply_one_hot_encoding=apply_one_hot_encoding, 31 | apply_imputation=apply_imputation, 32 | ) 33 | self.dataset_id = dataset.dataset_id 34 | 35 | def get_splits(self) -> Dict[str, np.array]: 36 | """Return the dataset splits for the different sets. 37 | """ 38 | 39 | return self.splits 40 | 41 | def get_dataset_id(self) -> int: 42 | """Return the dataset id. 
43 | """ 44 | return self.dataset_id 45 | -------------------------------------------------------------------------------- /baselines/bohb/main_experiment.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | logging.basicConfig(level=logging.DEBUG) 5 | import os 6 | import pickle 7 | import random 8 | import time 9 | 10 | import hpbandster.core.nameserver as hpns 11 | import hpbandster.core.result as hpres 12 | from hpbandster.optimizers import BOHB as BOHB 13 | from hpbandster.optimizers import RandomSearch as RS 14 | import numpy as np 15 | import openml 16 | 17 | from worker import CatBoostWorker, XGBoostWorker, TabNetWorker 18 | 19 | 20 | parser = argparse.ArgumentParser( 21 | description='Baseline experiment.' 22 | ) 23 | parser.add_argument( 24 | '--run_id', 25 | type=str, 26 | help='The run id of the optimization run.', 27 | default='tabular_baseline', 28 | ) 29 | parser.add_argument( 30 | '--working_directory', 31 | type=str, 32 | help='The working directory where results will be stored.', 33 | default='.', 34 | ) 35 | parser.add_argument( 36 | '--nic_name', 37 | type=str, 38 | help='Which network interface to use for communication.', 39 | default='ib0', 40 | ) 41 | parser.add_argument( 42 | '--optimizer', 43 | type=str, 44 | help='Which optimizer to use for the experiment.', 45 | default='bohb', 46 | ) 47 | parser.add_argument( 48 | '--model', 49 | type=str, 50 | help='Which model to use for the experiment.', 51 | default='tabnet', 52 | ) 53 | parser.add_argument( 54 | '--task_id', 55 | type=int, 56 | help='Task id used for the experiment.', 57 | default=233109, 58 | ) 59 | parser.add_argument( 60 | '--seed', 61 | type=int, 62 | help='Seed used for the experiment.', 63 | default=11, 64 | ) 65 | parser.add_argument( 66 | '--max_budget', 67 | type=float, 68 | help='Maximum budget used during the optimization.', 69 | default=1, 70 | ) 71 | parser.add_argument( 72 | '--min_budget', 73 | type=float, 74 | help='Minimum budget used during the optimization.', 75 | default=1, 76 | ) 77 | parser.add_argument( 78 | '--n_iterations', 79 | type=int, 80 | help='Number of BOHB iterations.', 81 | default=10, 82 | ) 83 | parser.add_argument( 84 | '--n_workers', 85 | type=int, 86 | help='Number of workers to run in parallel.', 87 | default=2, 88 | ) 89 | parser.add_argument( 90 | '--nr_threads', 91 | type=int, 92 | help='Number of threads for one worker.', 93 | default=2, 94 | ) 95 | parser.add_argument( 96 | '--worker', 97 | help='Flag to turn this into a worker process', 98 | action='store_true', 99 | ) 100 | 101 | args = parser.parse_args() 102 | 103 | np.random.seed(args.seed) 104 | random.seed(args.seed) 105 | 106 | host = hpns.nic_name_to_host(args.nic_name) 107 | 108 | # determine the problem type, if it is binary 109 | # or multiclass classification 110 | task_id = args.task_id 111 | task = openml.tasks.get_task(task_id, download_data=False) 112 | nr_classes = int(openml.datasets.get_dataset(task.dataset_id, download_data=False).qualities['NumberOfClasses']) 113 | 114 | run_directory = os.path.join( 115 | args.working_directory, 116 | f'{args.task_id}', 117 | f'{args.seed}', 118 | ) 119 | os.makedirs(run_directory, exist_ok=True) 120 | 121 | worker_choices = { 122 | 'tabnet': TabNetWorker, 123 | 'xgboost': XGBoostWorker, 124 | 'catboost': CatBoostWorker, 125 | } 126 | 127 | model_worker = worker_choices[args.model] 128 | # build the model setting configuration 129 | if args.model == 'tabnet': 130 | 
param = model_worker.get_parameters( 131 | task_id=task_id, 132 | seed=args.seed, 133 | ) 134 | elif args.model =='xgboost': 135 | param = model_worker.get_parameters( 136 | task_id=task_id, 137 | nr_classes=nr_classes, 138 | seed=args.seed, 139 | nr_threads=args.nr_threads, 140 | output_directory=run_directory, 141 | ) 142 | else: 143 | param = model_worker.get_parameters( 144 | task_id=task_id, 145 | nr_classes=nr_classes, 146 | seed=args.seed, 147 | output_directory=run_directory, 148 | ) 149 | 150 | if args.worker: 151 | # short artificial delay to make sure the nameserver is already running 152 | time.sleep(5) 153 | worker = model_worker( 154 | run_id=args.run_id, 155 | host=host, 156 | param=param, 157 | ) 158 | while True: 159 | try: 160 | worker.load_nameserver_credentials( 161 | working_directory=args.working_directory, 162 | ) 163 | break 164 | except RuntimeError: 165 | pass 166 | worker.run(background=False) 167 | exit(0) 168 | 169 | print(f'Experiment started with task id: {args.task_id}') 170 | 171 | 172 | NS = hpns.NameServer( 173 | run_id=args.run_id, 174 | host=host, 175 | port=0, 176 | working_directory=args.working_directory, 177 | ) 178 | ns_host, ns_port = NS.start() 179 | 180 | worker = model_worker( 181 | run_id=args.run_id, 182 | host=host, 183 | param=param, 184 | nameserver=ns_host, 185 | nameserver_port=ns_port 186 | ) 187 | worker.run(background=True) 188 | result_logger = hpres.json_result_logger(directory=run_directory, overwrite=False) 189 | 190 | optimizer_choices = { 191 | 'bohb': BOHB, 192 | 'random_search': RS, 193 | } 194 | 195 | optimizer = optimizer_choices[args.optimizer] 196 | 197 | # for the moment only available to XGBoost 198 | if args.model == 'xgboost': 199 | config_space = model_worker.get_default_configspace( 200 | seed=args.seed, 201 | early_stopping=True, 202 | conditional_imputation=False, 203 | ) 204 | else: 205 | config_space = model_worker.get_default_configspace(seed=args.seed) 206 | 207 | bohb = optimizer( 208 | configspace=config_space, 209 | run_id=args.run_id, 210 | host=host, 211 | nameserver=ns_host, 212 | nameserver_port=ns_port, 213 | min_budget=args.min_budget, 214 | max_budget=args.max_budget, 215 | result_logger=result_logger, 216 | ) 217 | 218 | res = bohb.run( 219 | n_iterations=args.n_iterations, 220 | min_n_workers=args.n_workers 221 | ) 222 | 223 | bohb.shutdown(shutdown_workers=True) 224 | NS.shutdown() 225 | 226 | with open(os.path.join(run_directory, 'results.pkl'), 'wb') as fh: 227 | pickle.dump(res, fh) 228 | 229 | id2config = res.get_id2config_mapping() 230 | incumbent = res.get_incumbent_id() 231 | incumbent_runs = res.get_runs_by_id(incumbent) 232 | best_config = id2config[incumbent]['config'] 233 | 234 | # default values to find the config with the 235 | # best performance, so we can pull the best 236 | # iteration number. 
237 | val_error_min = 100
238 | best_round = 0
239 | if 'early_stopping_rounds' in best_config:
240 |     for run in incumbent_runs:
241 |         print(run)
242 |         print(run.info)
243 |         if run.loss < val_error_min:
244 |             val_error_min = run.loss
245 |             if 'best_round' in run.info:
246 |                 best_round = run.info['best_round']
247 |     # no need for the early stopping rounds anymore
248 |     del best_config['early_stopping_rounds']
249 |     # train only for the best performance achieved
250 |     # for the 'best_round' iteration
251 |     if args.model == 'tabnet':
252 |         best_config['max_epochs'] = best_round
253 |     else:
254 |         best_config['num_round'] = best_round
255 | 
256 |     print(f'Best round for {args.model} refit: {best_round}')
257 | 
258 | 
259 | all_runs = res.get_all_runs()
260 | print('Best found configuration:', best_config)
261 | print('A total of %i unique configurations were sampled.' % len(id2config.keys()))
262 | print('A total of %i runs were executed.' % len(res.get_all_runs()))
263 | print('Total budget corresponds to %.1f full function evaluations.'
264 |       % (sum([r.budget for r in all_runs])/args.max_budget))
265 | print('The run took %.1f seconds to complete.'
266 |       % (all_runs[-1].time_stamps['finished'] - all_runs[0].time_stamps['started']))
267 | 
268 | worker = model_worker(
269 |     args.run_id,
270 |     param=param,
271 |     nameserver='127.0.0.1',
272 | )
273 | refit_result = worker.refit(best_config)
274 | with open(os.path.join(run_directory, 'refit_result.json'), 'w') as file:
275 |     json.dump(refit_result, file)
276 | 
--------------------------------------------------------------------------------
/baselines/bohb/refit_experiment.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import logging
4 | logging.basicConfig(level=logging.DEBUG)
5 | import os
6 | import random
7 | 
8 | import hpbandster.core.result as hpres
9 | import numpy as np
10 | import openml
11 | 
12 | from data.loader import Loader
13 | from worker import CatBoostWorker, XGBoostWorker, TabNetWorker
14 | 
15 | 
16 | parser = argparse.ArgumentParser(
17 |     description='Baseline refit experiment.'
18 | )
19 | parser.add_argument(
20 |     '--run_id',
21 |     type=str,
22 |     help='The run id of the optimization run.',
23 |     default='Baseline',
24 | )
25 | parser.add_argument(
26 |     '--working_directory',
27 |     type=str,
28 |     help='The working directory where results will be stored.',
29 |     default='.',
30 | )
31 | parser.add_argument(
32 |     '--model',
33 |     type=str,
34 |     help='Which model to use for the experiment.',
35 |     default='tabnet',
36 | )
37 | parser.add_argument(
38 |     '--task_id',
39 |     type=int,
40 |     help='Task id used for the experiment.',
41 |     default=233109,
42 | )
43 | parser.add_argument(
44 |     '--seed',
45 |     type=int,
46 |     help='Seed used for the experiment.',
47 |     default=11,
48 | )
49 | parser.add_argument(
50 |     '--nr_threads',
51 |     type=int,
52 |     help='Number of threads for one worker.',
53 |     default=2,
54 | )
55 | args = parser.parse_args()
56 | 
57 | np.random.seed(args.seed)
58 | random.seed(args.seed)
59 | 
60 | task_id = args.task_id
61 | task = openml.tasks.get_task(task_id, download_data=False)
62 | nr_classes = int(openml.datasets.get_dataset(task.dataset_id, download_data=False).qualities['NumberOfClasses'])
63 | 
64 | run_directory = os.path.join(
65 |     args.working_directory,
66 |     f'{args.task_id}',
67 |     f'{args.seed}',
68 | )
69 | os.makedirs(run_directory, exist_ok=True)
70 | 
71 | worker_choices = {
72 |     'tabnet': TabNetWorker,
73 |     'xgboost': XGBoostWorker,
74 |     'catboost': CatBoostWorker,
75 | }
76 | 
77 | model_worker = worker_choices[args.model]
78 | 
79 | if args.model == 'tabnet':
80 |     param = model_worker.get_parameters(
81 |         task_id=args.task_id,
82 |         seed=args.seed,
83 |     )
84 | elif args.model == 'xgboost':
85 |     param = model_worker.get_parameters(
86 |         task_id=args.task_id,
87 |         nr_classes=nr_classes,
88 |         seed=args.seed,
89 |         nr_threads=args.nr_threads,
90 |         output_directory=run_directory,
91 |     )
92 | else:
93 |     param = model_worker.get_parameters(
94 |         task_id=args.task_id,
95 |         nr_classes=nr_classes,
96 |         seed=args.seed,
97 |         output_directory=run_directory,
98 |     )
99 | 
100 | print(f'Refit experiment started with task id: {args.task_id}')
101 | 
102 | worker = model_worker(
103 |     args.run_id,
104 |     param=param,
105 |     nameserver='127.0.0.1',
106 | )
107 | 
108 | result = hpres.logged_results_to_HBS_result(run_directory)
109 | all_runs = result.get_all_runs()
110 | id2conf = result.get_id2config_mapping()
111 | 
112 | inc_id = result.get_incumbent_id()
113 | inc_runs = result.get_runs_by_id(inc_id)
114 | inc_config = id2conf[inc_id]['config']
115 | print(f"Best configuration so far: {inc_config}")
116 | 
117 | # default values to find the config with the
118 | # best performance, so we can pull the best
119 | # iteration number.
120 | val_error_min = 100 121 | best_round = 0 122 | if 'early_stopping_rounds' in inc_config: 123 | for run in inc_runs: 124 | print(run) 125 | print(run.info) 126 | if run.loss < val_error_min: 127 | val_error_min = run.loss 128 | if 'best_round' in run.info: 129 | best_round = run.info['best_round'] 130 | # no need for the early stopping rounds anymore 131 | del inc_config['early_stopping_rounds'] 132 | # train only for the best performance achieved 133 | # for the 'best_round' iteration 134 | if args.model == 'tabnet': 135 | inc_config['max_epochs'] = best_round 136 | else: 137 | inc_config['num_round'] = best_round 138 | 139 | print(f'Best round for {args.model} refit: {best_round}') 140 | 141 | refit_result = worker.refit(inc_config) 142 | with open(os.path.join(run_directory, 'refit_result.json'), 'w') as file: 143 | json.dump(refit_result, file) 144 | -------------------------------------------------------------------------------- /baselines/bohb/utilities.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | import json 3 | import os 4 | from typing import Dict, List, Tuple, Union 5 | 6 | import numpy as np 7 | import openml 8 | import pandas as pd 9 | import scipy 10 | from scipy.stats import wilcoxon, rankdata 11 | from sklearn.compose import ColumnTransformer 12 | from sklearn.impute import SimpleImputer 13 | from sklearn.model_selection import train_test_split 14 | from sklearn.pipeline import Pipeline 15 | from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder 16 | 17 | 18 | def get_dataset_split( 19 | dataset: openml.datasets.OpenMLDataset, 20 | val_fraction: float = 0.2, 21 | test_fraction: float = 0.2, 22 | seed: int = 11, 23 | apply_one_hot_encoding: bool = False, 24 | apply_imputation: bool = False, 25 | model: str = 'xgboost', 26 | ) -> Tuple[Dict[str, Union[List, np.ndarray]], Dict[str, np.ndarray]]: 27 | """Split the dataset into training, test and possibly validation set. 28 | 29 | Based on the arguments given, splits the datasets into the corresponding 30 | sets. 31 | 32 | Parameters: 33 | ----------- 34 | dataset: openml.datasets.OpenMLDataset 35 | The dataset that will be split into the corresponding sets. 36 | val_fraction: float 37 | The fraction for the size of the validation set from the whole dataset. 38 | test_fraction: float 39 | The fraction for the size of the test set from the whole dataset. 40 | seed: int 41 | The seed used for the splitting of the dataset. 42 | apply_one_hot_encoding: bool 43 | Apply one hot encodings to categorical features of the given dataset. 44 | apply_imputation: bool 45 | Substitute missing values from the given dataset. 46 | 47 | Returns: 48 | -------- 49 | (categorical_information, dataset_splits): tuple(np.array, dict) 50 | Returns a tuple, where the first arguments provides categorical information 51 | about the features. While the second argument, is a dictionary with the splits 52 | for the different sets. 53 | """ 54 | X, y, categorical_indicator, _ = dataset.get_data( 55 | dataset_format='dataframe', 56 | target=dataset.default_target_attribute, 57 | ) 58 | 59 | label_encoder = LabelEncoder() 60 | empty_features = [] 61 | 62 | # remove nan features from the dataframe 63 | nan_columns = X.isna().all() 64 | for col_index, col_status in enumerate(nan_columns): 65 | if col_status: 66 | empty_features.append(col_index) 67 | # if there are null categorical columns, remove them 68 | # from the categorical column indicator. 
69 |     if len(empty_features) > 0:
70 |         for feature_index in sorted(empty_features, reverse=True):
71 |             del categorical_indicator[feature_index]
72 | 
73 |     column_names = list(X.columns)
74 |     # delete empty feature columns.
75 |     # Normally this would be done by the SimpleImputer, but since
76 |     # the imputation step is conditional, we do it ourselves.
77 |     empty_feature_names = [column_names[feat_index] for feat_index in empty_features]
78 |     if any(nan_columns):
79 |         X.drop(labels=empty_feature_names, axis='columns', inplace=True)
80 | 
81 |     column_names = list(X.columns)
82 |     numerical_columns = []
83 |     categorical_columns = []
84 | 
85 |     index = 0
86 |     categorical_col_indices = []
87 |     for cat_column_indicator, column_name in zip(categorical_indicator, column_names):
88 |         if cat_column_indicator:
89 |             categorical_columns.append(column_name)
90 |             categorical_col_indices.append(index)
91 |         else:
92 |             numerical_columns.append(column_name)
93 |         index += 1
94 | 
95 |     transformers = []
96 | 
97 |     if len(numerical_columns) > 0:
98 |         numeric_transformer = Pipeline(
99 |             steps=[
100 |                 ('num_imputer', SimpleImputer(strategy='constant')),
101 |                 ('scaler', StandardScaler())
102 |             ]
103 |         )
104 |         transformers.append(('num', numeric_transformer, numerical_columns))
105 | 
106 |     if len(categorical_columns) > 0:
107 |         steps = [
108 |             ('cat_imputer', SimpleImputer(strategy='constant')),
109 |         ]
110 |         if apply_one_hot_encoding:
111 |             steps.append(('cat_encoding', OneHotEncoder(handle_unknown='ignore')))
112 |         else:
113 |             # LabelEncoder only works on 1-D targets, so no encoder is added here
114 |             pass
115 |         categorical_transformer = Pipeline(
116 |             steps=steps,
117 |         )
118 |         transformers.append(('cat', categorical_transformer, categorical_columns))
119 | 
120 |     preprocessor = ColumnTransformer(
121 |         transformers=transformers,
122 |     )
123 | 
124 |     # label encode the targets
125 |     y = label_encoder.fit_transform(y)
126 | 
127 |     X_train, X_test, y_train, y_test = train_test_split(
128 |         X,
129 |         y,
130 |         test_size=test_fraction,
131 |         random_state=seed,
132 |         stratify=y,
133 |     )
134 | 
135 |     categorical_dimensions = []
136 |     if model == 'tabnet':
137 |         for cat_column in categorical_columns:
138 |             column_unique_values = X[cat_column].nunique()
139 |             # categorical columns with only one unique value
140 |             # do not need an embedding.
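            # [Editorial note, not in the original file] e.g. a 'color' column
            # with values {'red', 'green', 'blue'} yields nunique() == 3 and
            # contributes 3 to categorical_dimensions, from which TabNet sizes
            # its embeddings; constant columns are skipped just below.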
141 |             if column_unique_values == 1:
142 |                 continue
143 | 
144 |             categorical_dimensions.append(column_unique_values)
145 | 
146 |     if val_fraction != 0:
147 |         new_val_fraction = val_fraction / (1 - test_fraction)
148 |         X_train, X_val, y_train, y_val = train_test_split(
149 |             X_train,
150 |             y_train,
151 |             test_size=new_val_fraction,
152 |             random_state=seed,
153 |             stratify=y_train,
154 |         )
155 | 
156 |     preprocessor.fit(X_train, y_train)
157 | 
158 |     X_train = preprocessor.transform(X_train)
159 |     X_test = preprocessor.transform(X_test)
160 | 
161 |     dataset_splits = {
162 |         'X_train': X_train,
163 |         'X_test': X_test,
164 |         'y_train': y_train,
165 |         'y_test': y_test,
166 |     }
167 | 
168 |     if val_fraction != 0:
169 |         X_val = preprocessor.transform(X_val)
170 |         dataset_splits['X_val'] = X_val
171 |         dataset_splits['y_val'] = y_val
172 | 
173 |     new_categorical_indicator = []
174 |     new_categorical_indices = []
175 | 
176 |     for i in range(len(column_names)):
177 |         if i < len(numerical_columns):
178 |             categorical_status = False
179 |         else:
180 |             categorical_status = True
181 |             new_categorical_indices.append(i)
182 |         new_categorical_indicator.append(categorical_status)
183 | 
184 |     categorical_information = {
185 |         'categorical_ind': new_categorical_indicator,
186 |         'categorical_columns': new_categorical_indices,
187 |         'categorical_dimensions': categorical_dimensions,
188 |     }
189 | 
190 |     return categorical_information, dataset_splits
191 | 
192 | 
193 | def standardize_data(
194 |     X_train: np.ndarray,
195 |     X_test: np.ndarray,
196 |     is_sparse: bool
197 | ) -> Tuple[np.ndarray, np.ndarray]:
198 |     """Standardize the data.
199 | 
200 |     Compute the mean and std from the train set and standardize both sets.
201 | 
202 |     Parameters:
203 |     -----------
204 |     X_train: np.ndarray
205 |         The dataset examples used for training the model.
206 |     X_test: np.ndarray
207 |         The dataset examples used for testing the model.
208 |     is_sparse: bool
209 |         Whether the data is sparse, in which case no centering is applied.
210 | 
211 |     Returns:
212 |     --------
213 |     (X_train, X_test): tuple(np.ndarray, np.ndarray)
214 |         Corresponding sets after being standardized.
215 |     """
216 |     # center the data only for non-sparse matrices
217 |     center_data = not is_sparse
218 |     scaler = StandardScaler(with_mean=center_data).fit(X_train)
219 |     X_train = scaler.transform(X_train)
220 |     X_test = scaler.transform(X_test)
221 | 
222 |     return X_train, X_test
223 | 
224 | 
225 | def impute_missing_data(
226 |     X: np.ndarray,
227 | ) -> np.ndarray:
228 |     """Impute missing data from the given dataset.
229 | 
230 |     Impute missing values in the given dataset with constant values.
231 | 
232 |     Parameters:
233 |     -----------
234 |     X: np.ndarray
235 |         The dataset examples used for the experiment.
236 | 
237 |     Returns:
238 |     --------
239 |     _: np.ndarray
240 |         Data after imputation.
241 |     """
242 |     # constant strategy for imputation
243 |     imputer = SimpleImputer(strategy='constant')
244 | 
245 |     return imputer.fit_transform(X)
246 | 
247 | 
248 | def ohe_the_data(
249 |     X_train: np.ndarray,
250 |     X_test: np.ndarray,
251 | ) -> Tuple[np.ndarray, np.ndarray]:
252 |     """One hot encode the data.
253 | 
254 |     One hot encode the categorical features of the
255 |     given dataset.
256 | 
257 |     Parameters:
258 |     -----------
259 |     X_train: np.ndarray
260 |         The dataset examples used for training the model.
261 |     X_test: np.ndarray
262 |         The dataset examples used for testing the model.
263 | 
264 |     Returns:
265 |     --------
266 |     (X_train, X_test): tuple(np.ndarray, np.ndarray)
267 |         Corresponding sets after being one hot encoded.
268 | """ 269 | enc = OneHotEncoder(handle_unknown='ignore') 270 | X = np.concatenate((X_train, X_test), axis=0) 271 | enc.fit(X) 272 | X_train = enc.transform(X_train) 273 | X_test = enc.transform(X_test) 274 | 275 | return X_train, X_test 276 | 277 | 278 | def get_dataset_openml( 279 | task_id: int = 11, 280 | ) -> openml.datasets.OpenMLDataset: 281 | """Download a dataset from OpenML 282 | 283 | Based on a given task id, download the task and retrieve 284 | the dataset that belongs to the corresponding task. 285 | 286 | Parameters: 287 | ----------- 288 | task_id: int 289 | The task id that represents the task for which the dataset will be downloaded. 290 | 291 | Returns: 292 | -------- 293 | dataset: openml.datasets.OpenMLDataset 294 | The OpenML dataset that is requested.. 295 | """ 296 | task = openml.tasks.get_task(task_id) 297 | dataset = task.get_dataset() 298 | 299 | return dataset 300 | 301 | 302 | def check_leak_status(splits): 303 | """Check the leak status. 304 | 305 | This function goes through the different splits of the dataset 306 | and checks if there is a leak between the different sets. 307 | 308 | Parameters: 309 | ----------- 310 | splits: dict 311 | A dictionary that contains the different sets train, test (possibly validation) 312 | of the whole dataset. 313 | 314 | Returns: 315 | -------- 316 | None - Does not return anything, only raises an error if there is a leak. 317 | """ 318 | X_train = splits['X_train'] 319 | X_valid = splits['X_val'] 320 | X_test = splits['X_test'] 321 | 322 | for train_example in X_train: 323 | for valid_example in X_valid: 324 | if np.array_equal(train_example, valid_example): 325 | raise AssertionError('Leak between the training and validation set') 326 | for test_example in X_test: 327 | if np.array_equal(train_example, test_example): 328 | raise AssertionError('Leak between the training and test set') 329 | for valid_example in X_valid: 330 | for test_example in X_test: 331 | if np.array_equal(valid_example, test_example): 332 | raise AssertionError('Leak between the validation and test set') 333 | 334 | print('Leak check passed') 335 | 336 | 337 | def check_split_stratification(splits): 338 | """Check the split stratification and the shape of the examples and labels 339 | for the different sets. 340 | 341 | This function goes through the different splits of the dataset 342 | and checks if there is stratification. In this example, if there 343 | is nearly the same number of examples for each class in the corresponding 344 | splits. The function also verifies that the shape of the examples and 345 | labels is the same for the different splits. 346 | 347 | Parameters: 348 | ----------- 349 | splits: dict 350 | A dictionary that contains the different sets train, test (possibly validation) 351 | of the whole dataset. 
352 | """ 353 | X_train = splits['X_train'] 354 | X_val = splits['X_val'] 355 | X_test = splits['X_test'] 356 | y_train = splits['y_train'] 357 | y_val = splits['y_val'] 358 | y_test = splits['y_test'] 359 | train_occurences = Counter(y_train) 360 | val_occurences = Counter(y_val) 361 | test_occurences = Counter(y_test) 362 | 363 | print(train_occurences) 364 | print(val_occurences) 365 | print(test_occurences) 366 | print(X_train.shape) 367 | print(X_val.shape) 368 | print(X_test.shape) 369 | assert X_train.shape[0] == y_train.shape[0] 370 | assert X_val.shape[0] == y_val.shape[0] 371 | assert X_test.shape[0] == y_test.shape[0] 372 | 373 | 374 | def get_task_list( 375 | benchmark_task_file: str = 'path/to/tasks.txt', 376 | ) -> List[int]: 377 | """Get the task id list. 378 | 379 | Goes through the given file and collects all of the task 380 | ids. 381 | 382 | Parameters: 383 | ----------- 384 | benchmark_task_file: str 385 | A string to the path of the benchmark task file. Including 386 | the task file name. 387 | 388 | Returns: 389 | -------- 390 | benchmark_task_ids - list 391 | A list of all the task ids for the benchmark. 392 | """ 393 | with open(os.path.join(benchmark_task_file), 'r') as f: 394 | benchmark_info_str = f.readline() 395 | benchmark_task_ids = [int(task_id) for task_id in benchmark_info_str.split(' ')] 396 | 397 | return benchmark_task_ids 398 | 399 | 400 | def status_exp_tasks( 401 | working_directory: str, 402 | seed: int = 11, 403 | model_name: str = 'xgboost', 404 | ): 405 | """Analyze the different tasks of the experiment. 406 | 407 | Goes through the results in the directory given and 408 | it analyzes which one finished succesfully and which one 409 | did not. 410 | 411 | Parameters: 412 | ----------- 413 | working_directory: str 414 | The directory where the results are located. 415 | seed: int 416 | The seed that was used for the experiment. 417 | model_name: int 418 | The name of the model that was used. 419 | """ 420 | not_finished = 0 421 | finished = 0 422 | benchmark_task_file = 'benchmark_datasets.txt' 423 | benchmark_task_file_path = os.path.join(working_directory, benchmark_task_file) 424 | result_directory = os.path.join(working_directory, model_name) 425 | task_ids = get_task_list(benchmark_task_file_path) 426 | for task_id in task_ids: 427 | task_result_directory = os.path.join(result_directory, f'{task_id}', f'{seed}') 428 | print(task_result_directory) 429 | try: 430 | with open(os.path.join(task_result_directory, 'refit_result.json'), 'r') as file: 431 | # do nothing with the result for now 432 | _ = json.load(file) 433 | print(f'Task {task_id} finished.') 434 | finished += 1 435 | # TODO do something with the result 436 | except FileNotFoundError: 437 | print(f'Task {task_id} not finished.') 438 | not_finished += 1 439 | print(f'Finished tasks: {finished} , not finished tasks: {not_finished}') 440 | 441 | 442 | def read_baseline_values( 443 | working_directory: str, 444 | seed: int = 11, 445 | model_name: str = 'xgboost', 446 | ) -> Dict[int, float]: 447 | """Prepares the results of the experiment with the baselines. 448 | 449 | Goes through the results at the given directory and it generates a 450 | dictionary for the baseline with the performances on every task 451 | of the benchmark. 452 | 453 | Parameters: 454 | ----------- 455 | working_directory: str 456 | The directory where the results are located. 457 | seed: int 458 | The seed that was used for the experiment. 459 | model_name: int 460 | The name of the model that was used. 
461 | 
462 |     Returns:
463 |     --------
464 |     baseline_results - dict
465 |         A dictionary with the results of the baseline algorithm.
466 |         Each key of the dictionary represents a task id, while
467 |         each value corresponds to the performance of the algorithm.
468 |     """
469 |     baseline_results = {}
470 |     benchmark_task_file = 'benchmark_datasets.txt'
471 |     benchmark_task_file_path = os.path.join(working_directory, benchmark_task_file)
472 |     result_directory = os.path.join(working_directory, model_name)
473 |     task_ids = get_task_list(benchmark_task_file_path)
474 |     for task_id in task_ids:
475 |         task_result_directory = os.path.join(result_directory, f'{task_id}', f'{seed}')
476 |         try:
477 |             with open(os.path.join(task_result_directory, 'refit_result.json'), 'r') as file:
478 |                 task_result = json.load(file)
479 |             baseline_results[task_id] = task_result['test_accuracy']
480 |         except FileNotFoundError:
481 |             print(f'Task {task_id} not finished.')
482 |             baseline_results[task_id] = None
483 | 
484 |     return baseline_results
485 | 
486 | 
487 | def read_autosklearn_values(
488 |     working_directory: str,
489 |     seed: int = 11,
490 |     model_name: str = 'autosklearn',
491 | ) -> Dict[int, float]:
492 |     """Prepares the results of the experiment with auto-sklearn.
493 | 
494 |     Goes through the results in the given directory and generates a
495 |     dictionary for auto-sklearn with the performances on every task
496 |     of the benchmark.
497 | 
498 |     Parameters:
499 |     -----------
500 |     working_directory: str
501 |         The directory where the results are located.
502 |     seed: int
503 |         The seed that was used for the experiment.
504 |     model_name: str
505 |         The name of the model that was used.
506 | 
507 |     Returns:
508 |     --------
509 |     autosklearn_results - dict
510 |         A dictionary with the results of the auto-sklearn algorithm.
511 |         Each key of the dictionary represents a task id, while
512 |         each value corresponds to the performance of the algorithm.
513 |     """
514 |     autosklearn_results = {}
515 |     benchmark_task_file = 'benchmark_datasets.txt'
516 |     benchmark_task_file_path = os.path.join(working_directory, benchmark_task_file)
517 |     result_directory = os.path.join(working_directory, model_name)
518 |     task_ids = get_task_list(benchmark_task_file_path)
519 |     for task_id in task_ids:
520 |         task_result_directory = os.path.join(result_directory, f'{seed}', f'{task_id}', 'results')
521 |         try:
522 |             with open(os.path.join(task_result_directory, 'performance.txt'), 'r') as baseline_file:
523 |                 baseline_test_acc = float(baseline_file.readline())
524 |             autosklearn_results[task_id] = baseline_test_acc
525 |         except FileNotFoundError:
526 |             print(f'Task {task_id} not finished.')
527 |             autosklearn_results[task_id] = None
528 |             continue
529 | 
530 |     return autosklearn_results
531 | 
532 | 
533 | def read_cocktail_values(
534 |     cocktail_result_dir: str,
535 |     benchmark_task_file_dir: str,
536 |     seed: int = 11,
537 |     cocktail_version: str = 'cocktail',
538 | ) -> Dict[int, float]:
539 |     """Prepares the results of the experiment with the regularization
540 |     cocktail.
541 | 
542 |     Goes through the results in the given directory and generates a
543 |     dictionary for the regularization cocktails with the performances
544 |     on every task of the benchmark.
545 | 
546 |     Parameters:
547 |     -----------
548 |     cocktail_result_dir: str
549 |         The directory where the results are located for the regularization
550 |         cocktails.
551 |     benchmark_task_file_dir: str
552 |         The directory where the benchmark task file is located. The file
553 |         contains all the task ids; the file name itself does not
554 |         need to be given.
555 |     seed: int
556 |         The seed that was used for the experiment.
557 | 
558 |     Returns:
559 |     --------
560 |     cocktail_results - dict
561 |         A dictionary with the results of the regularization cocktail method.
562 |         Each key of the dictionary represents a task id, while
563 |         each value corresponds to the performance of the algorithm.
564 |     """
565 |     cocktail_results = {}
566 | 
567 |     result_path = os.path.join(
568 |         cocktail_result_dir,
569 |         cocktail_version,
570 |         '512',
571 |     )
572 | 
573 |     benchmark_task_file = 'benchmark_datasets.txt'
574 |     benchmark_task_file_path = os.path.join(
575 |         benchmark_task_file_dir,
576 |         benchmark_task_file,
577 |     )
578 | 
579 |     task_ids = get_task_list(benchmark_task_file_path)
580 | 
581 |     for task_id in task_ids:
582 |         task_result_path = os.path.join(
583 |             result_path,
584 |             f'{task_id}',
585 |             'refit_run',
586 |             f'{seed}',
587 |         )
588 | 
589 |         if os.path.exists(task_result_path):
590 |             if not os.path.isdir(task_result_path):
591 |                 task_result_path = os.path.join(
592 |                     result_path,
593 |                     f'{task_id}',
594 |                 )
595 |         else:
596 |             task_result_path = os.path.join(
597 |                 result_path,
598 |                 f'{task_id}',
599 |             )
600 | 
601 |         try:
602 |             with open(os.path.join(task_result_path, 'run_results.txt')) as f:
603 |                 test_results = json.load(f)
604 |             cocktail_results[task_id] = test_results['mean_test_bal_acc']
605 |         except FileNotFoundError:
606 |             cocktail_results[task_id] = None
607 | 
608 |     return cocktail_results
609 | 
610 | 
611 | def compare_models(
612 |     baseline_dir: str,
613 |     cocktail_dir: str,
614 | ) -> pd.DataFrame:
615 |     """Prepares the results of the experiments with all methods.
616 | 
617 |     Goes through the results at the given directories and builds
618 |     a table with all the methods over the different tasks.
619 | 
620 |     Parameters:
621 |     -----------
622 |     baseline_dir: str
623 |         The directory where the results are located for the baseline
624 |         methods.
625 |     cocktail_dir: str
626 |         The directory where the results are located for the regularization
627 |         cocktails.
628 | 
629 |     Returns:
630 |     --------
631 |     comparison_table - pd.DataFrame
632 |         A DataFrame with the results for all methods over the different tasks.
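    Example (editorial addition; the paths shown are hypothetical):

        table = compare_models('~/xgboost_results', '~/cocktail_results')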
633 | """ 634 | xgboost_results = read_baseline_values(baseline_dir, model_name='xgboost') 635 | tabnet_results = read_baseline_values(baseline_dir, model_name='tabnet') 636 | cocktail_results = read_cocktail_values(cocktail_dir, baseline_dir, cocktail_version='plain_network') 637 | autosklearn_results = read_autosklearn_values(cocktail_dir) 638 | 639 | table_dict = { 640 | 'Task Id': [], 641 | 'Tabnet': [], 642 | 'XGBoost': [], 643 | 'AutoSklearn': [], 644 | 'Cocktail': [], 645 | } 646 | 647 | cocktail_wins = 0 648 | cocktail_losses = 0 649 | cocktail_ties = 0 650 | autosklearn_looses = 0 651 | autosklearn_ties = 0 652 | autosklearn_wins = 0 653 | cocktail_performances = [] 654 | xgboost_performances = [] 655 | autosklearn_performances = [] 656 | print(cocktail_results) 657 | print(xgboost_results) 658 | 659 | for task_id in xgboost_results: 660 | xgboost_task_result = xgboost_results[task_id] 661 | if xgboost_task_result is None: 662 | continue 663 | tabnet_task_result = tabnet_results[task_id] 664 | cocktail_task_result = cocktail_results[task_id] 665 | autosklearn_task_result = autosklearn_results[task_id] 666 | cocktail_performances.append(cocktail_task_result) 667 | xgboost_performances.append(xgboost_task_result) 668 | autosklearn_performances.append(autosklearn_task_result) 669 | if cocktail_task_result > xgboost_task_result: 670 | cocktail_wins += 1 671 | elif cocktail_task_result < xgboost_task_result: 672 | cocktail_losses += 1 673 | else: 674 | cocktail_ties += 1 675 | if autosklearn_task_result > xgboost_task_result: 676 | autosklearn_wins += 1 677 | elif autosklearn_task_result < xgboost_task_result: 678 | autosklearn_looses += 1 679 | else: 680 | autosklearn_ties += 1 681 | table_dict['Task Id'].append(task_id) 682 | if tabnet_task_result is not None: 683 | table_dict['Tabnet'].append(tabnet_task_result) 684 | else: 685 | table_dict['Tabnet'].append(tabnet_task_result) 686 | table_dict['XGBoost'].append(xgboost_task_result) 687 | table_dict['Cocktail'].append(cocktail_task_result) 688 | table_dict['AutoSklearn'].append(autosklearn_task_result) 689 | 690 | comparison_table = pd.DataFrame.from_dict(table_dict) 691 | print( 692 | comparison_table.to_latex( 693 | index=False, 694 | caption='The performances of the Regularization Cocktail ' 695 | 'and the state-of-the-art competitors ' 696 | 'over the different datasets.', 697 | label='app:cocktail_vs_benchmarks_table', 698 | ) 699 | ) 700 | comparison_table.to_csv(os.path.join(baseline_dir, 'table_comparison.csv'), index=False) 701 | _, p_value = wilcoxon(cocktail_performances, xgboost_performances) 702 | print(f'Cocktail wins: {cocktail_wins}, ties: {cocktail_ties}, looses: {cocktail_losses} against XGBoost') 703 | print(f'P-value: {p_value}') 704 | _, p_value = wilcoxon(xgboost_performances, autosklearn_performances) 705 | print(f'Xgboost vs AutoSklearn, P-value: {p_value}') 706 | print(f'AutoSklearn wins: {autosklearn_wins}, ' 707 | f'ties: {autosklearn_ties}, ' 708 | f'looses: {autosklearn_looses} against XGBoost') 709 | 710 | return comparison_table 711 | 712 | 713 | def build_cd_diagram( 714 | baseline_dir: str, 715 | cocktail_dir: str, 716 | ) -> pd.DataFrame: 717 | """Prepare the results for a critical difference diagram. 718 | 719 | This function prepares all the results into a pandas dataframe 720 | so that it can be used to create a critical difference diagram 721 | of all the methods. 
722 | 
723 |     Parameters:
724 |     -----------
725 |     baseline_dir: str
726 |         The directory where the results are located for the baseline
727 |         methods.
728 |     cocktail_dir: str
729 |         The directory where the results are located for the regularization
730 |         cocktails.
731 | 
732 |     Returns:
733 |     --------
734 |     result_df: pd.DataFrame
735 |         A table with the accuracies of all methods over the different tasks.
736 |         The results are prepared in such a way that a critical difference
737 |         diagram can be generated from the pandas dataframe.
738 |     """
739 |     xgboost_results = read_baseline_values(baseline_dir, model_name='xgboost')
740 |     tabnet_results = read_baseline_values(baseline_dir, model_name='tabnet')
741 |     cocktail_results = read_cocktail_values(cocktail_dir, baseline_dir)
742 |     autosklearn_results = read_autosklearn_values(cocktail_dir)
743 | 
744 |     models = ['Regularization Cocktail', 'XGBoost', 'AutoSklearn-GB', 'TabNet']
745 |     table_results = {
746 |         'Network': [],
747 |         'Task Id': [],
748 |         'Balanced Accuracy': [],
749 |     }
750 |     for task_id in cocktail_results:
751 |         for model_name in models:
752 |             if model_name == 'Regularization Cocktail':
753 |                 task_result = cocktail_results[task_id]
754 |             elif model_name == 'XGBoost':
755 |                 task_result = xgboost_results[task_id]
756 |             elif model_name == 'TabNet':
757 |                 task_result = tabnet_results[task_id]
758 |             elif model_name == 'AutoSklearn-GB':
759 |                 task_result = autosklearn_results[task_id]
760 |             else:
761 |                 raise ValueError('Illegal model value')
762 |             # the readers mark missing results with None; score them
763 |             # as 0 so that every method has a value for every task.
764 |             if task_result is None:
765 |                 task_result = 0
766 |                 print(f'No results for task: {task_id} for model: {model_name}')
767 |             table_results['Network'].append(model_name)
768 |             table_results['Task Id'].append(task_id)
769 |             table_results['Balanced Accuracy'].append(task_result)
770 | 
771 |     result_df = pd.DataFrame(data=table_results)
772 |     result_df.to_csv(os.path.join(baseline_dir, 'cd_data.csv'), index=False)
773 | 
774 |     return result_df
775 | 
776 | 
777 | def generate_ranks_data(
778 |     all_data: pd.DataFrame,
779 | ):
780 |     """Generate the ranks of all methods over the different tasks.
781 | 
782 |     Parameters
783 |     ----------
784 |     all_data: pd.DataFrame
785 |         A dataframe where each row holds one task's values across the
786 |         different models.
787 | 
788 |     Returns
789 |     -------
790 |     ranks_df: pd.DataFrame
791 |         A dataframe of the ranks of all methods over the different tasks.
792 |     """
793 |     all_ranked_data = []
794 |     all_data.drop(columns=['Task Id'], inplace=True)
795 |     column_names = all_data.columns
796 | 
797 |     for row in all_data.itertuples(index=False):
798 |         task_regularization_data = list(row)
799 |         task_ranked_data = rankdata(
800 |             task_regularization_data,
801 |             method='dense',
802 |         )
803 | 
804 |         # rankdata assigns rank 1 to the lowest accuracy; flip the dense
805 |         # ranks so that rank 1 corresponds to the best performing method.
806 |         reversed_data = len(task_ranked_data) + 1 - task_ranked_data.astype(int)
807 |         all_ranked_data.append(reversed_data)
808 | 
809 |     ranks_df = pd.DataFrame(all_ranked_data, columns=column_names)
810 | 
811 |     return ranks_df
812 | 
813 | 
814 | def compare_cocktail_versions(
815 |     cocktail_result_folder: str,
816 |     benchmark_file_path: str,
817 | ) -> pd.DataFrame:
818 |     """Prepares the results of the experiments with the different
819 |     cocktail versions.
820 | 
821 |     Goes through the results at the given directories and builds
822 |     a table with the different cocktail versions over the different
823 |     tasks.
824 | 
825 |     Parameters:
826 |     -----------
827 |     cocktail_result_folder: str
828 |         The folder directory where the results are located for the
829 |         regularization cocktails.
830 |     benchmark_file_path: str
831 |         The directory where the benchmark task file is located.
832 |         The file contains all the task ids; the file name itself
833 |         does not need to be given.
834 | 
835 |     Returns:
836 |     --------
837 |     comparison_table - pd.DataFrame
838 |         A DataFrame with the results of the two cocktail versions over the different tasks.
839 |     """
840 |     fixed_cocktail_results = read_cocktail_values(
841 |         cocktail_result_folder,
842 |         benchmark_file_path,
843 |         cocktail_version='cocktail',
844 |     )
845 |     dynamic_cocktail_results = read_cocktail_values(
846 |         cocktail_result_folder,
847 |         benchmark_file_path,
848 |         cocktail_version='cocktail_lr',
849 |     )
850 | 
851 |     table_dict = {
852 |         'Task Id': [],
853 |         'Fixed Lr Cocktail': [],
854 |         'Dynamic Lr Cocktail': [],
855 |     }
856 | 
857 |     cocktail_fixed_wins = 0
858 |     cocktail_fixed_losses = 0
859 |     cocktail_fixed_ties = 0
860 |     fixed_cocktail_performances = []
861 |     dynamic_cocktail_performances = []
862 | 
863 |     for task_id in fixed_cocktail_results:
864 | 
865 |         fixed_cocktail_task_result = fixed_cocktail_results[task_id]
866 |         dynamic_cocktail_task_result = dynamic_cocktail_results[task_id]
867 | 
868 |         fixed_cocktail_performances.append(fixed_cocktail_task_result)
869 |         dynamic_cocktail_performances.append(dynamic_cocktail_task_result)
870 | 
871 |         if fixed_cocktail_task_result > dynamic_cocktail_task_result:
872 |             cocktail_fixed_wins += 1
873 |         elif fixed_cocktail_task_result < dynamic_cocktail_task_result:
874 |             cocktail_fixed_losses += 1
875 |         else:
876 |             cocktail_fixed_ties += 1
877 | 
878 |         table_dict['Task Id'].append(task_id)
879 |         table_dict['Fixed Lr Cocktail'].append(f'{fixed_cocktail_task_result * 100:.3f}')
880 |         table_dict['Dynamic Lr Cocktail'].append(f'{dynamic_cocktail_task_result * 100:.3f}')
881 | 
882 | 
883 |     comparison_table = pd.DataFrame.from_dict(table_dict)
884 |     print(
885 |         comparison_table.to_latex(
886 |             index=False,
887 |             caption='The performances of the fixed learning rate and the '
888 |                     'dynamic learning rate versions of the Regularization '
889 |                     'Cocktail over the different datasets.',
890 |             label='app:cocktail_versions_table',
891 |         )
892 |     )
893 | 
894 |     _, p_value = wilcoxon(fixed_cocktail_performances, dynamic_cocktail_performances)
895 |     print(f'Fixed Lr Cocktail wins: {cocktail_fixed_wins}, '
896 |           f'ties: {cocktail_fixed_ties}, '
897 |           f'losses: {cocktail_fixed_losses} against Dynamic Lr Cocktail')
898 |     print(f'P-value: {p_value}')
899 | 
900 |     return comparison_table
901 | 
902 | 
903 | xgboost_dir = os.path.expanduser(
904 |     os.path.join(
905 |         '~',
906 |         'Desktop',
907 |         'xgboost_results',
908 |     )
909 | )
910 | 
911 | 
912 | cocktail_dir = os.path.expanduser(
913 |     os.path.join(
914 |         '~',
915 |         'Desktop',
916 |         'PhD',
917 |         'Rezultate',
918 |         'RegularizationCocktail',
919 |         'NEMO',
920 |     )
921 | )
922 | """
923 | compare_models(
924 |     xgboost_dir,
925 |     cocktail_dir
926 | )
927 | compare_cocktail_versions(
928 |     cocktail_dir,
929 |     xgboost_dir,
930 | )"""
--------------------------------------------------------------------------------
/baselines/node/node_experiment.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import os
4 | import time
5 | from typing import List
6 | 
7 | import numpy as np
8 | 
9 | import openml
10 | 
11 | from category_encoders import LeaveOneOutEncoder
12 | 
13 | from qhoptim.pyt import QHAdam
14 | 
15 | from sklearn.model_selection import ParameterGrid, train_test_split
16 | from sklearn.preprocessing import LabelEncoder
17 | from sklearn.metrics import balanced_accuracy_score
18 | 
19 | import torch
20 | import torch.nn as nn
21 | import torch.nn.functional as F
22 | 
23 | import lib
24 | from lib.utils import check_numpy, process_in_chunks
25 | 
26 | 
27 | def get_task_list(
28 |     benchmark_task_file: str = 'path/to/tasks.txt',
29 | ) -> List[int]:
30 |     """Get the task id list.
31 |     Goes through the given file and collects all of the task
32 |     ids.
33 |     Parameters:
34 |     -----------
35 |     benchmark_task_file: str
36 |         A string with the path to the benchmark task file, including
37 |         the task file name.
38 |     Returns:
39 |     --------
40 |     benchmark_task_ids - list
41 |         A list of all the task ids for the benchmark.
42 |     """
43 |     with open(benchmark_task_file, 'r') as f:
44 |         benchmark_info_str = f.readline()
45 |         benchmark_task_ids = [int(task_id) for task_id in benchmark_info_str.split(' ')]
46 | 
47 |     return benchmark_task_ids
48 | 
49 | 
50 | def get_data(
51 |     task_id: int,
52 |     test_size: float = 0.2,
53 |     validation_size: float = 0.25,
54 |     seed: int = 11,
55 | ):
56 |     task = openml.tasks.get_task(task_id=task_id)
57 |     dataset = task.get_dataset()
58 |     X, y, categorical_indicator, _ = dataset.get_data(
59 |         dataset_format='dataframe',
60 |         target=dataset.default_target_attribute,
61 |     )
62 |     label_encoder = LabelEncoder()
63 |     y = label_encoder.fit_transform(y)
64 |     X_train, X_test, y_train, y_test = train_test_split(
65 |         X,
66 |         y,
67 |         test_size=test_size,
68 |         random_state=seed,
69 |         stratify=y,
70 |     )
71 |     if validation_size != 0:
72 |         X_train, X_val, y_train, y_val = train_test_split(
73 |             X_train,
74 |             y_train,
75 |             test_size=validation_size,
76 |             random_state=seed,
77 |             stratify=y_train,
78 |         )
79 |     else:
80 |         X_val = None
81 |         y_val = None
82 | 
83 |     # the code below drops columns that are completely null in the
84 |     # train set; such columns may still hold values in the validation
85 |     # and test sets, so they are removed from every split.
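    # [Editorial note, not in the original file] e.g. with
    #     X_train = pd.DataFrame({'a': [1.0, 2.0], 'b': [np.nan, np.nan]})
    # train_column_nan_info marks 'b' as all-NaN, so 'b' is dropped from the
    # train, validation and test frames alike.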
86 | train_column_nan_info = X_train.isna().all() 87 | only_nan_columns = [label for label, value in train_column_nan_info.items() if value] 88 | only_nan_columns = set(only_nan_columns) 89 | X_train.drop(only_nan_columns, axis='columns', inplace=True) 90 | X_test.drop(only_nan_columns, axis='columns', inplace=True) 91 | 92 | if validation_size != 0: 93 | X_val.drop(only_nan_columns, axis='columns', inplace=True) 94 | 95 | cat_encoder = LeaveOneOutEncoder() 96 | column_names = X_train.columns.to_numpy() 97 | categorical_column_names = [column_name for column_indicator, column_name in zip(categorical_indicator, column_names) if column_indicator] 98 | 99 | cat_encoder.fit(X_train[categorical_column_names], y_train) 100 | X_train[categorical_column_names] = cat_encoder.transform(X_train[categorical_column_names]) 101 | if validation_size != 0: 102 | X_val[categorical_column_names] = cat_encoder.transform(X_val[categorical_column_names]) 103 | X_val = X_val.values.astype('float32') 104 | 105 | X_test[categorical_column_names] = cat_encoder.transform(X_test[categorical_column_names]) 106 | X_train = X_train.values.astype('float32') 107 | X_test = X_test.values.astype('float32') 108 | 109 | dataset_name = dataset.name 110 | 111 | return { 112 | 'X_train': X_train, 113 | 'y_train': y_train, 114 | 'X_val': X_val, 115 | 'y_val': y_val, 116 | 'X_test': X_test, 117 | 'y_test': y_test, 118 | 'name': dataset_name, 119 | } 120 | 121 | 122 | def get_node_dataset( 123 | task_id: int, 124 | test_size: float = 0.2, 125 | validation_size: float = 0.25, 126 | seed: int = 11, 127 | refit=False, 128 | ): 129 | if not refit: 130 | data_splits = get_data( 131 | task_id, 132 | seed=seed, 133 | test_size=test_size, 134 | validation_size=validation_size, 135 | ) 136 | 137 | else: 138 | data_splits = get_data( 139 | task_id, 140 | seed=seed, 141 | test_size=test_size, 142 | validation_size=0, 143 | ) 144 | 145 | node_dataset = lib.Dataset( 146 | dataset=data_splits['name'], 147 | random_state=seed, 148 | quantile_transform=True, 149 | quantile_noise=1e-3, 150 | X_train=data_splits['X_train'], 151 | X_valid=data_splits['X_val'], 152 | X_test=data_splits['X_test'], 153 | y_train=data_splits['y_train'], 154 | y_valid=data_splits['y_val'], 155 | y_test=data_splits['y_test'], 156 | ) 157 | 158 | return node_dataset 159 | 160 | 161 | def evaluate_balanced_classification_error( 162 | trainer, 163 | X_test, 164 | y_test, 165 | device, 166 | batch_size=128, 167 | ): 168 | X_test = torch.as_tensor(X_test, device=device) 169 | y_test = check_numpy(y_test) 170 | trainer.train(False) 171 | with torch.no_grad(): 172 | logits = process_in_chunks(trainer.model, X_test, batch_size=batch_size) 173 | logits = check_numpy(logits) 174 | y_pred = np.argmax(logits, axis=1) 175 | 176 | error_rate = 1 - balanced_accuracy_score(y_test, y_pred) 177 | 178 | return error_rate 179 | 180 | 181 | def evaluate_node( 182 | data, 183 | config, 184 | device, 185 | experiment_name, 186 | epochs=105, 187 | batch_size=128, 188 | refit=False, 189 | ): 190 | config_start_time = time.time() 191 | num_examples = data.X_train.shape[0] 192 | num_features = data.X_train.shape[1] 193 | num_classes = len(set(data.y_train)) 194 | 195 | model = nn.Sequential( 196 | lib.DenseBlock( 197 | num_features, 198 | layer_dim=config['total_tree_count'], 199 | num_layers=config['num_layers'], 200 | tree_dim=num_classes + 1, 201 | flatten_output=False, 202 | depth=config['tree_depth'], 203 | choice_function=lib.entmax15, 204 | bin_function=lib.entmoid15, 205 | ), 206 | 
lib.Lambda(lambda x: x[..., :num_classes].mean(dim=-2)), 207 | ).to(device) 208 | 209 | with torch.no_grad(): 210 | res = model(torch.as_tensor(data.X_train[:batch_size], device=device)) 211 | # trigger data-aware init 212 | 213 | if torch.cuda.device_count() > 1: 214 | model = nn.DataParallel(model) 215 | 216 | trainer = lib.Trainer( 217 | model=model, 218 | loss_function=F.cross_entropy, 219 | experiment_name=experiment_name, 220 | warm_start=False, 221 | Optimizer=QHAdam, 222 | optimizer_params=dict(nus=(0.7, 1.0), betas=(0.95, 0.998)), 223 | verbose=True, 224 | n_last_checkpoints=5 225 | ) 226 | 227 | loss_history, err_history = [], [] 228 | best_val_err = 1.0 229 | best_step = 0 230 | 231 | # calculate the number of early stopping rounds to 232 | # be around 10 epochs. Allow incomplete batches. 233 | number_batches_epoch = int(np.ceil(num_examples / batch_size)) 234 | early_stopping_rounds = 10 * number_batches_epoch 235 | report_frequency = number_batches_epoch 236 | print(early_stopping_rounds) 237 | # Flag if early stopping is hit or not 238 | early_stopping_activated = False 239 | 240 | for batch in lib.iterate_minibatches( 241 | data.X_train, 242 | data.y_train, 243 | batch_size=batch_size, 244 | shuffle=True, 245 | epochs=epochs, 246 | ): 247 | metrics = trainer.train_on_batch( 248 | *batch, 249 | device=device, 250 | ) 251 | 252 | loss_history.append(metrics['loss'].item()) 253 | 254 | # calculate the information below on every epoch 255 | if trainer.step % report_frequency == 0: 256 | train_err = evaluate_balanced_classification_error( 257 | trainer, 258 | data.X_train, 259 | data.y_train, 260 | device=device, 261 | batch_size=batch_size, 262 | ) 263 | if not refit: 264 | val_err = evaluate_balanced_classification_error( 265 | trainer, 266 | data.X_valid, 267 | data.y_valid, 268 | device=device, 269 | batch_size=batch_size, 270 | ) 271 | err_history.append(val_err) 272 | print("Val Error Rate: %0.5f" % (val_err)) 273 | 274 | if val_err < best_val_err: 275 | best_val_err = val_err 276 | best_step = trainer.step 277 | trainer.save_checkpoint(tag='best') 278 | 279 | print("Loss %.5f" % (metrics['loss'])) 280 | print("Train Error Rate: %0.5f" % (train_err)) 281 | 282 | if not refit: 283 | if trainer.step > best_step + early_stopping_rounds: 284 | print('BREAK. There is no improvement for {} steps'.format(early_stopping_rounds)) 285 | print("Best step: ", best_step) 286 | print("Best Val Error Rate: %0.5f" % (best_val_err)) 287 | early_stopping_activated = True 288 | break 289 | 290 | config_duration = time.time() - config_start_time 291 | 292 | if early_stopping_activated: 293 | best_epoch = int(best_step / report_frequency) 294 | else: 295 | best_epoch = int(trainer.step / report_frequency) 296 | # save the model in the end 297 | trainer.save_checkpoint(tag='best') 298 | 299 | # we will always have a best checkpoint, be it 300 | # from early stopping, be it from the normal training. 
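    # [Editorial note, not in the original file] load_checkpoint with
    # tag='best' restores the weights saved by save_checkpoint(tag='best')
    # above, so the error rates computed next come from the best model seen
    # during training rather than from the final training step.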
301 | trainer.load_checkpoint(tag='best') 302 | train_error_rate = evaluate_balanced_classification_error( 303 | trainer, 304 | data.X_train, 305 | data.y_train, 306 | device=device, 307 | batch_size=batch_size, 308 | ) 309 | if not refit: 310 | val_error_rate = evaluate_balanced_classification_error( 311 | trainer, 312 | data.X_valid, 313 | data.y_valid, 314 | device=device, 315 | batch_size=batch_size, 316 | ) 317 | else: 318 | val_error_rate = None 319 | 320 | test_error_rate = evaluate_balanced_classification_error( 321 | trainer, 322 | data.X_test, 323 | data.y_test, 324 | device=device, 325 | batch_size=batch_size, 326 | ) 327 | 328 | run_information = { 329 | 'train_error': train_error_rate, 330 | 'val_error': val_error_rate, 331 | 'test_error': test_error_rate, 332 | 'best_epoch': best_epoch, 333 | 'duration': config_duration 334 | } 335 | 336 | return run_information 337 | 338 | 339 | def predict_node( 340 | data, 341 | config, 342 | device, 343 | experiment_name, 344 | batch_size=128, 345 | refit=True, 346 | ): 347 | num_features = data.X_train.shape[1] 348 | num_classes = len(set(data.y_train)) 349 | 350 | model = nn.Sequential( 351 | lib.DenseBlock( 352 | num_features, 353 | layer_dim=config['total_tree_count'], 354 | num_layers=config['num_layers'], 355 | tree_dim=num_classes + 1, 356 | flatten_output=False, 357 | depth=config['tree_depth'], 358 | choice_function=lib.entmax15, 359 | bin_function=lib.entmoid15, 360 | ), 361 | lib.Lambda(lambda x: x[..., :num_classes].mean(dim=-2)), 362 | ).to(device) 363 | 364 | with torch.no_grad(): 365 | res = model(torch.as_tensor(data.X_train[:batch_size], device=device)) 366 | # trigger data-aware init 367 | 368 | if torch.cuda.device_count() > 1: 369 | model = nn.DataParallel(model) 370 | 371 | trainer = lib.Trainer( 372 | model=model, 373 | warm_start=True, 374 | loss_function=F.cross_entropy, 375 | experiment_name=experiment_name, 376 | Optimizer=QHAdam, 377 | optimizer_params=dict(nus=(0.7, 1.0), betas=(0.95, 0.998)), 378 | verbose=True, 379 | n_last_checkpoints=5 380 | ) 381 | # we will always have a best checkpoint, be it 382 | # from early stopping, be it from the normal training. 
383 |     trainer.load_checkpoint(tag='best')
384 | 
385 |     train_error_rate = evaluate_balanced_classification_error(
386 |         trainer,
387 |         data.X_train,
388 |         data.y_train,
389 |         device=device,
390 |         batch_size=batch_size,
391 |     )
392 |     if not refit:
393 |         val_error_rate = evaluate_balanced_classification_error(
394 |             trainer,
395 |             data.X_valid,
396 |             data.y_valid,
397 |             device=device,
398 |             batch_size=batch_size,
399 |         )
400 |     else:
401 |         val_error_rate = None
402 | 
403 |     test_error_rate = evaluate_balanced_classification_error(
404 |         trainer,
405 |         data.X_test,
406 |         data.y_test,
407 |         device=device,
408 |         batch_size=batch_size,
409 |     )
410 | 
411 |     run_information = {
412 |         'train_error': train_error_rate,
413 |         'val_error': val_error_rate,
414 |         'test_error': test_error_rate,
415 |     }
416 | 
417 |     return run_information
418 | 
419 | parser = argparse.ArgumentParser(
420 |     description='Run node on a benchmark'
421 | )
422 | # experiment setup arguments
423 | parser.add_argument(
424 |     '--task_id',
425 |     type=int,
426 |     default=233090,
427 | )
428 | parser.add_argument(
429 |     '--batch_size',
430 |     type=int,
431 |     default=128,
432 | )
433 | parser.add_argument(
434 |     '--epochs',
435 |     type=int,
436 |     default=1,
437 | )
438 | parser.add_argument(
439 |     '--test_size',
440 |     type=float,
441 |     default=0.2,
442 | )
443 | parser.add_argument(
444 |     '--validation_size',
445 |     type=float,
446 |     default=0.25,
447 | )
448 | parser.add_argument(
449 |     '--seed',
450 |     type=int,
451 |     default=11,
452 | )
453 | parser.add_argument(
454 |     '--device',
455 |     type=str,
456 |     default="cpu",
457 | )
458 | parser.add_argument(
459 |     '--output_dir',
460 |     type=str,
461 |     default="./node_experiments",
462 | )
463 | 
464 | args = parser.parse_args()
465 | options = vars(args)
466 | print(options)
467 | 
468 | 
469 | if __name__ == '__main__':
470 | 
471 |     print("Experiment Started")
472 |     start_time = time.time()
473 |     # set to True to run the grid-search HPO phase instead of the default configuration
474 |     hpo_phase = False
475 |     task_dir = os.path.expanduser(
476 |         os.path.join(
477 |             args.output_dir,
478 |             f'{args.seed}',
479 |             f'{args.task_id}',
480 |         )
481 |     )
482 |     data = get_node_dataset(
483 |         seed=args.seed,
484 |         task_id=args.task_id,
485 |         test_size=args.test_size,
486 |         validation_size=args.validation_size,
487 |         refit=False,
488 |     )
489 |     if hpo_phase:
490 |         # Start HPO Phase
491 |         print("HPO Phase started")
492 | 
493 |         # ParameterGrid expects list-like values; sets are rejected
494 |         param_grid = ParameterGrid({
495 |             'num_layers': [2, 4, 8],
496 |             'total_tree_count': [1024, 2048],
497 |             'tree_depth': [6, 8],
498 |             'tree_output_dim': [2, 3],
499 |         })
500 |         results = []
501 |         for config_counter, params in enumerate(param_grid):
502 |             config_dir = os.path.join(task_dir, f'{config_counter}')
503 |             print(params)
504 |             run_information = evaluate_node(
505 |                 batch_size=args.batch_size,
506 |                 refit=False,
507 |                 data=data,
508 |                 config=params,
509 |                 device=args.device,
510 |                 experiment_name=config_dir,
511 |                 epochs=args.epochs,
512 |             )
513 |             print(params)
514 |             print(run_information)
515 |             results.append(
516 |                 {
517 |                     'val_error': run_information['val_error'],
518 |                     'best_epoch': run_information['best_epoch'],
519 |                     'config': params,
520 |                 }
521 |             )
522 | 
523 |         incumbent = sorted(results, key=lambda result: result['val_error'])[0]
524 |         print(f"Best results, with validation error: {incumbent['val_error']}, "
525 |               f"configuration: {incumbent['config']}")
526 |         best_config = incumbent['config']
527 |         best_epoch = incumbent['best_epoch']
528 |     else:
529 |         best_config = {
530 |             'num_layers': 2,
531 |             'total_tree_count': 1024,
532 |             'tree_depth': 6,
533 | 
'tree_output_dim': 2, 534 | } 535 | run_information = evaluate_node( 536 | batch_size=args.batch_size, 537 | refit=False, 538 | data=data, 539 | config=best_config, 540 | device=args.device, 541 | experiment_name=os.path.join(task_dir, 'run'), 542 | epochs=args.epochs, 543 | ) 544 | best_epoch = run_information['best_epoch'] 545 | 546 | # Start Refit Phase 547 | print("Refit Started") 548 | refit_dir = os.path.join(task_dir, 'refit') 549 | print(f'Best epoch found for task: {args.task_id} in refit is: {best_epoch}') 550 | data = get_node_dataset( 551 | seed=args.seed, 552 | task_id=args.task_id, 553 | test_size=args.test_size, 554 | validation_size=0, 555 | refit=True, 556 | ) 557 | 558 | run_information = evaluate_node( 559 | batch_size=args.batch_size, 560 | refit=True, 561 | data=data, 562 | config=best_config, 563 | device=args.device, 564 | experiment_name=refit_dir, 565 | epochs=best_epoch, 566 | ) 567 | 568 | duration = time.time() - start_time 569 | os.makedirs(task_dir, exist_ok=True) 570 | 571 | result_dir = os.path.join( 572 | task_dir, 573 | 'results.json', 574 | ) 575 | 576 | result_dict = { 577 | 'train balanced accuracy': 1 - run_information['train_error'], 578 | 'test balanced accuracy': 1 - run_information['test_error'], 579 | 'task_id': args.task_id, 580 | 'duration': duration, 581 | } 582 | 583 | with open(result_dir, 'w') as file: 584 | json.dump(result_dict, file) -------------------------------------------------------------------------------- /cocktails/main_experiment.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | import pickle 5 | import random 6 | import time 7 | import warnings 8 | 9 | # this corresponds to the number of threads 10 | os.environ['OMP_NUM_THREADS'] = '1' 11 | os.environ['OPENBLAS_NUM_THREADS'] = '1' 12 | os.environ['MKL_NUM_THREADS'] = '1' 13 | 14 | warnings.simplefilter(action='ignore', category=UserWarning) 15 | warnings.simplefilter(action='ignore', category=FutureWarning) 16 | 17 | import torch 18 | 19 | from autoPyTorch.api.tabular_classification import TabularClassificationTask 20 | from autoPyTorch.datasets.resampling_strategy import HoldoutValTypes, NoResamplingStrategyTypes 21 | from autoPyTorch.data.tabular_validator import TabularInputValidator 22 | from autoPyTorch.datasets.tabular_dataset import TabularDataset 23 | from autoPyTorch import metrics 24 | 25 | import numpy as np 26 | 27 | from utilities import \ 28 | get_data, \ 29 | get_incumbent_results, \ 30 | get_smac_object, \ 31 | get_updates_for_regularization_cocktails 32 | 33 | 34 | def str2bool(v): 35 | if isinstance(v, bool): 36 | return [v, ] 37 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 38 | return [True, ] 39 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 40 | return [False, ] 41 | elif v.lower() == 'conditional': 42 | return [True, False] 43 | else: 44 | raise argparse.ArgumentTypeError('No valid value given.') 45 | 46 | 47 | parser = argparse.ArgumentParser( 48 | description='Run AutoPyTorch on a benchmark.', 49 | ) 50 | # experiment setup arguments 51 | parser.add_argument( 52 | '--task_id', 53 | type=int, 54 | default=233088, 55 | ) 56 | parser.add_argument( 57 | '--wall_time', 58 | type=int, 59 | default=9000, 60 | ) 61 | parser.add_argument( 62 | '--func_eval_time', 63 | type=int, 64 | default=1000, 65 | ) 66 | parser.add_argument( 67 | '--epochs', 68 | type=int, 69 | default=105, 70 | ) 71 | parser.add_argument( 72 | '--seed', 73 | type=int, 74 | default=11, 75 | ) 76 
| parser.add_argument(
77 |     '--tmp_dir',
78 |     type=str,
79 |     default='./runs/autoPyTorch_cocktails',
80 | )
81 | parser.add_argument(
82 |     '--output_dir',
83 |     type=str,
84 |     default='./runs/autoPyTorch_cocktails',
85 | )
86 | parser.add_argument(
87 |     '--nr_workers',
88 |     type=int,
89 |     default=1,
90 | )
91 | parser.add_argument(
92 |     '--nr_threads',
93 |     type=int,
94 |     default=1,
95 | )
96 | parser.add_argument(
97 |     '--cash_cocktail',
98 |     help='If the regularization cocktail should be used.',
99 |     type=lambda v: v.lower() in ('yes', 'true', 't', 'y', '1'),  # type=bool would treat any non-empty string as True
100 |     default=False,
101 | )
102 | 
103 | # regularization ingredient arguments
104 | parser.add_argument(
105 |     '--use_swa',
106 |     help='If stochastic weight averaging should be used.',
107 |     type=str2bool,
108 |     nargs='?',
109 |     const=[True],
110 |     default=[False],
111 | )
112 | parser.add_argument(
113 |     '--use_se',
114 |     help='If snapshot ensembling should be used.',
115 |     type=str2bool,
116 |     nargs='?',
117 |     const=[True],
118 |     default=[False],
119 | )
120 | parser.add_argument(
121 |     '--use_lookahead',
122 |     help='If the lookahead optimizing technique should be used.',
123 |     type=str2bool,
124 |     nargs='?',
125 |     const=[True],
126 |     default=[False],
127 | )
128 | parser.add_argument(
129 |     '--use_weight_decay',
130 |     help='If weight decay regularization should be used.',
131 |     type=str2bool,
132 |     nargs='?',
133 |     const=[True],
134 |     default=[False],
135 | )
136 | parser.add_argument(
137 |     '--use_batch_normalization',
138 |     help='If batch normalization regularization should be used.',
139 |     type=str2bool,
140 |     nargs='?',
141 |     const=[True],
142 |     default=[False],
143 | )
144 | parser.add_argument(
145 |     '--use_skip_connection',
146 |     help='If skip connections should be used. '
147 |          'Turns the network into a residual network.',
148 |     type=str2bool,
149 |     nargs='?',
150 |     const=[True],
151 |     default=[False],
152 | )
153 | parser.add_argument(
154 |     '--use_dropout',
155 |     help='If dropout regularization should be used.',
156 |     type=str2bool,
157 |     nargs='?',
158 |     const=[True],
159 |     default=[False],
160 | )
161 | parser.add_argument(
162 |     '--mb_choice',
163 |     help='Multibranch network regularization. 
' 164 | 'Only active when skip_connection is active.', 165 | type=str, 166 | choices=['none', 'shake-shake', 'shake-drop'], 167 | default='none', 168 | ) 169 | parser.add_argument( 170 | '--augmentation', 171 | help='If methods that augment examples should be used', 172 | type=str, 173 | choices=['mixup', 'cutout', 'cutmix', 'standard', 'adversarial'], 174 | default='standard', 175 | ) 176 | 177 | 178 | args = parser.parse_args() 179 | options = vars(args) 180 | print(options) 181 | 182 | 183 | hps_for_method = { 184 | 'stochastic_weight_averaging': 0, 185 | 'snapshot_ensembling': 0, 186 | 'batch_normalization': 0, 187 | 'skip_connection': 0, 188 | 'shake_shake': 0, 189 | 'adversarial_training': 0, 190 | 'cutmix': 1, 191 | 'mixup': 1, 192 | 'weight_decay': 1, 193 | 'shake_drop': 1, 194 | 'lookahead': 2, 195 | 'cutout': 2, 196 | 'dropout': 2, 197 | } 198 | 199 | 200 | if __name__ == '__main__': 201 | 202 | # Setting up reproducibility 203 | torch.backends.cudnn.deterministic = True 204 | torch.backends.cudnn.benchmark = False 205 | torch.manual_seed(args.seed) 206 | np.random.seed(args.seed) 207 | random.seed(args.seed) 208 | 209 | number_of_configurations_limit = 0 210 | 211 | if args.cash_cocktail: 212 | # for the cocktail we use 840 configurations 213 | number_of_configurations_limit = 840 214 | else: 215 | method_number_of_hps = 0 216 | if any(args.use_swa): 217 | method_number_of_hps = hps_for_method['stochastic_weight_averaging'] 218 | elif any(args.use_se): 219 | method_number_of_hps = hps_for_method['snapshot_ensembling'] 220 | elif any(args.use_batch_normalization): 221 | method_number_of_hps = hps_for_method['batch_normalization'] 222 | elif any(args.use_skip_connection) and args.mb_choice == 'none': 223 | method_number_of_hps = hps_for_method['skip_connection'] 224 | elif any(args.use_skip_connection) and args.mb_choice == 'shake-shake': 225 | method_number_of_hps = hps_for_method['shake_shake'] 226 | elif any(args.use_skip_connection) and args.mb_choice == 'shake-drop': 227 | method_number_of_hps = hps_for_method['shake_drop'] 228 | elif args.augmentation == 'cutmix': 229 | method_number_of_hps = hps_for_method['cutmix'] 230 | elif args.augmentation == 'mixup': 231 | method_number_of_hps = hps_for_method['mixup'] 232 | elif args.augmentation == 'cutout': 233 | method_number_of_hps = hps_for_method['cutout'] 234 | elif args.augmentation == 'adversarial': 235 | method_number_of_hps = hps_for_method['adversarial_training'] 236 | elif any(args.use_dropout): 237 | method_number_of_hps = hps_for_method['dropout'] 238 | elif any(args.use_weight_decay): 239 | method_number_of_hps = hps_for_method['weight_decay'] 240 | elif any(args.use_lookahead): 241 | method_number_of_hps = hps_for_method['lookahead'] 242 | 243 | number_of_configurations_limit = 40 * method_number_of_hps 244 | 245 | print(f'Number of configurations limit: {number_of_configurations_limit}') 246 | 247 | ############################################################################ 248 | # Data Loading 249 | # ============ 250 | start_time = time.time() 251 | 252 | X_train, X_test, y_train, y_test, resampling_strategy_args, categorical_indicator = get_data( 253 | task_id=args.task_id, 254 | seed=args.seed, 255 | ) 256 | 257 | pipeline_update, search_space_updates, include_updates = get_updates_for_regularization_cocktails( 258 | categorical_indicator, 259 | args, 260 | ) 261 | 262 | output_dir = os.path.expanduser( 263 | os.path.join( 264 | args.output_dir, 265 | f'{args.seed}', 266 | f'{args.task_id}', 267 | 
f'{args.task_id}_out', 268 | ) 269 | ) 270 | temp_dir = os.path.expanduser( 271 | os.path.join( 272 | args.tmp_dir, 273 | f'{args.seed}', 274 | f'{args.task_id}', 275 | f'{args.task_id}_tmp', 276 | ) 277 | ) 278 | 279 | ############################################################################ 280 | # Build and fit a classifier 281 | # ========================== 282 | # if we use HPO, we can use multiple workers in parallel 283 | if number_of_configurations_limit != 0: 284 | nr_workers = args.nr_workers 285 | else: 286 | nr_workers = 1 287 | 288 | api = TabularClassificationTask( 289 | temporary_directory=temp_dir, 290 | output_directory=output_dir, 291 | delete_tmp_folder_after_terminate=False, 292 | delete_output_folder_after_terminate=False, 293 | resampling_strategy=HoldoutValTypes.stratified_holdout_validation, 294 | resampling_strategy_args=resampling_strategy_args, 295 | ensemble_size=1, 296 | ensemble_nbest=1, 297 | max_models_on_disc=10, 298 | include_components=include_updates, 299 | search_space_updates=search_space_updates, 300 | seed=args.seed, 301 | n_jobs=nr_workers, 302 | n_threads=args.nr_threads, 303 | ) 304 | 305 | api.set_pipeline_config(**pipeline_update) 306 | ############################################################################ 307 | # Search for the best hp configuration 308 | # ==================================== 309 | # We search for the best hp configuration only in the case of a cocktail ingredient 310 | # that has hyperparameters. 311 | if number_of_configurations_limit != 0: 312 | api.search( 313 | X_train=X_train.copy(), 314 | y_train=y_train.copy(), 315 | X_test=X_test.copy(), 316 | y_test=y_test.copy(), 317 | optimize_metric='balanced_accuracy', 318 | total_walltime_limit=args.wall_time, 319 | memory_limit=12000, 320 | func_eval_time_limit_secs=args.func_eval_time, 321 | enable_traditional_pipeline=False, 322 | get_smac_object_callback=get_smac_object, 323 | smac_scenario_args={ 324 | 'runcount_limit': number_of_configurations_limit, 325 | }, 326 | ) 327 | 328 | # Dump the pipeline for reuse in the future 329 | pickle_directory = os.path.expanduser( 330 | os.path.join( 331 | args.output_dir, 332 | f'{args.seed}', 333 | f'{args.task_id}', 334 | 'estimator.pickle', 335 | ) 336 | ) 337 | with open(pickle_directory, 'wb') as file_handle: 338 | pickle.dump(api, file_handle, protocol=pickle.HIGHEST_PROTOCOL) 339 | 340 | ############################################################################ 341 | # Refit on the best hp configuration 342 | # ================================== 343 | input_validator = TabularInputValidator( 344 | is_classification=True, 345 | ) 346 | input_validator.fit( 347 | X_train=X_train.copy(), 348 | y_train=y_train.copy(), 349 | X_test=X_test.copy(), 350 | y_test=y_test.copy(), 351 | ) 352 | 353 | dataset = TabularDataset( 354 | X=X_train, 355 | Y=y_train, 356 | X_test=X_test, 357 | Y_test=y_test, 358 | seed=args.seed, 359 | validator=input_validator, 360 | resampling_strategy=NoResamplingStrategyTypes.no_resampling, 361 | ) 362 | dataset.is_small_preprocess = False 363 | print(f"Fitting pipeline with {args.epochs} epochs") 364 | 365 | search_space = api.get_search_space(dataset) 366 | # only when we perform hpo will there be an incumbent configuration 367 | # otherwise take a default configuration. 
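    # [Editorial sketch, not part of the original script] Conceptually, the
    # incumbent lookup below reduces to picking the configuration with the
    # lowest cost in SMAC's runhistory.json (names here are hypothetical):
    #
    #     costs = {run.config_id: run.cost for run in runhistory}
    #     incumbent_id = min(costs, key=costs.get)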
368 | if number_of_configurations_limit != 0: 369 | configuration, incumbent_run_value = get_incumbent_results( 370 | os.path.join( 371 | temp_dir, 372 | 'smac3-output', 373 | 'run_{}'.format(args.seed), 374 | 'runhistory.json'), 375 | search_space, 376 | ) 377 | print(f"Incumbent configuration: {configuration}") 378 | print(f"Incumbent trajectory: {api.trajectory}") 379 | else: 380 | # default configuration 381 | configuration = search_space.get_default_configuration() 382 | print(f"Default configuration: {configuration}") 383 | 384 | fitted_pipeline, run_info, run_value, dataset = api.fit_pipeline( 385 | configuration=configuration, 386 | budget_type='epochs', 387 | budget=args.epochs, 388 | dataset=dataset, 389 | run_time_limit_secs=args.func_eval_time, 390 | eval_metric='balanced_accuracy', 391 | memory_limit=12000, 392 | ) 393 | 394 | X_train = dataset.train_tensors[0] 395 | y_train = dataset.train_tensors[1] 396 | X_test = dataset.test_tensors[0] 397 | y_test = dataset.test_tensors[1] 398 | 399 | train_predictions = fitted_pipeline.predict(X_train) 400 | test_predictions = fitted_pipeline.predict(X_test) 401 | 402 | # Store the predictions if things go south 403 | with open(os.path.join(output_dir, f"predictions_{args.task_id}.pickle"), 'wb') as handle: 404 | pickle.dump(test_predictions, handle, protocol=pickle.HIGHEST_PROTOCOL) 405 | with open(os.path.join(output_dir, f"truth_{args.task_id}.pickle"), 'wb') as handle: 406 | pickle.dump(y_test, handle, protocol=pickle.HIGHEST_PROTOCOL) 407 | 408 | train_balanced_accuracy = metrics.balanced_accuracy(y_train, train_predictions.squeeze()) 409 | test_balanced_accuracy = metrics.balanced_accuracy(y_test, test_predictions.squeeze()) 410 | duration = time.time() - start_time 411 | 412 | print(f'Final Train Balanced accuracy: {train_balanced_accuracy}') 413 | print(f'Final Test Balanced accuracy: {test_balanced_accuracy}') 414 | print(f'Time taken: {duration}') 415 | 416 | result_directory = os.path.expanduser( 417 | os.path.join( 418 | args.output_dir, 419 | f'{args.seed}', 420 | f'{args.task_id}', 421 | 'final_result.json', 422 | ) 423 | ) 424 | result_dict = { 425 | 'train balanced accuracy': train_balanced_accuracy, 426 | 'test balanced accuracy': test_balanced_accuracy, 427 | 'task_id': args.task_id, 428 | 'duration': duration, 429 | } 430 | 431 | with open(result_directory, 'w') as file: 432 | json.dump(result_dict, file) 433 | -------------------------------------------------------------------------------- /cocktails/refit_experiment.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | import random 5 | import time 6 | import warnings 7 | 8 | os.environ['OMP_NUM_THREADS'] = '1' 9 | os.environ['OPENBLAS_NUM_THREADS'] = '1' 10 | os.environ['MKL_NUM_THREADS'] = '1' 11 | 12 | warnings.simplefilter(action='ignore', category=UserWarning) 13 | warnings.simplefilter(action='ignore', category=FutureWarning) 14 | 15 | import torch 16 | 17 | from autoPyTorch.api.tabular_classification import TabularClassificationTask 18 | from autoPyTorch.datasets.resampling_strategy import NoResamplingStrategyTypes 19 | from autoPyTorch.data.tabular_validator import TabularInputValidator 20 | from autoPyTorch.datasets.tabular_dataset import TabularDataset 21 | from autoPyTorch import metrics 22 | 23 | import numpy as np 24 | 25 | from utilities import \ 26 | get_data, \ 27 | get_incumbent_results, \ 28 | get_updates_for_regularization_cocktails 29 | 30 | 31 | def str2bool(v): 
32 | if isinstance(v, bool): 33 | return [v, ] 34 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 35 | return [True, ] 36 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 37 | return [False, ] 38 | elif v.lower() == 'conditional': 39 | return [True, False] 40 | else: 41 | raise argparse.ArgumentTypeError('No valid value given.') 42 | 43 | 44 | parser = argparse.ArgumentParser( 45 | description='Refit autoPyTorch on a benchmark.' 46 | ) 47 | # experiment setup arguments 48 | parser.add_argument( 49 | '--task_id', 50 | type=int, 51 | default=233088, 52 | ) 53 | parser.add_argument( 54 | '--wall_time', 55 | type=int, 56 | default=1300, 57 | ) 58 | parser.add_argument( 59 | '--func_eval_time', 60 | type=int, 61 | default=700, 62 | ) 63 | parser.add_argument( 64 | '--epochs', 65 | type=int, 66 | default=105, 67 | ) 68 | parser.add_argument( 69 | '--seed', 70 | type=int, 71 | default=11, 72 | ) 73 | parser.add_argument( 74 | '--tmp_dir', 75 | type=str, 76 | default='./runs/autoPyTorch_cocktails', 77 | ) 78 | parser.add_argument( 79 | '--output_dir', 80 | type=str, 81 | default='./runs/autoPyTorch_cocktails', 82 | ) 83 | parser.add_argument( 84 | '--cash_cocktail', 85 | help='If the regularization cocktail should be used.', 86 | type=bool, 87 | default=False, 88 | ) 89 | 90 | # regularization ingredient arguments 91 | parser.add_argument( 92 | '--use_swa', 93 | help='If stochastic weight averaging should be used.', 94 | type=str2bool, 95 | nargs='?', 96 | const=[True], 97 | default=[False], 98 | ) 99 | parser.add_argument( 100 | '--use_se', 101 | help='If snapshot ensembling should be used.', 102 | type=str2bool, 103 | nargs='?', 104 | const=[True], 105 | default=[False], 106 | ) 107 | parser.add_argument( 108 | '--use_lookahead', 109 | help='If the lookahead optimizing technique should be used.', 110 | type=str2bool, 111 | nargs='?', 112 | const=[True], 113 | default=[False], 114 | ) 115 | parser.add_argument( 116 | '--use_weight_decay', 117 | help='If weight decay regularization should be used.', 118 | type=str2bool, 119 | nargs='?', 120 | const=[True], 121 | default=[False], 122 | ) 123 | parser.add_argument( 124 | '--use_batch_normalization', 125 | help='If batch normalization regularization should be used.', 126 | type=str2bool, 127 | nargs='?', 128 | const=[True], 129 | default=[False], 130 | ) 131 | parser.add_argument( 132 | '--use_skip_connection', 133 | help='If skip connections should be used. ' 134 | 'Turns the network into a residual network.', 135 | type=str2bool, 136 | nargs='?', 137 | const=[True], 138 | default=[False], 139 | ) 140 | parser.add_argument( 141 | '--use_dropout', 142 | help='If dropout regularization should be used.', 143 | type=str2bool, 144 | nargs='?', 145 | const=[True], 146 | default=[False], 147 | ) 148 | parser.add_argument( 149 | '--mb_choice', 150 | help='Multibranch network regularization. 
' 151 | 'Only active when skip_connection is active.', 152 | type=str, 153 | choices=['none', 'shake-shake', 'shake-drop'], 154 | default='none', 155 | ) 156 | parser.add_argument( 157 | '--augmentation', 158 | help='If methods that augment examples should be used', 159 | type=str, 160 | choices=['mixup', 'cutout', 'cutmix', 'standard', 'adversarial'], 161 | default='standard', 162 | ) 163 | 164 | 165 | args = parser.parse_args() 166 | options = vars(args) 167 | print(options) 168 | 169 | 170 | if __name__ == '__main__': 171 | 172 | # Setting up reproducibility 173 | torch.backends.cudnn.deterministic = True 174 | torch.backends.cudnn.benchmark = False 175 | torch.manual_seed(args.seed) 176 | np.random.seed(args.seed) 177 | random.seed(args.seed) 178 | 179 | ############################################################################ 180 | # Data Loading 181 | # ============ 182 | start_time = time.time() 183 | X_train, X_test, y_train, y_test, resampling_strategy_args, categorical_indicator = get_data( 184 | task_id=args.task_id, 185 | seed=args.seed, 186 | ) 187 | 188 | pipeline_update, search_space_updates, include_updates = get_updates_for_regularization_cocktails( 189 | categorical_indicator, 190 | args, 191 | ) 192 | output_dir = os.path.expanduser( 193 | os.path.join( 194 | args.output_dir, 195 | f'{args.seed}', 196 | f'{args.task_id}', 197 | f'{args.task_id}_out', 198 | ) 199 | ) 200 | temp_dir = os.path.expanduser( 201 | os.path.join( 202 | args.tmp_dir, 203 | f'{args.seed}', 204 | f'{args.task_id}', 205 | f'{args.task_id}_tmp', 206 | ) 207 | ) 208 | 209 | refit_out_dir = os.path.join(output_dir, 'refit') 210 | refit_tmp_dir = os.path.join(temp_dir, 'refit') 211 | 212 | ############################################################################ 213 | # Build and fit a classifier 214 | # ========================== 215 | api = TabularClassificationTask( 216 | temporary_directory=refit_tmp_dir, 217 | output_directory=refit_out_dir, 218 | delete_tmp_folder_after_terminate=False, 219 | delete_output_folder_after_terminate=False, 220 | resampling_strategy=NoResamplingStrategyTypes.no_resampling, 221 | ensemble_size=1, 222 | ensemble_nbest=1, 223 | max_models_on_disc=1, 224 | include_components=include_updates, 225 | search_space_updates=search_space_updates, 226 | seed=args.seed, 227 | n_jobs=1, 228 | ) 229 | 230 | api.set_pipeline_config(**pipeline_update) 231 | ############################################################################ 232 | # Refit the hp configuration 233 | # ========================== 234 | input_validator = TabularInputValidator( 235 | is_classification=True, 236 | ) 237 | 238 | input_validator.fit( 239 | X_train=X_train.copy(), 240 | y_train=y_train.copy(), 241 | X_test=X_test.copy(), 242 | y_test=y_test.copy(), 243 | ) 244 | 245 | dataset = TabularDataset( 246 | X=X_train, 247 | Y=y_train, 248 | X_test=X_test, 249 | Y_test=y_test, 250 | validator=input_validator, 251 | seed=args.seed, 252 | resampling_strategy=NoResamplingStrategyTypes.no_resampling, 253 | ) 254 | dataset.is_small_preprocess = False 255 | print(f"Fitting pipeline with {args.epochs} epochs") 256 | 257 | search_space = api.get_search_space(dataset) 258 | 259 | # There has been an hpo search, find the best hyperparameter configuration. 
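    # The path below assumes the directory layout produced by the earlier search
    # run: <tmp_dir>/<seed>/<task_id>/<task_id>_tmp/smac3-output/run_<seed>/runhistory.json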
260 | run_history_path = os.path.join( 261 | temp_dir, 262 | 'smac3-output', 263 | 'run_{}'.format(args.seed), 264 | 'runhistory.json', 265 | ) 266 | 267 | inc_config, inc_value = get_incumbent_results(run_history_path, search_space) 268 | 269 | print(f'The value that the incumbent had on the validation set before the refit:{inc_value}') 270 | print(f"Incumbent configuration: {inc_config}") 271 | 272 | fitted_pipeline, run_info, run_value, dataset = api.fit_pipeline( 273 | configuration=inc_config, 274 | budget_type='epochs', 275 | budget=args.epochs, 276 | dataset=dataset, 277 | run_time_limit_secs=args.func_eval_time, 278 | eval_metric='balanced_accuracy', 279 | memory_limit=12000, 280 | ) 281 | 282 | X_train = dataset.train_tensors[0] 283 | y_train = dataset.train_tensors[1] 284 | X_test = dataset.test_tensors[0] 285 | y_test = dataset.test_tensors[1] 286 | 287 | train_predictions = fitted_pipeline.predict(X_train) 288 | test_predictions = fitted_pipeline.predict(X_test) 289 | train_balanced_accuracy = metrics.balanced_accuracy(y_train, train_predictions.squeeze()) 290 | test_balanced_accuracy = metrics.balanced_accuracy(y_test, test_predictions.squeeze()) 291 | duration = time.time() - start_time 292 | 293 | print(f'Final Train Balanced accuracy: {train_balanced_accuracy}') 294 | print(f'Final Test Balanced accuracy: {test_balanced_accuracy}') 295 | print(f'Time taken: {duration}') 296 | 297 | result_directory = os.path.expanduser( 298 | os.path.join( 299 | args.output_dir, 300 | f'{args.seed}', 301 | f'{args.task_id}', 302 | 'final_result.json', 303 | ) 304 | ) 305 | result_dict = { 306 | 'train balanced accuracy': train_balanced_accuracy, 307 | 'test balanced accuracy': test_balanced_accuracy, 308 | 'task_id': args.task_id, 309 | 'duration': duration, 310 | } 311 | 312 | with open(result_directory, 'w') as file: 313 | json.dump(result_dict, file) 314 | -------------------------------------------------------------------------------- /dataset_collection.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import openml 4 | import pandas as pd 5 | 6 | 7 | suite = openml.study.get_suite(218) 8 | task_ids = suite.tasks 9 | 10 | dataset_table = { 11 | 'Task Id': [], 12 | 'Dataset Name': [], 13 | 'Number of examples': [], 14 | 'Number of features': [], 15 | 'Majority class percentage': [], 16 | 'Minority class percentage': [], 17 | } 18 | 19 | for task_id in task_ids: 20 | task = openml.tasks.get_task(task_id, download_data=False) 21 | dataset = openml.datasets.get_dataset(task.dataset_id, download_data=False) 22 | dataset_table['Task Id'].append(task_id) 23 | dataset_table['Dataset Name'].append(dataset.name) 24 | dataset_table['Number of examples'].append(dataset.qualities['NumberOfInstances']) 25 | dataset_table['Number of features'].append(dataset.qualities['NumberOfFeatures']) 26 | dataset_table['Majority class percentage'].append(f"{dataset.qualities['MajorityClassPercentage']:.3f}") 27 | dataset_table['Minority class percentage'].append(f"{dataset.qualities['MinorityClassPercentage']:.3f}") 28 | 29 | output_path = os.path.expanduser( 30 | os.path.join( 31 | '~', 32 | 'Desktop', 33 | 'dataset_collection.csv' 34 | ) 35 | ) 36 | 37 | dataset_info_frame = pd.DataFrame.from_dict(dataset_table) 38 | dataset_info_frame.to_csv(output_path, index=False) 39 | -------------------------------------------------------------------------------- /figures/all_baselines_diagram.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/machinelearningnuremberg/WellTunedSimpleNets/54058460d5b587bc84107c200e6f1c44755a87e0/figures/all_baselines_diagram.png -------------------------------------------------------------------------------- /results.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from typing import List, Tuple 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import pandas as pd 8 | import openml 9 | from scipy.stats import wilcoxon, rankdata 10 | import seaborn as sns 11 | 12 | 13 | sns.set( 14 | rc={ 15 | 'figure.figsize': (11.7, 8.27), 16 | 'font.size': 35, 17 | 'axes.titlesize': 35, 18 | 'axes.labelsize': 35, 19 | 'xtick.labelsize': 35, 20 | 'ytick.labelsize': 35, 21 | }, 22 | style="white" 23 | ) 24 | 25 | 26 | def get_task_list( 27 | benchmark_task_file: str = 'path/to/tasks.txt', 28 | ) -> List[int]: 29 | """Get the task id list. 30 | 31 | Goes through the given file and collects all of the task 32 | ids. 33 | 34 | Args: 35 | benchmark_task_file (str): 36 | A string to the path of the benchmark task file. Including 37 | the task file name. 38 | 39 | Returns: 40 | benchmark_task_ids (List[int]): 41 | A list of all the task ids for the benchmark. 42 | """ 43 | with open(os.path.join(benchmark_task_file), 'r') as f: 44 | benchmark_info_str = f.readline() 45 | benchmark_task_ids = [int(task_id) for task_id in benchmark_info_str.split(' ')] 46 | 47 | return benchmark_task_ids 48 | 49 | 50 | # TODO merge all the build_table functions 51 | def build_table_from_autopytorch_data( 52 | output_dir: str, 53 | benchmark_task_file: str, 54 | seed: int = 11, 55 | ): 56 | """ 57 | Stores the final performance for the autopytorch algorithm on every dataset 58 | to a csv file in the output_dir. 59 | 60 | Args: 61 | output_dir (str): The output directory where the results are stored. 62 | benchmark_task_file (str): The path where the benchmark txt file is located. 63 | seed (int): The seed used for the experiment. 64 | """ 65 | experiment_table = { 66 | 'Task Id': [], 67 | 'Test Performance': [], 68 | } 69 | benchmark_task_ids = get_task_list(benchmark_task_file) 70 | for task_id in benchmark_task_ids: 71 | task_dir = os.path.join( 72 | output_dir, 73 | f'{seed}', 74 | f'{task_id}', 75 | 'final_result.json' 76 | ) 77 | 78 | try: 79 | with open(task_dir, 'r') as fp: 80 | task_performance_info = json.load(fp) 81 | task_test_performance = task_performance_info['test balanced accuracy'] 82 | experiment_table['Task Id'].append(task_id) 83 | experiment_table['Test Performance'].append(task_test_performance) 84 | except FileNotFoundError: 85 | print(f'Refit for task id:{task_id} not found') 86 | 87 | 88 | experiment_df = pd.DataFrame.from_dict(experiment_table, orient='columns') 89 | df_dir = os.path.join( 90 | output_dir, 91 | 'results.csv', 92 | ) 93 | experiment_df.to_csv(df_dir, index=False) 94 | 95 | 96 | def build_table_from_node_data( 97 | output_dir: str, 98 | benchmark_task_file: str, 99 | seed: int = 11, 100 | ): 101 | """ 102 | Stores the final performance for the node algorithm on every dataset 103 | to a csv file in the output_dir. 104 | 105 | Args: 106 | output_dir (str): The output directory where the results are stored. 107 | benchmark_task_file (str): The path where the benchmark txt file is located. 108 | seed (int): The seed used for the experiment. 
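    Note:
        Results are read from output_dir/<seed>/<task_id>/final_result.json, as
        written by the refit experiments; tasks whose file is missing are
        skipped with a message.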
109 | """ 110 | experiment_table = { 111 | 'Task Id': [], 112 | 'Test Performance': [], 113 | } 114 | benchmark_task_ids = get_task_list(benchmark_task_file) 115 | for task_id in benchmark_task_ids: 116 | task_dir = os.path.join( 117 | output_dir, 118 | f'{seed}', 119 | f'{task_id}', 120 | 'results.json' 121 | ) 122 | 123 | try: 124 | with open(task_dir, 'r') as fp: 125 | task_performance_info = json.load(fp) 126 | task_test_performance = task_performance_info['test balanced accuracy'] 127 | experiment_table['Task Id'].append(task_id) 128 | experiment_table['Test Performance'].append(task_test_performance) 129 | except FileNotFoundError: 130 | print(f'Refit for task id:{task_id} not found') 131 | 132 | 133 | experiment_df = pd.DataFrame.from_dict(experiment_table, orient='columns') 134 | df_dir = os.path.join( 135 | output_dir, 136 | 'results.csv', 137 | ) 138 | experiment_df.to_csv(df_dir, index=False) 139 | 140 | 141 | def build_table_from_tabnet_data( 142 | output_dir: str, 143 | benchmark_task_file: str, 144 | seed: int = 11, 145 | ): 146 | """ 147 | Stores the final performance for the TabNet algorithm on every dataset 148 | to a csv file in the output_dir. 149 | 150 | Args: 151 | output_dir (str): The output directory where the results are stored. 152 | benchmark_task_file (str): The path where the benchmark txt file is located. 153 | seed (int): The seed used for the experiment. 154 | """ 155 | experiment_table = { 156 | 'Task Id': [], 157 | 'Test Performance': [], 158 | } 159 | benchmark_task_ids = get_task_list(benchmark_task_file) 160 | for task_id in benchmark_task_ids: 161 | task_dir = os.path.join( 162 | output_dir, 163 | f'{task_id}', 164 | f'{seed}', 165 | 'refit_results.json' 166 | ) 167 | 168 | try: 169 | with open(task_dir, 'r') as fp: 170 | task_performance_info = json.load(fp) 171 | task_test_performance = task_performance_info['test_accuracy'] 172 | experiment_table['Task Id'].append(task_id) 173 | experiment_table['Test Performance'].append(task_test_performance) 174 | except FileNotFoundError: 175 | print(f'Refit for task id:{task_id} not found') 176 | experiment_table['Task Id'].append(task_id) 177 | experiment_table['Test Performance'].append(-1) 178 | 179 | 180 | experiment_df = pd.DataFrame.from_dict(experiment_table, orient='columns') 181 | df_dir = os.path.join( 182 | output_dir, 183 | 'results.csv', 184 | ) 185 | experiment_df.to_csv(df_dir, index=False) 186 | 187 | 188 | def build_table_from_autogluon_data( 189 | output_dir: str, 190 | benchmark_task_file: str, 191 | seed: int = 11, 192 | ): 193 | """ 194 | Stores the final performance for the AutoGluon algorithm on every dataset 195 | to a csv file in the output_dir. 196 | 197 | Args: 198 | output_dir (str): The output directory where the results are stored. 199 | benchmark_task_file (str): The path where the benchmark txt file is located. 200 | seed (int): The seed used for the experiment. 
201 | """ 202 | experiment_table = { 203 | 'Task Id': [], 204 | 'Test Performance': [], 205 | } 206 | benchmark_task_ids = get_task_list(benchmark_task_file) 207 | for task_id in benchmark_task_ids: 208 | task_dir = os.path.join( 209 | output_dir, 210 | f'{seed}', 211 | f'{task_id}', 212 | 'results.csv', 213 | ) 214 | 215 | try: 216 | performance_df = pd.read_csv(task_dir) 217 | score = performance_df['score'].to_numpy() 218 | score = score[0] 219 | except FileNotFoundError: 220 | print(f'No results for task id:{task_id}') 221 | score = -1 222 | experiment_table['Task Id'].append(task_id) 223 | experiment_table['Test Performance'].append(score) 224 | 225 | experiment_df = pd.DataFrame.from_dict(experiment_table, orient='columns') 226 | df_dir = os.path.join( 227 | output_dir, 228 | 'results.csv', 229 | ) 230 | experiment_df.to_csv(df_dir, index=False) 231 | 232 | 233 | def build_table_from_cocktails_data( 234 | output_dir: str, 235 | benchmark_task_file: str, 236 | seed: int = 11, 237 | ): 238 | """ 239 | Stores the final performance for the old autopytorch algorithm on every 240 | dataset to a csv file in the output_dir. 241 | 242 | Args: 243 | output_dir (str): The output directory where the results are stored. 244 | benchmark_task_file (str): The path where the benchmark txt file is located. 245 | seed (int): The seed used for the experiment. 246 | """ 247 | experiment_table = { 248 | 'Task Id': [], 249 | 'Duration': [], 250 | } 251 | benchmark_task_ids = get_task_list(benchmark_task_file) 252 | for task_id in benchmark_task_ids: 253 | task_dir = os.path.join( 254 | output_dir, 255 | '512', 256 | f'{task_id}', 257 | 'refit_run', 258 | f'{seed}', 259 | 'run_results.txt', 260 | ) 261 | if not os.path.exists(task_dir): 262 | task_dir = os.path.join( 263 | output_dir, 264 | '512', 265 | f'{task_id}', 266 | 'run_results.txt', 267 | ) 268 | 269 | try: 270 | with open(task_dir, 'r') as fp: 271 | task_performance_info = json.load(fp) 272 | task_performance = float(task_performance_info['mean_test_bal_acc']) 273 | experiment_table['Task Id'].append(task_id) 274 | experiment_table['Test Performance'].append(task_performance) 275 | except FileNotFoundError: 276 | print(f'Refit for task id:{task_id} not found') 277 | experiment_table['Task Id'].append(task_id) 278 | experiment_table['Test Performance'].append(-1) 279 | 280 | 281 | experiment_df = pd.DataFrame.from_dict(experiment_table, orient='columns') 282 | df_dir = os.path.join( 283 | output_dir, 284 | 'results.csv', 285 | ) 286 | experiment_df.to_csv(df_dir, index=False) 287 | 288 | 289 | def generate_times_from_autopytorch_data( 290 | output_dir: str, 291 | benchmark_task_file: str, 292 | seed: int = 11, 293 | ): 294 | """ 295 | Stores the duration for the autopytorch algorithm on every dataset 296 | to a csv file in the output_dir. 297 | 298 | Args: 299 | output_dir (str): The output directory where the results are stored. 300 | benchmark_task_file (str): The path where the benchmark txt file is located. 301 | seed (int): The seed used for the experiment. 
302 | """ 303 | experiment_table = { 304 | 'Task Id': [], 305 | 'Duration': [], 306 | } 307 | benchmark_task_ids = get_task_list(benchmark_task_file) 308 | for task_id in benchmark_task_ids: 309 | task_dir = os.path.join( 310 | output_dir, 311 | f'{seed}', 312 | f'{task_id}', 313 | 'final_result.json' 314 | ) 315 | 316 | try: 317 | with open(task_dir, 'r') as fp: 318 | task_performance_info = json.load(fp) 319 | task_duration = float(task_performance_info['duration']) 320 | experiment_table['Task Id'].append(task_id) 321 | experiment_table['Duration'].append(task_duration) 322 | except FileNotFoundError: 323 | print(f'Refit for task id:{task_id} not found') 324 | experiment_table['Task Id'].append(task_id) 325 | experiment_table['Duration'].append(-1) 326 | 327 | 328 | experiment_df = pd.DataFrame.from_dict(experiment_table, orient='columns') 329 | df_dir = os.path.join( 330 | output_dir, 331 | 'durations.csv', 332 | ) 333 | experiment_df.to_csv(df_dir, index=False) 334 | 335 | 336 | def build_all_table( 337 | result_dir: str, 338 | ): 339 | """Generates a table with all the baselines and their final performances on every 340 | dataset. 341 | 342 | Args: 343 | result_dir (str): The results folder where the data for every baseline is organized. 344 | 345 | Returns: 346 | output (pd.DataFrame): The DataFrame with all the baseline final results. 347 | 348 | Note: 349 | The folder structure should be result_dir/baseline_name/results.csv, where results.csv 350 | corresponds to a table with the performance of the baseline of every task. 351 | """ 352 | method_folders = [ 353 | 'plain_network', 354 | 'dropout', 355 | 'selu', 356 | 'XGBoost/ES', 357 | 'neurips_xgboost_es', 358 | 'neurips_xgboost_no_es', 359 | 'catboost_v2', 360 | 'XGBoost/No ES', 361 | 'autosklearn', 362 | 'tabnet/ES', 363 | 'autogluon_only_hpo', 364 | 'tabnet/No ES', 365 | 'node', 366 | 'autogluon/nn_only_4_days', 367 | 'autogluon/full_4_days', 368 | 'cocktail', 369 | 'new_cocktail', 370 | 'search_cocktail', 371 | ] 372 | 373 | pretty_names = { 374 | 'autogluon/nn_only_4_days': ' AutoGL. + Stacking', 375 | 'autogluon/full_4_days': 'Full AutoGL', 376 | 'autogluon_only_hpo': 'AutoGL. + HPO', 377 | 'cocktail': ' MLP + C ', 378 | 'new_cocktail': 'SMAC MLP + C ', 379 | 'search_cocktail': 'Search Smac + C', 380 | 'plain_network': ' MLP ', 381 | 'dropout': ' MLP + D ', 382 | 'node': ' NODE ', 383 | 'tabnet/ES': ' TabN. + ES ', 384 | 'XGBoost/ES': ' XGB. + ES ', 385 | 'tabnet/No ES': ' TabN. ', 386 | 'XGBoost/No ES': ' XGB. ', 387 | 'autosklearn': ' ASK-G. ', 388 | 'selu': 'MLP + S', 389 | 'catboost_v2': 'CatBoost', 390 | 'neurips_xgboost_es': 'XGB. + ES + ENC', 391 | 'neurips_xgboost_no_es': 'XGB. 
+ ENC',
392 |     }
393 | 
394 |     pandas_frames = []
395 |     drop_task_ids = False
396 |     for method in method_folders:
397 |         method_results = os.path.join(result_dir, method)
398 |         method_df = pd.read_csv(os.path.join(method_results, 'results.csv'))
399 |         method_df.columns = ['Task Id', pretty_names[method]]
400 |         if drop_task_ids:
401 |             method_df = method_df.drop(labels=['Task Id'], axis=1)
402 |         else:
403 |             drop_task_ids = True
404 |         pandas_frames.append(method_df)
405 |     output = pd.concat(pandas_frames, join='outer', axis=1)
406 |     task_infos = []
407 |     for task_id in output['Task Id']:
408 |         task = openml.tasks.get_task(task_id, download_data=False)
409 |         dataset = openml.datasets.get_dataset(task.dataset_id, download_data=False)
410 |         task_info = f'{dataset.qualities["NumberOfInstances"]}/{dataset.qualities["NumberOfFeatures"]}'
411 |         task_infos.append(task_info)
412 | 
413 |     # uncomment if you want to add dataset information in the form of instances
414 |     # and features
415 |     # output.insert(loc=1, column='Size (Ins./Feat.)', value=task_infos)
416 |     # output['Task Id'] = output['Task Id'].apply(lambda x: openml.datasets.get_dataset(openml.tasks.get_task(x, download_data=False).dataset_id, download_data=False).name)
417 |     output = output.fillna(-1)
418 | 
419 |     # skipping the task information columns to format in a pretty way
420 |     # the baseline result columns.
421 |     method_columns = output.columns[1:]
422 | 
423 |     # only keep precision up to 3 numbers after the dot/comma
424 |     for column in method_columns:
425 |         output[column] = output[column].apply(lambda x: f'{x * 100:.3f}' if x != -1 else np.nan)
426 | 
427 |     return output
428 | 
429 | 
430 | def compare_models(
431 |     result_dir: str,
432 |     baseline: str = 'autogluon/full_4_days',
433 |     cocktails: str = 'autopytorch',
434 | ):
435 |     """Compare a baseline with the regularization cocktail.
436 | 
437 |     The method will print the necessary information based on the results.
438 |     The results should be stored in the following format:
439 |     result_dir/baseline_name/results.csv
440 | 
441 |     Args:
442 |         result_dir (str): The directory where the results are stored.
443 |         baseline (str): The baseline name.
444 |         cocktails (str): The main method name.
445 |     """
446 |     baseline_results = os.path.join(
447 |         result_dir,
448 |         baseline,
449 |         'results.csv',
450 |     )
451 |     cocktail_results = os.path.join(
452 |         result_dir,
453 |         cocktails,
454 |         'results.csv',
455 |     )
456 | 
457 |     cocktails_df = pd.read_csv(cocktail_results)
458 |     baseline_df = pd.read_csv(baseline_results)
459 | 
460 |     task_ids = list(cocktails_df['Task Id'])
461 |     task_ids = [int(task_id) for task_id in task_ids]
462 | 
463 |     cocktail_performances = []
464 |     baseline_performances = []
465 |     cocktail_wins = 0
466 |     cocktail_losses = 0
467 |     cocktail_draws = 0
468 | 
469 |     for task_id in task_ids:
470 |         cocktail_task_performance = cocktails_df.query(f'`Task Id`=={task_id}')['Test Performance']
471 |         cocktail_task_performance = cocktail_task_performance.to_numpy()[0]
472 |         baseline_task_performance = baseline_df.query(f'`Task Id`=={task_id}')['Test Performance']
473 |         baseline_task_performance = baseline_task_performance.to_numpy()[0]
474 | 
475 |         # if a task has not finished for the baseline, do not use it
476 |         # in the comparison against the regularization cocktail.
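        # Unfinished runs are written with a sentinel value of -1 by the
        # build_table_* helpers above, so they are excluded here.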
477 |         if baseline_task_performance != -1.0:
478 |             cocktail_performances.append(cocktail_task_performance)
479 |             baseline_performances.append(baseline_task_performance)
480 |             if cocktail_task_performance > baseline_task_performance:
481 |                 cocktail_wins += 1
482 |             elif cocktail_task_performance == baseline_task_performance:
483 |                 cocktail_draws += 1
484 |             else:
485 |                 cocktail_losses += 1
486 |     _, p_value = wilcoxon(cocktail_performances, baseline_performances)
487 | 
488 |     print(f'Cocktail against {baseline}, '
489 |           f'wins {cocktail_wins}, '
490 |           f'loses {cocktail_losses}, '
491 |           f'draws {cocktail_draws}')
492 |     print(f'Wilcoxon p-value {p_value}')
493 | 
494 | 
495 | def build_cd_diagram(
496 |     results_dir: str,
497 | ) -> pd.DataFrame:
498 |     """Prepare the results for a critical difference diagram.
499 |     This function prepares all the results into a pandas dataframe
500 |     so that it can be used to create a critical difference diagram
501 |     of all the methods.
502 | 
503 |     Args:
504 |         results_dir (str): The directory where the results are stored.
505 | 
506 |     Returns:
507 |         result_df (pd.DataFrame):
508 |             The DataFrame that contains the final results for all the baselines
509 |             in a format that can be used as an input for the cd-diagram plot.
510 | 
511 |     Note:
512 |         The folder structure should be result_dir/baseline_name/results.csv, where results.csv
513 |         corresponds to a table with the performance of the baseline on every task.
514 |     """
515 |     method_folders = [
516 |         'plain_network',
517 |         'dropout',
518 |         'XGBoost/ES',
519 |         'XGBoost/No ES',
520 |         'autosklearn',
521 |         'autogluon_only_hpo',
522 |         'tabnet/ES',
523 |         'tabnet/No ES',
524 |         'node',
525 |         'selu',
526 |         'autogluon/nn_only_4_days',
527 |         'autogluon/full_4_days',
528 |         'cocktail',
529 |         'new_cocktail',
530 |         'search_cocktail',
531 |         'catboost_v2',
532 |         'neurips_xgboost_es',
533 |         'neurips_xgboost_no_es',
534 |     ]
535 | 
536 |     pretty_names = {
537 |         'autogluon/nn_only_4_days': ' AutoGL. S',
538 |         'autogluon/full_4_days': 'Full AutoGL',
539 |         'autogluon_only_hpo': 'AutoGL. HPO',
540 |         'cocktail': ' MLP + C ',
541 |         'new_cocktail': 'SMAC MLP + C ',
542 |         'search_cocktail': 'Search Smac + C',
543 |         'plain_network': ' MLP ',
544 |         'dropout': ' MLP + D ',
545 |         'node': ' NODE ',
546 |         'tabnet/ES': ' TabN. + ES ',
547 |         'XGBoost/ES': ' XGB. + ES ',
548 |         'tabnet/No ES': ' TabN. ',
549 |         'XGBoost/No ES': ' XGB. ',
550 |         'autosklearn': ' ASK-G. ',
551 |         'selu': 'MLP + SELU',
552 |         'catboost_v2': 'CatBoost',
553 |         'neurips_xgboost_es': 'XGB. + ES + ENC',
554 |         'neurips_xgboost_no_es': 'XGB.
+ ENC', 555 | } 556 | 557 | table_results = { 558 | 'Network': [], 559 | 'Task Id': [], 560 | 'Balanced Accuracy': [], 561 | } 562 | 563 | search_results = os.path.join(results_dir, 'cocktail') 564 | search_df = pd.read_csv(os.path.join(search_results, 'results.csv')) 565 | task_ids = list(search_df['Task Id']) 566 | task_ids = [int(task_id) for task_id in task_ids] 567 | 568 | for method in method_folders: 569 | method_results = os.path.join(results_dir, method) 570 | method_df = pd.read_csv(os.path.join(method_results, 'results.csv')) 571 | method_df.columns = ['Task Id', pretty_names[method]] 572 | for index, row in method_df.iterrows(): 573 | if int(row['Task Id']) in task_ids: 574 | table_results['Network'].append(pretty_names[method]) 575 | table_results['Task Id'].append(row['Task Id']) 576 | accuracy = row[pretty_names[method]] 577 | table_results['Balanced Accuracy'].append(accuracy if accuracy != -1 else np.nan) 578 | 579 | result_df = pd.DataFrame(data=table_results) 580 | 581 | return result_df 582 | 583 | 584 | def generate_ranks_data( 585 | all_data: pd.DataFrame, 586 | ) -> pd.DataFrame: 587 | """Generate the ranks of the baselines for every dataset. 588 | 589 | Args: 590 | all_data (pd.DataFrame): 591 | A dataframe where each row consists of tasks values 592 | across different models. 593 | 594 | Returns: 595 | ranks_df (pd.DataFrame): 596 | A dataframe of the ranks of all methods over 597 | the different tasks. 598 | """ 599 | all_ranked_data = [] 600 | all_data.drop(columns=['Task Id'], inplace=True) 601 | column_names = all_data.columns 602 | for row in all_data.itertuples(index=False): 603 | task_regularization_data = list(row) 604 | task_regularization_data = [float(x) for x in task_regularization_data] 605 | 606 | task_ranked_data = rankdata( 607 | task_regularization_data, 608 | method='average', 609 | ) 610 | reversed_data = len(task_ranked_data) + 1 - task_ranked_data 611 | all_ranked_data.append(reversed_data) 612 | ranks_df = pd.DataFrame(all_ranked_data, columns=column_names) 613 | 614 | return ranks_df 615 | 616 | 617 | def patch_violinplot(): 618 | """Patch seaborn's violinplot in current axis 619 | to workaround matplotlib's bug ##5423.""" 620 | from matplotlib.collections import PolyCollection 621 | ax = plt.gca() 622 | for art in ax.get_children(): 623 | if isinstance(art, PolyCollection): 624 | art.set_edgecolor((0.3, 0.3, 0.3)) 625 | 626 | 627 | def generate_ranks_comparison( 628 | all_data: pd.DataFrame, 629 | ): 630 | """Generate a ranks comparison between all methods. 631 | Creates a violin plot that showcases the ranks that 632 | the different methods achieve over all the tasks/datasets 633 | and saves it in the current executing folder. 634 | 635 | Args: 636 | all_data (pd.DataFrame): 637 | A dataframe where each row consists of method 638 | ranks over a certain task. 
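    Note:
        The ranks come from generate_ranks_data, where rank 1 is assigned to
        the best-performing method on a task.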
639 | """ 640 | all_data_ranked = generate_ranks_data(all_data) 641 | all_data = pd.melt( 642 | all_data_ranked, 643 | value_vars=all_data.columns, 644 | var_name='Method', 645 | value_name='Rank', 646 | ) 647 | 648 | fig, _ = plt.subplots() 649 | sns.violinplot(x='Method', y='Rank', linewidth=3, data=all_data, cut=0, kind='violin') 650 | patch_violinplot() 651 | plt.title('Ranks of the baselines and the MLP + C') 652 | plt.xlabel("") 653 | # plt.xticks(rotation=60) 654 | plt.tick_params( 655 | axis='x', # changes apply to the x-axis 656 | which='both', # both major and minor ticks are affected 657 | top=False, 658 | bottom=True, 659 | # ticks along the top edge are off 660 | ) 661 | fig.autofmt_xdate() 662 | plt.savefig( 663 | 'violin_ranks.pdf', 664 | bbox_inches='tight', 665 | pad_inches=0.15, 666 | margins=0.1, 667 | ) 668 | 669 | 670 | def plot_models_error_rate( 671 | result_dir, 672 | baseline, 673 | cocktails, 674 | ): 675 | """Plot a comparison of the models and generate descriptive 676 | statistics based on the results of all the models. 677 | Generates plots which showcase the gain of the cocktail versus 678 | the baseline. (Plots the error rate of the baseline divided 679 | by the error rate of the cocktail.) Furthermore, it 680 | generates information regarding the wins, looses and draws 681 | of both methods, including a significance result. Saves the 682 | plot to the current folder. 683 | 684 | Args: 685 | baseline_dir (str): 686 | The directory where the results are located for the baseline 687 | methods. 688 | cocktail_dir (str): 689 | The directory where the results are located for the regularization 690 | cocktails. 691 | """ 692 | pretty_names = { 693 | 'cocktail': 'MLP + C', 694 | 'autogluon/nn_only_4_days': 'AutoGL. S', 695 | 'XGBoost/No ES': 'XGB.', 696 | 'autosklearn': 'ASK-G.', 697 | } 698 | cocktail_error_rates = [] 699 | baseline_error_rates = [] 700 | 701 | baseline_results = os.path.join( 702 | result_dir, 703 | baseline, 704 | 'results.csv', 705 | ) 706 | cocktail_results = os.path.join( 707 | result_dir, 708 | cocktails, 709 | 'results.csv', 710 | ) 711 | cocktails_df = pd.read_csv(cocktail_results) 712 | baseline_df = pd.read_csv(baseline_results) 713 | 714 | task_ids = list(cocktails_df['Task Id']) 715 | for task_id in task_ids: 716 | cocktail_task_performance = cocktails_df.query(f'`Task Id`=={task_id}')['Test Performance'] 717 | cocktail_task_performance = cocktail_task_performance.to_numpy()[0] 718 | baseline_task_performance = baseline_df.query(f'`Task Id`=={task_id}')['Test Performance'] 719 | baseline_task_performance = baseline_task_performance.to_numpy()[0] 720 | 721 | cocktail_task_result_error = 1 - cocktail_task_performance 722 | benchmark_task_result_error = 1 - baseline_task_performance 723 | cocktail_error_rates.append(cocktail_task_result_error) 724 | baseline_error_rates.append(benchmark_task_result_error) 725 | 726 | fig, ax = plt.subplots() 727 | plt.scatter(baseline_error_rates, cocktail_error_rates, s=100, c='#273E47', label='Test Error Rate') 728 | lims = [ 729 | np.min([0, 0]), # min of both axes 730 | np.max([ax.get_xlim(), ax.get_ylim()]), # max of both axes 731 | ] 732 | yticks = ax.yaxis.get_major_ticks() 733 | yticks[0].set_visible(False) 734 | # now plot both limits against eachother 735 | ax.plot(lims, lims, 'k-', alpha=0.75, zorder=0, color='r') 736 | ax.set_aspect('equal') 737 | ax.set_xlim(lims) 738 | ax.set_ylim(lims) 739 | plt.xlabel(f"{pretty_names[baseline]} Error Rate") 740 | plt.ylabel(f"{pretty_names[cocktails]} Error 
Rate") 741 | 742 | plt.tick_params( 743 | axis='x', # changes apply to the x-axis 744 | which='both', # both major and minor ticks are affected 745 | top=False, 746 | bottom=True, 747 | # ticks along the top edge are off 748 | ) 749 | plt.tick_params( 750 | axis='y', 751 | which='both', 752 | left=True, 753 | right=False, 754 | ) 755 | 756 | # plt.title("Comparison with XGBoost") 757 | plt.savefig( 758 | f'cocktail_vs_{pretty_names[baseline]}.pdf', 759 | bbox_inches='tight', 760 | pad_inches=0.15, 761 | margins=0.1, 762 | ) 763 | 764 | 765 | def incumbent_time_dataset( 766 | result_path: str, 767 | dataset_id: int, 768 | seed: int, 769 | max_number_configs: int = 840, 770 | method: str = 'cocktail', 771 | ) -> Tuple[float, int]: 772 | """Return the time needed to find the incumbent configuration 773 | given a maximal number of configurations for a certain dataset 774 | and a certain algorithm. 775 | 776 | Args: 777 | result_path (str): The path of the folder where the results are 778 | stored. 779 | dataset_id (int): The task id- 780 | seed (int): The seed used for the experiment. 781 | max_number_configs (int): The maximal number of configurations. 782 | method (str): The method name. 783 | 784 | Returns: 785 | incumbent_time, incumbent_index (Tuple[float, int]): 786 | A tuple with the time needed to find the incumbent configuration 787 | and the index of the incumbent configuration. 788 | """ 789 | if method == 'cocktail': 790 | task_result_folder = os.path.expanduser( 791 | os.path.join( 792 | result_path, 793 | f'{dataset_id}', 794 | 'hpo_run', 795 | f'{seed}', 796 | ) 797 | ) 798 | else: 799 | task_result_folder = os.path.expanduser( 800 | os.path.join( 801 | result_path, 802 | f'{dataset_id}', 803 | f'{seed}', 804 | ) 805 | ) 806 | 807 | index = 0 808 | incumbent_accuracy = 0 809 | start_time = None 810 | incumbent_time = None 811 | incumbent_index = None 812 | x_times = [] 813 | y_accuracies = [] 814 | 815 | with open(os.path.join(task_result_folder, 'results.json')) as result_file: 816 | for line in result_file: 817 | config_info = json.loads(line) 818 | # config_id 819 | _ = config_info[0] 820 | job_stats = config_info[2] 821 | started = job_stats['started'] 822 | finished = job_stats['finished'] 823 | 824 | if index == 0: 825 | start_time = started 826 | try: 827 | result_info = config_info[3]['info'] 828 | except Exception: 829 | print(f'Worked Died problem') 830 | 831 | if method == 'cocktail': 832 | validation_curve = result_info[0]['val_balanced_accuracy'] 833 | validation_accuracy = validation_curve[-1] 834 | else: 835 | validation_accuracy = result_info['val_accuracy'] 836 | 837 | 838 | if validation_accuracy > incumbent_accuracy: 839 | incumbent_accuracy = validation_accuracy 840 | incumbent_time = finished - start_time 841 | incumbent_index = index 842 | 843 | index += 1 844 | 845 | estimated_time = finished - start_time 846 | x_times.append(estimated_time) 847 | y_accuracies.append(incumbent_accuracy) 848 | 849 | if index == max_number_configs: 850 | print("Max number of configs reached") 851 | break 852 | 853 | return incumbent_time, incumbent_index 854 | 855 | 856 | def runtime_dataset( 857 | result_path: str, 858 | dataset_id: int, 859 | seed: int, 860 | max_number_configs: int = 840, 861 | method: str = 'cocktail', 862 | ) -> float: 863 | """Return the time needed to perform the HPO search 864 | given a maximal number of configurations for a certain 865 | dataset and a certain algorithm. 
866 | 867 | Args: 868 | result_path (str): The path of the folder where the results are 869 | stored. 870 | dataset_id (int): The task id- 871 | seed (int): The seed used for the experiment. 872 | max_number_configs (int): The maximal number of configurations. 873 | method (str): The method name. 874 | 875 | Returns: 876 | estimated_time (float): 877 | The time elapsed for the HPO search. 878 | """ 879 | if method == 'cocktail': 880 | task_result_folder = os.path.expanduser( 881 | os.path.join( 882 | result_path, 883 | f'{dataset_id}', 884 | 'hpo_run', 885 | f'{seed}', 886 | ) 887 | ) 888 | else: 889 | task_result_folder = os.path.expanduser( 890 | os.path.join( 891 | result_path, 892 | f'{dataset_id}', 893 | f'{seed}', 894 | ) 895 | ) 896 | 897 | index = 0 898 | start_time = None 899 | 900 | with open(os.path.join(task_result_folder, 'results.json')) as result_file: 901 | for line in result_file: 902 | config_info = json.loads(line) 903 | job_stats = config_info[2] 904 | started = job_stats['started'] 905 | finished = job_stats['finished'] 906 | 907 | if index == 0: 908 | start_time = started 909 | 910 | estimated_time = finished - start_time 911 | index += 1 912 | 913 | if index == max_number_configs: 914 | print("Max number of configs reached") 915 | break 916 | 917 | return estimated_time 918 | 919 | 920 | def generate_cocktail_vs_xgboost_incumbent_times( 921 | cocktail_folder: str, 922 | baseline_folder: str, 923 | baseline_name: str, 924 | benchmark_task_file: str, 925 | ): 926 | """Generate the cocktail vs XGBoost incumbent times 927 | information. 928 | 929 | Generates information regarding the cocktail vs xgboost time 930 | performance and saves a plot with the time distributions of what 931 | every method took to find the incumbent configuration. 932 | 933 | Args: 934 | cocktail_folder (str): The path where the cocktail folder is located. 935 | baseline_folder (str): The path where the baseline results are located. 936 | baseline_name (str): The baseline name. 937 | benchmark_task_file (str): The benchmark task file path. 
938 | """ 939 | task_ids = get_task_list(benchmark_task_file) 940 | cocktail_incumbent_task_times = [] 941 | xgboost_incumbent_task_times = [] 942 | info_dict = { 943 | 'Cocktail': [], 944 | 'XGBoost': [], 945 | } 946 | for task_id in task_ids: 947 | print(task_id) 948 | cocktail_task_time, cocktail_task_index = incumbent_time_dataset( 949 | cocktail_folder, 950 | task_id, 951 | 11, 952 | ) 953 | xgboost_task_time, xgboost_task_index = incumbent_time_dataset( 954 | baseline_folder, 955 | task_id, 956 | 11, 957 | method=baseline_name, 958 | ) 959 | cocktail_incumbent_task_times.append(cocktail_task_time) 960 | xgboost_incumbent_task_times.append(xgboost_task_time) 961 | info_dict['Cocktail'].append(cocktail_task_time) 962 | info_dict['XGBoost'].append(xgboost_task_time) 963 | 964 | print(f'Cocktail mean: {np.mean(cocktail_incumbent_task_times)}') 965 | print(f'Cocktail min: {np.min(cocktail_incumbent_task_times)}') 966 | print(f'XGBoost mean: {np.mean(xgboost_incumbent_task_times)}') 967 | print(f'Cocktail std: {np.std(cocktail_incumbent_task_times)}') 968 | print(f'XGBoost std: {np.std(xgboost_incumbent_task_times)}') 969 | info_frame = pd.DataFrame.from_dict(info_dict) 970 | 971 | sns.boxplot(data=info_frame) 972 | plt.ylabel('Time (seconds)') 973 | plt.tight_layout() 974 | plt.savefig('comparison_incumbents_times.pdf') 975 | 976 | 977 | def incumbent_performance_time_dataset( 978 | result_path: str, 979 | dataset_id: int, 980 | seed: int, 981 | max_number_configs: int = 840, 982 | method: str = 'cocktail', 983 | time: int = 3600, 984 | ) -> float: 985 | """Return the test accuracy of the incumbent configuration 986 | given a maximal number of configurations for a certain dataset 987 | and a certain algorithm for a given time marker. 988 | 989 | Args: 990 | result_path (str): The path of the folder where the results are 991 | stored. 992 | dataset_id (int): The task id- 993 | seed (int): The seed used for the experiment. 994 | max_number_configs (int): The maximal number of configurations. 995 | method (str): The method name. 996 | time (int): The time marker. 997 | 998 | Returns: 999 | incumbent_test_accuracy (float): 1000 | The incumbent test accuracy. 
1001 | """ 1002 | if method == 'cocktail': 1003 | task_result_folder = os.path.expanduser( 1004 | os.path.join( 1005 | result_path, 1006 | f'{dataset_id}', 1007 | 'hpo_run', 1008 | f'{seed}', 1009 | ) 1010 | ) 1011 | else: 1012 | task_result_folder = os.path.expanduser( 1013 | os.path.join( 1014 | result_path, 1015 | f'{dataset_id}', 1016 | f'{seed}', 1017 | ) 1018 | ) 1019 | 1020 | index = 0 1021 | incumbent_val_accuracy = 0 1022 | incumbent_test_accuracy = 0 1023 | start_time = None 1024 | 1025 | with open(os.path.join(task_result_folder, 'results.json')) as result_file: 1026 | for line in result_file: 1027 | config_info = json.loads(line) 1028 | job_stats = config_info[2] 1029 | started = job_stats['started'] 1030 | finished = job_stats['finished'] 1031 | 1032 | # start the time 1033 | if index == 0: 1034 | start_time = started 1035 | 1036 | try: 1037 | result_info = config_info[3]['info'] 1038 | except Exception: 1039 | pass 1040 | # print(f'Worked Died problem') 1041 | 1042 | if method == 'cocktail': 1043 | validation_curve = result_info[0]['val_balanced_accuracy'] 1044 | validation_accuracy = validation_curve[-1] 1045 | test_curve = result_info[0]['test_result'] 1046 | test_accuracy = test_curve[-1] 1047 | else: 1048 | validation_accuracy = result_info['val_accuracy'] 1049 | test_accuracy = result_info['test_accuracy'] 1050 | 1051 | estimated_time = finished - start_time 1052 | if estimated_time >= time: 1053 | return incumbent_test_accuracy 1054 | 1055 | if validation_accuracy > incumbent_val_accuracy: 1056 | incumbent_val_accuracy = validation_accuracy 1057 | incumbent_test_accuracy = test_accuracy 1058 | 1059 | index += 1 1060 | 1061 | if index == max_number_configs: 1062 | # print("Max number of configs reached") 1063 | break 1064 | 1065 | return incumbent_test_accuracy 1066 | 1067 | def generate_performance_comparison_over_time( 1068 | cocktail_folder: str, 1069 | baseline_folder: str, 1070 | baseline_name: str, 1071 | benchmark_task_file: str, 1072 | ): 1073 | """Generate the cocktail vs XGBoost incumbent 1074 | performance over time information. 1075 | 1076 | Generates information regarding the cocktail vs xgboost time 1077 | performance and saves a plot with the average ranks of the 1078 | methods over time. 1079 | 1080 | Args: 1081 | cocktail_folder (str): The path where the cocktail folder is located. 1082 | baseline_folder (str): The path where the baseline results are located. 1083 | baseline_name (str): The baseline name. 1084 | benchmark_task_file (str): The benchmark task file path. 
1085 | """ 1086 | task_ids = get_task_list(benchmark_task_file) 1087 | times = [900, 1800, 3600, 7200, 14400, 28800, 57600, 115200, 230400, 345600] 1088 | 1089 | cocktail_ranks_over_time = [] 1090 | cocktail_stds_over_time = [] 1091 | baseline_ranks_over_time = [] 1092 | baseline_stds_over_time = [] 1093 | 1094 | for time in times: 1095 | baseline_ranks = [] 1096 | cocktail_ranks = [] 1097 | 1098 | cocktail_wins = 0 1099 | cocktail_ties = 0 1100 | cocktail_loses = 0 1101 | cocktail_performances = [] 1102 | baseline_performances = [] 1103 | 1104 | for task_id in task_ids: 1105 | 1106 | cocktail_incumbent_performance = incumbent_performance_time_dataset( 1107 | cocktail_folder, 1108 | task_id, 1109 | 11, 1110 | time=time, 1111 | ) 1112 | baseline_incumbent_performance = incumbent_performance_time_dataset( 1113 | baseline_folder, 1114 | task_id, 1115 | 11, 1116 | method=baseline_name, 1117 | time=time, 1118 | ) 1119 | cocktail_performances.append(cocktail_incumbent_performance) 1120 | baseline_performances.append(baseline_incumbent_performance) 1121 | 1122 | if cocktail_incumbent_performance == 0 and baseline_incumbent_performance == 0: 1123 | continue 1124 | elif cocktail_incumbent_performance == 0: 1125 | cocktail_loses += 1 1126 | cocktail_ranks.append(2) 1127 | baseline_ranks.append(1) 1128 | continue 1129 | elif baseline_incumbent_performance == 0: 1130 | cocktail_wins += 1 1131 | cocktail_ranks.append(1) 1132 | baseline_ranks.append(2) 1133 | continue 1134 | 1135 | if cocktail_incumbent_performance > baseline_incumbent_performance: 1136 | cocktail_wins += 1 1137 | cocktail_ranks.append(1) 1138 | baseline_ranks.append(2) 1139 | elif cocktail_incumbent_performance == baseline_incumbent_performance: 1140 | cocktail_ties += 1 1141 | cocktail_ranks.append(1.5) 1142 | baseline_ranks.append(1.5) 1143 | else: 1144 | cocktail_loses += 1 1145 | cocktail_ranks.append(2) 1146 | baseline_ranks.append(1) 1147 | 1148 | _, p_value = wilcoxon(cocktail_performances, baseline_performances) 1149 | cocktail_ranks_over_time.append(np.mean(cocktail_ranks)) 1150 | cocktail_stds_over_time.append(np.std(cocktail_ranks)) 1151 | baseline_ranks_over_time.append(np.mean(baseline_ranks)) 1152 | baseline_stds_over_time.append(np.std(baseline_ranks)) 1153 | print(f'For a runtime of {time / 3600} hours, The cocktails won: {cocktail_wins} times, tied: {cocktail_ties} times, lost: {cocktail_loses} times\np_value: {p_value}') 1154 | 1155 | plt.plot([time / 3600 for time in times], cocktail_ranks_over_time, label='MLP + C average rank') 1156 | plt.plot([time / 3600 for time in times], baseline_ranks_over_time, label=f'XGBoost average rank') 1157 | plt.legend() 1158 | plt.xlabel('Time (Hours)') 1159 | plt.ylabel('Average Rank') 1160 | plt.tight_layout() 1161 | plt.savefig('average_time_ranks.pdf') 1162 | -------------------------------------------------------------------------------- /utilities.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from typing import Any, Callable, Dict, Optional, Tuple 3 | 4 | import ConfigSpace 5 | import pandas as pd 6 | from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates 7 | 8 | import dask.distributed 9 | 10 | import openml 11 | import numpy as np 12 | 13 | from sklearn.model_selection import train_test_split 14 | 15 | from smac.intensification.simple_intensifier import SimpleIntensifier 16 | from smac.runhistory.runhistory2epm import RunHistory2EPM4LogCost 17 | from 
smac.scenario.scenario import Scenario 18 | from smac.facade.smac_ac_facade import SMAC4AC 19 | from smac.runhistory.runhistory import RunHistory 20 | 21 | 22 | def get_data( 23 | task_id: int, 24 | val_share: float = 0.25, 25 | test_size: float = 0.2, 26 | seed: int = 11, 27 | ) -> Tuple[pd.DataFrame, pd.DataFrame, np.ndarray, np.ndarray, dict, np.ndarray]: 28 | """ 29 | Given a task id and split size information, return 30 | the dataset splits based on a seed for the main algorithm 31 | to use. 32 | 33 | Args: 34 | task_id (int): 35 | The id of the task which will be used for the run. 36 | val_share (float): 37 | The validation split size from the train set. 38 | test_size (float): 39 | The test split size from the whole dataset. 40 | seed (int): 41 | The seed used for the dataset preparation. 42 | 43 | Returns: 44 | 45 | X_train, X_test, y_train, y_test, resampling_strategy_args, categorical indicator 46 | (tuple[pd.DataFrame, pd.DataFrame, np.ndarray, np.ndarray, dict, np.ndarray]): 47 | 48 | The train examples, the test examples, the train labels, the test labels, 49 | the resampling strategy to be used and the categorical indicator for the features. 50 | """ 51 | task = openml.tasks.get_task(task_id=task_id) 52 | dataset = task.get_dataset() 53 | X, y, categorical_indicator, _ = dataset.get_data( 54 | dataset_format='dataframe', 55 | target=dataset.default_target_attribute, 56 | ) 57 | 58 | # AutoPyTorch fails when it is given a y DataFrame with False and True values 59 | # and category as dtype. In its inner workings it uses sklearn which cannot 60 | # detect the column type. 61 | if isinstance(y[1], bool): 62 | y = y.astype('bool') 63 | 64 | # uncomment only for np.arrays 65 | """ 66 | # patch categorical values to string 67 | for index_nr, categorical_feature in enumerate(categorical_indicator): 68 | if categorical_feature: 69 | X[index_nr] = X[index_nr].astype("category") 70 | """ 71 | X_train, X_test, y_train, y_test = train_test_split( 72 | X, 73 | y, 74 | test_size=test_size, 75 | random_state=seed, 76 | stratify=y, 77 | shuffle=True, 78 | ) 79 | resampling_strategy_args = { 80 | 'val_share': val_share, 81 | } 82 | 83 | """ 84 | This was an earlier fix to the AutoPyTorch failures for imbalanced datasets. In particular 85 | having variables with only null values in the train set. Now this is handled inside AutoPyTorch. 86 | 87 | train_column_nan_info = X_train.isna().all() 88 | test_column_nan_info = X_test.isna().all() 89 | only_nan_columns = [label for label, value in train_column_nan_info.items() if value] 90 | test_nan_columns = [label for label, value in test_column_nan_info.items() if value] 91 | only_nan_columns.extend(test_nan_columns) 92 | only_nan_columns = set(only_nan_columns) 93 | X_train.drop(only_nan_columns, axis='columns', inplace=True) 94 | X_test.drop(only_nan_columns, axis='columns', inplace=True) 95 | """ 96 | # TODO turn this into a dictionary 97 | 98 | return X_train, X_test, y_train, y_test, resampling_strategy_args, categorical_indicator 99 | 100 | 101 | def get_smac_object( 102 | scenario_dict: Dict[str, Any], 103 | seed: int, 104 | ta: Callable, 105 | ta_kwargs: Dict[str, Any], 106 | n_jobs: int, 107 | initial_budget: int, 108 | max_budget: int, 109 | dask_client: Optional[dask.distributed.Client], 110 | ) -> SMAC4AC: 111 | """ 112 | This function returns an SMAC object that is gonna be used as 113 | optimizer of pipelines. 114 | 115 | Args: 116 | scenario_dict (typing.Dict[str, typing.Any]): constrain on how to run 117 | the jobs. 
118 |         seed (int): to make the job deterministic.
119 |         ta (typing.Callable): the function to be intensified by smac.
120 |         ta_kwargs (typing.Dict[str, typing.Any]): Arguments to the above ta.
121 |         n_jobs (int): Amount of cores to use for this task.
122 |         initial_budget (int):
123 |             The initial budget for a configuration.
124 |         max_budget (int):
125 |             The maximal budget for a configuration.
126 |         dask_client (dask.distributed.Client): User provided scheduler.
127 | 
128 |     Returns:
129 |         (SMAC4AC): sequential model-based algorithm configuration object
130 |     """
131 |     # multi-fidelity is disabled, that is why initial_budget and max_budget
132 |     # are not used.
133 |     rh2EPM = RunHistory2EPM4LogCost
134 | 
135 |     return SMAC4AC(
136 |         scenario=Scenario(scenario_dict),
137 |         rng=seed,
138 |         runhistory2epm=rh2EPM,
139 |         tae_runner=ta,
140 |         tae_runner_kwargs=ta_kwargs,
141 |         initial_configurations=None,
142 |         run_id=seed,
143 |         intensifier=SimpleIntensifier,
144 |         dask_client=dask_client,
145 |         n_jobs=n_jobs,
146 |     )
147 | 
148 | 
149 | def get_updates_for_regularization_cocktails(
150 |     categorical_indicator: np.ndarray,
151 |     args: Namespace,
152 | ) -> Tuple[Dict, HyperparameterSearchSpaceUpdates, Dict]:
153 |     """
154 |     These updates replicate the regularization cocktail paper search space.
155 | 
156 |     Args:
157 |         categorical_indicator (np.ndarray):
158 |             An array that indicates whether a feature is categorical or not.
159 |         args (Namespace):
160 |             The different updates for the setup of the run, mostly updates
161 |             for the different regularization ingredients.
162 | 
163 |     Returns:
164 | 
165 |         pipeline_update, search_space_updates, include_updates (Tuple[dict, HyperparameterSearchSpaceUpdates, dict]):
166 |             The pipeline updates like number of epochs, budget, seed etc.
167 |             The search space updates like setting different hps to different values or ranges.
168 |             Lastly include updates, which can be used to include different features.
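    Note:
        The boolean ingredient arguments (e.g. args.use_swa) arrive as lists,
        as produced by str2bool in the experiment scripts, which is why their
        first element is used as the default value in the updates below.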
169 | """ 170 | augmentation_names_to_trainers = { 171 | 'mixup': 'MixUpTrainer', 172 | 'cutout': 'RowCutOutTrainer', 173 | 'cutmix': 'RowCutMixTrainer', 174 | 'standard': 'StandardTrainer', 175 | 'adversarial': 'AdversarialTrainer', 176 | } 177 | 178 | include_updates = dict() 179 | include_updates['network_embedding'] = ['NoEmbedding'] 180 | include_updates['network_init'] = ['NoInit'] 181 | 182 | has_cat_features = any(categorical_indicator) 183 | has_numerical_features = not all(categorical_indicator) 184 | search_space_updates = HyperparameterSearchSpaceUpdates() 185 | 186 | # architecture head 187 | search_space_updates.append( 188 | node_name='network_head', 189 | hyperparameter='__choice__', 190 | value_range=['no_head'], 191 | default_value='no_head', 192 | ) 193 | search_space_updates.append( 194 | node_name='network_head', 195 | hyperparameter='no_head:activation', 196 | value_range=['relu'], 197 | default_value='relu', 198 | ) 199 | 200 | # backbone architecture 201 | search_space_updates.append( 202 | node_name='network_backbone', 203 | hyperparameter='__choice__', 204 | value_range=['ShapedResNetBackbone'], 205 | default_value='ShapedResNetBackbone', 206 | ) 207 | search_space_updates.append( 208 | node_name='network_backbone', 209 | hyperparameter='ShapedResNetBackbone:resnet_shape', 210 | value_range=['brick'], 211 | default_value='brick', 212 | ) 213 | search_space_updates.append( 214 | node_name='network_backbone', 215 | hyperparameter='ShapedResNetBackbone:num_groups', 216 | value_range=[2], 217 | default_value=2, 218 | ) 219 | search_space_updates.append( 220 | node_name='network_backbone', 221 | hyperparameter='ShapedResNetBackbone:blocks_per_group', 222 | value_range=[2], 223 | default_value=2, 224 | ) 225 | search_space_updates.append( 226 | node_name='network_backbone', 227 | hyperparameter='ShapedResNetBackbone:output_dim', 228 | value_range=[512], 229 | default_value=512, 230 | ) 231 | search_space_updates.append( 232 | node_name='network_backbone', 233 | hyperparameter='ShapedResNetBackbone:max_units', 234 | value_range=[512], 235 | default_value=512, 236 | ) 237 | search_space_updates.append( 238 | node_name='network_backbone', 239 | hyperparameter='ShapedResNetBackbone:activation', 240 | value_range=['relu'], 241 | default_value='relu', 242 | ) 243 | search_space_updates.append( 244 | node_name='network_backbone', 245 | hyperparameter='ShapedResNetBackbone:shake_shake_update_func', 246 | value_range=['even-even'], 247 | default_value='even-even', 248 | ) 249 | 250 | # training updates 251 | search_space_updates.append( 252 | node_name='lr_scheduler', 253 | hyperparameter='__choice__', 254 | value_range=['CosineAnnealingWarmRestarts'], 255 | default_value='CosineAnnealingWarmRestarts', 256 | ) 257 | search_space_updates.append( 258 | node_name='lr_scheduler', 259 | hyperparameter='CosineAnnealingWarmRestarts:n_restarts', 260 | value_range=[3], 261 | default_value=3, 262 | ) 263 | search_space_updates.append( 264 | node_name='optimizer', 265 | hyperparameter='__choice__', 266 | value_range=['AdamWOptimizer'], 267 | default_value='AdamWOptimizer', 268 | ) 269 | search_space_updates.append( 270 | node_name='optimizer', 271 | hyperparameter='AdamWOptimizer:lr', 272 | value_range=[1e-3], 273 | default_value=1e-3, 274 | ) 275 | search_space_updates.append( 276 | node_name='data_loader', 277 | hyperparameter='batch_size', 278 | value_range=[128], 279 | default_value=128, 280 | ) 281 | 282 | # preprocessing 283 | search_space_updates.append( 284 | 
        node_name='feature_preprocessor',
285 |         hyperparameter='__choice__',
286 |         value_range=['NoFeaturePreprocessor'],
287 |         default_value='NoFeaturePreprocessor',
288 |     )
289 | 
290 |     if has_numerical_features:
291 |         search_space_updates.append(
292 |             node_name='imputer',
293 |             hyperparameter='numerical_strategy',
294 |             value_range=['median'],
295 |             default_value='median',
296 |         )
297 |         search_space_updates.append(
298 |             node_name='scaler',
299 |             hyperparameter='__choice__',
300 |             value_range=['StandardScaler'],
301 |             default_value='StandardScaler',
302 |         )
303 | 
304 |     if has_cat_features:
305 |         search_space_updates.append(
306 |             node_name='imputer',
307 |             hyperparameter='categorical_strategy',
308 |             value_range=['constant_!missing!'],
309 |             default_value='constant_!missing!',
310 |         )
311 |         search_space_updates.append(
312 |             node_name='encoder',
313 |             hyperparameter='__choice__',
314 |             value_range=['OneHotEncoder'],
315 |             default_value='OneHotEncoder',
316 |         )
317 | 
318 |     search_space_updates.append(
319 |         node_name='optimizer',
320 |         hyperparameter='AdamWOptimizer:beta1',
321 |         value_range=[0.9],
322 |         default_value=0.9,
323 |     )
324 |     search_space_updates.append(
325 |         node_name='optimizer',
326 |         hyperparameter='AdamWOptimizer:beta2',
327 |         value_range=[0.999],
328 |         default_value=0.999,
329 |     )
330 | 
331 |     # If the CASH formulation of the cocktail is not activated, fix each ingredient here;
332 |     # otherwise, the activation of the methods is chosen by the SMBO optimizer.
333 |     if not args.cash_cocktail:
334 |         # regularization ingredients updates
335 |         search_space_updates.append(
336 |             node_name='network_backbone',
337 |             hyperparameter='ShapedResNetBackbone:use_dropout',
338 |             value_range=args.use_dropout,
339 |             default_value=args.use_dropout[0],
340 |         )
341 |         search_space_updates.append(
342 |             node_name='network_backbone',
343 |             hyperparameter='ShapedResNetBackbone:use_batch_norm',
344 |             value_range=args.use_batch_normalization,
345 |             default_value=args.use_batch_normalization[0],
346 |         )
347 |         search_space_updates.append(
348 |             node_name='network_backbone',
349 |             hyperparameter='ShapedResNetBackbone:use_skip_connection',
350 |             value_range=args.use_skip_connection,
351 |             default_value=args.use_skip_connection[0],
352 |         )
353 | 
354 |         multi_branch_choice = [args.mb_choice]
355 | 
356 |         search_space_updates.append(
357 |             node_name='network_backbone',
358 |             hyperparameter='ShapedResNetBackbone:multi_branch_choice',
359 |             value_range=multi_branch_choice,
360 |             default_value=multi_branch_choice[0],
361 |         )
362 | 
363 |         search_space_updates.append(
364 |             node_name='optimizer',
365 |             hyperparameter='AdamWOptimizer:use_weight_decay',
366 |             value_range=args.use_weight_decay,
367 |             default_value=args.use_weight_decay[0],
368 |         )
369 | 
370 |         trainer_choice = [augmentation_names_to_trainers[args.augmentation]]
371 | 
372 |         search_space_updates.append(
373 |             node_name='trainer',
374 |             hyperparameter='__choice__',
375 |             value_range=trainer_choice,
376 |             default_value=trainer_choice[0],
377 |         )
378 | 
379 |         search_space_updates.append(
380 |             node_name='trainer',
381 |             hyperparameter=f'{trainer_choice[0]}:weighted_loss',
382 |             value_range=[1],
383 |             default_value=1,
384 |         )
385 |         search_space_updates.append(
386 |             node_name='trainer',
387 |             hyperparameter=f'{trainer_choice[0]}:use_lookahead_optimizer',
388 |             value_range=args.use_lookahead,
389 |             default_value=args.use_lookahead[0],
390 |         )
391 |         search_space_updates.append(
392 |             node_name='trainer',
393 |             hyperparameter=f'{trainer_choice[0]}:use_stochastic_weight_averaging',
394 |             value_range=args.use_swa,
395 |             default_value=args.use_swa[0],
396 |         )
397 |         search_space_updates.append(
398 |             node_name='trainer',
399 |             hyperparameter=f'{trainer_choice[0]}:use_snapshot_ensemble',
400 |             value_range=args.use_se,
401 |             default_value=args.use_se[0],
402 |         )
403 | 
404 |     # Disable early stopping (-1) and train on CPU.
405 |     pipeline_update = {
406 |         'early_stopping': -1,
407 |         'min_epochs': args.epochs,
408 |         'epochs': args.epochs,
409 |         'device': 'cpu',
410 |     }
411 | 
412 |     return pipeline_update, search_space_updates, include_updates
413 | 
414 | 
415 | def get_incumbent_results(
416 |     run_history_file: str,
417 |     search_space: ConfigSpace.ConfigurationSpace,
418 | ) -> Tuple[ConfigSpace.Configuration, float]:
419 |     """
420 |     Get the incumbent configuration and its performance from a previous
421 |     HPO search run with AutoPyTorch.
422 | 
423 |     Args:
424 |         run_history_file (str):
425 |             The path where the AutoPyTorch search data is located.
426 |         search_space (ConfigSpace.ConfigurationSpace):
427 |             The configuration space that was used for the previous
428 |             HPO search.
429 | 
430 |     Returns:
431 |         config, incumbent_run_value (Tuple[ConfigSpace.Configuration, float]):
432 |             The incumbent configuration found by the HPO search and the
433 |             validation performance it achieved.
434 | 
435 |     """
436 |     run_history = RunHistory()
437 |     run_history.load_json(
438 |         run_history_file,
439 |         search_space,
440 |     )
441 | 
442 |     run_history_data = run_history.data
443 |     # Sort all evaluated runs by their validation cost (ascending).
444 |     sorted_runvalue_by_cost = sorted(run_history_data.items(), key=lambda item: item[1].cost)
445 |     incumbent_run_key, incumbent_run_value = sorted_runvalue_by_cost[0]
446 |     config = run_history.ids_config[incumbent_run_key.config_id]
447 |     return config, incumbent_run_value
--------------------------------------------------------------------------------
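Usage note (editorial addition, not part of the repository): a minimal sketch of how get_updates_for_regularization_cocktails might be called. The Namespace fields mirror the args attributes read in the function above (cash_cocktail, epochs, use_dropout, use_batch_normalization, use_skip_connection, mb_choice, use_weight_decay, augmentation, use_lookahead, use_swa, use_se); every concrete value below is illustrative only.

import numpy as np
from argparse import Namespace

# Hypothetical run arguments; the values are illustrative, not prescribed.
args = Namespace(
    cash_cocktail=False,       # fix the ingredients instead of letting SMBO choose
    epochs=105,
    use_dropout=[True],
    use_batch_normalization=[False],
    use_skip_connection=[True],
    mb_choice='none',          # assumed to be a valid multi_branch_choice value
    use_weight_decay=[True],
    augmentation='standard',   # maps to StandardTrainer in the dict above
    use_lookahead=[False],
    use_swa=[True],
    use_se=[True],
)

# The third feature is categorical, the first two are numerical.
categorical_indicator = np.array([False, False, True])

pipeline_update, search_space_updates, include_updates = \
    get_updates_for_regularization_cocktails(categorical_indicator, args)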
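Similarly, a hedged sketch of reading back the incumbent with get_incumbent_results after a search has finished. The runhistory path is a hypothetical placeholder, and the tiny configuration space below is a stand-in: in practice it must be the exact space used for the search (e.g. recovered from the fitted AutoPyTorch estimator). The second return value behaves like SMAC's run value, so its validation cost is read via .cost, matching the sorting key used inside the function.

import ConfigSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter

# Hypothetical stand-in for the real search space.
search_space = ConfigSpace.ConfigurationSpace()
search_space.add_hyperparameter(
    UniformFloatHyperparameter('optimizer:AdamWOptimizer:lr', 1e-4, 1e-1, log=True)
)

config, incumbent_run_value = get_incumbent_results(
    './tmp/smac3-output/run_1/runhistory.json',  # hypothetical path
    search_space,
)
print(config)
print('validation cost:', incumbent_run_value.cost)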