├── .gitignore ├── LICENSE ├── README.md ├── baselines ├── autogluon │ ├── main_experiment.py │ └── refit_experiment.py ├── autosklearn │ └── main_experiment.py ├── bohb │ ├── data │ │ ├── __init__.py │ │ └── loader.py │ ├── main_experiment.py │ ├── refit_experiment.py │ ├── utilities.py │ └── worker.py └── node │ └── node_experiment.py ├── cocktails ├── main_experiment.py └── refit_experiment.py ├── dataset_collection.py ├── figures └── all_baselines_diagram.png ├── results.py └── utilities.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # [NeurIPS 2021] Well-tuned Simple Nets Excel on Tabular Datasets
2 | 
3 | ## Introduction
4 | 
5 | This repo contains the source code accompanying the paper:
6 | 
7 | **Well-tuned Simple Nets Excel on Tabular Datasets**
8 | 
9 | Authors: Arlind Kadra, Marius Lindauer, Frank Hutter, Josif Grabocka
10 | 
11 | Tabular datasets are the last "unconquered castle" for deep learning, with traditional ML methods like Gradient-Boosted Decision Trees still performing strongly even against recent specialized neural architectures. In this paper, we hypothesize that the key to boosting the performance of neural networks lies in rethinking the joint and simultaneous application of a large set of modern regularization techniques. As a result, we propose regularizing plain Multilayer Perceptron (MLP) networks by searching for the optimal combination/cocktail of 13 regularization techniques for each dataset, using a joint optimization over the decision of which regularizers to apply and their subsidiary hyperparameters.
12 | 
13 | We empirically assess the impact of these **regularization cocktails** for MLPs in a large-scale empirical study comprising 40 tabular datasets and demonstrate that: (i) well-regularized plain MLPs significantly outperform recent state-of-the-art specialized neural network architectures, and (ii) they even outperform strong traditional ML methods, such as XGBoost.
14 | 
15 | 
16 | *News: Our work has been accepted at the Thirty-fifth Conference on Neural Information Processing Systems (NeurIPS 2021).*
17 | 
18 | 
19 | ## Setting up the virtual environment
20 | 
21 | Our work is built on top of AutoPyTorch. To inspect our implementation of the regularization cocktail ingredients, run the following:
22 | 
23 | 
24 | ```
25 | git clone https://github.com/automl/Auto-PyTorch.git
26 | cd Auto-PyTorch/
27 | git checkout regularization_cocktails
28 | ```
29 | To install the version of AutoPyTorch that features our work, use these additional commands:
30 | 
31 | ```
32 | # The following commands assume the user is in the cloned directory
33 | conda create -n reg_cocktails python=3.8
34 | conda activate reg_cocktails
35 | conda install gxx_linux-64 gcc_linux-64 swig
36 | cat requirements.txt | xargs -n 1 -L 1 pip install
37 | python setup.py install
38 | ```
39 | 
40 | ## Running the Regularization Cocktail code
41 | 
42 | The main files for running the regularization cocktails are `main_experiment.py` and
43 | `refit_experiment.py` in the `cocktails` folder. The first module starts a full
44 | HPO search, while the second refits the incumbent hyperparameter configuration on
45 | datasets where the time budget did not suffice to finish the full HPO search and the final refit.
46 | 
47 | The main arguments for `main_experiment.py`:
48 | 
49 | - `--task_id`: The OpenML task id,
which determines the dataset used in the experiment.
50 | - `--wall_time`: The total runtime budget; it covers both the HPO search and the final refit.
51 | - `--func_eval_time`: The maximal runtime for a single function evaluation, i.e. for evaluating one hyperparameter configuration.
52 | - `--epochs`: The number of epochs each hyperparameter configuration is trained for.
53 | - `--seed`: The seed used for the run.
54 | - `--tmp_dir`: The temporary directory in which results are stored.
55 | - `--output_dir`: The output directory in which results are stored.
56 | - `--nr_workers`: The number of workers, which corresponds to the number of hyperparameter configurations evaluated in parallel.
57 | - `--nr_threads`: The number of threads.
58 | - `--cash_cocktail`: An important flag that activates the regularization cocktail formulation.
59 | 
60 | **A minimal example of running the regularization cocktails**:
61 | 
62 | ```
63 | python main_experiment.py --task_id 233088 --wall_time 600 --func_eval_time 60 --epochs 10 --seed 42 --cash_cocktail True
64 | ```
65 | 
66 | The example above will run the regularization cocktails for 10 minutes, with a function evaluation limit of 60 seconds, for task 233088. Every
67 | hyperparameter configuration will be evaluated for 10 epochs, and seed 42 will be used for the experiment and the data splits.
68 | 
69 | **A minimal example of running only one regularization method:**
70 | ```
71 | python main_experiment.py --task_id 233088 --wall_time 600 --func_eval_time 60 --epochs 10 --seed 42 --use_weight_decay
72 | ```
73 | In case you would like to investigate individual regularization methods, the different arguments
74 | that control them are listed in `main_experiment.py`. Additionally, to remove
75 | the limit on the number of hyperparameter configurations, delete the following lines:
76 | 
77 | ```
78 | smac_scenario_args={
79 |     'runcount_limit': number_of_configurations_limit,
80 | }
81 | ```
82 | ## Plots
83 | 
84 | The plots included in our paper were generated by the functions in the module `results.py`.
85 | As mentioned in their docstrings, most of the functions that plot the baseline
86 | diagrams expect a folder structure as follows:
87 | 
88 | `common_result_folder/baseline/results.csv`
89 | 
90 | There are functions inside the module itself that generate the `results.csv` files.
91 | 
92 | ## Baselines
93 | 
94 | The code for running the baselines can be found in the `baselines` folder.
95 | 
96 | - TabNet, XGBoost, and CatBoost can be found in the `baselines/bohb` folder.
97 | - The other baselines (AutoGluon, auto-sklearn, and NODE) can be found in the folders with the corresponding names.
98 | 
99 | TabNet, XGBoost, CatBoost, and AutoGluon have the same two main files as our regularization cocktails: `main_experiment.py` and `refit_experiment.py`.
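
Each baseline run stores its final score in a `results.csv` file inside its output directory. As a convenience, here is a minimal sketch for gathering all baseline results into a single frame, assuming the `common_result_folder/baseline/results.csv` layout from the Plots section (the `collect_results` helper is hypothetical and not part of the repository):

```
import glob
import os

import pandas as pd


def collect_results(common_result_folder):
    """Gather every baseline's results.csv into one frame."""
    frames = []
    for csv_path in glob.glob(os.path.join(common_result_folder, '*', 'results.csv')):
        frame = pd.read_csv(csv_path)
        # The parent folder name identifies the baseline.
        frame['baseline'] = os.path.basename(os.path.dirname(csv_path))
        frames.append(frame)
    return pd.concat(frames, ignore_index=True)
```

Such a frame mirrors the on-disk folder structure that the plotting functions in `results.py` expect.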
100 | 101 | ## Figures 102 | 103 | ![alt text](https://github.com/releaunifreiburg/WellTunedSimpleNets/blob/main/figures/all_baselines_diagram.png "Critical Difference diagram of all the methods") 104 | 105 | ## Citation 106 | ``` 107 | @inproceedings{kadra2021well, 108 | title={Well-tuned Simple Nets Excel on Tabular Datasets}, 109 | author={Kadra, Arlind and Lindauer, Marius and Hutter, Frank and Grabocka, Josif}, 110 | booktitle={Thirty-Fifth Conference on Neural Information Processing Systems}, 111 | year={2021} 112 | } 113 | ``` 114 | -------------------------------------------------------------------------------- /baselines/autogluon/main_experiment.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import json 4 | import os 5 | import pickle 6 | import shutil 7 | import tempfile 8 | import warnings 9 | 10 | import matplotlib 11 | 12 | import numpy as np 13 | 14 | import openml 15 | 16 | import pandas as pd 17 | 18 | import psutil 19 | 20 | from sklearn.metrics import balanced_accuracy_score 21 | from sklearn.model_selection import train_test_split 22 | from sklearn.utils.multiclass import type_of_target 23 | 24 | from autogluon.tabular import TabularPredictor 25 | from autogluon.core.utils.savers import save_pd, save_pkl 26 | import autogluon.core.metrics as metrics 27 | from autogluon.tabular.version import __version__ 28 | from autogluon.tabular.configs.hyperparameter_configs import get_hyperparameter_config 29 | 30 | warnings.simplefilter("ignore") 31 | matplotlib.use('agg') # no need for tk 32 | log = logging.getLogger(__name__) 33 | log.setLevel(logging.DEBUG) 34 | 35 | 36 | # Credits to Francisco Rivera 37 | 38 | 39 | def get_data( 40 | task_id: int, 41 | test_size: float = 0.2, 42 | seed: int = 11, 43 | ): 44 | 45 | task = openml.tasks.get_task(task_id=task_id) 46 | dataset = task.get_dataset() 47 | train, y, categorical_indicator, _ = dataset.get_data( 48 | dataset_format='dataframe', 49 | target=dataset.default_target_attribute, 50 | ) 51 | 52 | # AutoGluon expects a frame with the label data inside 53 | train['target'] = y 54 | 55 | train, test = train_test_split( 56 | train, 57 | test_size=test_size, 58 | random_state=seed, 59 | stratify=y, 60 | ) 61 | 62 | return { 63 | 'train': train, 64 | 'test': test, 65 | 'label': 'target', 66 | } 67 | 68 | 69 | def run(config): 70 | 71 | log.info(f"\n**** AutoGluon [v{__version__}] ****\n") 72 | log.info(f"config:\n{pd.DataFrame([{a:b for a, b in config.items() if a not in ['train', 'test']}]).to_markdown()}") 73 | 74 | metrics_mapping = dict( 75 | acc=metrics.accuracy, 76 | auc=metrics.roc_auc, 77 | f1=metrics.f1, 78 | logloss=metrics.log_loss, 79 | mae=metrics.mean_absolute_error, 80 | mse=metrics.mean_squared_error, 81 | r2=metrics.r2, 82 | rmse=metrics.root_mean_squared_error, 83 | balacc=metrics.balanced_accuracy, 84 | ) 85 | 86 | perf_metric = metrics_mapping[config["metric"]] 87 | if perf_metric is None: 88 | raise ValueError(f"Need a valid metric, one from {metrics_mapping}") 89 | 90 | is_classification = config["type"] == 'classification' 91 | 92 | log.info(f"Columns dtypes:\n{config['train'].dtypes}") 93 | params = get_hyperparameter_config('default') 94 | if not config['traditional']: 95 | params.pop('GBM') 96 | params.pop('CAT') 97 | params.pop('XGB') 98 | params.pop('RF') 99 | params.pop('XT') 100 | params.pop('KNN') 101 | log.info(f"Models to use:\n{json.dumps(params, indent=4, sort_keys=True)}") 102 | 103 | predictor = TabularPredictor( 104 | 
        label=config['label'],
105 |         eval_metric=perf_metric.name,
106 |         path=config['output_dir'],
107 |     ).fit(
108 |         train_data=config['train'],
109 |         # Enable stacking!
110 |         presets='best_quality',
111 |         hyperparameter_tune_kwargs={
112 |             'searcher': 'bayesopt',
113 |             'scheduler': 'local',
114 |             'num_trials': 840,
115 |         },
116 |         holdout_frac=0.25,
117 |         refit_full=True,
118 |         time_limit=config["max_runtime_seconds"],
119 |         hyperparameters=params,
120 |     )
121 | 
122 |     y_test = config['test'][config['label']]
123 |     test = config['test'].drop(columns=config['label'])
124 | 
125 |     if is_classification:
126 |         probabilities = predictor.predict_proba(test, as_multiclass=True)
127 |         predictions = probabilities.idxmax(axis=1).to_numpy()
128 |     else:
129 |         predictions = predictor.predict(test, as_pandas=False)
130 |         probabilities = None
131 | 
132 |     leaderboard_kwargs = dict(silent=True, extra_info=True)
133 |     test[config['label']] = y_test
134 |     leaderboard_kwargs['data'] = test
135 | 
136 |     leaderboard = predictor.leaderboard(**leaderboard_kwargs)
137 |     with pd.option_context('display.max_rows', None, 'display.max_columns', None,
138 |                            'display.width', 1000):
139 |         log.info(leaderboard)
140 | 
141 |     log.info("\n\n\n")
142 |     leaderboard_kwargs['extra_info'] = False
143 |     leaderboard = predictor.leaderboard(**leaderboard_kwargs)
144 |     with pd.option_context('display.max_rows', None, 'display.max_columns', None,
145 |                            'display.width', 1000):
146 |         log.info(leaderboard)
147 | 
148 |     return predictions, probabilities, y_test, predictor
149 | 
150 | 
151 | if __name__ == '__main__':
152 |     parser = argparse.ArgumentParser(
153 |         description='Run autogluon on a benchmark'
154 |     )
155 |     # experiment setup arguments
156 |     parser.add_argument(
157 |         '--task_id',
158 |         type=int,
159 |         default=233088,
160 |     )
161 |     parser.add_argument(
162 |         '--max_runtime_seconds',
163 |         type=int,
164 |         default=345600,
165 |     )
166 |     parser.add_argument(
167 |         '--seed',
168 |         type=int,
169 |         default=11,
170 |     )
171 |     parser.add_argument(
172 |         '--test_size',
173 |         type=float,
174 |         default=0.2,
175 |     )
176 |     parser.add_argument(
177 |         '--output_dir',
178 |         type=str,
179 |         default='./autogluon_run/'
180 |     )
181 |     parser.add_argument(
182 |         '--traditional',
183 |         # argparse's type=bool would treat any non-empty string (even 'False') as True
184 |         action='store_true',
185 |     )
186 |     args = parser.parse_args()
187 | 
188 |     output_dir = os.path.join(
189 |         args.output_dir,
190 |         f'{args.seed}',
191 |         f'{args.task_id}',
192 |     )
193 |     os.makedirs(output_dir, exist_ok=True)
194 |     # Log to a file
195 |     logFormatter = logging.Formatter(
196 |         "%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s")
197 | 
198 |     fileHandler = logging.FileHandler(os.path.join(output_dir, 'info.log'))
199 |     fileHandler.setFormatter(logFormatter)
200 |     log.addHandler(fileHandler)
201 | 
202 |     # Build a configuration to run the experiments
203 |     config = {'task_id': args.task_id, 'output_dir': output_dir}
204 | 
205 |     # Add the train and test data
206 |     config.update(get_data(task_id=args.task_id, test_size=args.test_size, seed=args.seed))
207 | 
208 |     config.update({
209 |         'metric': 'balacc',
210 |         'type': 'classification',
211 |         'max_runtime_seconds': args.max_runtime_seconds,
212 |         'traditional': args.traditional,
213 |     })
214 | 
215 |     # Run the example -- and also warn the user about autogluon settings
216 |     log.warning(f"Autogluon does not accept a seed. 
Also, the cores are taken automatically "
217 |                 f"from the system, and in this case {psutil.cpu_count()} cores are used.")
218 |     predictions, probabilities, truth, predictor = run(config)
219 | 
220 |     # Store the predictions if things go south
221 |     with open(os.path.join(output_dir, f"predictions.{args.task_id}.pickle"), 'wb') as handle:
222 |         pickle.dump(predictions, handle, protocol=pickle.HIGHEST_PROTOCOL)
223 |     with open(os.path.join(output_dir, f"truth.{args.task_id}.pickle"), 'wb') as handle:
224 |         pickle.dump(truth, handle, protocol=pickle.HIGHEST_PROTOCOL)
225 | 
226 |     predictor.save()
227 | 
228 |     try:
229 |         score = balanced_accuracy_score(truth, predictions)
230 |     except ValueError:
231 |         # AutoGluon predictions have an unknown data type. Align to the dtype of the
232 |         # train data.
233 |         from sklearn import preprocessing
234 |         le = preprocessing.LabelEncoder()
235 |         if isinstance(truth, pd.Series):
236 |             truth = pd.Series(truth, dtype=config['train']['target'].dtype)
237 |         predictions = pd.Series(predictions, dtype=config['train']['target'].dtype)
238 |         le.fit(config['train']['target'])
239 |         score = balanced_accuracy_score(le.transform(truth), le.transform(predictions))
240 | 
241 |     log.info(f"Trained AutoGluon on task {args.task_id} resulted in score {score}")
242 | 
243 |     # save score to a file, just in case!
244 |     config.pop('train')
245 |     config.pop('test')
246 |     config['score'] = score
247 |     task_csv_dir = os.path.join(
248 |         output_dir,
249 |         'results.csv',
250 |     )
251 |     pd.DataFrame([config]).to_csv(
252 |         task_csv_dir,
253 |     )
254 | 
255 |     # Exit with a success status!
256 |     exit(0)
257 | 
--------------------------------------------------------------------------------
/baselines/autogluon/refit_experiment.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import json
4 | import os
5 | import pickle
6 | import warnings
7 | 
8 | import matplotlib
9 | import openml
10 | 
11 | import pandas as pd
12 | 
13 | import psutil
14 | 
15 | from sklearn.metrics import balanced_accuracy_score
16 | from sklearn.model_selection import train_test_split
17 | 
18 | from autogluon.tabular import TabularPredictor
19 | import autogluon.core.metrics as metrics
20 | from autogluon.tabular.version import __version__
21 | from autogluon.tabular.configs.hyperparameter_configs import get_hyperparameter_config
22 | 
23 | warnings.simplefilter("ignore")
24 | matplotlib.use('agg')  # no need for tk
25 | log = logging.getLogger(__name__)
26 | log.setLevel(logging.DEBUG)
27 | 
28 | 
29 | def get_data(
30 |     task_id: int,
31 |     test_size: float = 0.2,
32 |     seed: int = 11,
33 | ):
34 | 
35 |     task = openml.tasks.get_task(task_id=task_id)
36 |     dataset = task.get_dataset()
37 |     train, y, categorical_indicator, _ = dataset.get_data(
38 |         dataset_format='dataframe',
39 |         target=dataset.default_target_attribute,
40 |     )
41 | 
42 |     # AutoGluon expects a frame with the label data inside
43 |     train['target'] = y
44 | 
45 |     train, test = train_test_split(
46 |         train,
47 |         test_size=test_size,
48 |         random_state=seed,
49 |         stratify=y,
50 |     )
51 | 
52 |     return {
53 |         'train': train,
54 |         'test': test,
55 |         'label': 'target',
56 |     }
57 | 
58 | 
59 | def run(config):
60 | 
61 |     log.info(f"\n**** AutoGluon [v{__version__}] ****\n")
62 |     log.info(f"config:\n{pd.DataFrame([{a:b for a, b in config.items() if a not in ['train', 'test']}]).to_markdown()}")
63 | 
64 |     metrics_mapping = dict(
65 |         acc=metrics.accuracy,
66 |         auc=metrics.roc_auc,
67 |         f1=metrics.f1,
68 | 
        logloss=metrics.log_loss,
69 |         mae=metrics.mean_absolute_error,
70 |         mse=metrics.mean_squared_error,
71 |         r2=metrics.r2,
72 |         rmse=metrics.root_mean_squared_error,
73 |         balacc=metrics.balanced_accuracy,
74 |     )
75 | 
76 |     perf_metric = metrics_mapping[config["metric"]]
77 |     if perf_metric is None:
78 |         raise ValueError(f"Need a valid metric, one from {metrics_mapping}")
79 | 
80 |     is_classification = config["type"] == 'classification'
81 | 
82 |     log.info(f"Columns dtypes:\n{config['train'].dtypes}")
83 |     params = get_hyperparameter_config('default')
84 |     if not config['traditional']:
85 |         params.pop('GBM')
86 |         params.pop('CAT')
87 |         params.pop('XGB')
88 |         params.pop('RF')
89 |         params.pop('XT')
90 |         params.pop('KNN')
91 |     log.info(f"Models to use:\n{json.dumps(params, indent=4, sort_keys=True)}")
92 | 
93 |     predictor = TabularPredictor.load(config['output_dir'])
94 | 
95 |     y_test = config['test'][config['label']]
96 |     test = config['test'].drop(columns=config['label'])
97 | 
98 |     if is_classification:
99 |         probabilities = predictor.predict_proba(test, as_multiclass=True)
100 |         predictions = probabilities.idxmax(axis=1).to_numpy()
101 |     else:
102 |         predictions = predictor.predict(test, as_pandas=False)
103 |         probabilities = None
104 | 
105 |     leaderboard_kwargs = dict(silent=True, extra_info=True)
106 |     test[config['label']] = y_test
107 |     leaderboard_kwargs['data'] = test
108 | 
109 |     leaderboard = predictor.leaderboard(**leaderboard_kwargs)
110 |     with pd.option_context('display.max_rows', None, 'display.max_columns', None,
111 |                            'display.width', 1000):
112 |         log.info(leaderboard)
113 | 
114 |     log.info("\n\n\n")
115 |     leaderboard_kwargs['extra_info'] = False
116 |     leaderboard = predictor.leaderboard(**leaderboard_kwargs)
117 |     with pd.option_context('display.max_rows', None, 'display.max_columns', None,
118 |                            'display.width', 1000):
119 |         log.info(leaderboard)
120 | 
121 |     return predictions, probabilities, y_test, predictor
122 | 
123 | 
124 | if __name__ == '__main__':
125 |     parser = argparse.ArgumentParser(
126 |         description='Run autogluon on a benchmark'
127 |     )
128 |     # experiment setup arguments
129 |     parser.add_argument(
130 |         '--task_id',
131 |         type=int,
132 |         default=233088,
133 |     )
134 |     parser.add_argument(
135 |         '--max_runtime_seconds',
136 |         type=int,
137 |         default=600,
138 |     )
139 |     parser.add_argument(
140 |         '--seed',
141 |         type=int,
142 |         default=11,
143 |     )
144 |     parser.add_argument(
145 |         '--test_size',
146 |         type=float,
147 |         default=0.2,
148 |     )
149 |     parser.add_argument(
150 |         '--output_dir',
151 |         type=str,
152 |         default='./autogluon_run/'
153 |     )
154 |     parser.add_argument(
155 |         '--traditional',
156 |         # argparse's type=bool would treat any non-empty string (even 'False') as True
157 |         action='store_true',
158 |     )
159 |     args = parser.parse_args()
160 | 
161 |     output_dir = os.path.join(
162 |         args.output_dir,
163 |         f'{args.seed}',
164 |         f'{args.task_id}',
165 |     )
166 |     os.makedirs(output_dir, exist_ok=True)
167 |     # Log to a file
168 |     logFormatter = logging.Formatter(
169 |         "%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s")
170 | 
171 |     fileHandler = logging.FileHandler(os.path.join(output_dir, 'info.log'))
172 |     fileHandler.setFormatter(logFormatter)
173 |     log.addHandler(fileHandler)
174 | 
175 |     # Build a configuration to run the experiments
176 |     config = {'task_id': args.task_id, 'output_dir': output_dir}
177 | 
178 |     # Add the train and test data
179 |     config.update(get_data(task_id=args.task_id, test_size=args.test_size, seed=args.seed))
180 | 
181 |     config.update({
182 |         'metric': 'balacc',
183 |         'type': 'classification',
184 | 
        'max_runtime_seconds': args.max_runtime_seconds,
185 |         'traditional': args.traditional,
186 |     })
187 | 
188 |     # Run the example -- and also warn the user about autogluon settings
189 |     log.warning(f"Autogluon does not accept a seed. Also, the cores are taken automatically "
190 |                 f"from the system, and in this case {psutil.cpu_count()} cores are used.")
191 |     predictions, probabilities, truth, predictor = run(config)
192 | 
193 |     # Store the predictions if things go south
194 |     with open(os.path.join(output_dir, f"predictions.{args.task_id}.pickle"), 'wb') as handle:
195 |         pickle.dump(predictions, handle, protocol=pickle.HIGHEST_PROTOCOL)
196 |     with open(os.path.join(output_dir, f"truth.{args.task_id}.pickle"), 'wb') as handle:
197 |         pickle.dump(truth, handle, protocol=pickle.HIGHEST_PROTOCOL)
198 | 
199 |     predictor.save()
200 | 
201 |     try:
202 |         score = balanced_accuracy_score(truth, predictions)
203 |     except ValueError:
204 |         # AutoGluon predictions have an unknown data type. Align to the dtype of the
205 |         # train data.
206 |         from sklearn import preprocessing
207 |         le = preprocessing.LabelEncoder()
208 |         if isinstance(truth, pd.Series):
209 |             truth = pd.Series(truth, dtype=config['train']['target'].dtype)
210 |         predictions = pd.Series(predictions, dtype=config['train']['target'].dtype)
211 |         le.fit(config['train']['target'])
212 |         score = balanced_accuracy_score(le.transform(truth), le.transform(predictions))
213 | 
214 |     log.info(f"Trained AutoGluon on task {args.task_id} resulted in score {score}")
215 | 
216 |     # save score to a file, just in case!
217 |     config.pop('train')
218 |     config.pop('test')
219 |     config['score'] = score
220 |     task_csv_dir = os.path.join(
221 |         output_dir,
222 |         'results.csv',
223 |     )
224 |     pd.DataFrame([config]).to_csv(
225 |         task_csv_dir,
226 |     )
227 | 
228 |     # Exit with a success status!
229 |     exit(0)
230 | 
--------------------------------------------------------------------------------
/baselines/autosklearn/main_experiment.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import os
4 | import random
5 | 
6 | import autosklearn.classification
7 | from autosklearn.metrics import balanced_accuracy
8 | import numpy as np
9 | import openml
10 | from sklearn.metrics import balanced_accuracy_score
11 | from sklearn.model_selection import StratifiedShuffleSplit
12 | from sklearn.model_selection import train_test_split
13 | 
14 | 
15 | def create_dir(
16 |     path: str,
17 | ):
18 |     """Create the directory/subdirectories for the given path.
19 | 
20 |     Given a path, check the directory/subdirectories that are
21 |     part of the path and create the parts that are missing.
22 | 
23 |     Parameters:
24 |     -----------
25 |     path: str
26 |         The path to be created.
28 | """ 29 | if os.path.exists(path): 30 | if not os.path.isdir(path): 31 | os.makedirs(path) 32 | else: 33 | os.makedirs(path) 34 | 35 | 36 | parser = argparse.ArgumentParser( 37 | description='autosklearn_gb' 38 | ) 39 | parser.add_argument( 40 | '--run_id', 41 | help='Unique id to identify the AutoSklearn run.', 42 | default='autosklearn_gb', 43 | type=str, 44 | ) 45 | parser.add_argument( 46 | '--tmp_dir', 47 | help='Temporary node storage.', 48 | default='path/temporary_storage', 49 | type=str, 50 | ) 51 | parser.add_argument( 52 | '--working_dir', 53 | help='Working directory where to store the results.', 54 | default='path/working_dir', 55 | type=str, 56 | ) 57 | parser.add_argument( 58 | '--task_id', 59 | help='Task id so that the dataset can be retrieved from OpenML.', 60 | default=233088, 61 | type=int, 62 | ) 63 | parser.add_argument( 64 | '--nr_workers', 65 | help='Number of workers.', 66 | default=10, 67 | type=int, 68 | ) 69 | parser.add_argument( 70 | '--seed', 71 | help='Seed number.', 72 | default=11, 73 | type=int, 74 | ) 75 | 76 | args = parser.parse_args() 77 | np.random.seed(args.seed) 78 | random.seed(args.seed) 79 | 80 | task = openml.tasks.get_task(task_id=args.task_id) 81 | dataset = task.get_dataset() 82 | X, y, categorical_indicator, _ = dataset.get_data( 83 | dataset_format='array', 84 | target=dataset.default_target_attribute, 85 | ) 86 | X_train, X_test, y_train, y_test = train_test_split( 87 | X, 88 | y, 89 | test_size=0.2, 90 | random_state=args.seed, 91 | stratify=y, 92 | ) 93 | 94 | output_directory = os.path.join( 95 | args.working_dir, 96 | f'{args.seed}', 97 | f'{args.task_id}', 98 | 'output', 99 | ) 100 | result_directory = os.path.join( 101 | args.working_dir, 102 | f'{args.seed}', 103 | f'{args.task_id}', 104 | 'results', 105 | ) 106 | 107 | feat_types = ['Categorical' if feature else 'Numerical' for feature in categorical_indicator] 108 | resampling_strategy = StratifiedShuffleSplit 109 | resampling_strategy_arguments = {'test_size': 0.25, 'random_state': args.seed, 'n_splits': 1} 110 | # This is a stratified split, so this should work better. 
111 | # validation_policy = {'holdout': {'train_size': 0.75, 'shuffle': True}} 112 | 113 | if __name__ == '__main__': 114 | gb_autosklearn = autosklearn.classification.AutoSklearnClassifier( 115 | include_estimators=['gradient_boosting'], 116 | include_preprocessors=['no_preprocessing'], 117 | time_left_for_this_task=324000, 118 | ensemble_size=1, 119 | seed=args.seed, 120 | memory_limit=12000, 121 | output_folder=output_directory, 122 | tmp_folder=os.path.join(args.tmp_dir, 'autosklearn'), 123 | resampling_strategy=resampling_strategy, 124 | resampling_strategy_arguments=resampling_strategy_arguments, 125 | initial_configurations_via_metalearning=0, 126 | metric=balanced_accuracy, 127 | n_jobs=args.nr_workers, 128 | smac_scenario_args={'runcount_limit': 840}, 129 | ) 130 | gb_autosklearn.fit(X_train.copy(), y_train.copy(), dataset_name=dataset.name) 131 | print(gb_autosklearn.sprint_statistics()) 132 | gb_autosklearn.refit(X_train.copy(), y_train.copy()) 133 | y_test_pred = gb_autosklearn.predict(X_test) 134 | y_train_pred = gb_autosklearn.predict(X_train) 135 | 136 | train_acc = balanced_accuracy_score( 137 | y_train, 138 | y_train_pred, 139 | ) 140 | test_acc = balanced_accuracy_score( 141 | y_test, 142 | y_test_pred, 143 | ) 144 | 145 | information = { 146 | 'train_acc': train_acc, 147 | 'test_acc': test_acc, 148 | } 149 | create_dir(result_directory) 150 | with open(os.path.join(result_directory, 'refit_result.json'), 'w') as file: 151 | json.dump(information, file) 152 | -------------------------------------------------------------------------------- /baselines/bohb/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machinelearningnuremberg/WellTunedSimpleNets/54058460d5b587bc84107c200e6f1c44755a87e0/baselines/bohb/data/__init__.py -------------------------------------------------------------------------------- /baselines/bohb/data/loader.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | import numpy as np 4 | 5 | from utilities import get_dataset_openml, get_dataset_split 6 | 7 | 8 | # Loader class which provides the data splits 9 | class Loader: 10 | 11 | def __init__( 12 | self, 13 | task_id: int, 14 | val_fraction: float = 0.2, 15 | test_fraction: float = 0.2, 16 | seed: int = 11, 17 | apply_one_hot_encoding: bool = False, 18 | apply_imputation: bool = False, 19 | ): 20 | 21 | # download the dataset 22 | dataset = get_dataset_openml(task_id) 23 | # get the splits according to the given fractions and seed, 24 | # together with the categorical indicator 25 | self.categorical_information, self.splits = get_dataset_split( 26 | dataset, 27 | val_fraction=val_fraction, 28 | test_fraction=test_fraction, 29 | seed=seed, 30 | apply_one_hot_encoding=apply_one_hot_encoding, 31 | apply_imputation=apply_imputation, 32 | ) 33 | self.dataset_id = dataset.dataset_id 34 | 35 | def get_splits(self) -> Dict[str, np.array]: 36 | """Return the dataset splits for the different sets. 37 | """ 38 | 39 | return self.splits 40 | 41 | def get_dataset_id(self) -> int: 42 | """Return the dataset id. 
43 | """ 44 | return self.dataset_id 45 | -------------------------------------------------------------------------------- /baselines/bohb/main_experiment.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | logging.basicConfig(level=logging.DEBUG) 5 | import os 6 | import pickle 7 | import random 8 | import time 9 | 10 | import hpbandster.core.nameserver as hpns 11 | import hpbandster.core.result as hpres 12 | from hpbandster.optimizers import BOHB as BOHB 13 | from hpbandster.optimizers import RandomSearch as RS 14 | import numpy as np 15 | import openml 16 | 17 | from worker import CatBoostWorker, XGBoostWorker, TabNetWorker 18 | 19 | 20 | parser = argparse.ArgumentParser( 21 | description='Baseline experiment.' 22 | ) 23 | parser.add_argument( 24 | '--run_id', 25 | type=str, 26 | help='The run id of the optimization run.', 27 | default='tabular_baseline', 28 | ) 29 | parser.add_argument( 30 | '--working_directory', 31 | type=str, 32 | help='The working directory where results will be stored.', 33 | default='.', 34 | ) 35 | parser.add_argument( 36 | '--nic_name', 37 | type=str, 38 | help='Which network interface to use for communication.', 39 | default='ib0', 40 | ) 41 | parser.add_argument( 42 | '--optimizer', 43 | type=str, 44 | help='Which optimizer to use for the experiment.', 45 | default='bohb', 46 | ) 47 | parser.add_argument( 48 | '--model', 49 | type=str, 50 | help='Which model to use for the experiment.', 51 | default='tabnet', 52 | ) 53 | parser.add_argument( 54 | '--task_id', 55 | type=int, 56 | help='Task id used for the experiment.', 57 | default=233109, 58 | ) 59 | parser.add_argument( 60 | '--seed', 61 | type=int, 62 | help='Seed used for the experiment.', 63 | default=11, 64 | ) 65 | parser.add_argument( 66 | '--max_budget', 67 | type=float, 68 | help='Maximum budget used during the optimization.', 69 | default=1, 70 | ) 71 | parser.add_argument( 72 | '--min_budget', 73 | type=float, 74 | help='Minimum budget used during the optimization.', 75 | default=1, 76 | ) 77 | parser.add_argument( 78 | '--n_iterations', 79 | type=int, 80 | help='Number of BOHB iterations.', 81 | default=10, 82 | ) 83 | parser.add_argument( 84 | '--n_workers', 85 | type=int, 86 | help='Number of workers to run in parallel.', 87 | default=2, 88 | ) 89 | parser.add_argument( 90 | '--nr_threads', 91 | type=int, 92 | help='Number of threads for one worker.', 93 | default=2, 94 | ) 95 | parser.add_argument( 96 | '--worker', 97 | help='Flag to turn this into a worker process', 98 | action='store_true', 99 | ) 100 | 101 | args = parser.parse_args() 102 | 103 | np.random.seed(args.seed) 104 | random.seed(args.seed) 105 | 106 | host = hpns.nic_name_to_host(args.nic_name) 107 | 108 | # determine the problem type, if it is binary 109 | # or multiclass classification 110 | task_id = args.task_id 111 | task = openml.tasks.get_task(task_id, download_data=False) 112 | nr_classes = int(openml.datasets.get_dataset(task.dataset_id, download_data=False).qualities['NumberOfClasses']) 113 | 114 | run_directory = os.path.join( 115 | args.working_directory, 116 | f'{args.task_id}', 117 | f'{args.seed}', 118 | ) 119 | os.makedirs(run_directory, exist_ok=True) 120 | 121 | worker_choices = { 122 | 'tabnet': TabNetWorker, 123 | 'xgboost': XGBoostWorker, 124 | 'catboost': CatBoostWorker, 125 | } 126 | 127 | model_worker = worker_choices[args.model] 128 | # build the model setting configuration 129 | if args.model == 'tabnet': 130 | 
param = model_worker.get_parameters( 131 | task_id=task_id, 132 | seed=args.seed, 133 | ) 134 | elif args.model =='xgboost': 135 | param = model_worker.get_parameters( 136 | task_id=task_id, 137 | nr_classes=nr_classes, 138 | seed=args.seed, 139 | nr_threads=args.nr_threads, 140 | output_directory=run_directory, 141 | ) 142 | else: 143 | param = model_worker.get_parameters( 144 | task_id=task_id, 145 | nr_classes=nr_classes, 146 | seed=args.seed, 147 | output_directory=run_directory, 148 | ) 149 | 150 | if args.worker: 151 | # short artificial delay to make sure the nameserver is already running 152 | time.sleep(5) 153 | worker = model_worker( 154 | run_id=args.run_id, 155 | host=host, 156 | param=param, 157 | ) 158 | while True: 159 | try: 160 | worker.load_nameserver_credentials( 161 | working_directory=args.working_directory, 162 | ) 163 | break 164 | except RuntimeError: 165 | pass 166 | worker.run(background=False) 167 | exit(0) 168 | 169 | print(f'Experiment started with task id: {args.task_id}') 170 | 171 | 172 | NS = hpns.NameServer( 173 | run_id=args.run_id, 174 | host=host, 175 | port=0, 176 | working_directory=args.working_directory, 177 | ) 178 | ns_host, ns_port = NS.start() 179 | 180 | worker = model_worker( 181 | run_id=args.run_id, 182 | host=host, 183 | param=param, 184 | nameserver=ns_host, 185 | nameserver_port=ns_port 186 | ) 187 | worker.run(background=True) 188 | result_logger = hpres.json_result_logger(directory=run_directory, overwrite=False) 189 | 190 | optimizer_choices = { 191 | 'bohb': BOHB, 192 | 'random_search': RS, 193 | } 194 | 195 | optimizer = optimizer_choices[args.optimizer] 196 | 197 | # for the moment only available to XGBoost 198 | if args.model == 'xgboost': 199 | config_space = model_worker.get_default_configspace( 200 | seed=args.seed, 201 | early_stopping=True, 202 | conditional_imputation=False, 203 | ) 204 | else: 205 | config_space = model_worker.get_default_configspace(seed=args.seed) 206 | 207 | bohb = optimizer( 208 | configspace=config_space, 209 | run_id=args.run_id, 210 | host=host, 211 | nameserver=ns_host, 212 | nameserver_port=ns_port, 213 | min_budget=args.min_budget, 214 | max_budget=args.max_budget, 215 | result_logger=result_logger, 216 | ) 217 | 218 | res = bohb.run( 219 | n_iterations=args.n_iterations, 220 | min_n_workers=args.n_workers 221 | ) 222 | 223 | bohb.shutdown(shutdown_workers=True) 224 | NS.shutdown() 225 | 226 | with open(os.path.join(run_directory, 'results.pkl'), 'wb') as fh: 227 | pickle.dump(res, fh) 228 | 229 | id2config = res.get_id2config_mapping() 230 | incumbent = res.get_incumbent_id() 231 | incumbent_runs = res.get_runs_by_id(incumbent) 232 | best_config = id2config[incumbent]['config'] 233 | 234 | # default values to find the config with the 235 | # best performance, so we can pull the best 236 | # iteration number. 
237 | val_error_min = 100
238 | best_round = 0
239 | if 'early_stopping_rounds' in best_config:
240 |     for run in incumbent_runs:
241 |         print(run)
242 |         print(run.info)
243 |         if run.loss < val_error_min:
244 |             val_error_min = run.loss
245 |             if 'best_round' in run.info:
246 |                 best_round = run.info['best_round']
247 |     # no need for the early stopping rounds anymore
248 |     del best_config['early_stopping_rounds']
249 |     # train only for the best performance achieved
250 |     # for the 'best_round' iteration
251 |     if args.model == 'tabnet':
252 |         best_config['max_epochs'] = best_round
253 |     else:
254 |         best_config['num_round'] = best_round
255 | 
256 |     print(f'Best round for {args.model} refit: {best_round}')
257 | 
258 | 
259 | all_runs = res.get_all_runs()
260 | print('Best found configuration:', best_config)
261 | print('A total of %i unique configurations were sampled.' % len(id2config.keys()))
262 | print('A total of %i runs were executed.' % len(res.get_all_runs()))
263 | print('Total budget corresponds to %.1f full function evaluations.'
264 |       % (sum([r.budget for r in all_runs])/args.max_budget))
265 | print('The run took %.1f seconds to complete.'
266 |       % (all_runs[-1].time_stamps['finished'] - all_runs[0].time_stamps['started']))
267 | 
268 | worker = model_worker(
269 |     args.run_id,
270 |     param=param,
271 |     nameserver='127.0.0.1',
272 | )
273 | refit_result = worker.refit(best_config)
274 | with open(os.path.join(run_directory, 'refit_result.json'), 'w') as file:
275 |     json.dump(refit_result, file)
276 | 
--------------------------------------------------------------------------------
/baselines/bohb/refit_experiment.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import logging
4 | logging.basicConfig(level=logging.DEBUG)
5 | import os
6 | import random
7 | 
8 | import hpbandster.core.result as hpres
9 | import numpy as np
10 | import openml
11 | 
12 | from data.loader import Loader
13 | from worker import CatBoostWorker, XGBoostWorker, TabNetWorker
14 | 
15 | 
16 | parser = argparse.ArgumentParser(
17 |     description='Baseline refit experiment.'
18 | )
19 | parser.add_argument(
20 |     '--run_id',
21 |     type=str,
22 |     help='The run id of the optimization run.',
23 |     default='Baseline',
24 | )
25 | parser.add_argument(
26 |     '--working_directory',
27 |     type=str,
28 |     help='The working directory where results will be stored.',
29 |     default='.',
30 | )
31 | parser.add_argument(
32 |     '--model',
33 |     type=str,
34 |     help='Which model to use for the experiment.',
35 |     default='tabnet',
36 | )
37 | parser.add_argument(
38 |     '--task_id',
39 |     type=int,
40 |     help='Task id used for the experiment.',
41 |     default=233109,
42 | )
43 | parser.add_argument(
44 |     '--seed',
45 |     type=int,
46 |     help='Seed used for the experiment.',
47 |     default=11,
48 | )
49 | parser.add_argument(
50 |     '--nr_threads',
51 |     type=int,
52 |     help='Number of threads for one worker.',
53 |     default=2,
54 | )
55 | args = parser.parse_args()
56 | 
57 | np.random.seed(args.seed)
58 | random.seed(args.seed)
59 | 
60 | task_id = args.task_id
61 | task = openml.tasks.get_task(task_id, download_data=False)
62 | nr_classes = int(openml.datasets.get_dataset(task.dataset_id, download_data=False).qualities['NumberOfClasses'])
63 | 
64 | run_directory = os.path.join(
65 |     args.working_directory,
66 |     f'{args.task_id}',
67 |     f'{args.seed}',
68 | )
69 | os.makedirs(run_directory, exist_ok=True)
70 | 
71 | worker_choices = {
72 |     'tabnet': TabNetWorker,
73 |     'xgboost': XGBoostWorker,
74 |     'catboost': CatBoostWorker,
75 | }
76 | 
77 | model_worker = worker_choices[args.model]
78 | 
79 | if args.model == 'tabnet':
80 |     param = model_worker.get_parameters(
81 |         task_id=args.task_id,
82 |         seed=args.seed,
83 |     )
84 | elif args.model == 'xgboost':
85 |     param = model_worker.get_parameters(
86 |         task_id=args.task_id,
87 |         nr_classes=nr_classes,
88 |         seed=args.seed,
89 |         nr_threads=args.nr_threads,
90 |         output_directory=run_directory,
91 |     )
92 | else:
93 |     param = model_worker.get_parameters(
94 |         task_id=args.task_id,
95 |         nr_classes=nr_classes,
96 |         seed=args.seed,
97 |         output_directory=run_directory,
98 |     )
99 | 
100 | print(f'Refit experiment started with task id: {args.task_id}')
101 | 
102 | worker = model_worker(
103 |     args.run_id,
104 |     param=param,
105 |     nameserver='127.0.0.1',
106 | )
107 | 
108 | result = hpres.logged_results_to_HBS_result(run_directory)
109 | all_runs = result.get_all_runs()
110 | id2conf = result.get_id2config_mapping()
111 | 
112 | inc_id = result.get_incumbent_id()
113 | inc_runs = result.get_runs_by_id(inc_id)
114 | inc_config = id2conf[inc_id]['config']
115 | print(f"Best configuration so far: {inc_config}")
116 | 
117 | # default values to find the config with the
118 | # best performance, so we can pull the best
119 | # iteration number.
120 | val_error_min = 100 121 | best_round = 0 122 | if 'early_stopping_rounds' in inc_config: 123 | for run in inc_runs: 124 | print(run) 125 | print(run.info) 126 | if run.loss < val_error_min: 127 | val_error_min = run.loss 128 | if 'best_round' in run.info: 129 | best_round = run.info['best_round'] 130 | # no need for the early stopping rounds anymore 131 | del inc_config['early_stopping_rounds'] 132 | # train only for the best performance achieved 133 | # for the 'best_round' iteration 134 | if args.model == 'tabnet': 135 | inc_config['max_epochs'] = best_round 136 | else: 137 | inc_config['num_round'] = best_round 138 | 139 | print(f'Best round for {args.model} refit: {best_round}') 140 | 141 | refit_result = worker.refit(inc_config) 142 | with open(os.path.join(run_directory, 'refit_result.json'), 'w') as file: 143 | json.dump(refit_result, file) 144 | -------------------------------------------------------------------------------- /baselines/bohb/utilities.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | import json 3 | import os 4 | from typing import Dict, List, Tuple, Union 5 | 6 | import numpy as np 7 | import openml 8 | import pandas as pd 9 | import scipy 10 | from scipy.stats import wilcoxon, rankdata 11 | from sklearn.compose import ColumnTransformer 12 | from sklearn.impute import SimpleImputer 13 | from sklearn.model_selection import train_test_split 14 | from sklearn.pipeline import Pipeline 15 | from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder 16 | 17 | 18 | def get_dataset_split( 19 | dataset: openml.datasets.OpenMLDataset, 20 | val_fraction: float = 0.2, 21 | test_fraction: float = 0.2, 22 | seed: int = 11, 23 | apply_one_hot_encoding: bool = False, 24 | apply_imputation: bool = False, 25 | model: str = 'xgboost', 26 | ) -> Tuple[Dict[str, Union[List, np.ndarray]], Dict[str, np.ndarray]]: 27 | """Split the dataset into training, test and possibly validation set. 28 | 29 | Based on the arguments given, splits the datasets into the corresponding 30 | sets. 31 | 32 | Parameters: 33 | ----------- 34 | dataset: openml.datasets.OpenMLDataset 35 | The dataset that will be split into the corresponding sets. 36 | val_fraction: float 37 | The fraction for the size of the validation set from the whole dataset. 38 | test_fraction: float 39 | The fraction for the size of the test set from the whole dataset. 40 | seed: int 41 | The seed used for the splitting of the dataset. 42 | apply_one_hot_encoding: bool 43 | Apply one hot encodings to categorical features of the given dataset. 44 | apply_imputation: bool 45 | Substitute missing values from the given dataset. 46 | 47 | Returns: 48 | -------- 49 | (categorical_information, dataset_splits): tuple(np.array, dict) 50 | Returns a tuple, where the first arguments provides categorical information 51 | about the features. While the second argument, is a dictionary with the splits 52 | for the different sets. 53 | """ 54 | X, y, categorical_indicator, _ = dataset.get_data( 55 | dataset_format='dataframe', 56 | target=dataset.default_target_attribute, 57 | ) 58 | 59 | label_encoder = LabelEncoder() 60 | empty_features = [] 61 | 62 | # remove nan features from the dataframe 63 | nan_columns = X.isna().all() 64 | for col_index, col_status in enumerate(nan_columns): 65 | if col_status: 66 | empty_features.append(col_index) 67 | # if there are null categorical columns, remove them 68 | # from the categorical column indicator. 
69 |     if len(empty_features) > 0:
70 |         for feature_index in sorted(empty_features, reverse=True):
71 |             del categorical_indicator[feature_index]
72 | 
73 |     column_names = list(X.columns)
74 |     # delete empty feature columns.
75 |     # Normally this would be done by the SimpleImputer, but since
76 |     # the imputation step is conditional, we do it ourselves.
77 |     empty_feature_names = [column_names[feat_index] for feat_index in empty_features]
78 |     if any(nan_columns):
79 |         X.drop(labels=empty_feature_names, axis='columns', inplace=True)
80 | 
81 |     column_names = list(X.columns)
82 |     numerical_columns = []
83 |     categorical_columns = []
84 | 
85 |     index = 0
86 |     categorical_col_indices = []
87 |     for cat_column_indicator, column_name in zip(categorical_indicator, column_names):
88 |         if cat_column_indicator:
89 |             categorical_columns.append(column_name)
90 |             categorical_col_indices.append(index)
91 |         else:
92 |             numerical_columns.append(column_name)
93 |         index += 1
94 | 
95 |     transformers = []
96 | 
97 |     if len(numerical_columns) > 0:
98 |         numeric_transformer = Pipeline(
99 |             steps=[
100 |                 ('num_imputer', SimpleImputer(strategy='constant')),
101 |                 ('scaler', StandardScaler())
102 |             ]
103 |         )
104 |         transformers.append(('num', numeric_transformer, numerical_columns))
105 | 
106 |     if len(categorical_columns) > 0:
107 |         steps = [
108 |             ('cat_imputer', SimpleImputer(strategy='constant')),
109 |         ]
110 |         if apply_one_hot_encoding:
111 |             steps.append(('cat_encoding', OneHotEncoder(handle_unknown='ignore')))
112 |         else:
113 |             # LabelEncoder only works on 1-D targets, so no encoder is added here
114 |             pass
115 |         categorical_transformer = Pipeline(
116 |             steps=steps,
117 |         )
118 |         transformers.append(('cat', categorical_transformer, categorical_columns))
119 | 
120 |     preprocessor = ColumnTransformer(
121 |         transformers=transformers,
122 |     )
123 | 
124 |     # label encode the targets
125 |     y = label_encoder.fit_transform(y)
126 | 
127 |     X_train, X_test, y_train, y_test = train_test_split(
128 |         X,
129 |         y,
130 |         test_size=test_fraction,
131 |         random_state=seed,
132 |         stratify=y,
133 |     )
134 | 
135 |     categorical_dimensions = []
136 |     if model == 'tabnet':
137 |         for cat_column in categorical_columns:
138 |             column_unique_values = X[cat_column].nunique()
139 |             # categorical columns with only one unique value
140 |             # do not need an embedding.
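            # [Editorial note, not in the original file] e.g. a 'color' column
            # with values {'red', 'green', 'blue'} yields nunique() == 3 and
            # contributes 3 to categorical_dimensions, from which TabNet sizes
            # its embeddings; constant columns are skipped just below.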
141 |             if column_unique_values == 1:
142 |                 continue
143 | 
144 |             categorical_dimensions.append(column_unique_values)
145 | 
146 |     if val_fraction != 0:
147 |         new_val_fraction = val_fraction / (1 - test_fraction)
148 |         X_train, X_val, y_train, y_val = train_test_split(
149 |             X_train,
150 |             y_train,
151 |             test_size=new_val_fraction,
152 |             random_state=seed,
153 |             stratify=y_train,
154 |         )
155 | 
156 |     preprocessor.fit(X_train, y_train)
157 | 
158 |     X_train = preprocessor.transform(X_train)
159 |     X_test = preprocessor.transform(X_test)
160 | 
161 |     dataset_splits = {
162 |         'X_train': X_train,
163 |         'X_test': X_test,
164 |         'y_train': y_train,
165 |         'y_test': y_test,
166 |     }
167 | 
168 |     if val_fraction != 0:
169 |         X_val = preprocessor.transform(X_val)
170 |         dataset_splits['X_val'] = X_val
171 |         dataset_splits['y_val'] = y_val
172 | 
173 |     new_categorical_indicator = []
174 |     new_categorical_indices = []
175 | 
176 |     for i in range(len(column_names)):
177 |         if i < len(numerical_columns):
178 |             categorical_status = False
179 |         else:
180 |             categorical_status = True
181 |             new_categorical_indices.append(i)
182 |         new_categorical_indicator.append(categorical_status)
183 | 
184 |     categorical_information = {
185 |         'categorical_ind': new_categorical_indicator,
186 |         'categorical_columns': new_categorical_indices,
187 |         'categorical_dimensions': categorical_dimensions,
188 |     }
189 | 
190 |     return categorical_information, dataset_splits
191 | 
192 | 
193 | def standardize_data(
194 |     X_train: np.ndarray,
195 |     X_test: np.ndarray,
196 |     is_sparse: bool
197 | ) -> Tuple[np.ndarray, np.ndarray]:
198 |     """Standardize the data.
199 | 
200 |     Compute the mean and std from the train set and standardize both sets.
201 | 
202 |     Parameters:
203 |     -----------
204 |     X_train: np.ndarray
205 |         The dataset examples used for training the model.
206 |     X_test: np.ndarray
207 |         The dataset examples used for testing the model.
208 |     is_sparse: bool
209 |         Whether the data is sparse, in which case no centering is applied.
210 | 
211 |     Returns:
212 |     --------
213 |     (X_train, X_test): tuple(np.ndarray, np.ndarray)
214 |         Corresponding sets after being standardized.
215 |     """
216 |     # center the data only for non-sparse matrices
217 |     center_data = not is_sparse
218 |     scaler = StandardScaler(with_mean=center_data).fit(X_train)
219 |     X_train = scaler.transform(X_train)
220 |     X_test = scaler.transform(X_test)
221 | 
222 |     return X_train, X_test
223 | 
224 | 
225 | def impute_missing_data(
226 |     X: np.ndarray,
227 | ) -> np.ndarray:
228 |     """Impute missing data from the given dataset.
229 | 
230 |     Impute missing values in the given dataset with constant values.
231 | 
232 |     Parameters:
233 |     -----------
234 |     X: np.ndarray
235 |         The dataset examples used for the experiment.
236 | 
237 |     Returns:
238 |     --------
239 |     _: np.ndarray
240 |         Data after imputation.
241 |     """
242 |     # constant strategy for imputation
243 |     imputer = SimpleImputer(strategy='constant')
244 | 
245 |     return imputer.fit_transform(X)
246 | 
247 | 
248 | def ohe_the_data(
249 |     X_train: np.ndarray,
250 |     X_test: np.ndarray,
251 | ) -> Tuple[np.ndarray, np.ndarray]:
252 |     """One hot encode the data.
253 | 
254 |     One hot encode the categorical features of the
255 |     given dataset.
256 | 
257 |     Parameters:
258 |     -----------
259 |     X_train: np.ndarray
260 |         The dataset examples used for training the model.
261 |     X_test: np.ndarray
262 |         The dataset examples used for testing the model.
263 | 
264 |     Returns:
265 |     --------
266 |     (X_train, X_test): tuple(np.ndarray, np.ndarray)
267 |         Corresponding sets after being one hot encoded.
268 | """ 269 | enc = OneHotEncoder(handle_unknown='ignore') 270 | X = np.concatenate((X_train, X_test), axis=0) 271 | enc.fit(X) 272 | X_train = enc.transform(X_train) 273 | X_test = enc.transform(X_test) 274 | 275 | return X_train, X_test 276 | 277 | 278 | def get_dataset_openml( 279 | task_id: int = 11, 280 | ) -> openml.datasets.OpenMLDataset: 281 | """Download a dataset from OpenML 282 | 283 | Based on a given task id, download the task and retrieve 284 | the dataset that belongs to the corresponding task. 285 | 286 | Parameters: 287 | ----------- 288 | task_id: int 289 | The task id that represents the task for which the dataset will be downloaded. 290 | 291 | Returns: 292 | -------- 293 | dataset: openml.datasets.OpenMLDataset 294 | The OpenML dataset that is requested.. 295 | """ 296 | task = openml.tasks.get_task(task_id) 297 | dataset = task.get_dataset() 298 | 299 | return dataset 300 | 301 | 302 | def check_leak_status(splits): 303 | """Check the leak status. 304 | 305 | This function goes through the different splits of the dataset 306 | and checks if there is a leak between the different sets. 307 | 308 | Parameters: 309 | ----------- 310 | splits: dict 311 | A dictionary that contains the different sets train, test (possibly validation) 312 | of the whole dataset. 313 | 314 | Returns: 315 | -------- 316 | None - Does not return anything, only raises an error if there is a leak. 317 | """ 318 | X_train = splits['X_train'] 319 | X_valid = splits['X_val'] 320 | X_test = splits['X_test'] 321 | 322 | for train_example in X_train: 323 | for valid_example in X_valid: 324 | if np.array_equal(train_example, valid_example): 325 | raise AssertionError('Leak between the training and validation set') 326 | for test_example in X_test: 327 | if np.array_equal(train_example, test_example): 328 | raise AssertionError('Leak between the training and test set') 329 | for valid_example in X_valid: 330 | for test_example in X_test: 331 | if np.array_equal(valid_example, test_example): 332 | raise AssertionError('Leak between the validation and test set') 333 | 334 | print('Leak check passed') 335 | 336 | 337 | def check_split_stratification(splits): 338 | """Check the split stratification and the shape of the examples and labels 339 | for the different sets. 340 | 341 | This function goes through the different splits of the dataset 342 | and checks if there is stratification. In this example, if there 343 | is nearly the same number of examples for each class in the corresponding 344 | splits. The function also verifies that the shape of the examples and 345 | labels is the same for the different splits. 346 | 347 | Parameters: 348 | ----------- 349 | splits: dict 350 | A dictionary that contains the different sets train, test (possibly validation) 351 | of the whole dataset. 
352 | """ 353 | X_train = splits['X_train'] 354 | X_val = splits['X_val'] 355 | X_test = splits['X_test'] 356 | y_train = splits['y_train'] 357 | y_val = splits['y_val'] 358 | y_test = splits['y_test'] 359 | train_occurences = Counter(y_train) 360 | val_occurences = Counter(y_val) 361 | test_occurences = Counter(y_test) 362 | 363 | print(train_occurences) 364 | print(val_occurences) 365 | print(test_occurences) 366 | print(X_train.shape) 367 | print(X_val.shape) 368 | print(X_test.shape) 369 | assert X_train.shape[0] == y_train.shape[0] 370 | assert X_val.shape[0] == y_val.shape[0] 371 | assert X_test.shape[0] == y_test.shape[0] 372 | 373 | 374 | def get_task_list( 375 | benchmark_task_file: str = 'path/to/tasks.txt', 376 | ) -> List[int]: 377 | """Get the task id list. 378 | 379 | Goes through the given file and collects all of the task 380 | ids. 381 | 382 | Parameters: 383 | ----------- 384 | benchmark_task_file: str 385 | A string to the path of the benchmark task file. Including 386 | the task file name. 387 | 388 | Returns: 389 | -------- 390 | benchmark_task_ids - list 391 | A list of all the task ids for the benchmark. 392 | """ 393 | with open(os.path.join(benchmark_task_file), 'r') as f: 394 | benchmark_info_str = f.readline() 395 | benchmark_task_ids = [int(task_id) for task_id in benchmark_info_str.split(' ')] 396 | 397 | return benchmark_task_ids 398 | 399 | 400 | def status_exp_tasks( 401 | working_directory: str, 402 | seed: int = 11, 403 | model_name: str = 'xgboost', 404 | ): 405 | """Analyze the different tasks of the experiment. 406 | 407 | Goes through the results in the directory given and 408 | it analyzes which one finished succesfully and which one 409 | did not. 410 | 411 | Parameters: 412 | ----------- 413 | working_directory: str 414 | The directory where the results are located. 415 | seed: int 416 | The seed that was used for the experiment. 417 | model_name: int 418 | The name of the model that was used. 419 | """ 420 | not_finished = 0 421 | finished = 0 422 | benchmark_task_file = 'benchmark_datasets.txt' 423 | benchmark_task_file_path = os.path.join(working_directory, benchmark_task_file) 424 | result_directory = os.path.join(working_directory, model_name) 425 | task_ids = get_task_list(benchmark_task_file_path) 426 | for task_id in task_ids: 427 | task_result_directory = os.path.join(result_directory, f'{task_id}', f'{seed}') 428 | print(task_result_directory) 429 | try: 430 | with open(os.path.join(task_result_directory, 'refit_result.json'), 'r') as file: 431 | # do nothing with the result for now 432 | _ = json.load(file) 433 | print(f'Task {task_id} finished.') 434 | finished += 1 435 | # TODO do something with the result 436 | except FileNotFoundError: 437 | print(f'Task {task_id} not finished.') 438 | not_finished += 1 439 | print(f'Finished tasks: {finished} , not finished tasks: {not_finished}') 440 | 441 | 442 | def read_baseline_values( 443 | working_directory: str, 444 | seed: int = 11, 445 | model_name: str = 'xgboost', 446 | ) -> Dict[int, float]: 447 | """Prepares the results of the experiment with the baselines. 448 | 449 | Goes through the results at the given directory and it generates a 450 | dictionary for the baseline with the performances on every task 451 | of the benchmark. 452 | 453 | Parameters: 454 | ----------- 455 | working_directory: str 456 | The directory where the results are located. 457 | seed: int 458 | The seed that was used for the experiment. 459 | model_name: int 460 | The name of the model that was used. 
461 | 
462 |     Returns:
463 |     --------
464 |     baseline_results - dict
465 |         A dictionary with the results of the baseline algorithm.
466 |         Each key of the dictionary represents a task id, while
467 |         each value corresponds to the performance of the algorithm.
468 |     """
469 |     baseline_results = {}
470 |     benchmark_task_file = 'benchmark_datasets.txt'
471 |     benchmark_task_file_path = os.path.join(working_directory, benchmark_task_file)
472 |     result_directory = os.path.join(working_directory, model_name)
473 |     task_ids = get_task_list(benchmark_task_file_path)
474 |     for task_id in task_ids:
475 |         task_result_directory = os.path.join(result_directory, f'{task_id}', f'{seed}')
476 |         try:
477 |             with open(os.path.join(task_result_directory, 'refit_result.json'), 'r') as file:
478 |                 task_result = json.load(file)
479 |             baseline_results[task_id] = task_result['test_accuracy']
480 |         except FileNotFoundError:
481 |             print(f'Task {task_id} not finished.')
482 |             baseline_results[task_id] = None
483 | 
484 |     return baseline_results
485 | 
486 | 
487 | def read_autosklearn_values(
488 |     working_directory: str,
489 |     seed: int = 11,
490 |     model_name: str = 'autosklearn',
491 | ) -> Dict[int, float]:
492 |     """Prepares the results of the experiment with auto-sklearn.
493 | 
494 |     Goes through the results in the given directory and generates a
495 |     dictionary for auto-sklearn with the performances on every task
496 |     of the benchmark.
497 | 
498 |     Parameters:
499 |     -----------
500 |     working_directory: str
501 |         The directory where the results are located.
502 |     seed: int
503 |         The seed that was used for the experiment.
504 |     model_name: str
505 |         The name of the model that was used.
506 | 
507 |     Returns:
508 |     --------
509 |     autosklearn_results - dict
510 |         A dictionary with the results of the auto-sklearn algorithm.
511 |         Each key of the dictionary represents a task id, while
512 |         each value corresponds to the performance of the algorithm.
513 |     """
514 |     autosklearn_results = {}
515 |     benchmark_task_file = 'benchmark_datasets.txt'
516 |     benchmark_task_file_path = os.path.join(working_directory, benchmark_task_file)
517 |     result_directory = os.path.join(working_directory, model_name)
518 |     task_ids = get_task_list(benchmark_task_file_path)
519 |     for task_id in task_ids:
520 |         task_result_directory = os.path.join(result_directory, f'{seed}', f'{task_id}', 'results')
521 |         try:
522 |             with open(os.path.join(task_result_directory, 'performance.txt'), 'r') as baseline_file:
523 |                 baseline_test_acc = float(baseline_file.readline())
524 |             autosklearn_results[task_id] = baseline_test_acc
525 |         except FileNotFoundError:
526 |             print(f'Task {task_id} not finished.')
527 |             autosklearn_results[task_id] = None
528 |             continue
529 | 
530 |     return autosklearn_results
531 | 
532 | 
533 | def read_cocktail_values(
534 |     cocktail_result_dir: str,
535 |     benchmark_task_file_dir: str,
536 |     seed: int = 11,
537 |     cocktail_version: str = 'cocktail',
538 | ) -> Dict[int, float]:
539 |     """Prepares the results of the experiment with the regularization
540 |     cocktail.
541 | 
542 |     Goes through the results in the given directory and generates a
543 |     dictionary for the regularization cocktails with the performances
544 |     on every task of the benchmark.
545 | 
546 |     Parameters:
547 |     -----------
548 |     cocktail_result_dir: str
549 |         The directory where the results are located for the regularization
550 |         cocktails.
551 |     benchmark_task_file_dir: str
552 |         The directory where the benchmark task file is located. The file
553 |         contains all the task ids; the file name itself does not
554 |         need to be given.
555 |     seed: int
556 |         The seed that was used for the experiment.
557 | 
558 |     Returns:
559 |     --------
560 |     cocktail_results - dict
561 |         A dictionary with the results of the regularization cocktail method.
562 |         Each key of the dictionary represents a task id, while
563 |         each value corresponds to the performance of the algorithm.
564 |     """
565 |     cocktail_results = {}
566 | 
567 |     result_path = os.path.join(
568 |         cocktail_result_dir,
569 |         cocktail_version,
570 |         '512',
571 |     )
572 | 
573 |     benchmark_task_file = 'benchmark_datasets.txt'
574 |     benchmark_task_file_path = os.path.join(
575 |         benchmark_task_file_dir,
576 |         benchmark_task_file,
577 |     )
578 | 
579 |     task_ids = get_task_list(benchmark_task_file_path)
580 | 
581 |     for task_id in task_ids:
582 |         task_result_path = os.path.join(
583 |             result_path,
584 |             f'{task_id}',
585 |             'refit_run',
586 |             f'{seed}',
587 |         )
588 | 
589 |         if os.path.exists(task_result_path):
590 |             if not os.path.isdir(task_result_path):
591 |                 task_result_path = os.path.join(
592 |                     result_path,
593 |                     f'{task_id}',
594 |                 )
595 |         else:
596 |             task_result_path = os.path.join(
597 |                 result_path,
598 |                 f'{task_id}',
599 |             )
600 | 
601 |         try:
602 |             with open(os.path.join(task_result_path, 'run_results.txt')) as f:
603 |                 test_results = json.load(f)
604 |             cocktail_results[task_id] = test_results['mean_test_bal_acc']
605 |         except FileNotFoundError:
606 |             cocktail_results[task_id] = None
607 | 
608 |     return cocktail_results
609 | 
610 | 
611 | def compare_models(
612 |     baseline_dir: str,
613 |     cocktail_dir: str,
614 | ) -> pd.DataFrame:
615 |     """Prepares the results of the experiments with all methods.
616 | 
617 |     Goes through the results at the given directories and builds
618 |     a table with all the methods over the different tasks.
619 | 
620 |     Parameters:
621 |     -----------
622 |     baseline_dir: str
623 |         The directory where the results are located for the baseline
624 |         methods.
625 |     cocktail_dir: str
626 |         The directory where the results are located for the regularization
627 |         cocktails.
628 | 
629 |     Returns:
630 |     --------
631 |     comparison_table - pd.DataFrame
632 |         A DataFrame with the results for all methods over the different tasks.
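    Example (editorial addition; the paths shown are hypothetical):

        table = compare_models('~/xgboost_results', '~/cocktail_results')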
633 | """ 634 | xgboost_results = read_baseline_values(baseline_dir, model_name='xgboost') 635 | tabnet_results = read_baseline_values(baseline_dir, model_name='tabnet') 636 | cocktail_results = read_cocktail_values(cocktail_dir, baseline_dir, cocktail_version='plain_network') 637 | autosklearn_results = read_autosklearn_values(cocktail_dir) 638 | 639 | table_dict = { 640 | 'Task Id': [], 641 | 'Tabnet': [], 642 | 'XGBoost': [], 643 | 'AutoSklearn': [], 644 | 'Cocktail': [], 645 | } 646 | 647 | cocktail_wins = 0 648 | cocktail_losses = 0 649 | cocktail_ties = 0 650 | autosklearn_looses = 0 651 | autosklearn_ties = 0 652 | autosklearn_wins = 0 653 | cocktail_performances = [] 654 | xgboost_performances = [] 655 | autosklearn_performances = [] 656 | print(cocktail_results) 657 | print(xgboost_results) 658 | 659 | for task_id in xgboost_results: 660 | xgboost_task_result = xgboost_results[task_id] 661 | if xgboost_task_result is None: 662 | continue 663 | tabnet_task_result = tabnet_results[task_id] 664 | cocktail_task_result = cocktail_results[task_id] 665 | autosklearn_task_result = autosklearn_results[task_id] 666 | cocktail_performances.append(cocktail_task_result) 667 | xgboost_performances.append(xgboost_task_result) 668 | autosklearn_performances.append(autosklearn_task_result) 669 | if cocktail_task_result > xgboost_task_result: 670 | cocktail_wins += 1 671 | elif cocktail_task_result < xgboost_task_result: 672 | cocktail_losses += 1 673 | else: 674 | cocktail_ties += 1 675 | if autosklearn_task_result > xgboost_task_result: 676 | autosklearn_wins += 1 677 | elif autosklearn_task_result < xgboost_task_result: 678 | autosklearn_looses += 1 679 | else: 680 | autosklearn_ties += 1 681 | table_dict['Task Id'].append(task_id) 682 | if tabnet_task_result is not None: 683 | table_dict['Tabnet'].append(tabnet_task_result) 684 | else: 685 | table_dict['Tabnet'].append(tabnet_task_result) 686 | table_dict['XGBoost'].append(xgboost_task_result) 687 | table_dict['Cocktail'].append(cocktail_task_result) 688 | table_dict['AutoSklearn'].append(autosklearn_task_result) 689 | 690 | comparison_table = pd.DataFrame.from_dict(table_dict) 691 | print( 692 | comparison_table.to_latex( 693 | index=False, 694 | caption='The performances of the Regularization Cocktail ' 695 | 'and the state-of-the-art competitors ' 696 | 'over the different datasets.', 697 | label='app:cocktail_vs_benchmarks_table', 698 | ) 699 | ) 700 | comparison_table.to_csv(os.path.join(baseline_dir, 'table_comparison.csv'), index=False) 701 | _, p_value = wilcoxon(cocktail_performances, xgboost_performances) 702 | print(f'Cocktail wins: {cocktail_wins}, ties: {cocktail_ties}, looses: {cocktail_losses} against XGBoost') 703 | print(f'P-value: {p_value}') 704 | _, p_value = wilcoxon(xgboost_performances, autosklearn_performances) 705 | print(f'Xgboost vs AutoSklearn, P-value: {p_value}') 706 | print(f'AutoSklearn wins: {autosklearn_wins}, ' 707 | f'ties: {autosklearn_ties}, ' 708 | f'looses: {autosklearn_looses} against XGBoost') 709 | 710 | return comparison_table 711 | 712 | 713 | def build_cd_diagram( 714 | baseline_dir: str, 715 | cocktail_dir: str, 716 | ) -> pd.DataFrame: 717 | """Prepare the results for a critical difference diagram. 718 | 719 | This function prepares all the results into a pandas dataframe 720 | so that it can be used to create a critical difference diagram 721 | of all the methods. 
722 | 
723 |     Parameters:
724 |     -----------
725 |     baseline_dir: str
726 |         The directory where the results are located for the baseline
727 |         methods.
728 |     cocktail_dir: str
729 |         The directory where the results are located for the regularization
730 |         cocktails.
731 | 
732 |     Returns:
733 |     --------
734 |     result_df: pd.DataFrame
735 |         A table with the accuracies of all methods over the different tasks.
736 |         The results are prepared in such a way that a critical difference
737 |         diagram can be generated from the pandas dataframe.
738 |     """
739 |     xgboost_results = read_baseline_values(baseline_dir, model_name='xgboost')
740 |     tabnet_results = read_baseline_values(baseline_dir, model_name='tabnet')
741 |     cocktail_results = read_cocktail_values(cocktail_dir, baseline_dir)
742 |     autosklearn_results = read_autosklearn_values(cocktail_dir)
743 | 
744 |     models = ['Regularization Cocktail', 'XGBoost', 'AutoSklearn-GB', 'TabNet']
745 |     table_results = {
746 |         'Network': [],
747 |         'Task Id': [],
748 |         'Balanced Accuracy': [],
749 |     }
750 |     for task_id in cocktail_results:
751 |         for model_name in models:
752 |             if model_name == 'Regularization Cocktail':
753 |                 task_result = cocktail_results[task_id]
754 |             elif model_name == 'XGBoost':
755 |                 task_result = xgboost_results[task_id]
756 |             elif model_name == 'TabNet':
757 |                 task_result = tabnet_results[task_id]
758 |             elif model_name == 'AutoSklearn-GB':
759 |                 task_result = autosklearn_results[task_id]
760 |             else:
761 |                 raise ValueError('Illegal model value')
762 |             # the readers mark missing results with None; score them
763 |             # as 0 so that every method has a value for every task.
764 |             if task_result is None:
765 |                 task_result = 0
766 |                 print(f'No results for task: {task_id} for model: {model_name}')
767 |             table_results['Network'].append(model_name)
768 |             table_results['Task Id'].append(task_id)
769 |             table_results['Balanced Accuracy'].append(task_result)
770 | 
771 |     result_df = pd.DataFrame(data=table_results)
772 |     result_df.to_csv(os.path.join(baseline_dir, 'cd_data.csv'), index=False)
773 | 
774 |     return result_df
775 | 
776 | 
777 | def generate_ranks_data(
778 |     all_data: pd.DataFrame,
779 | ):
780 |     """Generate the ranks of all methods over the different tasks.
781 | 
782 |     Parameters
783 |     ----------
784 |     all_data: pd.DataFrame
785 |         A dataframe where each row holds one task's values across the
786 |         different models.
787 | 
788 |     Returns
789 |     -------
790 |     ranks_df: pd.DataFrame
791 |         A dataframe of the ranks of all methods over the different tasks.
792 |     """
793 |     all_ranked_data = []
794 |     all_data.drop(columns=['Task Id'], inplace=True)
795 |     column_names = all_data.columns
796 | 
797 |     for row in all_data.itertuples(index=False):
798 |         task_regularization_data = list(row)
799 |         task_ranked_data = rankdata(
800 |             task_regularization_data,
801 |             method='dense',
802 |         )
803 | 
804 |         # rankdata assigns rank 1 to the lowest accuracy; flip the dense
805 |         # ranks so that rank 1 corresponds to the best performing method.
806 |         reversed_data = len(task_ranked_data) + 1 - task_ranked_data.astype(int)
807 |         all_ranked_data.append(reversed_data)
808 | 
809 |     ranks_df = pd.DataFrame(all_ranked_data, columns=column_names)
810 | 
811 |     return ranks_df
812 | 
813 | 
814 | def compare_cocktail_versions(
815 |     cocktail_result_folder: str,
816 |     benchmark_file_path: str,
817 | ) -> pd.DataFrame:
818 |     """Prepares the results of the experiments with the different
819 |     cocktail versions.
820 | 
821 |     Goes through the results at the given directories and builds
822 |     a table with the different cocktail versions over the different
823 |     tasks.
824 | 
825 |     Parameters:
826 |     -----------
827 |     cocktail_result_folder: str
828 |         The folder directory where the results are located for the
829 |         regularization cocktails.
830 |     benchmark_file_path: str
831 |         The directory where the benchmark task file is located.
832 |         The file contains all the task ids; the file name itself
833 |         does not need to be given.
834 | 
835 |     Returns:
836 |     --------
837 |     comparison_table - pd.DataFrame
838 |         A DataFrame with the results of the two cocktail versions over the different tasks.
839 |     """
840 |     fixed_cocktail_results = read_cocktail_values(
841 |         cocktail_result_folder,
842 |         benchmark_file_path,
843 |         cocktail_version='cocktail',
844 |     )
845 |     dynamic_cocktail_results = read_cocktail_values(
846 |         cocktail_result_folder,
847 |         benchmark_file_path,
848 |         cocktail_version='cocktail_lr',
849 |     )
850 | 
851 |     table_dict = {
852 |         'Task Id': [],
853 |         'Fixed Lr Cocktail': [],
854 |         'Dynamic Lr Cocktail': [],
855 |     }
856 | 
857 |     cocktail_fixed_wins = 0
858 |     cocktail_fixed_losses = 0
859 |     cocktail_fixed_ties = 0
860 |     fixed_cocktail_performances = []
861 |     dynamic_cocktail_performances = []
862 | 
863 |     for task_id in fixed_cocktail_results:
864 | 
865 |         fixed_cocktail_task_result = fixed_cocktail_results[task_id]
866 |         dynamic_cocktail_task_result = dynamic_cocktail_results[task_id]
867 | 
868 |         fixed_cocktail_performances.append(fixed_cocktail_task_result)
869 |         dynamic_cocktail_performances.append(dynamic_cocktail_task_result)
870 | 
871 |         if fixed_cocktail_task_result > dynamic_cocktail_task_result:
872 |             cocktail_fixed_wins += 1
873 |         elif fixed_cocktail_task_result < dynamic_cocktail_task_result:
874 |             cocktail_fixed_losses += 1
875 |         else:
876 |             cocktail_fixed_ties += 1
877 | 
878 |         table_dict['Task Id'].append(task_id)
879 |         table_dict['Fixed Lr Cocktail'].append(f'{fixed_cocktail_task_result * 100:.3f}')
880 |         table_dict['Dynamic Lr Cocktail'].append(f'{dynamic_cocktail_task_result * 100:.3f}')
881 | 
882 | 
883 |     comparison_table = pd.DataFrame.from_dict(table_dict)
884 |     print(
885 |         comparison_table.to_latex(
886 |             index=False,
887 |             caption='The performances of the fixed learning rate and the '
888 |                     'dynamic learning rate versions of the Regularization '
889 |                     'Cocktail over the different datasets.',
890 |             label='app:cocktail_versions_table',
891 |         )
892 |     )
893 | 
894 |     _, p_value = wilcoxon(fixed_cocktail_performances, dynamic_cocktail_performances)
895 |     print(f'Fixed Lr Cocktail wins: {cocktail_fixed_wins}, '
896 |           f'ties: {cocktail_fixed_ties}, '
897 |           f'losses: {cocktail_fixed_losses} against Dynamic Lr Cocktail')
898 |     print(f'P-value: {p_value}')
899 | 
900 |     return comparison_table
901 | 
902 | 
903 | xgboost_dir = os.path.expanduser(
904 |     os.path.join(
905 |         '~',
906 |         'Desktop',
907 |         'xgboost_results',
908 |     )
909 | )
910 | 
911 | 
912 | cocktail_dir = os.path.expanduser(
913 |     os.path.join(
914 |         '~',
915 |         'Desktop',
916 |         'PhD',
917 |         'Rezultate',
918 |         'RegularizationCocktail',
919 |         'NEMO',
920 |     )
921 | )
922 | """
923 | compare_models(
924 |     xgboost_dir,
925 |     cocktail_dir
926 | )
927 | compare_cocktail_versions(
928 |     cocktail_dir,
929 |     xgboost_dir,
930 | )"""
--------------------------------------------------------------------------------
/baselines/node/node_experiment.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import os
4 | import time
5 | from typing import List
6 | 
7 | import numpy as np
8 | 
9 | import openml
10 | 
11 | from category_encoders import LeaveOneOutEncoder
12 | 
13 | from qhoptim.pyt import QHAdam
14 | 
15 | from sklearn.model_selection import ParameterGrid, train_test_split
16 | from sklearn.preprocessing import LabelEncoder
17 | from sklearn.metrics import balanced_accuracy_score
18 | 
19 | import torch
20 | import torch.nn as nn
21 | import torch.nn.functional as F
22 | 
23 | import lib
24 | from lib.utils import check_numpy, process_in_chunks
25 | 
26 | 
27 | def get_task_list(
28 |     benchmark_task_file: str = 'path/to/tasks.txt',
29 | ) -> List[int]:
30 |     """Get the task id list.
31 |     Goes through the given file and collects all of the task
32 |     ids.
33 |     Parameters:
34 |     -----------
35 |     benchmark_task_file: str
36 |         A string with the path to the benchmark task file, including
37 |         the task file name.
38 |     Returns:
39 |     --------
40 |     benchmark_task_ids - list
41 |         A list of all the task ids for the benchmark.
42 |     """
43 |     with open(benchmark_task_file, 'r') as f:
44 |         benchmark_info_str = f.readline()
45 |         benchmark_task_ids = [int(task_id) for task_id in benchmark_info_str.split(' ')]
46 | 
47 |     return benchmark_task_ids
48 | 
49 | 
50 | def get_data(
51 |     task_id: int,
52 |     test_size: float = 0.2,
53 |     validation_size: float = 0.25,
54 |     seed: int = 11,
55 | ):
56 |     task = openml.tasks.get_task(task_id=task_id)
57 |     dataset = task.get_dataset()
58 |     X, y, categorical_indicator, _ = dataset.get_data(
59 |         dataset_format='dataframe',
60 |         target=dataset.default_target_attribute,
61 |     )
62 |     label_encoder = LabelEncoder()
63 |     y = label_encoder.fit_transform(y)
64 |     X_train, X_test, y_train, y_test = train_test_split(
65 |         X,
66 |         y,
67 |         test_size=test_size,
68 |         random_state=seed,
69 |         stratify=y,
70 |     )
71 |     if validation_size != 0:
72 |         X_train, X_val, y_train, y_val = train_test_split(
73 |             X_train,
74 |             y_train,
75 |             test_size=validation_size,
76 |             random_state=seed,
77 |             stratify=y_train,
78 |         )
79 |     else:
80 |         X_val = None
81 |         y_val = None
82 | 
83 |     # the code below drops columns that are completely null in the
84 |     # train set; such columns may still hold values in the validation
85 |     # and test sets, so they are removed from every split.
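    # [Editorial note, not in the original file] e.g. with
    #     X_train = pd.DataFrame({'a': [1.0, 2.0], 'b': [np.nan, np.nan]})
    # train_column_nan_info marks 'b' as all-NaN, so 'b' is dropped from the
    # train, validation and test frames alike.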
86 | train_column_nan_info = X_train.isna().all() 87 | only_nan_columns = [label for label, value in train_column_nan_info.items() if value] 88 | only_nan_columns = set(only_nan_columns) 89 | X_train.drop(only_nan_columns, axis='columns', inplace=True) 90 | X_test.drop(only_nan_columns, axis='columns', inplace=True) 91 | 92 | if validation_size != 0: 93 | X_val.drop(only_nan_columns, axis='columns', inplace=True) 94 | 95 | cat_encoder = LeaveOneOutEncoder() 96 | column_names = X_train.columns.to_numpy() 97 | categorical_column_names = [column_name for column_indicator, column_name in zip(categorical_indicator, column_names) if column_indicator] 98 | 99 | cat_encoder.fit(X_train[categorical_column_names], y_train) 100 | X_train[categorical_column_names] = cat_encoder.transform(X_train[categorical_column_names]) 101 | if validation_size != 0: 102 | X_val[categorical_column_names] = cat_encoder.transform(X_val[categorical_column_names]) 103 | X_val = X_val.values.astype('float32') 104 | 105 | X_test[categorical_column_names] = cat_encoder.transform(X_test[categorical_column_names]) 106 | X_train = X_train.values.astype('float32') 107 | X_test = X_test.values.astype('float32') 108 | 109 | dataset_name = dataset.name 110 | 111 | return { 112 | 'X_train': X_train, 113 | 'y_train': y_train, 114 | 'X_val': X_val, 115 | 'y_val': y_val, 116 | 'X_test': X_test, 117 | 'y_test': y_test, 118 | 'name': dataset_name, 119 | } 120 | 121 | 122 | def get_node_dataset( 123 | task_id: int, 124 | test_size: float = 0.2, 125 | validation_size: float = 0.25, 126 | seed: int = 11, 127 | refit=False, 128 | ): 129 | if not refit: 130 | data_splits = get_data( 131 | task_id, 132 | seed=seed, 133 | test_size=test_size, 134 | validation_size=validation_size, 135 | ) 136 | 137 | else: 138 | data_splits = get_data( 139 | task_id, 140 | seed=seed, 141 | test_size=test_size, 142 | validation_size=0, 143 | ) 144 | 145 | node_dataset = lib.Dataset( 146 | dataset=data_splits['name'], 147 | random_state=seed, 148 | quantile_transform=True, 149 | quantile_noise=1e-3, 150 | X_train=data_splits['X_train'], 151 | X_valid=data_splits['X_val'], 152 | X_test=data_splits['X_test'], 153 | y_train=data_splits['y_train'], 154 | y_valid=data_splits['y_val'], 155 | y_test=data_splits['y_test'], 156 | ) 157 | 158 | return node_dataset 159 | 160 | 161 | def evaluate_balanced_classification_error( 162 | trainer, 163 | X_test, 164 | y_test, 165 | device, 166 | batch_size=128, 167 | ): 168 | X_test = torch.as_tensor(X_test, device=device) 169 | y_test = check_numpy(y_test) 170 | trainer.train(False) 171 | with torch.no_grad(): 172 | logits = process_in_chunks(trainer.model, X_test, batch_size=batch_size) 173 | logits = check_numpy(logits) 174 | y_pred = np.argmax(logits, axis=1) 175 | 176 | error_rate = 1 - balanced_accuracy_score(y_test, y_pred) 177 | 178 | return error_rate 179 | 180 | 181 | def evaluate_node( 182 | data, 183 | config, 184 | device, 185 | experiment_name, 186 | epochs=105, 187 | batch_size=128, 188 | refit=False, 189 | ): 190 | config_start_time = time.time() 191 | num_examples = data.X_train.shape[0] 192 | num_features = data.X_train.shape[1] 193 | num_classes = len(set(data.y_train)) 194 | 195 | model = nn.Sequential( 196 | lib.DenseBlock( 197 | num_features, 198 | layer_dim=config['total_tree_count'], 199 | num_layers=config['num_layers'], 200 | tree_dim=num_classes + 1, 201 | flatten_output=False, 202 | depth=config['tree_depth'], 203 | choice_function=lib.entmax15, 204 | bin_function=lib.entmoid15, 205 | ), 206 | 
lib.Lambda(lambda x: x[..., :num_classes].mean(dim=-2)), 207 | ).to(device) 208 | 209 | with torch.no_grad(): 210 | res = model(torch.as_tensor(data.X_train[:batch_size], device=device)) 211 | # trigger data-aware init 212 | 213 | if torch.cuda.device_count() > 1: 214 | model = nn.DataParallel(model) 215 | 216 | trainer = lib.Trainer( 217 | model=model, 218 | loss_function=F.cross_entropy, 219 | experiment_name=experiment_name, 220 | warm_start=False, 221 | Optimizer=QHAdam, 222 | optimizer_params=dict(nus=(0.7, 1.0), betas=(0.95, 0.998)), 223 | verbose=True, 224 | n_last_checkpoints=5 225 | ) 226 | 227 | loss_history, err_history = [], [] 228 | best_val_err = 1.0 229 | best_step = 0 230 | 231 | # calculate the number of early stopping rounds to 232 | # be around 10 epochs. Allow incomplete batches. 233 | number_batches_epoch = int(np.ceil(num_examples / batch_size)) 234 | early_stopping_rounds = 10 * number_batches_epoch 235 | report_frequency = number_batches_epoch 236 | print(early_stopping_rounds) 237 | # Flag if early stopping is hit or not 238 | early_stopping_activated = False 239 | 240 | for batch in lib.iterate_minibatches( 241 | data.X_train, 242 | data.y_train, 243 | batch_size=batch_size, 244 | shuffle=True, 245 | epochs=epochs, 246 | ): 247 | metrics = trainer.train_on_batch( 248 | *batch, 249 | device=device, 250 | ) 251 | 252 | loss_history.append(metrics['loss'].item()) 253 | 254 | # calculate the information below on every epoch 255 | if trainer.step % report_frequency == 0: 256 | train_err = evaluate_balanced_classification_error( 257 | trainer, 258 | data.X_train, 259 | data.y_train, 260 | device=device, 261 | batch_size=batch_size, 262 | ) 263 | if not refit: 264 | val_err = evaluate_balanced_classification_error( 265 | trainer, 266 | data.X_valid, 267 | data.y_valid, 268 | device=device, 269 | batch_size=batch_size, 270 | ) 271 | err_history.append(val_err) 272 | print("Val Error Rate: %0.5f" % (val_err)) 273 | 274 | if val_err < best_val_err: 275 | best_val_err = val_err 276 | best_step = trainer.step 277 | trainer.save_checkpoint(tag='best') 278 | 279 | print("Loss %.5f" % (metrics['loss'])) 280 | print("Train Error Rate: %0.5f" % (train_err)) 281 | 282 | if not refit: 283 | if trainer.step > best_step + early_stopping_rounds: 284 | print('BREAK. There is no improvement for {} steps'.format(early_stopping_rounds)) 285 | print("Best step: ", best_step) 286 | print("Best Val Error Rate: %0.5f" % (best_val_err)) 287 | early_stopping_activated = True 288 | break 289 | 290 | config_duration = time.time() - config_start_time 291 | 292 | if early_stopping_activated: 293 | best_epoch = int(best_step / report_frequency) 294 | else: 295 | best_epoch = int(trainer.step / report_frequency) 296 | # save the model in the end 297 | trainer.save_checkpoint(tag='best') 298 | 299 | # we will always have a best checkpoint, be it 300 | # from early stopping, be it from the normal training. 
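    # [Editorial note, not in the original file] load_checkpoint with
    # tag='best' restores the weights saved by save_checkpoint(tag='best')
    # above, so the error rates computed next come from the best model seen
    # during training rather than from the final training step.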
301 | trainer.load_checkpoint(tag='best') 302 | train_error_rate = evaluate_balanced_classification_error( 303 | trainer, 304 | data.X_train, 305 | data.y_train, 306 | device=device, 307 | batch_size=batch_size, 308 | ) 309 | if not refit: 310 | val_error_rate = evaluate_balanced_classification_error( 311 | trainer, 312 | data.X_valid, 313 | data.y_valid, 314 | device=device, 315 | batch_size=batch_size, 316 | ) 317 | else: 318 | val_error_rate = None 319 | 320 | test_error_rate = evaluate_balanced_classification_error( 321 | trainer, 322 | data.X_test, 323 | data.y_test, 324 | device=device, 325 | batch_size=batch_size, 326 | ) 327 | 328 | run_information = { 329 | 'train_error': train_error_rate, 330 | 'val_error': val_error_rate, 331 | 'test_error': test_error_rate, 332 | 'best_epoch': best_epoch, 333 | 'duration': config_duration 334 | } 335 | 336 | return run_information 337 | 338 | 339 | def predict_node( 340 | data, 341 | config, 342 | device, 343 | experiment_name, 344 | batch_size=128, 345 | refit=True, 346 | ): 347 | num_features = data.X_train.shape[1] 348 | num_classes = len(set(data.y_train)) 349 | 350 | model = nn.Sequential( 351 | lib.DenseBlock( 352 | num_features, 353 | layer_dim=config['total_tree_count'], 354 | num_layers=config['num_layers'], 355 | tree_dim=num_classes + 1, 356 | flatten_output=False, 357 | depth=config['tree_depth'], 358 | choice_function=lib.entmax15, 359 | bin_function=lib.entmoid15, 360 | ), 361 | lib.Lambda(lambda x: x[..., :num_classes].mean(dim=-2)), 362 | ).to(device) 363 | 364 | with torch.no_grad(): 365 | res = model(torch.as_tensor(data.X_train[:batch_size], device=device)) 366 | # trigger data-aware init 367 | 368 | if torch.cuda.device_count() > 1: 369 | model = nn.DataParallel(model) 370 | 371 | trainer = lib.Trainer( 372 | model=model, 373 | warm_start=True, 374 | loss_function=F.cross_entropy, 375 | experiment_name=experiment_name, 376 | Optimizer=QHAdam, 377 | optimizer_params=dict(nus=(0.7, 1.0), betas=(0.95, 0.998)), 378 | verbose=True, 379 | n_last_checkpoints=5 380 | ) 381 | # we will always have a best checkpoint, be it 382 | # from early stopping, be it from the normal training. 
383 |     trainer.load_checkpoint(tag='best')
384 | 
385 |     train_error_rate = evaluate_balanced_classification_error(
386 |         trainer,
387 |         data.X_train,
388 |         data.y_train,
389 |         device=device,
390 |         batch_size=batch_size,
391 |     )
392 |     if not refit:
393 |         val_error_rate = evaluate_balanced_classification_error(
394 |             trainer,
395 |             data.X_valid,
396 |             data.y_valid,
397 |             device=device,
398 |             batch_size=batch_size,
399 |         )
400 |     else:
401 |         val_error_rate = None
402 | 
403 |     test_error_rate = evaluate_balanced_classification_error(
404 |         trainer,
405 |         data.X_test,
406 |         data.y_test,
407 |         device=device,
408 |         batch_size=batch_size,
409 |     )
410 | 
411 |     run_information = {
412 |         'train_error': train_error_rate,
413 |         'val_error': val_error_rate,
414 |         'test_error': test_error_rate,
415 |     }
416 | 
417 |     return run_information
418 | 
419 | parser = argparse.ArgumentParser(
420 |     description='Run node on a benchmark'
421 | )
422 | # experiment setup arguments
423 | parser.add_argument(
424 |     '--task_id',
425 |     type=int,
426 |     default=233090,
427 | )
428 | parser.add_argument(
429 |     '--batch_size',
430 |     type=int,
431 |     default=128,
432 | )
433 | parser.add_argument(
434 |     '--epochs',
435 |     type=int,
436 |     default=1,
437 | )
438 | parser.add_argument(
439 |     '--test_size',
440 |     type=float,
441 |     default=0.2,
442 | )
443 | parser.add_argument(
444 |     '--validation_size',
445 |     type=float,
446 |     default=0.25,
447 | )
448 | parser.add_argument(
449 |     '--seed',
450 |     type=int,
451 |     default=11,
452 | )
453 | parser.add_argument(
454 |     '--device',
455 |     type=str,
456 |     default="cpu",
457 | )
458 | parser.add_argument(
459 |     '--output_dir',
460 |     type=str,
461 |     default="./node_experiments",
462 | )
463 | 
464 | args = parser.parse_args()
465 | options = vars(args)
466 | print(options)
467 | 
468 | 
469 | if __name__ == '__main__':
470 | 
471 |     print("Experiment Started")
472 |     start_time = time.time()
473 |     # set to True to run the grid-search HPO phase instead of the default configuration
474 |     hpo_phase = False
475 |     task_dir = os.path.expanduser(
476 |         os.path.join(
477 |             args.output_dir,
478 |             f'{args.seed}',
479 |             f'{args.task_id}',
480 |         )
481 |     )
482 |     data = get_node_dataset(
483 |         seed=args.seed,
484 |         task_id=args.task_id,
485 |         test_size=args.test_size,
486 |         validation_size=args.validation_size,
487 |         refit=False,
488 |     )
489 |     if hpo_phase:
490 |         # Start HPO Phase
491 |         print("HPO Phase started")
492 | 
493 |         # ParameterGrid expects list-like values; sets are rejected
494 |         param_grid = ParameterGrid({
495 |             'num_layers': [2, 4, 8],
496 |             'total_tree_count': [1024, 2048],
497 |             'tree_depth': [6, 8],
498 |             'tree_output_dim': [2, 3],
499 |         })
500 |         results = []
501 |         for config_counter, params in enumerate(param_grid):
502 |             config_dir = os.path.join(task_dir, f'{config_counter}')
503 |             print(params)
504 |             run_information = evaluate_node(
505 |                 batch_size=args.batch_size,
506 |                 refit=False,
507 |                 data=data,
508 |                 config=params,
509 |                 device=args.device,
510 |                 experiment_name=config_dir,
511 |                 epochs=args.epochs,
512 |             )
513 |             print(params)
514 |             print(run_information)
515 |             results.append(
516 |                 {
517 |                     'val_error': run_information['val_error'],
518 |                     'best_epoch': run_information['best_epoch'],
519 |                     'config': params,
520 |                 }
521 |             )
522 | 
523 |         incumbent = sorted(results, key=lambda result: result['val_error'])[0]
524 |         print(f"Best results, with validation error: {incumbent['val_error']}, "
525 |               f"configuration: {incumbent['config']}")
526 |         best_config = incumbent['config']
527 |         best_epoch = incumbent['best_epoch']
528 |     else:
529 |         best_config = {
530 |             'num_layers': 2,
531 |             'total_tree_count': 1024,
532 |             'tree_depth': 6,
533 | 
'tree_output_dim': 2, 534 | } 535 | run_information = evaluate_node( 536 | batch_size=args.batch_size, 537 | refit=False, 538 | data=data, 539 | config=best_config, 540 | device=args.device, 541 | experiment_name=os.path.join(task_dir, 'run'), 542 | epochs=args.epochs, 543 | ) 544 | best_epoch = run_information['best_epoch'] 545 | 546 | # Start Refit Phase 547 | print("Refit Started") 548 | refit_dir = os.path.join(task_dir, 'refit') 549 | print(f'Best epoch found for task: {args.task_id} in refit is: {best_epoch}') 550 | data = get_node_dataset( 551 | seed=args.seed, 552 | task_id=args.task_id, 553 | test_size=args.test_size, 554 | validation_size=0, 555 | refit=True, 556 | ) 557 | 558 | run_information = evaluate_node( 559 | batch_size=args.batch_size, 560 | refit=True, 561 | data=data, 562 | config=best_config, 563 | device=args.device, 564 | experiment_name=refit_dir, 565 | epochs=best_epoch, 566 | ) 567 | 568 | duration = time.time() - start_time 569 | os.makedirs(task_dir, exist_ok=True) 570 | 571 | result_dir = os.path.join( 572 | task_dir, 573 | 'results.json', 574 | ) 575 | 576 | result_dict = { 577 | 'train balanced accuracy': 1 - run_information['train_error'], 578 | 'test balanced accuracy': 1 - run_information['test_error'], 579 | 'task_id': args.task_id, 580 | 'duration': duration, 581 | } 582 | 583 | with open(result_dir, 'w') as file: 584 | json.dump(result_dict, file) -------------------------------------------------------------------------------- /cocktails/main_experiment.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | import pickle 5 | import random 6 | import time 7 | import warnings 8 | 9 | # this corresponds to the number of threads 10 | os.environ['OMP_NUM_THREADS'] = '1' 11 | os.environ['OPENBLAS_NUM_THREADS'] = '1' 12 | os.environ['MKL_NUM_THREADS'] = '1' 13 | 14 | warnings.simplefilter(action='ignore', category=UserWarning) 15 | warnings.simplefilter(action='ignore', category=FutureWarning) 16 | 17 | import torch 18 | 19 | from autoPyTorch.api.tabular_classification import TabularClassificationTask 20 | from autoPyTorch.datasets.resampling_strategy import HoldoutValTypes, NoResamplingStrategyTypes 21 | from autoPyTorch.data.tabular_validator import TabularInputValidator 22 | from autoPyTorch.datasets.tabular_dataset import TabularDataset 23 | from autoPyTorch import metrics 24 | 25 | import numpy as np 26 | 27 | from utilities import \ 28 | get_data, \ 29 | get_incumbent_results, \ 30 | get_smac_object, \ 31 | get_updates_for_regularization_cocktails 32 | 33 | 34 | def str2bool(v): 35 | if isinstance(v, bool): 36 | return [v, ] 37 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 38 | return [True, ] 39 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 40 | return [False, ] 41 | elif v.lower() == 'conditional': 42 | return [True, False] 43 | else: 44 | raise argparse.ArgumentTypeError('No valid value given.') 45 | 46 | 47 | parser = argparse.ArgumentParser( 48 | description='Run AutoPyTorch on a benchmark.', 49 | ) 50 | # experiment setup arguments 51 | parser.add_argument( 52 | '--task_id', 53 | type=int, 54 | default=233088, 55 | ) 56 | parser.add_argument( 57 | '--wall_time', 58 | type=int, 59 | default=9000, 60 | ) 61 | parser.add_argument( 62 | '--func_eval_time', 63 | type=int, 64 | default=1000, 65 | ) 66 | parser.add_argument( 67 | '--epochs', 68 | type=int, 69 | default=105, 70 | ) 71 | parser.add_argument( 72 | '--seed', 73 | type=int, 74 | default=11, 75 | ) 76 
| parser.add_argument(
77 |     '--tmp_dir',
78 |     type=str,
79 |     default='./runs/autoPyTorch_cocktails',
80 | )
81 | parser.add_argument(
82 |     '--output_dir',
83 |     type=str,
84 |     default='./runs/autoPyTorch_cocktails',
85 | )
86 | parser.add_argument(
87 |     '--nr_workers',
88 |     type=int,
89 |     default=1,
90 | )
91 | parser.add_argument(
92 |     '--nr_threads',
93 |     type=int,
94 |     default=1,
95 | )
96 | parser.add_argument(
97 |     '--cash_cocktail',
98 |     help='If the regularization cocktail should be used.',
99 |     type=lambda v: v.lower() in ('yes', 'true', 't', 'y', '1'),  # type=bool would treat any non-empty string as True
100 |     default=False,
101 | )
102 | 
103 | # regularization ingredient arguments
104 | parser.add_argument(
105 |     '--use_swa',
106 |     help='If stochastic weight averaging should be used.',
107 |     type=str2bool,
108 |     nargs='?',
109 |     const=[True],
110 |     default=[False],
111 | )
112 | parser.add_argument(
113 |     '--use_se',
114 |     help='If snapshot ensembling should be used.',
115 |     type=str2bool,
116 |     nargs='?',
117 |     const=[True],
118 |     default=[False],
119 | )
120 | parser.add_argument(
121 |     '--use_lookahead',
122 |     help='If the lookahead optimizing technique should be used.',
123 |     type=str2bool,
124 |     nargs='?',
125 |     const=[True],
126 |     default=[False],
127 | )
128 | parser.add_argument(
129 |     '--use_weight_decay',
130 |     help='If weight decay regularization should be used.',
131 |     type=str2bool,
132 |     nargs='?',
133 |     const=[True],
134 |     default=[False],
135 | )
136 | parser.add_argument(
137 |     '--use_batch_normalization',
138 |     help='If batch normalization regularization should be used.',
139 |     type=str2bool,
140 |     nargs='?',
141 |     const=[True],
142 |     default=[False],
143 | )
144 | parser.add_argument(
145 |     '--use_skip_connection',
146 |     help='If skip connections should be used. '
147 |          'Turns the network into a residual network.',
148 |     type=str2bool,
149 |     nargs='?',
150 |     const=[True],
151 |     default=[False],
152 | )
153 | parser.add_argument(
154 |     '--use_dropout',
155 |     help='If dropout regularization should be used.',
156 |     type=str2bool,
157 |     nargs='?',
158 |     const=[True],
159 |     default=[False],
160 | )
161 | parser.add_argument(
162 |     '--mb_choice',
163 |     help='Multibranch network regularization. 
' 164 | 'Only active when skip_connection is active.', 165 | type=str, 166 | choices=['none', 'shake-shake', 'shake-drop'], 167 | default='none', 168 | ) 169 | parser.add_argument( 170 | '--augmentation', 171 | help='If methods that augment examples should be used', 172 | type=str, 173 | choices=['mixup', 'cutout', 'cutmix', 'standard', 'adversarial'], 174 | default='standard', 175 | ) 176 | 177 | 178 | args = parser.parse_args() 179 | options = vars(args) 180 | print(options) 181 | 182 | 183 | hps_for_method = { 184 | 'stochastic_weight_averaging': 0, 185 | 'snapshot_ensembling': 0, 186 | 'batch_normalization': 0, 187 | 'skip_connection': 0, 188 | 'shake_shake': 0, 189 | 'adversarial_training': 0, 190 | 'cutmix': 1, 191 | 'mixup': 1, 192 | 'weight_decay': 1, 193 | 'shake_drop': 1, 194 | 'lookahead': 2, 195 | 'cutout': 2, 196 | 'dropout': 2, 197 | } 198 | 199 | 200 | if __name__ == '__main__': 201 | 202 | # Setting up reproducibility 203 | torch.backends.cudnn.deterministic = True 204 | torch.backends.cudnn.benchmark = False 205 | torch.manual_seed(args.seed) 206 | np.random.seed(args.seed) 207 | random.seed(args.seed) 208 | 209 | number_of_configurations_limit = 0 210 | 211 | if args.cash_cocktail: 212 | # for the cocktail we use 840 configurations 213 | number_of_configurations_limit = 840 214 | else: 215 | method_number_of_hps = 0 216 | if any(args.use_swa): 217 | method_number_of_hps = hps_for_method['stochastic_weight_averaging'] 218 | elif any(args.use_se): 219 | method_number_of_hps = hps_for_method['snapshot_ensembling'] 220 | elif any(args.use_batch_normalization): 221 | method_number_of_hps = hps_for_method['batch_normalization'] 222 | elif any(args.use_skip_connection) and args.mb_choice == 'none': 223 | method_number_of_hps = hps_for_method['skip_connection'] 224 | elif any(args.use_skip_connection) and args.mb_choice == 'shake-shake': 225 | method_number_of_hps = hps_for_method['shake_shake'] 226 | elif any(args.use_skip_connection) and args.mb_choice == 'shake-drop': 227 | method_number_of_hps = hps_for_method['shake_drop'] 228 | elif args.augmentation == 'cutmix': 229 | method_number_of_hps = hps_for_method['cutmix'] 230 | elif args.augmentation == 'mixup': 231 | method_number_of_hps = hps_for_method['mixup'] 232 | elif args.augmentation == 'cutout': 233 | method_number_of_hps = hps_for_method['cutout'] 234 | elif args.augmentation == 'adversarial': 235 | method_number_of_hps = hps_for_method['adversarial_training'] 236 | elif any(args.use_dropout): 237 | method_number_of_hps = hps_for_method['dropout'] 238 | elif any(args.use_weight_decay): 239 | method_number_of_hps = hps_for_method['weight_decay'] 240 | elif any(args.use_lookahead): 241 | method_number_of_hps = hps_for_method['lookahead'] 242 | 243 | number_of_configurations_limit = 40 * method_number_of_hps 244 | 245 | print(f'Number of configurations limit: {number_of_configurations_limit}') 246 | 247 | ############################################################################ 248 | # Data Loading 249 | # ============ 250 | start_time = time.time() 251 | 252 | X_train, X_test, y_train, y_test, resampling_strategy_args, categorical_indicator = get_data( 253 | task_id=args.task_id, 254 | seed=args.seed, 255 | ) 256 | 257 | pipeline_update, search_space_updates, include_updates = get_updates_for_regularization_cocktails( 258 | categorical_indicator, 259 | args, 260 | ) 261 | 262 | output_dir = os.path.expanduser( 263 | os.path.join( 264 | args.output_dir, 265 | f'{args.seed}', 266 | f'{args.task_id}', 267 | 
f'{args.task_id}_out', 268 | ) 269 | ) 270 | temp_dir = os.path.expanduser( 271 | os.path.join( 272 | args.tmp_dir, 273 | f'{args.seed}', 274 | f'{args.task_id}', 275 | f'{args.task_id}_tmp', 276 | ) 277 | ) 278 | 279 | ############################################################################ 280 | # Build and fit a classifier 281 | # ========================== 282 | # if we use HPO, we can use multiple workers in parallel 283 | if number_of_configurations_limit != 0: 284 | nr_workers = args.nr_workers 285 | else: 286 | nr_workers = 1 287 | 288 | api = TabularClassificationTask( 289 | temporary_directory=temp_dir, 290 | output_directory=output_dir, 291 | delete_tmp_folder_after_terminate=False, 292 | delete_output_folder_after_terminate=False, 293 | resampling_strategy=HoldoutValTypes.stratified_holdout_validation, 294 | resampling_strategy_args=resampling_strategy_args, 295 | ensemble_size=1, 296 | ensemble_nbest=1, 297 | max_models_on_disc=10, 298 | include_components=include_updates, 299 | search_space_updates=search_space_updates, 300 | seed=args.seed, 301 | n_jobs=nr_workers, 302 | n_threads=args.nr_threads, 303 | ) 304 | 305 | api.set_pipeline_config(**pipeline_update) 306 | ############################################################################ 307 | # Search for the best hp configuration 308 | # ==================================== 309 | # We search for the best hp configuration only in the case of a cocktail ingredient 310 | # that has hyperparameters. 311 | if number_of_configurations_limit != 0: 312 | api.search( 313 | X_train=X_train.copy(), 314 | y_train=y_train.copy(), 315 | X_test=X_test.copy(), 316 | y_test=y_test.copy(), 317 | optimize_metric='balanced_accuracy', 318 | total_walltime_limit=args.wall_time, 319 | memory_limit=12000, 320 | func_eval_time_limit_secs=args.func_eval_time, 321 | enable_traditional_pipeline=False, 322 | get_smac_object_callback=get_smac_object, 323 | smac_scenario_args={ 324 | 'runcount_limit': number_of_configurations_limit, 325 | }, 326 | ) 327 | 328 | # Dump the pipeline for reuse in the future 329 | pickle_directory = os.path.expanduser( 330 | os.path.join( 331 | args.output_dir, 332 | f'{args.seed}', 333 | f'{args.task_id}', 334 | 'estimator.pickle', 335 | ) 336 | ) 337 | with open(pickle_directory, 'wb') as file_handle: 338 | pickle.dump(api, file_handle, protocol=pickle.HIGHEST_PROTOCOL) 339 | 340 | ############################################################################ 341 | # Refit on the best hp configuration 342 | # ================================== 343 | input_validator = TabularInputValidator( 344 | is_classification=True, 345 | ) 346 | input_validator.fit( 347 | X_train=X_train.copy(), 348 | y_train=y_train.copy(), 349 | X_test=X_test.copy(), 350 | y_test=y_test.copy(), 351 | ) 352 | 353 | dataset = TabularDataset( 354 | X=X_train, 355 | Y=y_train, 356 | X_test=X_test, 357 | Y_test=y_test, 358 | seed=args.seed, 359 | validator=input_validator, 360 | resampling_strategy=NoResamplingStrategyTypes.no_resampling, 361 | ) 362 | dataset.is_small_preprocess = False 363 | print(f"Fitting pipeline with {args.epochs} epochs") 364 | 365 | search_space = api.get_search_space(dataset) 366 | # only when we perform hpo will there be an incumbent configuration 367 | # otherwise take a default configuration. 
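    # [Editorial sketch, not part of the original script] Conceptually, the
    # incumbent lookup below reduces to picking the configuration with the
    # lowest cost in SMAC's runhistory.json (names here are hypothetical):
    #
    #     costs = {run.config_id: run.cost for run in runhistory}
    #     incumbent_id = min(costs, key=costs.get)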
368 | if number_of_configurations_limit != 0: 369 | configuration, incumbent_run_value = get_incumbent_results( 370 | os.path.join( 371 | temp_dir, 372 | 'smac3-output', 373 | 'run_{}'.format(args.seed), 374 | 'runhistory.json'), 375 | search_space, 376 | ) 377 | print(f"Incumbent configuration: {configuration}") 378 | print(f"Incumbent trajectory: {api.trajectory}") 379 | else: 380 | # default configuration 381 | configuration = search_space.get_default_configuration() 382 | print(f"Default configuration: {configuration}") 383 | 384 | fitted_pipeline, run_info, run_value, dataset = api.fit_pipeline( 385 | configuration=configuration, 386 | budget_type='epochs', 387 | budget=args.epochs, 388 | dataset=dataset, 389 | run_time_limit_secs=args.func_eval_time, 390 | eval_metric='balanced_accuracy', 391 | memory_limit=12000, 392 | ) 393 | 394 | X_train = dataset.train_tensors[0] 395 | y_train = dataset.train_tensors[1] 396 | X_test = dataset.test_tensors[0] 397 | y_test = dataset.test_tensors[1] 398 | 399 | train_predictions = fitted_pipeline.predict(X_train) 400 | test_predictions = fitted_pipeline.predict(X_test) 401 | 402 | # Store the predictions if things go south 403 | with open(os.path.join(output_dir, f"predictions_{args.task_id}.pickle"), 'wb') as handle: 404 | pickle.dump(test_predictions, handle, protocol=pickle.HIGHEST_PROTOCOL) 405 | with open(os.path.join(output_dir, f"truth_{args.task_id}.pickle"), 'wb') as handle: 406 | pickle.dump(y_test, handle, protocol=pickle.HIGHEST_PROTOCOL) 407 | 408 | train_balanced_accuracy = metrics.balanced_accuracy(y_train, train_predictions.squeeze()) 409 | test_balanced_accuracy = metrics.balanced_accuracy(y_test, test_predictions.squeeze()) 410 | duration = time.time() - start_time 411 | 412 | print(f'Final Train Balanced accuracy: {train_balanced_accuracy}') 413 | print(f'Final Test Balanced accuracy: {test_balanced_accuracy}') 414 | print(f'Time taken: {duration}') 415 | 416 | result_directory = os.path.expanduser( 417 | os.path.join( 418 | args.output_dir, 419 | f'{args.seed}', 420 | f'{args.task_id}', 421 | 'final_result.json', 422 | ) 423 | ) 424 | result_dict = { 425 | 'train balanced accuracy': train_balanced_accuracy, 426 | 'test balanced accuracy': test_balanced_accuracy, 427 | 'task_id': args.task_id, 428 | 'duration': duration, 429 | } 430 | 431 | with open(result_directory, 'w') as file: 432 | json.dump(result_dict, file) 433 | -------------------------------------------------------------------------------- /cocktails/refit_experiment.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | import random 5 | import time 6 | import warnings 7 | 8 | os.environ['OMP_NUM_THREADS'] = '1' 9 | os.environ['OPENBLAS_NUM_THREADS'] = '1' 10 | os.environ['MKL_NUM_THREADS'] = '1' 11 | 12 | warnings.simplefilter(action='ignore', category=UserWarning) 13 | warnings.simplefilter(action='ignore', category=FutureWarning) 14 | 15 | import torch 16 | 17 | from autoPyTorch.api.tabular_classification import TabularClassificationTask 18 | from autoPyTorch.datasets.resampling_strategy import NoResamplingStrategyTypes 19 | from autoPyTorch.data.tabular_validator import TabularInputValidator 20 | from autoPyTorch.datasets.tabular_dataset import TabularDataset 21 | from autoPyTorch import metrics 22 | 23 | import numpy as np 24 | 25 | from utilities import \ 26 | get_data, \ 27 | get_incumbent_results, \ 28 | get_updates_for_regularization_cocktails 29 | 30 | 31 | def str2bool(v): 
32 | if isinstance(v, bool): 33 | return [v, ] 34 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 35 | return [True, ] 36 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 37 | return [False, ] 38 | elif v.lower() == 'conditional': 39 | return [True, False] 40 | else: 41 | raise argparse.ArgumentTypeError('No valid value given.') 42 | 43 | 44 | parser = argparse.ArgumentParser( 45 | description='Refit autoPyTorch on a benchmark.' 46 | ) 47 | # experiment setup arguments 48 | parser.add_argument( 49 | '--task_id', 50 | type=int, 51 | default=233088, 52 | ) 53 | parser.add_argument( 54 | '--wall_time', 55 | type=int, 56 | default=1300, 57 | ) 58 | parser.add_argument( 59 | '--func_eval_time', 60 | type=int, 61 | default=700, 62 | ) 63 | parser.add_argument( 64 | '--epochs', 65 | type=int, 66 | default=105, 67 | ) 68 | parser.add_argument( 69 | '--seed', 70 | type=int, 71 | default=11, 72 | ) 73 | parser.add_argument( 74 | '--tmp_dir', 75 | type=str, 76 | default='./runs/autoPyTorch_cocktails', 77 | ) 78 | parser.add_argument( 79 | '--output_dir', 80 | type=str, 81 | default='./runs/autoPyTorch_cocktails', 82 | ) 83 | parser.add_argument( 84 | '--cash_cocktail', 85 | help='If the regularization cocktail should be used.', 86 | type=bool, 87 | default=False, 88 | ) 89 | 90 | # regularization ingredient arguments 91 | parser.add_argument( 92 | '--use_swa', 93 | help='If stochastic weight averaging should be used.', 94 | type=str2bool, 95 | nargs='?', 96 | const=[True], 97 | default=[False], 98 | ) 99 | parser.add_argument( 100 | '--use_se', 101 | help='If snapshot ensembling should be used.', 102 | type=str2bool, 103 | nargs='?', 104 | const=[True], 105 | default=[False], 106 | ) 107 | parser.add_argument( 108 | '--use_lookahead', 109 | help='If the lookahead optimizing technique should be used.', 110 | type=str2bool, 111 | nargs='?', 112 | const=[True], 113 | default=[False], 114 | ) 115 | parser.add_argument( 116 | '--use_weight_decay', 117 | help='If weight decay regularization should be used.', 118 | type=str2bool, 119 | nargs='?', 120 | const=[True], 121 | default=[False], 122 | ) 123 | parser.add_argument( 124 | '--use_batch_normalization', 125 | help='If batch normalization regularization should be used.', 126 | type=str2bool, 127 | nargs='?', 128 | const=[True], 129 | default=[False], 130 | ) 131 | parser.add_argument( 132 | '--use_skip_connection', 133 | help='If skip connections should be used. ' 134 | 'Turns the network into a residual network.', 135 | type=str2bool, 136 | nargs='?', 137 | const=[True], 138 | default=[False], 139 | ) 140 | parser.add_argument( 141 | '--use_dropout', 142 | help='If dropout regularization should be used.', 143 | type=str2bool, 144 | nargs='?', 145 | const=[True], 146 | default=[False], 147 | ) 148 | parser.add_argument( 149 | '--mb_choice', 150 | help='Multibranch network regularization. 
' 151 | 'Only active when skip_connection is active.', 152 | type=str, 153 | choices=['none', 'shake-shake', 'shake-drop'], 154 | default='none', 155 | ) 156 | parser.add_argument( 157 | '--augmentation', 158 | help='If methods that augment examples should be used', 159 | type=str, 160 | choices=['mixup', 'cutout', 'cutmix', 'standard', 'adversarial'], 161 | default='standard', 162 | ) 163 | 164 | 165 | args = parser.parse_args() 166 | options = vars(args) 167 | print(options) 168 | 169 | 170 | if __name__ == '__main__': 171 | 172 | # Setting up reproducibility 173 | torch.backends.cudnn.deterministic = True 174 | torch.backends.cudnn.benchmark = False 175 | torch.manual_seed(args.seed) 176 | np.random.seed(args.seed) 177 | random.seed(args.seed) 178 | 179 | ############################################################################ 180 | # Data Loading 181 | # ============ 182 | start_time = time.time() 183 | X_train, X_test, y_train, y_test, resampling_strategy_args, categorical_indicator = get_data( 184 | task_id=args.task_id, 185 | seed=args.seed, 186 | ) 187 | 188 | pipeline_update, search_space_updates, include_updates = get_updates_for_regularization_cocktails( 189 | categorical_indicator, 190 | args, 191 | ) 192 | output_dir = os.path.expanduser( 193 | os.path.join( 194 | args.output_dir, 195 | f'{args.seed}', 196 | f'{args.task_id}', 197 | f'{args.task_id}_out', 198 | ) 199 | ) 200 | temp_dir = os.path.expanduser( 201 | os.path.join( 202 | args.tmp_dir, 203 | f'{args.seed}', 204 | f'{args.task_id}', 205 | f'{args.task_id}_tmp', 206 | ) 207 | ) 208 | 209 | refit_out_dir = os.path.join(output_dir, 'refit') 210 | refit_tmp_dir = os.path.join(temp_dir, 'refit') 211 | 212 | ############################################################################ 213 | # Build and fit a classifier 214 | # ========================== 215 | api = TabularClassificationTask( 216 | temporary_directory=refit_tmp_dir, 217 | output_directory=refit_out_dir, 218 | delete_tmp_folder_after_terminate=False, 219 | delete_output_folder_after_terminate=False, 220 | resampling_strategy=NoResamplingStrategyTypes.no_resampling, 221 | ensemble_size=1, 222 | ensemble_nbest=1, 223 | max_models_on_disc=1, 224 | include_components=include_updates, 225 | search_space_updates=search_space_updates, 226 | seed=args.seed, 227 | n_jobs=1, 228 | ) 229 | 230 | api.set_pipeline_config(**pipeline_update) 231 | ############################################################################ 232 | # Refit the hp configuration 233 | # ========================== 234 | input_validator = TabularInputValidator( 235 | is_classification=True, 236 | ) 237 | 238 | input_validator.fit( 239 | X_train=X_train.copy(), 240 | y_train=y_train.copy(), 241 | X_test=X_test.copy(), 242 | y_test=y_test.copy(), 243 | ) 244 | 245 | dataset = TabularDataset( 246 | X=X_train, 247 | Y=y_train, 248 | X_test=X_test, 249 | Y_test=y_test, 250 | validator=input_validator, 251 | seed=args.seed, 252 | resampling_strategy=NoResamplingStrategyTypes.no_resampling, 253 | ) 254 | dataset.is_small_preprocess = False 255 | print(f"Fitting pipeline with {args.epochs} epochs") 256 | 257 | search_space = api.get_search_space(dataset) 258 | 259 | # There has been an hpo search, find the best hyperparameter configuration. 
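    # The path below assumes the directory layout produced by the earlier search
    # run: <tmp_dir>/<seed>/<task_id>/<task_id>_tmp/smac3-output/run_<seed>/runhistory.json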
260 | run_history_path = os.path.join( 261 | temp_dir, 262 | 'smac3-output', 263 | 'run_{}'.format(args.seed), 264 | 'runhistory.json', 265 | ) 266 | 267 | inc_config, inc_value = get_incumbent_results(run_history_path, search_space) 268 | 269 | print(f'The value that the incumbent had on the validation set before the refit:{inc_value}') 270 | print(f"Incumbent configuration: {inc_config}") 271 | 272 | fitted_pipeline, run_info, run_value, dataset = api.fit_pipeline( 273 | configuration=inc_config, 274 | budget_type='epochs', 275 | budget=args.epochs, 276 | dataset=dataset, 277 | run_time_limit_secs=args.func_eval_time, 278 | eval_metric='balanced_accuracy', 279 | memory_limit=12000, 280 | ) 281 | 282 | X_train = dataset.train_tensors[0] 283 | y_train = dataset.train_tensors[1] 284 | X_test = dataset.test_tensors[0] 285 | y_test = dataset.test_tensors[1] 286 | 287 | train_predictions = fitted_pipeline.predict(X_train) 288 | test_predictions = fitted_pipeline.predict(X_test) 289 | train_balanced_accuracy = metrics.balanced_accuracy(y_train, train_predictions.squeeze()) 290 | test_balanced_accuracy = metrics.balanced_accuracy(y_test, test_predictions.squeeze()) 291 | duration = time.time() - start_time 292 | 293 | print(f'Final Train Balanced accuracy: {train_balanced_accuracy}') 294 | print(f'Final Test Balanced accuracy: {test_balanced_accuracy}') 295 | print(f'Time taken: {duration}') 296 | 297 | result_directory = os.path.expanduser( 298 | os.path.join( 299 | args.output_dir, 300 | f'{args.seed}', 301 | f'{args.task_id}', 302 | 'final_result.json', 303 | ) 304 | ) 305 | result_dict = { 306 | 'train balanced accuracy': train_balanced_accuracy, 307 | 'test balanced accuracy': test_balanced_accuracy, 308 | 'task_id': args.task_id, 309 | 'duration': duration, 310 | } 311 | 312 | with open(result_directory, 'w') as file: 313 | json.dump(result_dict, file) 314 | -------------------------------------------------------------------------------- /dataset_collection.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import openml 4 | import pandas as pd 5 | 6 | 7 | suite = openml.study.get_suite(218) 8 | task_ids = suite.tasks 9 | 10 | dataset_table = { 11 | 'Task Id': [], 12 | 'Dataset Name': [], 13 | 'Number of examples': [], 14 | 'Number of features': [], 15 | 'Majority class percentage': [], 16 | 'Minority class percentage': [], 17 | } 18 | 19 | for task_id in task_ids: 20 | task = openml.tasks.get_task(task_id, download_data=False) 21 | dataset = openml.datasets.get_dataset(task.dataset_id, download_data=False) 22 | dataset_table['Task Id'].append(task_id) 23 | dataset_table['Dataset Name'].append(dataset.name) 24 | dataset_table['Number of examples'].append(dataset.qualities['NumberOfInstances']) 25 | dataset_table['Number of features'].append(dataset.qualities['NumberOfFeatures']) 26 | dataset_table['Majority class percentage'].append(f"{dataset.qualities['MajorityClassPercentage']:.3f}") 27 | dataset_table['Minority class percentage'].append(f"{dataset.qualities['MinorityClassPercentage']:.3f}") 28 | 29 | output_path = os.path.expanduser( 30 | os.path.join( 31 | '~', 32 | 'Desktop', 33 | 'dataset_collection.csv' 34 | ) 35 | ) 36 | 37 | dataset_info_frame = pd.DataFrame.from_dict(dataset_table) 38 | dataset_info_frame.to_csv(output_path, index=False) 39 | -------------------------------------------------------------------------------- /figures/all_baselines_diagram.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/machinelearningnuremberg/WellTunedSimpleNets/54058460d5b587bc84107c200e6f1c44755a87e0/figures/all_baselines_diagram.png -------------------------------------------------------------------------------- /results.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from typing import List, Tuple 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import pandas as pd 8 | import openml 9 | from scipy.stats import wilcoxon, rankdata 10 | import seaborn as sns 11 | 12 | 13 | sns.set( 14 | rc={ 15 | 'figure.figsize': (11.7, 8.27), 16 | 'font.size': 35, 17 | 'axes.titlesize': 35, 18 | 'axes.labelsize': 35, 19 | 'xtick.labelsize': 35, 20 | 'ytick.labelsize': 35, 21 | }, 22 | style="white" 23 | ) 24 | 25 | 26 | def get_task_list( 27 | benchmark_task_file: str = 'path/to/tasks.txt', 28 | ) -> List[int]: 29 | """Get the task id list. 30 | 31 | Goes through the given file and collects all of the task 32 | ids. 33 | 34 | Args: 35 | benchmark_task_file (str): 36 | A string to the path of the benchmark task file. Including 37 | the task file name. 38 | 39 | Returns: 40 | benchmark_task_ids (List[int]): 41 | A list of all the task ids for the benchmark. 42 | """ 43 | with open(os.path.join(benchmark_task_file), 'r') as f: 44 | benchmark_info_str = f.readline() 45 | benchmark_task_ids = [int(task_id) for task_id in benchmark_info_str.split(' ')] 46 | 47 | return benchmark_task_ids 48 | 49 | 50 | # TODO merge all the build_table functions 51 | def build_table_from_autopytorch_data( 52 | output_dir: str, 53 | benchmark_task_file: str, 54 | seed: int = 11, 55 | ): 56 | """ 57 | Stores the final performance for the autopytorch algorithm on every dataset 58 | to a csv file in the output_dir. 59 | 60 | Args: 61 | output_dir (str): The output directory where the results are stored. 62 | benchmark_task_file (str): The path where the benchmark txt file is located. 63 | seed (int): The seed used for the experiment. 64 | """ 65 | experiment_table = { 66 | 'Task Id': [], 67 | 'Test Performance': [], 68 | } 69 | benchmark_task_ids = get_task_list(benchmark_task_file) 70 | for task_id in benchmark_task_ids: 71 | task_dir = os.path.join( 72 | output_dir, 73 | f'{seed}', 74 | f'{task_id}', 75 | 'final_result.json' 76 | ) 77 | 78 | try: 79 | with open(task_dir, 'r') as fp: 80 | task_performance_info = json.load(fp) 81 | task_test_performance = task_performance_info['test balanced accuracy'] 82 | experiment_table['Task Id'].append(task_id) 83 | experiment_table['Test Performance'].append(task_test_performance) 84 | except FileNotFoundError: 85 | print(f'Refit for task id:{task_id} not found') 86 | 87 | 88 | experiment_df = pd.DataFrame.from_dict(experiment_table, orient='columns') 89 | df_dir = os.path.join( 90 | output_dir, 91 | 'results.csv', 92 | ) 93 | experiment_df.to_csv(df_dir, index=False) 94 | 95 | 96 | def build_table_from_node_data( 97 | output_dir: str, 98 | benchmark_task_file: str, 99 | seed: int = 11, 100 | ): 101 | """ 102 | Stores the final performance for the node algorithm on every dataset 103 | to a csv file in the output_dir. 104 | 105 | Args: 106 | output_dir (str): The output directory where the results are stored. 107 | benchmark_task_file (str): The path where the benchmark txt file is located. 108 | seed (int): The seed used for the experiment. 
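    Note:
        Results are read from output_dir/<seed>/<task_id>/final_result.json, as
        written by the refit experiments; tasks whose file is missing are
        skipped with a message.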
109 | """ 110 | experiment_table = { 111 | 'Task Id': [], 112 | 'Test Performance': [], 113 | } 114 | benchmark_task_ids = get_task_list(benchmark_task_file) 115 | for task_id in benchmark_task_ids: 116 | task_dir = os.path.join( 117 | output_dir, 118 | f'{seed}', 119 | f'{task_id}', 120 | 'results.json' 121 | ) 122 | 123 | try: 124 | with open(task_dir, 'r') as fp: 125 | task_performance_info = json.load(fp) 126 | task_test_performance = task_performance_info['test balanced accuracy'] 127 | experiment_table['Task Id'].append(task_id) 128 | experiment_table['Test Performance'].append(task_test_performance) 129 | except FileNotFoundError: 130 | print(f'Refit for task id:{task_id} not found') 131 | 132 | 133 | experiment_df = pd.DataFrame.from_dict(experiment_table, orient='columns') 134 | df_dir = os.path.join( 135 | output_dir, 136 | 'results.csv', 137 | ) 138 | experiment_df.to_csv(df_dir, index=False) 139 | 140 | 141 | def build_table_from_tabnet_data( 142 | output_dir: str, 143 | benchmark_task_file: str, 144 | seed: int = 11, 145 | ): 146 | """ 147 | Stores the final performance for the TabNet algorithm on every dataset 148 | to a csv file in the output_dir. 149 | 150 | Args: 151 | output_dir (str): The output directory where the results are stored. 152 | benchmark_task_file (str): The path where the benchmark txt file is located. 153 | seed (int): The seed used for the experiment. 154 | """ 155 | experiment_table = { 156 | 'Task Id': [], 157 | 'Test Performance': [], 158 | } 159 | benchmark_task_ids = get_task_list(benchmark_task_file) 160 | for task_id in benchmark_task_ids: 161 | task_dir = os.path.join( 162 | output_dir, 163 | f'{task_id}', 164 | f'{seed}', 165 | 'refit_results.json' 166 | ) 167 | 168 | try: 169 | with open(task_dir, 'r') as fp: 170 | task_performance_info = json.load(fp) 171 | task_test_performance = task_performance_info['test_accuracy'] 172 | experiment_table['Task Id'].append(task_id) 173 | experiment_table['Test Performance'].append(task_test_performance) 174 | except FileNotFoundError: 175 | print(f'Refit for task id:{task_id} not found') 176 | experiment_table['Task Id'].append(task_id) 177 | experiment_table['Test Performance'].append(-1) 178 | 179 | 180 | experiment_df = pd.DataFrame.from_dict(experiment_table, orient='columns') 181 | df_dir = os.path.join( 182 | output_dir, 183 | 'results.csv', 184 | ) 185 | experiment_df.to_csv(df_dir, index=False) 186 | 187 | 188 | def build_table_from_autogluon_data( 189 | output_dir: str, 190 | benchmark_task_file: str, 191 | seed: int = 11, 192 | ): 193 | """ 194 | Stores the final performance for the AutoGluon algorithm on every dataset 195 | to a csv file in the output_dir. 196 | 197 | Args: 198 | output_dir (str): The output directory where the results are stored. 199 | benchmark_task_file (str): The path where the benchmark txt file is located. 200 | seed (int): The seed used for the experiment. 
201 | """ 202 | experiment_table = { 203 | 'Task Id': [], 204 | 'Test Performance': [], 205 | } 206 | benchmark_task_ids = get_task_list(benchmark_task_file) 207 | for task_id in benchmark_task_ids: 208 | task_dir = os.path.join( 209 | output_dir, 210 | f'{seed}', 211 | f'{task_id}', 212 | 'results.csv', 213 | ) 214 | 215 | try: 216 | performance_df = pd.read_csv(task_dir) 217 | score = performance_df['score'].to_numpy() 218 | score = score[0] 219 | except FileNotFoundError: 220 | print(f'No results for task id:{task_id}') 221 | score = -1 222 | experiment_table['Task Id'].append(task_id) 223 | experiment_table['Test Performance'].append(score) 224 | 225 | experiment_df = pd.DataFrame.from_dict(experiment_table, orient='columns') 226 | df_dir = os.path.join( 227 | output_dir, 228 | 'results.csv', 229 | ) 230 | experiment_df.to_csv(df_dir, index=False) 231 | 232 | 233 | def build_table_from_cocktails_data( 234 | output_dir: str, 235 | benchmark_task_file: str, 236 | seed: int = 11, 237 | ): 238 | """ 239 | Stores the final performance for the old autopytorch algorithm on every 240 | dataset to a csv file in the output_dir. 241 | 242 | Args: 243 | output_dir (str): The output directory where the results are stored. 244 | benchmark_task_file (str): The path where the benchmark txt file is located. 245 | seed (int): The seed used for the experiment. 246 | """ 247 | experiment_table = { 248 | 'Task Id': [], 249 | 'Duration': [], 250 | } 251 | benchmark_task_ids = get_task_list(benchmark_task_file) 252 | for task_id in benchmark_task_ids: 253 | task_dir = os.path.join( 254 | output_dir, 255 | '512', 256 | f'{task_id}', 257 | 'refit_run', 258 | f'{seed}', 259 | 'run_results.txt', 260 | ) 261 | if not os.path.exists(task_dir): 262 | task_dir = os.path.join( 263 | output_dir, 264 | '512', 265 | f'{task_id}', 266 | 'run_results.txt', 267 | ) 268 | 269 | try: 270 | with open(task_dir, 'r') as fp: 271 | task_performance_info = json.load(fp) 272 | task_performance = float(task_performance_info['mean_test_bal_acc']) 273 | experiment_table['Task Id'].append(task_id) 274 | experiment_table['Test Performance'].append(task_performance) 275 | except FileNotFoundError: 276 | print(f'Refit for task id:{task_id} not found') 277 | experiment_table['Task Id'].append(task_id) 278 | experiment_table['Test Performance'].append(-1) 279 | 280 | 281 | experiment_df = pd.DataFrame.from_dict(experiment_table, orient='columns') 282 | df_dir = os.path.join( 283 | output_dir, 284 | 'results.csv', 285 | ) 286 | experiment_df.to_csv(df_dir, index=False) 287 | 288 | 289 | def generate_times_from_autopytorch_data( 290 | output_dir: str, 291 | benchmark_task_file: str, 292 | seed: int = 11, 293 | ): 294 | """ 295 | Stores the duration for the autopytorch algorithm on every dataset 296 | to a csv file in the output_dir. 297 | 298 | Args: 299 | output_dir (str): The output directory where the results are stored. 300 | benchmark_task_file (str): The path where the benchmark txt file is located. 301 | seed (int): The seed used for the experiment. 
302 | """ 303 | experiment_table = { 304 | 'Task Id': [], 305 | 'Duration': [], 306 | } 307 | benchmark_task_ids = get_task_list(benchmark_task_file) 308 | for task_id in benchmark_task_ids: 309 | task_dir = os.path.join( 310 | output_dir, 311 | f'{seed}', 312 | f'{task_id}', 313 | 'final_result.json' 314 | ) 315 | 316 | try: 317 | with open(task_dir, 'r') as fp: 318 | task_performance_info = json.load(fp) 319 | task_duration = float(task_performance_info['duration']) 320 | experiment_table['Task Id'].append(task_id) 321 | experiment_table['Duration'].append(task_duration) 322 | except FileNotFoundError: 323 | print(f'Refit for task id:{task_id} not found') 324 | experiment_table['Task Id'].append(task_id) 325 | experiment_table['Duration'].append(-1) 326 | 327 | 328 | experiment_df = pd.DataFrame.from_dict(experiment_table, orient='columns') 329 | df_dir = os.path.join( 330 | output_dir, 331 | 'durations.csv', 332 | ) 333 | experiment_df.to_csv(df_dir, index=False) 334 | 335 | 336 | def build_all_table( 337 | result_dir: str, 338 | ): 339 | """Generates a table with all the baselines and their final performances on every 340 | dataset. 341 | 342 | Args: 343 | result_dir (str): The results folder where the data for every baseline is organized. 344 | 345 | Returns: 346 | output (pd.DataFrame): The DataFrame with all the baseline final results. 347 | 348 | Note: 349 | The folder structure should be result_dir/baseline_name/results.csv, where results.csv 350 | corresponds to a table with the performance of the baseline of every task. 351 | """ 352 | method_folders = [ 353 | 'plain_network', 354 | 'dropout', 355 | 'selu', 356 | 'XGBoost/ES', 357 | 'neurips_xgboost_es', 358 | 'neurips_xgboost_no_es', 359 | 'catboost_v2', 360 | 'XGBoost/No ES', 361 | 'autosklearn', 362 | 'tabnet/ES', 363 | 'autogluon_only_hpo', 364 | 'tabnet/No ES', 365 | 'node', 366 | 'autogluon/nn_only_4_days', 367 | 'autogluon/full_4_days', 368 | 'cocktail', 369 | 'new_cocktail', 370 | 'search_cocktail', 371 | ] 372 | 373 | pretty_names = { 374 | 'autogluon/nn_only_4_days': ' AutoGL. + Stacking', 375 | 'autogluon/full_4_days': 'Full AutoGL', 376 | 'autogluon_only_hpo': 'AutoGL. + HPO', 377 | 'cocktail': ' MLP + C ', 378 | 'new_cocktail': 'SMAC MLP + C ', 379 | 'search_cocktail': 'Search Smac + C', 380 | 'plain_network': ' MLP ', 381 | 'dropout': ' MLP + D ', 382 | 'node': ' NODE ', 383 | 'tabnet/ES': ' TabN. + ES ', 384 | 'XGBoost/ES': ' XGB. + ES ', 385 | 'tabnet/No ES': ' TabN. ', 386 | 'XGBoost/No ES': ' XGB. ', 387 | 'autosklearn': ' ASK-G. ', 388 | 'selu': 'MLP + S', 389 | 'catboost_v2': 'CatBoost', 390 | 'neurips_xgboost_es': 'XGB. + ES + ENC', 391 | 'neurips_xgboost_no_es': 'XGB. 
+ ENC',
392 |     }
393 | 
394 |     pandas_frames = []
395 |     drop_task_ids = False
396 |     for method in method_folders:
397 |         method_results = os.path.join(result_dir, method)
398 |         method_df = pd.read_csv(os.path.join(method_results, 'results.csv'))
399 |         method_df.columns = ['Task Id', pretty_names[method]]
400 |         if drop_task_ids:
401 |             method_df = method_df.drop(labels=['Task Id'], axis=1)
402 |         else:
403 |             drop_task_ids = True
404 |         pandas_frames.append(method_df)
405 |     output = pd.concat(pandas_frames, join='outer', axis=1)
406 |     task_infos = []
407 |     for task_id in output['Task Id']:
408 |         task = openml.tasks.get_task(task_id, download_data=False)
409 |         dataset = openml.datasets.get_dataset(task.dataset_id, download_data=False)
410 |         task_info = f'{dataset.qualities["NumberOfInstances"]}/{dataset.qualities["NumberOfFeatures"]}'
411 |         task_infos.append(task_info)
412 | 
413 |     # uncomment if you want to add dataset information in the form of instances
414 |     # and features
415 |     # output.insert(loc=1, column='Size (Ins./Feat.)', value=task_infos)
416 |     # output['Task Id'] = output['Task Id'].apply(lambda x: openml.datasets.get_dataset(openml.tasks.get_task(x, download_data=False).dataset_id, download_data=False).name)
417 |     output = output.fillna(-1)
418 | 
419 |     # skipping the task information columns to format in a pretty way
420 |     # the baseline result columns.
421 |     method_columns = output.columns[1:]
422 | 
423 |     # only keep precision up to 3 numbers after the dot/comma
424 |     for column in method_columns:
425 |         output[column] = output[column].apply(lambda x: f'{x * 100:.3f}' if x != -1 else np.nan)
426 | 
427 |     return output
428 | 
429 | 
430 | def compare_models(
431 |     result_dir: str,
432 |     baseline: str = 'autogluon/full_4_days',
433 |     cocktails: str = 'autopytorch',
434 | ):
435 |     """Compare a baseline with the regularization cocktail.
436 | 
437 |     The method will print the necessary information based on the results.
438 |     The results should be stored in the following format:
439 |     result_dir/baseline_name/results.csv
440 | 
441 |     Args:
442 |         result_dir (str): The directory where the results are stored.
443 |         baseline (str): The baseline name.
444 |         cocktails (str): The main method name.
445 |     """
446 |     baseline_results = os.path.join(
447 |         result_dir,
448 |         baseline,
449 |         'results.csv',
450 |     )
451 |     cocktail_results = os.path.join(
452 |         result_dir,
453 |         cocktails,
454 |         'results.csv',
455 |     )
456 | 
457 |     cocktails_df = pd.read_csv(cocktail_results)
458 |     baseline_df = pd.read_csv(baseline_results)
459 | 
460 |     task_ids = list(cocktails_df['Task Id'])
461 |     task_ids = [int(task_id) for task_id in task_ids]
462 | 
463 |     cocktail_performances = []
464 |     baseline_performances = []
465 |     cocktail_wins = 0
466 |     cocktail_losses = 0
467 |     cocktail_draws = 0
468 | 
469 |     for task_id in task_ids:
470 |         cocktail_task_performance = cocktails_df.query(f'`Task Id`=={task_id}')['Test Performance']
471 |         cocktail_task_performance = cocktail_task_performance.to_numpy()[0]
472 |         baseline_task_performance = baseline_df.query(f'`Task Id`=={task_id}')['Test Performance']
473 |         baseline_task_performance = baseline_task_performance.to_numpy()[0]
474 | 
475 |         # if a task has not finished for the baseline, do not use it
476 |         # in the comparison against the regularization cocktail.
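        # Unfinished runs are written with a sentinel value of -1 by the
        # build_table_* helpers above, so they are excluded here.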
477 |         if baseline_task_performance != -1.0:
478 |             cocktail_performances.append(cocktail_task_performance)
479 |             baseline_performances.append(baseline_task_performance)
480 |             if cocktail_task_performance > baseline_task_performance:
481 |                 cocktail_wins += 1
482 |             elif cocktail_task_performance == baseline_task_performance:
483 |                 cocktail_draws += 1
484 |             else:
485 |                 cocktail_losses += 1
486 |     _, p_value = wilcoxon(cocktail_performances, baseline_performances)
487 | 
488 |     print(f'Cocktail against {baseline}, '
489 |           f'wins {cocktail_wins}, '
490 |           f'loses {cocktail_losses}, '
491 |           f'draws {cocktail_draws}')
492 |     print(f'Wilcoxon p-value {p_value}')
493 | 
494 | 
495 | def build_cd_diagram(
496 |     results_dir: str,
497 | ) -> pd.DataFrame:
498 |     """Prepare the results for a critical difference diagram.
499 |     This function prepares all the results into a pandas dataframe
500 |     so that it can be used to create a critical difference diagram
501 |     of all the methods.
502 | 
503 |     Args:
504 |         results_dir (str): The directory where the results are stored.
505 | 
506 |     Returns:
507 |         result_df (pd.DataFrame):
508 |             The DataFrame that contains the final results for all the baselines
509 |             in a format that can be used as an input for the cd-diagram plot.
510 | 
511 |     Note:
512 |         The folder structure should be result_dir/baseline_name/results.csv, where results.csv
513 |         corresponds to a table with the performance of the baseline on every task.
514 |     """
515 |     method_folders = [
516 |         'plain_network',
517 |         'dropout',
518 |         'XGBoost/ES',
519 |         'XGBoost/No ES',
520 |         'autosklearn',
521 |         'autogluon_only_hpo',
522 |         'tabnet/ES',
523 |         'tabnet/No ES',
524 |         'node',
525 |         'selu',
526 |         'autogluon/nn_only_4_days',
527 |         'autogluon/full_4_days',
528 |         'cocktail',
529 |         'new_cocktail',
530 |         'search_cocktail',
531 |         'catboost_v2',
532 |         'neurips_xgboost_es',
533 |         'neurips_xgboost_no_es',
534 |     ]
535 | 
536 |     pretty_names = {
537 |         'autogluon/nn_only_4_days': ' AutoGL. S',
538 |         'autogluon/full_4_days': 'Full AutoGL',
539 |         'autogluon_only_hpo': 'AutoGL. HPO',
540 |         'cocktail': ' MLP + C ',
541 |         'new_cocktail': 'SMAC MLP + C ',
542 |         'search_cocktail': 'Search Smac + C',
543 |         'plain_network': ' MLP ',
544 |         'dropout': ' MLP + D ',
545 |         'node': ' NODE ',
546 |         'tabnet/ES': ' TabN. + ES ',
547 |         'XGBoost/ES': ' XGB. + ES ',
548 |         'tabnet/No ES': ' TabN. ',
549 |         'XGBoost/No ES': ' XGB. ',
550 |         'autosklearn': ' ASK-G. ',
551 |         'selu': 'MLP + SELU',
552 |         'catboost_v2': 'CatBoost',
553 |         'neurips_xgboost_es': 'XGB. + ES + ENC',
554 |         'neurips_xgboost_no_es': 'XGB.
+ ENC', 555 | } 556 | 557 | table_results = { 558 | 'Network': [], 559 | 'Task Id': [], 560 | 'Balanced Accuracy': [], 561 | } 562 | 563 | search_results = os.path.join(results_dir, 'cocktail') 564 | search_df = pd.read_csv(os.path.join(search_results, 'results.csv')) 565 | task_ids = list(search_df['Task Id']) 566 | task_ids = [int(task_id) for task_id in task_ids] 567 | 568 | for method in method_folders: 569 | method_results = os.path.join(results_dir, method) 570 | method_df = pd.read_csv(os.path.join(method_results, 'results.csv')) 571 | method_df.columns = ['Task Id', pretty_names[method]] 572 | for index, row in method_df.iterrows(): 573 | if int(row['Task Id']) in task_ids: 574 | table_results['Network'].append(pretty_names[method]) 575 | table_results['Task Id'].append(row['Task Id']) 576 | accuracy = row[pretty_names[method]] 577 | table_results['Balanced Accuracy'].append(accuracy if accuracy != -1 else np.nan) 578 | 579 | result_df = pd.DataFrame(data=table_results) 580 | 581 | return result_df 582 | 583 | 584 | def generate_ranks_data( 585 | all_data: pd.DataFrame, 586 | ) -> pd.DataFrame: 587 | """Generate the ranks of the baselines for every dataset. 588 | 589 | Args: 590 | all_data (pd.DataFrame): 591 | A dataframe where each row consists of tasks values 592 | across different models. 593 | 594 | Returns: 595 | ranks_df (pd.DataFrame): 596 | A dataframe of the ranks of all methods over 597 | the different tasks. 598 | """ 599 | all_ranked_data = [] 600 | all_data.drop(columns=['Task Id'], inplace=True) 601 | column_names = all_data.columns 602 | for row in all_data.itertuples(index=False): 603 | task_regularization_data = list(row) 604 | task_regularization_data = [float(x) for x in task_regularization_data] 605 | 606 | task_ranked_data = rankdata( 607 | task_regularization_data, 608 | method='average', 609 | ) 610 | reversed_data = len(task_ranked_data) + 1 - task_ranked_data 611 | all_ranked_data.append(reversed_data) 612 | ranks_df = pd.DataFrame(all_ranked_data, columns=column_names) 613 | 614 | return ranks_df 615 | 616 | 617 | def patch_violinplot(): 618 | """Patch seaborn's violinplot in current axis 619 | to workaround matplotlib's bug ##5423.""" 620 | from matplotlib.collections import PolyCollection 621 | ax = plt.gca() 622 | for art in ax.get_children(): 623 | if isinstance(art, PolyCollection): 624 | art.set_edgecolor((0.3, 0.3, 0.3)) 625 | 626 | 627 | def generate_ranks_comparison( 628 | all_data: pd.DataFrame, 629 | ): 630 | """Generate a ranks comparison between all methods. 631 | Creates a violin plot that showcases the ranks that 632 | the different methods achieve over all the tasks/datasets 633 | and saves it in the current executing folder. 634 | 635 | Args: 636 | all_data (pd.DataFrame): 637 | A dataframe where each row consists of method 638 | ranks over a certain task. 
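    Note:
        The ranks come from generate_ranks_data, where rank 1 is assigned to
        the best-performing method on a task.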
639 | """ 640 | all_data_ranked = generate_ranks_data(all_data) 641 | all_data = pd.melt( 642 | all_data_ranked, 643 | value_vars=all_data.columns, 644 | var_name='Method', 645 | value_name='Rank', 646 | ) 647 | 648 | fig, _ = plt.subplots() 649 | sns.violinplot(x='Method', y='Rank', linewidth=3, data=all_data, cut=0, kind='violin') 650 | patch_violinplot() 651 | plt.title('Ranks of the baselines and the MLP + C') 652 | plt.xlabel("") 653 | # plt.xticks(rotation=60) 654 | plt.tick_params( 655 | axis='x', # changes apply to the x-axis 656 | which='both', # both major and minor ticks are affected 657 | top=False, 658 | bottom=True, 659 | # ticks along the top edge are off 660 | ) 661 | fig.autofmt_xdate() 662 | plt.savefig( 663 | 'violin_ranks.pdf', 664 | bbox_inches='tight', 665 | pad_inches=0.15, 666 | margins=0.1, 667 | ) 668 | 669 | 670 | def plot_models_error_rate( 671 | result_dir, 672 | baseline, 673 | cocktails, 674 | ): 675 | """Plot a comparison of the models and generate descriptive 676 | statistics based on the results of all the models. 677 | Generates plots which showcase the gain of the cocktail versus 678 | the baseline. (Plots the error rate of the baseline divided 679 | by the error rate of the cocktail.) Furthermore, it 680 | generates information regarding the wins, looses and draws 681 | of both methods, including a significance result. Saves the 682 | plot to the current folder. 683 | 684 | Args: 685 | baseline_dir (str): 686 | The directory where the results are located for the baseline 687 | methods. 688 | cocktail_dir (str): 689 | The directory where the results are located for the regularization 690 | cocktails. 691 | """ 692 | pretty_names = { 693 | 'cocktail': 'MLP + C', 694 | 'autogluon/nn_only_4_days': 'AutoGL. S', 695 | 'XGBoost/No ES': 'XGB.', 696 | 'autosklearn': 'ASK-G.', 697 | } 698 | cocktail_error_rates = [] 699 | baseline_error_rates = [] 700 | 701 | baseline_results = os.path.join( 702 | result_dir, 703 | baseline, 704 | 'results.csv', 705 | ) 706 | cocktail_results = os.path.join( 707 | result_dir, 708 | cocktails, 709 | 'results.csv', 710 | ) 711 | cocktails_df = pd.read_csv(cocktail_results) 712 | baseline_df = pd.read_csv(baseline_results) 713 | 714 | task_ids = list(cocktails_df['Task Id']) 715 | for task_id in task_ids: 716 | cocktail_task_performance = cocktails_df.query(f'`Task Id`=={task_id}')['Test Performance'] 717 | cocktail_task_performance = cocktail_task_performance.to_numpy()[0] 718 | baseline_task_performance = baseline_df.query(f'`Task Id`=={task_id}')['Test Performance'] 719 | baseline_task_performance = baseline_task_performance.to_numpy()[0] 720 | 721 | cocktail_task_result_error = 1 - cocktail_task_performance 722 | benchmark_task_result_error = 1 - baseline_task_performance 723 | cocktail_error_rates.append(cocktail_task_result_error) 724 | baseline_error_rates.append(benchmark_task_result_error) 725 | 726 | fig, ax = plt.subplots() 727 | plt.scatter(baseline_error_rates, cocktail_error_rates, s=100, c='#273E47', label='Test Error Rate') 728 | lims = [ 729 | np.min([0, 0]), # min of both axes 730 | np.max([ax.get_xlim(), ax.get_ylim()]), # max of both axes 731 | ] 732 | yticks = ax.yaxis.get_major_ticks() 733 | yticks[0].set_visible(False) 734 | # now plot both limits against eachother 735 | ax.plot(lims, lims, 'k-', alpha=0.75, zorder=0, color='r') 736 | ax.set_aspect('equal') 737 | ax.set_xlim(lims) 738 | ax.set_ylim(lims) 739 | plt.xlabel(f"{pretty_names[baseline]} Error Rate") 740 | plt.ylabel(f"{pretty_names[cocktails]} Error 
Rate") 741 | 742 | plt.tick_params( 743 | axis='x', # changes apply to the x-axis 744 | which='both', # both major and minor ticks are affected 745 | top=False, 746 | bottom=True, 747 | # ticks along the top edge are off 748 | ) 749 | plt.tick_params( 750 | axis='y', 751 | which='both', 752 | left=True, 753 | right=False, 754 | ) 755 | 756 | # plt.title("Comparison with XGBoost") 757 | plt.savefig( 758 | f'cocktail_vs_{pretty_names[baseline]}.pdf', 759 | bbox_inches='tight', 760 | pad_inches=0.15, 761 | margins=0.1, 762 | ) 763 | 764 | 765 | def incumbent_time_dataset( 766 | result_path: str, 767 | dataset_id: int, 768 | seed: int, 769 | max_number_configs: int = 840, 770 | method: str = 'cocktail', 771 | ) -> Tuple[float, int]: 772 | """Return the time needed to find the incumbent configuration 773 | given a maximal number of configurations for a certain dataset 774 | and a certain algorithm. 775 | 776 | Args: 777 | result_path (str): The path of the folder where the results are 778 | stored. 779 | dataset_id (int): The task id- 780 | seed (int): The seed used for the experiment. 781 | max_number_configs (int): The maximal number of configurations. 782 | method (str): The method name. 783 | 784 | Returns: 785 | incumbent_time, incumbent_index (Tuple[float, int]): 786 | A tuple with the time needed to find the incumbent configuration 787 | and the index of the incumbent configuration. 788 | """ 789 | if method == 'cocktail': 790 | task_result_folder = os.path.expanduser( 791 | os.path.join( 792 | result_path, 793 | f'{dataset_id}', 794 | 'hpo_run', 795 | f'{seed}', 796 | ) 797 | ) 798 | else: 799 | task_result_folder = os.path.expanduser( 800 | os.path.join( 801 | result_path, 802 | f'{dataset_id}', 803 | f'{seed}', 804 | ) 805 | ) 806 | 807 | index = 0 808 | incumbent_accuracy = 0 809 | start_time = None 810 | incumbent_time = None 811 | incumbent_index = None 812 | x_times = [] 813 | y_accuracies = [] 814 | 815 | with open(os.path.join(task_result_folder, 'results.json')) as result_file: 816 | for line in result_file: 817 | config_info = json.loads(line) 818 | # config_id 819 | _ = config_info[0] 820 | job_stats = config_info[2] 821 | started = job_stats['started'] 822 | finished = job_stats['finished'] 823 | 824 | if index == 0: 825 | start_time = started 826 | try: 827 | result_info = config_info[3]['info'] 828 | except Exception: 829 | print(f'Worked Died problem') 830 | 831 | if method == 'cocktail': 832 | validation_curve = result_info[0]['val_balanced_accuracy'] 833 | validation_accuracy = validation_curve[-1] 834 | else: 835 | validation_accuracy = result_info['val_accuracy'] 836 | 837 | 838 | if validation_accuracy > incumbent_accuracy: 839 | incumbent_accuracy = validation_accuracy 840 | incumbent_time = finished - start_time 841 | incumbent_index = index 842 | 843 | index += 1 844 | 845 | estimated_time = finished - start_time 846 | x_times.append(estimated_time) 847 | y_accuracies.append(incumbent_accuracy) 848 | 849 | if index == max_number_configs: 850 | print("Max number of configs reached") 851 | break 852 | 853 | return incumbent_time, incumbent_index 854 | 855 | 856 | def runtime_dataset( 857 | result_path: str, 858 | dataset_id: int, 859 | seed: int, 860 | max_number_configs: int = 840, 861 | method: str = 'cocktail', 862 | ) -> float: 863 | """Return the time needed to perform the HPO search 864 | given a maximal number of configurations for a certain 865 | dataset and a certain algorithm. 
866 | 867 | Args: 868 | result_path (str): The path of the folder where the results are 869 | stored. 870 | dataset_id (int): The task id- 871 | seed (int): The seed used for the experiment. 872 | max_number_configs (int): The maximal number of configurations. 873 | method (str): The method name. 874 | 875 | Returns: 876 | estimated_time (float): 877 | The time elapsed for the HPO search. 878 | """ 879 | if method == 'cocktail': 880 | task_result_folder = os.path.expanduser( 881 | os.path.join( 882 | result_path, 883 | f'{dataset_id}', 884 | 'hpo_run', 885 | f'{seed}', 886 | ) 887 | ) 888 | else: 889 | task_result_folder = os.path.expanduser( 890 | os.path.join( 891 | result_path, 892 | f'{dataset_id}', 893 | f'{seed}', 894 | ) 895 | ) 896 | 897 | index = 0 898 | start_time = None 899 | 900 | with open(os.path.join(task_result_folder, 'results.json')) as result_file: 901 | for line in result_file: 902 | config_info = json.loads(line) 903 | job_stats = config_info[2] 904 | started = job_stats['started'] 905 | finished = job_stats['finished'] 906 | 907 | if index == 0: 908 | start_time = started 909 | 910 | estimated_time = finished - start_time 911 | index += 1 912 | 913 | if index == max_number_configs: 914 | print("Max number of configs reached") 915 | break 916 | 917 | return estimated_time 918 | 919 | 920 | def generate_cocktail_vs_xgboost_incumbent_times( 921 | cocktail_folder: str, 922 | baseline_folder: str, 923 | baseline_name: str, 924 | benchmark_task_file: str, 925 | ): 926 | """Generate the cocktail vs XGBoost incumbent times 927 | information. 928 | 929 | Generates information regarding the cocktail vs xgboost time 930 | performance and saves a plot with the time distributions of what 931 | every method took to find the incumbent configuration. 932 | 933 | Args: 934 | cocktail_folder (str): The path where the cocktail folder is located. 935 | baseline_folder (str): The path where the baseline results are located. 936 | baseline_name (str): The baseline name. 937 | benchmark_task_file (str): The benchmark task file path. 
938 | """ 939 | task_ids = get_task_list(benchmark_task_file) 940 | cocktail_incumbent_task_times = [] 941 | xgboost_incumbent_task_times = [] 942 | info_dict = { 943 | 'Cocktail': [], 944 | 'XGBoost': [], 945 | } 946 | for task_id in task_ids: 947 | print(task_id) 948 | cocktail_task_time, cocktail_task_index = incumbent_time_dataset( 949 | cocktail_folder, 950 | task_id, 951 | 11, 952 | ) 953 | xgboost_task_time, xgboost_task_index = incumbent_time_dataset( 954 | baseline_folder, 955 | task_id, 956 | 11, 957 | method=baseline_name, 958 | ) 959 | cocktail_incumbent_task_times.append(cocktail_task_time) 960 | xgboost_incumbent_task_times.append(xgboost_task_time) 961 | info_dict['Cocktail'].append(cocktail_task_time) 962 | info_dict['XGBoost'].append(xgboost_task_time) 963 | 964 | print(f'Cocktail mean: {np.mean(cocktail_incumbent_task_times)}') 965 | print(f'Cocktail min: {np.min(cocktail_incumbent_task_times)}') 966 | print(f'XGBoost mean: {np.mean(xgboost_incumbent_task_times)}') 967 | print(f'Cocktail std: {np.std(cocktail_incumbent_task_times)}') 968 | print(f'XGBoost std: {np.std(xgboost_incumbent_task_times)}') 969 | info_frame = pd.DataFrame.from_dict(info_dict) 970 | 971 | sns.boxplot(data=info_frame) 972 | plt.ylabel('Time (seconds)') 973 | plt.tight_layout() 974 | plt.savefig('comparison_incumbents_times.pdf') 975 | 976 | 977 | def incumbent_performance_time_dataset( 978 | result_path: str, 979 | dataset_id: int, 980 | seed: int, 981 | max_number_configs: int = 840, 982 | method: str = 'cocktail', 983 | time: int = 3600, 984 | ) -> float: 985 | """Return the test accuracy of the incumbent configuration 986 | given a maximal number of configurations for a certain dataset 987 | and a certain algorithm for a given time marker. 988 | 989 | Args: 990 | result_path (str): The path of the folder where the results are 991 | stored. 992 | dataset_id (int): The task id- 993 | seed (int): The seed used for the experiment. 994 | max_number_configs (int): The maximal number of configurations. 995 | method (str): The method name. 996 | time (int): The time marker. 997 | 998 | Returns: 999 | incumbent_test_accuracy (float): 1000 | The incumbent test accuracy. 
1001 | """ 1002 | if method == 'cocktail': 1003 | task_result_folder = os.path.expanduser( 1004 | os.path.join( 1005 | result_path, 1006 | f'{dataset_id}', 1007 | 'hpo_run', 1008 | f'{seed}', 1009 | ) 1010 | ) 1011 | else: 1012 | task_result_folder = os.path.expanduser( 1013 | os.path.join( 1014 | result_path, 1015 | f'{dataset_id}', 1016 | f'{seed}', 1017 | ) 1018 | ) 1019 | 1020 | index = 0 1021 | incumbent_val_accuracy = 0 1022 | incumbent_test_accuracy = 0 1023 | start_time = None 1024 | 1025 | with open(os.path.join(task_result_folder, 'results.json')) as result_file: 1026 | for line in result_file: 1027 | config_info = json.loads(line) 1028 | job_stats = config_info[2] 1029 | started = job_stats['started'] 1030 | finished = job_stats['finished'] 1031 | 1032 | # start the time 1033 | if index == 0: 1034 | start_time = started 1035 | 1036 | try: 1037 | result_info = config_info[3]['info'] 1038 | except Exception: 1039 | pass 1040 | # print(f'Worked Died problem') 1041 | 1042 | if method == 'cocktail': 1043 | validation_curve = result_info[0]['val_balanced_accuracy'] 1044 | validation_accuracy = validation_curve[-1] 1045 | test_curve = result_info[0]['test_result'] 1046 | test_accuracy = test_curve[-1] 1047 | else: 1048 | validation_accuracy = result_info['val_accuracy'] 1049 | test_accuracy = result_info['test_accuracy'] 1050 | 1051 | estimated_time = finished - start_time 1052 | if estimated_time >= time: 1053 | return incumbent_test_accuracy 1054 | 1055 | if validation_accuracy > incumbent_val_accuracy: 1056 | incumbent_val_accuracy = validation_accuracy 1057 | incumbent_test_accuracy = test_accuracy 1058 | 1059 | index += 1 1060 | 1061 | if index == max_number_configs: 1062 | # print("Max number of configs reached") 1063 | break 1064 | 1065 | return incumbent_test_accuracy 1066 | 1067 | def generate_performance_comparison_over_time( 1068 | cocktail_folder: str, 1069 | baseline_folder: str, 1070 | baseline_name: str, 1071 | benchmark_task_file: str, 1072 | ): 1073 | """Generate the cocktail vs XGBoost incumbent 1074 | performance over time information. 1075 | 1076 | Generates information regarding the cocktail vs xgboost time 1077 | performance and saves a plot with the average ranks of the 1078 | methods over time. 1079 | 1080 | Args: 1081 | cocktail_folder (str): The path where the cocktail folder is located. 1082 | baseline_folder (str): The path where the baseline results are located. 1083 | baseline_name (str): The baseline name. 1084 | benchmark_task_file (str): The benchmark task file path. 
1085 | """ 1086 | task_ids = get_task_list(benchmark_task_file) 1087 | times = [900, 1800, 3600, 7200, 14400, 28800, 57600, 115200, 230400, 345600] 1088 | 1089 | cocktail_ranks_over_time = [] 1090 | cocktail_stds_over_time = [] 1091 | baseline_ranks_over_time = [] 1092 | baseline_stds_over_time = [] 1093 | 1094 | for time in times: 1095 | baseline_ranks = [] 1096 | cocktail_ranks = [] 1097 | 1098 | cocktail_wins = 0 1099 | cocktail_ties = 0 1100 | cocktail_loses = 0 1101 | cocktail_performances = [] 1102 | baseline_performances = [] 1103 | 1104 | for task_id in task_ids: 1105 | 1106 | cocktail_incumbent_performance = incumbent_performance_time_dataset( 1107 | cocktail_folder, 1108 | task_id, 1109 | 11, 1110 | time=time, 1111 | ) 1112 | baseline_incumbent_performance = incumbent_performance_time_dataset( 1113 | baseline_folder, 1114 | task_id, 1115 | 11, 1116 | method=baseline_name, 1117 | time=time, 1118 | ) 1119 | cocktail_performances.append(cocktail_incumbent_performance) 1120 | baseline_performances.append(baseline_incumbent_performance) 1121 | 1122 | if cocktail_incumbent_performance == 0 and baseline_incumbent_performance == 0: 1123 | continue 1124 | elif cocktail_incumbent_performance == 0: 1125 | cocktail_loses += 1 1126 | cocktail_ranks.append(2) 1127 | baseline_ranks.append(1) 1128 | continue 1129 | elif baseline_incumbent_performance == 0: 1130 | cocktail_wins += 1 1131 | cocktail_ranks.append(1) 1132 | baseline_ranks.append(2) 1133 | continue 1134 | 1135 | if cocktail_incumbent_performance > baseline_incumbent_performance: 1136 | cocktail_wins += 1 1137 | cocktail_ranks.append(1) 1138 | baseline_ranks.append(2) 1139 | elif cocktail_incumbent_performance == baseline_incumbent_performance: 1140 | cocktail_ties += 1 1141 | cocktail_ranks.append(1.5) 1142 | baseline_ranks.append(1.5) 1143 | else: 1144 | cocktail_loses += 1 1145 | cocktail_ranks.append(2) 1146 | baseline_ranks.append(1) 1147 | 1148 | _, p_value = wilcoxon(cocktail_performances, baseline_performances) 1149 | cocktail_ranks_over_time.append(np.mean(cocktail_ranks)) 1150 | cocktail_stds_over_time.append(np.std(cocktail_ranks)) 1151 | baseline_ranks_over_time.append(np.mean(baseline_ranks)) 1152 | baseline_stds_over_time.append(np.std(baseline_ranks)) 1153 | print(f'For a runtime of {time / 3600} hours, The cocktails won: {cocktail_wins} times, tied: {cocktail_ties} times, lost: {cocktail_loses} times\np_value: {p_value}') 1154 | 1155 | plt.plot([time / 3600 for time in times], cocktail_ranks_over_time, label='MLP + C average rank') 1156 | plt.plot([time / 3600 for time in times], baseline_ranks_over_time, label=f'XGBoost average rank') 1157 | plt.legend() 1158 | plt.xlabel('Time (Hours)') 1159 | plt.ylabel('Average Rank') 1160 | plt.tight_layout() 1161 | plt.savefig('average_time_ranks.pdf') 1162 | -------------------------------------------------------------------------------- /utilities.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from typing import Any, Callable, Dict, Optional, Tuple 3 | 4 | import ConfigSpace 5 | import pandas as pd 6 | from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates 7 | 8 | import dask.distributed 9 | 10 | import openml 11 | import numpy as np 12 | 13 | from sklearn.model_selection import train_test_split 14 | 15 | from smac.intensification.simple_intensifier import SimpleIntensifier 16 | from smac.runhistory.runhistory2epm import RunHistory2EPM4LogCost 17 | from 
smac.scenario.scenario import Scenario 18 | from smac.facade.smac_ac_facade import SMAC4AC 19 | from smac.runhistory.runhistory import RunHistory 20 | 21 | 22 | def get_data( 23 | task_id: int, 24 | val_share: float = 0.25, 25 | test_size: float = 0.2, 26 | seed: int = 11, 27 | ) -> Tuple[pd.DataFrame, pd.DataFrame, np.ndarray, np.ndarray, dict, np.ndarray]: 28 | """ 29 | Given a task id and split size information, return 30 | the dataset splits based on a seed for the main algorithm 31 | to use. 32 | 33 | Args: 34 | task_id (int): 35 | The id of the task which will be used for the run. 36 | val_share (float): 37 | The validation split size from the train set. 38 | test_size (float): 39 | The test split size from the whole dataset. 40 | seed (int): 41 | The seed used for the dataset preparation. 42 | 43 | Returns: 44 | 45 | X_train, X_test, y_train, y_test, resampling_strategy_args, categorical indicator 46 | (tuple[pd.DataFrame, pd.DataFrame, np.ndarray, np.ndarray, dict, np.ndarray]): 47 | 48 | The train examples, the test examples, the train labels, the test labels, 49 | the resampling strategy to be used and the categorical indicator for the features. 50 | """ 51 | task = openml.tasks.get_task(task_id=task_id) 52 | dataset = task.get_dataset() 53 | X, y, categorical_indicator, _ = dataset.get_data( 54 | dataset_format='dataframe', 55 | target=dataset.default_target_attribute, 56 | ) 57 | 58 | # AutoPyTorch fails when it is given a y DataFrame with False and True values 59 | # and category as dtype. In its inner workings it uses sklearn which cannot 60 | # detect the column type. 61 | if isinstance(y[1], bool): 62 | y = y.astype('bool') 63 | 64 | # uncomment only for np.arrays 65 | """ 66 | # patch categorical values to string 67 | for index_nr, categorical_feature in enumerate(categorical_indicator): 68 | if categorical_feature: 69 | X[index_nr] = X[index_nr].astype("category") 70 | """ 71 | X_train, X_test, y_train, y_test = train_test_split( 72 | X, 73 | y, 74 | test_size=test_size, 75 | random_state=seed, 76 | stratify=y, 77 | shuffle=True, 78 | ) 79 | resampling_strategy_args = { 80 | 'val_share': val_share, 81 | } 82 | 83 | """ 84 | This was an earlier fix to the AutoPyTorch failures for imbalanced datasets. In particular 85 | having variables with only null values in the train set. Now this is handled inside AutoPyTorch. 86 | 87 | train_column_nan_info = X_train.isna().all() 88 | test_column_nan_info = X_test.isna().all() 89 | only_nan_columns = [label for label, value in train_column_nan_info.items() if value] 90 | test_nan_columns = [label for label, value in test_column_nan_info.items() if value] 91 | only_nan_columns.extend(test_nan_columns) 92 | only_nan_columns = set(only_nan_columns) 93 | X_train.drop(only_nan_columns, axis='columns', inplace=True) 94 | X_test.drop(only_nan_columns, axis='columns', inplace=True) 95 | """ 96 | # TODO turn this into a dictionary 97 | 98 | return X_train, X_test, y_train, y_test, resampling_strategy_args, categorical_indicator 99 | 100 | 101 | def get_smac_object( 102 | scenario_dict: Dict[str, Any], 103 | seed: int, 104 | ta: Callable, 105 | ta_kwargs: Dict[str, Any], 106 | n_jobs: int, 107 | initial_budget: int, 108 | max_budget: int, 109 | dask_client: Optional[dask.distributed.Client], 110 | ) -> SMAC4AC: 111 | """ 112 | This function returns an SMAC object that is gonna be used as 113 | optimizer of pipelines. 114 | 115 | Args: 116 | scenario_dict (typing.Dict[str, typing.Any]): constrain on how to run 117 | the jobs. 
118 |         seed (int): to make the job deterministic.
119 |         ta (typing.Callable): the function to be intensified by smac.
120 |         ta_kwargs (typing.Dict[str, typing.Any]): Arguments to the above ta.
121 |         n_jobs (int): Amount of cores to use for this task.
122 |         initial_budget (int):
123 |             The initial budget for a configuration.
124 |         max_budget (int):
125 |             The maximal budget for a configuration.
126 |         dask_client (dask.distributed.Client): User provided scheduler.
127 | 
128 |     Returns:
129 |         (SMAC4AC): sequential model-based algorithm configuration object
130 |     """
131 |     # multi-fidelity is disabled, that is why initial_budget and max_budget
132 |     # are not used.
133 |     rh2EPM = RunHistory2EPM4LogCost
134 | 
135 |     return SMAC4AC(
136 |         scenario=Scenario(scenario_dict),
137 |         rng=seed,
138 |         runhistory2epm=rh2EPM,
139 |         tae_runner=ta,
140 |         tae_runner_kwargs=ta_kwargs,
141 |         initial_configurations=None,
142 |         run_id=seed,
143 |         intensifier=SimpleIntensifier,
144 |         dask_client=dask_client,
145 |         n_jobs=n_jobs,
146 |     )
147 | 
148 | 
149 | def get_updates_for_regularization_cocktails(
150 |     categorical_indicator: np.ndarray,
151 |     args: Namespace,
152 | ) -> Tuple[Dict, HyperparameterSearchSpaceUpdates, Dict]:
153 |     """
154 |     These updates replicate the regularization cocktail paper search space.
155 | 
156 |     Args:
157 |         categorical_indicator (np.ndarray):
158 |             An array that indicates whether a feature is categorical or not.
159 |         args (Namespace):
160 |             The different updates for the setup of the run, mostly updates
161 |             for the different regularization ingredients.
162 | 
163 |     Returns:
164 | 
165 |         pipeline_update, search_space_updates, include_updates (Tuple[dict, HyperparameterSearchSpaceUpdates, dict]):
166 |             The pipeline updates like number of epochs, budget, seed etc.
167 |             The search space updates like setting different hps to different values or ranges.
168 |             Lastly include updates, which can be used to include different features.
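    Note:
        The boolean ingredient arguments (e.g. args.use_swa) arrive as lists,
        as produced by str2bool in the experiment scripts, which is why their
        first element is used as the default value in the updates below.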
169 | """ 170 | augmentation_names_to_trainers = { 171 | 'mixup': 'MixUpTrainer', 172 | 'cutout': 'RowCutOutTrainer', 173 | 'cutmix': 'RowCutMixTrainer', 174 | 'standard': 'StandardTrainer', 175 | 'adversarial': 'AdversarialTrainer', 176 | } 177 | 178 | include_updates = dict() 179 | include_updates['network_embedding'] = ['NoEmbedding'] 180 | include_updates['network_init'] = ['NoInit'] 181 | 182 | has_cat_features = any(categorical_indicator) 183 | has_numerical_features = not all(categorical_indicator) 184 | search_space_updates = HyperparameterSearchSpaceUpdates() 185 | 186 | # architecture head 187 | search_space_updates.append( 188 | node_name='network_head', 189 | hyperparameter='__choice__', 190 | value_range=['no_head'], 191 | default_value='no_head', 192 | ) 193 | search_space_updates.append( 194 | node_name='network_head', 195 | hyperparameter='no_head:activation', 196 | value_range=['relu'], 197 | default_value='relu', 198 | ) 199 | 200 | # backbone architecture 201 | search_space_updates.append( 202 | node_name='network_backbone', 203 | hyperparameter='__choice__', 204 | value_range=['ShapedResNetBackbone'], 205 | default_value='ShapedResNetBackbone', 206 | ) 207 | search_space_updates.append( 208 | node_name='network_backbone', 209 | hyperparameter='ShapedResNetBackbone:resnet_shape', 210 | value_range=['brick'], 211 | default_value='brick', 212 | ) 213 | search_space_updates.append( 214 | node_name='network_backbone', 215 | hyperparameter='ShapedResNetBackbone:num_groups', 216 | value_range=[2], 217 | default_value=2, 218 | ) 219 | search_space_updates.append( 220 | node_name='network_backbone', 221 | hyperparameter='ShapedResNetBackbone:blocks_per_group', 222 | value_range=[2], 223 | default_value=2, 224 | ) 225 | search_space_updates.append( 226 | node_name='network_backbone', 227 | hyperparameter='ShapedResNetBackbone:output_dim', 228 | value_range=[512], 229 | default_value=512, 230 | ) 231 | search_space_updates.append( 232 | node_name='network_backbone', 233 | hyperparameter='ShapedResNetBackbone:max_units', 234 | value_range=[512], 235 | default_value=512, 236 | ) 237 | search_space_updates.append( 238 | node_name='network_backbone', 239 | hyperparameter='ShapedResNetBackbone:activation', 240 | value_range=['relu'], 241 | default_value='relu', 242 | ) 243 | search_space_updates.append( 244 | node_name='network_backbone', 245 | hyperparameter='ShapedResNetBackbone:shake_shake_update_func', 246 | value_range=['even-even'], 247 | default_value='even-even', 248 | ) 249 | 250 | # training updates 251 | search_space_updates.append( 252 | node_name='lr_scheduler', 253 | hyperparameter='__choice__', 254 | value_range=['CosineAnnealingWarmRestarts'], 255 | default_value='CosineAnnealingWarmRestarts', 256 | ) 257 | search_space_updates.append( 258 | node_name='lr_scheduler', 259 | hyperparameter='CosineAnnealingWarmRestarts:n_restarts', 260 | value_range=[3], 261 | default_value=3, 262 | ) 263 | search_space_updates.append( 264 | node_name='optimizer', 265 | hyperparameter='__choice__', 266 | value_range=['AdamWOptimizer'], 267 | default_value='AdamWOptimizer', 268 | ) 269 | search_space_updates.append( 270 | node_name='optimizer', 271 | hyperparameter='AdamWOptimizer:lr', 272 | value_range=[1e-3], 273 | default_value=1e-3, 274 | ) 275 | search_space_updates.append( 276 | node_name='data_loader', 277 | hyperparameter='batch_size', 278 | value_range=[128], 279 | default_value=128, 280 | ) 281 | 282 | # preprocessing 283 | search_space_updates.append( 284 | 
        node_name='feature_preprocessor',
285 |         hyperparameter='__choice__',
286 |         value_range=['NoFeaturePreprocessor'],
287 |         default_value='NoFeaturePreprocessor',
288 |     )
289 | 
290 |     if has_numerical_features:
291 |         search_space_updates.append(
292 |             node_name='imputer',
293 |             hyperparameter='numerical_strategy',
294 |             value_range=['median'],
295 |             default_value='median',
296 |         )
297 |         search_space_updates.append(
298 |             node_name='scaler',
299 |             hyperparameter='__choice__',
300 |             value_range=['StandardScaler'],
301 |             default_value='StandardScaler',
302 |         )
303 | 
304 |     if has_cat_features:
305 |         search_space_updates.append(
306 |             node_name='imputer',
307 |             hyperparameter='categorical_strategy',
308 |             value_range=['constant_!missing!'],
309 |             default_value='constant_!missing!',
310 |         )
311 |         search_space_updates.append(
312 |             node_name='encoder',
313 |             hyperparameter='__choice__',
314 |             value_range=['OneHotEncoder'],
315 |             default_value='OneHotEncoder',
316 |         )
317 | 
318 |     search_space_updates.append(
319 |         node_name='optimizer',
320 |         hyperparameter='AdamWOptimizer:beta1',
321 |         value_range=[0.9],
322 |         default_value=0.9,
323 |     )
324 |     search_space_updates.append(
325 |         node_name='optimizer',
326 |         hyperparameter='AdamWOptimizer:beta2',
327 |         value_range=[0.999],
328 |         default_value=0.999,
329 |     )
330 | 
331 |     # If the CASH formulation of the cocktail is not activated, fix each ingredient here;
332 |     # otherwise, the activation of the methods is chosen by the SMBO optimizer.
333 |     if not args.cash_cocktail:
334 |         # regularization ingredients updates
335 |         search_space_updates.append(
336 |             node_name='network_backbone',
337 |             hyperparameter='ShapedResNetBackbone:use_dropout',
338 |             value_range=args.use_dropout,
339 |             default_value=args.use_dropout[0],
340 |         )
341 |         search_space_updates.append(
342 |             node_name='network_backbone',
343 |             hyperparameter='ShapedResNetBackbone:use_batch_norm',
344 |             value_range=args.use_batch_normalization,
345 |             default_value=args.use_batch_normalization[0],
346 |         )
347 |         search_space_updates.append(
348 |             node_name='network_backbone',
349 |             hyperparameter='ShapedResNetBackbone:use_skip_connection',
350 |             value_range=args.use_skip_connection,
351 |             default_value=args.use_skip_connection[0],
352 |         )
353 | 
354 |         multi_branch_choice = [args.mb_choice]
355 | 
356 |         search_space_updates.append(
357 |             node_name='network_backbone',
358 |             hyperparameter='ShapedResNetBackbone:multi_branch_choice',
359 |             value_range=multi_branch_choice,
360 |             default_value=multi_branch_choice[0],
361 |         )
362 | 
363 |         search_space_updates.append(
364 |             node_name='optimizer',
365 |             hyperparameter='AdamWOptimizer:use_weight_decay',
366 |             value_range=args.use_weight_decay,
367 |             default_value=args.use_weight_decay[0],
368 |         )
369 | 
370 |         trainer_choice = [augmentation_names_to_trainers[args.augmentation]]
371 | 
372 |         search_space_updates.append(
373 |             node_name='trainer',
374 |             hyperparameter='__choice__',
375 |             value_range=trainer_choice,
376 |             default_value=trainer_choice[0],
377 |         )
378 | 
379 |         search_space_updates.append(
380 |             node_name='trainer',
381 |             hyperparameter=f'{trainer_choice[0]}:weighted_loss',
382 |             value_range=[1],
383 |             default_value=1,
384 |         )
385 |         search_space_updates.append(
386 |             node_name='trainer',
387 |             hyperparameter=f'{trainer_choice[0]}:use_lookahead_optimizer',
388 |             value_range=args.use_lookahead,
389 |             default_value=args.use_lookahead[0],
390 |         )
391 |         search_space_updates.append(
392 |             node_name='trainer',
393 |             hyperparameter=f'{trainer_choice[0]}:use_stochastic_weight_averaging',
394 |             value_range=args.use_swa,
395 |             default_value=args.use_swa[0],
396 |         )
397 |         search_space_updates.append(
398 |             node_name='trainer',
399 |             hyperparameter=f'{trainer_choice[0]}:use_snapshot_ensemble',
400 |             value_range=args.use_se,
401 |             default_value=args.use_se[0],
402 |         )
403 | 
404 |     # Disable early stopping (-1) and train on CPU.
405 |     pipeline_update = {
406 |         'early_stopping': -1,
407 |         'min_epochs': args.epochs,
408 |         'epochs': args.epochs,
409 |         'device': 'cpu',
410 |     }
411 | 
412 |     return pipeline_update, search_space_updates, include_updates
413 | 
414 | 
415 | def get_incumbent_results(
416 |     run_history_file: str,
417 |     search_space: ConfigSpace.ConfigurationSpace,
418 | ) -> Tuple[ConfigSpace.Configuration, float]:
419 |     """
420 |     Get the incumbent configuration and its performance from a previous
421 |     HPO search run with AutoPyTorch.
422 | 
423 |     Args:
424 |         run_history_file (str):
425 |             The path where the AutoPyTorch search data is located.
426 |         search_space (ConfigSpace.ConfigurationSpace):
427 |             The configuration space that was used for the previous
428 |             HPO search.
429 | 
430 |     Returns:
431 |         config, incumbent_run_value (Tuple[ConfigSpace.Configuration, float]):
432 |             The incumbent configuration found by the HPO search and the
433 |             validation performance it achieved.
434 | 
435 |     """
436 |     run_history = RunHistory()
437 |     run_history.load_json(
438 |         run_history_file,
439 |         search_space,
440 |     )
441 | 
442 |     run_history_data = run_history.data
443 |     # Sort all evaluated runs by their validation cost (ascending).
444 |     sorted_runvalue_by_cost = sorted(run_history_data.items(), key=lambda item: item[1].cost)
445 |     incumbent_run_key, incumbent_run_value = sorted_runvalue_by_cost[0]
446 |     config = run_history.ids_config[incumbent_run_key.config_id]
447 |     return config, incumbent_run_value
--------------------------------------------------------------------------------
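Usage note (editorial addition, not part of the repository): a minimal sketch of how get_updates_for_regularization_cocktails might be called. The Namespace fields mirror the args attributes read in the function above (cash_cocktail, epochs, use_dropout, use_batch_normalization, use_skip_connection, mb_choice, use_weight_decay, augmentation, use_lookahead, use_swa, use_se); every concrete value below is illustrative only.

import numpy as np
from argparse import Namespace

# Hypothetical run arguments; the values are illustrative, not prescribed.
args = Namespace(
    cash_cocktail=False,       # fix the ingredients instead of letting SMBO choose
    epochs=105,
    use_dropout=[True],
    use_batch_normalization=[False],
    use_skip_connection=[True],
    mb_choice='none',          # assumed to be a valid multi_branch_choice value
    use_weight_decay=[True],
    augmentation='standard',   # maps to StandardTrainer in the dict above
    use_lookahead=[False],
    use_swa=[True],
    use_se=[True],
)

# The third feature is categorical, the first two are numerical.
categorical_indicator = np.array([False, False, True])

pipeline_update, search_space_updates, include_updates = \
    get_updates_for_regularization_cocktails(categorical_indicator, args)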
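Similarly, a hedged sketch of reading back the incumbent with get_incumbent_results after a search has finished. The runhistory path is a hypothetical placeholder, and the tiny configuration space below is a stand-in: in practice it must be the exact space used for the search (e.g. recovered from the fitted AutoPyTorch estimator). The second return value behaves like SMAC's run value, so its validation cost is read via .cost, matching the sorting key used inside the function.

import ConfigSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter

# Hypothetical stand-in for the real search space.
search_space = ConfigSpace.ConfigurationSpace()
search_space.add_hyperparameter(
    UniformFloatHyperparameter('optimizer:AdamWOptimizer:lr', 1e-4, 1e-1, log=True)
)

config, incumbent_run_value = get_incumbent_results(
    './tmp/smac3-output/run_1/runhistory.json',  # hypothetical path
    search_space,
)
print(config)
print('validation cost:', incumbent_run_value.cost)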