├── COPYING
├── environment.yaml
├── readme.md
├── rgpe
│   ├── __init__.py
│   ├── exploring_openml.py
│   ├── methods
│   │   ├── GCPplusPrior.py
│   │   ├── __init__.py
│   │   ├── ablr.py
│   │   ├── kl_weighting.py
│   │   ├── noisy_ei.py
│   │   ├── rgpe.py
│   │   ├── rmogp.py
│   │   ├── taf.py
│   │   ├── tstr.py
│   │   └── warmstarting_ac.py
│   ├── test_functions.py
│   └── utils.py
└── scripts
    ├── generate_commands.py
    ├── install.sh
    └── run_benchmark.py

/COPYING:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2022 Matthias Feurer
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/environment.yaml:
--------------------------------------------------------------------------------
1 | name: rgpe
2 | channels:
3 | - defaults
4 | dependencies:
5 | - _libgcc_mutex=0.1=main
6 | - attrs=19.3.0=py_0
7 | - backcall=0.2.0=py_0
8 | - binutils_impl_linux-64=2.33.1=he6710b0_7
9 | - binutils_linux-64=2.33.1=h9595d00_15
10 | - blas=1.0=mkl
11 | - bleach=3.1.5=py_0
12 | - ca-certificates=2020.6.24=0
13 | - certifi=2020.6.20=py37_0
14 | - cycler=0.10.0=py37_0
15 | - cython=0.29.13=py37he6710b0_0
16 | - dbus=1.13.16=hb2f20db_0
17 | - decorator=4.4.2=py_0
18 | - defusedxml=0.6.0=py_0
19 | - entrypoints=0.3=py37_0
20 | - expat=2.2.9=he6710b0_2
21 | - fontconfig=2.13.0=h9420a91_0
22 | - freetype=2.10.2=h5ab3b9f_0
23 | - gcc_impl_linux-64=7.3.0=habb00fd_1
24 | - gcc_linux-64=7.3.0=h553295d_15
25 | - glib=2.65.0=h3eb4bd4_0
26 | - gst-plugins-base=1.14.0=hbbd80ab_1
27 | - gstreamer=1.14.0=hb31296c_0
28 | - gxx_impl_linux-64=7.3.0=hdf63c60_1
29 | - gxx_linux-64=7.3.0=h553295d_15
30 | - icu=58.2=he6710b0_3
31 | - importlib-metadata=1.7.0=py37_0
32 | - importlib_metadata=1.7.0=0
33 | - intel-openmp=2020.1=217
34 | - ipykernel=5.3.4=py37h5ca1d4c_0
35 | - ipython=7.16.1=py37h5ca1d4c_0
36 | - ipython_genutils=0.2.0=py37_0
37 | - ipywidgets=7.5.1=py_0
38 | - jedi=0.17.2=py37_0
39 | - jinja2=2.11.2=py_0
40 | - joblib=0.16.0=py_0
41 | - jpeg=9b=h024ee3a_2
42 | - jsonschema=3.2.0=py37_1
43 | - jupyter=1.0.0=py37_7
44 | - jupyter_client=6.1.6=py_0
45 | - jupyter_console=6.1.0=py_0
46 | - jupyter_core=4.6.3=py37_0
47 | - kiwisolver=1.2.0=py37hfd86e86_0
48 | - ld_impl_linux-64=2.33.1=h53a641e_7
49 | - libedit=3.1.20191231=h14c3975_1
50 | - libffi=3.3=he6710b0_2
51 | - libgcc-ng=9.1.0=hdf63c60_0
52 | - libgfortran-ng=7.3.0=hdf63c60_0
53 | - libpng=1.6.37=hbc83047_0
54 | - libsodium=1.0.18=h7b6447c_0
55 | - libstdcxx-ng=9.1.0=hdf63c60_0
56 | - libuuid=1.0.3=h1bed415_2
57 | - libxcb=1.14=h7b6447c_0
58 | - libxml2=2.9.10=he19cac6_1
59 | - markupsafe=1.1.1=py37h14c3975_1
60 | - matplotlib=3.2.2=0
61 | - matplotlib-base=3.2.2=py37hef1b27d_0
62 | - mistune=0.8.4=py37h14c3975_1001
63 | - mkl=2020.1=217
64 | - mkl-service=2.3.0=py37he904b0f_0
65 | - mkl_fft=1.1.0=py37h23d657b_0
66 | - mkl_random=1.1.1=py37h0573a6f_0
67 | - nbconvert=5.6.1=py37_1
68 | - nbformat=5.0.7=py_0
69 | - ncurses=6.2=he6710b0_1
70 | - notebook=6.0.3=py37_0
71 | - numpy=1.18.1=py37h4f9e942_0
72 | - numpy-base=1.18.1=py37hde5b4d6_1
73 | - openssl=1.1.1g=h7b6447c_0
74 | - packaging=20.4=py_0
75 | - pandas=0.25.3=py37he6710b0_0
76 | - pandoc=2.10=0
77 | - pandocfilters=1.4.2=py37_1
78 | - parso=0.7.0=py_0
79 | - pcre=8.44=he6710b0_0
80 | - pexpect=4.8.0=py37_1
81 | - pickleshare=0.7.5=py37_1001
82 | - pip=20.1.1=py37_1
83 | - prometheus_client=0.8.0=py_0
84 | - prompt-toolkit=3.0.5=py_0
85 | - prompt_toolkit=3.0.5=0
86 | - ptyprocess=0.6.0=py37_0
87 | - pygments=2.6.1=py_0
88 | - pyparsing=2.4.7=py_0
89 | - pyqt=5.9.2=py37h05f1152_2
90 | - pyrsistent=0.16.0=py37h7b6447c_0
91 | - python=3.7.7=hcff3b4d_5
92 | - python-dateutil=2.8.1=py_0
93 | - pytz=2020.1=py_0
94 | - pyzmq=19.0.1=py37he6710b0_1
95 | - qt=5.9.7=h5867ecd_1
96 | - qtconsole=4.7.5=py_0
97 | - qtpy=1.9.0=py_0
98 | - readline=8.0=h7b6447c_0
99 | - scikit-learn=0.22.1=py37hd81dba3_0
100 | - scipy=1.4.1=py37h0b6359f_0
101 | - send2trash=1.5.0=py37_0
102 | - setuptools=49.2.0=py37_0
103 | - sip=4.19.8=py37hf484d3e_0
104 | - six=1.15.0=py_0
105 | - sqlite=3.32.3=h62c20be_0
106 | - swig=3.0.12=h38cdd7d_3
107 | - terminado=0.8.3=py37_0
108 | - testpath=0.4.4=py_0
109 | - tk=8.6.10=hbc83047_0
110 | - tornado=6.0.4=py37h7b6447c_1
111 | - traitlets=4.3.3=py37_0
112 | - wcwidth=0.2.5=py_0
113 | - webencodings=0.5.1=py37_1
114 | - wheel=0.34.2=py37_0
115 | - widgetsnbextension=3.5.1=py37_0
116 | - xz=5.2.5=h7b6447c_0
117 | - zeromq=4.3.2=he6710b0_2
118 | - zipp=3.1.0=py_0
119 | - zlib=1.2.11=h7b6447c_3
120 | - pip:
121 | - alabaster==0.7.12
122 | - babel==2.8.0
123 | - botorch==0.2.5
124 | - chardet==3.0.4
125 | - configspace==0.4.11
126 | - docutils==0.16
127 | - emcee==3.0.2
128 | - future==0.18.2
129 | - gpytorch==1.1.1
130 | - hpolib2==0.0.1
131 | - idna==2.10
132 | - imagesize==1.2.0
133 | - lazy-import==0.2.2
134 | - lockfile==0.12.2
135 | - pillow==7.2.0
136 | - psutil==5.7.2
137 | - pyaml==20.4.0
138 | - pydoe==0.3.8
139 | - pynisher==0.5.0
140 | - pyrfr==0.8.0
141 | - pyyaml==5.3.1
142 | - requests==2.24.0
143 | - scikit-optimize==0.7.4
144 | - smac==0.12.3
145 | - snowballstemmer==2.0.0
146 | - sphinx==3.1.2
147 | - sphinx-gallery==0.5.0
148 | - sphinx-rtd-theme==0.5.0
149 | - sphinxcontrib-applehelp==1.0.2
150 | - sphinxcontrib-devhelp==1.0.2
151 | - sphinxcontrib-htmlhelp==1.0.3
152 | - sphinxcontrib-jsmath==1.0.1
153 | - sphinxcontrib-qthelp==1.0.3
154 | - sphinxcontrib-serializinghtml==1.1.4
155 | - torch==1.5.0+cpu
156 | - torchvision==0.6.0+cpu
157 | - urllib3==1.25.10
158 | 
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Practical Transfer Learning for Bayesian Optimization
2 | 
3 | Code accompanying
4 | 
5 | Practical Transfer Learning for Bayesian Optimization
6 | Matthias Feurer, Benjamin Letham, Frank Hutter and Eytan Bakshy
7 | https://arxiv.org/pdf/1802.02219v3.pdf
8 | 
9 | All code was developed using Python 3.7 and SMAC3 v0.12.3. The exact versions of the software
10 | used are given in environment.yaml.
11 | 
12 | ## Guide to the code
13 | 
14 | ### scripts/generate_commands.py
15 | 
16 | Generates the commands for running experiments. See the bottom of this file for usage.
17 | 
18 | ### scripts/run_benchmark.py
19 | 
20 | Main script. Configures SMAC to use the actual transfer learning methods. Then it applies SMAC
21 | to the chosen benchmark function and outputs a `.json` file containing the results.
22 | 
23 | ### scripts/install.sh
24 | 
25 | Installation file used to set up the conda environment. We cannot guarantee that this leads to
26 | the exact same environment that we used for our experiments.
27 | 
28 | ### rgpe/methods
29 | 
30 | Contains the actual implementations of all methods used throughout the paper:
31 | 
32 | * ablr.py: Perrone et al., NeurIPS 2019
33 | * GCPplusPrior.py: Salinas et al., ICML 2020
34 | * kl_weighting.py: Ramachandran et al., ECML 2019
35 | * noisy_ei.py: Letham et al., Bayesian Analysis, 2019
36 | * rgpe.py: This paper
37 | * rmogp.py: This paper
38 | * taf.py: Wistuba et al., Machine Learning, 2018
39 | * tstr.py: Wistuba et al., ECML 2016
40 | * warmstarting_ac.py: Lindauer et al., AAAI 2018
41 | 
42 | ### rgpe/test_functions.py
43 | 
44 | Implementation of all test functions used throughout the paper. Required data is either downloaded
45 | from the internet (for surrogates based on OpenML data), or needs to be downloaded manually
46 | (AdaBoost, SVM, LCBench).
47 | 
48 | To run the LCBench benchmark, clone the [LCBench repository](https://github.com/automl/LCBench/)
49 | and set the paths in the class `NNGrid` to point to where you cloned the repository and to the
50 | directories where you downloaded the LCBench data
51 | (see [here](https://github.com/automl/LCBench/#downloading-the-data) for downloading the data).
52 | 
53 | ### rgpe/adaboost
54 | 
55 | AdaBoost data from
56 | [Schilling et al.](https://github.com/nicoschilling/ECML2016/tree/master/data/adaboost). Please
57 | download these files from Nico's repository and place them here.
58 | 
59 | ### rgpe/svm
60 | 
61 | SVM data from [Schilling et al.](https://github.com/nicoschilling/ECML2016/tree/master/data/svm).
Please download the files from Nico's repository and place them here.
62 | 
63 | 
64 | ### rgpe/utils.py
65 | 
66 | Helper functions for obtaining Gaussian process objects, conducting Sobol sequence construction
67 | and computing expected improvement.
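
For illustration, the expected-improvement helper follows the standard closed-form EI for minimization. A minimal sketch, not the exact function in `rgpe/utils.py` (whose signature may differ):

```python
import numpy as np
from scipy.stats import norm

def expected_improvement(mean, var, eta):
    """Closed-form EI for minimization from predictive mean/variance and incumbent value eta."""
    std = np.sqrt(np.maximum(var, 1e-12))  # guard against zero predictive variance
    z = (eta - mean) / std
    return (eta - mean) * norm.cdf(z) + std * norm.pdf(z)
```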
68 | 69 | ## Example calls 70 | 71 | ### RGPE 72 | 73 | ``` 74 | python scripts/run_benchmark_smac.py --benchmark adaboost --method rgpe --seed 5 --task 20 \ 75 | --empirical-meta-configs --learned-initial-design copula --weight-dilution-strategy probabilistic-ld \ 76 | --sampling-mode bootstrap --normalization Copula --num-posterior-samples 1000 --n-init 1 \ 77 | --output-file results/adaboost/rgpe-bootstrap-Copula-probabilistic-ld-NoisyEI-1000--gpmetadata-learnedinit-1/5_50_20.json \ 78 | --variance-mode average --acquisition-function-name 30 --target-model-incumbent False 79 | ``` 80 | 81 | ### TAF 82 | 83 | ``` 84 | python scripts/run_benchmark_smac.py --benchmark adaboost --method taf --seed 8 --task 47 \ 85 | --empirical-meta-configs --learned-initial-design unscaled --bandwidth 0.1 \ 86 | --weighting-mode tstr --n-init 1 --normalization None --weight_dilution_strategy None \ 87 | --output-file results/adaboost/taf-tstr-None-None-0.100000-gpmetadata-learnedinit-2/8_50_47.json 88 | ``` 89 | 90 | ## How to reproduce the experiments 91 | 92 | 1. Install everything 93 | 2. Run only GP(MAP) to obtain data to warmstart transfer learning methods with. To obtain 94 | commands for doing so run `python generate_commands.py --benchmark adaboost --setup None` 95 | 3. Run (almost) everything else. To obtain the commands for doing so run 96 | `python generate_commands.py --benchmark adaboost --setup -gpmetadata-learnedinit` 97 | 4. Finally, also run some methods which do not contain a learned initialization: 98 | `python generate_commands.py --benchmark adaboost --setup -gpmetadata` 99 | 100 | -------------------------------------------------------------------------------- /rgpe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/transfer-hpo-framework/60a2146c53b2489015576878946ec638d868d990/rgpe/__init__.py -------------------------------------------------------------------------------- /rgpe/methods/GCPplusPrior.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Tuple, Union 2 | 3 | from ConfigSpace import Configuration 4 | import numpy as np 5 | from scipy.stats import norm 6 | import torch.nn as nn 7 | import torch 8 | 9 | from smac.configspace import convert_configurations_to_array 10 | from smac.epm.base_epm import AbstractEPM 11 | from smac.optimizer.acquisition import AbstractAcquisitionFunction 12 | from rgpe.utils import get_gaussian_process, copula_transform 13 | 14 | 15 | class GCPplusPrior(AbstractEPM): 16 | 17 | def __init__( 18 | self, 19 | training_data: Dict[int, Dict[str, Union[List[Configuration], np.ndarray]]], 20 | **kwargs 21 | ): 22 | """ 23 | Gaussian Copula Process plus prior from "A Quantile-based Approach for Hyperparameter 24 | Transfer Learning" by Salinas, Shen and Perrone, ICML 2020, 25 | https://proceedings.icml.cc/static/paper_files/icml/2020/4367-Paper.pdf 26 | 27 | This is a re-implementation that is not based on the original code which can be found at 28 | https://github.com/geoalgo/A-Quantile-based-Approach-for-Hyperparameter-Transfer-Learning 29 | 30 | Parameters 31 | ---------- 32 | training_data 33 | Dictionary containing the training data for each meta-task. Mapping from an integer ( 34 | task ID) to a dictionary, which is a mapping from configuration to performance. 
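In this implementation the inner dictionary holds the keys ``'configurations'`` (a list of Configuration objects) and ``'y'`` (an array with the corresponding observed performances).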
35 | """ 36 | 37 | if kwargs.get('instance_features') is not None: 38 | raise NotImplementedError() 39 | super().__init__(**kwargs) 40 | self.training_data = training_data 41 | 42 | self.categorical_mask = np.array(self.types) > 0 43 | self.n_categories = np.sum(self.types) 44 | 45 | torch.manual_seed(self.seed) 46 | self.rng = np.random.RandomState(self.seed) 47 | 48 | X_train = [] 49 | y_train = [] 50 | for task in training_data: 51 | Y = training_data[task]['y'] 52 | y_scaled = copula_transform(Y) 53 | configs = training_data[task]['configurations'] 54 | X = convert_configurations_to_array(configs) 55 | for x, y in zip(X, y_scaled): 56 | X_train.append(x) 57 | y_train.append(y) 58 | X_train = np.array(X_train) 59 | X_train = self._preprocess(X_train) 60 | y_train = np.array(y_train) 61 | 62 | class NLLHLoss(nn.Module): 63 | 64 | def forward(self, input, target): 65 | # Assuming network outputs var 66 | std = torch.log(1 + torch.exp(input[:, 1])) + 10e-12 67 | mu = input[:, 0].view(-1, 1) 68 | 69 | # Pytorch Normal indeed takes the standard deviation as argument 70 | n = torch.distributions.normal.Normal(mu, std) 71 | loss = n.log_prob(target) 72 | return -torch.mean(loss) 73 | 74 | # TODO we could add embeddings for categorical hyperparameters here to improve performance? 75 | model = torch.nn.Sequential( 76 | torch.nn.Linear(X_train.shape[1], 50).float(), 77 | torch.nn.Dropout(0.1), 78 | torch.nn.ReLU(), 79 | torch.nn.Linear(50, 50).float(), 80 | torch.nn.Dropout(0.1), 81 | torch.nn.ReLU(), 82 | torch.nn.Linear(50, 50).float(), 83 | torch.nn.Dropout(0.1), 84 | torch.nn.ReLU(), 85 | torch.nn.Linear(50, 2).float(), 86 | ) 87 | loss_fn = NLLHLoss() 88 | optimizer = torch.optim.Adam(model.parameters(), lr=0.01) 89 | scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, 90 | step_size=1000, 91 | gamma=0.2) 92 | for iter in range(3000): 93 | 94 | batch = self.rng.choice(len(X_train), size=64) 95 | x_batch = torch.tensor(X_train[batch]).float() 96 | y_batch = torch.tensor(y_train[batch]).float() 97 | 98 | y_pred = model(x_batch) 99 | 100 | # Compute and print loss. 
101 | loss = loss_fn(y_pred, y_batch) 102 | if iter % 100 == 99: 103 | print(iter, loss.item()) 104 | 105 | optimizer.zero_grad() 106 | loss.backward() 107 | optimizer.step() 108 | scheduler.step() 109 | self.prior = model 110 | 111 | def _train(self, X: np.ndarray, Y: np.ndarray) -> AbstractEPM: 112 | X = self._preprocess(X) 113 | prior_prediction = self.prior.forward(torch.tensor(X).float()).detach().numpy() 114 | prior_mean = prior_prediction[:, 0].flatten() 115 | prior_std = prior_prediction[:, 1].flatten() 116 | prior_std = np.log(1 + np.exp(prior_std)) + 10e-12 117 | 118 | y_scaled = copula_transform(Y).flatten() 119 | residual = (y_scaled - prior_mean) / prior_std 120 | 121 | self.target_model = get_gaussian_process( 122 | bounds=self.bounds, 123 | types=self.types, 124 | configspace=self.configspace, 125 | rng=self.rng, 126 | kernel=None, 127 | ) 128 | self.target_model._train(X, residual) 129 | 130 | return self 131 | 132 | def predict(self, X: np.ndarray, cov_return_type: str = 'diagonal_cov') -> Tuple[np.ndarray, np.ndarray]: 133 | X = self._preprocess(X) 134 | prior_prediction = self.prior.forward(torch.tensor(X).float()).detach().numpy() 135 | prior_mean = prior_prediction[:, 0] 136 | prior_std = prior_prediction[:, 1] 137 | prior_std = (np.log(1 + np.exp(prior_std)) + 10e-12) 138 | gp_mean, gp_var = self.target_model._predict(X) 139 | mean_x = gp_mean * prior_std + prior_mean 140 | covar_x = np.sqrt(gp_var) * prior_std 141 | return mean_x.reshape((-1, 1)), covar_x.reshape((-1, 1)) 142 | 143 | def _preprocess(self, X: np.ndarray) -> np.ndarray: 144 | """Perform one-hot-encoding of categorical hyperparameters.""" 145 | categories_array = np.zeros((X.shape[0], self.n_categories)) 146 | categories_idx = 0 147 | for idx in range(len(self.types)): 148 | if self.types[idx] == 0: 149 | continue 150 | else: 151 | for j in range(self.types[idx]): 152 | mask = X[:, idx] == j 153 | categories_array[mask, categories_idx] = 1 154 | categories_idx += 1 155 | numerical_array = X[:, ~self.categorical_mask] 156 | X = np.concatenate((numerical_array, categories_array), axis=1) 157 | X[np.isnan(X)] = -1.0 158 | return X 159 | 160 | 161 | class CustomEI(AbstractAcquisitionFunction): 162 | """EI for residual GP as defined in Section 4.2 of Salinas et al.""" 163 | 164 | def __init__(self, model: AbstractEPM): 165 | 166 | super().__init__(model) 167 | self.eta = None 168 | self._required_updates = ('model', 'eta') 169 | 170 | def _compute(self, X: np.ndarray) -> np.ndarray: 171 | if len(X.shape) == 1: 172 | X = X[:, np.newaxis] 173 | 174 | m, v = self.model.predict_marginalized_over_instances(X) 175 | s = np.sqrt(v) 176 | 177 | if self.eta is None: 178 | raise ValueError('No current best specified. Call update(' 179 | 'eta=) to inform the acquisition function ' 180 | 'about the current best value.') 181 | 182 | def calculate_f(): 183 | z = (self.eta - m) / v 184 | return v * (z * norm.cdf(z) + norm.pdf(z)) 185 | 186 | if np.any(s == 0.0): 187 | # if std is zero, we have observed x on all instances 188 | # using a RF, std should be never exactly 0.0 189 | # Avoid zero division by setting all zeros in s to one. 190 | # Consider the corresponding results in f to be zero. 
191 | self.logger.warning("Predicted std is 0.0 for at least one sample.") 192 | s_copy = np.copy(s) 193 | s[s_copy == 0.0] = 1.0 194 | f = calculate_f() 195 | f[s_copy == 0.0] = 0.0 196 | else: 197 | f = calculate_f() 198 | if (f < 0).any(): 199 | raise ValueError( 200 | "Expected Improvement is smaller than 0 for at least one " 201 | "sample.") 202 | return f 203 | -------------------------------------------------------------------------------- /rgpe/methods/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/transfer-hpo-framework/60a2146c53b2489015576878946ec638d868d990/rgpe/methods/__init__.py -------------------------------------------------------------------------------- /rgpe/methods/ablr.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Tuple, Union 2 | 3 | from ConfigSpace import Configuration 4 | import numpy as np 5 | import scipy.optimize 6 | import torch.nn as nn 7 | import torch 8 | 9 | from smac.configspace import convert_configurations_to_array 10 | from smac.epm.base_epm import AbstractEPM 11 | 12 | from rgpe.utils import copula_transform 13 | 14 | D = 50 # Hidden layer size 15 | 16 | precision = 32 17 | if precision == 32: 18 | t_dtype = torch.float32 19 | np_dtype = np.float32 20 | else: 21 | t_dtype = torch.float64 22 | np_dtype = np.float64 23 | 24 | 25 | class Net(torch.nn.Module): 26 | """ 27 | Implementation of the Adaptive Bayesian Linear Regression (ABLR) for multi-task 28 | hyperparameter optimization. 29 | 30 | For details see https://papers.nips.cc/paper/7917-scalable-hyperparameter-transfer-learning.pdf 31 | 32 | This class implements the neural network. For the class connecting it to SMAC see below.""" 33 | 34 | def __init__(self, num_tasks, n_attributes, meta_data=None, use_copula_transform=False): 35 | 36 | self.num_tasks = num_tasks 37 | self.n_attributes = n_attributes 38 | self.meta_data = meta_data 39 | self.use_copula_transform = use_copula_transform 40 | 41 | self.mean_ = None 42 | self.std_ = None 43 | 44 | super().__init__() 45 | self.total_n_params = 0 46 | 47 | hidden1 = nn.Linear(self.n_attributes, D) 48 | hidden2 = nn.Linear(D, D) 49 | hidden3 = nn.Linear(D, D) 50 | self.layers = [ 51 | hidden1, hidden2, hidden3 52 | ] 53 | if precision == 32: 54 | self.layers = [layer.float() for layer in self.layers] 55 | else: 56 | self.layers = [layer.double() for layer in self.layers] 57 | 58 | # initialization of alpha and beta 59 | # Instead of alpha, we model 1/alpha and use a different range for the values 60 | # (i.e. 
1e-6 to 1 instead of 1 to 1e6) 61 | self.alpha_t = torch.tensor([1] * self.num_tasks, requires_grad=True, dtype=t_dtype) 62 | self.total_n_params += len(self.alpha_t) 63 | self.beta_t = torch.tensor([1e3] * self.num_tasks, requires_grad=True, dtype=t_dtype) 64 | self.total_n_params += len(self.beta_t) 65 | 66 | # initialization of the weights 67 | for layer in self.layers: 68 | torch.nn.init.xavier_normal_(layer.weight) 69 | if len(layer.weight.shape) == 1: 70 | size = layer.weight.shape[0] 71 | else: 72 | size = layer.weight.shape[0] * layer.weight.shape[1] 73 | self.total_n_params += size 74 | 75 | # initialize arrays for the optimization of sum log-likelihood 76 | self.K_t = [torch.tensor(0.0, dtype=t_dtype) for i in range(self.num_tasks)] 77 | self.L_t = [torch.tensor(0.0, dtype=t_dtype) for i in range(self.num_tasks)] 78 | self.L_t_inv = [torch.tensor(0.0, dtype=t_dtype) for i in range(self.num_tasks)] 79 | self.e_t = [torch.tensor(0.0, dtype=t_dtype) for i in range(self.num_tasks)] 80 | 81 | def forward(self, x): 82 | """ 83 | Simple forward pass through the neural network 84 | """ 85 | 86 | for layer in self.layers: 87 | x = layer(x) 88 | x = torch.tanh(x) 89 | 90 | return x 91 | 92 | def loss(self, hp, training_datasets): 93 | """ 94 | Negative log marginal likelihood of multi-task ABLR 95 | hp : np.ndarray 96 | Contains the weights of the network, alpha and beta 97 | training_datasets : list 98 | tuples (X, y) for the meta-datasets and the current dataset 99 | """ 100 | # Apply the flattened hyperparameter array to the neural network 101 | 102 | if precision == 32: 103 | hp = hp.astype(np.float32) 104 | 105 | idx = 0 106 | for layer in self.layers: 107 | weights = layer.weight.data.numpy().astype(np_dtype) 108 | if len(weights.shape) == 1: 109 | size = weights.shape[0] 110 | else: 111 | size = weights.shape[0] * weights.shape[1] 112 | layer.weight.data = torch.from_numpy(hp[idx: idx + size].reshape(weights.shape)) 113 | layer.weight.requires_grad_() 114 | idx += size 115 | 116 | self.alpha_t.data = torch.from_numpy(hp[idx: idx + self.num_tasks]) 117 | idx += self.num_tasks 118 | self.alpha_t.requires_grad_() 119 | self.beta_t.data = torch.from_numpy(hp[idx: idx + self.num_tasks]) 120 | idx += self.num_tasks 121 | self.beta_t.requires_grad_() 122 | assert idx == self.total_n_params 123 | 124 | # Likelihood computation starts here 125 | self.likelihood = None 126 | 127 | for i, (x, y) in enumerate(training_datasets): 128 | 129 | out = self.forward(x) 130 | 131 | # Loss function calculations, see 6th Equation on the first page of the Appendix 132 | # https://papers.nips.cc/paper/7917-scalable-hyperparameter-transfer-learning-supplemental.zip 133 | assert (torch.t(out).shape == (D, x.shape[0])) 134 | # Remember that we model 1/alpha instead of alpha 135 | r = self.beta_t[i] * self.alpha_t[i] 136 | K_t = torch.add( 137 | torch.eye(D, dtype=t_dtype), 138 | r * torch.matmul(torch.t(out), out) 139 | ) 140 | self.K_t[i] = K_t.clone() 141 | assert (K_t.shape == (D, D)) 142 | 143 | L_t = torch.cholesky(K_t, upper=False) 144 | self.L_t[i] = L_t.clone() 145 | # Naive version: 146 | # self.L_t_inv[i] = torch.inverse(L_t) 147 | # e_t = torch.matmul(self.L_t_inv[i], torch.matmul(torch.t(out), y)) 148 | e_t = torch.triangular_solve(torch.matmul(torch.t(out), y), L_t, upper=False).solution 149 | self.e_t[i] = e_t.view((D, 1)).clone() 150 | assert (self.e_t[i].shape == (D, 1)) 151 | 152 | norm_y_t = torch.norm(y, 2, 0) 153 | norm_c_t = torch.norm(e_t[i], 2, 0) 154 | 155 | L1 = -(x.shape[0] / 2 * 
torch.log(self.beta_t[i])) 156 | L2 = self.beta_t[i] / 2 * (torch.pow(norm_y_t, 2) -r * torch.pow(norm_c_t, 2)) 157 | L3 = torch.sum(torch.log(torch.diag(L_t))) 158 | L = L1 + L2 + L3 159 | 160 | if self.likelihood is None: 161 | self.likelihood = L 162 | else: 163 | self.likelihood = torch.add(self.likelihood, L) 164 | 165 | # Get the gratient and put transform it into the flat array structure required by 166 | # scipy.optimize 167 | g = np.zeros((self.total_n_params)) 168 | self.likelihood.backward() 169 | 170 | idx = 0 171 | for layer in self.layers: 172 | gradients = layer.weight.grad.data.numpy().astype(np_dtype) 173 | if len(gradients.shape) == 1: 174 | size = gradients.shape[0] 175 | else: 176 | size = gradients.shape[0] * gradients.shape[1] 177 | g[idx: idx + size] = gradients.flatten() 178 | idx += size 179 | layer.weight.grad.zero_() 180 | 181 | g[idx: idx + self.num_tasks] = self.alpha_t.grad.data.numpy().astype(np_dtype) 182 | idx += self.num_tasks 183 | g[idx: idx + self.num_tasks] = self.beta_t.grad.data.numpy().astype(np_dtype) 184 | idx += self.num_tasks 185 | self.alpha_t.grad.data.zero_() 186 | self.beta_t.grad.data.zero_() 187 | self._gradient = g 188 | 189 | return self.likelihood 190 | 191 | def gradient(self, hp, training_datasets): 192 | """ 193 | Gradient of the parameters of the network that are optimized through LBFGS 194 | 195 | The gradient is actually stored during the forward pass, this is only a convenience 196 | function to work with the LBFGS interface of scipy. 197 | """ 198 | 199 | return self._gradient 200 | 201 | def optimize(self, training_datasets): 202 | """ 203 | Optimize weights, alpha and beta with LBFGSB 204 | """ 205 | 206 | # Initial flattened array of weights used as a starting point of LBFGS 207 | init = np.ones((self.total_n_params), dtype=np_dtype) 208 | 209 | idx = 0 210 | for layer in self.layers: 211 | weights = layer.weight.data.numpy().astype(np_dtype) 212 | if len(weights.shape) == 1: 213 | size = weights.shape[0] 214 | else: 215 | size = weights.shape[0] * weights.shape[1] 216 | init[idx: idx + size] = weights.flatten() 217 | idx += size 218 | mybounds = [[None, None] for i in range(idx)] 219 | 220 | init[idx: idx + self.num_tasks] = self.alpha_t.data.numpy().astype(np_dtype) 221 | idx += self.num_tasks 222 | mybounds.extend([[1e-3, 1e3]] * self.num_tasks) 223 | init[idx: idx + self.num_tasks] = self.beta_t.data.numpy().astype(np_dtype) 224 | idx += self.num_tasks 225 | mybounds.extend([[1, 1e6]] * self.num_tasks) 226 | 227 | assert self.total_n_params == len(mybounds), (self.total_n_params, len(mybounds)) 228 | assert self.total_n_params == idx 229 | 230 | res = scipy.optimize.fmin_l_bfgs_b( 231 | lambda *args: float(self.loss(*args)), 232 | x0=init, 233 | bounds=mybounds, 234 | fprime=self.gradient, 235 | args=(training_datasets, ), 236 | ) 237 | print(self.loss(res[0], training_datasets)) # This updates the internal states 238 | print(res) 239 | 240 | def train(self, X: np.ndarray, y: np.ndarray): 241 | """Optimize the neural network given training data ``X``. 242 | 243 | Training data is concatenated with meta-data and then passed to the optimize function. 
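Each meta-task's targets are either copula-transformed or standardized to zero mean and unit variance per task before the meta-data and the current task's data are stacked into the list of (X, y) tensors handed to ``optimize``.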
244 | """ 245 | y = y.reshape((y.shape[0], 1)) 246 | 247 | training_datasets = [] 248 | for meta_task in self.meta_data: 249 | meta_task_data = self.meta_data[meta_task] 250 | X_t = meta_task_data[0] 251 | y_t = meta_task_data[1] 252 | if X_t.shape[1] != self.n_attributes: 253 | raise ValueError((X_t.shape[1], self.n_attributes)) 254 | 255 | if self.use_copula_transform: 256 | y_t = copula_transform(y_t) 257 | else: 258 | mean = y_t.mean() 259 | std = y_t.std() 260 | if std == 0: 261 | std = 1 262 | y_t = (y_t.copy() - mean) / std 263 | y_t = y_t.reshape(y_t.shape[0], 1) 264 | 265 | training_datasets.append(( 266 | torch.tensor(X_t, dtype=t_dtype), 267 | torch.tensor(y_t, dtype=t_dtype), 268 | )) 269 | 270 | if X.shape[1] != self.n_attributes: 271 | raise ValueError((X.shape[1], self.n_attributes)) 272 | 273 | if self.use_copula_transform: 274 | self.mean_ = 0 275 | self.std_ = 1 276 | y_ = copula_transform(y.copy()) 277 | else: 278 | self.mean_ = y.mean() 279 | self.std_ = y.std() 280 | if self.std_ == 0: 281 | self.std_ = 1 282 | y_ = (y.copy() - self.mean_) / self.std_ 283 | 284 | training_datasets.append(( 285 | torch.tensor(X, dtype=t_dtype), 286 | torch.tensor(y_, dtype=t_dtype), 287 | )) 288 | if len(training_datasets) != self.num_tasks: 289 | raise ValueError((len(training_datasets), self.num_tasks)) 290 | 291 | self.optimize(training_datasets) 292 | 293 | def predict(self, X_test: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: 294 | """Returns the predictive mean and variance of the objective function at 295 | the given test points. 296 | """ 297 | X_test = torch.tensor(X_test, dtype=t_dtype) 298 | out = self.forward(X_test) 299 | 300 | # Naive implementation: 301 | #m = torch.matmul(torch.matmul(torch.t(self.e_t[-1]), self.L_t_inv[-1]), torch.t(out)) 302 | m = torch.matmul( 303 | torch.t(self.e_t[-1]), 304 | torch.triangular_solve(torch.t(out), self.L_t[-1], upper=False).solution, 305 | ) 306 | # Remember that we model 1/alpha instead of alpha 307 | m = (self.beta_t[-1] * self.alpha_t[-1]) * m.reshape((m.shape[1], 1)) 308 | assert (m.shape == (X_test.shape[0], 1)) 309 | if not torch.isfinite(m).all(): 310 | raise ValueError('Infinite predictions %s for input %s' % (m, X_test)) 311 | m = m * self.std_ + self.mean_ 312 | 313 | # Naive implementation 314 | #v = torch.matmul(self.L_inv_t[-1], torch.t(out)) 315 | v = torch.triangular_solve(torch.t(out), self.L_t[-1], upper=False).solution 316 | # Remember that we model 1/alpha instead of alpha 317 | v = self.alpha_t[-1] * torch.pow(torch.norm(v, dim=0), 2) 318 | v = v.reshape((-1, 1)) 319 | assert (v.shape == (X_test.shape[0], 1)), v.shape 320 | if not torch.isfinite(v).all(): 321 | raise ValueError('Infinite predictions %s for input %s' % (v, X_test)) 322 | v = v * (self.std_ ** 2) 323 | 324 | return m.detach().numpy(), v.detach().numpy() 325 | 326 | 327 | class ABLR(AbstractEPM): 328 | """ 329 | Implementation of the Adaptive Bayesian Linear Regression (ABLR) for multi-task 330 | hyperparameter optimization. 331 | 332 | For details see https://papers.nips.cc/paper/7917-scalable-hyperparameter-transfer-learning.pdf 333 | 334 | This is the wrapper class to be used with SMAC, which internally uses the neural network 335 | class in the code above. 
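If network training fails due to numerical issues it is retried from scratch (up to 10 times); if no network could be trained, ``predict`` falls back to returning random values.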
336 | """ 337 | 338 | def __init__( 339 | self, 340 | training_data: Dict[int, Dict[str, Union[List[Configuration], np.ndarray]]], 341 | use_copula_transform: bool = False, 342 | **kwargs 343 | ): 344 | if kwargs.get('instance_features') is not None: 345 | raise NotImplementedError() 346 | super().__init__(**kwargs) 347 | self.training_data = training_data 348 | self.use_copula_transform = use_copula_transform 349 | self.nn = None 350 | torch.manual_seed(self.seed) 351 | self.rng = np.random.RandomState(self.seed) 352 | 353 | self.categorical_mask = np.array(self.types) > 0 354 | self.n_categories = np.sum(self.types) 355 | 356 | def _train(self, X: np.ndarray, Y: np.ndarray) -> AbstractEPM: 357 | meta_data = dict() 358 | for id_ in self.training_data: 359 | configs = self.training_data[id_]['configurations'] 360 | X_ = convert_configurations_to_array(configs) 361 | X_ = self._preprocess(X_) 362 | meta_data[id_] = ( 363 | X_, 364 | self.training_data[id_]['y'].flatten(), 365 | None, 366 | ) 367 | 368 | X = self._preprocess(X) 369 | for i in range(10): 370 | try: 371 | # Sometimes the neural network training fails due to numerical issues - we 372 | # then retrain the network from scratch 373 | if self.nn is None: 374 | self.nn = Net( 375 | num_tasks=len(self.training_data) + 1, 376 | n_attributes=X.shape[1], 377 | meta_data=meta_data, 378 | use_copula_transform=self.use_copula_transform, 379 | ) 380 | self.nn.train(X, Y) 381 | break 382 | except Exception as e: 383 | print('Training failed %d/%d!' % (i + 1, 10)) 384 | print(e) 385 | self.nn = None 386 | 387 | return self 388 | 389 | def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: 390 | X = self._preprocess(X) 391 | if self.nn: 392 | return self.nn.predict(X) 393 | else: 394 | return self.rng.randn(X.shape[0], 1), self.rng.randn(X.shape[0], 1) 395 | 396 | def _preprocess(self, X: np.ndarray) -> np.ndarray: 397 | """Perform one-hot-encoding of categorical hyperparameters.""" 398 | categories_array = np.zeros((X.shape[0], self.n_categories)) 399 | categories_idx = 0 400 | for idx in range(len(self.types)): 401 | if self.types[idx] == 0: 402 | continue 403 | else: 404 | for j in range(self.types[idx]): 405 | mask = X[:, idx] == j 406 | categories_array[mask, categories_idx] = 1 407 | categories_idx += 1 408 | numerical_array = X[:, ~self.categorical_mask] 409 | X = np.concatenate((numerical_array, categories_array), axis=1) 410 | X[np.isnan(X)] = -1.0 411 | return X 412 | -------------------------------------------------------------------------------- /rgpe/methods/kl_weighting.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import json 3 | from typing import Dict, List, Optional, Tuple, Union 4 | 5 | from ConfigSpace import Configuration 6 | import numpy as np 7 | import numpy.linalg as npla 8 | import scipy.linalg as spla 9 | import scipy.optimize 10 | import sklearn.metrics 11 | from smac.configspace import convert_configurations_to_array 12 | from smac.epm.base_epm import AbstractEPM 13 | 14 | from rgpe.utils import get_gaussian_process 15 | 16 | # Code from https://github.com/HIPS/Spearmint/blob/PESC/spearmint/acquisition_functions/predictive_entropy_search.py#L944 17 | """ 18 | See Miguel's paper (http://arxiv.org/pdf/1406.2541v1.pdf) section 2.1 and Appendix A 19 | Returns a function the samples from the approximation... 
20 | if testing=True, it does not return the result but instead the random cosine for testing only 21 | We express the kernel as an expectation. But then we approximate the expectation with a weighted sum 22 | theta are the coefficients for this weighted sum. that is why we take the dot product of theta at 23 | the end 24 | we also need to scale at the end so that it's an average of the random features. 25 | if use_woodbury_if_faster is False, it never uses the woodbury version 26 | """ 27 | 28 | def chol2inv(chol): 29 | return spla.cho_solve((chol, False), np.eye(chol.shape[0])) 30 | 31 | 32 | def sample_gp_with_random_features(gp, nFeatures, rng, testing=False, use_woodbury_if_faster=True): 33 | d = len(gp.configspace.get_hyperparameters()) 34 | N_data = gp.gp.X_train_.shape[0] 35 | 36 | nu2 = np.exp(gp.gp.kernel.theta[-1]) 37 | 38 | sigma2 = np.exp(gp.gp.kernel.theta[0]) # the kernel amplitude 39 | 40 | # We draw the random features - in contrast to the original code we only support Matern5/2 41 | m = 5.0 / 2.0 42 | W = ( 43 | rng.randn(nFeatures, d) / gp.gp.kernel.theta[1: -1] / 44 | np.sqrt(rng.gamma(shape=m, scale=1.0 / m, size=(nFeatures, 1))) 45 | ) 46 | b = rng.uniform(low=0, high=2 * np.pi, size=nFeatures)[:, None] 47 | 48 | # Just for testing the random features in W and b... doesn't test the weights theta 49 | if testing: 50 | return lambda x: np.sqrt(2 * sigma2 / nFeatures) * np.cos(np.dot(W, x.T) + b) 51 | # K(x1, x2) \approx np.dot(test(x1).T, tst_fun(x2)) 52 | 53 | randomness = rng.randn(nFeatures) 54 | 55 | # W has size nFeatures by d 56 | # tDesignMatrix has size Nfeatures by Ndata 57 | # woodbury has size Ndata by Ndata 58 | # z is a vector of length nFeatures 59 | 60 | gp_inputs = gp.gp.X_train_ 61 | 62 | # tDesignMatrix has size Nfeatures by Ndata 63 | tDesignMatrix = np.sqrt(2.0 * sigma2 / nFeatures) * np.cos(np.dot(W, gp_inputs.T) + b) 64 | 65 | if use_woodbury_if_faster and N_data < nFeatures: 66 | # you can do things in cost N^2d instead of d^3 by doing this woodbury thing 67 | 68 | # We obtain the posterior on the coefficients 69 | woodbury = np.dot(tDesignMatrix.T, tDesignMatrix) + nu2 * np.eye(N_data) 70 | chol_woodbury = spla.cholesky(woodbury) 71 | # inverseWoodbury = chol2inv(chol_woodbury) 72 | z = np.dot(tDesignMatrix, gp.gp.y_train_ / nu2) 73 | # m = z - np.dot(tDesignMatrix, np.dot(inverseWoodbury, np.dot(tDesignMatrix.T, z))) 74 | m = z - np.dot(tDesignMatrix, 75 | spla.cho_solve((chol_woodbury, False), np.dot(tDesignMatrix.T, z))) 76 | # (above) alternative to original but with cho_solve 77 | 78 | # z = np.dot(tDesignMatrix, gp.observed_values / nu2) 79 | # m = np.dot(np.eye(nFeatures) - \ 80 | # np.dot(tDesignMatrix, spla.cho_solve((chol_woodbury, False), tDesignMatrix.T)), z) 81 | 82 | # woodbury has size N_data by N_data 83 | D, U = npla.eigh(woodbury) 84 | # sort the eigenvalues (not sure if this matters) 85 | idx = D.argsort()[::-1] # in decreasing order instead of increasing 86 | D = D[idx] 87 | U = U[:, idx] 88 | R = 1.0 / (np.sqrt(D) * (np.sqrt(D) + np.sqrt(nu2))) 89 | # R = 1.0 / (D + np.sqrt(D*nu2)) 90 | 91 | # We sample from the posterior of the coefficients 92 | theta = randomness - \ 93 | np.dot(tDesignMatrix, 94 | np.dot(U, (R * np.dot(U.T, np.dot(tDesignMatrix.T, randomness))))) + m 95 | 96 | else: 97 | # all you are doing here is sampling from the posterior of the linear model 98 | # that approximates the GP 99 | # Sigma = matrixInverse(np.dot(tDesignMatrix, tDesignMatrix.T) / nu2 + np.eye( 100 | # nFeatures)) 101 | # m = np.dot(Sigma, 
np.dot(tDesignMatrix, gp.observed_values / nu2)) 102 | # theta = m + np.dot(randomness, spla.cholesky(Sigma, lower=False)).T 103 | 104 | # Sigma = matrixInverse(np.dot(tDesignMatrix, tDesignMatrix.T) + nu2*np.eye(nFeatures)) 105 | # m = np.dot(Sigma, np.dot(tDesignMatrix, gp.observed_values)) 106 | # theta = m + np.dot(randomness, spla.cholesky(Sigma*nu2, lower=False)).T 107 | 108 | approx_Kxx = np.dot(tDesignMatrix, tDesignMatrix.T) 109 | while True: 110 | try: 111 | print(approx_Kxx, nu2) 112 | chol_Sigma_inverse = spla.cholesky(approx_Kxx + nu2 * np.eye(nFeatures)) 113 | break 114 | except np.linalg.LinAlgError: 115 | nu2 = np.log(nu2) 116 | nu2 += 1 117 | nu2 = np.exp(nu2) 118 | Sigma = chol2inv(chol_Sigma_inverse) 119 | m = spla.cho_solve((chol_Sigma_inverse, False), 120 | np.dot(tDesignMatrix, gp.gp.y_train_)) 121 | theta = m + np.dot(randomness, spla.cholesky(Sigma * nu2, lower=False)).T 122 | # the above commented out version might be less stable? i forget why i changed it 123 | # that's ok. 124 | 125 | def wrapper(gradient, x): 126 | # the argument "gradient" is 127 | # not the usual compute_grad that computes BOTH when true 128 | # here it only computes the objective when true 129 | if x.ndim == 1: 130 | x = x[None] 131 | 132 | if not gradient: 133 | result = np.dot(theta.T, np.sqrt(2.0 * sigma2 / nFeatures) * np.cos(np.dot(W, x.T) + b)) 134 | if result.size == 1: 135 | result = float( 136 | result) # if the answer is just a number, take it out of the numpy array 137 | # wrapper 138 | # (failure to do so messed up NLopt and it only gives a cryptic error message) 139 | return result 140 | else: 141 | grad = np.dot(theta.T, 142 | -np.sqrt(2.0 * sigma2 / nFeatures) * np.sin(np.dot(W, x.T) + b) * W) 143 | return grad 144 | 145 | return wrapper 146 | 147 | 148 | class KLWeighting(AbstractEPM): 149 | 150 | """Weighting method from "Information-theoretic Transfer Learning framework for Bayesian 151 | optimization" by Ramachandran et al., MLKDD 2018 152 | 153 | This does not implement PES! 
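Roughly, each base task s receives a weight proportional to exp(-D_s / eta), where D_s is a nearest-neighbour estimate of the KL divergence between samples of the optimum location drawn (via random-feature approximations) from the target GP and from base GP s; the target task itself gets pseudo-weight 1 before normalization (see ``_compute_weights`` below).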
154 | """ 155 | 156 | def __init__( 157 | self, 158 | training_data: Dict[int, Dict[str, Union[List[Configuration], np.ndarray]]], 159 | eta: float, # https://github.com/AnilRamachandran/ITTLBO/blob/master/BO_TL_PES_loop.m#L218 160 | variance_mode: str = 'target', 161 | ** kwargs 162 | ): 163 | if kwargs.get('instance_features') is not None: 164 | raise NotImplementedError() 165 | super().__init__(**kwargs) 166 | self.training_data = training_data 167 | self.eta = eta 168 | 169 | self.rng = np.random.RandomState(self.seed) 170 | self.variance_mode = variance_mode 171 | 172 | # https://github.com/AnilRamachandran/ITTLBO/blob/master/BO_TL_PES_loop.m#L153 173 | self.num_samples = 100 174 | self.num_features = 500 175 | 176 | base_models = [] 177 | for task in training_data: 178 | model = get_gaussian_process( 179 | bounds=self.bounds, 180 | types=self.types, 181 | configspace=self.configspace, 182 | rng=self.rng, 183 | kernel=None, 184 | ) 185 | Y = training_data[task]['y'] 186 | mean = Y.mean() 187 | std = Y.std() 188 | if std == 0: 189 | std = 1 190 | 191 | y_scaled = (Y - mean) / std 192 | y_scaled = y_scaled.flatten() 193 | configs = training_data[task]['configurations'] 194 | X = convert_configurations_to_array(configs) 195 | 196 | model.train( 197 | X=X, 198 | Y=y_scaled, 199 | ) 200 | base_models.append(model) 201 | self.base_models = base_models 202 | 203 | self.weights_over_time = [] 204 | 205 | bounds = [(0, 1)] * len(self.configspace.get_hyperparameters()) 206 | samples = [] 207 | for s in range(len(base_models)): 208 | samples_base_task = [] 209 | for _ in range(self.num_samples): 210 | x0 = self.configspace.sample_configuration().get_array() 211 | base_gp_sample = sample_gp_with_random_features(self.base_models[s], self.num_features, 212 | self.rng) 213 | opt_base = scipy.optimize.minimize(functools.partial(base_gp_sample, False), x0, 214 | jac=functools.partial(base_gp_sample, True), 215 | bounds=bounds) 216 | samples_base_task.append(opt_base.x) 217 | 218 | samples.append(np.array(samples_base_task)) 219 | self.samples = samples 220 | 221 | def _compute_weights(self): 222 | 223 | pseudo_weights = [] 224 | bounds = [(0, 1)] * len(self.configspace.get_hyperparameters()) 225 | 226 | samples_target_task = [] 227 | for _ in range(self.num_samples): 228 | target_gp_sample = sample_gp_with_random_features(self.target_model, self.num_features, 229 | self.rng) 230 | x0 = self.configspace.sample_configuration().get_array() 231 | opt_target = scipy.optimize.minimize(functools.partial(target_gp_sample, False), x0, 232 | jac=functools.partial(target_gp_sample, True), 233 | bounds=bounds) 234 | samples_target_task.append(opt_target.x) 235 | samples_target_task = np.array(samples_target_task) 236 | 237 | for s in range(len(self.model_list_)): 238 | if s == len(self.model_list_) - 1: 239 | pseudo_weights.append(1) 240 | else: 241 | exp_arg = 0 242 | 243 | masks = np.eye(self.num_samples, dtype=bool) 244 | for i in range(self.num_samples): 245 | samples_base_task = self.samples[s] 246 | tau_i = sklearn.metrics.pairwise_distances( 247 | samples_target_task[i].reshape((1, -1)), 248 | Y=samples_base_task, metric='euclidean').min() + 1e-14 249 | rho_i = sklearn.metrics.pairwise_distances( 250 | samples_base_task[i].reshape((1, -1)), 251 | Y=samples_base_task[~masks[i]], metric='euclidean' 252 | ).min() + 1e-14 253 | exp_arg += np.log(tau_i / rho_i) 254 | #print(tau_i, rho_i, np.log(tau_i / rho_i)) 255 | 256 | exp_arg *= (len(self.configspace.get_hyperparameters()) / self.num_samples) 257 | exp_arg 
+= np.log(self.num_samples / (self.num_samples - 1)) 258 | #print(exp_arg) 259 | pseudo_weights.append(np.exp(- exp_arg / self.eta)) 260 | 261 | pseudo_weights = np.array(pseudo_weights) 262 | #print(pseudo_weights) 263 | self.weights_ = pseudo_weights / np.sum(pseudo_weights) 264 | 265 | def _train(self, X: np.ndarray, Y: np.ndarray) -> AbstractEPM: 266 | Y = Y.flatten() 267 | mean = Y.mean() 268 | std = Y.std() 269 | if std == 0: 270 | std = 1 271 | 272 | y_scaled = (Y - mean) / std 273 | self.Y_mean_ = mean 274 | self.Y_std_ = std 275 | 276 | target_model = get_gaussian_process( 277 | bounds=self.bounds, 278 | types=self.types, 279 | configspace=self.configspace, 280 | rng=self.rng, 281 | kernel=None, 282 | ) 283 | self.target_model = target_model.train(X, y_scaled) 284 | self.model_list_ = self.base_models + [target_model] 285 | try: 286 | self._compute_weights() 287 | except Exception as e: 288 | print(e) 289 | self.weights_ = np.zeros((len(self.model_list_, ))) 290 | self.weights_[-1] = 1 291 | print('Weights', self.weights_) 292 | self.weights_over_time.append(self.weights_) 293 | 294 | # create model and acquisition function 295 | return self 296 | 297 | def _predict(self, X: np.ndarray, cov_return_type: bool) -> Tuple[np.ndarray, np.ndarray]: 298 | 299 | # compute posterior for each model 300 | weighted_means = [] 301 | weighted_covars = [] 302 | 303 | # filter model with zero weights 304 | # weights on covariance matrices are weight**2 305 | non_zero_weight_indices = (self.weights_ ** 2 > 0).nonzero()[0] 306 | non_zero_weights = self.weights_[non_zero_weight_indices] 307 | # re-normalize 308 | non_zero_weights /= non_zero_weights.sum() 309 | 310 | for non_zero_weight_idx in range(non_zero_weight_indices.shape[0]): 311 | raw_idx = non_zero_weight_indices[non_zero_weight_idx].item() 312 | weight = non_zero_weights[non_zero_weight_idx] 313 | mean, covar = self.model_list_[raw_idx]._predict(X, cov_return_type=cov_return_type) 314 | weighted_means.append(weight * mean) 315 | if self.variance_mode == 'average': 316 | weighted_covars.append(covar * weight ** 2) 317 | elif self.variance_mode == 'target': 318 | if raw_idx + 1 == len(self.weights_): 319 | weighted_covars.append(covar) 320 | else: 321 | raise ValueError() 322 | 323 | if self.variance_mode == 'target': 324 | assert len(weighted_covars) == 1 325 | 326 | # set mean and covariance to be the rank-weighted sum the means and covariances 327 | # of the 328 | # base models and target model 329 | mean_x = np.sum(np.stack(weighted_means), axis=0) * self.Y_std_ + self.Y_mean_ 330 | covar_x = np.sum(weighted_covars, axis=0) * (self.Y_std_ ** 2) 331 | return mean_x, covar_x 332 | -------------------------------------------------------------------------------- /rgpe/methods/noisy_ei.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from typing import Optional 3 | 4 | import numpy as np 5 | from scipy.stats import norm 6 | 7 | from smac.epm.base_epm import AbstractEPM 8 | from smac.optimizer.acquisition import AbstractAcquisitionFunction, EI 9 | 10 | from rgpe.utils import sample_sobol 11 | 12 | 13 | class NoisyEI(AbstractAcquisitionFunction): 14 | """Implements the Noisy Expected Improvement by Letham et al. described in 15 | https://arxiv.org/abs/1706.07094 and used in https://arxiv.org/abs/1802.02219 16 | 17 | This implementation requires an ensemble of methods, for example RGPE and assumes that each 18 | method itself is a Gaussian Process as implemented in SMAC. 
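In brief, ``update`` draws quasi-random (Sobol) samples of the noisy observations from every ensemble member with non-zero weight, conditions copies of the models on these samples with the noise level set close to zero, and ``_compute`` averages the inner acquisition function over the resulting fantasy models.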
19 | 20 | If you are looking for a general implementation of NoisyEI we recommend having a look at 21 | BoTorch. 22 | """ 23 | 24 | def __init__( 25 | self, 26 | model: AbstractEPM, 27 | target_model_incumbent: bool, 28 | acquisition_function: Optional[AbstractAcquisitionFunction] = None, 29 | par: float = 0.0, 30 | n_samples: int = 30, 31 | ): 32 | 33 | super().__init__(model) 34 | self.long_name = 'Noisy Expected Improvement' 35 | self.par = par 36 | self.eta = None 37 | self.target_model_incumbent = target_model_incumbent 38 | 39 | if acquisition_function is None: 40 | self.acq = EI(model=None) 41 | else: 42 | self.acq = acquisition_function 43 | self.n_samples = n_samples 44 | 45 | self._functions = None 46 | self._do_integrate = True 47 | 48 | self.base_models = None 49 | 50 | def update(self, model: AbstractEPM, **kwargs): 51 | 52 | X = kwargs['X'] 53 | 54 | if model.weights_[-1] != 1: 55 | 56 | del kwargs['eta'] 57 | models = [] 58 | etas = [] 59 | 60 | input_locations = [] 61 | samples = [] 62 | 63 | self._do_integrate = True 64 | 65 | if self.base_models is None: 66 | self.base_models = [] 67 | for _ in range(self.n_samples): 68 | model_ = copy.deepcopy(model) 69 | self.base_models.append(model_.base_models) 70 | 71 | # First, create samples from each model of the ensemble to integrate over 72 | for model_idx, weight in enumerate(model.weights_): 73 | if weight <= 0: 74 | # Ignore models with zero weight 75 | samples.append(None) 76 | input_locations.append(None) 77 | continue 78 | submodel = model.model_list_[model_idx] 79 | original_training_data = submodel.gp.X_train_ 80 | if model_idx == len(model.weights_) - 1: 81 | integrate = original_training_data.copy() 82 | else: 83 | integrate = np.vstack((original_training_data, X)) 84 | try: 85 | sample = sample_sobol(submodel, integrate, self.n_samples, model.rng.randint(10000)) 86 | except: 87 | sample = submodel.predict(integrate)[0].transpose() 88 | sample = np.tile(sample, reps=self.n_samples) 89 | samples.append(sample) 90 | input_locations.append(integrate) 91 | 92 | # Second, train the integrated GPs for each base model 93 | for sample_idx in range(self.n_samples): 94 | 95 | # Copy the individual models 96 | # This is substantially faster than doing a deepcopy of all models as it avoids 97 | # doing a deepcopy of the base models 98 | model_ = copy.copy(model) 99 | model_.base_models = self.base_models[sample_idx] 100 | # do a deep copy of the target model so we don't mess with it's original noise 101 | # estimate. The original noise estimate will be used as the basis for the GPs HPO 102 | # when fitting it the next time. 
103 | model_.target_model = copy.deepcopy(model.target_model) 104 | model_.model_list_ = model_.base_models + [model_.target_model] 105 | models.append(model_) 106 | 107 | # Train the individual models 108 | for model_idx, (submodel, weight) in enumerate(zip(model_.model_list_, model_.weights_)): 109 | if weight <= 0: 110 | continue 111 | theta = submodel.gp.kernel.theta 112 | theta[-1] = -25 113 | submodel.gp.kernel.theta = theta 114 | sample = samples[model_idx][sample_idx].reshape((-1, 1)) 115 | submodel._train(input_locations[model_idx], sample, do_optimize=False) 116 | 117 | for model_ in models: 118 | if self.target_model_incumbent: 119 | predictions, _ = model_.target_model.predict(X) 120 | predictions = predictions * model_.Y_std_ + model_.Y_mean_ 121 | else: 122 | predictions, _ = model_.predict(X) 123 | etas.append(np.min(predictions)) 124 | 125 | if self._functions is None or len(self._functions) != len(models): 126 | self._functions = [copy.deepcopy(self.acq) for _ in models] 127 | for model, func, eta in zip(models, self._functions, etas): 128 | func.update(model=model, eta=eta, **kwargs) 129 | else: 130 | print('No need to integrate...') 131 | self._do_integrate = False 132 | del kwargs['eta'] 133 | predictions, _ = model.predict(X) 134 | kwargs['eta'] = np.min(predictions) 135 | self.acq.update(model=model, **kwargs) 136 | 137 | def _compute(self, X: np.ndarray): 138 | if self._do_integrate: 139 | val = np.array([func._compute(X) for func in self._functions]).mean(axis=0) 140 | return val 141 | else: 142 | return self.acq._compute(X) 143 | 144 | 145 | class ClosedFormNei(AbstractAcquisitionFunction): 146 | """Closed-form adaptation of the Noisy Expected Improvement. 147 | 148 | While it is substantially faster to compute it does not consider the uncertainty about 149 | which noisy observation is the best observation made so far. 
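Instead, the observed configuration with the lowest posterior mean is treated as the incumbent, and EI is computed for the predictive distribution of the difference between each candidate and this incumbent (using the joint mean and full covariance whenever base models carry weight).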
150 | """ 151 | 152 | def update(self, **kwargs): 153 | 154 | X = kwargs['X'] 155 | self.model = kwargs['model'] 156 | # Model prediction is only used when not integrating over base models 157 | prediction, _ = self.model.predict(X) 158 | self.incumbent_array = X[np.argmin(prediction)].reshape((1, -1)) 159 | self.eta = np.min(prediction) 160 | 161 | def _compute(self, X: np.ndarray, **kwargs): 162 | 163 | if len(X.shape) == 1: 164 | X = X[:, np.newaxis] 165 | 166 | if self.model.weights_[-1] != 1: 167 | # Due to the joint prediction, it is not possible to compute EI only with respect to 168 | # the predicted value on the target task 169 | X_new = np.concatenate((self.incumbent_array, X), axis=0) 170 | m_pred, v_pred = self.model._predict(X_new, cov_return_type='full_cov') 171 | m_inc = m_pred[0] 172 | v_inc = v_pred[0][0] 173 | m_cand = m_pred[1:] 174 | cov = v_pred[0][1:] 175 | v_cand = np.diag(v_pred)[1:] 176 | m = m_inc - m_cand 177 | v = v_inc + v_cand - 2 * cov 178 | s = np.sqrt(v) 179 | eta_minus_m = m.reshape((-1, 1)) 180 | s = s.reshape((-1, 1)) 181 | else: 182 | m, v = self.model.predict(X) 183 | s = np.sqrt(v) 184 | eta_minus_m = self.eta - m 185 | 186 | def calculate_f(): 187 | z = (eta_minus_m) / s 188 | return (eta_minus_m) * norm.cdf(z) + s * norm.pdf(z) 189 | 190 | if np.any(s == 0.0): 191 | self.logger.warning("Predicted std is 0.0 for at least one sample.") 192 | s_copy = np.copy(s) 193 | s[s_copy == 0.0] = 1.0 194 | f = calculate_f() 195 | f[s_copy == 0.0] = 0.0 196 | else: 197 | f = calculate_f() 198 | if (f < 0).any(): 199 | raise ValueError( 200 | "Expected Improvement is smaller than 0 for at least one " 201 | "sample.") 202 | return f 203 | -------------------------------------------------------------------------------- /rgpe/methods/rgpe.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Dict, List, Optional, Tuple, Union, Callable 3 | 4 | import numpy as np 5 | 6 | from ConfigSpace import Configuration 7 | from smac.configspace import convert_configurations_to_array 8 | from smac.epm.base_epm import AbstractEPM 9 | from smac.epm.gaussian_process import GaussianProcess 10 | from rgpe.utils import get_gaussian_process, sample_sobol, copula_transform 11 | 12 | 13 | def roll_col(X: np.ndarray, shift: int) -> np.ndarray: 14 | """ 15 | Rotate columns to right by shift. 16 | """ 17 | return np.concatenate((X[:, -shift:], X[:, :-shift]), axis=1) 18 | 19 | 20 | def compute_ranking_loss( 21 | f_samps: np.ndarray, 22 | target_y: np.ndarray, 23 | target_model: bool, 24 | ) -> np.ndarray: 25 | """ 26 | Compute ranking loss for each sample from the posterior over target points. 
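For a posterior sample ``f`` and observed targets ``y`` this counts misranked pairs, i.e. the sum over ordered pairs (j, k) of [(f_j < f_k) XOR (y_j < y_k)]; with ``target_model=True`` the sampled value only enters the left-hand side of each comparison (f_j < y_k), matching the LOOCV treatment of the target model.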
27 | """ 28 | y_stack = np.tile(target_y.reshape((-1, 1)), f_samps.shape[0]).transpose() 29 | rank_loss = np.zeros(f_samps.shape[0]) 30 | if not target_model: 31 | for i in range(1, target_y.shape[0]): 32 | rank_loss += np.sum( 33 | (roll_col(f_samps, i) < f_samps) ^ (roll_col(y_stack, i) < y_stack), 34 | axis=1 35 | ) 36 | else: 37 | for i in range(1, target_y.shape[0]): 38 | rank_loss += np.sum( 39 | (roll_col(f_samps, i) < y_stack) ^ (roll_col(y_stack, i) < y_stack), 40 | axis=1 41 | ) 42 | 43 | return rank_loss 44 | 45 | 46 | def get_target_model_loocv_sample_preds( 47 | train_x: np.ndarray, 48 | train_y: np.ndarray, 49 | num_samples: int, 50 | model: GaussianProcess, 51 | engine_seed: int, 52 | ) -> np.ndarray: 53 | """ 54 | Use LOOCV to fit len(train_y) independent GPs and sample from their posterior to obtain an 55 | approximate sample from the target model. 56 | 57 | This sampling does not take into account the correlation between observations which occurs 58 | when the predictive uncertainty of the Gaussian process is unequal zero. 59 | """ 60 | masks = np.eye(len(train_x), dtype=np.bool) 61 | train_x_cv = np.stack([train_x[~m] for m in masks]) 62 | train_y_cv = np.stack([train_y[~m] for m in masks]) 63 | test_x_cv = np.stack([train_x[m] for m in masks]) 64 | 65 | samples = np.zeros((num_samples, train_y.shape[0])) 66 | for i in range(train_y.shape[0]): 67 | loo_model = get_gaussian_process( 68 | configspace=model.configspace, 69 | bounds=model.bounds, 70 | types=model.types, 71 | rng=model.rng, 72 | kernel=model.kernel, 73 | ) 74 | loo_model._train(X=train_x_cv[i], y=train_y_cv[i], do_optimize=False) 75 | 76 | samples_i = sample_sobol(loo_model, test_x_cv[i], num_samples, engine_seed).flatten() 77 | 78 | samples[:, i] = samples_i 79 | 80 | return samples 81 | 82 | 83 | def compute_target_model_ranking_loss( 84 | train_x: np.ndarray, 85 | train_y: np.ndarray, 86 | num_samples: int, 87 | model: GaussianProcess, 88 | engine_seed: int, 89 | ) -> np.ndarray: 90 | """ 91 | Use LOOCV to fit len(train_y) independent GPs and sample from their posterior to obtain an 92 | approximate sample from the target model. 93 | 94 | This function does joint draws from all observations (both training data and left out sample) 95 | to take correlation between observations into account, which can occur if the predictive 96 | variance of the Gaussian process is unequal zero. To avoid returning a tensor, this function 97 | directly computes the ranking loss. 
98 | """ 99 | masks = np.eye(len(train_x), dtype=np.bool) 100 | train_x_cv = np.stack([train_x[~m] for m in masks]) 101 | train_y_cv = np.stack([train_y[~m] for m in masks]) 102 | 103 | ranking_losses = np.zeros(num_samples, dtype=np.int) 104 | for i in range(train_y.shape[0]): 105 | loo_model = get_gaussian_process( 106 | configspace=model.configspace, 107 | bounds=model.bounds, 108 | types=model.types, 109 | rng=model.rng, 110 | kernel=model.kernel, 111 | ) 112 | loo_model._train(X=train_x_cv[i], y=train_y_cv[i], do_optimize=False) 113 | samples_i = sample_sobol(loo_model, train_x, num_samples, engine_seed) 114 | 115 | for j in range(len(train_y)): 116 | ranking_losses += (samples_i[:, i] < samples_i[:, j]) ^ (train_y[i] < train_y[j]) 117 | 118 | return ranking_losses 119 | 120 | 121 | def compute_rank_weights( 122 | train_x: np.ndarray, 123 | train_y: np.ndarray, 124 | base_models: List[GaussianProcess], 125 | target_model: GaussianProcess, 126 | num_samples: int, 127 | sampling_mode: str, 128 | weight_dilution_strategy: Union[int, Callable], 129 | number_of_function_evaluations, 130 | rng: np.random.RandomState, 131 | alpha: float = 0.0, 132 | ) -> np.ndarray: 133 | """ 134 | Compute ranking weights for each base model and the target model 135 | (using LOOCV for the target model). 136 | 137 | Returns 138 | ------- 139 | weights : np.ndarray 140 | """ 141 | 142 | if sampling_mode == 'bootstrap': 143 | 144 | predictions = [] 145 | for model_idx in range(len(base_models)): 146 | model = base_models[model_idx] 147 | predictions.append(model.predict(train_x)[0].flatten()) 148 | 149 | masks = np.eye(len(train_x), dtype=np.bool) 150 | train_x_cv = np.stack([train_x[~m] for m in masks]) 151 | train_y_cv = np.stack([train_y[~m] for m in masks]) 152 | test_x_cv = np.stack([train_x[m] for m in masks]) 153 | 154 | loo_prediction = [] 155 | for i in range(train_y.shape[0]): 156 | loo_model = get_gaussian_process( 157 | configspace=target_model.configspace, 158 | bounds=target_model.bounds, 159 | types=target_model.types, 160 | rng=target_model.rng, 161 | kernel=target_model.kernel, 162 | ) 163 | loo_model._train(X=train_x_cv[i], y=train_y_cv[i], do_optimize=False) 164 | loo_prediction.append(loo_model.predict(test_x_cv[i])[0][0][0]) 165 | predictions.append(loo_prediction) 166 | predictions = np.array(predictions) 167 | 168 | bootstrap_indices = rng.choice(predictions.shape[1], 169 | size=(num_samples, predictions.shape[1]), 170 | replace=True) 171 | 172 | bootstrap_predictions = [] 173 | bootstrap_targets = train_y[bootstrap_indices].reshape((num_samples, len(train_y))) 174 | for m in range(len(base_models) + 1): 175 | bootstrap_predictions.append(predictions[m, bootstrap_indices]) 176 | 177 | ranking_losses = np.zeros((len(base_models) + 1, num_samples)) 178 | for i in range(len(base_models)): 179 | 180 | for j in range(len(train_y)): 181 | ranking_losses[i] += np.sum( 182 | ( 183 | roll_col(bootstrap_predictions[i], j) < bootstrap_predictions[i]) 184 | ^ (roll_col(bootstrap_targets, j) < bootstrap_targets 185 | ), axis=1 186 | ) 187 | for j in range(len(train_y)): 188 | ranking_losses[-1] += np.sum( 189 | ( 190 | (roll_col(bootstrap_predictions[-1], j) < bootstrap_targets) 191 | ^ (roll_col(bootstrap_targets, j) < bootstrap_targets) 192 | ), axis=1 193 | ) 194 | 195 | elif sampling_mode in ['simplified', 'correct']: 196 | # Use the original strategy as described in v1: https://arxiv.org/pdf/1802.02219v1.pdf 197 | ranking_losses = [] 198 | # compute ranking loss for each base model 199 | for 
model_idx in range(len(base_models)): 200 | model = base_models[model_idx] 201 | # compute posterior over training points for target task 202 | f_samps = sample_sobol(model, train_x, num_samples, rng.randint(10000)) 203 | # compute and save ranking loss 204 | ranking_losses.append(compute_ranking_loss(f_samps, train_y, target_model=False)) 205 | 206 | # compute ranking loss for target model using LOOCV 207 | if sampling_mode == 'simplified': 208 | # Independent draw of the leave one out sample, other "samples" are noise-free and the 209 | # actual observation 210 | f_samps = get_target_model_loocv_sample_preds(train_x, train_y, num_samples, target_model, 211 | rng.randint(10000)) 212 | ranking_losses.append(compute_ranking_loss(f_samps, train_y, target_model=True)) 213 | elif sampling_mode == 'correct': 214 | # Joint draw of the leave one out sample and the other observations 215 | ranking_losses.append( 216 | compute_target_model_ranking_loss(train_x, train_y, num_samples, target_model, 217 | rng.randint(10000)) 218 | ) 219 | else: 220 | raise ValueError(sampling_mode) 221 | else: 222 | raise NotImplementedError(sampling_mode) 223 | 224 | if isinstance(weight_dilution_strategy, int): 225 | weight_dilution_percentile_target = weight_dilution_strategy 226 | weight_dilution_percentile_base = 50 227 | elif weight_dilution_strategy is None or weight_dilution_strategy in ['probabilistic', 'probabilistic-ld']: 228 | pass 229 | else: 230 | raise ValueError(weight_dilution_strategy) 231 | 232 | ranking_loss = np.array(ranking_losses) 233 | 234 | # perform model pruning 235 | p_drop = [] 236 | if weight_dilution_strategy in ['probabilistic', 'probabilistic-ld']: 237 | for i in range(len(base_models)): 238 | better_than_target = np.sum(ranking_loss[i, :] < ranking_loss[-1, :]) 239 | worse_than_target = np.sum(ranking_loss[i, :] >= ranking_loss[-1, :]) 240 | correction_term = alpha * (better_than_target + worse_than_target) 241 | proba_keep = better_than_target / (better_than_target + worse_than_target + correction_term) 242 | if weight_dilution_strategy == 'probabilistic-ld': 243 | proba_keep = proba_keep * (1 - len(train_x) / float(number_of_function_evaluations)) 244 | proba_drop = 1 - proba_keep 245 | p_drop.append(proba_drop) 246 | r = rng.rand() 247 | if r < proba_drop: 248 | ranking_loss[i, :] = np.max(ranking_loss) * 2 + 1 249 | elif weight_dilution_strategy is not None: 250 | # Use the original strategy as described in v1: https://arxiv.org/pdf/1802.02219v1.pdf 251 | percentile_base = np.percentile(ranking_loss[: -1, :], weight_dilution_percentile_base, axis=1) 252 | percentile_target = np.percentile(ranking_loss[-1, :], weight_dilution_percentile_target) 253 | for i in range(len(base_models)): 254 | if percentile_base[i] >= percentile_target: 255 | ranking_loss[i, :] = np.max(ranking_loss) * 2 + 1 256 | 257 | # compute best model (minimum ranking loss) for each sample 258 | # this differs from v1, where the weight is given only to the target model in case of a tie. 259 | # Here, we distribute the weight fairly among all participants of the tie. 260 | minima = np.min(ranking_loss, axis=0) 261 | assert len(minima) == num_samples 262 | best_models = np.zeros(len(base_models) + 1) 263 | for i, minimum in enumerate(minima): 264 | minimum_locations = ranking_loss[:, i] == minimum 265 | sample_from = np.where(minimum_locations)[0] 266 | 267 | for sample in sample_from: 268 | best_models[sample] += 1. 
/ len(sample_from) 269 | 270 | # compute proportion of samples for which each model is best 271 | rank_weights = best_models / num_samples 272 | return rank_weights, p_drop 273 | 274 | 275 | class RGPE(AbstractEPM): 276 | 277 | def __init__( 278 | self, 279 | training_data: Dict[int, Dict[str, Union[List[Configuration], np.ndarray]]], 280 | num_posterior_samples: int, 281 | weight_dilution_strategy: Union[int, str], 282 | number_of_function_evaluations: int, 283 | sampling_mode: str = 'correct', 284 | variance_mode: str = 'average', 285 | normalization: str = 'mean/var', 286 | alpha: float = 0.0, 287 | **kwargs 288 | ): 289 | """Ranking-Weighted Gaussian Process Ensemble. 290 | 291 | Parameters 292 | ---------- 293 | training_data 294 | Dictionary containing the training data for each meta-task. Mapping from an integer ( 295 | task ID) to a dictionary, which is a mapping from configuration to performance. 296 | num_posterior_samples 297 | Number of samples to draw for approximating the posterior probability of a model 298 | being the best model to explain the observations on the target task. 299 | weight_dilution_strategy 300 | Can be one of the following four: 301 | * ``'probabilistic-ld'``: the method presented in the paper 302 | * ``'probabilistic'``: the method presented in the paper, but without the time-dependent 303 | pruning of meta-models 304 | * an integer: a deterministic strategy described in https://arxiv.org/abs/1802.02219v1 305 | * ``None``: no weight dilution prevention 306 | number_of_function_evaluations 307 | Optimization horizon - used to compute the time-dependent factor in the probability 308 | of dropping base models for the weight dilution prevention strategy 309 | ``'probabilistic-ld'``. 310 | sampling_mode 311 | Can be any of: 312 | * ``'bootstrap'`` 313 | * ``'correct'`` 314 | * ``'simplified'`` 315 | variance_mode 316 | Can be either ``'average'`` to return the weighted average of the variance 317 | predictions of the individual models or ``'target'`` to only obtain the variance 318 | prediction of the target model. Changing this is only necessary to use the model 319 | together with the expected improvement. 320 | normalization 321 | Can be either: 322 | * ``None``: No normalization per task 323 | * ``'mean/var'``: Zero mean unit standard deviation normalization per task as 324 | proposed by Yogatama et al. (AISTATS 2014). 325 | * ``'Copula'``: Copula transform as proposed by Salinas et al., 2020 326 | alpha 327 | Regularization hyperparameter to increase aggressiveness of dropping base models when 328 | using the weight dilution strategies ``'probabilistic-ld'`` or ``'probabilistic'``. 
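        Note: a minimal sketch of the ``training_data`` layout this constructor expects,
        inferred from how it is consumed below (the task ids, configurations and values are
        purely illustrative)::

            training_data = {
                0: {'configurations': [config_a, config_b], 'y': np.array([0.31, 0.27])},
                1: {'configurations': [config_c, config_d], 'y': np.array([0.45, 0.40])},
            }

        where each ``config_*`` is a ``ConfigSpace.Configuration`` evaluated on the
        respective meta-task.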
329 | """ 330 | 331 | if kwargs.get('instance_features') is not None: 332 | raise NotImplementedError() 333 | super().__init__(**kwargs) 334 | self.training_data = training_data 335 | 336 | self.number_of_function_evaluations = number_of_function_evaluations 337 | self.num_posterior_samples = num_posterior_samples 338 | self.rng = np.random.RandomState(self.seed) 339 | self.sampling_mode = sampling_mode 340 | self.variance_mode = variance_mode 341 | self.normalization = normalization 342 | self.alpha = alpha 343 | 344 | if self.normalization not in ['None', 'mean/var', 'Copula']: 345 | raise ValueError(self.normalization) 346 | 347 | if weight_dilution_strategy is None or weight_dilution_strategy == 'None': 348 | weight_dilution_strategy = None 349 | elif weight_dilution_strategy in ['probabilistic', 'probabilistic-ld']: 350 | pass 351 | else: 352 | weight_dilution_strategy = int(weight_dilution_strategy) 353 | 354 | self.weight_dilution_strategy = weight_dilution_strategy 355 | 356 | base_models = [] 357 | for task in training_data: 358 | model = get_gaussian_process( 359 | bounds=self.bounds, 360 | types=self.types, 361 | configspace=self.configspace, 362 | rng=self.rng, 363 | kernel=None, 364 | ) 365 | y = training_data[task]['y'] 366 | if self.normalization == 'mean/var': 367 | mean = y.mean() 368 | std = y.std() 369 | if std == 0: 370 | std = 1 371 | 372 | y_scaled = (y - mean) / std 373 | y_scaled = y_scaled.flatten() 374 | elif self.normalization == 'Copula': 375 | y_scaled = copula_transform(y) 376 | elif self.normalization == 'None': 377 | y_scaled = y 378 | else: 379 | raise ValueError(self.normalization) 380 | configs = training_data[task]['configurations'] 381 | X = convert_configurations_to_array(configs) 382 | 383 | model.train( 384 | X=X, 385 | Y=y_scaled, 386 | ) 387 | base_models.append(model) 388 | self.base_models = base_models 389 | self.weights_over_time = [] 390 | self.p_drop_over_time = [] 391 | 392 | def _train(self, X: np.ndarray, Y: np.ndarray) -> AbstractEPM: 393 | """SMAC training function""" 394 | print(self.normalization) 395 | if self.normalization == 'mean/var': 396 | Y = Y.flatten() 397 | mean = Y.mean() 398 | std = Y.std() 399 | if std == 0: 400 | std = 1 401 | 402 | y_scaled = (Y - mean) / std 403 | self.Y_std_ = std 404 | self.Y_mean_ = mean 405 | elif self.normalization in ['None', 'Copula']: 406 | self.Y_mean_ = 0. 407 | self.Y_std_ = 1. 
408 | y_scaled = Y 409 | if self.normalization == 'Copula': 410 | y_scaled = copula_transform(Y) 411 | else: 412 | raise ValueError(self.normalization) 413 | 414 | target_model = get_gaussian_process( 415 | bounds=self.bounds, 416 | types=self.types, 417 | configspace=self.configspace, 418 | rng=self.rng, 419 | kernel=None, 420 | ) 421 | self.target_model = target_model.train(X, y_scaled) 422 | self.model_list_ = self.base_models + [target_model] 423 | 424 | if X.shape[0] < 3: 425 | self.weights_ = np.ones(len(self.model_list_)) / len(self.model_list_) 426 | p_drop = np.ones((len(self.base_models, ))) * np.NaN 427 | else: 428 | try: 429 | self.weights_, p_drop = compute_rank_weights( 430 | train_x=X, 431 | train_y=y_scaled, 432 | base_models=self.base_models, 433 | target_model=target_model, 434 | num_samples=self.num_posterior_samples, 435 | sampling_mode=self.sampling_mode, 436 | weight_dilution_strategy=self.weight_dilution_strategy, 437 | number_of_function_evaluations=self.number_of_function_evaluations, 438 | rng=self.rng, 439 | alpha=self.alpha, 440 | ) 441 | except Exception as e: 442 | print(e) 443 | self.weights_ = np.zeros((len(self.model_list_, ))) 444 | self.weights_[-1] = 1 445 | p_drop = np.ones((len(self.base_models, ))) * np.NaN 446 | 447 | print('Weights', self.weights_) 448 | self.weights_over_time.append(self.weights_) 449 | self.p_drop_over_time.append(p_drop) 450 | 451 | return self 452 | 453 | def _predict(self, X: np.ndarray, cov_return_type='diagonal_cov') -> Tuple[np.ndarray, np.ndarray]: 454 | """SMAC predict function""" 455 | 456 | # compute posterior for each model 457 | weighted_means = [] 458 | weighted_covars = [] 459 | 460 | # filter model with zero weights 461 | # weights on covariance matrices are weight**2 462 | non_zero_weight_indices = (self.weights_ ** 2 > 0).nonzero()[0] 463 | non_zero_weights = self.weights_[non_zero_weight_indices] 464 | # re-normalize 465 | non_zero_weights /= non_zero_weights.sum() 466 | 467 | for non_zero_weight_idx in range(non_zero_weight_indices.shape[0]): 468 | raw_idx = non_zero_weight_indices[non_zero_weight_idx].item() 469 | weight = non_zero_weights[non_zero_weight_idx] 470 | mean, covar = self.model_list_[raw_idx]._predict(X, cov_return_type) 471 | 472 | weighted_means.append(weight * mean) 473 | 474 | if self.variance_mode == 'average': 475 | weighted_covars.append(covar * weight ** 2) 476 | elif self.variance_mode == 'target': 477 | if raw_idx + 1 == len(self.weights_): 478 | weighted_covars.append(covar) 479 | else: 480 | raise ValueError() 481 | 482 | if len(weighted_covars) == 0: 483 | if self.variance_mode != 'target': 484 | raise ValueError(self.variance_mode) 485 | _, covar = self.model_list_[-1]._predict(X, cov_return_type=cov_return_type) 486 | weighted_covars.append(covar) 487 | 488 | mean_x = np.sum(np.stack(weighted_means), axis=0) * self.Y_std_ + self.Y_mean_ 489 | covar_x = np.sum(weighted_covars, axis=0) * (self.Y_std_ ** 2) 490 | return mean_x, covar_x 491 | 492 | def sample_functions(self, X_test: np.ndarray, n_funcs: int = 1) -> np.ndarray: 493 | """ 494 | Sample function values from the posterior of the specified test points. 
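        In this rank-weighted ensemble a joint draw is the weighted sum of per-model draws,
        ``f(x) = sum_i w_i * f_i(x)``, where only models with non-zero weight contribute and
        the non-zero weights are re-normalized before sampling (see the implementation below).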
495 | """ 496 | 497 | # filter model with zero weights 498 | # weights on covariance matrices are weight**2 499 | non_zero_weight_indices = (self.weights_ ** 2 > 0).nonzero()[0] 500 | non_zero_weights = self.weights_[non_zero_weight_indices] 501 | # re-normalize 502 | non_zero_weights /= non_zero_weights.sum() 503 | 504 | samples = [] 505 | for non_zero_weight_idx in range(non_zero_weight_indices.shape[0]): 506 | raw_idx = non_zero_weight_indices[non_zero_weight_idx].item() 507 | weight = non_zero_weights[non_zero_weight_idx] 508 | 509 | funcs = self.model_list_[raw_idx].sample_functions(X_test, n_funcs) 510 | funcs = funcs * weight 511 | samples.append(funcs) 512 | samples = np.sum(samples, axis=0) 513 | return samples 514 | -------------------------------------------------------------------------------- /rgpe/methods/rmogp.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from typing import Optional 3 | 4 | import numpy as np 5 | from scipy.stats import norm 6 | 7 | from smac.epm.base_epm import AbstractEPM 8 | from smac.optimizer.acquisition import AbstractAcquisitionFunction, EI 9 | 10 | from rgpe.utils import sample_sobol 11 | 12 | 13 | class MixtureOfGPs(AbstractAcquisitionFunction): 14 | 15 | def __init__(self, 16 | model: AbstractEPM, 17 | use_expectation=True, 18 | use_global_incumbent=False, 19 | ): 20 | """Ranking-weighted Mixture of Gaussian Processes acquisition function 21 | 22 | Parameters 23 | ---------- 24 | model : AbstractEPM 25 | An linearly-weighted ensemble which contains a model for each base task and the target 26 | task. 27 | use_expectation : bool 28 | Whether to compute the expectation per base task. Defaults to ``True``, 29 | using ``False`` makes the acquisition function behave similar to the transfer 30 | acquisition function (Wistuba et al., Machine Learning 2018). 31 | use_global_incumbent : bool 32 | Whether to use a global incumbent or an incumbent per task. Defaults to ``False``, 33 | using ``True`` makes the acquisition function behave more similar to 'Active Testing' 34 | from Leite and Brazdil (2012). 35 | """ 36 | 37 | super().__init__(model) 38 | self.long_name = 'Transfer Acquisition Function' 39 | self.eta = None 40 | self.etas = None 41 | self.n_models = 0 42 | self.use_expectation = use_expectation 43 | self.use_global_incumbent = use_global_incumbent 44 | 45 | self.base_models = None 46 | 47 | def update(self, **kwargs): 48 | """SMAC's acquisition function update mechanism. 49 | 50 | This is a fast implementation which copies the base models once in the beginning. Do use 51 | with care if moving the acquisition function to a new version of SMAC, a different 52 | Bayesian optimization library or somehow else change the experimental setup. If you are 53 | unsure about this, please use the slower implementation below which does a deepcopy in 54 | every iteration. 
55 | """ 56 | model = kwargs['model'] 57 | self.n_models = len(self.model.model_list_) 58 | 59 | X = kwargs['X'] 60 | 61 | if self.base_models is None: 62 | self.base_models = copy.deepcopy(model.base_models) 63 | model = copy.copy(model) 64 | 65 | etas = [] 66 | for i, (submodel, weight) in enumerate(zip(model.model_list_, model.weights_)): 67 | if weight <= 0: 68 | etas.append(np.inf) 69 | continue 70 | if self.use_expectation and i != self.n_models - 1: 71 | # Use the re-parametrization trick to get rid of noise 72 | original_training_data = submodel.gp.X_train_.copy() 73 | integrate = np.vstack((original_training_data, X)) 74 | sample, _ = submodel.predict(integrate) 75 | theta = self.base_models[i].gp.kernel.theta 76 | theta[-1] = -25 77 | self.base_models[i].gp.kernel.theta = theta 78 | self.base_models[i]._train(integrate, sample, do_optimize=False) 79 | if self.use_global_incumbent: 80 | eta, _ = submodel.predict(self.incumbent_array) 81 | else: 82 | means, _ = submodel.predict(X) 83 | eta = np.min(means) 84 | etas.append(eta) 85 | 86 | model.base_models = [] 87 | model.model_list_ = [] 88 | for submodel in self.base_models: 89 | model.base_models.append(submodel) 90 | model.model_list_.append(submodel) 91 | model.model_list_.append(model.target_model) 92 | 93 | self.model = model 94 | self.etas = etas 95 | 96 | def update_slow(self, **kwargs): 97 | """SMAC's acquisition function update mechanism.""" 98 | model = kwargs['model'] 99 | self.n_models = len(self.model.model_list_) 100 | 101 | X = kwargs['X'] 102 | 103 | self._do_integrate = True 104 | model_ = copy.deepcopy(model) 105 | etas = [] 106 | for submodel, weight in zip(model_.model_list_, model_.weights_): 107 | if weight <= 0: 108 | etas.append(np.inf) 109 | continue 110 | if self.use_expectation: 111 | # Use the re-parametrization trick to get rid of noise 112 | original_training_data = submodel.gp.X_train_.copy() 113 | integrate = np.vstack((original_training_data, X)) 114 | sample, _ = submodel.predict(integrate) 115 | print('before', submodel.gp.kernel.theta) 116 | theta = submodel.gp.kernel.theta 117 | theta[-1] = -25 118 | submodel.gp.kernel.theta = theta 119 | print('after', submodel.gp.kernel.theta) 120 | submodel._train(integrate, sample, do_optimize=False) 121 | if self.use_global_incumbent: 122 | eta, _ = submodel.predict(self.incumbent_array) 123 | else: 124 | means, _ = submodel.predict(X) 125 | eta = np.min(means) 126 | etas.append(eta) 127 | 128 | self.model = model_ 129 | self.etas = etas 130 | 131 | def _compute(self, X: np.ndarray, **kwargs): 132 | """SMAC's acquisition function computation mechanism.""" 133 | 134 | ei_values = [] 135 | 136 | for i, (weight, model) in enumerate(zip(self.model.weights_, self.model.model_list_)): 137 | if weight == 0: 138 | continue 139 | else: 140 | eta = self.etas[i] 141 | if self.use_expectation or i == self.n_models - 1: 142 | 143 | m, v = model.predict(X) 144 | s = np.sqrt(v) 145 | eta_minus_m = eta - m 146 | 147 | def calculate_f(): 148 | z = eta_minus_m / s 149 | return eta_minus_m * norm.cdf(z) + s * norm.pdf(z) 150 | 151 | if np.any(s == 0.0): 152 | # if std is zero, we have observed x on all instances 153 | # using a RF, std should be never exactly 0.0 154 | # Avoid zero division by setting all zeros in s to one. 155 | # Consider the corresponding results in f to be zero. 
156 | self.logger.warning("Predicted std is 0.0 for at least one sample.") 157 | s_copy = np.copy(s) 158 | s[s_copy == 0.0] = 1.0 159 | ei = calculate_f() 160 | ei[s_copy == 0.0] = 0.0 161 | else: 162 | ei = calculate_f() 163 | if (ei < 0).any(): 164 | raise ValueError( 165 | "Expected Improvement is smaller than 0 for at least one " 166 | "sample.") 167 | 168 | ei_values.append(ei * weight) 169 | else: 170 | m, _ = model.predict(X) 171 | improvement = eta - m 172 | improvement = improvement 173 | improvement = np.maximum(improvement, 0) 174 | ei_values.append(improvement * weight) 175 | 176 | rval = np.sum(ei_values, axis=0) 177 | rval = rval.reshape((-1, 1)) 178 | 179 | return rval 180 | 181 | -------------------------------------------------------------------------------- /rgpe/methods/taf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from smac.epm.base_epm import AbstractEPM 4 | from smac.optimizer.acquisition import AbstractAcquisitionFunction, EI 5 | 6 | 7 | class TAF(AbstractAcquisitionFunction): 8 | 9 | def __init__(self, model: AbstractEPM): 10 | """Transfer acquisition function from "Scalable Gaussian process-based transfer surrogates 11 | for hyperparameter optimization" by Wistuba, Schilling and Schmidt-Thieme, 12 | Machine Learning 2018, https://link.springer.com/article/10.1007/s10994-017-5684-y 13 | 14 | Works both with TST-R and RGPE weighting. 15 | """ 16 | 17 | super().__init__(model) 18 | self.long_name = 'Transfer Acquisition Function' 19 | self.eta = None 20 | self.acq = EI(model=None) 21 | 22 | def update(self, **kwargs): 23 | 24 | X = kwargs['X'] 25 | prediction, _ = self.model.target_model.predict(X) 26 | self.incumbent_array = X[np.argmin(prediction)].reshape((1, -1)) 27 | eta = np.min(prediction) 28 | assert (id(kwargs['model']) == id(self.model)) 29 | kwargs = {} 30 | kwargs['model'] = self.model.target_model 31 | kwargs['eta'] = eta 32 | self.acq.model = None 33 | self.acq.update(**kwargs) 34 | best_values = [] 35 | for weight, base_model in zip(self.model.weights_, self.model.base_models): 36 | if weight == 0: 37 | best_values.append(None) 38 | else: 39 | values, _ = base_model.predict(X) 40 | min_value = np.min(values) 41 | best_values.append(min_value) 42 | self.best_values = best_values 43 | 44 | def _compute(self, X: np.ndarray, **kwargs): 45 | 46 | ei = self.acq._compute(X) 47 | 48 | if self.model.weights_[-1] == 1: 49 | return ei 50 | 51 | else: 52 | improvements = [] 53 | 54 | for weight, best_value, base_model in zip(self.model.weights_, self.best_values, self.model.base_models): 55 | if weight == 0: 56 | continue 57 | else: 58 | predictions, _ = base_model._predict(X, cov_return_type=None) 59 | improvement = np.maximum(best_value - predictions, 0).flatten() * weight 60 | improvements.append(improvement) 61 | 62 | improvements = np.sum(improvements, axis=0) 63 | 64 | rval = ei.flatten() * self.model.weights_[-1] + improvements 65 | rval = rval.reshape((-1, 1)) 66 | 67 | return rval 68 | -------------------------------------------------------------------------------- /rgpe/methods/tstr.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Dict, List, Optional, Tuple, Union 3 | 4 | from ConfigSpace import Configuration 5 | import numpy as np 6 | from smac.configspace import convert_configurations_to_array 7 | from smac.epm.base_epm import AbstractEPM 8 | from rgpe.utils import get_gaussian_process, copula_transform 9 
| 10 | 11 | class TSTR(AbstractEPM): 12 | 13 | def __init__( 14 | self, 15 | training_data: Dict[int, Dict[str, Union[List[Configuration], np.ndarray]]], 16 | bandwidth: float = 0.1, 17 | variance_mode: str = 'target', 18 | normalization: str = 'mean/var', 19 | weight_dilution_strategy: Union[int, str] = 'None', 20 | number_of_function_evaluations: float = 50, 21 | **kwargs 22 | ): 23 | """ 24 | Two-stage transfer surrogate with ranking from "Scalable Gaussian process-based 25 | transfer surrogates for hyperparameter optimization" by Wistuba, Schilling and 26 | Schmidt-Thieme, Machine Learning 2018, 27 | https://link.springer.com/article/10.1007/s10994-017-5684-y 28 | 29 | Parameters 30 | ---------- 31 | training_data 32 | Dictionary containing the training data for each meta-task. Mapping from an integer ( 33 | task ID) to a dictionary, which is a mapping from configuration to performance. 34 | bandwidth 35 | rho in the original paper 36 | variance_mode 37 | Can be either ``'average'`` to return the weighted average of the variance 38 | predictions of the individual models or ``'target'`` to only obtain the variance 39 | prediction of the target model. Changing this is only necessary to use the model 40 | together with the expected improvement. 41 | normalization 42 | Can be either: 43 | * ``None``: No normalization per task 44 | * ``'mean/var'``: Zero mean unit standard deviation normalization per task as 45 | proposed by Yogatama et al. (AISTATS 2014). 46 | * ``'Copula'``: Copula transform as proposed by Salinas et al., 2020 47 | weight_dilution_strategy 48 | Can be one of the following four: 49 | * ``'probabilistic-ld'``: the method presented in the paper 50 | * ``'probabilistic'``: the method presented in the paper, but without the time-dependent 51 | pruning of meta-models 52 | * an integer: a deterministic strategy described in https://arxiv.org/abs/1802.02219v1 53 | * ``None``: no weight dilution prevention 54 | number_of_function_evaluations 55 | Optimization horizon - used to compute the time-dependent factor in the probability 56 | of dropping base models for the weight dilution prevention strategy 57 | ``'probabilistic-ld'``. 
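        As a reading aid for ``_train`` below: with ``d`` the fraction of discordant
        (misranked) pairs between a base model's predictions and the target observations,
        the base-model weight follows the quadratic kernel
        ``0.75 * (1 - (d / bandwidth) ** 2)`` if ``d / bandwidth < 1`` and is 0 otherwise,
        while the target model always receives 0.75 before normalization. For example
        (illustrative numbers), with the default bandwidth of 0.1 and d = 0.05 the weight is
        0.75 * (1 - 0.5 ** 2) = 0.5625.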
58 | """ 59 | 60 | if kwargs.get('instance_features') is not None: 61 | raise NotImplementedError() 62 | super().__init__(**kwargs) 63 | self.training_data = training_data 64 | 65 | self.bandwidth = bandwidth 66 | self.rng = np.random.RandomState(self.seed) 67 | self.variance_mode = variance_mode 68 | self.normalization = normalization 69 | self.weight_dilution_strategy = weight_dilution_strategy 70 | self.number_of_function_evaluations = number_of_function_evaluations 71 | 72 | if self.normalization not in ['None', 'mean/var', 'Copula']: 73 | raise ValueError(self.normalization) 74 | 75 | base_models = [] 76 | for task in training_data: 77 | model = get_gaussian_process( 78 | bounds=self.bounds, 79 | types=self.types, 80 | configspace=self.configspace, 81 | rng=self.rng, 82 | kernel=None, 83 | ) 84 | Y = training_data[task]['y'] 85 | 86 | if self.normalization == 'mean/var': 87 | mean = Y.mean() 88 | std = Y.std() 89 | if std == 0: 90 | std = 1 91 | 92 | y_scaled = (Y - mean) / std 93 | y_scaled = y_scaled.flatten() 94 | elif self.normalization == 'Copula': 95 | y_scaled = copula_transform(Y) 96 | elif self.normalization == 'None': 97 | y_scaled = Y 98 | else: 99 | raise ValueError(self.normalization) 100 | configs = training_data[task]['configurations'] 101 | X = convert_configurations_to_array(configs) 102 | 103 | model.train( 104 | X=X, 105 | Y=y_scaled, 106 | ) 107 | base_models.append(model) 108 | self.base_models = base_models 109 | self.weights_over_time = [] 110 | 111 | def _train(self, X: np.ndarray, Y: np.ndarray) -> AbstractEPM: 112 | if self.normalization == 'mean/var': 113 | Y = Y.flatten() 114 | mean = Y.mean() 115 | std = Y.std() 116 | if std == 0: 117 | std = 1 118 | 119 | y_scaled = (Y - mean) / std 120 | self.Y_std_ = std 121 | self.Y_mean_ = mean 122 | elif self.normalization in ['None', 'Copula']: 123 | self.Y_mean_ = 0. 124 | self.Y_std_ = 1. 
125 | y_scaled = Y 126 | if self.normalization == 'Copula': 127 | y_scaled = copula_transform(Y) 128 | else: 129 | raise ValueError(self.normalization) 130 | 131 | target_model = get_gaussian_process( 132 | bounds=self.bounds, 133 | types=self.types, 134 | configspace=self.configspace, 135 | rng=self.rng, 136 | kernel=None, 137 | ) 138 | self.target_model = target_model.train(X, y_scaled) 139 | self.model_list_ = self.base_models + [target_model] 140 | 141 | weights = np.zeros(len(self.model_list_)) 142 | weights[-1] = 0.75 143 | 144 | discordant_pairs_per_task = {} 145 | 146 | for model_idx, model in enumerate(self.base_models): 147 | if X.shape[0] < 2: 148 | weights[model_idx] = 0.75 149 | else: 150 | mean, _ = model.predict(X) 151 | discordant_pairs = 0 152 | total_pairs = 0 153 | for i in range(X.shape[0]): 154 | for j in range(i + 1, X.shape[0]): 155 | if (Y[i] < Y[j]) ^ (mean[i] < mean[j]): 156 | discordant_pairs += 1 157 | total_pairs += 1 158 | t = discordant_pairs / total_pairs / self.bandwidth 159 | discordant_pairs_per_task[model_idx] = discordant_pairs 160 | if t < 1: # The paper says <=, but the code says < (https://github.com/wistuba/TST/blob/master/src/de/ismll/hylap/surrogateModel/TwoStageSurrogate.java) 161 | weights[model_idx] = 0.75 * (1 - t ** 2) 162 | else: 163 | weights[model_idx] = 0 164 | 165 | # perform model pruning 166 | # use this only for ablation 167 | if X.shape[0] >= 2: 168 | p_drop = [] 169 | if self.weight_dilution_strategy in ['probabilistic', 'probabilistic-ld']: 170 | for i in range(len(self.base_models)): 171 | concordant_pairs = total_pairs - discordant_pairs_per_task[i] 172 | proba_keep = concordant_pairs / total_pairs 173 | if self.weight_dilution_strategy == 'probabilistic-ld': 174 | proba_keep = proba_keep * (1 - len(X) / float(self.number_of_function_evaluations)) 175 | proba_drop = 1 - proba_keep 176 | p_drop.append(proba_drop) 177 | r = self.rng.rand() 178 | if r < proba_drop: 179 | weights[i] = 0 180 | elif self.weight_dilution_strategy == 'None': 181 | pass 182 | else: 183 | raise ValueError(self.weight_dilution_strategy) 184 | 185 | weights /= np.sum(weights) 186 | print(weights) 187 | self.weights_ = weights 188 | 189 | self.weights_over_time.append(weights) 190 | # create model and acquisition function 191 | return self 192 | 193 | def _predict(self, X: np.ndarray, cov_return_type: str = 'diagonal_cov') -> Tuple[np.ndarray, np.ndarray]: 194 | 195 | if cov_return_type != 'diagonal_cov': 196 | raise NotImplementedError(cov_return_type) 197 | 198 | # compute posterior for each model 199 | weighted_means = [] 200 | weighted_covars = [] 201 | 202 | # filter model with zero weights 203 | # weights on covariance matrices are weight**2 204 | non_zero_weight_indices = (self.weights_ ** 2 > 0).nonzero()[0] 205 | non_zero_weights = self.weights_[non_zero_weight_indices] 206 | # re-normalize 207 | non_zero_weights /= non_zero_weights.sum() 208 | 209 | for non_zero_weight_idx in range(non_zero_weight_indices.shape[0]): 210 | raw_idx = non_zero_weight_indices[non_zero_weight_idx].item() 211 | weight = non_zero_weights[non_zero_weight_idx] 212 | mean, covar = self.model_list_[raw_idx]._predict(X) 213 | 214 | weighted_means.append(weight * mean) 215 | 216 | if self.variance_mode == 'average': 217 | weighted_covars.append(covar * weight ** 2) 218 | elif self.variance_mode == 'target': 219 | if raw_idx + 1 == len(self.weights_): 220 | weighted_covars.append(covar) 221 | else: 222 | raise ValueError() 223 | 224 | if len(weighted_covars) == 0: 225 | if 
self.variance_mode != 'target': 226 | raise ValueError(self.variance_mode) 227 | _, covar = self.model_list_[-1]._predict(X, cov_return_type) 228 | weighted_covars.append(covar) 229 | 230 | # set mean and covariance to be the rank-weighted sum the means and covariances 231 | # of the base models and target model 232 | mean_x = np.sum(np.stack(weighted_means), axis=0) * self.Y_std_ + self.Y_mean_ 233 | covar_x = np.sum(weighted_covars, axis=0) * (self.Y_std_ ** 2) 234 | return mean_x, covar_x 235 | 236 | def sample_functions(self, X_test: np.ndarray, n_funcs: int=1) -> np.ndarray: 237 | """ 238 | Samples F function values from the current posterior at the N 239 | specified test points. 240 | 241 | Parameters 242 | ---------- 243 | X_test: np.ndarray (N, D) 244 | Input test points 245 | n_funcs: int 246 | Number of function values that are drawn at each test point. 247 | 248 | Returns 249 | ---------- 250 | function_samples: np.array(F, N) 251 | The F function values drawn at the N test points. 252 | """ 253 | 254 | # filter model with zero weights 255 | # weights on covariance matrices are weight**2 256 | non_zero_weight_indices = (self.weights_ ** 2 > 0).nonzero()[0] 257 | non_zero_weights = self.weights_[non_zero_weight_indices] 258 | # re-normalize 259 | non_zero_weights /= non_zero_weights.sum() 260 | 261 | samples = [] 262 | for non_zero_weight_idx in range(non_zero_weight_indices.shape[0]): 263 | raw_idx = non_zero_weight_indices[non_zero_weight_idx].item() 264 | weight = non_zero_weights[non_zero_weight_idx] 265 | 266 | funcs = self.model_list_[raw_idx].sample_functions(X_test, n_funcs) 267 | funcs = funcs * weight 268 | samples.append(funcs) 269 | samples = np.sum(samples, axis=0) 270 | return samples 271 | -------------------------------------------------------------------------------- /rgpe/methods/warmstarting_ac.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Dict, List, Optional, Tuple, Union 3 | 4 | from ConfigSpace import Configuration 5 | import numpy as np 6 | from sklearn.linear_model import SGDRegressor 7 | from smac.configspace import convert_configurations_to_array 8 | from smac.epm.base_epm import AbstractEPM 9 | 10 | from rgpe.utils import get_gaussian_process 11 | 12 | 13 | class WarmstartingAC(AbstractEPM): 14 | 15 | """Weighting method from "Warmstarting of Model-based Algorithm Configuration" by Lindauer 16 | and Hutter, AAAI 2018 17 | 18 | https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/download/17235/15829 19 | """ 20 | 21 | def __init__( 22 | self, 23 | training_data: Dict[int, Dict[str, Union[List[Configuration], np.ndarray]]], 24 | variance_mode: str = 'average', 25 | ** kwargs 26 | ): 27 | if kwargs.get('instance_features') is not None: 28 | raise NotImplementedError() 29 | super().__init__(**kwargs) 30 | self.training_data = training_data 31 | 32 | self.rng = np.random.RandomState(self.seed) 33 | self.variance_mode = variance_mode 34 | 35 | base_models = [] 36 | for task in training_data: 37 | model = get_gaussian_process( 38 | bounds=self.bounds, 39 | types=self.types, 40 | configspace=self.configspace, 41 | rng=self.rng, 42 | kernel=None, 43 | ) 44 | Y = training_data[task]['y'] 45 | mean = Y.mean() 46 | std = Y.std() 47 | if std == 0: 48 | std = 1 49 | 50 | y_scaled = (Y - mean) / std 51 | y_scaled = y_scaled.flatten() 52 | configs = training_data[task]['configurations'] 53 | X = convert_configurations_to_array(configs) 54 | 55 | model.train( 56 | X=X, 57 | Y=y_scaled, 58 | 
) 59 | base_models.append(model) 60 | self.base_models = base_models 61 | self.sgd = SGDRegressor(random_state=12345, warm_start=True, max_iter=100) 62 | 63 | self.weights_over_time = [] 64 | 65 | def _compute_weights(self, X, y): 66 | if X.shape[0] == 1: 67 | self.weights_ = np.ones(len(self.base_models) + 1) / (len(self.base_models) + 1) 68 | return 69 | predictions = [] 70 | for base_model in self.base_models: 71 | m, _ = base_model.predict(X) 72 | predictions.append(m.flatten()) 73 | loo_predictions = [] 74 | for i in range(X.shape[0]): 75 | X_tmp = list(X) 76 | x_loo = X_tmp[i] 77 | del X_tmp[i] 78 | X_tmp = np.array(X_tmp) 79 | y_tmp = list(y) 80 | del y_tmp[i] 81 | y_tmp = np.array(y_tmp) 82 | self.target_model._train(X_tmp, y_tmp, do_optimize=False) 83 | m, _ = self.target_model.predict(np.array([x_loo])) 84 | loo_predictions.append(m) 85 | predictions.append(np.array(loo_predictions).flatten()) 86 | predictions = np.array(predictions) 87 | self.sgd.fit(predictions.transpose(), y) 88 | self.weights_ = self.sgd.coef_ 89 | # Counteract the following weird failure case: 90 | # * all observations so far have the same value -> normalization makes them all 0.0 91 | # * all predictions via cross-validation have a value of 0.0 92 | # -> this results in SGD having all weights being zero 93 | if np.sum(self.weights_) == 0: 94 | self.weights_[-1] = 1 95 | 96 | def _train(self, X: np.ndarray, Y: np.ndarray) -> AbstractEPM: 97 | Y = Y.flatten() 98 | mean = Y.mean() 99 | std = Y.std() 100 | if std == 0: 101 | std = 1 102 | 103 | y_scaled = (Y - mean) / std 104 | self.Y_mean_ = mean 105 | self.Y_std_ = std 106 | 107 | target_model = get_gaussian_process( 108 | bounds=self.bounds, 109 | types=self.types, 110 | configspace=self.configspace, 111 | rng=self.rng, 112 | kernel=None, 113 | ) 114 | self.target_model = target_model.train(X, y_scaled) 115 | self.model_list_ = self.base_models + [target_model] 116 | self._compute_weights(X, Y) 117 | print('Weights', self.weights_) 118 | self.weights_over_time.append(self.weights_) 119 | 120 | # create model and acquisition function 121 | return self 122 | 123 | def _predict(self, X: np.ndarray, cov_return_type) -> Tuple[np.ndarray, np.ndarray]: 124 | 125 | # compute posterior for each model 126 | weighted_means = [] 127 | weighted_covars = [] 128 | 129 | # filter model with zero weights 130 | # weights on covariance matrices are weight**2 131 | non_zero_weight_indices = (self.weights_ ** 2 > 0).nonzero()[0] 132 | non_zero_weights = self.weights_[non_zero_weight_indices] 133 | # re-normalize 134 | non_zero_weights /= non_zero_weights.sum() 135 | 136 | for non_zero_weight_idx in range(non_zero_weight_indices.shape[0]): 137 | raw_idx = non_zero_weight_indices[non_zero_weight_idx].item() 138 | weight = non_zero_weights[non_zero_weight_idx] 139 | mean, covar = self.model_list_[raw_idx]._predict(X, cov_return_type) 140 | weighted_means.append(weight * mean) 141 | if self.variance_mode == 'average': 142 | weighted_covars.append(covar * weight) 143 | elif self.variance_mode == 'correct-average': 144 | weighted_covars.append(covar * weight ** 2) 145 | elif self.variance_mode == 'target': 146 | if raw_idx + 1 == len(self.weights_): 147 | weighted_covars.append(covar) 148 | else: 149 | raise ValueError() 150 | 151 | if self.variance_mode == 'target': 152 | assert len(weighted_covars) == 1 153 | 154 | # set mean and covariance to be the rank-weighted sum the means and covariances 155 | # of the 156 | # base models and target model 157 | mean_x = 
np.sum(np.stack(weighted_means), axis=0) * self.Y_std_ + self.Y_mean_ 158 | covar_x = np.sum(weighted_covars, axis=0) * (self.Y_std_ ** 2) 159 | return mean_x, covar_x 160 | -------------------------------------------------------------------------------- /rgpe/test_functions.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import os 3 | import pickle 4 | import sys 5 | from typing import Dict, Optional 6 | 7 | from ConfigSpace.configuration_space import Configuration, ConfigurationSpace 8 | from ConfigSpace.hyperparameters import ( 9 | UniformFloatHyperparameter, 10 | UniformIntegerHyperparameter, 11 | ) 12 | from hpolib.abstract_benchmark import AbstractBenchmark 13 | import hpolib 14 | import lockfile 15 | 16 | import numpy as np 17 | import scipy.optimize 18 | 19 | 20 | class Alpine1D(AbstractBenchmark): 21 | """Modified Alpine1D function as used in v1: https://arxiv.org/pdf/1802.02219v1.pdf""" 22 | 23 | def __init__(self, task, load_all=True, *args, **kwargs): 24 | super().__init__(*args, **kwargs) 25 | self.task = task 26 | 27 | @AbstractBenchmark._check_configuration 28 | def objective_function(self, configuration: Configuration, **kwargs) -> Dict: 29 | x = configuration['x'] 30 | 31 | shift = kwargs.get('task') 32 | if shift is None: 33 | shift = self.task 34 | shift = shift * np.pi / 12 35 | 36 | rval = (x * np.sin(x + np.pi + shift) + 0.1 * x) 37 | return {'function_value': rval} 38 | 39 | def objective_function_test(self, configuration: Configuration, **kwargs): 40 | raise NotImplementedError 41 | 42 | @staticmethod 43 | def get_configuration_space(): 44 | cs = ConfigurationSpace() 45 | cs.add_hyperparameter(UniformFloatHyperparameter('x', -10, 10)) 46 | return cs 47 | 48 | @staticmethod 49 | def get_meta_information(): 50 | return { 51 | 'num_function_evals': 50, 52 | 'name': 'Modified Alpine 1D', 53 | 'reference': """@inproceedings{feurer-automl18, 54 | author = {Matthias Feurer and Benjamin Letham and Eytan Bakshy}, 55 | title = {Scalable Meta-Learning for Bayesian Optimization using Ranking-Weighted Gaussian Process Ensembles}, 56 | booktitle = {ICML 2018 AutoML Workshop}, 57 | year = {2018}, 58 | month = jul, 59 | } 60 | """, 61 | } 62 | 63 | def get_num_base_tasks(self) -> int: 64 | return 5 65 | 66 | def get_empirical_f_opt(self, task: Optional[int] = None) -> float: 67 | """Return the empirical f_opt. 68 | 69 | Because ``get_meta_information`` is a static function it has no access to the actual 70 | function values predicted by the surrogate. This helper function gives access. 71 | 72 | Returns 73 | ------- 74 | Configuration 75 | """ 76 | if task is None: 77 | task = self.task 78 | 79 | cs = self.get_configuration_space() 80 | bounds = [(-10, 10)] 81 | def target(x, task): 82 | config = Configuration(cs, {'x': x[0]}) 83 | return float(self.objective_function(config, task=task)['function_value']) 84 | res = scipy.optimize.differential_evolution( 85 | func=target, bounds=bounds, args=(task, ), popsize=1000, polish=True, 86 | seed=self.rng, 87 | ) 88 | return res.fun 89 | 90 | def get_empirical_f_worst(self, task: Optional[int] = None) -> float: 91 | """Return the empirical f_worst. 92 | 93 | Because ``get_meta_information`` is a static function it has no access to the actual 94 | function values predicted by the surrogate. This helper function gives access. 
95 | 96 | Returns 97 | ------- 98 | Configuration 99 | """ 100 | if task is None: 101 | task = self.task 102 | 103 | cs = self.get_configuration_space() 104 | bounds = [(-10, 10)] 105 | def target(x, task): 106 | try: 107 | config = Configuration(cs, {'x': x[0]}) 108 | return -float(self.objective_function(config, task=task)['function_value']) 109 | except: 110 | return -1e10 111 | res = scipy.optimize.differential_evolution( 112 | func=target, bounds=bounds, args=(task, ), popsize=1000, polish=True, 113 | seed=self.rng, 114 | ) 115 | return -res.fun 116 | 117 | def get_meta_data(self, num_base_tasks: Optional[int] = None, fixed_grid: Optional[bool] = False): 118 | # Sample data for each base task 119 | if num_base_tasks is None: 120 | num_base_tasks = self.get_num_base_tasks() 121 | 122 | if fixed_grid: 123 | seed = self.rng.randint(0, 10000) 124 | else: 125 | seed = None 126 | 127 | data_by_task = {} 128 | for task in range(num_base_tasks + 1): 129 | if task == self.task: 130 | continue 131 | 132 | cs = self.get_configuration_space() 133 | if fixed_grid: 134 | cs.seed(seed) 135 | num_training_points = 20 136 | else: 137 | num_training_points = self.rng.randint(low=15, high=25) 138 | cs.seed(self.rng.randint(0, 10000)) 139 | configurations = cs.sample_configuration(num_training_points) 140 | 141 | # get observed values 142 | train_y = [ 143 | self.objective_function(config, task=task)['function_value'] 144 | for config in configurations 145 | ] 146 | train_y = np.array(train_y) 147 | # store training data 148 | data_by_task[task] = { 149 | # scale x to [0, 1] 150 | 'configurations': configurations, 151 | 'y': train_y, 152 | } 153 | 154 | return data_by_task 155 | 156 | 157 | num_dimensions = 3 158 | class Quadratic(AbstractBenchmark): 159 | """Quadratic function as used by Perrone et al., 2018""" 160 | 161 | def __init__(self, task, load_all=True, *args, **kwargs): 162 | super().__init__(*args, **kwargs) 163 | self.task = task 164 | self._functions = dict() 165 | self._sample_coefficients(task) 166 | self._cache_dir = os.path.join(hpolib._config.data_dir, "artificial", "quadratic") 167 | try: 168 | os.makedirs(self._cache_dir) 169 | except: 170 | pass 171 | 172 | def _sample_coefficients(self, task): 173 | rng = np.random.RandomState(task) 174 | coefficients = rng.rand(3) * (10 - 0.1) + 0.1 175 | self._functions[task] = coefficients 176 | 177 | @AbstractBenchmark._check_configuration 178 | def objective_function(self, configuration: Configuration, **kwargs) -> Dict: 179 | x = [] 180 | for i in range(1, num_dimensions + 1): 181 | x.append(configuration['x%d' % i]) 182 | x = np.array(x) 183 | 184 | task = kwargs.get('task') 185 | if task is None: 186 | task = self.task 187 | if task not in self._functions: 188 | self._sample_coefficients(task) 189 | a, b, c = self._functions[task] 190 | 191 | rval = 0.5 * a * np.linalg.norm(x) ** 2 + b * np.sum(x) + 3 * c 192 | return {'function_value': rval} 193 | 194 | def objective_function_test(self, configuration: Configuration, **kwargs): 195 | raise NotImplementedError 196 | 197 | @classmethod 198 | def get_configuration_space(cls): 199 | cs = ConfigurationSpace() 200 | for i in range(1, num_dimensions + 1): 201 | cs.add_hyperparameter(UniformFloatHyperparameter('x%d' % i, -5, 5)) 202 | return cs 203 | 204 | @staticmethod 205 | def get_meta_information(): 206 | return { 207 | 'num_function_evals': 50, 208 | 'name': '3D Quadratic Function', 209 | 'reference': """@incollection{NIPS2018_7917, 210 | title = {Scalable Hyperparameter Transfer Learning}, 
211 | author = {Perrone, Valerio and Jenatton, Rodolphe and Seeger, Matthias W and Archambeau, Cedric}, 212 | booktitle = {Advances in Neural Information Processing Systems 31}, 213 | editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett}, 214 | pages = {6845--6855}, 215 | year = {2018}, 216 | publisher = {Curran Associates, Inc.}, 217 | url = {http://papers.nips.cc/paper/7917-scalable-hyperparameter-transfer-learning.pdf} 218 | } 219 | """, 220 | } 221 | 222 | def get_num_base_tasks(self) -> int: 223 | return 29 224 | 225 | def get_cache_key(self) -> str: 226 | return '-'.join([str(float(entry)) for entry in self._functions[self.task]]) 227 | 228 | def get_empirical_f_opt(self) -> float: 229 | """Return the empirical f_opt. 230 | 231 | Because ``get_meta_information`` is a static function it has no access to the actual 232 | function values predicted by the surrogate. This helper function gives access. 233 | 234 | Returns 235 | ------- 236 | Configuration 237 | """ 238 | 239 | cache_key = self.get_cache_key() 240 | opt_file_name = os.path.join(self._cache_dir, cache_key + 'opt') 241 | 242 | while True: 243 | try: 244 | if not os.path.exists(opt_file_name): 245 | with lockfile.LockFile(opt_file_name, timeout=10): 246 | 247 | cs = self.get_configuration_space() 248 | bounds = [(-5, 5)] * num_dimensions 249 | 250 | def target(x, task): 251 | try: 252 | config = Configuration( 253 | cs, 254 | {'x%d' % (i + 1): x[i] for i in range(num_dimensions)}, 255 | ) 256 | return float( 257 | self.objective_function(config, task=task)['function_value']) 258 | except: 259 | return 1e10 260 | 261 | res = scipy.optimize.differential_evolution( 262 | func=target, bounds=bounds, args=(self.task,), popsize=1000, 263 | polish=True, 264 | seed=self.rng, 265 | ) 266 | opt = res.fun 267 | 268 | with open(opt_file_name, 'wb') as fh: 269 | pickle.dump(opt, fh) 270 | break 271 | else: 272 | try: 273 | with open(opt_file_name, 'rb') as fh: 274 | opt = pickle.load(fh) 275 | break 276 | except: 277 | continue 278 | except lockfile.LockTimeout: 279 | pass 280 | 281 | return opt 282 | 283 | 284 | def get_empirical_f_worst(self) -> float: 285 | """Return the empirical f_opt. 286 | 287 | Because ``get_meta_information`` is a static function it has no access to the actual 288 | function values predicted by the surrogate. This helper function gives access. 
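        As a quick sanity check on the surface itself (a derivation, not something this code
        computes): with objective 0.5 * a * ||x||^2 + b * sum(x) + 3 * c and a > 0, the
        unconstrained minimizer is x_i = -b / a in every dimension; the box [-5, 5] can clip
        this, so both the empirical optimum and the empirical worst are instead found
        numerically with differential evolution over the box.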
289 | 290 | Returns 291 | ------- 292 | Configuration 293 | """ 294 | 295 | cache_key = self.get_cache_key() 296 | opt_file_name = os.path.join(self._cache_dir, cache_key + 'worst') 297 | 298 | while True: 299 | try: 300 | if not os.path.exists(opt_file_name): 301 | with lockfile.LockFile(opt_file_name, timeout=10): 302 | 303 | cs = self.get_configuration_space() 304 | bounds = [(-5, 5)] * num_dimensions 305 | 306 | def target(x, task): 307 | try: 308 | config = Configuration( 309 | cs, 310 | {'x%d' % (i + 1): x[i] for i in range(num_dimensions)}, 311 | ) 312 | return -float(self.objective_function(config, task=task)['function_value']) 313 | except: 314 | return -1e10 315 | 316 | res = scipy.optimize.differential_evolution( 317 | func=target, bounds=bounds, args=(self.task,), popsize=1000, 318 | polish=True, 319 | seed=self.rng, 320 | ) 321 | opt = -res.fun 322 | 323 | with open(opt_file_name, 'wb') as fh: 324 | pickle.dump(opt, fh) 325 | break 326 | else: 327 | try: 328 | with open(opt_file_name, 'rb') as fh: 329 | opt = pickle.load(fh) 330 | break 331 | except: 332 | continue 333 | except lockfile.LockTimeout: 334 | pass 335 | 336 | return opt 337 | 338 | def get_meta_data(self, num_base_tasks: Optional[int] = None, fixed_grid: Optional[bool] = False): 339 | # Sample data for each base task 340 | if num_base_tasks is None: 341 | num_base_tasks = self.get_num_base_tasks() 342 | 343 | if fixed_grid: 344 | seed = self.rng.randint(0, 10000) 345 | else: 346 | seed = None 347 | 348 | data_by_task = {} 349 | for task_ in range(num_base_tasks + 1): 350 | if self.task == task_: 351 | continue 352 | 353 | cs = self.get_configuration_space() 354 | if fixed_grid: 355 | cs.seed(seed) 356 | else: 357 | cs.seed(self.rng.randint(0, 10000)) 358 | configurations = cs.sample_configuration(10) 359 | 360 | # get observed values 361 | train_y = [ 362 | self.objective_function(config, task=task_)['function_value'] 363 | for config in configurations 364 | ] 365 | train_y = np.array(train_y) 366 | # store training data 367 | data_by_task[task_] = { 368 | 'configurations': configurations, 369 | 'y': train_y, 370 | } 371 | 372 | return data_by_task 373 | 374 | 375 | _adaboost_data = None 376 | _svm_data = None 377 | 378 | 379 | class WistubaAndSchillingGrid(AbstractBenchmark): 380 | """Base class for SVM and Adaboost data used by Schilling et al. (ECML 2016) and Wistuba et 381 | al. 
(ECML 2016).""" 382 | 383 | _file_dir = None 384 | _name = None 385 | _num_hyperparameters = None 386 | _hp_lower_bounds = None 387 | _hp_upper_bounds = None 388 | 389 | def __init__(self, task, load_all=True, *args, **kwargs): 390 | super().__init__(*args, **kwargs) 391 | self.task = task 392 | self._cache_dir = os.path.join(hpolib._config.data_dir, 'WistubaAndSchilling') 393 | try: 394 | os.makedirs(self._cache_dir) 395 | except: 396 | pass 397 | self.data = self._load_data(load_all) 398 | 399 | def _load_data(self, load_all): 400 | global _adaboost_data 401 | global _svm_data 402 | if self._name == 'Adaboost': 403 | if _adaboost_data is not None: 404 | return _adaboost_data 405 | elif self._name == 'SVM': 406 | if _svm_data is not None: 407 | return _svm_data 408 | 409 | data = {} 410 | 411 | current_dir = os.path.abspath(os.path.dirname(__file__)) 412 | data_dir = os.path.join(current_dir, self._file_dir) 413 | files = [ 414 | 'A9A', 'W8A', 'abalone', 'appendicitis', 'australian', 'automobile', 'banana', 415 | 'bands', 'breast-cancer', 'bupa', 'car', 'chess', 'cod-rna', 'coil2000', 416 | 'colon-cancer', 'crx', 'diabetes', 'ecoli', 'german-numer', 'haberman', 417 | 'housevotes', 'ijcnn1', 'kr-vs-k', 'led7digit', 'letter', 'lymphography', 418 | 'magic', 'monk-2', 'pendigits', 'phoneme', 'pima', 'ring', 'saheart', 'segment', 419 | 'seismic', 'shuttle', 'sonar-scale', 'spambase', 'spectfheart', 'splice', 420 | 'tic-tac-toe', 'titanic', 'twonorm', 'usps', 'vehicle', 'wdbc', 'wine', 421 | 'winequality-red', 'wisconsin', 'yeast', 422 | ] 423 | 424 | cache_file = os.path.join(self._cache_dir, self._name + '.pkl') 425 | while True: 426 | 427 | try: 428 | with open(cache_file, 'rb') as fh: 429 | data = pickle.load(fh) 430 | break 431 | except: 432 | pass 433 | 434 | try: 435 | with lockfile.LockFile(cache_file, timeout=10): 436 | 437 | for i, file_name in enumerate(files): 438 | if not load_all and i != self.task: 439 | continue 440 | print(i, file_name) 441 | file_name = os.path.join(data_dir, file_name) 442 | with open(file_name) as fh: 443 | raw_data = fh.readlines() 444 | raw_data = [line.split(' ') for line in raw_data] 445 | targets = [1 - float(line[0]) for line in raw_data] 446 | print(len(raw_data), len(targets)) 447 | print(targets) 448 | configurations = [] 449 | for line in raw_data: 450 | line = line[1:] 451 | line = {int(entry.split(':')[0]): float(entry.split(':')[1]) for 452 | entry in line} 453 | config = Configuration( 454 | values={ 455 | 'x%d' % (j + 1): line.get(j, 0) 456 | for j in range(self._num_hyperparameters) 457 | }, 458 | configuration_space=self.get_configuration_space(), 459 | ) 460 | configurations.append(config) 461 | 462 | data[i] = {config: target for config, target in zip(configurations, targets)} 463 | 464 | with open(cache_file, 'wb') as fh: 465 | pickle.dump(data, fh) 466 | break 467 | except lockfile.LockTimeout: 468 | pass 469 | 470 | # Shuffle data after returning it 471 | for i in data: 472 | configurations = list(data[i].keys()) 473 | targets = list(data[i].values()) 474 | shuffle_indices = self.rng.permutation(list(range(len(configurations)))) 475 | configurations = [configurations[shuffle_indices[j]] for j in 476 | range(len(configurations))] 477 | targets = [targets[shuffle_indices[j]] for j in range(len(targets))] 478 | data[i] = {config: target for config, target in zip(configurations, targets)} 479 | 480 | if self._name == 'Adaboost': 481 | _adaboost_data = data 482 | elif self._name == 'SVM': 483 | _svm_data = data 484 | 485 | return data 486 | 
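    # Illustrative note on the raw grid format parsed in ``_load_data`` above (the example
    # line is made up): a row such as '0.83 0:0.5 1:0.25' is split on spaces, the first
    # token is an accuracy-like score that becomes function_value = 1 - 0.83 = 0.17, and
    # every 'index:value' token becomes hyperparameter x{index + 1}; missing indices
    # default to 0.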
487 | @AbstractBenchmark._check_configuration 488 | def objective_function(self, configuration: Configuration, **kwargs) -> Dict: 489 | print(configuration.origin) 490 | return {'function_value': self.data[self.task][configuration]} 491 | 492 | def objective_function_test(self, configuration: Configuration, **kwargs): 493 | raise NotImplementedError 494 | 495 | @classmethod 496 | def get_configuration_space(cls): 497 | cs = ConfigurationSpace() 498 | for i in range(cls._num_hyperparameters): 499 | cs.add_hyperparameter(UniformFloatHyperparameter( 500 | 'x%d' % (i + 1), 501 | cls._hp_lower_bounds[i], cls._hp_upper_bounds[i])) 502 | return cs 503 | 504 | @classmethod 505 | def get_meta_information(cls): 506 | return { 507 | 'num_function_evals': 50, 508 | 'name': '%s grid data' % cls._name, 509 | 'reference': """""", 510 | } 511 | 512 | def get_num_base_tasks(self) -> int: 513 | return 49 514 | 515 | def get_empirical_f_opt(self) -> float: 516 | """Return the empirical f_opt. 517 | 518 | Because ``get_meta_information`` is a static function it has no access to the actual 519 | function values predicted by the surrogate. This helper function gives access. 520 | 521 | Returns 522 | ------- 523 | Configuration 524 | """ 525 | 526 | return min(list(self.data[self.task].values())) 527 | 528 | def get_empirical_f_worst(self) -> float: 529 | """Return the empirical f_opt. 530 | 531 | Because ``get_meta_information`` is a static function it has no access to the actual 532 | function values predicted by the surrogate. This helper function gives access. 533 | 534 | Returns 535 | ------- 536 | Configuration 537 | """ 538 | 539 | return max(list(self.data[self.task].values())) 540 | 541 | def get_meta_data(self, num_base_tasks: Optional[int] = None, fixed_grid: Optional[bool] = False): 542 | # Sample data for each base task 543 | if num_base_tasks is None: 544 | num_base_tasks = self.get_num_base_tasks() 545 | 546 | data_by_task = {} 547 | 548 | if fixed_grid: 549 | indices = self.rng.choice( 550 | len(self.data[0]), 551 | replace=False, 552 | size=self.get_meta_information()['num_function_evals'], 553 | ) 554 | 555 | for task_ in range(num_base_tasks + 1): 556 | if self.task == task_: 557 | continue 558 | 559 | if not fixed_grid: 560 | indices = self.rng.choice( 561 | len(self.data[task_]), 562 | replace=False, 563 | size=self.get_meta_information()['num_function_evals'], 564 | ) 565 | data = self.data[task_] 566 | else: 567 | data = { 568 | key: self.data[task_][key] 569 | for key in sorted(self.data[task_], key=lambda c: np.sum(c.get_array())) 570 | } 571 | 572 | data = [(k, v) for i, (k, v) in enumerate(data.items()) if i in indices] 573 | configurations = [val[0] for val in data] 574 | train_y = np.array([val[1] for val in data]) 575 | 576 | # store training data 577 | data_by_task[task_] = { 578 | 'configurations': configurations, 579 | 'y': train_y, 580 | } 581 | 582 | return data_by_task 583 | 584 | 585 | class AdaboostGrid(WistubaAndSchillingGrid): 586 | _file_dir = 'adaboost' 587 | _name = 'Adaboost' 588 | _num_hyperparameters = 2 589 | _hp_lower_bounds = [0.07525749891599529, 0.2037950470905062] 590 | _hp_upper_bounds = [1, 1] 591 | 592 | 593 | class SVMGrid(WistubaAndSchillingGrid): 594 | _file_dir = 'svm' 595 | _name = 'SVM' 596 | _num_hyperparameters = 6 597 | _hp_lower_bounds = [0, 0, 0, -0.8333333333333334, -1, 0] 598 | _hp_upper_bounds = [1, 1, 1, 1.0, 0.75, 1.0] 599 | 600 | 601 | _nn_data = None 602 | class NNGrid(AbstractBenchmark): 603 | """LCBench as described in Zimmer et al., 
2021""" 604 | 605 | def __init__(self, task, load_all=True, *args, **kwargs): 606 | super().__init__(*args, **kwargs) 607 | self.task = task 608 | self._cache_dir = os.path.join(hpolib._config.data_dir, 'LCBench') 609 | try: 610 | os.makedirs(self._cache_dir) 611 | except: 612 | pass 613 | self.data = self._load_data() 614 | 615 | def _load_data(self): 616 | global _nn_data 617 | if _nn_data is not None: 618 | return _nn_data 619 | 620 | data = {} 621 | 622 | allowed_hp_names = self.get_configuration_space().get_hyperparameter_names() 623 | 624 | cache_file = os.path.join(self._cache_dir, 'hpobenchmark.pkl.gz') 625 | while True: 626 | try: 627 | 628 | import time 629 | try: 630 | st = time.time() 631 | with gzip.open(cache_file, 'rb') as fh: 632 | content = fh.read() 633 | data = pickle.loads(content) 634 | print(time.time() - st) 635 | break 636 | except Exception as e: 637 | pass 638 | 639 | with lockfile.LockFile(cache_file, timeout=10): 640 | 641 | sys.path.append('../../LCBench') 642 | from api import Benchmark 643 | 644 | data_dir = '../../LCBench/data_2k_lw.json' 645 | bench = Benchmark(data_dir=data_dir, cache=True, 646 | cache_dir=os.path.dirname(data_dir)) 647 | ds_names = bench.get_dataset_names() 648 | 649 | configuration_space = self.get_configuration_space() 650 | 651 | for i, ds_name in enumerate(ds_names): 652 | print(ds_name) 653 | configurations = [] 654 | targets = [] 655 | 656 | n_configs = bench.get_number_of_configs(ds_name) 657 | if n_configs is None: 658 | raise ValueError( 659 | 'Could not read the number of configs for dataset %s' % ds_name) 660 | 661 | for j in range(n_configs): 662 | try: 663 | config_dict = bench.query(ds_name, 'config', j) 664 | config_dict = { 665 | key: value for key, value in config_dict.items() 666 | if key in allowed_hp_names 667 | } 668 | except ValueError as e: 669 | continue 670 | try: 671 | config = Configuration( 672 | values=config_dict, 673 | configuration_space=configuration_space, 674 | ) 675 | except: 676 | print(config_dict) 677 | continue 678 | configurations.append(config) 679 | val_acc = 1 - bench.query(ds_name, "final_val_balanced_accuracy", j) 680 | targets.append(val_acc) 681 | data[i] = {config: target for config, target in 682 | zip(configurations, targets)} 683 | 684 | with gzip.open(cache_file, 'wb') as fh: 685 | pickle.dump(data, fh) 686 | break 687 | 688 | except lockfile.LockTimeout: 689 | pass 690 | 691 | # Shuffle data after returning it 692 | for i in data: 693 | configurations = list(data[i].keys()) 694 | targets = list(data[i].values()) 695 | shuffle_indices = self.rng.permutation(list(range(len(configurations)))) 696 | configurations = [configurations[shuffle_indices[j]] for j in 697 | range(len(configurations))] 698 | targets = [targets[shuffle_indices[j]] for j in range(len(targets))] 699 | data[i] = {config: target for config, target in zip(configurations, targets)} 700 | 701 | _nn_data = data 702 | 703 | return data 704 | 705 | @AbstractBenchmark._check_configuration 706 | def objective_function(self, configuration: Configuration, **kwargs) -> Dict: 707 | return {'function_value': self.data[self.task][configuration]} 708 | 709 | def objective_function_test(self, configuration: Configuration, **kwargs): 710 | raise NotImplementedError 711 | 712 | @classmethod 713 | def get_configuration_space(cls): 714 | cs = ConfigurationSpace() 715 | cs.add_hyperparameter(UniformIntegerHyperparameter('batch_size', 16, 512, log=True)) 716 | cs.add_hyperparameter(UniformFloatHyperparameter('learning_rate', 1e-4, 1e-1, 
log=True)) 717 | cs.add_hyperparameter(UniformFloatHyperparameter('momentum', 0.1, 0.99)) 718 | cs.add_hyperparameter(UniformFloatHyperparameter('weight_decay', 1e-5, 1e5)) 719 | cs.add_hyperparameter(UniformIntegerHyperparameter('num_layers', 1, 5)) 720 | cs.add_hyperparameter(UniformIntegerHyperparameter('max_units', 16, 1024, log=True)) 721 | cs.add_hyperparameter(UniformFloatHyperparameter('max_dropout', 0.0, 1.0)) 722 | return cs 723 | 724 | @classmethod 725 | def get_meta_information(cls): 726 | return { 727 | 'num_function_evals': 50, 728 | 'name': 'Neural Network grid data', 729 | 'reference': """""", 730 | } 731 | 732 | def get_num_base_tasks(self) -> int: 733 | n_base_tasks = len(self.data) 734 | assert n_base_tasks == 35 735 | return n_base_tasks - 1 736 | 737 | def get_empirical_f_opt(self) -> float: 738 | """Return the empirical f_opt. 739 | Because ``get_meta_information`` is a static function it has no access to the actual 740 | function values predicted by the surrogate. This helper function gives access. 741 | Returns 742 | ------- 743 | Configuration 744 | """ 745 | 746 | return min(list(self.data[self.task].values())) 747 | 748 | def get_empirical_f_worst(self) -> float: 749 | """Return the empirical f_opt. 750 | Because ``get_meta_information`` is a static function it has no access to the actual 751 | function values predicted by the surrogate. This helper function gives access. 752 | Returns 753 | ------- 754 | Configuration 755 | """ 756 | 757 | return max(list(self.data[self.task].values())) 758 | 759 | def get_meta_data(self, num_base_tasks: Optional[int] = None, fixed_grid: Optional[bool] = False): 760 | # Sample data for each base task 761 | if num_base_tasks is None: 762 | num_base_tasks = self.get_num_base_tasks() 763 | 764 | if fixed_grid: 765 | indices = self.rng.choice( 766 | len(self.data[0]), 767 | replace=False, 768 | size=self.get_meta_information()['num_function_evals'], 769 | ) 770 | 771 | data_by_task = {} 772 | for task_ in range(num_base_tasks + 1): 773 | if self.task == task_: 774 | continue 775 | 776 | if not fixed_grid: 777 | indices = self.rng.choice( 778 | len(self.data[task_]), 779 | replace=False, 780 | size=self.get_meta_information()['num_function_evals'], 781 | ) 782 | data = self.data[task_] 783 | else: 784 | data = { 785 | key: self.data[task_][key] 786 | for key in sorted(self.data[task_], key=lambda c: np.sum(c.get_array())) 787 | } 788 | 789 | data = [(k, v) for i, (k, v) in enumerate(data.items()) if i in indices] 790 | configurations = [val[0] for val in data] 791 | train_y = np.array([val[1] for val in data]) 792 | 793 | # store training data 794 | data_by_task[task_] = { 795 | 'configurations': configurations, 796 | 'y': train_y, 797 | } 798 | 799 | return data_by_task 800 | -------------------------------------------------------------------------------- /rgpe/utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import typing 3 | 4 | import botorch.sampling.qmc 5 | from ConfigSpace import ConfigurationSpace 6 | import numpy as np 7 | import scipy as sp 8 | from scipy.stats import norm 9 | from smac.epm.base_epm import AbstractEPM 10 | from smac.epm.gaussian_process import GaussianProcess 11 | from smac.epm.gp_base_prior import HorseshoePrior, LognormalPrior 12 | from smac.epm.gp_kernels import ConstantKernel, Matern, WhiteKernel, HammingKernel 13 | from smac.optimizer.acquisition import AbstractAcquisitionFunction 14 | 15 | 16 | def get_gaussian_process( 17 | configspace: 
ConfigurationSpace, 18 | types: typing.List[int], 19 | bounds: typing.List[typing.Tuple[float, float]], 20 | rng: np.random.RandomState, 21 | kernel, 22 | ) -> GaussianProcess: 23 | """Get the default GP class from SMAC. Sets the kernel and its hyperparameters for the 24 | problem at hand.""" 25 | 26 | if kernel is None: 27 | cov_amp = ConstantKernel( 28 | 2.0, 29 | constant_value_bounds=(np.exp(-10), np.exp(2)), 30 | prior=LognormalPrior(mean=0.0, sigma=1, rng=rng), 31 | ) 32 | 33 | cont_dims = np.where(np.array(types) == 0)[0] 34 | cat_dims = np.where(np.array(types) != 0)[0] 35 | 36 | if len(cont_dims) > 0: 37 | exp_kernel = Matern( 38 | np.ones([len(cont_dims)]), 39 | [(np.exp(-6.754111155189306), np.exp(0.0858637988771976)) for _ in 40 | range(len(cont_dims))], 41 | nu=2.5, 42 | operate_on=cont_dims, 43 | ) 44 | 45 | if len(cat_dims) > 0: 46 | ham_kernel = HammingKernel( 47 | np.ones([len(cat_dims)]), 48 | [(np.exp(-6.754111155189306), np.exp(0.0858637988771976)) for _ in 49 | range(len(cat_dims))], 50 | operate_on=cat_dims, 51 | ) 52 | 53 | assert len(cat_dims) + len(cont_dims) == len(configspace.get_hyperparameters()), ( 54 | len(cat_dims) + len(cont_dims), len(configspace.get_hyperparameters()) 55 | ) 56 | 57 | noise_kernel = WhiteKernel( 58 | noise_level=1e-8, 59 | noise_level_bounds=(np.exp(-25), np.exp(2)), 60 | prior=HorseshoePrior(scale=0.1, rng=rng), 61 | ) 62 | 63 | if len(cont_dims) > 0 and len(cat_dims) > 0: 64 | # both 65 | kernel = cov_amp * (exp_kernel * ham_kernel) + noise_kernel 66 | elif len(cont_dims) > 0 and len(cat_dims) == 0: 67 | # only cont 68 | kernel = cov_amp * exp_kernel + noise_kernel 69 | elif len(cont_dims) == 0 and len(cat_dims) > 0: 70 | # only cat 71 | kernel = cov_amp * ham_kernel + noise_kernel 72 | else: 73 | raise ValueError() 74 | else: 75 | kernel = copy.deepcopy(kernel) 76 | 77 | gp = GaussianProcess( 78 | kernel=kernel, 79 | normalize_y=True, 80 | seed=rng.randint(0, 2 ** 20), 81 | types=types, 82 | bounds=bounds, 83 | configspace=configspace, 84 | ) 85 | return gp 86 | 87 | 88 | def sample_sobol(loo_model, locations, num_samples, engine_seed): 89 | """Sample from a Sobol sequence.
Wraps the sampling to deal with the issue that the predictive 90 | covariance matrix might not be decomposable and fixes this by adding a small amount of noise 91 | to the diagonal.""" 92 | 93 | y_mean, y_cov = loo_model.predict(locations, cov_return_type='full_cov') 94 | initial_noise = 1e-14 95 | while initial_noise < 1: 96 | try: 97 | L = np.linalg.cholesky(y_cov + np.eye(len(locations)) * initial_noise) 98 | break 99 | except np.linalg.LinAlgError: 100 | initial_noise *= 10 101 | continue 102 | if initial_noise >= 1: 103 | rval = np.tile(y_mean, reps=num_samples).transpose() 104 | return rval 105 | 106 | engine = botorch.sampling.qmc.NormalQMCEngine(len(y_mean), seed=engine_seed, ) 107 | samples_alt = y_mean.flatten() + (engine.draw(num_samples).numpy() @ L) 108 | return samples_alt 109 | 110 | 111 | def copula_transform(values: np.ndarray) -> np.ndarray: 112 | 113 | """Copula transformation from "A Quantile-based Approach for Hyperparameter Transfer Learning" 114 | by Salinas, Shen and Perrone, ICML 2020""" 115 | 116 | quants = (sp.stats.rankdata(values.flatten()) - 1) / (len(values) - 1) 117 | cutoff = 1 / (4 * np.power(len(values), 0.25) * np.sqrt(np.pi * np.log(len(values)))) 118 | quants = np.clip(quants, a_min=cutoff, a_max=1-cutoff) 119 | # Inverse Gaussian CDF 120 | rval = np.array([sp.stats.norm.ppf(q) for q in quants]).reshape((-1, 1)) 121 | return rval 122 | 123 | 124 | class EI(AbstractAcquisitionFunction): 125 | 126 | """Computes for a given x the expected improvement as acquisition value. 127 | 128 | Uses only the target model of the ensemble to find ``x_best`` 129 | """ 130 | 131 | def __init__(self, 132 | model: AbstractEPM, 133 | par: float = 0.0): 134 | 135 | super(EI, self).__init__(model) 136 | self.long_name = 'Expected Improvement' 137 | self.par = par 138 | self._required_updates = ('model', ) 139 | 140 | def _compute(self, X: np.ndarray) -> np.ndarray: 141 | 142 | if len(X.shape) == 1: 143 | X = X[:, np.newaxis] 144 | 145 | eta = np.min(self.model.target_model.predict_marginalized_over_instances(X)) 146 | eta = eta * self.model.Y_std_ + self.model.Y_mean_ 147 | 148 | m, v = self.model.predict_marginalized_over_instances(X) 149 | print(eta, np.min(m)) 150 | s = np.sqrt(v) 151 | 152 | def calculate_f(): 153 | z = (eta - m - self.par) / s 154 | return (eta - m - self.par) * norm.cdf(z) + s * norm.pdf(z) 155 | 156 | if np.any(s == 0.0): 157 | # if std is zero, we have observed x on all instances 158 | # using a RF, std should be never exactly 0.0 159 | # Avoid zero division by setting all zeros in s to one. 160 | # Consider the corresponding results in f to be zero.
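# For example (hypothetical numbers): with eta = 0.5, m = 0.4 and s = 0.0, the division
# in calculate_f would be undefined; substituting s = 1.0 keeps the computation finite,
# and the affected entries of f are reset to 0.0 below, i.e. a point whose outcome the
# model already predicts with certainty offers no expected improvement.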
161 | self.logger.warning("Predicted std is 0.0 for at least one sample.") 162 | s_copy = np.copy(s) 163 | s[s_copy == 0.0] = 1.0 164 | f = calculate_f() 165 | f[s_copy == 0.0] = 0.0 166 | else: 167 | f = calculate_f() 168 | if (f < 0).any(): 169 | raise ValueError( 170 | "Expected Improvement is smaller than 0 for at least one " 171 | "sample.") 172 | return f 173 | 174 | -------------------------------------------------------------------------------- /scripts/generate_commands.py: -------------------------------------------------------------------------------- 1 | """Generate commands to reproduce experiments.""" 2 | 3 | import argparse 4 | import glob 5 | import itertools 6 | import os 7 | import random 8 | from typing import List 9 | 10 | parser = argparse.ArgumentParser() 11 | 12 | benchmarks = { 13 | 'alpine': (1, 50), 14 | 'quadratic': (30, 15), 15 | 'adaboost': (50, 15), 16 | 'svm': (50, 15), 17 | 'openml-glmnet': (38, 15), 18 | 'openml-svm': (38, 15), 19 | 'openml-xgb': (38, 15), 20 | 'nn': (35, 15), 21 | } 22 | 23 | normalization_to_initial_design = { 24 | 'None': 'unscaled', 25 | 'mean/var': 'scaled', 26 | 'Copula': 'copula', 27 | } 28 | 29 | all_setups = { 30 | "": "", 31 | "-learnedinit": "--learned-initial-design {learned_initial_design}", 32 | "-gpmetadata": "--empirical-meta-configs", 33 | "-gpmetadata-learnedinit": "--empirical-meta-configs --learned-initial-design {learned_initial_design}", 34 | "-gridmetadata": "--grid-meta-configs", 35 | "-gridmetadata-learnedinit": "--grid-meta-configs --learned-initial-design {learned_initial_design}" 36 | } 37 | all_setups_args = [setup[1:] if len(setup) > 0 else setup for setup in all_setups] + ['None'] 38 | 39 | parser.add_argument( 40 | '--benchmark', 41 | choices=benchmarks.keys(), 42 | required=True, 43 | help="Which benchmark to create the commands file for." 44 | ) 45 | parser.add_argument( 46 | '--setup', 47 | choices=all_setups_args, 48 | nargs='*', 49 | help="For which setup of meta-data (grid, from the pre-evaluated grid; gp, from a previous " 50 | "run of the GP) and learned init or not to create the commands." 51 | ) 52 | parser.add_argument( 53 | '--results-directory', 54 | type=str, 55 | help="If given, this script will check which output files already exist and not add those " 56 | "call to the commands file." 
57 | ) 58 | 59 | args = parser.parse_args() 60 | 61 | results_dir = args.results_directory 62 | 63 | if results_dir: 64 | glob_dir = '%s/*/*/*' % glob.escape(results_dir) 65 | available_files = glob.glob(glob_dir) 66 | to_drop = len(results_dir) 67 | available_files = set([available_file[to_drop:] for available_file in available_files]) 68 | else: 69 | available_file = [] 70 | 71 | def add_seeds_and_tasks( 72 | template: str, 73 | n_seeds: int, 74 | n_tasks: int, 75 | relative_output_file_template: str, 76 | ) -> List[str]: 77 | rval = [] 78 | for seed in range(n_seeds): 79 | for task_id in range(n_tasks): 80 | 81 | if results_dir: 82 | relative_output_file = relative_output_file_template.format(seed=seed, task_id=task_id) 83 | # output_file = os.path.join(results_dir, relative_output_file) 84 | if relative_output_file in available_files: 85 | continue 86 | 87 | rval.append(template.format(seed=seed, task_id=task_id)) 88 | return rval 89 | 90 | output_directory = "/home/feurerm/projects/2018_fb/results_smac" 91 | #output_directory = "/work/ws/nemo/fr_mf1066-2019_rgpe-0/" 92 | run_script = "python /home/feurerm/sync_dir/projects/2018_fb/rgpe_code/scripts/run_benchmark_smac.py" 93 | #run_script = "python /home/fr/fr_fr/fr_mf1066/repositories/2019_rgpe/rgpe/scripts/run_benchmark_smac.py" 94 | 95 | for benchmark, (n_tasks, n_seeds) in benchmarks.items(): 96 | if benchmark != args.benchmark: 97 | continue 98 | setups_args = args.setup 99 | for i in range(len(setups_args)): 100 | if setups_args[i] != 'None': 101 | setups_args[i] = '-' + setups_args[i] 102 | setups = {} 103 | for setup in all_setups: 104 | if setup in setups_args: 105 | setups[setup] = all_setups[setup] 106 | if 'None' in setups_args: 107 | setups[''] = all_setups[''] 108 | print(setups) 109 | 110 | commands = [] 111 | 112 | # Random search 113 | n_init = 1 114 | for multiplier in (1, 50): 115 | for setup_name, setup_string in setups.items(): 116 | for search_space_pruning in (False, True): 117 | if 'learned' in setup_name: 118 | continue 119 | elif not search_space_pruning and setup_name != '': 120 | continue 121 | 122 | filename = "{seed}_50_{task_id}.json" 123 | if search_space_pruning: 124 | method_name = "random%d-ssp%s-%d" % (multiplier, setup_name, n_init) 125 | else: 126 | method_name = "random%s%s-%d" % (multiplier, setup_name, n_init) 127 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 128 | output_file = os.path.join( 129 | output_directory, benchmark, method_name, filename) 130 | 131 | command = ( 132 | "{run_script} --benchmark {benchmark} --method random --seed {seed} " 133 | "--task {task_id} --iteration-multiplier {multiplier} --n-init {n_init} " 134 | "--output-file {output_file} {setup_string}" 135 | ) 136 | if search_space_pruning: 137 | command += " --search-space-pruning complete" 138 | 139 | template = command.format(**{ 140 | 'run_script': run_script, 141 | 'benchmark': benchmark, 142 | 'multiplier': multiplier, 143 | 'output_file': output_file, 144 | 'seed': '{seed}', 145 | 'n_init': n_init, 146 | 'task_id': '{task_id}', 147 | 'setup_string': setup_string, 148 | }) 149 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 150 | 151 | # Baseline 152 | for n_init in (10, 50): 153 | for method in ('gpmap', 'gcp'): 154 | for search_space_pruning in (False, True): 155 | for setup_name, setup_string in setups.items(): 156 | 157 | filename = "{seed}_50_{task_id}.json" 158 | 159 | if search_space_pruning: 160 | method_name = "%s-ssp%s-%d" 
% (method, setup_name, n_init) 161 | else: 162 | method_name = "%s%s-%d" % (method, setup_name, n_init) 163 | output_file = os.path.join( 164 | output_directory, benchmark, method_name, filename) 165 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 166 | command = ( 167 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 168 | "--task {task_id} {setup_string} --n-init {n_init} --output-file {output_file}" 169 | ) 170 | if search_space_pruning: 171 | command += " --search-space-pruning complete" 172 | template = command.format(**{ 173 | 'method': method, 174 | 'run_script': run_script, 175 | 'benchmark': benchmark, 176 | 'output_file': output_file, 177 | 'seed': '{seed}', 178 | 'task_id': '{task_id}', 179 | 'setup_string': setup_string.format( 180 | learned_initial_design='scaled' if method == 'gpmap' else 'copula', 181 | ), 182 | 'n_init': n_init, 183 | }) 184 | if benchmark == 'alpine' and method == 'gpmap' and setup_name == '': 185 | commands.extend(add_seeds_and_tasks(template, n_seeds, 6, relative_output_file_template)) 186 | pass 187 | else: 188 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 189 | pass 190 | 191 | # ABLR 192 | method = 'ablr' 193 | for n_init in (1, ): 194 | for normalization in ('mean/var', 'Copula'): 195 | for setup_name, setup_string in setups.items(): 196 | if 'learnedinit' not in setup_name: 197 | continue 198 | if normalization in ['Copula', 'mean/var'] and n_init < 2: 199 | n_init_ = 2 200 | else: 201 | n_init_ = 1 202 | filename = "{seed}_50_{task_id}.json" 203 | output_file = os.path.join( 204 | output_directory, benchmark, 205 | "%s-%s%s-%d" % (method, normalization.replace('/', ''), 206 | setup_name, n_init_), filename) 207 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 208 | command = ( 209 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 210 | "--normalization {normalization} " 211 | "--task {task_id} {setup_string} --n-init {n_init} --output-file {output_file}" 212 | ) 213 | template = command.format(**{ 214 | 'method': method, 215 | 'run_script': run_script, 216 | 'benchmark': benchmark, 217 | 'output_file': output_file, 218 | 'seed': '{seed}', 219 | 'task_id': '{task_id}', 220 | 'n_init': n_init_, 221 | 'setup_string': setup_string.format( 222 | learned_initial_design=normalization_to_initial_design[normalization] 223 | ), 224 | 'normalization': normalization, 225 | }) 226 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 227 | 228 | # TSTR 229 | method = 'tstr' 230 | for n_init in (1, ): 231 | for weight_dilution_strategy in ('None', 'probabilistic-ld'): 232 | for bandwidth in (0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0): 233 | for setup_name, setup_string in setups.items(): 234 | for normalization in ('mean/var', 'Copula', 'None'): 235 | for acquisition_function in ('targetEI', 'fullmodeltargetEI',): 236 | if 'learnedinit' not in setup_name: 237 | continue 238 | if normalization in ['Copula', 'mean/var'] and n_init < 2: 239 | n_init_ = 2 240 | else: 241 | n_init_ = n_init 242 | filename = "{seed}_50_{task_id}.json" 243 | method_name = "%s-%s-%s-%s-%f%s-%d" % ( 244 | method, acquisition_function, normalization.replace('/', ''), 245 | weight_dilution_strategy, 246 | bandwidth, setup_name, n_init_ 247 | ) 248 | output_file = os.path.join(output_directory, benchmark, method_name, filename) 249 | 
relative_output_file_template = os.path.join(benchmark, method_name, filename) 250 | command = ( 251 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 252 | "--task {task_id} {setup_string} --n-init {n_init} --bandwidth {bandwidth} " 253 | "--normalization {normalization} --output-file {output_file} " 254 | "--weight_dilution_strategy {weight_dilution_strategy} " 255 | ) 256 | if acquisition_function == 'targetEI': 257 | command = '%s --variance-mode target --acquisition-function-name EI' % command 258 | elif acquisition_function == 'fullmodeltargetEI': 259 | command = '%s --variance-mode target --acquisition-function-name fullmodelEI' % command 260 | else: 261 | raise ValueError(acquisition_function) 262 | 263 | template = command.format(**{ 264 | 'method': method, 265 | 'run_script': run_script, 266 | 'benchmark': benchmark, 267 | 'output_file': output_file, 268 | 'seed': '{seed}', 269 | 'task_id': '{task_id}', 270 | 'n_init': n_init_, 271 | 'setup_string': setup_string.format( 272 | learned_initial_design=normalization_to_initial_design[normalization] 273 | ), 274 | 'bandwidth': bandwidth, 275 | 'normalization': normalization, 276 | 'weight_dilution_strategy': weight_dilution_strategy, 277 | }) 278 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 279 | 280 | # KL-dirvergence-based distance measure 281 | method = 'klweighting' 282 | if benchmark not in ['openml-svm', 'openml-xgb']: 283 | for n_init in (2, ): 284 | for eta in (1, 2, 5, 10, 20, 50, 100): 285 | for setup_name, setup_string in setups.items(): 286 | if 'learnedinit' not in setup_name: 287 | continue 288 | filename = "{seed}_50_{task_id}.json" 289 | method_name = "%s-%f%s-%d" % (method, eta, setup_name, n_init) 290 | output_file = os.path.join(output_directory, benchmark, method_name, filename) 291 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 292 | command = ( 293 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 294 | "--task {task_id} {setup_string} --n-init {n_init} --eta {eta} " 295 | "--output-file {output_file} " 296 | ) 297 | template = command.format(**{ 298 | 'method': method, 299 | 'run_script': run_script, 300 | 'benchmark': benchmark, 301 | 'output_file': output_file, 302 | 'seed': '{seed}', 303 | 'task_id': '{task_id}', 304 | 'n_init': n_init, 305 | 'setup_string': setup_string.format(learned_initial_design='scaled'), 306 | 'eta': eta, 307 | }) 308 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 309 | 310 | # WAC 311 | for n_init in (2, ): 312 | for setup_name, setup_string in setups.items(): 313 | if 'learnedinit' not in setup_name: 314 | continue 315 | filename = "{seed}_50_{task_id}.json" 316 | method_name = "wac%s-%d" % (setup_name, n_init) 317 | output_file = os.path.join(output_directory, benchmark, method_name, filename) 318 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 319 | command = ( 320 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 321 | "--task {task_id} {setup_string} --n-init {n_init} --output-file {output_file} " 322 | "--acquisition-function-name EI" 323 | ) 324 | template = command.format(**{ 325 | 'method': 'wac', 326 | 'run_script': run_script, 327 | 'benchmark': benchmark, 328 | 'output_file': output_file, 329 | 'seed': '{seed}', 330 | 'task_id': '{task_id}', 331 | 'n_init': n_init, 332 | 'setup_string': 
setup_string.format(learned_initial_design='scaled'), 333 | }) 334 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 335 | 336 | # GCP+Prior 337 | for n_init in (2, ): 338 | for setup_name, setup_string in setups.items(): 339 | if 'learnedinit' not in setup_name: 340 | continue 341 | filename = "{seed}_50_{task_id}.json" 342 | method_name = "gcp+prior%s-%d" % (setup_name, n_init) 343 | output_file = os.path.join(output_directory, benchmark, method_name, filename) 344 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 345 | command = ( 346 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 347 | "--task {task_id} {setup_string} --n-init {n_init} --output-file {output_file} " 348 | "--acquisition-function-name EI" 349 | ) 350 | template = command.format(**{ 351 | 'method': 'gcp+prior', 352 | 'run_script': run_script, 353 | 'benchmark': benchmark, 354 | 'output_file': output_file, 355 | 'seed': '{seed}', 356 | 'task_id': '{task_id}', 357 | 'n_init': n_init, 358 | 'setup_string': setup_string.format(learned_initial_design='copula'), 359 | }) 360 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 361 | 362 | # TAF-TSTR 363 | method = 'taf' 364 | for n_init in (1, ): 365 | for weight_dilution_strategy in ('None', 'probabilistic-ld'): 366 | for bandwidth in (0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0): 367 | for setup_name, setup_string in setups.items(): 368 | for normalization in ['None', 'Copula', 'mean/var']: 369 | if 'learnedinit' not in setup_name: 370 | continue 371 | if normalization in ['Copula', 'mean/var'] and n_init < 2: 372 | n_init_ = 2 373 | else: 374 | n_init_ = n_init 375 | filename = "{seed}_50_{task_id}.json" 376 | method_name = "%s-tstr-%s-%s-%f%s-%d" % ( 377 | method, normalization.replace('/', ''), weight_dilution_strategy, 378 | bandwidth, setup_name, n_init_, 379 | ) 380 | output_file = os.path.join(output_directory, benchmark, method_name, filename) 381 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 382 | command = ( 383 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 384 | "--task {task_id} {setup_string} --bandwidth {bandwidth} " 385 | "--weighting-mode tstr --n-init {n_init} --normalization {normalization} " 386 | "--weight_dilution_strategy {weight_dilution_strategy} " 387 | "--output-file {output_file} " 388 | ) 389 | template = command.format(**{ 390 | 'method': method, 391 | 'run_script': run_script, 392 | 'benchmark': benchmark, 393 | 'output_file': output_file, 394 | 'seed': '{seed}', 395 | 'task_id': '{task_id}', 396 | 'n_init': n_init_, 397 | 'setup_string': setup_string.format( 398 | learned_initial_design=normalization_to_initial_design[normalization]), 399 | 'bandwidth': bandwidth, 400 | 'normalization': normalization, 401 | 'weight_dilution_strategy': weight_dilution_strategy, 402 | }) 403 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 404 | 405 | # TAF-RGPE 406 | method = 'taf' 407 | for n_init in (1, ): 408 | for weight_dilution_strategy in ('None', 'probabilistic-ld'): 409 | for sampling_strategy in ('bootstrap', ): 410 | for normalization in ('None', 'Copula', 'mean/var'): 411 | for setup_name, setup_string in setups.items(): 412 | if 'learnedinit' not in setup_name and n_init == 1: 413 | continue 414 | if normalization in ['Copula', 'mean/var'] and n_init < 2: 415 | n_init_ 
= 2 416 | else: 417 | n_init_ = n_init 418 | filename = "{seed}_50_{task_id}.json" 419 | method_name = "%s-rgpe-%s-%s-%s-1000-%s-%d" % ( 420 | method, sampling_strategy, normalization.replace('/', ''), 421 | weight_dilution_strategy, setup_name, n_init_, 422 | ) 423 | output_file = os.path.join(output_directory, benchmark, method_name, filename) 424 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 425 | command = ( 426 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 427 | "--task {task_id} {setup_string} --weight-dilution-strategy {weight_dilution_strategy} " 428 | "--sampling-mode {sampling_strategy} " 429 | "--weighting-mode rgpe --n-init {n_init} --output-file {output_file} " 430 | "--normalization {normalization}" 431 | ) 432 | 433 | template = command.format(**{ 434 | 'method': method, 435 | 'run_script': run_script, 436 | 'benchmark': benchmark, 437 | 'output_file': output_file, 438 | 'seed': '{seed}', 439 | 'task_id': '{task_id}', 440 | 'n_init': n_init_, 441 | 'setup_string': setup_string.format( 442 | learned_initial_design=normalization_to_initial_design[normalization]), 443 | 'weight_dilution_strategy': weight_dilution_strategy, 444 | 'sampling_strategy': sampling_strategy, 445 | 'normalization': normalization, 446 | }) 447 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 448 | 449 | # RGPE with Mixture of Gaussian Processes 450 | for method in ('rmogp', ): 451 | for n_init in (1, ): 452 | for ( 453 | weight_dilution_strategy, sampling_strategy, use_expectation, 454 | use_global_incumbent, num_posterior_samples, alpha, normalization, 455 | ) in ( 456 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 0.0, 'None'), 457 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 0.0, 'mean/var'), 458 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 0.0, 'Copula'), 459 | ('None', 'bootstrap', 'True', 'False', 1000, 0.0, 'mean/var'), 460 | ('None', 'bootstrap', 'True', 'False', 1000, 0.0, 'None'), 461 | ('probabilistic-ld', 'correct', 'True', 'False', 1000, 0.0, 'mean/var'), 462 | ('probabilistic-ld', 'correct', 'True', 'False', 1000, 0.0, 'None'), 463 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 100, 0.0, 'mean/var'), 464 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 100, 0.0, 'None'), 465 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 10000, 0.0, 'mean/var'), 466 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 10000, 0.0, 'None'), 467 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 100000, 0.0, 'mean/var'), 468 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 100000, 0.0, 'None'), 469 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 0.1, 'mean/var'), 470 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 0.1, 'None'), 471 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 0.2, 'mean/var'), 472 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 0.2, 'None'), 473 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 0.5, 'mean/var'), 474 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 0.5, 'None'), 475 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 1, 'mean/var'), 476 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 1, 'None'), 477 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 2, 'mean/var'), 478 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 2, 'None'), 479 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 5, 'mean/var'), 
480 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 5, 'None'), 481 | ): 482 | 483 | for setup_name, setup_string in setups.items(): 484 | if 'learnedinit' not in setup_name and n_init == 1: 485 | continue 486 | if normalization in ['Copula', 'mean/var'] and n_init < 2: 487 | n_init_ = 2 488 | else: 489 | n_init_ = n_init 490 | filename = "{seed}_50_{task_id}.json" 491 | method_name = "%s-rgpe-%s-%s-%s-%s-%s-%d-%s%s-%d" % ( 492 | method, sampling_strategy, normalization.replace('/', ''), 493 | weight_dilution_strategy, 494 | 'expectation' if use_expectation == 'True' else 'improvement', 495 | 'global' if use_global_incumbent == 'True' else 'local', 496 | num_posterior_samples, alpha, setup_name, n_init_, 497 | ) 498 | output_file = os.path.join(output_directory, benchmark, method_name, filename) 499 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 500 | command = ( 501 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 502 | "--task {task_id} {setup_string} --weight-dilution-strategy {weight_dilution_strategy} " 503 | "--sampling-mode {sampling_strategy} " 504 | "--use-expectation {use_expectation} " 505 | "--use-global-incumbent {use_global_incumbent} " 506 | "--weighting-mode rgpe --n-init {n_init} --output-file {output_file} " 507 | "--normalization {normalization} --num-posterior-samples {num_posterior_samples} " 508 | "--alpha {alpha}" 509 | ) 510 | 511 | template = command.format(**{ 512 | 'method': method, 513 | 'run_script': run_script, 514 | 'benchmark': benchmark, 515 | 'output_file': output_file, 516 | 'seed': '{seed}', 517 | 'task_id': '{task_id}', 518 | 'n_init': n_init_, 519 | 'setup_string': setup_string.format( 520 | learned_initial_design=normalization_to_initial_design[normalization]), 521 | 'weight_dilution_strategy': weight_dilution_strategy, 522 | 'sampling_strategy': sampling_strategy, 523 | 'normalization': normalization, 524 | 'num_posterior_samples': num_posterior_samples, 525 | 'use_global_incumbent': use_global_incumbent, 526 | 'use_expectation': use_expectation, 527 | 'alpha': alpha, 528 | }) 529 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 530 | 531 | 532 | # RGPE 533 | method = 'rgpe' 534 | for n_init in (1, ): 535 | for sampling_strategy in ('bootstrap', ): 536 | for weight_dilution_strategy in ('None', 'probabilistic-ld'): 537 | for acquisition_function in ('NoisyEI', 'fullmodelNoisyEI', 538 | 'targetEI', 'fullmodeltargetEI', 539 | 'CFNEI', 540 | 'EI', 'fullmodelEI'): 541 | for normalization in ('mean/var', 'Copula', 'None'): 542 | for setup_name, setup_string in setups.items(): 543 | if 'learnedinit' not in setup_name: 544 | continue 545 | if normalization in ['Copula', 'mean/var'] and n_init < 2: 546 | n_init_ = 2 547 | else: 548 | n_init_ = n_init 549 | filename = "{seed}_50_{task_id}.json" 550 | method_name = "%s-%s-%s-%s-%s-1000-%s-%d" % ( 551 | method, sampling_strategy, normalization.replace('/', ''), 552 | weight_dilution_strategy, acquisition_function, setup_name, 553 | n_init_) 554 | output_file = os.path.join( 555 | output_directory, benchmark, method_name, filename, 556 | ) 557 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 558 | command = ( 559 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 560 | "--task {task_id} {setup_string} --weight-dilution-strategy {weight_dilution_strategy} " 561 | "--sampling-mode {sampling_strategy} --normalization {normalization} " 562 | 
"--num-posterior-samples 1000 --n-init {n_init} --output-file {output_file} " 563 | "--variance-mode average " 564 | ) 565 | if acquisition_function == 'targetEI': 566 | command = '%s --variance-mode target --acquisition-function-name EI' % command 567 | elif acquisition_function == 'fullmodeltargetEI': 568 | command = '%s --variance-mode target --acquisition-function-name fullmodelEI' % command 569 | elif acquisition_function == 'NoisyEI': 570 | command = '%s --acquisition-function-name 30 ' \ 571 | '--target-model-incumbent True' % command 572 | elif acquisition_function == 'fullmodelNoisyEI': 573 | command = '%s --acquisition-function-name 30 ' \ 574 | '--target-model-incumbent False' % command 575 | else: 576 | command = '%s --acquisition-function-name %s' % (command, acquisition_function) 577 | 578 | template = command.format(**{ 579 | 'method': method, 580 | 'run_script': run_script, 581 | 'benchmark': benchmark, 582 | 'output_file': output_file, 583 | 'seed': '{seed}', 584 | 'task_id': '{task_id}', 585 | 'n_init': n_init_, 586 | 'setup_string': setup_string.format( 587 | learned_initial_design=normalization_to_initial_design[normalization]), 588 | 'weight_dilution_strategy': weight_dilution_strategy, 589 | 'sampling_strategy': sampling_strategy, 590 | 'acquisition_function': acquisition_function, 591 | 'normalization': normalization, 592 | }) 593 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 594 | 595 | # Old RGPE 596 | method = 'rgpe' 597 | for n_init in (1, ): 598 | for weight_dilution_strategy in ('None', '95'): 599 | for sampling_strategy in ('correct', ): 600 | for acquisition_function in ('fullmodelNoisyEI', ): 601 | for normalization in ('mean/var', 'Copula', 'None'): 602 | for setup_name, setup_string in setups.items(): 603 | if 'learnedinit' not in setup_name: 604 | continue 605 | if normalization in ['Copula', 'mean/var'] and n_init < 2: 606 | n_init_ = 2 607 | else: 608 | n_init_ = n_init 609 | filename = "{seed}_50_{task_id}.json" 610 | method_name = "%s-%s-%s-%s-%s-1000-%s-%d" % ( 611 | method, sampling_strategy, normalization.replace('/', ''), 612 | weight_dilution_strategy, acquisition_function, setup_name, 613 | n_init_) 614 | output_file = os.path.join( 615 | output_directory, benchmark, method_name, filename, 616 | ) 617 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 618 | command = ( 619 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 620 | "--task {task_id} {setup_string} --weight-dilution-strategy {weight_dilution_strategy} " 621 | "--sampling-mode {sampling_strategy} --normalization {normalization} " 622 | "--num-posterior-samples 1000 --n-init {n_init} --output-file {output_file} " 623 | "--variance-mode average " 624 | ) 625 | 626 | if acquisition_function == 'NoisyEI': 627 | command = '%s --acquisition-function-name 30 ' \ 628 | '--target-model-incumbent True' % command 629 | elif acquisition_function == 'fullmodelNoisyEI': 630 | command = '%s --acquisition-function-name 30 ' \ 631 | '--target-model-incumbent False' % command 632 | else: 633 | raise ValueError(acquisition_function) 634 | 635 | template = command.format(**{ 636 | 'method': method, 637 | 'run_script': run_script, 638 | 'benchmark': benchmark, 639 | 'output_file': output_file, 640 | 'seed': '{seed}', 641 | 'task_id': '{task_id}', 642 | 'n_init': n_init_, 643 | 'setup_string': setup_string.format( 644 | learned_initial_design=normalization_to_initial_design[normalization]), 645 | 
'weight_dilution_strategy': weight_dilution_strategy, 646 | 'sampling_strategy': sampling_strategy, 647 | 'acquisition_function': acquisition_function, 648 | 'normalization': normalization, 649 | }) 650 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 651 | 652 | random.shuffle(commands) 653 | print(len(commands)) 654 | string = "\n".join(commands) 655 | 656 | commands_file = os.path.join(output_directory, benchmark, 'commands.txt') 657 | print(commands_file) 658 | with open(commands_file, 'w') as fh: 659 | fh.write(string) 660 | -------------------------------------------------------------------------------- /scripts/install.sh: -------------------------------------------------------------------------------- 1 | conda create -n rgpe -y 2 | source activate rgpe 3 | which conda 4 | conda env list 5 | conda install python=3.7 numpy=1.18.1 scipy=1.4.1 scikit-learn=0.22.1 gxx_linux-64 gcc_linux-64 \ 6 | swig cython=0.29.13 ipython jupyter matplotlib pandas=0.25 -y 7 | pip install ConfigSpace==0.4.11 pyrfr==0.8.0 8 | pip install git+https://github.com/automl/HPOlib1.5@0449121d31e0dcd4f63435ba5b27a0dee5bbd55f --no-deps 9 | pip install smac[all]==0.12.3 10 | pip install torch==1.5.0+cpu torchvision==0.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html 11 | pip install botorch==0.2.5 12 | pip install lockfile 13 | --------------------------------------------------------------------------------
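As a closing illustration, the utilities above can also be exercised outside of the benchmark scripts. The following is a minimal sketch (not part of the repository) showing how `get_gaussian_process` and `copula_transform` from `rgpe/utils.py` might be used on a toy continuous space; it assumes the versions pinned in `scripts/install.sh` (SMAC 0.12.3, ConfigSpace 0.4.11) and that the `rgpe` package is importable:

import numpy as np
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter

from rgpe.utils import copula_transform, get_gaussian_process

rng = np.random.RandomState(1)

# Toy 2-dimensional continuous search space (stand-in for a benchmark space).
cs = ConfigurationSpace()
cs.add_hyperparameter(UniformFloatHyperparameter('x1', 0.0, 1.0))
cs.add_hyperparameter(UniformFloatHyperparameter('x2', 0.0, 1.0))
cs.seed(1)

configs = cs.sample_configuration(10)
X = np.array([config.get_array() for config in configs])
y = rng.rand(10, 1)  # hypothetical observed errors

# Quantile-based Copula transformation of the targets (normalization='Copula').
y_copula = copula_transform(y)

# Default GP used throughout the methods; a type of 0 marks a continuous dimension.
gp = get_gaussian_process(
    configspace=cs,
    types=[0, 0],
    bounds=[(0.0, 1.0), (0.0, 1.0)],
    rng=rng,
    kernel=None,
)
gp.train(X, y_copula)
mean, variance = gp.predict(X)

Here `types=[0, 0]` follows SMAC's encoding of continuous dimensions, which is what `get_gaussian_process` uses to choose between the Matérn and Hamming kernels.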