├── COPYING
├── environment.yaml
├── readme.md
├── rgpe
│   ├── __init__.py
│   ├── exploring_openml.py
│   ├── methods
│   │   ├── GCPplusPrior.py
│   │   ├── __init__.py
│   │   ├── ablr.py
│   │   ├── kl_weighting.py
│   │   ├── noisy_ei.py
│   │   ├── rgpe.py
│   │   ├── rmogp.py
│   │   ├── taf.py
│   │   ├── tstr.py
│   │   └── warmstarting_ac.py
│   ├── test_functions.py
│   └── utils.py
└── scripts
    ├── generate_commands.py
    ├── install.sh
    └── run_benchmark.py

/COPYING:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2022 Matthias Feurer
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/environment.yaml:
--------------------------------------------------------------------------------
1 | name: rgpe
2 | channels:
3 | - defaults
4 | dependencies:
5 | - _libgcc_mutex=0.1=main
6 | - attrs=19.3.0=py_0
7 | - backcall=0.2.0=py_0
8 | - binutils_impl_linux-64=2.33.1=he6710b0_7
9 | - binutils_linux-64=2.33.1=h9595d00_15
10 | - blas=1.0=mkl
11 | - bleach=3.1.5=py_0
12 | - ca-certificates=2020.6.24=0
13 | - certifi=2020.6.20=py37_0
14 | - cycler=0.10.0=py37_0
15 | - cython=0.29.13=py37he6710b0_0
16 | - dbus=1.13.16=hb2f20db_0
17 | - decorator=4.4.2=py_0
18 | - defusedxml=0.6.0=py_0
19 | - entrypoints=0.3=py37_0
20 | - expat=2.2.9=he6710b0_2
21 | - fontconfig=2.13.0=h9420a91_0
22 | - freetype=2.10.2=h5ab3b9f_0
23 | - gcc_impl_linux-64=7.3.0=habb00fd_1
24 | - gcc_linux-64=7.3.0=h553295d_15
25 | - glib=2.65.0=h3eb4bd4_0
26 | - gst-plugins-base=1.14.0=hbbd80ab_1
27 | - gstreamer=1.14.0=hb31296c_0
28 | - gxx_impl_linux-64=7.3.0=hdf63c60_1
29 | - gxx_linux-64=7.3.0=h553295d_15
30 | - icu=58.2=he6710b0_3
31 | - importlib-metadata=1.7.0=py37_0
32 | - importlib_metadata=1.7.0=0
33 | - intel-openmp=2020.1=217
34 | - ipykernel=5.3.4=py37h5ca1d4c_0
35 | - ipython=7.16.1=py37h5ca1d4c_0
36 | - ipython_genutils=0.2.0=py37_0
37 | - ipywidgets=7.5.1=py_0
38 | - jedi=0.17.2=py37_0
39 | - jinja2=2.11.2=py_0
40 | - joblib=0.16.0=py_0
41 | - jpeg=9b=h024ee3a_2
42 | - jsonschema=3.2.0=py37_1
43 | - jupyter=1.0.0=py37_7
44 | - jupyter_client=6.1.6=py_0
45 | - jupyter_console=6.1.0=py_0
46 | - jupyter_core=4.6.3=py37_0
47 | - kiwisolver=1.2.0=py37hfd86e86_0
48 | - ld_impl_linux-64=2.33.1=h53a641e_7
49 | - libedit=3.1.20191231=h14c3975_1
50 | - libffi=3.3=he6710b0_2
51 | - libgcc-ng=9.1.0=hdf63c60_0
52 | - libgfortran-ng=7.3.0=hdf63c60_0
53 | - libpng=1.6.37=hbc83047_0
54 | - libsodium=1.0.18=h7b6447c_0
55 | - libstdcxx-ng=9.1.0=hdf63c60_0
56 | - libuuid=1.0.3=h1bed415_2
57 | - libxcb=1.14=h7b6447c_0
58 | - libxml2=2.9.10=he19cac6_1
59 | - markupsafe=1.1.1=py37h14c3975_1
60 | - matplotlib=3.2.2=0
61 | - matplotlib-base=3.2.2=py37hef1b27d_0
62 | - mistune=0.8.4=py37h14c3975_1001
63 | - mkl=2020.1=217
64 | - mkl-service=2.3.0=py37he904b0f_0
65 | - mkl_fft=1.1.0=py37h23d657b_0
66 | - mkl_random=1.1.1=py37h0573a6f_0
67 | - nbconvert=5.6.1=py37_1
68 | - nbformat=5.0.7=py_0
69 | - ncurses=6.2=he6710b0_1
70 | - notebook=6.0.3=py37_0
71 | - numpy=1.18.1=py37h4f9e942_0
72 | - numpy-base=1.18.1=py37hde5b4d6_1
73 | - openssl=1.1.1g=h7b6447c_0
74 | - packaging=20.4=py_0
75 | - pandas=0.25.3=py37he6710b0_0
76 | - pandoc=2.10=0
77 | - pandocfilters=1.4.2=py37_1
78 | - parso=0.7.0=py_0
79 | - pcre=8.44=he6710b0_0
80 | - pexpect=4.8.0=py37_1
81 | - pickleshare=0.7.5=py37_1001
82 | - pip=20.1.1=py37_1
83 | - prometheus_client=0.8.0=py_0
84 | - prompt-toolkit=3.0.5=py_0
85 | - prompt_toolkit=3.0.5=0
86 | - ptyprocess=0.6.0=py37_0
87 | - pygments=2.6.1=py_0
88 | - pyparsing=2.4.7=py_0
89 | - pyqt=5.9.2=py37h05f1152_2
90 | - pyrsistent=0.16.0=py37h7b6447c_0
91 | - python=3.7.7=hcff3b4d_5
92 | - python-dateutil=2.8.1=py_0
93 | - pytz=2020.1=py_0
94 | - pyzmq=19.0.1=py37he6710b0_1
95 | - qt=5.9.7=h5867ecd_1
96 | - qtconsole=4.7.5=py_0
97 | - qtpy=1.9.0=py_0
98 | - readline=8.0=h7b6447c_0
99 | - scikit-learn=0.22.1=py37hd81dba3_0
100 | - scipy=1.4.1=py37h0b6359f_0
101 | - send2trash=1.5.0=py37_0
102 | - setuptools=49.2.0=py37_0
103 | - sip=4.19.8=py37hf484d3e_0
104 | - six=1.15.0=py_0
105 | - sqlite=3.32.3=h62c20be_0
106 | - swig=3.0.12=h38cdd7d_3
107 | - terminado=0.8.3=py37_0
108 | - testpath=0.4.4=py_0
109 | - tk=8.6.10=hbc83047_0
110 | - tornado=6.0.4=py37h7b6447c_1
111 | - traitlets=4.3.3=py37_0
112 | - wcwidth=0.2.5=py_0
113 | - webencodings=0.5.1=py37_1
114 | - wheel=0.34.2=py37_0
115 | - widgetsnbextension=3.5.1=py37_0
116 | - xz=5.2.5=h7b6447c_0
117 | - zeromq=4.3.2=he6710b0_2
118 | - zipp=3.1.0=py_0
119 | - zlib=1.2.11=h7b6447c_3
120 | - pip:
121 | - alabaster==0.7.12
122 | - babel==2.8.0
123 | - botorch==0.2.5
124 | - chardet==3.0.4
125 | - configspace==0.4.11
126 | - docutils==0.16
127 | - emcee==3.0.2
128 | - future==0.18.2
129 | - gpytorch==1.1.1
130 | - hpolib2==0.0.1
131 | - idna==2.10
132 | - imagesize==1.2.0
133 | - lazy-import==0.2.2
134 | - lockfile==0.12.2
135 | - pillow==7.2.0
136 | - psutil==5.7.2
137 | - pyaml==20.4.0
138 | - pydoe==0.3.8
139 | - pynisher==0.5.0
140 | - pyrfr==0.8.0
141 | - pyyaml==5.3.1
142 | - requests==2.24.0
143 | - scikit-optimize==0.7.4
144 | - smac==0.12.3
145 | - snowballstemmer==2.0.0
146 | - sphinx==3.1.2
147 | - sphinx-gallery==0.5.0
148 | - sphinx-rtd-theme==0.5.0
149 | - sphinxcontrib-applehelp==1.0.2
150 | - sphinxcontrib-devhelp==1.0.2
151 | - sphinxcontrib-htmlhelp==1.0.3
152 | - sphinxcontrib-jsmath==1.0.1
153 | - sphinxcontrib-qthelp==1.0.3
154 | - sphinxcontrib-serializinghtml==1.1.4
155 | - torch==1.5.0+cpu
156 | - torchvision==0.6.0+cpu
157 | - urllib3==1.25.10
158 | 
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Practical Transfer Learning for Bayesian Optimization
2 | 
3 | Code accompanying
4 | 
5 | Practical Transfer Learning for Bayesian Optimization
6 | Matthias Feurer, Benjamin Letham, Frank Hutter and Eytan Bakshy
7 | https://arxiv.org/pdf/1802.02219v3.pdf
8 | 
9 | All code was developed using Python 3.7 and SMAC3 v0.12.3. The exact versions of the software
10 | used are given in environment.yaml.
11 | 
12 | ## Guide to the code
13 | 
14 | ### scripts/generate_commands.py
15 | 
16 | Generates the commands for running experiments. See the bottom of this file for usage.
17 | 
18 | ### scripts/run_benchmark.py
19 | 
20 | Main script. Configures SMAC to use the actual transfer learning methods. Then it applies SMAC
21 | to the chosen benchmark function and outputs a `.json` file containing the results.
22 | 
23 | ### scripts/install.sh
24 | 
25 | Installation file used to set up the conda environment. We cannot guarantee that this leads to
26 | the exact same environment that we used for our experiments.
27 | 
28 | ### rgpe/methods
29 | 
30 | Contains the actual implementations of all methods used throughout the paper:
31 | 
32 | * ablr.py: Perrone et al., NeurIPS 2019
33 | * GCPplusPrior.py: Salinas et al., ICML 2020
34 | * kl_weighting.py: Ramachandran et al., ECML 2019
35 | * noisy_ei.py: Letham et al., Bayesian Analysis, 2019
36 | * rgpe.py: This paper
37 | * rmogp.py: This paper
38 | * taf.py: Wistuba et al., Machine Learning, 2018
39 | * tstr.py: Wistuba et al., ECML 2016
40 | * warmstarting_ac.py: Lindauer et al., AAAI 2018
41 | 
42 | ### rgpe/test_functions.py
43 | 
44 | Implementation of all test functions used throughout the paper. Required data is either downloaded
45 | from the internet (for surrogates based on OpenML data), or needs to be downloaded manually
46 | (AdaBoost, SVM, LCBench).
47 | 
48 | To run the LCBench benchmark, clone the [LCBench repository](https://github.com/automl/LCBench/)
49 | and set the paths in the class `NNGrid` to point to where you cloned the repository and to the
50 | directories where you downloaded the LCBench data
51 | (see [here](https://github.com/automl/LCBench/#downloading-the-data) for downloading the data).
52 | 
53 | ### rgpe/adaboost
54 | 
55 | AdaBoost data from
56 | [Schilling et al.](https://github.com/nicoschilling/ECML2016/tree/master/data/adaboost). Please
57 | download these files from Nico's repository and place them here.
58 | 
59 | ### rgpe/svm
60 | 
61 | SVM data from [Schilling et al.](https://github.com/nicoschilling/ECML2016/tree/master/data/svm).
Please download the files from Nico's repository and place them here.
62 | 
63 | 
64 | ### rgpe/utils.py
65 | 
66 | Helper functions for obtaining Gaussian process objects, conducting Sobol sequence construction
67 | and computing expected improvement.
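
For illustration, the expected-improvement helper follows the standard closed-form EI for minimization. A minimal sketch, not the exact function in `rgpe/utils.py` (whose signature may differ):

```python
import numpy as np
from scipy.stats import norm

def expected_improvement(mean, var, eta):
    """Closed-form EI for minimization from predictive mean/variance and incumbent value eta."""
    std = np.sqrt(np.maximum(var, 1e-12))  # guard against zero predictive variance
    z = (eta - mean) / std
    return (eta - mean) * norm.cdf(z) + std * norm.pdf(z)
```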
68 | 69 | ## Example calls 70 | 71 | ### RGPE 72 | 73 | ``` 74 | python scripts/run_benchmark_smac.py --benchmark adaboost --method rgpe --seed 5 --task 20 \ 75 | --empirical-meta-configs --learned-initial-design copula --weight-dilution-strategy probabilistic-ld \ 76 | --sampling-mode bootstrap --normalization Copula --num-posterior-samples 1000 --n-init 1 \ 77 | --output-file results/adaboost/rgpe-bootstrap-Copula-probabilistic-ld-NoisyEI-1000--gpmetadata-learnedinit-1/5_50_20.json \ 78 | --variance-mode average --acquisition-function-name 30 --target-model-incumbent False 79 | ``` 80 | 81 | ### TAF 82 | 83 | ``` 84 | python scripts/run_benchmark_smac.py --benchmark adaboost --method taf --seed 8 --task 47 \ 85 | --empirical-meta-configs --learned-initial-design unscaled --bandwidth 0.1 \ 86 | --weighting-mode tstr --n-init 1 --normalization None --weight_dilution_strategy None \ 87 | --output-file results/adaboost/taf-tstr-None-None-0.100000-gpmetadata-learnedinit-2/8_50_47.json 88 | ``` 89 | 90 | ## How to reproduce the experiments 91 | 92 | 1. Install everything 93 | 2. Run only GP(MAP) to obtain data to warmstart transfer learning methods with. To obtain 94 | commands for doing so run `python generate_commands.py --benchmark adaboost --setup None` 95 | 3. Run (almost) everything else. To obtain the commands for doing so run 96 | `python generate_commands.py --benchmark adaboost --setup -gpmetadata-learnedinit` 97 | 4. Finally, also run some methods which do not contain a learned initialization: 98 | `python generate_commands.py --benchmark adaboost --setup -gpmetadata` 99 | 100 | -------------------------------------------------------------------------------- /rgpe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/transfer-hpo-framework/60a2146c53b2489015576878946ec638d868d990/rgpe/__init__.py -------------------------------------------------------------------------------- /rgpe/methods/GCPplusPrior.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Tuple, Union 2 | 3 | from ConfigSpace import Configuration 4 | import numpy as np 5 | from scipy.stats import norm 6 | import torch.nn as nn 7 | import torch 8 | 9 | from smac.configspace import convert_configurations_to_array 10 | from smac.epm.base_epm import AbstractEPM 11 | from smac.optimizer.acquisition import AbstractAcquisitionFunction 12 | from rgpe.utils import get_gaussian_process, copula_transform 13 | 14 | 15 | class GCPplusPrior(AbstractEPM): 16 | 17 | def __init__( 18 | self, 19 | training_data: Dict[int, Dict[str, Union[List[Configuration], np.ndarray]]], 20 | **kwargs 21 | ): 22 | """ 23 | Gaussian Copula Process plus prior from "A Quantile-based Approach for Hyperparameter 24 | Transfer Learning" by Salinas, Shen and Perrone, ICML 2020, 25 | https://proceedings.icml.cc/static/paper_files/icml/2020/4367-Paper.pdf 26 | 27 | This is a re-implementation that is not based on the original code which can be found at 28 | https://github.com/geoalgo/A-Quantile-based-Approach-for-Hyperparameter-Transfer-Learning 29 | 30 | Parameters 31 | ---------- 32 | training_data 33 | Dictionary containing the training data for each meta-task. Mapping from an integer ( 34 | task ID) to a dictionary, which is a mapping from configuration to performance. 
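In this implementation the inner dictionary holds the keys ``'configurations'`` (a list of Configuration objects) and ``'y'`` (an array with the corresponding observed performances).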
35 | """ 36 | 37 | if kwargs.get('instance_features') is not None: 38 | raise NotImplementedError() 39 | super().__init__(**kwargs) 40 | self.training_data = training_data 41 | 42 | self.categorical_mask = np.array(self.types) > 0 43 | self.n_categories = np.sum(self.types) 44 | 45 | torch.manual_seed(self.seed) 46 | self.rng = np.random.RandomState(self.seed) 47 | 48 | X_train = [] 49 | y_train = [] 50 | for task in training_data: 51 | Y = training_data[task]['y'] 52 | y_scaled = copula_transform(Y) 53 | configs = training_data[task]['configurations'] 54 | X = convert_configurations_to_array(configs) 55 | for x, y in zip(X, y_scaled): 56 | X_train.append(x) 57 | y_train.append(y) 58 | X_train = np.array(X_train) 59 | X_train = self._preprocess(X_train) 60 | y_train = np.array(y_train) 61 | 62 | class NLLHLoss(nn.Module): 63 | 64 | def forward(self, input, target): 65 | # Assuming network outputs var 66 | std = torch.log(1 + torch.exp(input[:, 1])) + 10e-12 67 | mu = input[:, 0].view(-1, 1) 68 | 69 | # Pytorch Normal indeed takes the standard deviation as argument 70 | n = torch.distributions.normal.Normal(mu, std) 71 | loss = n.log_prob(target) 72 | return -torch.mean(loss) 73 | 74 | # TODO we could add embeddings for categorical hyperparameters here to improve performance? 75 | model = torch.nn.Sequential( 76 | torch.nn.Linear(X_train.shape[1], 50).float(), 77 | torch.nn.Dropout(0.1), 78 | torch.nn.ReLU(), 79 | torch.nn.Linear(50, 50).float(), 80 | torch.nn.Dropout(0.1), 81 | torch.nn.ReLU(), 82 | torch.nn.Linear(50, 50).float(), 83 | torch.nn.Dropout(0.1), 84 | torch.nn.ReLU(), 85 | torch.nn.Linear(50, 2).float(), 86 | ) 87 | loss_fn = NLLHLoss() 88 | optimizer = torch.optim.Adam(model.parameters(), lr=0.01) 89 | scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, 90 | step_size=1000, 91 | gamma=0.2) 92 | for iter in range(3000): 93 | 94 | batch = self.rng.choice(len(X_train), size=64) 95 | x_batch = torch.tensor(X_train[batch]).float() 96 | y_batch = torch.tensor(y_train[batch]).float() 97 | 98 | y_pred = model(x_batch) 99 | 100 | # Compute and print loss. 
101 | loss = loss_fn(y_pred, y_batch) 102 | if iter % 100 == 99: 103 | print(iter, loss.item()) 104 | 105 | optimizer.zero_grad() 106 | loss.backward() 107 | optimizer.step() 108 | scheduler.step() 109 | self.prior = model 110 | 111 | def _train(self, X: np.ndarray, Y: np.ndarray) -> AbstractEPM: 112 | X = self._preprocess(X) 113 | prior_prediction = self.prior.forward(torch.tensor(X).float()).detach().numpy() 114 | prior_mean = prior_prediction[:, 0].flatten() 115 | prior_std = prior_prediction[:, 1].flatten() 116 | prior_std = np.log(1 + np.exp(prior_std)) + 10e-12 117 | 118 | y_scaled = copula_transform(Y).flatten() 119 | residual = (y_scaled - prior_mean) / prior_std 120 | 121 | self.target_model = get_gaussian_process( 122 | bounds=self.bounds, 123 | types=self.types, 124 | configspace=self.configspace, 125 | rng=self.rng, 126 | kernel=None, 127 | ) 128 | self.target_model._train(X, residual) 129 | 130 | return self 131 | 132 | def predict(self, X: np.ndarray, cov_return_type: str = 'diagonal_cov') -> Tuple[np.ndarray, np.ndarray]: 133 | X = self._preprocess(X) 134 | prior_prediction = self.prior.forward(torch.tensor(X).float()).detach().numpy() 135 | prior_mean = prior_prediction[:, 0] 136 | prior_std = prior_prediction[:, 1] 137 | prior_std = (np.log(1 + np.exp(prior_std)) + 10e-12) 138 | gp_mean, gp_var = self.target_model._predict(X) 139 | mean_x = gp_mean * prior_std + prior_mean 140 | covar_x = np.sqrt(gp_var) * prior_std 141 | return mean_x.reshape((-1, 1)), covar_x.reshape((-1, 1)) 142 | 143 | def _preprocess(self, X: np.ndarray) -> np.ndarray: 144 | """Perform one-hot-encoding of categorical hyperparameters.""" 145 | categories_array = np.zeros((X.shape[0], self.n_categories)) 146 | categories_idx = 0 147 | for idx in range(len(self.types)): 148 | if self.types[idx] == 0: 149 | continue 150 | else: 151 | for j in range(self.types[idx]): 152 | mask = X[:, idx] == j 153 | categories_array[mask, categories_idx] = 1 154 | categories_idx += 1 155 | numerical_array = X[:, ~self.categorical_mask] 156 | X = np.concatenate((numerical_array, categories_array), axis=1) 157 | X[np.isnan(X)] = -1.0 158 | return X 159 | 160 | 161 | class CustomEI(AbstractAcquisitionFunction): 162 | """EI for residual GP as defined in Section 4.2 of Salinas et al.""" 163 | 164 | def __init__(self, model: AbstractEPM): 165 | 166 | super().__init__(model) 167 | self.eta = None 168 | self._required_updates = ('model', 'eta') 169 | 170 | def _compute(self, X: np.ndarray) -> np.ndarray: 171 | if len(X.shape) == 1: 172 | X = X[:, np.newaxis] 173 | 174 | m, v = self.model.predict_marginalized_over_instances(X) 175 | s = np.sqrt(v) 176 | 177 | if self.eta is None: 178 | raise ValueError('No current best specified. Call update(' 179 | 'eta=) to inform the acquisition function ' 180 | 'about the current best value.') 181 | 182 | def calculate_f(): 183 | z = (self.eta - m) / v 184 | return v * (z * norm.cdf(z) + norm.pdf(z)) 185 | 186 | if np.any(s == 0.0): 187 | # if std is zero, we have observed x on all instances 188 | # using a RF, std should be never exactly 0.0 189 | # Avoid zero division by setting all zeros in s to one. 190 | # Consider the corresponding results in f to be zero. 
191 | self.logger.warning("Predicted std is 0.0 for at least one sample.") 192 | s_copy = np.copy(s) 193 | s[s_copy == 0.0] = 1.0 194 | f = calculate_f() 195 | f[s_copy == 0.0] = 0.0 196 | else: 197 | f = calculate_f() 198 | if (f < 0).any(): 199 | raise ValueError( 200 | "Expected Improvement is smaller than 0 for at least one " 201 | "sample.") 202 | return f 203 | -------------------------------------------------------------------------------- /rgpe/methods/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/transfer-hpo-framework/60a2146c53b2489015576878946ec638d868d990/rgpe/methods/__init__.py -------------------------------------------------------------------------------- /rgpe/methods/ablr.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Tuple, Union 2 | 3 | from ConfigSpace import Configuration 4 | import numpy as np 5 | import scipy.optimize 6 | import torch.nn as nn 7 | import torch 8 | 9 | from smac.configspace import convert_configurations_to_array 10 | from smac.epm.base_epm import AbstractEPM 11 | 12 | from rgpe.utils import copula_transform 13 | 14 | D = 50 # Hidden layer size 15 | 16 | precision = 32 17 | if precision == 32: 18 | t_dtype = torch.float32 19 | np_dtype = np.float32 20 | else: 21 | t_dtype = torch.float64 22 | np_dtype = np.float64 23 | 24 | 25 | class Net(torch.nn.Module): 26 | """ 27 | Implementation of the Adaptive Bayesian Linear Regression (ABLR) for multi-task 28 | hyperparameter optimization. 29 | 30 | For details see https://papers.nips.cc/paper/7917-scalable-hyperparameter-transfer-learning.pdf 31 | 32 | This class implements the neural network. For the class connecting it to SMAC see below.""" 33 | 34 | def __init__(self, num_tasks, n_attributes, meta_data=None, use_copula_transform=False): 35 | 36 | self.num_tasks = num_tasks 37 | self.n_attributes = n_attributes 38 | self.meta_data = meta_data 39 | self.use_copula_transform = use_copula_transform 40 | 41 | self.mean_ = None 42 | self.std_ = None 43 | 44 | super().__init__() 45 | self.total_n_params = 0 46 | 47 | hidden1 = nn.Linear(self.n_attributes, D) 48 | hidden2 = nn.Linear(D, D) 49 | hidden3 = nn.Linear(D, D) 50 | self.layers = [ 51 | hidden1, hidden2, hidden3 52 | ] 53 | if precision == 32: 54 | self.layers = [layer.float() for layer in self.layers] 55 | else: 56 | self.layers = [layer.double() for layer in self.layers] 57 | 58 | # initialization of alpha and beta 59 | # Instead of alpha, we model 1/alpha and use a different range for the values 60 | # (i.e. 
1e-6 to 1 instead of 1 to 1e6) 61 | self.alpha_t = torch.tensor([1] * self.num_tasks, requires_grad=True, dtype=t_dtype) 62 | self.total_n_params += len(self.alpha_t) 63 | self.beta_t = torch.tensor([1e3] * self.num_tasks, requires_grad=True, dtype=t_dtype) 64 | self.total_n_params += len(self.beta_t) 65 | 66 | # initialization of the weights 67 | for layer in self.layers: 68 | torch.nn.init.xavier_normal_(layer.weight) 69 | if len(layer.weight.shape) == 1: 70 | size = layer.weight.shape[0] 71 | else: 72 | size = layer.weight.shape[0] * layer.weight.shape[1] 73 | self.total_n_params += size 74 | 75 | # initialize arrays for the optimization of sum log-likelihood 76 | self.K_t = [torch.tensor(0.0, dtype=t_dtype) for i in range(self.num_tasks)] 77 | self.L_t = [torch.tensor(0.0, dtype=t_dtype) for i in range(self.num_tasks)] 78 | self.L_t_inv = [torch.tensor(0.0, dtype=t_dtype) for i in range(self.num_tasks)] 79 | self.e_t = [torch.tensor(0.0, dtype=t_dtype) for i in range(self.num_tasks)] 80 | 81 | def forward(self, x): 82 | """ 83 | Simple forward pass through the neural network 84 | """ 85 | 86 | for layer in self.layers: 87 | x = layer(x) 88 | x = torch.tanh(x) 89 | 90 | return x 91 | 92 | def loss(self, hp, training_datasets): 93 | """ 94 | Negative log marginal likelihood of multi-task ABLR 95 | hp : np.ndarray 96 | Contains the weights of the network, alpha and beta 97 | training_datasets : list 98 | tuples (X, y) for the meta-datasets and the current dataset 99 | """ 100 | # Apply the flattened hyperparameter array to the neural network 101 | 102 | if precision == 32: 103 | hp = hp.astype(np.float32) 104 | 105 | idx = 0 106 | for layer in self.layers: 107 | weights = layer.weight.data.numpy().astype(np_dtype) 108 | if len(weights.shape) == 1: 109 | size = weights.shape[0] 110 | else: 111 | size = weights.shape[0] * weights.shape[1] 112 | layer.weight.data = torch.from_numpy(hp[idx: idx + size].reshape(weights.shape)) 113 | layer.weight.requires_grad_() 114 | idx += size 115 | 116 | self.alpha_t.data = torch.from_numpy(hp[idx: idx + self.num_tasks]) 117 | idx += self.num_tasks 118 | self.alpha_t.requires_grad_() 119 | self.beta_t.data = torch.from_numpy(hp[idx: idx + self.num_tasks]) 120 | idx += self.num_tasks 121 | self.beta_t.requires_grad_() 122 | assert idx == self.total_n_params 123 | 124 | # Likelihood computation starts here 125 | self.likelihood = None 126 | 127 | for i, (x, y) in enumerate(training_datasets): 128 | 129 | out = self.forward(x) 130 | 131 | # Loss function calculations, see 6th Equation on the first page of the Appendix 132 | # https://papers.nips.cc/paper/7917-scalable-hyperparameter-transfer-learning-supplemental.zip 133 | assert (torch.t(out).shape == (D, x.shape[0])) 134 | # Remember that we model 1/alpha instead of alpha 135 | r = self.beta_t[i] * self.alpha_t[i] 136 | K_t = torch.add( 137 | torch.eye(D, dtype=t_dtype), 138 | r * torch.matmul(torch.t(out), out) 139 | ) 140 | self.K_t[i] = K_t.clone() 141 | assert (K_t.shape == (D, D)) 142 | 143 | L_t = torch.cholesky(K_t, upper=False) 144 | self.L_t[i] = L_t.clone() 145 | # Naive version: 146 | # self.L_t_inv[i] = torch.inverse(L_t) 147 | # e_t = torch.matmul(self.L_t_inv[i], torch.matmul(torch.t(out), y)) 148 | e_t = torch.triangular_solve(torch.matmul(torch.t(out), y), L_t, upper=False).solution 149 | self.e_t[i] = e_t.view((D, 1)).clone() 150 | assert (self.e_t[i].shape == (D, 1)) 151 | 152 | norm_y_t = torch.norm(y, 2, 0) 153 | norm_c_t = torch.norm(e_t[i], 2, 0) 154 | 155 | L1 = -(x.shape[0] / 2 * 
torch.log(self.beta_t[i])) 156 | L2 = self.beta_t[i] / 2 * (torch.pow(norm_y_t, 2) -r * torch.pow(norm_c_t, 2)) 157 | L3 = torch.sum(torch.log(torch.diag(L_t))) 158 | L = L1 + L2 + L3 159 | 160 | if self.likelihood is None: 161 | self.likelihood = L 162 | else: 163 | self.likelihood = torch.add(self.likelihood, L) 164 | 165 | # Get the gratient and put transform it into the flat array structure required by 166 | # scipy.optimize 167 | g = np.zeros((self.total_n_params)) 168 | self.likelihood.backward() 169 | 170 | idx = 0 171 | for layer in self.layers: 172 | gradients = layer.weight.grad.data.numpy().astype(np_dtype) 173 | if len(gradients.shape) == 1: 174 | size = gradients.shape[0] 175 | else: 176 | size = gradients.shape[0] * gradients.shape[1] 177 | g[idx: idx + size] = gradients.flatten() 178 | idx += size 179 | layer.weight.grad.zero_() 180 | 181 | g[idx: idx + self.num_tasks] = self.alpha_t.grad.data.numpy().astype(np_dtype) 182 | idx += self.num_tasks 183 | g[idx: idx + self.num_tasks] = self.beta_t.grad.data.numpy().astype(np_dtype) 184 | idx += self.num_tasks 185 | self.alpha_t.grad.data.zero_() 186 | self.beta_t.grad.data.zero_() 187 | self._gradient = g 188 | 189 | return self.likelihood 190 | 191 | def gradient(self, hp, training_datasets): 192 | """ 193 | Gradient of the parameters of the network that are optimized through LBFGS 194 | 195 | The gradient is actually stored during the forward pass, this is only a convenience 196 | function to work with the LBFGS interface of scipy. 197 | """ 198 | 199 | return self._gradient 200 | 201 | def optimize(self, training_datasets): 202 | """ 203 | Optimize weights, alpha and beta with LBFGSB 204 | """ 205 | 206 | # Initial flattened array of weights used as a starting point of LBFGS 207 | init = np.ones((self.total_n_params), dtype=np_dtype) 208 | 209 | idx = 0 210 | for layer in self.layers: 211 | weights = layer.weight.data.numpy().astype(np_dtype) 212 | if len(weights.shape) == 1: 213 | size = weights.shape[0] 214 | else: 215 | size = weights.shape[0] * weights.shape[1] 216 | init[idx: idx + size] = weights.flatten() 217 | idx += size 218 | mybounds = [[None, None] for i in range(idx)] 219 | 220 | init[idx: idx + self.num_tasks] = self.alpha_t.data.numpy().astype(np_dtype) 221 | idx += self.num_tasks 222 | mybounds.extend([[1e-3, 1e3]] * self.num_tasks) 223 | init[idx: idx + self.num_tasks] = self.beta_t.data.numpy().astype(np_dtype) 224 | idx += self.num_tasks 225 | mybounds.extend([[1, 1e6]] * self.num_tasks) 226 | 227 | assert self.total_n_params == len(mybounds), (self.total_n_params, len(mybounds)) 228 | assert self.total_n_params == idx 229 | 230 | res = scipy.optimize.fmin_l_bfgs_b( 231 | lambda *args: float(self.loss(*args)), 232 | x0=init, 233 | bounds=mybounds, 234 | fprime=self.gradient, 235 | args=(training_datasets, ), 236 | ) 237 | print(self.loss(res[0], training_datasets)) # This updates the internal states 238 | print(res) 239 | 240 | def train(self, X: np.ndarray, y: np.ndarray): 241 | """Optimize the neural network given training data ``X``. 242 | 243 | Training data is concatenated with meta-data and then passed to the optimize function. 
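Each meta-task's targets are either copula-transformed or standardized to zero mean and unit variance per task before the meta-data and the current task's data are stacked into the list of (X, y) tensors handed to ``optimize``.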
244 | """ 245 | y = y.reshape((y.shape[0], 1)) 246 | 247 | training_datasets = [] 248 | for meta_task in self.meta_data: 249 | meta_task_data = self.meta_data[meta_task] 250 | X_t = meta_task_data[0] 251 | y_t = meta_task_data[1] 252 | if X_t.shape[1] != self.n_attributes: 253 | raise ValueError((X_t.shape[1], self.n_attributes)) 254 | 255 | if self.use_copula_transform: 256 | y_t = copula_transform(y_t) 257 | else: 258 | mean = y_t.mean() 259 | std = y_t.std() 260 | if std == 0: 261 | std = 1 262 | y_t = (y_t.copy() - mean) / std 263 | y_t = y_t.reshape(y_t.shape[0], 1) 264 | 265 | training_datasets.append(( 266 | torch.tensor(X_t, dtype=t_dtype), 267 | torch.tensor(y_t, dtype=t_dtype), 268 | )) 269 | 270 | if X.shape[1] != self.n_attributes: 271 | raise ValueError((X.shape[1], self.n_attributes)) 272 | 273 | if self.use_copula_transform: 274 | self.mean_ = 0 275 | self.std_ = 1 276 | y_ = copula_transform(y.copy()) 277 | else: 278 | self.mean_ = y.mean() 279 | self.std_ = y.std() 280 | if self.std_ == 0: 281 | self.std_ = 1 282 | y_ = (y.copy() - self.mean_) / self.std_ 283 | 284 | training_datasets.append(( 285 | torch.tensor(X, dtype=t_dtype), 286 | torch.tensor(y_, dtype=t_dtype), 287 | )) 288 | if len(training_datasets) != self.num_tasks: 289 | raise ValueError((len(training_datasets), self.num_tasks)) 290 | 291 | self.optimize(training_datasets) 292 | 293 | def predict(self, X_test: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: 294 | """Returns the predictive mean and variance of the objective function at 295 | the given test points. 296 | """ 297 | X_test = torch.tensor(X_test, dtype=t_dtype) 298 | out = self.forward(X_test) 299 | 300 | # Naive implementation: 301 | #m = torch.matmul(torch.matmul(torch.t(self.e_t[-1]), self.L_t_inv[-1]), torch.t(out)) 302 | m = torch.matmul( 303 | torch.t(self.e_t[-1]), 304 | torch.triangular_solve(torch.t(out), self.L_t[-1], upper=False).solution, 305 | ) 306 | # Remember that we model 1/alpha instead of alpha 307 | m = (self.beta_t[-1] * self.alpha_t[-1]) * m.reshape((m.shape[1], 1)) 308 | assert (m.shape == (X_test.shape[0], 1)) 309 | if not torch.isfinite(m).all(): 310 | raise ValueError('Infinite predictions %s for input %s' % (m, X_test)) 311 | m = m * self.std_ + self.mean_ 312 | 313 | # Naive implementation 314 | #v = torch.matmul(self.L_inv_t[-1], torch.t(out)) 315 | v = torch.triangular_solve(torch.t(out), self.L_t[-1], upper=False).solution 316 | # Remember that we model 1/alpha instead of alpha 317 | v = self.alpha_t[-1] * torch.pow(torch.norm(v, dim=0), 2) 318 | v = v.reshape((-1, 1)) 319 | assert (v.shape == (X_test.shape[0], 1)), v.shape 320 | if not torch.isfinite(v).all(): 321 | raise ValueError('Infinite predictions %s for input %s' % (v, X_test)) 322 | v = v * (self.std_ ** 2) 323 | 324 | return m.detach().numpy(), v.detach().numpy() 325 | 326 | 327 | class ABLR(AbstractEPM): 328 | """ 329 | Implementation of the Adaptive Bayesian Linear Regression (ABLR) for multi-task 330 | hyperparameter optimization. 331 | 332 | For details see https://papers.nips.cc/paper/7917-scalable-hyperparameter-transfer-learning.pdf 333 | 334 | This is the wrapper class to be used with SMAC, which internally uses the neural network 335 | class in the code above. 
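If network training fails due to numerical issues it is retried from scratch (up to 10 times); if no network could be trained, ``predict`` falls back to returning random values.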
336 | """ 337 | 338 | def __init__( 339 | self, 340 | training_data: Dict[int, Dict[str, Union[List[Configuration], np.ndarray]]], 341 | use_copula_transform: bool = False, 342 | **kwargs 343 | ): 344 | if kwargs.get('instance_features') is not None: 345 | raise NotImplementedError() 346 | super().__init__(**kwargs) 347 | self.training_data = training_data 348 | self.use_copula_transform = use_copula_transform 349 | self.nn = None 350 | torch.manual_seed(self.seed) 351 | self.rng = np.random.RandomState(self.seed) 352 | 353 | self.categorical_mask = np.array(self.types) > 0 354 | self.n_categories = np.sum(self.types) 355 | 356 | def _train(self, X: np.ndarray, Y: np.ndarray) -> AbstractEPM: 357 | meta_data = dict() 358 | for id_ in self.training_data: 359 | configs = self.training_data[id_]['configurations'] 360 | X_ = convert_configurations_to_array(configs) 361 | X_ = self._preprocess(X_) 362 | meta_data[id_] = ( 363 | X_, 364 | self.training_data[id_]['y'].flatten(), 365 | None, 366 | ) 367 | 368 | X = self._preprocess(X) 369 | for i in range(10): 370 | try: 371 | # Sometimes the neural network training fails due to numerical issues - we 372 | # then retrain the network from scratch 373 | if self.nn is None: 374 | self.nn = Net( 375 | num_tasks=len(self.training_data) + 1, 376 | n_attributes=X.shape[1], 377 | meta_data=meta_data, 378 | use_copula_transform=self.use_copula_transform, 379 | ) 380 | self.nn.train(X, Y) 381 | break 382 | except Exception as e: 383 | print('Training failed %d/%d!' % (i + 1, 10)) 384 | print(e) 385 | self.nn = None 386 | 387 | return self 388 | 389 | def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: 390 | X = self._preprocess(X) 391 | if self.nn: 392 | return self.nn.predict(X) 393 | else: 394 | return self.rng.randn(X.shape[0], 1), self.rng.randn(X.shape[0], 1) 395 | 396 | def _preprocess(self, X: np.ndarray) -> np.ndarray: 397 | """Perform one-hot-encoding of categorical hyperparameters.""" 398 | categories_array = np.zeros((X.shape[0], self.n_categories)) 399 | categories_idx = 0 400 | for idx in range(len(self.types)): 401 | if self.types[idx] == 0: 402 | continue 403 | else: 404 | for j in range(self.types[idx]): 405 | mask = X[:, idx] == j 406 | categories_array[mask, categories_idx] = 1 407 | categories_idx += 1 408 | numerical_array = X[:, ~self.categorical_mask] 409 | X = np.concatenate((numerical_array, categories_array), axis=1) 410 | X[np.isnan(X)] = -1.0 411 | return X 412 | -------------------------------------------------------------------------------- /rgpe/methods/kl_weighting.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import json 3 | from typing import Dict, List, Optional, Tuple, Union 4 | 5 | from ConfigSpace import Configuration 6 | import numpy as np 7 | import numpy.linalg as npla 8 | import scipy.linalg as spla 9 | import scipy.optimize 10 | import sklearn.metrics 11 | from smac.configspace import convert_configurations_to_array 12 | from smac.epm.base_epm import AbstractEPM 13 | 14 | from rgpe.utils import get_gaussian_process 15 | 16 | # Code from https://github.com/HIPS/Spearmint/blob/PESC/spearmint/acquisition_functions/predictive_entropy_search.py#L944 17 | """ 18 | See Miguel's paper (http://arxiv.org/pdf/1406.2541v1.pdf) section 2.1 and Appendix A 19 | Returns a function the samples from the approximation... 
20 | if testing=True, it does not return the result but instead the random cosine for testing only 21 | We express the kernel as an expectation. But then we approximate the expectation with a weighted sum 22 | theta are the coefficients for this weighted sum. that is why we take the dot product of theta at 23 | the end 24 | we also need to scale at the end so that it's an average of the random features. 25 | if use_woodbury_if_faster is False, it never uses the woodbury version 26 | """ 27 | 28 | def chol2inv(chol): 29 | return spla.cho_solve((chol, False), np.eye(chol.shape[0])) 30 | 31 | 32 | def sample_gp_with_random_features(gp, nFeatures, rng, testing=False, use_woodbury_if_faster=True): 33 | d = len(gp.configspace.get_hyperparameters()) 34 | N_data = gp.gp.X_train_.shape[0] 35 | 36 | nu2 = np.exp(gp.gp.kernel.theta[-1]) 37 | 38 | sigma2 = np.exp(gp.gp.kernel.theta[0]) # the kernel amplitude 39 | 40 | # We draw the random features - in contrast to the original code we only support Matern5/2 41 | m = 5.0 / 2.0 42 | W = ( 43 | rng.randn(nFeatures, d) / gp.gp.kernel.theta[1: -1] / 44 | np.sqrt(rng.gamma(shape=m, scale=1.0 / m, size=(nFeatures, 1))) 45 | ) 46 | b = rng.uniform(low=0, high=2 * np.pi, size=nFeatures)[:, None] 47 | 48 | # Just for testing the random features in W and b... doesn't test the weights theta 49 | if testing: 50 | return lambda x: np.sqrt(2 * sigma2 / nFeatures) * np.cos(np.dot(W, x.T) + b) 51 | # K(x1, x2) \approx np.dot(test(x1).T, tst_fun(x2)) 52 | 53 | randomness = rng.randn(nFeatures) 54 | 55 | # W has size nFeatures by d 56 | # tDesignMatrix has size Nfeatures by Ndata 57 | # woodbury has size Ndata by Ndata 58 | # z is a vector of length nFeatures 59 | 60 | gp_inputs = gp.gp.X_train_ 61 | 62 | # tDesignMatrix has size Nfeatures by Ndata 63 | tDesignMatrix = np.sqrt(2.0 * sigma2 / nFeatures) * np.cos(np.dot(W, gp_inputs.T) + b) 64 | 65 | if use_woodbury_if_faster and N_data < nFeatures: 66 | # you can do things in cost N^2d instead of d^3 by doing this woodbury thing 67 | 68 | # We obtain the posterior on the coefficients 69 | woodbury = np.dot(tDesignMatrix.T, tDesignMatrix) + nu2 * np.eye(N_data) 70 | chol_woodbury = spla.cholesky(woodbury) 71 | # inverseWoodbury = chol2inv(chol_woodbury) 72 | z = np.dot(tDesignMatrix, gp.gp.y_train_ / nu2) 73 | # m = z - np.dot(tDesignMatrix, np.dot(inverseWoodbury, np.dot(tDesignMatrix.T, z))) 74 | m = z - np.dot(tDesignMatrix, 75 | spla.cho_solve((chol_woodbury, False), np.dot(tDesignMatrix.T, z))) 76 | # (above) alternative to original but with cho_solve 77 | 78 | # z = np.dot(tDesignMatrix, gp.observed_values / nu2) 79 | # m = np.dot(np.eye(nFeatures) - \ 80 | # np.dot(tDesignMatrix, spla.cho_solve((chol_woodbury, False), tDesignMatrix.T)), z) 81 | 82 | # woodbury has size N_data by N_data 83 | D, U = npla.eigh(woodbury) 84 | # sort the eigenvalues (not sure if this matters) 85 | idx = D.argsort()[::-1] # in decreasing order instead of increasing 86 | D = D[idx] 87 | U = U[:, idx] 88 | R = 1.0 / (np.sqrt(D) * (np.sqrt(D) + np.sqrt(nu2))) 89 | # R = 1.0 / (D + np.sqrt(D*nu2)) 90 | 91 | # We sample from the posterior of the coefficients 92 | theta = randomness - \ 93 | np.dot(tDesignMatrix, 94 | np.dot(U, (R * np.dot(U.T, np.dot(tDesignMatrix.T, randomness))))) + m 95 | 96 | else: 97 | # all you are doing here is sampling from the posterior of the linear model 98 | # that approximates the GP 99 | # Sigma = matrixInverse(np.dot(tDesignMatrix, tDesignMatrix.T) / nu2 + np.eye( 100 | # nFeatures)) 101 | # m = np.dot(Sigma, 
np.dot(tDesignMatrix, gp.observed_values / nu2)) 102 | # theta = m + np.dot(randomness, spla.cholesky(Sigma, lower=False)).T 103 | 104 | # Sigma = matrixInverse(np.dot(tDesignMatrix, tDesignMatrix.T) + nu2*np.eye(nFeatures)) 105 | # m = np.dot(Sigma, np.dot(tDesignMatrix, gp.observed_values)) 106 | # theta = m + np.dot(randomness, spla.cholesky(Sigma*nu2, lower=False)).T 107 | 108 | approx_Kxx = np.dot(tDesignMatrix, tDesignMatrix.T) 109 | while True: 110 | try: 111 | print(approx_Kxx, nu2) 112 | chol_Sigma_inverse = spla.cholesky(approx_Kxx + nu2 * np.eye(nFeatures)) 113 | break 114 | except np.linalg.LinAlgError: 115 | nu2 = np.log(nu2) 116 | nu2 += 1 117 | nu2 = np.exp(nu2) 118 | Sigma = chol2inv(chol_Sigma_inverse) 119 | m = spla.cho_solve((chol_Sigma_inverse, False), 120 | np.dot(tDesignMatrix, gp.gp.y_train_)) 121 | theta = m + np.dot(randomness, spla.cholesky(Sigma * nu2, lower=False)).T 122 | # the above commented out version might be less stable? i forget why i changed it 123 | # that's ok. 124 | 125 | def wrapper(gradient, x): 126 | # the argument "gradient" is 127 | # not the usual compute_grad that computes BOTH when true 128 | # here it only computes the objective when true 129 | if x.ndim == 1: 130 | x = x[None] 131 | 132 | if not gradient: 133 | result = np.dot(theta.T, np.sqrt(2.0 * sigma2 / nFeatures) * np.cos(np.dot(W, x.T) + b)) 134 | if result.size == 1: 135 | result = float( 136 | result) # if the answer is just a number, take it out of the numpy array 137 | # wrapper 138 | # (failure to do so messed up NLopt and it only gives a cryptic error message) 139 | return result 140 | else: 141 | grad = np.dot(theta.T, 142 | -np.sqrt(2.0 * sigma2 / nFeatures) * np.sin(np.dot(W, x.T) + b) * W) 143 | return grad 144 | 145 | return wrapper 146 | 147 | 148 | class KLWeighting(AbstractEPM): 149 | 150 | """Weighting method from "Information-theoretic Transfer Learning framework for Bayesian 151 | optimization" by Ramachandran et al., MLKDD 2018 152 | 153 | This does not implement PES! 
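Roughly, each base task s receives a weight proportional to exp(-D_s / eta), where D_s is a nearest-neighbour estimate of the KL divergence between samples of the optimum location drawn (via random-feature approximations) from the target GP and from base GP s; the target task itself gets pseudo-weight 1 before normalization (see ``_compute_weights`` below).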
154 | """ 155 | 156 | def __init__( 157 | self, 158 | training_data: Dict[int, Dict[str, Union[List[Configuration], np.ndarray]]], 159 | eta: float, # https://github.com/AnilRamachandran/ITTLBO/blob/master/BO_TL_PES_loop.m#L218 160 | variance_mode: str = 'target', 161 | ** kwargs 162 | ): 163 | if kwargs.get('instance_features') is not None: 164 | raise NotImplementedError() 165 | super().__init__(**kwargs) 166 | self.training_data = training_data 167 | self.eta = eta 168 | 169 | self.rng = np.random.RandomState(self.seed) 170 | self.variance_mode = variance_mode 171 | 172 | # https://github.com/AnilRamachandran/ITTLBO/blob/master/BO_TL_PES_loop.m#L153 173 | self.num_samples = 100 174 | self.num_features = 500 175 | 176 | base_models = [] 177 | for task in training_data: 178 | model = get_gaussian_process( 179 | bounds=self.bounds, 180 | types=self.types, 181 | configspace=self.configspace, 182 | rng=self.rng, 183 | kernel=None, 184 | ) 185 | Y = training_data[task]['y'] 186 | mean = Y.mean() 187 | std = Y.std() 188 | if std == 0: 189 | std = 1 190 | 191 | y_scaled = (Y - mean) / std 192 | y_scaled = y_scaled.flatten() 193 | configs = training_data[task]['configurations'] 194 | X = convert_configurations_to_array(configs) 195 | 196 | model.train( 197 | X=X, 198 | Y=y_scaled, 199 | ) 200 | base_models.append(model) 201 | self.base_models = base_models 202 | 203 | self.weights_over_time = [] 204 | 205 | bounds = [(0, 1)] * len(self.configspace.get_hyperparameters()) 206 | samples = [] 207 | for s in range(len(base_models)): 208 | samples_base_task = [] 209 | for _ in range(self.num_samples): 210 | x0 = self.configspace.sample_configuration().get_array() 211 | base_gp_sample = sample_gp_with_random_features(self.base_models[s], self.num_features, 212 | self.rng) 213 | opt_base = scipy.optimize.minimize(functools.partial(base_gp_sample, False), x0, 214 | jac=functools.partial(base_gp_sample, True), 215 | bounds=bounds) 216 | samples_base_task.append(opt_base.x) 217 | 218 | samples.append(np.array(samples_base_task)) 219 | self.samples = samples 220 | 221 | def _compute_weights(self): 222 | 223 | pseudo_weights = [] 224 | bounds = [(0, 1)] * len(self.configspace.get_hyperparameters()) 225 | 226 | samples_target_task = [] 227 | for _ in range(self.num_samples): 228 | target_gp_sample = sample_gp_with_random_features(self.target_model, self.num_features, 229 | self.rng) 230 | x0 = self.configspace.sample_configuration().get_array() 231 | opt_target = scipy.optimize.minimize(functools.partial(target_gp_sample, False), x0, 232 | jac=functools.partial(target_gp_sample, True), 233 | bounds=bounds) 234 | samples_target_task.append(opt_target.x) 235 | samples_target_task = np.array(samples_target_task) 236 | 237 | for s in range(len(self.model_list_)): 238 | if s == len(self.model_list_) - 1: 239 | pseudo_weights.append(1) 240 | else: 241 | exp_arg = 0 242 | 243 | masks = np.eye(self.num_samples, dtype=bool) 244 | for i in range(self.num_samples): 245 | samples_base_task = self.samples[s] 246 | tau_i = sklearn.metrics.pairwise_distances( 247 | samples_target_task[i].reshape((1, -1)), 248 | Y=samples_base_task, metric='euclidean').min() + 1e-14 249 | rho_i = sklearn.metrics.pairwise_distances( 250 | samples_base_task[i].reshape((1, -1)), 251 | Y=samples_base_task[~masks[i]], metric='euclidean' 252 | ).min() + 1e-14 253 | exp_arg += np.log(tau_i / rho_i) 254 | #print(tau_i, rho_i, np.log(tau_i / rho_i)) 255 | 256 | exp_arg *= (len(self.configspace.get_hyperparameters()) / self.num_samples) 257 | exp_arg 
+= np.log(self.num_samples / (self.num_samples - 1)) 258 | #print(exp_arg) 259 | pseudo_weights.append(np.exp(- exp_arg / self.eta)) 260 | 261 | pseudo_weights = np.array(pseudo_weights) 262 | #print(pseudo_weights) 263 | self.weights_ = pseudo_weights / np.sum(pseudo_weights) 264 | 265 | def _train(self, X: np.ndarray, Y: np.ndarray) -> AbstractEPM: 266 | Y = Y.flatten() 267 | mean = Y.mean() 268 | std = Y.std() 269 | if std == 0: 270 | std = 1 271 | 272 | y_scaled = (Y - mean) / std 273 | self.Y_mean_ = mean 274 | self.Y_std_ = std 275 | 276 | target_model = get_gaussian_process( 277 | bounds=self.bounds, 278 | types=self.types, 279 | configspace=self.configspace, 280 | rng=self.rng, 281 | kernel=None, 282 | ) 283 | self.target_model = target_model.train(X, y_scaled) 284 | self.model_list_ = self.base_models + [target_model] 285 | try: 286 | self._compute_weights() 287 | except Exception as e: 288 | print(e) 289 | self.weights_ = np.zeros((len(self.model_list_, ))) 290 | self.weights_[-1] = 1 291 | print('Weights', self.weights_) 292 | self.weights_over_time.append(self.weights_) 293 | 294 | # create model and acquisition function 295 | return self 296 | 297 | def _predict(self, X: np.ndarray, cov_return_type: bool) -> Tuple[np.ndarray, np.ndarray]: 298 | 299 | # compute posterior for each model 300 | weighted_means = [] 301 | weighted_covars = [] 302 | 303 | # filter model with zero weights 304 | # weights on covariance matrices are weight**2 305 | non_zero_weight_indices = (self.weights_ ** 2 > 0).nonzero()[0] 306 | non_zero_weights = self.weights_[non_zero_weight_indices] 307 | # re-normalize 308 | non_zero_weights /= non_zero_weights.sum() 309 | 310 | for non_zero_weight_idx in range(non_zero_weight_indices.shape[0]): 311 | raw_idx = non_zero_weight_indices[non_zero_weight_idx].item() 312 | weight = non_zero_weights[non_zero_weight_idx] 313 | mean, covar = self.model_list_[raw_idx]._predict(X, cov_return_type=cov_return_type) 314 | weighted_means.append(weight * mean) 315 | if self.variance_mode == 'average': 316 | weighted_covars.append(covar * weight ** 2) 317 | elif self.variance_mode == 'target': 318 | if raw_idx + 1 == len(self.weights_): 319 | weighted_covars.append(covar) 320 | else: 321 | raise ValueError() 322 | 323 | if self.variance_mode == 'target': 324 | assert len(weighted_covars) == 1 325 | 326 | # set mean and covariance to be the rank-weighted sum the means and covariances 327 | # of the 328 | # base models and target model 329 | mean_x = np.sum(np.stack(weighted_means), axis=0) * self.Y_std_ + self.Y_mean_ 330 | covar_x = np.sum(weighted_covars, axis=0) * (self.Y_std_ ** 2) 331 | return mean_x, covar_x 332 | -------------------------------------------------------------------------------- /rgpe/methods/noisy_ei.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from typing import Optional 3 | 4 | import numpy as np 5 | from scipy.stats import norm 6 | 7 | from smac.epm.base_epm import AbstractEPM 8 | from smac.optimizer.acquisition import AbstractAcquisitionFunction, EI 9 | 10 | from rgpe.utils import sample_sobol 11 | 12 | 13 | class NoisyEI(AbstractAcquisitionFunction): 14 | """Implements the Noisy Expected Improvement by Letham et al. described in 15 | https://arxiv.org/abs/1706.07094 and used in https://arxiv.org/abs/1802.02219 16 | 17 | This implementation requires an ensemble of methods, for example RGPE and assumes that each 18 | method itself is a Gaussian Process as implemented in SMAC. 
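In brief, ``update`` draws quasi-random (Sobol) samples of the noisy observations from every ensemble member with non-zero weight, conditions copies of the models on these samples with the noise level set close to zero, and ``_compute`` averages the inner acquisition function over the resulting fantasy models.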
19 | 20 | If you are looking for a general implementation of NoisyEI we recommend having a look at 21 | BoTorch. 22 | """ 23 | 24 | def __init__( 25 | self, 26 | model: AbstractEPM, 27 | target_model_incumbent: bool, 28 | acquisition_function: Optional[AbstractAcquisitionFunction] = None, 29 | par: float = 0.0, 30 | n_samples: int = 30, 31 | ): 32 | 33 | super().__init__(model) 34 | self.long_name = 'Noisy Expected Improvement' 35 | self.par = par 36 | self.eta = None 37 | self.target_model_incumbent = target_model_incumbent 38 | 39 | if acquisition_function is None: 40 | self.acq = EI(model=None) 41 | else: 42 | self.acq = acquisition_function 43 | self.n_samples = n_samples 44 | 45 | self._functions = None 46 | self._do_integrate = True 47 | 48 | self.base_models = None 49 | 50 | def update(self, model: AbstractEPM, **kwargs): 51 | 52 | X = kwargs['X'] 53 | 54 | if model.weights_[-1] != 1: 55 | 56 | del kwargs['eta'] 57 | models = [] 58 | etas = [] 59 | 60 | input_locations = [] 61 | samples = [] 62 | 63 | self._do_integrate = True 64 | 65 | if self.base_models is None: 66 | self.base_models = [] 67 | for _ in range(self.n_samples): 68 | model_ = copy.deepcopy(model) 69 | self.base_models.append(model_.base_models) 70 | 71 | # First, create samples from each model of the ensemble to integrate over 72 | for model_idx, weight in enumerate(model.weights_): 73 | if weight <= 0: 74 | # Ignore models with zero weight 75 | samples.append(None) 76 | input_locations.append(None) 77 | continue 78 | submodel = model.model_list_[model_idx] 79 | original_training_data = submodel.gp.X_train_ 80 | if model_idx == len(model.weights_) - 1: 81 | integrate = original_training_data.copy() 82 | else: 83 | integrate = np.vstack((original_training_data, X)) 84 | try: 85 | sample = sample_sobol(submodel, integrate, self.n_samples, model.rng.randint(10000)) 86 | except: 87 | sample = submodel.predict(integrate)[0].transpose() 88 | sample = np.tile(sample, reps=self.n_samples) 89 | samples.append(sample) 90 | input_locations.append(integrate) 91 | 92 | # Second, train the integrated GPs for each base model 93 | for sample_idx in range(self.n_samples): 94 | 95 | # Copy the individual models 96 | # This is substantially faster than doing a deepcopy of all models as it avoids 97 | # doing a deepcopy of the base models 98 | model_ = copy.copy(model) 99 | model_.base_models = self.base_models[sample_idx] 100 | # do a deep copy of the target model so we don't mess with it's original noise 101 | # estimate. The original noise estimate will be used as the basis for the GPs HPO 102 | # when fitting it the next time. 
103 | model_.target_model = copy.deepcopy(model.target_model) 104 | model_.model_list_ = model_.base_models + [model_.target_model] 105 | models.append(model_) 106 | 107 | # Train the individual models 108 | for model_idx, (submodel, weight) in enumerate(zip(model_.model_list_, model_.weights_)): 109 | if weight <= 0: 110 | continue 111 | theta = submodel.gp.kernel.theta 112 | theta[-1] = -25 113 | submodel.gp.kernel.theta = theta 114 | sample = samples[model_idx][sample_idx].reshape((-1, 1)) 115 | submodel._train(input_locations[model_idx], sample, do_optimize=False) 116 | 117 | for model_ in models: 118 | if self.target_model_incumbent: 119 | predictions, _ = model_.target_model.predict(X) 120 | predictions = predictions * model_.Y_std_ + model_.Y_mean_ 121 | else: 122 | predictions, _ = model_.predict(X) 123 | etas.append(np.min(predictions)) 124 | 125 | if self._functions is None or len(self._functions) != len(models): 126 | self._functions = [copy.deepcopy(self.acq) for _ in models] 127 | for model, func, eta in zip(models, self._functions, etas): 128 | func.update(model=model, eta=eta, **kwargs) 129 | else: 130 | print('No need to integrate...') 131 | self._do_integrate = False 132 | del kwargs['eta'] 133 | predictions, _ = model.predict(X) 134 | kwargs['eta'] = np.min(predictions) 135 | self.acq.update(model=model, **kwargs) 136 | 137 | def _compute(self, X: np.ndarray): 138 | if self._do_integrate: 139 | val = np.array([func._compute(X) for func in self._functions]).mean(axis=0) 140 | return val 141 | else: 142 | return self.acq._compute(X) 143 | 144 | 145 | class ClosedFormNei(AbstractAcquisitionFunction): 146 | """Closed-form adaptation of the Noisy Expected Improvement. 147 | 148 | While it is substantially faster to compute it does not consider the uncertainty about 149 | which noisy observation is the best observation made so far. 
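Instead, the observed configuration with the lowest posterior mean is treated as the incumbent, and EI is computed for the predictive distribution of the difference between each candidate and this incumbent (using the joint mean and full covariance whenever base models carry weight).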
150 | """ 151 | 152 | def update(self, **kwargs): 153 | 154 | X = kwargs['X'] 155 | self.model = kwargs['model'] 156 | # Model prediction is only used when not integrating over base models 157 | prediction, _ = self.model.predict(X) 158 | self.incumbent_array = X[np.argmin(prediction)].reshape((1, -1)) 159 | self.eta = np.min(prediction) 160 | 161 | def _compute(self, X: np.ndarray, **kwargs): 162 | 163 | if len(X.shape) == 1: 164 | X = X[:, np.newaxis] 165 | 166 | if self.model.weights_[-1] != 1: 167 | # Due to the joint prediction, it is not possible to compute EI only with respect to 168 | # the predicted value on the target task 169 | X_new = np.concatenate((self.incumbent_array, X), axis=0) 170 | m_pred, v_pred = self.model._predict(X_new, cov_return_type='full_cov') 171 | m_inc = m_pred[0] 172 | v_inc = v_pred[0][0] 173 | m_cand = m_pred[1:] 174 | cov = v_pred[0][1:] 175 | v_cand = np.diag(v_pred)[1:] 176 | m = m_inc - m_cand 177 | v = v_inc + v_cand - 2 * cov 178 | s = np.sqrt(v) 179 | eta_minus_m = m.reshape((-1, 1)) 180 | s = s.reshape((-1, 1)) 181 | else: 182 | m, v = self.model.predict(X) 183 | s = np.sqrt(v) 184 | eta_minus_m = self.eta - m 185 | 186 | def calculate_f(): 187 | z = (eta_minus_m) / s 188 | return (eta_minus_m) * norm.cdf(z) + s * norm.pdf(z) 189 | 190 | if np.any(s == 0.0): 191 | self.logger.warning("Predicted std is 0.0 for at least one sample.") 192 | s_copy = np.copy(s) 193 | s[s_copy == 0.0] = 1.0 194 | f = calculate_f() 195 | f[s_copy == 0.0] = 0.0 196 | else: 197 | f = calculate_f() 198 | if (f < 0).any(): 199 | raise ValueError( 200 | "Expected Improvement is smaller than 0 for at least one " 201 | "sample.") 202 | return f 203 | -------------------------------------------------------------------------------- /rgpe/methods/rgpe.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Dict, List, Optional, Tuple, Union, Callable 3 | 4 | import numpy as np 5 | 6 | from ConfigSpace import Configuration 7 | from smac.configspace import convert_configurations_to_array 8 | from smac.epm.base_epm import AbstractEPM 9 | from smac.epm.gaussian_process import GaussianProcess 10 | from rgpe.utils import get_gaussian_process, sample_sobol, copula_transform 11 | 12 | 13 | def roll_col(X: np.ndarray, shift: int) -> np.ndarray: 14 | """ 15 | Rotate columns to right by shift. 16 | """ 17 | return np.concatenate((X[:, -shift:], X[:, :-shift]), axis=1) 18 | 19 | 20 | def compute_ranking_loss( 21 | f_samps: np.ndarray, 22 | target_y: np.ndarray, 23 | target_model: bool, 24 | ) -> np.ndarray: 25 | """ 26 | Compute ranking loss for each sample from the posterior over target points. 
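For a posterior sample ``f`` and observed targets ``y`` this counts misranked pairs, i.e. the sum over ordered pairs (j, k) of [(f_j < f_k) XOR (y_j < y_k)]; with ``target_model=True`` the sampled value only enters the left-hand side of each comparison (f_j < y_k), matching the LOOCV treatment of the target model.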
27 | """ 28 | y_stack = np.tile(target_y.reshape((-1, 1)), f_samps.shape[0]).transpose() 29 | rank_loss = np.zeros(f_samps.shape[0]) 30 | if not target_model: 31 | for i in range(1, target_y.shape[0]): 32 | rank_loss += np.sum( 33 | (roll_col(f_samps, i) < f_samps) ^ (roll_col(y_stack, i) < y_stack), 34 | axis=1 35 | ) 36 | else: 37 | for i in range(1, target_y.shape[0]): 38 | rank_loss += np.sum( 39 | (roll_col(f_samps, i) < y_stack) ^ (roll_col(y_stack, i) < y_stack), 40 | axis=1 41 | ) 42 | 43 | return rank_loss 44 | 45 | 46 | def get_target_model_loocv_sample_preds( 47 | train_x: np.ndarray, 48 | train_y: np.ndarray, 49 | num_samples: int, 50 | model: GaussianProcess, 51 | engine_seed: int, 52 | ) -> np.ndarray: 53 | """ 54 | Use LOOCV to fit len(train_y) independent GPs and sample from their posterior to obtain an 55 | approximate sample from the target model. 56 | 57 | This sampling does not take into account the correlation between observations which occurs 58 | when the predictive uncertainty of the Gaussian process is unequal zero. 59 | """ 60 | masks = np.eye(len(train_x), dtype=np.bool) 61 | train_x_cv = np.stack([train_x[~m] for m in masks]) 62 | train_y_cv = np.stack([train_y[~m] for m in masks]) 63 | test_x_cv = np.stack([train_x[m] for m in masks]) 64 | 65 | samples = np.zeros((num_samples, train_y.shape[0])) 66 | for i in range(train_y.shape[0]): 67 | loo_model = get_gaussian_process( 68 | configspace=model.configspace, 69 | bounds=model.bounds, 70 | types=model.types, 71 | rng=model.rng, 72 | kernel=model.kernel, 73 | ) 74 | loo_model._train(X=train_x_cv[i], y=train_y_cv[i], do_optimize=False) 75 | 76 | samples_i = sample_sobol(loo_model, test_x_cv[i], num_samples, engine_seed).flatten() 77 | 78 | samples[:, i] = samples_i 79 | 80 | return samples 81 | 82 | 83 | def compute_target_model_ranking_loss( 84 | train_x: np.ndarray, 85 | train_y: np.ndarray, 86 | num_samples: int, 87 | model: GaussianProcess, 88 | engine_seed: int, 89 | ) -> np.ndarray: 90 | """ 91 | Use LOOCV to fit len(train_y) independent GPs and sample from their posterior to obtain an 92 | approximate sample from the target model. 93 | 94 | This function does joint draws from all observations (both training data and left out sample) 95 | to take correlation between observations into account, which can occur if the predictive 96 | variance of the Gaussian process is unequal zero. To avoid returning a tensor, this function 97 | directly computes the ranking loss. 
98 | """ 99 | masks = np.eye(len(train_x), dtype=np.bool) 100 | train_x_cv = np.stack([train_x[~m] for m in masks]) 101 | train_y_cv = np.stack([train_y[~m] for m in masks]) 102 | 103 | ranking_losses = np.zeros(num_samples, dtype=np.int) 104 | for i in range(train_y.shape[0]): 105 | loo_model = get_gaussian_process( 106 | configspace=model.configspace, 107 | bounds=model.bounds, 108 | types=model.types, 109 | rng=model.rng, 110 | kernel=model.kernel, 111 | ) 112 | loo_model._train(X=train_x_cv[i], y=train_y_cv[i], do_optimize=False) 113 | samples_i = sample_sobol(loo_model, train_x, num_samples, engine_seed) 114 | 115 | for j in range(len(train_y)): 116 | ranking_losses += (samples_i[:, i] < samples_i[:, j]) ^ (train_y[i] < train_y[j]) 117 | 118 | return ranking_losses 119 | 120 | 121 | def compute_rank_weights( 122 | train_x: np.ndarray, 123 | train_y: np.ndarray, 124 | base_models: List[GaussianProcess], 125 | target_model: GaussianProcess, 126 | num_samples: int, 127 | sampling_mode: str, 128 | weight_dilution_strategy: Union[int, Callable], 129 | number_of_function_evaluations, 130 | rng: np.random.RandomState, 131 | alpha: float = 0.0, 132 | ) -> np.ndarray: 133 | """ 134 | Compute ranking weights for each base model and the target model 135 | (using LOOCV for the target model). 136 | 137 | Returns 138 | ------- 139 | weights : np.ndarray 140 | """ 141 | 142 | if sampling_mode == 'bootstrap': 143 | 144 | predictions = [] 145 | for model_idx in range(len(base_models)): 146 | model = base_models[model_idx] 147 | predictions.append(model.predict(train_x)[0].flatten()) 148 | 149 | masks = np.eye(len(train_x), dtype=np.bool) 150 | train_x_cv = np.stack([train_x[~m] for m in masks]) 151 | train_y_cv = np.stack([train_y[~m] for m in masks]) 152 | test_x_cv = np.stack([train_x[m] for m in masks]) 153 | 154 | loo_prediction = [] 155 | for i in range(train_y.shape[0]): 156 | loo_model = get_gaussian_process( 157 | configspace=target_model.configspace, 158 | bounds=target_model.bounds, 159 | types=target_model.types, 160 | rng=target_model.rng, 161 | kernel=target_model.kernel, 162 | ) 163 | loo_model._train(X=train_x_cv[i], y=train_y_cv[i], do_optimize=False) 164 | loo_prediction.append(loo_model.predict(test_x_cv[i])[0][0][0]) 165 | predictions.append(loo_prediction) 166 | predictions = np.array(predictions) 167 | 168 | bootstrap_indices = rng.choice(predictions.shape[1], 169 | size=(num_samples, predictions.shape[1]), 170 | replace=True) 171 | 172 | bootstrap_predictions = [] 173 | bootstrap_targets = train_y[bootstrap_indices].reshape((num_samples, len(train_y))) 174 | for m in range(len(base_models) + 1): 175 | bootstrap_predictions.append(predictions[m, bootstrap_indices]) 176 | 177 | ranking_losses = np.zeros((len(base_models) + 1, num_samples)) 178 | for i in range(len(base_models)): 179 | 180 | for j in range(len(train_y)): 181 | ranking_losses[i] += np.sum( 182 | ( 183 | roll_col(bootstrap_predictions[i], j) < bootstrap_predictions[i]) 184 | ^ (roll_col(bootstrap_targets, j) < bootstrap_targets 185 | ), axis=1 186 | ) 187 | for j in range(len(train_y)): 188 | ranking_losses[-1] += np.sum( 189 | ( 190 | (roll_col(bootstrap_predictions[-1], j) < bootstrap_targets) 191 | ^ (roll_col(bootstrap_targets, j) < bootstrap_targets) 192 | ), axis=1 193 | ) 194 | 195 | elif sampling_mode in ['simplified', 'correct']: 196 | # Use the original strategy as described in v1: https://arxiv.org/pdf/1802.02219v1.pdf 197 | ranking_losses = [] 198 | # compute ranking loss for each base model 199 | for 
model_idx in range(len(base_models)): 200 | model = base_models[model_idx] 201 | # compute posterior over training points for target task 202 | f_samps = sample_sobol(model, train_x, num_samples, rng.randint(10000)) 203 | # compute and save ranking loss 204 | ranking_losses.append(compute_ranking_loss(f_samps, train_y, target_model=False)) 205 | 206 | # compute ranking loss for target model using LOOCV 207 | if sampling_mode == 'simplified': 208 | # Independent draw of the leave one out sample, other "samples" are noise-free and the 209 | # actual observation 210 | f_samps = get_target_model_loocv_sample_preds(train_x, train_y, num_samples, target_model, 211 | rng.randint(10000)) 212 | ranking_losses.append(compute_ranking_loss(f_samps, train_y, target_model=True)) 213 | elif sampling_mode == 'correct': 214 | # Joint draw of the leave one out sample and the other observations 215 | ranking_losses.append( 216 | compute_target_model_ranking_loss(train_x, train_y, num_samples, target_model, 217 | rng.randint(10000)) 218 | ) 219 | else: 220 | raise ValueError(sampling_mode) 221 | else: 222 | raise NotImplementedError(sampling_mode) 223 | 224 | if isinstance(weight_dilution_strategy, int): 225 | weight_dilution_percentile_target = weight_dilution_strategy 226 | weight_dilution_percentile_base = 50 227 | elif weight_dilution_strategy is None or weight_dilution_strategy in ['probabilistic', 'probabilistic-ld']: 228 | pass 229 | else: 230 | raise ValueError(weight_dilution_strategy) 231 | 232 | ranking_loss = np.array(ranking_losses) 233 | 234 | # perform model pruning 235 | p_drop = [] 236 | if weight_dilution_strategy in ['probabilistic', 'probabilistic-ld']: 237 | for i in range(len(base_models)): 238 | better_than_target = np.sum(ranking_loss[i, :] < ranking_loss[-1, :]) 239 | worse_than_target = np.sum(ranking_loss[i, :] >= ranking_loss[-1, :]) 240 | correction_term = alpha * (better_than_target + worse_than_target) 241 | proba_keep = better_than_target / (better_than_target + worse_than_target + correction_term) 242 | if weight_dilution_strategy == 'probabilistic-ld': 243 | proba_keep = proba_keep * (1 - len(train_x) / float(number_of_function_evaluations)) 244 | proba_drop = 1 - proba_keep 245 | p_drop.append(proba_drop) 246 | r = rng.rand() 247 | if r < proba_drop: 248 | ranking_loss[i, :] = np.max(ranking_loss) * 2 + 1 249 | elif weight_dilution_strategy is not None: 250 | # Use the original strategy as described in v1: https://arxiv.org/pdf/1802.02219v1.pdf 251 | percentile_base = np.percentile(ranking_loss[: -1, :], weight_dilution_percentile_base, axis=1) 252 | percentile_target = np.percentile(ranking_loss[-1, :], weight_dilution_percentile_target) 253 | for i in range(len(base_models)): 254 | if percentile_base[i] >= percentile_target: 255 | ranking_loss[i, :] = np.max(ranking_loss) * 2 + 1 256 | 257 | # compute best model (minimum ranking loss) for each sample 258 | # this differs from v1, where the weight is given only to the target model in case of a tie. 259 | # Here, we distribute the weight fairly among all participants of the tie. 260 | minima = np.min(ranking_loss, axis=0) 261 | assert len(minima) == num_samples 262 | best_models = np.zeros(len(base_models) + 1) 263 | for i, minimum in enumerate(minima): 264 | minimum_locations = ranking_loss[:, i] == minimum 265 | sample_from = np.where(minimum_locations)[0] 266 | 267 | for sample in sample_from: 268 | best_models[sample] += 1. 
/ len(sample_from) 269 | 270 | # compute proportion of samples for which each model is best 271 | rank_weights = best_models / num_samples 272 | return rank_weights, p_drop 273 | 274 | 275 | class RGPE(AbstractEPM): 276 | 277 | def __init__( 278 | self, 279 | training_data: Dict[int, Dict[str, Union[List[Configuration], np.ndarray]]], 280 | num_posterior_samples: int, 281 | weight_dilution_strategy: Union[int, str], 282 | number_of_function_evaluations: int, 283 | sampling_mode: str = 'correct', 284 | variance_mode: str = 'average', 285 | normalization: str = 'mean/var', 286 | alpha: float = 0.0, 287 | **kwargs 288 | ): 289 | """Ranking-Weighted Gaussian Process Ensemble. 290 | 291 | Parameters 292 | ---------- 293 | training_data 294 | Dictionary containing the training data for each meta-task. Mapping from an integer ( 295 | task ID) to a dictionary, which is a mapping from configuration to performance. 296 | num_posterior_samples 297 | Number of samples to draw for approximating the posterior probability of a model 298 | being the best model to explain the observations on the target task. 299 | weight_dilution_strategy 300 | Can be one of the following four: 301 | * ``'probabilistic-ld'``: the method presented in the paper 302 | * ``'probabilistic'``: the method presented in the paper, but without the time-dependent 303 | pruning of meta-models 304 | * an integer: a deterministic strategy described in https://arxiv.org/abs/1802.02219v1 305 | * ``None``: no weight dilution prevention 306 | number_of_function_evaluations 307 | Optimization horizon - used to compute the time-dependent factor in the probability 308 | of dropping base models for the weight dilution prevention strategy 309 | ``'probabilistic-ld'``. 310 | sampling_mode 311 | Can be any of: 312 | * ``'bootstrap'`` 313 | * ``'correct'`` 314 | * ``'simplified'`` 315 | variance_mode 316 | Can be either ``'average'`` to return the weighted average of the variance 317 | predictions of the individual models or ``'target'`` to only obtain the variance 318 | prediction of the target model. Changing this is only necessary to use the model 319 | together with the expected improvement. 320 | normalization 321 | Can be either: 322 | * ``None``: No normalization per task 323 | * ``'mean/var'``: Zero mean unit standard deviation normalization per task as 324 | proposed by Yogatama et al. (AISTATS 2014). 325 | * ``'Copula'``: Copula transform as proposed by Salinas et al., 2020 326 | alpha 327 | Regularization hyperparameter to increase aggressiveness of dropping base models when 328 | using the weight dilution strategies ``'probabilistic-ld'`` or ``'probabilistic'``. 
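        Note: a minimal sketch of the ``training_data`` layout this constructor expects,
        inferred from how it is consumed below (the task ids, configurations and values are
        purely illustrative)::

            training_data = {
                0: {'configurations': [config_a, config_b], 'y': np.array([0.31, 0.27])},
                1: {'configurations': [config_c, config_d], 'y': np.array([0.45, 0.40])},
            }

        where each ``config_*`` is a ``ConfigSpace.Configuration`` evaluated on the
        respective meta-task.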
329 | """ 330 | 331 | if kwargs.get('instance_features') is not None: 332 | raise NotImplementedError() 333 | super().__init__(**kwargs) 334 | self.training_data = training_data 335 | 336 | self.number_of_function_evaluations = number_of_function_evaluations 337 | self.num_posterior_samples = num_posterior_samples 338 | self.rng = np.random.RandomState(self.seed) 339 | self.sampling_mode = sampling_mode 340 | self.variance_mode = variance_mode 341 | self.normalization = normalization 342 | self.alpha = alpha 343 | 344 | if self.normalization not in ['None', 'mean/var', 'Copula']: 345 | raise ValueError(self.normalization) 346 | 347 | if weight_dilution_strategy is None or weight_dilution_strategy == 'None': 348 | weight_dilution_strategy = None 349 | elif weight_dilution_strategy in ['probabilistic', 'probabilistic-ld']: 350 | pass 351 | else: 352 | weight_dilution_strategy = int(weight_dilution_strategy) 353 | 354 | self.weight_dilution_strategy = weight_dilution_strategy 355 | 356 | base_models = [] 357 | for task in training_data: 358 | model = get_gaussian_process( 359 | bounds=self.bounds, 360 | types=self.types, 361 | configspace=self.configspace, 362 | rng=self.rng, 363 | kernel=None, 364 | ) 365 | y = training_data[task]['y'] 366 | if self.normalization == 'mean/var': 367 | mean = y.mean() 368 | std = y.std() 369 | if std == 0: 370 | std = 1 371 | 372 | y_scaled = (y - mean) / std 373 | y_scaled = y_scaled.flatten() 374 | elif self.normalization == 'Copula': 375 | y_scaled = copula_transform(y) 376 | elif self.normalization == 'None': 377 | y_scaled = y 378 | else: 379 | raise ValueError(self.normalization) 380 | configs = training_data[task]['configurations'] 381 | X = convert_configurations_to_array(configs) 382 | 383 | model.train( 384 | X=X, 385 | Y=y_scaled, 386 | ) 387 | base_models.append(model) 388 | self.base_models = base_models 389 | self.weights_over_time = [] 390 | self.p_drop_over_time = [] 391 | 392 | def _train(self, X: np.ndarray, Y: np.ndarray) -> AbstractEPM: 393 | """SMAC training function""" 394 | print(self.normalization) 395 | if self.normalization == 'mean/var': 396 | Y = Y.flatten() 397 | mean = Y.mean() 398 | std = Y.std() 399 | if std == 0: 400 | std = 1 401 | 402 | y_scaled = (Y - mean) / std 403 | self.Y_std_ = std 404 | self.Y_mean_ = mean 405 | elif self.normalization in ['None', 'Copula']: 406 | self.Y_mean_ = 0. 407 | self.Y_std_ = 1. 
408 | y_scaled = Y 409 | if self.normalization == 'Copula': 410 | y_scaled = copula_transform(Y) 411 | else: 412 | raise ValueError(self.normalization) 413 | 414 | target_model = get_gaussian_process( 415 | bounds=self.bounds, 416 | types=self.types, 417 | configspace=self.configspace, 418 | rng=self.rng, 419 | kernel=None, 420 | ) 421 | self.target_model = target_model.train(X, y_scaled) 422 | self.model_list_ = self.base_models + [target_model] 423 | 424 | if X.shape[0] < 3: 425 | self.weights_ = np.ones(len(self.model_list_)) / len(self.model_list_) 426 | p_drop = np.ones((len(self.base_models, ))) * np.NaN 427 | else: 428 | try: 429 | self.weights_, p_drop = compute_rank_weights( 430 | train_x=X, 431 | train_y=y_scaled, 432 | base_models=self.base_models, 433 | target_model=target_model, 434 | num_samples=self.num_posterior_samples, 435 | sampling_mode=self.sampling_mode, 436 | weight_dilution_strategy=self.weight_dilution_strategy, 437 | number_of_function_evaluations=self.number_of_function_evaluations, 438 | rng=self.rng, 439 | alpha=self.alpha, 440 | ) 441 | except Exception as e: 442 | print(e) 443 | self.weights_ = np.zeros((len(self.model_list_, ))) 444 | self.weights_[-1] = 1 445 | p_drop = np.ones((len(self.base_models, ))) * np.NaN 446 | 447 | print('Weights', self.weights_) 448 | self.weights_over_time.append(self.weights_) 449 | self.p_drop_over_time.append(p_drop) 450 | 451 | return self 452 | 453 | def _predict(self, X: np.ndarray, cov_return_type='diagonal_cov') -> Tuple[np.ndarray, np.ndarray]: 454 | """SMAC predict function""" 455 | 456 | # compute posterior for each model 457 | weighted_means = [] 458 | weighted_covars = [] 459 | 460 | # filter model with zero weights 461 | # weights on covariance matrices are weight**2 462 | non_zero_weight_indices = (self.weights_ ** 2 > 0).nonzero()[0] 463 | non_zero_weights = self.weights_[non_zero_weight_indices] 464 | # re-normalize 465 | non_zero_weights /= non_zero_weights.sum() 466 | 467 | for non_zero_weight_idx in range(non_zero_weight_indices.shape[0]): 468 | raw_idx = non_zero_weight_indices[non_zero_weight_idx].item() 469 | weight = non_zero_weights[non_zero_weight_idx] 470 | mean, covar = self.model_list_[raw_idx]._predict(X, cov_return_type) 471 | 472 | weighted_means.append(weight * mean) 473 | 474 | if self.variance_mode == 'average': 475 | weighted_covars.append(covar * weight ** 2) 476 | elif self.variance_mode == 'target': 477 | if raw_idx + 1 == len(self.weights_): 478 | weighted_covars.append(covar) 479 | else: 480 | raise ValueError() 481 | 482 | if len(weighted_covars) == 0: 483 | if self.variance_mode != 'target': 484 | raise ValueError(self.variance_mode) 485 | _, covar = self.model_list_[-1]._predict(X, cov_return_type=cov_return_type) 486 | weighted_covars.append(covar) 487 | 488 | mean_x = np.sum(np.stack(weighted_means), axis=0) * self.Y_std_ + self.Y_mean_ 489 | covar_x = np.sum(weighted_covars, axis=0) * (self.Y_std_ ** 2) 490 | return mean_x, covar_x 491 | 492 | def sample_functions(self, X_test: np.ndarray, n_funcs: int = 1) -> np.ndarray: 493 | """ 494 | Sample function values from the posterior of the specified test points. 
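        In this rank-weighted ensemble a joint draw is the weighted sum of per-model draws,
        ``f(x) = sum_i w_i * f_i(x)``, where only models with non-zero weight contribute and
        the non-zero weights are re-normalized before sampling (see the implementation below).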
495 | """ 496 | 497 | # filter model with zero weights 498 | # weights on covariance matrices are weight**2 499 | non_zero_weight_indices = (self.weights_ ** 2 > 0).nonzero()[0] 500 | non_zero_weights = self.weights_[non_zero_weight_indices] 501 | # re-normalize 502 | non_zero_weights /= non_zero_weights.sum() 503 | 504 | samples = [] 505 | for non_zero_weight_idx in range(non_zero_weight_indices.shape[0]): 506 | raw_idx = non_zero_weight_indices[non_zero_weight_idx].item() 507 | weight = non_zero_weights[non_zero_weight_idx] 508 | 509 | funcs = self.model_list_[raw_idx].sample_functions(X_test, n_funcs) 510 | funcs = funcs * weight 511 | samples.append(funcs) 512 | samples = np.sum(samples, axis=0) 513 | return samples 514 | -------------------------------------------------------------------------------- /rgpe/methods/rmogp.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from typing import Optional 3 | 4 | import numpy as np 5 | from scipy.stats import norm 6 | 7 | from smac.epm.base_epm import AbstractEPM 8 | from smac.optimizer.acquisition import AbstractAcquisitionFunction, EI 9 | 10 | from rgpe.utils import sample_sobol 11 | 12 | 13 | class MixtureOfGPs(AbstractAcquisitionFunction): 14 | 15 | def __init__(self, 16 | model: AbstractEPM, 17 | use_expectation=True, 18 | use_global_incumbent=False, 19 | ): 20 | """Ranking-weighted Mixture of Gaussian Processes acquisition function 21 | 22 | Parameters 23 | ---------- 24 | model : AbstractEPM 25 | An linearly-weighted ensemble which contains a model for each base task and the target 26 | task. 27 | use_expectation : bool 28 | Whether to compute the expectation per base task. Defaults to ``True``, 29 | using ``False`` makes the acquisition function behave similar to the transfer 30 | acquisition function (Wistuba et al., Machine Learning 2018). 31 | use_global_incumbent : bool 32 | Whether to use a global incumbent or an incumbent per task. Defaults to ``False``, 33 | using ``True`` makes the acquisition function behave more similar to 'Active Testing' 34 | from Leite and Brazdil (2012). 35 | """ 36 | 37 | super().__init__(model) 38 | self.long_name = 'Transfer Acquisition Function' 39 | self.eta = None 40 | self.etas = None 41 | self.n_models = 0 42 | self.use_expectation = use_expectation 43 | self.use_global_incumbent = use_global_incumbent 44 | 45 | self.base_models = None 46 | 47 | def update(self, **kwargs): 48 | """SMAC's acquisition function update mechanism. 49 | 50 | This is a fast implementation which copies the base models once in the beginning. Do use 51 | with care if moving the acquisition function to a new version of SMAC, a different 52 | Bayesian optimization library or somehow else change the experimental setup. If you are 53 | unsure about this, please use the slower implementation below which does a deepcopy in 54 | every iteration. 
55 | """ 56 | model = kwargs['model'] 57 | self.n_models = len(self.model.model_list_) 58 | 59 | X = kwargs['X'] 60 | 61 | if self.base_models is None: 62 | self.base_models = copy.deepcopy(model.base_models) 63 | model = copy.copy(model) 64 | 65 | etas = [] 66 | for i, (submodel, weight) in enumerate(zip(model.model_list_, model.weights_)): 67 | if weight <= 0: 68 | etas.append(np.inf) 69 | continue 70 | if self.use_expectation and i != self.n_models - 1: 71 | # Use the re-parametrization trick to get rid of noise 72 | original_training_data = submodel.gp.X_train_.copy() 73 | integrate = np.vstack((original_training_data, X)) 74 | sample, _ = submodel.predict(integrate) 75 | theta = self.base_models[i].gp.kernel.theta 76 | theta[-1] = -25 77 | self.base_models[i].gp.kernel.theta = theta 78 | self.base_models[i]._train(integrate, sample, do_optimize=False) 79 | if self.use_global_incumbent: 80 | eta, _ = submodel.predict(self.incumbent_array) 81 | else: 82 | means, _ = submodel.predict(X) 83 | eta = np.min(means) 84 | etas.append(eta) 85 | 86 | model.base_models = [] 87 | model.model_list_ = [] 88 | for submodel in self.base_models: 89 | model.base_models.append(submodel) 90 | model.model_list_.append(submodel) 91 | model.model_list_.append(model.target_model) 92 | 93 | self.model = model 94 | self.etas = etas 95 | 96 | def update_slow(self, **kwargs): 97 | """SMAC's acquisition function update mechanism.""" 98 | model = kwargs['model'] 99 | self.n_models = len(self.model.model_list_) 100 | 101 | X = kwargs['X'] 102 | 103 | self._do_integrate = True 104 | model_ = copy.deepcopy(model) 105 | etas = [] 106 | for submodel, weight in zip(model_.model_list_, model_.weights_): 107 | if weight <= 0: 108 | etas.append(np.inf) 109 | continue 110 | if self.use_expectation: 111 | # Use the re-parametrization trick to get rid of noise 112 | original_training_data = submodel.gp.X_train_.copy() 113 | integrate = np.vstack((original_training_data, X)) 114 | sample, _ = submodel.predict(integrate) 115 | print('before', submodel.gp.kernel.theta) 116 | theta = submodel.gp.kernel.theta 117 | theta[-1] = -25 118 | submodel.gp.kernel.theta = theta 119 | print('after', submodel.gp.kernel.theta) 120 | submodel._train(integrate, sample, do_optimize=False) 121 | if self.use_global_incumbent: 122 | eta, _ = submodel.predict(self.incumbent_array) 123 | else: 124 | means, _ = submodel.predict(X) 125 | eta = np.min(means) 126 | etas.append(eta) 127 | 128 | self.model = model_ 129 | self.etas = etas 130 | 131 | def _compute(self, X: np.ndarray, **kwargs): 132 | """SMAC's acquisition function computation mechanism.""" 133 | 134 | ei_values = [] 135 | 136 | for i, (weight, model) in enumerate(zip(self.model.weights_, self.model.model_list_)): 137 | if weight == 0: 138 | continue 139 | else: 140 | eta = self.etas[i] 141 | if self.use_expectation or i == self.n_models - 1: 142 | 143 | m, v = model.predict(X) 144 | s = np.sqrt(v) 145 | eta_minus_m = eta - m 146 | 147 | def calculate_f(): 148 | z = eta_minus_m / s 149 | return eta_minus_m * norm.cdf(z) + s * norm.pdf(z) 150 | 151 | if np.any(s == 0.0): 152 | # if std is zero, we have observed x on all instances 153 | # using a RF, std should be never exactly 0.0 154 | # Avoid zero division by setting all zeros in s to one. 155 | # Consider the corresponding results in f to be zero. 
156 | self.logger.warning("Predicted std is 0.0 for at least one sample.") 157 | s_copy = np.copy(s) 158 | s[s_copy == 0.0] = 1.0 159 | ei = calculate_f() 160 | ei[s_copy == 0.0] = 0.0 161 | else: 162 | ei = calculate_f() 163 | if (ei < 0).any(): 164 | raise ValueError( 165 | "Expected Improvement is smaller than 0 for at least one " 166 | "sample.") 167 | 168 | ei_values.append(ei * weight) 169 | else: 170 | m, _ = model.predict(X) 171 | improvement = eta - m 172 | improvement = improvement 173 | improvement = np.maximum(improvement, 0) 174 | ei_values.append(improvement * weight) 175 | 176 | rval = np.sum(ei_values, axis=0) 177 | rval = rval.reshape((-1, 1)) 178 | 179 | return rval 180 | 181 | -------------------------------------------------------------------------------- /rgpe/methods/taf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from smac.epm.base_epm import AbstractEPM 4 | from smac.optimizer.acquisition import AbstractAcquisitionFunction, EI 5 | 6 | 7 | class TAF(AbstractAcquisitionFunction): 8 | 9 | def __init__(self, model: AbstractEPM): 10 | """Transfer acquisition function from "Scalable Gaussian process-based transfer surrogates 11 | for hyperparameter optimization" by Wistuba, Schilling and Schmidt-Thieme, 12 | Machine Learning 2018, https://link.springer.com/article/10.1007/s10994-017-5684-y 13 | 14 | Works both with TST-R and RGPE weighting. 15 | """ 16 | 17 | super().__init__(model) 18 | self.long_name = 'Transfer Acquisition Function' 19 | self.eta = None 20 | self.acq = EI(model=None) 21 | 22 | def update(self, **kwargs): 23 | 24 | X = kwargs['X'] 25 | prediction, _ = self.model.target_model.predict(X) 26 | self.incumbent_array = X[np.argmin(prediction)].reshape((1, -1)) 27 | eta = np.min(prediction) 28 | assert (id(kwargs['model']) == id(self.model)) 29 | kwargs = {} 30 | kwargs['model'] = self.model.target_model 31 | kwargs['eta'] = eta 32 | self.acq.model = None 33 | self.acq.update(**kwargs) 34 | best_values = [] 35 | for weight, base_model in zip(self.model.weights_, self.model.base_models): 36 | if weight == 0: 37 | best_values.append(None) 38 | else: 39 | values, _ = base_model.predict(X) 40 | min_value = np.min(values) 41 | best_values.append(min_value) 42 | self.best_values = best_values 43 | 44 | def _compute(self, X: np.ndarray, **kwargs): 45 | 46 | ei = self.acq._compute(X) 47 | 48 | if self.model.weights_[-1] == 1: 49 | return ei 50 | 51 | else: 52 | improvements = [] 53 | 54 | for weight, best_value, base_model in zip(self.model.weights_, self.best_values, self.model.base_models): 55 | if weight == 0: 56 | continue 57 | else: 58 | predictions, _ = base_model._predict(X, cov_return_type=None) 59 | improvement = np.maximum(best_value - predictions, 0).flatten() * weight 60 | improvements.append(improvement) 61 | 62 | improvements = np.sum(improvements, axis=0) 63 | 64 | rval = ei.flatten() * self.model.weights_[-1] + improvements 65 | rval = rval.reshape((-1, 1)) 66 | 67 | return rval 68 | -------------------------------------------------------------------------------- /rgpe/methods/tstr.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Dict, List, Optional, Tuple, Union 3 | 4 | from ConfigSpace import Configuration 5 | import numpy as np 6 | from smac.configspace import convert_configurations_to_array 7 | from smac.epm.base_epm import AbstractEPM 8 | from rgpe.utils import get_gaussian_process, copula_transform 9 
| 10 | 11 | class TSTR(AbstractEPM): 12 | 13 | def __init__( 14 | self, 15 | training_data: Dict[int, Dict[str, Union[List[Configuration], np.ndarray]]], 16 | bandwidth: float = 0.1, 17 | variance_mode: str = 'target', 18 | normalization: str = 'mean/var', 19 | weight_dilution_strategy: Union[int, str] = 'None', 20 | number_of_function_evaluations: float = 50, 21 | **kwargs 22 | ): 23 | """ 24 | Two-stage transfer surrogate with ranking from "Scalable Gaussian process-based 25 | transfer surrogates for hyperparameter optimization" by Wistuba, Schilling and 26 | Schmidt-Thieme, Machine Learning 2018, 27 | https://link.springer.com/article/10.1007/s10994-017-5684-y 28 | 29 | Parameters 30 | ---------- 31 | training_data 32 | Dictionary containing the training data for each meta-task. Mapping from an integer ( 33 | task ID) to a dictionary, which is a mapping from configuration to performance. 34 | bandwidth 35 | rho in the original paper 36 | variance_mode 37 | Can be either ``'average'`` to return the weighted average of the variance 38 | predictions of the individual models or ``'target'`` to only obtain the variance 39 | prediction of the target model. Changing this is only necessary to use the model 40 | together with the expected improvement. 41 | normalization 42 | Can be either: 43 | * ``None``: No normalization per task 44 | * ``'mean/var'``: Zero mean unit standard deviation normalization per task as 45 | proposed by Yogatama et al. (AISTATS 2014). 46 | * ``'Copula'``: Copula transform as proposed by Salinas et al., 2020 47 | weight_dilution_strategy 48 | Can be one of the following four: 49 | * ``'probabilistic-ld'``: the method presented in the paper 50 | * ``'probabilistic'``: the method presented in the paper, but without the time-dependent 51 | pruning of meta-models 52 | * an integer: a deterministic strategy described in https://arxiv.org/abs/1802.02219v1 53 | * ``None``: no weight dilution prevention 54 | number_of_function_evaluations 55 | Optimization horizon - used to compute the time-dependent factor in the probability 56 | of dropping base models for the weight dilution prevention strategy 57 | ``'probabilistic-ld'``. 
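        As a reading aid for ``_train`` below: with ``d`` the fraction of discordant
        (misranked) pairs between a base model's predictions and the target observations,
        the base-model weight follows the quadratic kernel
        ``0.75 * (1 - (d / bandwidth) ** 2)`` if ``d / bandwidth < 1`` and is 0 otherwise,
        while the target model always receives 0.75 before normalization. For example
        (illustrative numbers), with the default bandwidth of 0.1 and d = 0.05 the weight is
        0.75 * (1 - 0.5 ** 2) = 0.5625.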
58 | """ 59 | 60 | if kwargs.get('instance_features') is not None: 61 | raise NotImplementedError() 62 | super().__init__(**kwargs) 63 | self.training_data = training_data 64 | 65 | self.bandwidth = bandwidth 66 | self.rng = np.random.RandomState(self.seed) 67 | self.variance_mode = variance_mode 68 | self.normalization = normalization 69 | self.weight_dilution_strategy = weight_dilution_strategy 70 | self.number_of_function_evaluations = number_of_function_evaluations 71 | 72 | if self.normalization not in ['None', 'mean/var', 'Copula']: 73 | raise ValueError(self.normalization) 74 | 75 | base_models = [] 76 | for task in training_data: 77 | model = get_gaussian_process( 78 | bounds=self.bounds, 79 | types=self.types, 80 | configspace=self.configspace, 81 | rng=self.rng, 82 | kernel=None, 83 | ) 84 | Y = training_data[task]['y'] 85 | 86 | if self.normalization == 'mean/var': 87 | mean = Y.mean() 88 | std = Y.std() 89 | if std == 0: 90 | std = 1 91 | 92 | y_scaled = (Y - mean) / std 93 | y_scaled = y_scaled.flatten() 94 | elif self.normalization == 'Copula': 95 | y_scaled = copula_transform(Y) 96 | elif self.normalization == 'None': 97 | y_scaled = Y 98 | else: 99 | raise ValueError(self.normalization) 100 | configs = training_data[task]['configurations'] 101 | X = convert_configurations_to_array(configs) 102 | 103 | model.train( 104 | X=X, 105 | Y=y_scaled, 106 | ) 107 | base_models.append(model) 108 | self.base_models = base_models 109 | self.weights_over_time = [] 110 | 111 | def _train(self, X: np.ndarray, Y: np.ndarray) -> AbstractEPM: 112 | if self.normalization == 'mean/var': 113 | Y = Y.flatten() 114 | mean = Y.mean() 115 | std = Y.std() 116 | if std == 0: 117 | std = 1 118 | 119 | y_scaled = (Y - mean) / std 120 | self.Y_std_ = std 121 | self.Y_mean_ = mean 122 | elif self.normalization in ['None', 'Copula']: 123 | self.Y_mean_ = 0. 124 | self.Y_std_ = 1. 
125 | y_scaled = Y 126 | if self.normalization == 'Copula': 127 | y_scaled = copula_transform(Y) 128 | else: 129 | raise ValueError(self.normalization) 130 | 131 | target_model = get_gaussian_process( 132 | bounds=self.bounds, 133 | types=self.types, 134 | configspace=self.configspace, 135 | rng=self.rng, 136 | kernel=None, 137 | ) 138 | self.target_model = target_model.train(X, y_scaled) 139 | self.model_list_ = self.base_models + [target_model] 140 | 141 | weights = np.zeros(len(self.model_list_)) 142 | weights[-1] = 0.75 143 | 144 | discordant_pairs_per_task = {} 145 | 146 | for model_idx, model in enumerate(self.base_models): 147 | if X.shape[0] < 2: 148 | weights[model_idx] = 0.75 149 | else: 150 | mean, _ = model.predict(X) 151 | discordant_pairs = 0 152 | total_pairs = 0 153 | for i in range(X.shape[0]): 154 | for j in range(i + 1, X.shape[0]): 155 | if (Y[i] < Y[j]) ^ (mean[i] < mean[j]): 156 | discordant_pairs += 1 157 | total_pairs += 1 158 | t = discordant_pairs / total_pairs / self.bandwidth 159 | discordant_pairs_per_task[model_idx] = discordant_pairs 160 | if t < 1: # The paper says <=, but the code says < (https://github.com/wistuba/TST/blob/master/src/de/ismll/hylap/surrogateModel/TwoStageSurrogate.java) 161 | weights[model_idx] = 0.75 * (1 - t ** 2) 162 | else: 163 | weights[model_idx] = 0 164 | 165 | # perform model pruning 166 | # use this only for ablation 167 | if X.shape[0] >= 2: 168 | p_drop = [] 169 | if self.weight_dilution_strategy in ['probabilistic', 'probabilistic-ld']: 170 | for i in range(len(self.base_models)): 171 | concordant_pairs = total_pairs - discordant_pairs_per_task[i] 172 | proba_keep = concordant_pairs / total_pairs 173 | if self.weight_dilution_strategy == 'probabilistic-ld': 174 | proba_keep = proba_keep * (1 - len(X) / float(self.number_of_function_evaluations)) 175 | proba_drop = 1 - proba_keep 176 | p_drop.append(proba_drop) 177 | r = self.rng.rand() 178 | if r < proba_drop: 179 | weights[i] = 0 180 | elif self.weight_dilution_strategy == 'None': 181 | pass 182 | else: 183 | raise ValueError(self.weight_dilution_strategy) 184 | 185 | weights /= np.sum(weights) 186 | print(weights) 187 | self.weights_ = weights 188 | 189 | self.weights_over_time.append(weights) 190 | # create model and acquisition function 191 | return self 192 | 193 | def _predict(self, X: np.ndarray, cov_return_type: str = 'diagonal_cov') -> Tuple[np.ndarray, np.ndarray]: 194 | 195 | if cov_return_type != 'diagonal_cov': 196 | raise NotImplementedError(cov_return_type) 197 | 198 | # compute posterior for each model 199 | weighted_means = [] 200 | weighted_covars = [] 201 | 202 | # filter model with zero weights 203 | # weights on covariance matrices are weight**2 204 | non_zero_weight_indices = (self.weights_ ** 2 > 0).nonzero()[0] 205 | non_zero_weights = self.weights_[non_zero_weight_indices] 206 | # re-normalize 207 | non_zero_weights /= non_zero_weights.sum() 208 | 209 | for non_zero_weight_idx in range(non_zero_weight_indices.shape[0]): 210 | raw_idx = non_zero_weight_indices[non_zero_weight_idx].item() 211 | weight = non_zero_weights[non_zero_weight_idx] 212 | mean, covar = self.model_list_[raw_idx]._predict(X) 213 | 214 | weighted_means.append(weight * mean) 215 | 216 | if self.variance_mode == 'average': 217 | weighted_covars.append(covar * weight ** 2) 218 | elif self.variance_mode == 'target': 219 | if raw_idx + 1 == len(self.weights_): 220 | weighted_covars.append(covar) 221 | else: 222 | raise ValueError() 223 | 224 | if len(weighted_covars) == 0: 225 | if 
self.variance_mode != 'target': 226 | raise ValueError(self.variance_mode) 227 | _, covar = self.model_list_[-1]._predict(X, cov_return_type) 228 | weighted_covars.append(covar) 229 | 230 | # set mean and covariance to be the rank-weighted sum the means and covariances 231 | # of the base models and target model 232 | mean_x = np.sum(np.stack(weighted_means), axis=0) * self.Y_std_ + self.Y_mean_ 233 | covar_x = np.sum(weighted_covars, axis=0) * (self.Y_std_ ** 2) 234 | return mean_x, covar_x 235 | 236 | def sample_functions(self, X_test: np.ndarray, n_funcs: int=1) -> np.ndarray: 237 | """ 238 | Samples F function values from the current posterior at the N 239 | specified test points. 240 | 241 | Parameters 242 | ---------- 243 | X_test: np.ndarray (N, D) 244 | Input test points 245 | n_funcs: int 246 | Number of function values that are drawn at each test point. 247 | 248 | Returns 249 | ---------- 250 | function_samples: np.array(F, N) 251 | The F function values drawn at the N test points. 252 | """ 253 | 254 | # filter model with zero weights 255 | # weights on covariance matrices are weight**2 256 | non_zero_weight_indices = (self.weights_ ** 2 > 0).nonzero()[0] 257 | non_zero_weights = self.weights_[non_zero_weight_indices] 258 | # re-normalize 259 | non_zero_weights /= non_zero_weights.sum() 260 | 261 | samples = [] 262 | for non_zero_weight_idx in range(non_zero_weight_indices.shape[0]): 263 | raw_idx = non_zero_weight_indices[non_zero_weight_idx].item() 264 | weight = non_zero_weights[non_zero_weight_idx] 265 | 266 | funcs = self.model_list_[raw_idx].sample_functions(X_test, n_funcs) 267 | funcs = funcs * weight 268 | samples.append(funcs) 269 | samples = np.sum(samples, axis=0) 270 | return samples 271 | -------------------------------------------------------------------------------- /rgpe/methods/warmstarting_ac.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Dict, List, Optional, Tuple, Union 3 | 4 | from ConfigSpace import Configuration 5 | import numpy as np 6 | from sklearn.linear_model import SGDRegressor 7 | from smac.configspace import convert_configurations_to_array 8 | from smac.epm.base_epm import AbstractEPM 9 | 10 | from rgpe.utils import get_gaussian_process 11 | 12 | 13 | class WarmstartingAC(AbstractEPM): 14 | 15 | """Weighting method from "Warmstarting of Model-based Algorithm Configuration" by Lindauer 16 | and Hutter, AAAI 2018 17 | 18 | https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/download/17235/15829 19 | """ 20 | 21 | def __init__( 22 | self, 23 | training_data: Dict[int, Dict[str, Union[List[Configuration], np.ndarray]]], 24 | variance_mode: str = 'average', 25 | ** kwargs 26 | ): 27 | if kwargs.get('instance_features') is not None: 28 | raise NotImplementedError() 29 | super().__init__(**kwargs) 30 | self.training_data = training_data 31 | 32 | self.rng = np.random.RandomState(self.seed) 33 | self.variance_mode = variance_mode 34 | 35 | base_models = [] 36 | for task in training_data: 37 | model = get_gaussian_process( 38 | bounds=self.bounds, 39 | types=self.types, 40 | configspace=self.configspace, 41 | rng=self.rng, 42 | kernel=None, 43 | ) 44 | Y = training_data[task]['y'] 45 | mean = Y.mean() 46 | std = Y.std() 47 | if std == 0: 48 | std = 1 49 | 50 | y_scaled = (Y - mean) / std 51 | y_scaled = y_scaled.flatten() 52 | configs = training_data[task]['configurations'] 53 | X = convert_configurations_to_array(configs) 54 | 55 | model.train( 56 | X=X, 57 | Y=y_scaled, 58 | 
) 59 | base_models.append(model) 60 | self.base_models = base_models 61 | self.sgd = SGDRegressor(random_state=12345, warm_start=True, max_iter=100) 62 | 63 | self.weights_over_time = [] 64 | 65 | def _compute_weights(self, X, y): 66 | if X.shape[0] == 1: 67 | self.weights_ = np.ones(len(self.base_models) + 1) / (len(self.base_models) + 1) 68 | return 69 | predictions = [] 70 | for base_model in self.base_models: 71 | m, _ = base_model.predict(X) 72 | predictions.append(m.flatten()) 73 | loo_predictions = [] 74 | for i in range(X.shape[0]): 75 | X_tmp = list(X) 76 | x_loo = X_tmp[i] 77 | del X_tmp[i] 78 | X_tmp = np.array(X_tmp) 79 | y_tmp = list(y) 80 | del y_tmp[i] 81 | y_tmp = np.array(y_tmp) 82 | self.target_model._train(X_tmp, y_tmp, do_optimize=False) 83 | m, _ = self.target_model.predict(np.array([x_loo])) 84 | loo_predictions.append(m) 85 | predictions.append(np.array(loo_predictions).flatten()) 86 | predictions = np.array(predictions) 87 | self.sgd.fit(predictions.transpose(), y) 88 | self.weights_ = self.sgd.coef_ 89 | # Counteract the following weird failure case: 90 | # * all observations so far have the same value -> normalization makes them all 0.0 91 | # * all predictions via cross-validation have a value of 0.0 92 | # -> this results in SGD having all weights being zero 93 | if np.sum(self.weights_) == 0: 94 | self.weights_[-1] = 1 95 | 96 | def _train(self, X: np.ndarray, Y: np.ndarray) -> AbstractEPM: 97 | Y = Y.flatten() 98 | mean = Y.mean() 99 | std = Y.std() 100 | if std == 0: 101 | std = 1 102 | 103 | y_scaled = (Y - mean) / std 104 | self.Y_mean_ = mean 105 | self.Y_std_ = std 106 | 107 | target_model = get_gaussian_process( 108 | bounds=self.bounds, 109 | types=self.types, 110 | configspace=self.configspace, 111 | rng=self.rng, 112 | kernel=None, 113 | ) 114 | self.target_model = target_model.train(X, y_scaled) 115 | self.model_list_ = self.base_models + [target_model] 116 | self._compute_weights(X, Y) 117 | print('Weights', self.weights_) 118 | self.weights_over_time.append(self.weights_) 119 | 120 | # create model and acquisition function 121 | return self 122 | 123 | def _predict(self, X: np.ndarray, cov_return_type) -> Tuple[np.ndarray, np.ndarray]: 124 | 125 | # compute posterior for each model 126 | weighted_means = [] 127 | weighted_covars = [] 128 | 129 | # filter model with zero weights 130 | # weights on covariance matrices are weight**2 131 | non_zero_weight_indices = (self.weights_ ** 2 > 0).nonzero()[0] 132 | non_zero_weights = self.weights_[non_zero_weight_indices] 133 | # re-normalize 134 | non_zero_weights /= non_zero_weights.sum() 135 | 136 | for non_zero_weight_idx in range(non_zero_weight_indices.shape[0]): 137 | raw_idx = non_zero_weight_indices[non_zero_weight_idx].item() 138 | weight = non_zero_weights[non_zero_weight_idx] 139 | mean, covar = self.model_list_[raw_idx]._predict(X, cov_return_type) 140 | weighted_means.append(weight * mean) 141 | if self.variance_mode == 'average': 142 | weighted_covars.append(covar * weight) 143 | elif self.variance_mode == 'correct-average': 144 | weighted_covars.append(covar * weight ** 2) 145 | elif self.variance_mode == 'target': 146 | if raw_idx + 1 == len(self.weights_): 147 | weighted_covars.append(covar) 148 | else: 149 | raise ValueError() 150 | 151 | if self.variance_mode == 'target': 152 | assert len(weighted_covars) == 1 153 | 154 | # set mean and covariance to be the rank-weighted sum the means and covariances 155 | # of the 156 | # base models and target model 157 | mean_x = 
np.sum(np.stack(weighted_means), axis=0) * self.Y_std_ + self.Y_mean_ 158 | covar_x = np.sum(weighted_covars, axis=0) * (self.Y_std_ ** 2) 159 | return mean_x, covar_x 160 | -------------------------------------------------------------------------------- /rgpe/test_functions.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import os 3 | import pickle 4 | import sys 5 | from typing import Dict, Optional 6 | 7 | from ConfigSpace.configuration_space import Configuration, ConfigurationSpace 8 | from ConfigSpace.hyperparameters import ( 9 | UniformFloatHyperparameter, 10 | UniformIntegerHyperparameter, 11 | ) 12 | from hpolib.abstract_benchmark import AbstractBenchmark 13 | import hpolib 14 | import lockfile 15 | 16 | import numpy as np 17 | import scipy.optimize 18 | 19 | 20 | class Alpine1D(AbstractBenchmark): 21 | """Modified Alpine1D function as used in v1: https://arxiv.org/pdf/1802.02219v1.pdf""" 22 | 23 | def __init__(self, task, load_all=True, *args, **kwargs): 24 | super().__init__(*args, **kwargs) 25 | self.task = task 26 | 27 | @AbstractBenchmark._check_configuration 28 | def objective_function(self, configuration: Configuration, **kwargs) -> Dict: 29 | x = configuration['x'] 30 | 31 | shift = kwargs.get('task') 32 | if shift is None: 33 | shift = self.task 34 | shift = shift * np.pi / 12 35 | 36 | rval = (x * np.sin(x + np.pi + shift) + 0.1 * x) 37 | return {'function_value': rval} 38 | 39 | def objective_function_test(self, configuration: Configuration, **kwargs): 40 | raise NotImplementedError 41 | 42 | @staticmethod 43 | def get_configuration_space(): 44 | cs = ConfigurationSpace() 45 | cs.add_hyperparameter(UniformFloatHyperparameter('x', -10, 10)) 46 | return cs 47 | 48 | @staticmethod 49 | def get_meta_information(): 50 | return { 51 | 'num_function_evals': 50, 52 | 'name': 'Modified Alpine 1D', 53 | 'reference': """@inproceedings{feurer-automl18, 54 | author = {Matthias Feurer and Benjamin Letham and Eytan Bakshy}, 55 | title = {Scalable Meta-Learning for Bayesian Optimization using Ranking-Weighted Gaussian Process Ensembles}, 56 | booktitle = {ICML 2018 AutoML Workshop}, 57 | year = {2018}, 58 | month = jul, 59 | } 60 | """, 61 | } 62 | 63 | def get_num_base_tasks(self) -> int: 64 | return 5 65 | 66 | def get_empirical_f_opt(self, task: Optional[int] = None) -> float: 67 | """Return the empirical f_opt. 68 | 69 | Because ``get_meta_information`` is a static function it has no access to the actual 70 | function values predicted by the surrogate. This helper function gives access. 71 | 72 | Returns 73 | ------- 74 | Configuration 75 | """ 76 | if task is None: 77 | task = self.task 78 | 79 | cs = self.get_configuration_space() 80 | bounds = [(-10, 10)] 81 | def target(x, task): 82 | config = Configuration(cs, {'x': x[0]}) 83 | return float(self.objective_function(config, task=task)['function_value']) 84 | res = scipy.optimize.differential_evolution( 85 | func=target, bounds=bounds, args=(task, ), popsize=1000, polish=True, 86 | seed=self.rng, 87 | ) 88 | return res.fun 89 | 90 | def get_empirical_f_worst(self, task: Optional[int] = None) -> float: 91 | """Return the empirical f_worst. 92 | 93 | Because ``get_meta_information`` is a static function it has no access to the actual 94 | function values predicted by the surrogate. This helper function gives access. 
95 | 96 | Returns 97 | ------- 98 | Configuration 99 | """ 100 | if task is None: 101 | task = self.task 102 | 103 | cs = self.get_configuration_space() 104 | bounds = [(-10, 10)] 105 | def target(x, task): 106 | try: 107 | config = Configuration(cs, {'x': x[0]}) 108 | return -float(self.objective_function(config, task=task)['function_value']) 109 | except: 110 | return -1e10 111 | res = scipy.optimize.differential_evolution( 112 | func=target, bounds=bounds, args=(task, ), popsize=1000, polish=True, 113 | seed=self.rng, 114 | ) 115 | return -res.fun 116 | 117 | def get_meta_data(self, num_base_tasks: Optional[int] = None, fixed_grid: Optional[bool] = False): 118 | # Sample data for each base task 119 | if num_base_tasks is None: 120 | num_base_tasks = self.get_num_base_tasks() 121 | 122 | if fixed_grid: 123 | seed = self.rng.randint(0, 10000) 124 | else: 125 | seed = None 126 | 127 | data_by_task = {} 128 | for task in range(num_base_tasks + 1): 129 | if task == self.task: 130 | continue 131 | 132 | cs = self.get_configuration_space() 133 | if fixed_grid: 134 | cs.seed(seed) 135 | num_training_points = 20 136 | else: 137 | num_training_points = self.rng.randint(low=15, high=25) 138 | cs.seed(self.rng.randint(0, 10000)) 139 | configurations = cs.sample_configuration(num_training_points) 140 | 141 | # get observed values 142 | train_y = [ 143 | self.objective_function(config, task=task)['function_value'] 144 | for config in configurations 145 | ] 146 | train_y = np.array(train_y) 147 | # store training data 148 | data_by_task[task] = { 149 | # scale x to [0, 1] 150 | 'configurations': configurations, 151 | 'y': train_y, 152 | } 153 | 154 | return data_by_task 155 | 156 | 157 | num_dimensions = 3 158 | class Quadratic(AbstractBenchmark): 159 | """Quadratic function as used by Perrone et al., 2018""" 160 | 161 | def __init__(self, task, load_all=True, *args, **kwargs): 162 | super().__init__(*args, **kwargs) 163 | self.task = task 164 | self._functions = dict() 165 | self._sample_coefficients(task) 166 | self._cache_dir = os.path.join(hpolib._config.data_dir, "artificial", "quadratic") 167 | try: 168 | os.makedirs(self._cache_dir) 169 | except: 170 | pass 171 | 172 | def _sample_coefficients(self, task): 173 | rng = np.random.RandomState(task) 174 | coefficients = rng.rand(3) * (10 - 0.1) + 0.1 175 | self._functions[task] = coefficients 176 | 177 | @AbstractBenchmark._check_configuration 178 | def objective_function(self, configuration: Configuration, **kwargs) -> Dict: 179 | x = [] 180 | for i in range(1, num_dimensions + 1): 181 | x.append(configuration['x%d' % i]) 182 | x = np.array(x) 183 | 184 | task = kwargs.get('task') 185 | if task is None: 186 | task = self.task 187 | if task not in self._functions: 188 | self._sample_coefficients(task) 189 | a, b, c = self._functions[task] 190 | 191 | rval = 0.5 * a * np.linalg.norm(x) ** 2 + b * np.sum(x) + 3 * c 192 | return {'function_value': rval} 193 | 194 | def objective_function_test(self, configuration: Configuration, **kwargs): 195 | raise NotImplementedError 196 | 197 | @classmethod 198 | def get_configuration_space(cls): 199 | cs = ConfigurationSpace() 200 | for i in range(1, num_dimensions + 1): 201 | cs.add_hyperparameter(UniformFloatHyperparameter('x%d' % i, -5, 5)) 202 | return cs 203 | 204 | @staticmethod 205 | def get_meta_information(): 206 | return { 207 | 'num_function_evals': 50, 208 | 'name': '3D Quadratic Function', 209 | 'reference': """@incollection{NIPS2018_7917, 210 | title = {Scalable Hyperparameter Transfer Learning}, 
211 | author = {Perrone, Valerio and Jenatton, Rodolphe and Seeger, Matthias W and Archambeau, Cedric}, 212 | booktitle = {Advances in Neural Information Processing Systems 31}, 213 | editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett}, 214 | pages = {6845--6855}, 215 | year = {2018}, 216 | publisher = {Curran Associates, Inc.}, 217 | url = {http://papers.nips.cc/paper/7917-scalable-hyperparameter-transfer-learning.pdf} 218 | } 219 | """, 220 | } 221 | 222 | def get_num_base_tasks(self) -> int: 223 | return 29 224 | 225 | def get_cache_key(self) -> str: 226 | return '-'.join([str(float(entry)) for entry in self._functions[self.task]]) 227 | 228 | def get_empirical_f_opt(self) -> float: 229 | """Return the empirical f_opt. 230 | 231 | Because ``get_meta_information`` is a static function it has no access to the actual 232 | function values predicted by the surrogate. This helper function gives access. 233 | 234 | Returns 235 | ------- 236 | Configuration 237 | """ 238 | 239 | cache_key = self.get_cache_key() 240 | opt_file_name = os.path.join(self._cache_dir, cache_key + 'opt') 241 | 242 | while True: 243 | try: 244 | if not os.path.exists(opt_file_name): 245 | with lockfile.LockFile(opt_file_name, timeout=10): 246 | 247 | cs = self.get_configuration_space() 248 | bounds = [(-5, 5)] * num_dimensions 249 | 250 | def target(x, task): 251 | try: 252 | config = Configuration( 253 | cs, 254 | {'x%d' % (i + 1): x[i] for i in range(num_dimensions)}, 255 | ) 256 | return float( 257 | self.objective_function(config, task=task)['function_value']) 258 | except: 259 | return 1e10 260 | 261 | res = scipy.optimize.differential_evolution( 262 | func=target, bounds=bounds, args=(self.task,), popsize=1000, 263 | polish=True, 264 | seed=self.rng, 265 | ) 266 | opt = res.fun 267 | 268 | with open(opt_file_name, 'wb') as fh: 269 | pickle.dump(opt, fh) 270 | break 271 | else: 272 | try: 273 | with open(opt_file_name, 'rb') as fh: 274 | opt = pickle.load(fh) 275 | break 276 | except: 277 | continue 278 | except lockfile.LockTimeout: 279 | pass 280 | 281 | return opt 282 | 283 | 284 | def get_empirical_f_worst(self) -> float: 285 | """Return the empirical f_opt. 286 | 287 | Because ``get_meta_information`` is a static function it has no access to the actual 288 | function values predicted by the surrogate. This helper function gives access. 
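        As a quick sanity check on the surface itself (a derivation, not something this code
        computes): with objective 0.5 * a * ||x||^2 + b * sum(x) + 3 * c and a > 0, the
        unconstrained minimizer is x_i = -b / a in every dimension; the box [-5, 5] can clip
        this, so both the empirical optimum and the empirical worst are instead found
        numerically with differential evolution over the box.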
289 | 290 | Returns 291 | ------- 292 | Configuration 293 | """ 294 | 295 | cache_key = self.get_cache_key() 296 | opt_file_name = os.path.join(self._cache_dir, cache_key + 'worst') 297 | 298 | while True: 299 | try: 300 | if not os.path.exists(opt_file_name): 301 | with lockfile.LockFile(opt_file_name, timeout=10): 302 | 303 | cs = self.get_configuration_space() 304 | bounds = [(-5, 5)] * num_dimensions 305 | 306 | def target(x, task): 307 | try: 308 | config = Configuration( 309 | cs, 310 | {'x%d' % (i + 1): x[i] for i in range(num_dimensions)}, 311 | ) 312 | return -float(self.objective_function(config, task=task)['function_value']) 313 | except: 314 | return -1e10 315 | 316 | res = scipy.optimize.differential_evolution( 317 | func=target, bounds=bounds, args=(self.task,), popsize=1000, 318 | polish=True, 319 | seed=self.rng, 320 | ) 321 | opt = -res.fun 322 | 323 | with open(opt_file_name, 'wb') as fh: 324 | pickle.dump(opt, fh) 325 | break 326 | else: 327 | try: 328 | with open(opt_file_name, 'rb') as fh: 329 | opt = pickle.load(fh) 330 | break 331 | except: 332 | continue 333 | except lockfile.LockTimeout: 334 | pass 335 | 336 | return opt 337 | 338 | def get_meta_data(self, num_base_tasks: Optional[int] = None, fixed_grid: Optional[bool] = False): 339 | # Sample data for each base task 340 | if num_base_tasks is None: 341 | num_base_tasks = self.get_num_base_tasks() 342 | 343 | if fixed_grid: 344 | seed = self.rng.randint(0, 10000) 345 | else: 346 | seed = None 347 | 348 | data_by_task = {} 349 | for task_ in range(num_base_tasks + 1): 350 | if self.task == task_: 351 | continue 352 | 353 | cs = self.get_configuration_space() 354 | if fixed_grid: 355 | cs.seed(seed) 356 | else: 357 | cs.seed(self.rng.randint(0, 10000)) 358 | configurations = cs.sample_configuration(10) 359 | 360 | # get observed values 361 | train_y = [ 362 | self.objective_function(config, task=task_)['function_value'] 363 | for config in configurations 364 | ] 365 | train_y = np.array(train_y) 366 | # store training data 367 | data_by_task[task_] = { 368 | 'configurations': configurations, 369 | 'y': train_y, 370 | } 371 | 372 | return data_by_task 373 | 374 | 375 | _adaboost_data = None 376 | _svm_data = None 377 | 378 | 379 | class WistubaAndSchillingGrid(AbstractBenchmark): 380 | """Base class for SVM and Adaboost data used by Schilling et al. (ECML 2016) and Wistuba et 381 | al. 
(ECML 2016).""" 382 | 383 | _file_dir = None 384 | _name = None 385 | _num_hyperparameters = None 386 | _hp_lower_bounds = None 387 | _hp_upper_bounds = None 388 | 389 | def __init__(self, task, load_all=True, *args, **kwargs): 390 | super().__init__(*args, **kwargs) 391 | self.task = task 392 | self._cache_dir = os.path.join(hpolib._config.data_dir, 'WistubaAndSchilling') 393 | try: 394 | os.makedirs(self._cache_dir) 395 | except: 396 | pass 397 | self.data = self._load_data(load_all) 398 | 399 | def _load_data(self, load_all): 400 | global _adaboost_data 401 | global _svm_data 402 | if self._name == 'Adaboost': 403 | if _adaboost_data is not None: 404 | return _adaboost_data 405 | elif self._name == 'SVM': 406 | if _svm_data is not None: 407 | return _svm_data 408 | 409 | data = {} 410 | 411 | current_dir = os.path.abspath(os.path.dirname(__file__)) 412 | data_dir = os.path.join(current_dir, self._file_dir) 413 | files = [ 414 | 'A9A', 'W8A', 'abalone', 'appendicitis', 'australian', 'automobile', 'banana', 415 | 'bands', 'breast-cancer', 'bupa', 'car', 'chess', 'cod-rna', 'coil2000', 416 | 'colon-cancer', 'crx', 'diabetes', 'ecoli', 'german-numer', 'haberman', 417 | 'housevotes', 'ijcnn1', 'kr-vs-k', 'led7digit', 'letter', 'lymphography', 418 | 'magic', 'monk-2', 'pendigits', 'phoneme', 'pima', 'ring', 'saheart', 'segment', 419 | 'seismic', 'shuttle', 'sonar-scale', 'spambase', 'spectfheart', 'splice', 420 | 'tic-tac-toe', 'titanic', 'twonorm', 'usps', 'vehicle', 'wdbc', 'wine', 421 | 'winequality-red', 'wisconsin', 'yeast', 422 | ] 423 | 424 | cache_file = os.path.join(self._cache_dir, self._name + '.pkl') 425 | while True: 426 | 427 | try: 428 | with open(cache_file, 'rb') as fh: 429 | data = pickle.load(fh) 430 | break 431 | except: 432 | pass 433 | 434 | try: 435 | with lockfile.LockFile(cache_file, timeout=10): 436 | 437 | for i, file_name in enumerate(files): 438 | if not load_all and i != self.task: 439 | continue 440 | print(i, file_name) 441 | file_name = os.path.join(data_dir, file_name) 442 | with open(file_name) as fh: 443 | raw_data = fh.readlines() 444 | raw_data = [line.split(' ') for line in raw_data] 445 | targets = [1 - float(line[0]) for line in raw_data] 446 | print(len(raw_data), len(targets)) 447 | print(targets) 448 | configurations = [] 449 | for line in raw_data: 450 | line = line[1:] 451 | line = {int(entry.split(':')[0]): float(entry.split(':')[1]) for 452 | entry in line} 453 | config = Configuration( 454 | values={ 455 | 'x%d' % (j + 1): line.get(j, 0) 456 | for j in range(self._num_hyperparameters) 457 | }, 458 | configuration_space=self.get_configuration_space(), 459 | ) 460 | configurations.append(config) 461 | 462 | data[i] = {config: target for config, target in zip(configurations, targets)} 463 | 464 | with open(cache_file, 'wb') as fh: 465 | pickle.dump(data, fh) 466 | break 467 | except lockfile.LockTimeout: 468 | pass 469 | 470 | # Shuffle data after returning it 471 | for i in data: 472 | configurations = list(data[i].keys()) 473 | targets = list(data[i].values()) 474 | shuffle_indices = self.rng.permutation(list(range(len(configurations)))) 475 | configurations = [configurations[shuffle_indices[j]] for j in 476 | range(len(configurations))] 477 | targets = [targets[shuffle_indices[j]] for j in range(len(targets))] 478 | data[i] = {config: target for config, target in zip(configurations, targets)} 479 | 480 | if self._name == 'Adaboost': 481 | _adaboost_data = data 482 | elif self._name == 'SVM': 483 | _svm_data = data 484 | 485 | return data 486 | 
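    # Illustrative note on the raw grid format parsed in ``_load_data`` above (the example
    # line is made up): a row such as '0.83 0:0.5 1:0.25' is split on spaces, the first
    # token is an accuracy-like score that becomes function_value = 1 - 0.83 = 0.17, and
    # every 'index:value' token becomes hyperparameter x{index + 1}; missing indices
    # default to 0.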
487 | @AbstractBenchmark._check_configuration 488 | def objective_function(self, configuration: Configuration, **kwargs) -> Dict: 489 | print(configuration.origin) 490 | return {'function_value': self.data[self.task][configuration]} 491 | 492 | def objective_function_test(self, configuration: Configuration, **kwargs): 493 | raise NotImplementedError 494 | 495 | @classmethod 496 | def get_configuration_space(cls): 497 | cs = ConfigurationSpace() 498 | for i in range(cls._num_hyperparameters): 499 | cs.add_hyperparameter(UniformFloatHyperparameter( 500 | 'x%d' % (i + 1), 501 | cls._hp_lower_bounds[i], cls._hp_upper_bounds[i])) 502 | return cs 503 | 504 | @classmethod 505 | def get_meta_information(cls): 506 | return { 507 | 'num_function_evals': 50, 508 | 'name': '%s grid data' % cls._name, 509 | 'reference': """""", 510 | } 511 | 512 | def get_num_base_tasks(self) -> int: 513 | return 49 514 | 515 | def get_empirical_f_opt(self) -> float: 516 | """Return the empirical f_opt. 517 | 518 | Because ``get_meta_information`` is a static function it has no access to the actual 519 | function values predicted by the surrogate. This helper function gives access. 520 | 521 | Returns 522 | ------- 523 | Configuration 524 | """ 525 | 526 | return min(list(self.data[self.task].values())) 527 | 528 | def get_empirical_f_worst(self) -> float: 529 | """Return the empirical f_opt. 530 | 531 | Because ``get_meta_information`` is a static function it has no access to the actual 532 | function values predicted by the surrogate. This helper function gives access. 533 | 534 | Returns 535 | ------- 536 | Configuration 537 | """ 538 | 539 | return max(list(self.data[self.task].values())) 540 | 541 | def get_meta_data(self, num_base_tasks: Optional[int] = None, fixed_grid: Optional[bool] = False): 542 | # Sample data for each base task 543 | if num_base_tasks is None: 544 | num_base_tasks = self.get_num_base_tasks() 545 | 546 | data_by_task = {} 547 | 548 | if fixed_grid: 549 | indices = self.rng.choice( 550 | len(self.data[0]), 551 | replace=False, 552 | size=self.get_meta_information()['num_function_evals'], 553 | ) 554 | 555 | for task_ in range(num_base_tasks + 1): 556 | if self.task == task_: 557 | continue 558 | 559 | if not fixed_grid: 560 | indices = self.rng.choice( 561 | len(self.data[task_]), 562 | replace=False, 563 | size=self.get_meta_information()['num_function_evals'], 564 | ) 565 | data = self.data[task_] 566 | else: 567 | data = { 568 | key: self.data[task_][key] 569 | for key in sorted(self.data[task_], key=lambda c: np.sum(c.get_array())) 570 | } 571 | 572 | data = [(k, v) for i, (k, v) in enumerate(data.items()) if i in indices] 573 | configurations = [val[0] for val in data] 574 | train_y = np.array([val[1] for val in data]) 575 | 576 | # store training data 577 | data_by_task[task_] = { 578 | 'configurations': configurations, 579 | 'y': train_y, 580 | } 581 | 582 | return data_by_task 583 | 584 | 585 | class AdaboostGrid(WistubaAndSchillingGrid): 586 | _file_dir = 'adaboost' 587 | _name = 'Adaboost' 588 | _num_hyperparameters = 2 589 | _hp_lower_bounds = [0.07525749891599529, 0.2037950470905062] 590 | _hp_upper_bounds = [1, 1] 591 | 592 | 593 | class SVMGrid(WistubaAndSchillingGrid): 594 | _file_dir = 'svm' 595 | _name = 'SVM' 596 | _num_hyperparameters = 6 597 | _hp_lower_bounds = [0, 0, 0, -0.8333333333333334, -1, 0] 598 | _hp_upper_bounds = [1, 1, 1, 1.0, 0.75, 1.0] 599 | 600 | 601 | _nn_data = None 602 | class NNGrid(AbstractBenchmark): 603 | """LCBench as described in Zimmer et al., 
2021""" 604 | 605 | def __init__(self, task, load_all=True, *args, **kwargs): 606 | super().__init__(*args, **kwargs) 607 | self.task = task 608 | self._cache_dir = os.path.join(hpolib._config.data_dir, 'LCBench') 609 | try: 610 | os.makedirs(self._cache_dir) 611 | except: 612 | pass 613 | self.data = self._load_data() 614 | 615 | def _load_data(self): 616 | global _nn_data 617 | if _nn_data is not None: 618 | return _nn_data 619 | 620 | data = {} 621 | 622 | allowed_hp_names = self.get_configuration_space().get_hyperparameter_names() 623 | 624 | cache_file = os.path.join(self._cache_dir, 'hpobenchmark.pkl.gz') 625 | while True: 626 | try: 627 | 628 | import time 629 | try: 630 | st = time.time() 631 | with gzip.open(cache_file, 'rb') as fh: 632 | content = fh.read() 633 | data = pickle.loads(content) 634 | print(time.time() - st) 635 | break 636 | except Exception as e: 637 | pass 638 | 639 | with lockfile.LockFile(cache_file, timeout=10): 640 | 641 | sys.path.append('../../LCBench') 642 | from api import Benchmark 643 | 644 | data_dir = '../../LCBench/data_2k_lw.json' 645 | bench = Benchmark(data_dir=data_dir, cache=True, 646 | cache_dir=os.path.dirname(data_dir)) 647 | ds_names = bench.get_dataset_names() 648 | 649 | configuration_space = self.get_configuration_space() 650 | 651 | for i, ds_name in enumerate(ds_names): 652 | print(ds_name) 653 | configurations = [] 654 | targets = [] 655 | 656 | n_configs = bench.get_number_of_configs(ds_name) 657 | if n_configs is None: 658 | raise ValueError( 659 | 'Could not read the number of configs for dataset %s' % ds_name) 660 | 661 | for j in range(n_configs): 662 | try: 663 | config_dict = bench.query(ds_name, 'config', j) 664 | config_dict = { 665 | key: value for key, value in config_dict.items() 666 | if key in allowed_hp_names 667 | } 668 | except ValueError as e: 669 | continue 670 | try: 671 | config = Configuration( 672 | values=config_dict, 673 | configuration_space=configuration_space, 674 | ) 675 | except: 676 | print(config_dict) 677 | continue 678 | configurations.append(config) 679 | val_acc = 1 - bench.query(ds_name, "final_val_balanced_accuracy", j) 680 | targets.append(val_acc) 681 | data[i] = {config: target for config, target in 682 | zip(configurations, targets)} 683 | 684 | with gzip.open(cache_file, 'wb') as fh: 685 | pickle.dump(data, fh) 686 | break 687 | 688 | except lockfile.LockTimeout: 689 | pass 690 | 691 | # Shuffle data after returning it 692 | for i in data: 693 | configurations = list(data[i].keys()) 694 | targets = list(data[i].values()) 695 | shuffle_indices = self.rng.permutation(list(range(len(configurations)))) 696 | configurations = [configurations[shuffle_indices[j]] for j in 697 | range(len(configurations))] 698 | targets = [targets[shuffle_indices[j]] for j in range(len(targets))] 699 | data[i] = {config: target for config, target in zip(configurations, targets)} 700 | 701 | _nn_data = data 702 | 703 | return data 704 | 705 | @AbstractBenchmark._check_configuration 706 | def objective_function(self, configuration: Configuration, **kwargs) -> Dict: 707 | return {'function_value': self.data[self.task][configuration]} 708 | 709 | def objective_function_test(self, configuration: Configuration, **kwargs): 710 | raise NotImplementedError 711 | 712 | @classmethod 713 | def get_configuration_space(cls): 714 | cs = ConfigurationSpace() 715 | cs.add_hyperparameter(UniformIntegerHyperparameter('batch_size', 16, 512, log=True)) 716 | cs.add_hyperparameter(UniformFloatHyperparameter('learning_rate', 1e-4, 1e-1, 
log=True)) 717 | cs.add_hyperparameter(UniformFloatHyperparameter('momentum', 0.1, 0.99)) 718 | cs.add_hyperparameter(UniformFloatHyperparameter('weight_decay', 1e-5, 1e5)) 719 | cs.add_hyperparameter(UniformIntegerHyperparameter('num_layers', 1, 5)) 720 | cs.add_hyperparameter(UniformIntegerHyperparameter('max_units', 16, 1024, log=True)) 721 | cs.add_hyperparameter(UniformFloatHyperparameter('max_dropout', 0.0, 1.0)) 722 | return cs 723 | 724 | @classmethod 725 | def get_meta_information(cls): 726 | return { 727 | 'num_function_evals': 50, 728 | 'name': 'Neural Network grid data', 729 | 'reference': """""", 730 | } 731 | 732 | def get_num_base_tasks(self) -> int: 733 | n_base_tasks = len(self.data) 734 | assert n_base_tasks == 35 735 | return n_base_tasks - 1 736 | 737 | def get_empirical_f_opt(self) -> float: 738 | """Return the empirical f_opt. 739 | Because ``get_meta_information`` is a static function it has no access to the actual 740 | function values predicted by the surrogate. This helper function gives access. 741 | Returns 742 | ------- 743 | Configuration 744 | """ 745 | 746 | return min(list(self.data[self.task].values())) 747 | 748 | def get_empirical_f_worst(self) -> float: 749 | """Return the empirical f_opt. 750 | Because ``get_meta_information`` is a static function it has no access to the actual 751 | function values predicted by the surrogate. This helper function gives access. 752 | Returns 753 | ------- 754 | Configuration 755 | """ 756 | 757 | return max(list(self.data[self.task].values())) 758 | 759 | def get_meta_data(self, num_base_tasks: Optional[int] = None, fixed_grid: Optional[bool] = False): 760 | # Sample data for each base task 761 | if num_base_tasks is None: 762 | num_base_tasks = self.get_num_base_tasks() 763 | 764 | if fixed_grid: 765 | indices = self.rng.choice( 766 | len(self.data[0]), 767 | replace=False, 768 | size=self.get_meta_information()['num_function_evals'], 769 | ) 770 | 771 | data_by_task = {} 772 | for task_ in range(num_base_tasks + 1): 773 | if self.task == task_: 774 | continue 775 | 776 | if not fixed_grid: 777 | indices = self.rng.choice( 778 | len(self.data[task_]), 779 | replace=False, 780 | size=self.get_meta_information()['num_function_evals'], 781 | ) 782 | data = self.data[task_] 783 | else: 784 | data = { 785 | key: self.data[task_][key] 786 | for key in sorted(self.data[task_], key=lambda c: np.sum(c.get_array())) 787 | } 788 | 789 | data = [(k, v) for i, (k, v) in enumerate(data.items()) if i in indices] 790 | configurations = [val[0] for val in data] 791 | train_y = np.array([val[1] for val in data]) 792 | 793 | # store training data 794 | data_by_task[task_] = { 795 | 'configurations': configurations, 796 | 'y': train_y, 797 | } 798 | 799 | return data_by_task 800 | -------------------------------------------------------------------------------- /rgpe/utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import typing 3 | 4 | import botorch.sampling.qmc 5 | from ConfigSpace import ConfigurationSpace 6 | import numpy as np 7 | import scipy as sp 8 | from scipy.stats import norm 9 | from smac.epm.base_epm import AbstractEPM 10 | from smac.epm.gaussian_process import GaussianProcess 11 | from smac.epm.gp_base_prior import HorseshoePrior, LognormalPrior 12 | from smac.epm.gp_kernels import ConstantKernel, Matern, WhiteKernel, HammingKernel 13 | from smac.optimizer.acquisition import AbstractAcquisitionFunction 14 | 15 | 16 | def get_gaussian_process( 17 | configspace: 
ConfigurationSpace, 18 | types: typing.List[int], 19 | bounds: typing.List[typing.Tuple[float, float]], 20 | rng: np.random.RandomState, 21 | kernel, 22 | ) -> GaussianProcess: 23 | """Get the default GP class from SMAC. Sets the kernel and its hyperparameters for the 24 | problem at hand.""" 25 | 26 | if kernel is None: 27 | cov_amp = ConstantKernel( 28 | 2.0, 29 | constant_value_bounds=(np.exp(-10), np.exp(2)), 30 | prior=LognormalPrior(mean=0.0, sigma=1, rng=rng), 31 | ) 32 | 33 | cont_dims = np.where(np.array(types) == 0)[0] 34 | cat_dims = np.where(np.array(types) != 0)[0] 35 | 36 | if len(cont_dims) > 0: 37 | exp_kernel = Matern( 38 | np.ones([len(cont_dims)]), 39 | [(np.exp(-6.754111155189306), np.exp(0.0858637988771976)) for _ in 40 | range(len(cont_dims))], 41 | nu=2.5, 42 | operate_on=cont_dims, 43 | ) 44 | 45 | if len(cat_dims) > 0: 46 | ham_kernel = HammingKernel( 47 | np.ones([len(cat_dims)]), 48 | [(np.exp(-6.754111155189306), np.exp(0.0858637988771976)) for _ in 49 | range(len(cat_dims))], 50 | operate_on=cat_dims, 51 | ) 52 | 53 | assert len(cat_dims) + len(cont_dims) == len(configspace.get_hyperparameters()), ( 54 | len(cat_dims) + len(cont_dims), len(configspace.get_hyperparameters()) 55 | ) 56 | 57 | noise_kernel = WhiteKernel( 58 | noise_level=1e-8, 59 | noise_level_bounds=(np.exp(-25), np.exp(2)), 60 | prior=HorseshoePrior(scale=0.1, rng=rng), 61 | ) 62 | 63 | if len(cont_dims) > 0 and len(cat_dims) > 0: 64 | # both 65 | kernel = cov_amp * (exp_kernel * ham_kernel) + noise_kernel 66 | elif len(cont_dims) > 0 and len(cat_dims) == 0: 67 | # only cont 68 | kernel = cov_amp * exp_kernel + noise_kernel 69 | elif len(cont_dims) == 0 and len(cat_dims) > 0: 70 | # only cat 71 | kernel = cov_amp * ham_kernel + noise_kernel 72 | else: 73 | raise ValueError() 74 | else: 75 | kernel = copy.deepcopy(kernel) 76 | 77 | gp = GaussianProcess( 78 | kernel=kernel, 79 | normalize_y=True, 80 | seed=rng.randint(0, 2 ** 20), 81 | types=types, 82 | bounds=bounds, 83 | configspace=configspace, 84 | ) 85 | return gp 86 | 87 | 88 | def sample_sobol(loo_model, locations, num_samples, engine_seed): 89 | """Sample from a Sobol sequence.
Wraps the sampling to deal with the issue that the predictive 90 | covariance matrix might not be decomposable and fixes this by adding a small amount of noise 91 | to the diagonal.""" 92 | 93 | y_mean, y_cov = loo_model.predict(locations, cov_return_type='full_cov') 94 | initial_noise = 1e-14 95 | while initial_noise < 1: 96 | try: 97 | L = np.linalg.cholesky(y_cov + np.eye(len(locations)) * initial_noise) 98 | break 99 | except np.linalg.LinAlgError: 100 | initial_noise *= 10 101 | continue 102 | if initial_noise >= 1: 103 | rval = np.tile(y_mean, reps=num_samples).transpose() 104 | return rval 105 | 106 | engine = botorch.sampling.qmc.NormalQMCEngine(len(y_mean), seed=engine_seed, ) 107 | samples_alt = y_mean.flatten() + (engine.draw(num_samples).numpy() @ L) 108 | return samples_alt 109 | 110 | 111 | def copula_transform(values: np.ndarray) -> np.ndarray: 112 | 113 | """Copula transformation from "A Quantile-based Approach for Hyperparameter Transfer Learning" 114 | by Salinas, Shen and Perrone, ICML 2020""" 115 | 116 | quants = (sp.stats.rankdata(values.flatten()) - 1) / (len(values) - 1) 117 | cutoff = 1 / (4 * np.power(len(values), 0.25) * np.sqrt(np.pi * np.log(len(values)))) 118 | quants = np.clip(quants, a_min=cutoff, a_max=1-cutoff) 119 | # Inverse Gaussian CDF 120 | rval = np.array([sp.stats.norm.ppf(q) for q in quants]).reshape((-1, 1)) 121 | return rval 122 | 123 | 124 | class EI(AbstractAcquisitionFunction): 125 | 126 | """Computes for a given x the expected improvement as acquisition value. 127 | 128 | Uses only the target model of the ensemble to find ``x_best`` 129 | """ 130 | 131 | def __init__(self, 132 | model: AbstractEPM, 133 | par: float = 0.0): 134 | 135 | super(EI, self).__init__(model) 136 | self.long_name = 'Expected Improvement' 137 | self.par = par 138 | self._required_updates = ('model', ) 139 | 140 | def _compute(self, X: np.ndarray) -> np.ndarray: 141 | 142 | if len(X.shape) == 1: 143 | X = X[:, np.newaxis] 144 | 145 | eta = np.min(self.model.target_model.predict_marginalized_over_instances(X)) 146 | eta = eta * self.model.Y_std_ + self.model.Y_mean_ 147 | 148 | m, v = self.model.predict_marginalized_over_instances(X) 149 | print(eta, np.min(m)) 150 | s = np.sqrt(v) 151 | 152 | def calculate_f(): 153 | z = (eta - m - self.par) / s 154 | return (eta - m - self.par) * norm.cdf(z) + s * norm.pdf(z) 155 | 156 | if np.any(s == 0.0): 157 | # if std is zero, we have observed x on all instances 158 | # using a RF, std should be never exactly 0.0 159 | # Avoid zero division by setting all zeros in s to one. 160 | # Consider the corresponding results in f to be zero.
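# For example (hypothetical numbers): with eta = 0.5, m = 0.4 and s = 0.0, the division
# in calculate_f would be undefined; substituting s = 1.0 keeps the computation finite,
# and the affected entries of f are reset to 0.0 below, i.e. a point whose outcome the
# model already predicts with certainty offers no expected improvement.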
161 | self.logger.warning("Predicted std is 0.0 for at least one sample.") 162 | s_copy = np.copy(s) 163 | s[s_copy == 0.0] = 1.0 164 | f = calculate_f() 165 | f[s_copy == 0.0] = 0.0 166 | else: 167 | f = calculate_f() 168 | if (f < 0).any(): 169 | raise ValueError( 170 | "Expected Improvement is smaller than 0 for at least one " 171 | "sample.") 172 | return f 173 | 174 | -------------------------------------------------------------------------------- /scripts/generate_commands.py: -------------------------------------------------------------------------------- 1 | """Generate commands to reproduce experiments.""" 2 | 3 | import argparse 4 | import glob 5 | import itertools 6 | import os 7 | import random 8 | from typing import List 9 | 10 | parser = argparse.ArgumentParser() 11 | 12 | benchmarks = { 13 | 'alpine': (1, 50), 14 | 'quadratic': (30, 15), 15 | 'adaboost': (50, 15), 16 | 'svm': (50, 15), 17 | 'openml-glmnet': (38, 15), 18 | 'openml-svm': (38, 15), 19 | 'openml-xgb': (38, 15), 20 | 'nn': (35, 15), 21 | } 22 | 23 | normalization_to_initial_design = { 24 | 'None': 'unscaled', 25 | 'mean/var': 'scaled', 26 | 'Copula': 'copula', 27 | } 28 | 29 | all_setups = { 30 | "": "", 31 | "-learnedinit": "--learned-initial-design {learned_initial_design}", 32 | "-gpmetadata": "--empirical-meta-configs", 33 | "-gpmetadata-learnedinit": "--empirical-meta-configs --learned-initial-design {learned_initial_design}", 34 | "-gridmetadata": "--grid-meta-configs", 35 | "-gridmetadata-learnedinit": "--grid-meta-configs --learned-initial-design {learned_initial_design}" 36 | } 37 | all_setups_args = [setup[1:] if len(setup) > 0 else setup for setup in all_setups] + ['None'] 38 | 39 | parser.add_argument( 40 | '--benchmark', 41 | choices=benchmarks.keys(), 42 | required=True, 43 | help="Which benchmark to create the commands file for." 44 | ) 45 | parser.add_argument( 46 | '--setup', 47 | choices=all_setups_args, 48 | nargs='*', 49 | help="For which setup of meta-data (grid, from the pre-evaluated grid; gp, from a previous " 50 | "run of the GP) and learned init or not to create the commands." 51 | ) 52 | parser.add_argument( 53 | '--results-directory', 54 | type=str, 55 | help="If given, this script will check which output files already exist and not add those " 56 | "call to the commands file." 
57 | ) 58 | 59 | args = parser.parse_args() 60 | 61 | results_dir = args.results_directory 62 | 63 | if results_dir: 64 | glob_dir = '%s/*/*/*' % glob.escape(results_dir) 65 | available_files = glob.glob(glob_dir) 66 | to_drop = len(results_dir) 67 | available_files = set([available_file[to_drop:] for available_file in available_files]) 68 | else: 69 | available_file = [] 70 | 71 | def add_seeds_and_tasks( 72 | template: str, 73 | n_seeds: int, 74 | n_tasks: int, 75 | relative_output_file_template: str, 76 | ) -> List[str]: 77 | rval = [] 78 | for seed in range(n_seeds): 79 | for task_id in range(n_tasks): 80 | 81 | if results_dir: 82 | relative_output_file = relative_output_file_template.format(seed=seed, task_id=task_id) 83 | # output_file = os.path.join(results_dir, relative_output_file) 84 | if relative_output_file in available_files: 85 | continue 86 | 87 | rval.append(template.format(seed=seed, task_id=task_id)) 88 | return rval 89 | 90 | output_directory = "/home/feurerm/projects/2018_fb/results_smac" 91 | #output_directory = "/work/ws/nemo/fr_mf1066-2019_rgpe-0/" 92 | run_script = "python /home/feurerm/sync_dir/projects/2018_fb/rgpe_code/scripts/run_benchmark_smac.py" 93 | #run_script = "python /home/fr/fr_fr/fr_mf1066/repositories/2019_rgpe/rgpe/scripts/run_benchmark_smac.py" 94 | 95 | for benchmark, (n_tasks, n_seeds) in benchmarks.items(): 96 | if benchmark != args.benchmark: 97 | continue 98 | setups_args = args.setup 99 | for i in range(len(setups_args)): 100 | if setups_args[i] != 'None': 101 | setups_args[i] = '-' + setups_args[i] 102 | setups = {} 103 | for setup in all_setups: 104 | if setup in setups_args: 105 | setups[setup] = all_setups[setup] 106 | if 'None' in setups_args: 107 | setups[''] = all_setups[''] 108 | print(setups) 109 | 110 | commands = [] 111 | 112 | # Random search 113 | n_init = 1 114 | for multiplier in (1, 50): 115 | for setup_name, setup_string in setups.items(): 116 | for search_space_pruning in (False, True): 117 | if 'learned' in setup_name: 118 | continue 119 | elif not search_space_pruning and setup_name != '': 120 | continue 121 | 122 | filename = "{seed}_50_{task_id}.json" 123 | if search_space_pruning: 124 | method_name = "random%d-ssp%s-%d" % (multiplier, setup_name, n_init) 125 | else: 126 | method_name = "random%s%s-%d" % (multiplier, setup_name, n_init) 127 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 128 | output_file = os.path.join( 129 | output_directory, benchmark, method_name, filename) 130 | 131 | command = ( 132 | "{run_script} --benchmark {benchmark} --method random --seed {seed} " 133 | "--task {task_id} --iteration-multiplier {multiplier} --n-init {n_init} " 134 | "--output-file {output_file} {setup_string}" 135 | ) 136 | if search_space_pruning: 137 | command += " --search-space-pruning complete" 138 | 139 | template = command.format(**{ 140 | 'run_script': run_script, 141 | 'benchmark': benchmark, 142 | 'multiplier': multiplier, 143 | 'output_file': output_file, 144 | 'seed': '{seed}', 145 | 'n_init': n_init, 146 | 'task_id': '{task_id}', 147 | 'setup_string': setup_string, 148 | }) 149 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 150 | 151 | # Baseline 152 | for n_init in (10, 50): 153 | for method in ('gpmap', 'gcp'): 154 | for search_space_pruning in (False, True): 155 | for setup_name, setup_string in setups.items(): 156 | 157 | filename = "{seed}_50_{task_id}.json" 158 | 159 | if search_space_pruning: 160 | method_name = "%s-ssp%s-%d" 
% (method, setup_name, n_init) 161 | else: 162 | method_name = "%s%s-%d" % (method, setup_name, n_init) 163 | output_file = os.path.join( 164 | output_directory, benchmark, method_name, filename) 165 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 166 | command = ( 167 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 168 | "--task {task_id} {setup_string} --n-init {n_init} --output-file {output_file}" 169 | ) 170 | if search_space_pruning: 171 | command += " --search-space-pruning complete" 172 | template = command.format(**{ 173 | 'method': method, 174 | 'run_script': run_script, 175 | 'benchmark': benchmark, 176 | 'output_file': output_file, 177 | 'seed': '{seed}', 178 | 'task_id': '{task_id}', 179 | 'setup_string': setup_string.format( 180 | learned_initial_design='scaled' if method == 'gpmap' else 'copula', 181 | ), 182 | 'n_init': n_init, 183 | }) 184 | if benchmark == 'alpine' and method == 'gpmap' and setup_name == '': 185 | commands.extend(add_seeds_and_tasks(template, n_seeds, 6, relative_output_file_template)) 186 | pass 187 | else: 188 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 189 | pass 190 | 191 | # ABLR 192 | method = 'ablr' 193 | for n_init in (1, ): 194 | for normalization in ('mean/var', 'Copula'): 195 | for setup_name, setup_string in setups.items(): 196 | if 'learnedinit' not in setup_name: 197 | continue 198 | if normalization in ['Copula', 'mean/var'] and n_init < 2: 199 | n_init_ = 2 200 | else: 201 | n_init_ = 1 202 | filename = "{seed}_50_{task_id}.json" 203 | output_file = os.path.join( 204 | output_directory, benchmark, 205 | "%s-%s%s-%d" % (method, normalization.replace('/', ''), 206 | setup_name, n_init_), filename) 207 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 208 | command = ( 209 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 210 | "--normalization {normalization} " 211 | "--task {task_id} {setup_string} --n-init {n_init} --output-file {output_file}" 212 | ) 213 | template = command.format(**{ 214 | 'method': method, 215 | 'run_script': run_script, 216 | 'benchmark': benchmark, 217 | 'output_file': output_file, 218 | 'seed': '{seed}', 219 | 'task_id': '{task_id}', 220 | 'n_init': n_init_, 221 | 'setup_string': setup_string.format( 222 | learned_initial_design=normalization_to_initial_design[normalization] 223 | ), 224 | 'normalization': normalization, 225 | }) 226 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 227 | 228 | # TSTR 229 | method = 'tstr' 230 | for n_init in (1, ): 231 | for weight_dilution_strategy in ('None', 'probabilistic-ld'): 232 | for bandwidth in (0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0): 233 | for setup_name, setup_string in setups.items(): 234 | for normalization in ('mean/var', 'Copula', 'None'): 235 | for acquisition_function in ('targetEI', 'fullmodeltargetEI',): 236 | if 'learnedinit' not in setup_name: 237 | continue 238 | if normalization in ['Copula', 'mean/var'] and n_init < 2: 239 | n_init_ = 2 240 | else: 241 | n_init_ = n_init 242 | filename = "{seed}_50_{task_id}.json" 243 | method_name = "%s-%s-%s-%s-%f%s-%d" % ( 244 | method, acquisition_function, normalization.replace('/', ''), 245 | weight_dilution_strategy, 246 | bandwidth, setup_name, n_init_ 247 | ) 248 | output_file = os.path.join(output_directory, benchmark, method_name, filename) 249 | 
relative_output_file_template = os.path.join(benchmark, method_name, filename) 250 | command = ( 251 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 252 | "--task {task_id} {setup_string} --n-init {n_init} --bandwidth {bandwidth} " 253 | "--normalization {normalization} --output-file {output_file} " 254 | "--weight_dilution_strategy {weight_dilution_strategy} " 255 | ) 256 | if acquisition_function == 'targetEI': 257 | command = '%s --variance-mode target --acquisition-function-name EI' % command 258 | elif acquisition_function == 'fullmodeltargetEI': 259 | command = '%s --variance-mode target --acquisition-function-name fullmodelEI' % command 260 | else: 261 | raise ValueError(acquisition_function) 262 | 263 | template = command.format(**{ 264 | 'method': method, 265 | 'run_script': run_script, 266 | 'benchmark': benchmark, 267 | 'output_file': output_file, 268 | 'seed': '{seed}', 269 | 'task_id': '{task_id}', 270 | 'n_init': n_init_, 271 | 'setup_string': setup_string.format( 272 | learned_initial_design=normalization_to_initial_design[normalization] 273 | ), 274 | 'bandwidth': bandwidth, 275 | 'normalization': normalization, 276 | 'weight_dilution_strategy': weight_dilution_strategy, 277 | }) 278 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 279 | 280 | # KL-dirvergence-based distance measure 281 | method = 'klweighting' 282 | if benchmark not in ['openml-svm', 'openml-xgb']: 283 | for n_init in (2, ): 284 | for eta in (1, 2, 5, 10, 20, 50, 100): 285 | for setup_name, setup_string in setups.items(): 286 | if 'learnedinit' not in setup_name: 287 | continue 288 | filename = "{seed}_50_{task_id}.json" 289 | method_name = "%s-%f%s-%d" % (method, eta, setup_name, n_init) 290 | output_file = os.path.join(output_directory, benchmark, method_name, filename) 291 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 292 | command = ( 293 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 294 | "--task {task_id} {setup_string} --n-init {n_init} --eta {eta} " 295 | "--output-file {output_file} " 296 | ) 297 | template = command.format(**{ 298 | 'method': method, 299 | 'run_script': run_script, 300 | 'benchmark': benchmark, 301 | 'output_file': output_file, 302 | 'seed': '{seed}', 303 | 'task_id': '{task_id}', 304 | 'n_init': n_init, 305 | 'setup_string': setup_string.format(learned_initial_design='scaled'), 306 | 'eta': eta, 307 | }) 308 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 309 | 310 | # WAC 311 | for n_init in (2, ): 312 | for setup_name, setup_string in setups.items(): 313 | if 'learnedinit' not in setup_name: 314 | continue 315 | filename = "{seed}_50_{task_id}.json" 316 | method_name = "wac%s-%d" % (setup_name, n_init) 317 | output_file = os.path.join(output_directory, benchmark, method_name, filename) 318 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 319 | command = ( 320 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 321 | "--task {task_id} {setup_string} --n-init {n_init} --output-file {output_file} " 322 | "--acquisition-function-name EI" 323 | ) 324 | template = command.format(**{ 325 | 'method': 'wac', 326 | 'run_script': run_script, 327 | 'benchmark': benchmark, 328 | 'output_file': output_file, 329 | 'seed': '{seed}', 330 | 'task_id': '{task_id}', 331 | 'n_init': n_init, 332 | 'setup_string': 
setup_string.format(learned_initial_design='scaled'), 333 | }) 334 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 335 | 336 | # GCP+Prior 337 | for n_init in (2, ): 338 | for setup_name, setup_string in setups.items(): 339 | if 'learnedinit' not in setup_name: 340 | continue 341 | filename = "{seed}_50_{task_id}.json" 342 | method_name = "gcp+prior%s-%d" % (setup_name, n_init) 343 | output_file = os.path.join(output_directory, benchmark, method_name, filename) 344 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 345 | command = ( 346 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 347 | "--task {task_id} {setup_string} --n-init {n_init} --output-file {output_file} " 348 | "--acquisition-function-name EI" 349 | ) 350 | template = command.format(**{ 351 | 'method': 'gcp+prior', 352 | 'run_script': run_script, 353 | 'benchmark': benchmark, 354 | 'output_file': output_file, 355 | 'seed': '{seed}', 356 | 'task_id': '{task_id}', 357 | 'n_init': n_init, 358 | 'setup_string': setup_string.format(learned_initial_design='copula'), 359 | }) 360 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 361 | 362 | # TAF-TSTR 363 | method = 'taf' 364 | for n_init in (1, ): 365 | for weight_dilution_strategy in ('None', 'probabilistic-ld'): 366 | for bandwidth in (0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0): 367 | for setup_name, setup_string in setups.items(): 368 | for normalization in ['None', 'Copula', 'mean/var']: 369 | if 'learnedinit' not in setup_name: 370 | continue 371 | if normalization in ['Copula', 'mean/var'] and n_init < 2: 372 | n_init_ = 2 373 | else: 374 | n_init_ = n_init 375 | filename = "{seed}_50_{task_id}.json" 376 | method_name = "%s-tstr-%s-%s-%f%s-%d" % ( 377 | method, normalization.replace('/', ''), weight_dilution_strategy, 378 | bandwidth, setup_name, n_init_, 379 | ) 380 | output_file = os.path.join(output_directory, benchmark, method_name, filename) 381 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 382 | command = ( 383 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 384 | "--task {task_id} {setup_string} --bandwidth {bandwidth} " 385 | "--weighting-mode tstr --n-init {n_init} --normalization {normalization} " 386 | "--weight_dilution_strategy {weight_dilution_strategy} " 387 | "--output-file {output_file} " 388 | ) 389 | template = command.format(**{ 390 | 'method': method, 391 | 'run_script': run_script, 392 | 'benchmark': benchmark, 393 | 'output_file': output_file, 394 | 'seed': '{seed}', 395 | 'task_id': '{task_id}', 396 | 'n_init': n_init_, 397 | 'setup_string': setup_string.format( 398 | learned_initial_design=normalization_to_initial_design[normalization]), 399 | 'bandwidth': bandwidth, 400 | 'normalization': normalization, 401 | 'weight_dilution_strategy': weight_dilution_strategy, 402 | }) 403 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 404 | 405 | # TAF-RGPE 406 | method = 'taf' 407 | for n_init in (1, ): 408 | for weight_dilution_strategy in ('None', 'probabilistic-ld'): 409 | for sampling_strategy in ('bootstrap', ): 410 | for normalization in ('None', 'Copula', 'mean/var'): 411 | for setup_name, setup_string in setups.items(): 412 | if 'learnedinit' not in setup_name and n_init == 1: 413 | continue 414 | if normalization in ['Copula', 'mean/var'] and n_init < 2: 415 | n_init_ 
= 2 416 | else: 417 | n_init_ = n_init 418 | filename = "{seed}_50_{task_id}.json" 419 | method_name = "%s-rgpe-%s-%s-%s-1000-%s-%d" % ( 420 | method, sampling_strategy, normalization.replace('/', ''), 421 | weight_dilution_strategy, setup_name, n_init_, 422 | ) 423 | output_file = os.path.join(output_directory, benchmark, method_name, filename) 424 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 425 | command = ( 426 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 427 | "--task {task_id} {setup_string} --weight-dilution-strategy {weight_dilution_strategy} " 428 | "--sampling-mode {sampling_strategy} " 429 | "--weighting-mode rgpe --n-init {n_init} --output-file {output_file} " 430 | "--normalization {normalization}" 431 | ) 432 | 433 | template = command.format(**{ 434 | 'method': method, 435 | 'run_script': run_script, 436 | 'benchmark': benchmark, 437 | 'output_file': output_file, 438 | 'seed': '{seed}', 439 | 'task_id': '{task_id}', 440 | 'n_init': n_init_, 441 | 'setup_string': setup_string.format( 442 | learned_initial_design=normalization_to_initial_design[normalization]), 443 | 'weight_dilution_strategy': weight_dilution_strategy, 444 | 'sampling_strategy': sampling_strategy, 445 | 'normalization': normalization, 446 | }) 447 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 448 | 449 | # RGPE with Mixture of Gaussian Processes 450 | for method in ('rmogp', ): 451 | for n_init in (1, ): 452 | for ( 453 | weight_dilution_strategy, sampling_strategy, use_expectation, 454 | use_global_incumbent, num_posterior_samples, alpha, normalization, 455 | ) in ( 456 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 0.0, 'None'), 457 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 0.0, 'mean/var'), 458 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 0.0, 'Copula'), 459 | ('None', 'bootstrap', 'True', 'False', 1000, 0.0, 'mean/var'), 460 | ('None', 'bootstrap', 'True', 'False', 1000, 0.0, 'None'), 461 | ('probabilistic-ld', 'correct', 'True', 'False', 1000, 0.0, 'mean/var'), 462 | ('probabilistic-ld', 'correct', 'True', 'False', 1000, 0.0, 'None'), 463 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 100, 0.0, 'mean/var'), 464 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 100, 0.0, 'None'), 465 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 10000, 0.0, 'mean/var'), 466 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 10000, 0.0, 'None'), 467 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 100000, 0.0, 'mean/var'), 468 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 100000, 0.0, 'None'), 469 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 0.1, 'mean/var'), 470 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 0.1, 'None'), 471 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 0.2, 'mean/var'), 472 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 0.2, 'None'), 473 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 0.5, 'mean/var'), 474 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 0.5, 'None'), 475 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 1, 'mean/var'), 476 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 1, 'None'), 477 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 2, 'mean/var'), 478 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 2, 'None'), 479 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 5, 'mean/var'), 
480 | ('probabilistic-ld', 'bootstrap', 'True', 'False', 1000, 5, 'None'), 481 | ): 482 | 483 | for setup_name, setup_string in setups.items(): 484 | if 'learnedinit' not in setup_name and n_init == 1: 485 | continue 486 | if normalization in ['Copula', 'mean/var'] and n_init < 2: 487 | n_init_ = 2 488 | else: 489 | n_init_ = n_init 490 | filename = "{seed}_50_{task_id}.json" 491 | method_name = "%s-rgpe-%s-%s-%s-%s-%s-%d-%s%s-%d" % ( 492 | method, sampling_strategy, normalization.replace('/', ''), 493 | weight_dilution_strategy, 494 | 'expectation' if use_expectation == 'True' else 'improvement', 495 | 'global' if use_global_incumbent == 'True' else 'local', 496 | num_posterior_samples, alpha, setup_name, n_init_, 497 | ) 498 | output_file = os.path.join(output_directory, benchmark, method_name, filename) 499 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 500 | command = ( 501 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 502 | "--task {task_id} {setup_string} --weight-dilution-strategy {weight_dilution_strategy} " 503 | "--sampling-mode {sampling_strategy} " 504 | "--use-expectation {use_expectation} " 505 | "--use-global-incumbent {use_global_incumbent} " 506 | "--weighting-mode rgpe --n-init {n_init} --output-file {output_file} " 507 | "--normalization {normalization} --num-posterior-samples {num_posterior_samples} " 508 | "--alpha {alpha}" 509 | ) 510 | 511 | template = command.format(**{ 512 | 'method': method, 513 | 'run_script': run_script, 514 | 'benchmark': benchmark, 515 | 'output_file': output_file, 516 | 'seed': '{seed}', 517 | 'task_id': '{task_id}', 518 | 'n_init': n_init_, 519 | 'setup_string': setup_string.format( 520 | learned_initial_design=normalization_to_initial_design[normalization]), 521 | 'weight_dilution_strategy': weight_dilution_strategy, 522 | 'sampling_strategy': sampling_strategy, 523 | 'normalization': normalization, 524 | 'num_posterior_samples': num_posterior_samples, 525 | 'use_global_incumbent': use_global_incumbent, 526 | 'use_expectation': use_expectation, 527 | 'alpha': alpha, 528 | }) 529 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 530 | 531 | 532 | # RGPE 533 | method = 'rgpe' 534 | for n_init in (1, ): 535 | for sampling_strategy in ('bootstrap', ): 536 | for weight_dilution_strategy in ('None', 'probabilistic-ld'): 537 | for acquisition_function in ('NoisyEI', 'fullmodelNoisyEI', 538 | 'targetEI', 'fullmodeltargetEI', 539 | 'CFNEI', 540 | 'EI', 'fullmodelEI'): 541 | for normalization in ('mean/var', 'Copula', 'None'): 542 | for setup_name, setup_string in setups.items(): 543 | if 'learnedinit' not in setup_name: 544 | continue 545 | if normalization in ['Copula', 'mean/var'] and n_init < 2: 546 | n_init_ = 2 547 | else: 548 | n_init_ = n_init 549 | filename = "{seed}_50_{task_id}.json" 550 | method_name = "%s-%s-%s-%s-%s-1000-%s-%d" % ( 551 | method, sampling_strategy, normalization.replace('/', ''), 552 | weight_dilution_strategy, acquisition_function, setup_name, 553 | n_init_) 554 | output_file = os.path.join( 555 | output_directory, benchmark, method_name, filename, 556 | ) 557 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 558 | command = ( 559 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 560 | "--task {task_id} {setup_string} --weight-dilution-strategy {weight_dilution_strategy} " 561 | "--sampling-mode {sampling_strategy} --normalization {normalization} " 562 | 
"--num-posterior-samples 1000 --n-init {n_init} --output-file {output_file} " 563 | "--variance-mode average " 564 | ) 565 | if acquisition_function == 'targetEI': 566 | command = '%s --variance-mode target --acquisition-function-name EI' % command 567 | elif acquisition_function == 'fullmodeltargetEI': 568 | command = '%s --variance-mode target --acquisition-function-name fullmodelEI' % command 569 | elif acquisition_function == 'NoisyEI': 570 | command = '%s --acquisition-function-name 30 ' \ 571 | '--target-model-incumbent True' % command 572 | elif acquisition_function == 'fullmodelNoisyEI': 573 | command = '%s --acquisition-function-name 30 ' \ 574 | '--target-model-incumbent False' % command 575 | else: 576 | command = '%s --acquisition-function-name %s' % (command, acquisition_function) 577 | 578 | template = command.format(**{ 579 | 'method': method, 580 | 'run_script': run_script, 581 | 'benchmark': benchmark, 582 | 'output_file': output_file, 583 | 'seed': '{seed}', 584 | 'task_id': '{task_id}', 585 | 'n_init': n_init_, 586 | 'setup_string': setup_string.format( 587 | learned_initial_design=normalization_to_initial_design[normalization]), 588 | 'weight_dilution_strategy': weight_dilution_strategy, 589 | 'sampling_strategy': sampling_strategy, 590 | 'acquisition_function': acquisition_function, 591 | 'normalization': normalization, 592 | }) 593 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 594 | 595 | # Old RGPE 596 | method = 'rgpe' 597 | for n_init in (1, ): 598 | for weight_dilution_strategy in ('None', '95'): 599 | for sampling_strategy in ('correct', ): 600 | for acquisition_function in ('fullmodelNoisyEI', ): 601 | for normalization in ('mean/var', 'Copula', 'None'): 602 | for setup_name, setup_string in setups.items(): 603 | if 'learnedinit' not in setup_name: 604 | continue 605 | if normalization in ['Copula', 'mean/var'] and n_init < 2: 606 | n_init_ = 2 607 | else: 608 | n_init_ = n_init 609 | filename = "{seed}_50_{task_id}.json" 610 | method_name = "%s-%s-%s-%s-%s-1000-%s-%d" % ( 611 | method, sampling_strategy, normalization.replace('/', ''), 612 | weight_dilution_strategy, acquisition_function, setup_name, 613 | n_init_) 614 | output_file = os.path.join( 615 | output_directory, benchmark, method_name, filename, 616 | ) 617 | relative_output_file_template = os.path.join(benchmark, method_name, filename) 618 | command = ( 619 | "{run_script} --benchmark {benchmark} --method {method} --seed {seed} " 620 | "--task {task_id} {setup_string} --weight-dilution-strategy {weight_dilution_strategy} " 621 | "--sampling-mode {sampling_strategy} --normalization {normalization} " 622 | "--num-posterior-samples 1000 --n-init {n_init} --output-file {output_file} " 623 | "--variance-mode average " 624 | ) 625 | 626 | if acquisition_function == 'NoisyEI': 627 | command = '%s --acquisition-function-name 30 ' \ 628 | '--target-model-incumbent True' % command 629 | elif acquisition_function == 'fullmodelNoisyEI': 630 | command = '%s --acquisition-function-name 30 ' \ 631 | '--target-model-incumbent False' % command 632 | else: 633 | raise ValueError(acquisition_function) 634 | 635 | template = command.format(**{ 636 | 'method': method, 637 | 'run_script': run_script, 638 | 'benchmark': benchmark, 639 | 'output_file': output_file, 640 | 'seed': '{seed}', 641 | 'task_id': '{task_id}', 642 | 'n_init': n_init_, 643 | 'setup_string': setup_string.format( 644 | learned_initial_design=normalization_to_initial_design[normalization]), 645 | 
'weight_dilution_strategy': weight_dilution_strategy, 646 | 'sampling_strategy': sampling_strategy, 647 | 'acquisition_function': acquisition_function, 648 | 'normalization': normalization, 649 | }) 650 | commands.extend(add_seeds_and_tasks(template, n_seeds, n_tasks, relative_output_file_template)) 651 | 652 | random.shuffle(commands) 653 | print(len(commands)) 654 | string = "\n".join(commands) 655 | 656 | commands_file = os.path.join(output_directory, benchmark, 'commands.txt') 657 | print(commands_file) 658 | with open(commands_file, 'w') as fh: 659 | fh.write(string) 660 | -------------------------------------------------------------------------------- /scripts/install.sh: -------------------------------------------------------------------------------- 1 | conda create -n rgpe -y 2 | source activate rgpe 3 | which conda 4 | conda env list 5 | conda install python=3.7 numpy=1.18.1 scipy=1.4.1 scikit-learn=0.22.1 gxx_linux-64 gcc_linux-64 \ 6 | swig cython=0.29.13 ipython jupyter matplotlib pandas=0.25 -y 7 | pip install ConfigSpace==0.4.11 pyrfr==0.8.0 8 | pip install git+https://github.com/automl/HPOlib1.5@0449121d31e0dcd4f63435ba5b27a0dee5bbd55f --no-deps 9 | pip install smac[all]==0.12.3 10 | pip install torch==1.5.0+cpu torchvision==0.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html 11 | pip install botorch==0.2.5 12 | pip install lockfile 13 | --------------------------------------------------------------------------------
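As a closing illustration, the utilities above can also be exercised outside of the benchmark scripts. The following is a minimal sketch (not part of the repository) showing how `get_gaussian_process` and `copula_transform` from `rgpe/utils.py` might be used on a toy continuous space; it assumes the versions pinned in `scripts/install.sh` (SMAC 0.12.3, ConfigSpace 0.4.11) and that the `rgpe` package is importable:

import numpy as np
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter

from rgpe.utils import copula_transform, get_gaussian_process

rng = np.random.RandomState(1)

# Toy 2-dimensional continuous search space (stand-in for a benchmark space).
cs = ConfigurationSpace()
cs.add_hyperparameter(UniformFloatHyperparameter('x1', 0.0, 1.0))
cs.add_hyperparameter(UniformFloatHyperparameter('x2', 0.0, 1.0))
cs.seed(1)

configs = cs.sample_configuration(10)
X = np.array([config.get_array() for config in configs])
y = rng.rand(10, 1)  # hypothetical observed errors

# Quantile-based Copula transformation of the targets (normalization='Copula').
y_copula = copula_transform(y)

# Default GP used throughout the methods; a type of 0 marks a continuous dimension.
gp = get_gaussian_process(
    configspace=cs,
    types=[0, 0],
    bounds=[(0.0, 1.0), (0.0, 1.0)],
    rng=rng,
    kernel=None,
)
gp.train(X, y_copula)
mean, variance = gp.predict(X)

Here `types=[0, 0]` follows SMAC's encoding of continuous dimensions, which is what `get_gaussian_process` uses to choose between the Matérn and Hamming kernels.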