├── images └── finals.png ├── submissions └── space-decay │ ├── requirements.txt │ ├── util.py │ ├── gp.py │ ├── turbo1.py │ ├── sampler.py │ └── optimizer.py ├── prepare_upload.sh ├── README.md ├── run_local.sh ├── environment.txt ├── .gitignore ├── LICENSE.turbo └── LICENSE /images/finals.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jbr-ai-labs/bbo-challenge-jetbrains-research/HEAD/images/finals.png -------------------------------------------------------------------------------- /submissions/space-decay/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/uber-research/TuRBO.git@master 2 | scikit-optimize==0.8.dev0 3 | pyaml>=16.9 -------------------------------------------------------------------------------- /submissions/space-decay/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.stats as ss 3 | 4 | 5 | def order_stats(X): 6 | _, idx, cnt = np.unique(X, return_inverse=True, return_counts=True) 7 | obs = np.cumsum(cnt) # Need to do it this way due to ties 8 | o_stats = obs[idx] 9 | return o_stats 10 | 11 | 12 | def copula_standardize(X): 13 | X = np.nan_to_num(np.asarray(X)) # Replace inf by something large 14 | assert X.ndim == 1 and np.all(np.isfinite(X)) 15 | o_stats = order_stats(X) 16 | quantile = np.true_divide(o_stats, len(X) + 1) 17 | X_ss = ss.norm.ppf(quantile) 18 | return X_ss 19 | -------------------------------------------------------------------------------- /prepare_upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | set -o pipefail 5 | 6 | # Input args 7 | CODE_DIR=$1 8 | NAME=$2 9 | 10 | # Eliminate final slash 11 | CODE_DIR=$(dirname $CODE_DIR)/$(basename $CODE_DIR) 12 | 13 | # Copy in provided files 14 | cp -r -n $CODE_DIR ./$NAME 15 | 16 | # Make a blank req file if none provided 17 | REQ_FILE=./$NAME/requirements.txt 18 | touch $REQ_FILE 19 | 20 | # Download all the wheels/tarballs with our docker as the target 21 | pip download -r $REQ_FILE -d ./$NAME --python-version 36 --implementation cp --platform manylinux1_x86_64 --abi cp36m --no-deps 22 | 23 | # Test zip does not exist yet to avoid clobber 24 | ! test -f $NAME.zip 25 | 26 | # Build the zip with correct directory structure 27 | (cd $NAME && zip -r ../$NAME.zip ./*) 28 | 29 | # Display final output for user at end 30 | set +x 31 | 32 | echo "----------------------------------------------------------------" 33 | echo "Built archive for upload" 34 | unzip -l ./$NAME.zip 35 | 36 | echo "For scoring, upload $NAME.zip at address:" 37 | echo "https://bbochallenge.com/my-submissions" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JetBrains Research's Solution for Black-Box Optimization Challenge 2 | 3 | This is the code for our solution to the [NeurIPS 2020 Black-Box Optimization Challenge](https://bbochallenge.com/). 4 | 5 | Our solution is described in the "Solving Black-Box Optimization Challenge via Learning Search Space Partition for Local Bayesian Optimization" paper. 6 | 7 | ## Final Results 8 | 9 | Our approach scored 92.509 in the finals and ranked 3rd overall!
10 | 11 | ![finals](./images/finals.png) 12 | 13 | ## Team Members 14 | 15 | * Mikita Sazanovich (github: [@niksaz](https://github.com/niksaz)) 16 | * Anastasiya Nikolskaya (github: [@nuvard](https://github.com/nuvard)) 17 | * Yury Belousov (github: [@bruce-willis](https://github.com/bruce-willis)) 18 | * Aleksei Shpilman 19 | 20 | ## Citing us 21 | 22 | The paper is available at: https://arxiv.org/pdf/2012.10335.pdf (extended version from Proceedings of Machine Learning Research at: http://proceedings.mlr.press/v133/sazanovich21a.html). 23 | 24 | If you want to cite this code, please use the following: 25 | 26 | ``` 27 | @misc{sazanovich2020solving, 28 | title={Solving Black-Box Optimization Challenge via Learning Search Space Partition for Local Bayesian Optimization}, 29 | author={Mikita Sazanovich and Anastasiya Nikolskaya and Yury Belousov and Aleksei Shpilman}, 30 | year={2020}, 31 | eprint={2012.10335}, 32 | archivePrefix={arXiv}, 33 | primaryClass={cs.LG} 34 | } 35 | ``` 36 | 37 | ## License 38 | 39 | Our implementation is released under [Apache License 2.0](./LICENSE) license except for the code derived from TuRBO. 40 | -------------------------------------------------------------------------------- /run_local.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SECONDS=0 4 | 5 | set -ex 6 | set -o pipefail 7 | 8 | # Up-to-date competition settings 9 | N_STEP=16 10 | N_BATCH=8 11 | 12 | # Outdated competition settings 13 | # N_STEP=32 14 | # N_BATCH=8 15 | 16 | # For a fast experiment 17 | # N_STEP=15 18 | # N_BATCH=1 19 | 20 | # Input args 21 | CODE_DIR=$1 22 | N_REPEAT=$2 23 | 24 | # Where output goes 25 | DB_ROOT=./output 26 | DBID=run_$(date +"%Y%m%d_%H%M%S") 27 | 28 | # Setup vars 29 | OPT=$(basename $CODE_DIR) 30 | OPT_ROOT=$(dirname $CODE_DIR) 31 | 32 | # Check that bayesmark is installed in this environment 33 | which bayesmark-init 34 | which bayesmark-launch 35 | which bayesmark-exp 36 | which bayesmark-agg 37 | which bayesmark-anal 38 | 39 | # Ensure output folder exists 40 | mkdir -p $DB_ROOT 41 | 42 | # Copy the baseline file in, we can skip this but we must include RandomSearch in the -o list 43 | ! 
test -d $DB_ROOT/$DBID/ # Check the folder does not yet exist 44 | bayesmark-init -dir $DB_ROOT -b $DBID 45 | cp ./input/baseline-$N_STEP-$N_BATCH.json $DB_ROOT/$DBID/derived/baseline.json 46 | 47 | # By default, runs on all models (-c), data (-d), metrics (-m) 48 | bayesmark-launch -dir $DB_ROOT -b $DBID -n $N_STEP -r $N_REPEAT -p $N_BATCH -o $OPT --opt-root $OPT_ROOT -v -c SVM DT -d boston wine 49 | # To run on all problems use instead (slower): 50 | # bayesmark-launch -dir $DB_ROOT -b $DBID -n $N_STEP -r $N_REPEAT -p $N_BATCH -o $OPT --opt-root $OPT_ROOT -v 51 | 52 | # Now aggregate the results 53 | bayesmark-agg -dir $DB_ROOT -b $DBID 54 | # And analyze the scores 55 | bayesmark-anal -dir $DB_ROOT -b $DBID -v 56 | 57 | echo "Time spent:" $SECONDS 58 | -------------------------------------------------------------------------------- /environment.txt: -------------------------------------------------------------------------------- 1 | # Pinned requirements used in the docker image (valohai/bbochallenge:20200821-57e60f9) that executes submissions (Python 3.6.12) 2 | absl-py==0.9.0 3 | astunparse==1.6.3 4 | attrs==19.3.0 5 | bayesian-optimization==0.6.0 6 | bayesmark==0.0.7 7 | botorch==0.2.1 8 | cachetools==4.1.0 9 | certifi==2020.4.5.1 10 | chardet==3.0.4 11 | cma==3.0.3 12 | coverage==5.1 13 | cycler==0.10.0 14 | decorator==4.4.2 15 | dill==0.3.1.1 16 | fn==0.4.3 17 | future==0.18.2 18 | gast==0.3.3 19 | genty==1.3.2 20 | gitdb==4.0.5 21 | GitPython==3.1.3 22 | google-auth==1.14.3 23 | google-auth-oauthlib==0.4.1 24 | google-pasta==0.2.0 25 | gpytorch==1.1.1 26 | grpcio==1.29.0 27 | h5py==2.10.0 28 | hyperopt==0.1.1 29 | idna==2.9 30 | importlib-metadata==1.6.0 31 | joblib==0.14.1 32 | Keras==2.3.1 33 | Keras-Applications==1.0.8 34 | Keras-Preprocessing==1.1.2 35 | kiwisolver==1.2.0 36 | lightgbm==2.3.1 37 | Markdown==3.2.2 38 | matplotlib==3.2.1 39 | more-itertools==8.2.0 40 | mypy==0.770 41 | mypy-extensions==0.4.3 42 | networkx==2.4 43 | nevergrad==0.1.4 44 | nose==1.3.7 45 | nose-timer==1.0.0 46 | numpy==1.18.5 47 | oauthlib==3.1.0 48 | opentuner==0.8.2 49 | opt-einsum==3.2.1 50 | packaging==20.3 51 | pandas==1.0.5 52 | pathvalidate==2.3.0 53 | pluggy==0.13.1 54 | POAP==0.1.26 55 | protobuf==3.12.0 56 | py==1.8.1 57 | pyasn1==0.4.8 58 | pyasn1-modules==0.2.8 59 | pyDOE2==1.3.0 60 | pymongo==3.10.1 61 | pyparsing==2.4.7 62 | pySOT==0.2.3 63 | pytest==5.4.2 64 | python-dateutil==2.8.1 65 | pytz==2020.1 66 | PyYAML==5.3.1 67 | requests==2.23.0 68 | requests-oauthlib==1.3.0 69 | rsa==4.0 70 | scikit-learn==0.20.2 71 | scikit-optimize==0.5.2 72 | scipy==1.4.1 73 | six==1.14.0 74 | smmap==3.0.4 75 | SQLAlchemy==1.3.16 76 | tensorboard==2.2.1 77 | tensorboard-plugin-wit==1.6.0.post3 78 | tensorflow==2.2.0 79 | tensorflow-estimator==2.2.0 80 | tensorflow-hub==0.8.0 81 | termcolor==1.1.0 82 | torch==1.5.0 83 | typed-ast==1.4.1 84 | typing-extensions==3.7.4.2 85 | urllib3==1.25.9 86 | wcwidth==0.1.9 87 | Werkzeug==1.0.1 88 | wrapt==1.12.1 89 | xarray==0.15.1 90 | xgboost==1.1.0 91 | xlrd==1.2.0 92 | xlwt==1.3.0 93 | zipp==3.1.0 94 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Specific to this repo 2 | output/ 3 | archives/ 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | 
eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | pip-wheel-metadata/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | .idea/ 136 | -------------------------------------------------------------------------------- /LICENSE.turbo: -------------------------------------------------------------------------------- 1 | "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by the text below. 2 | 3 | "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 4 | 5 | "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 6 | 7 | "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 8 | 9 | "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
10 | 11 | "Work" shall mean the work of authorship, whether in Source or Object form, made available under this License. 12 | 13 | This License governs use of the accompanying Work, and your use of the Work constitutes acceptance of this License. 14 | 15 | You may use this Work for any non-commercial purpose, subject to the restrictions in this License. Some purposes which can be non-commercial are teaching, academic research, and personal experimentation. You may also distribute this Work with books or other teaching materials, or publish the Work on websites, that are intended to teach the use of the Work. 16 | 17 | You may not use or distribute this Work, or any derivative works, outputs, or results from the Work, in any form for commercial purposes. Non-exhaustive examples of commercial purposes would be running business operations, licensing, leasing, or selling the Work, or distributing the Work for use with commercial products. 18 | 19 | You may modify this Work and distribute the modified Work for non-commercial purposes, however, you may not grant rights to the Work or derivative works that are broader than or in conflict with those provided by this License. For example, you may not distribute modifications of the Work under terms that would permit commercial use, or under terms that purport to require the Work or derivative works to be sublicensed to others. 20 | 21 | In return, we require that you agree: 22 | 23 | 1. Not to remove any copyright or other notices from the Work. 24 | 25 | 2. That if you distribute the Work in Source or Object form, you will include a verbatim copy of this License. 26 | 27 | 3. That if you distribute derivative works of the Work in Source form, you do so only under a license that includes all of the provisions of this License and is not in conflict with this License, and if you distribute derivative works of the Work solely in Object form you do so only under a license that complies with this License. 28 | 29 | 4. That if you have modified the Work or created derivative works from the Work, and distribute such modifications or derivative works, you will cause the modified files to carry prominent notices so that recipients know that they are not receiving the original Work. Such notices must state: (i) that you have changed the Work; and (ii) the date of any changes. 30 | 31 | 5. If you publicly use the Work or any output or result of the Work, you will provide a notice with such use that provides any person who uses, views, accesses, interacts with, or is otherwise exposed to the Work (i) with information of the nature of the Work, (ii) with a link to the Work, and (iii) a notice that the Work is available under this License. 32 | 33 | 6. THAT THE WORK COMES "AS IS", WITH NO WARRANTIES. THIS MEANS NO EXPRESS, IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE OR ANY WARRANTY OF TITLE OR NON-INFRINGEMENT. ALSO, YOU MUST PASS THIS DISCLAIMER ON WHENEVER YOU DISTRIBUTE THE WORK OR DERIVATIVE WORKS. 34 | 35 | 7. THAT NEITHER UBER TECHNOLOGIES, INC. NOR ANY OF ITS AFFILIATES, SUPPLIERS, SUCCESSORS, NOR ASSIGNS WILL BE LIABLE FOR ANY DAMAGES RELATED TO THE WORK OR THIS LICENSE, INCLUDING DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT THE LAW PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUST PASS THIS LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE WORK OR DERIVATIVE WORKS. 36 | 37 | 8. 
That if you sue anyone over patents that you think may apply to the Work or anyone's use of the Work, your license to the Work ends automatically. 38 | 39 | 9. That your rights under the License end automatically if you breach it in any way. 40 | 41 | 10. Uber Technologies, Inc. reserves all rights not expressly granted to you in this License. 42 | -------------------------------------------------------------------------------- /submissions/space-decay/gp.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2019 Uber Technologies, Inc. # 3 | # # 4 | # Licensed under the Uber Non-Commercial License (the "License"); # 5 | # you may not use this file except in compliance with the License. # 6 | # You may obtain a copy of the License at the root directory of this project. # 7 | # # 8 | # See the License for the specific language governing permissions and # 9 | # limitations under the License. # 10 | ############################################################################### 11 | 12 | import math 13 | 14 | import gpytorch 15 | import numpy as np 16 | import torch 17 | from gpytorch.constraints.constraints import Interval 18 | from gpytorch.distributions import MultivariateNormal 19 | from gpytorch.kernels import MaternKernel, ScaleKernel, CylindricalKernel 20 | from gpytorch.likelihoods import GaussianLikelihood 21 | from gpytorch.means import ConstantMean 22 | from gpytorch.mlls import ExactMarginalLogLikelihood 23 | from gpytorch.models import ExactGP 24 | #from botorch.models import SingleTaskGP, FixedNoiseGP 25 | 26 | 27 | # GP Model 28 | class GP(ExactGP): 29 | def __init__(self, train_x, train_y, likelihood, lengthscale_constraint, outputscale_constraint, ard_dims): 30 | super(GP, self).__init__(train_x, train_y, likelihood) 31 | self.ard_dims = ard_dims 32 | self.mean_module = ConstantMean() 33 | base_kernel = MaternKernel(lengthscale_constraint=lengthscale_constraint, ard_num_dims=ard_dims, nu=2.5) 34 | self.covar_module = ScaleKernel(base_kernel, outputscale_constraint=outputscale_constraint) 35 | 36 | def forward(self, x): 37 | mean_x = self.mean_module(x) 38 | covar_x = self.covar_module(x) 39 | return MultivariateNormal(mean_x, covar_x) 40 | 41 | 42 | def map_box_ball(x, dim): 43 | #dim = x.shape[1] 44 | # from borders to [-1, 1]^d 45 | x = (x - 0.5) * 2 46 | # from [-1, 1]^d to Ball(0, 1) 47 | x = x / np.sqrt(dim) 48 | return x 49 | 50 | 51 | def map_ball_box(x, dim): 52 | #dim = len(borders) 53 | # from Ball(0, 1) to [-1, 1]^d 54 | x = np.sqrt(dim) * x 55 | # from [-1, 1]^d to borders 56 | x = x * 0.5 + 0.5 57 | return x 58 | 59 | 60 | class KumaAlphaPrior(gpytorch.priors.Prior): 61 | def __init__(self): 62 | super(KumaAlphaPrior, self).__init__() 63 | self.log_a_max = np.log(2) 64 | pass 65 | 66 | def log_prob(self, x): 67 | x = torch.log(x) 68 | loc = torch.tensor(0.).to(x) 69 | scale = torch.tensor(0.01).to(x) 70 | return torch.sum(torch.log( 71 | torch.distributions.Normal(loc=loc, scale=scale).log_prob(x).exp() + 0.5 / self.log_a_max 72 | )) 73 | 74 | 75 | class KumaBetaPrior(gpytorch.priors.Prior): 76 | def __init__(self): 77 | super(KumaBetaPrior, self).__init__() 78 | self.log_b_max = np.log(2) 79 | pass 80 | 81 | def log_prob(self, x): 82 | x = torch.log(x) 83 | loc = torch.tensor(0.).to(x) 84 | scale = torch.tensor(0.01).to(x) 85 | return torch.sum(torch.log( 86 | torch.distributions.Normal(loc=loc, scale=scale).log_prob(x).exp() + 
0.5 / self.log_b_max 87 | )) 88 | 89 | 90 | class AngularWeightsPrior(gpytorch.priors.Prior): 91 | def __init__(self): 92 | super(AngularWeightsPrior, self).__init__() 93 | 94 | def log_prob(self, x): 95 | x = torch.log(x) 96 | loc = torch.tensor(0.).to(x) 97 | scale = torch.tensor(2.).to(x) 98 | return torch.distributions.Normal(loc=loc, scale=scale).log_prob(x).sum() 99 | 100 | 101 | class CustomCylindricalGP(ExactGP): # FixedNoiseGP SingleTaskGP 102 | def __init__(self, train_X, train_Y, likelihood, dim, lengthscale_constraint, outputscale_constraint, ard_dims): 103 | # squeeze output dim before passing train_Y to ExactGP 104 | super().__init__(train_X, train_Y, likelihood) # GaussianLikelihood()) # GaussianLikelihood() noise.squeeze(-1) 105 | self.dim = dim 106 | self.mean_module = ConstantMean() 107 | self.covar_module = ScaleKernel(CylindricalKernel( 108 | num_angular_weights=ard_dims, 109 | alpha_prior=KumaAlphaPrior(), 110 | alpha_constraint=gpytorch.constraints.constraints.Interval(lower_bound=0.5, upper_bound=1.), 111 | beta_prior=KumaBetaPrior(), 112 | beta_constraint=gpytorch.constraints.constraints.Interval(lower_bound=1., upper_bound=2.), 113 | radial_base_kernel=MaternKernel(lengthscale_constraint=lengthscale_constraint, ard_num_dims=1, nu=2.5), 114 | # angular_weights_constraint=gpytorch.constraints.constraints.Interval(lower_bound=np.exp(-12.), 115 | # upper_bound=np.exp(20.)), 116 | angular_weights_prior=AngularWeightsPrior() 117 | )) 118 | self.to(train_X) # make sure we're on the right device/dtype 119 | 120 | def forward(self, x): 121 | x = map_box_ball(x, self.dim) 122 | mean_x = self.mean_module(x) 123 | covar_x = self.covar_module(x) 124 | return MultivariateNormal(mean_x, covar_x) 125 | 126 | 127 | def train_gp(train_x, train_y, use_ard, num_steps, hypers={}, use_cylinder=True, dim=1): 128 | """Fit a GP model where train_x is in [0, 1]^d and train_y is standardized.""" 129 | assert train_x.ndim == 2 130 | assert train_y.ndim == 1 131 | assert train_x.shape[0] == train_y.shape[0] 132 | 133 | # Create hyper parameter bounds 134 | noise_constraint = Interval(5e-4, 0.2) 135 | if use_ard: 136 | lengthscale_constraint = Interval(0.005, 2.0) 137 | else: 138 | lengthscale_constraint = Interval(0.005, math.sqrt(train_x.shape[1])) # [0.005, sqrt(dim)] 139 | outputscale_constraint = Interval(0.05, 20.0) 140 | 141 | # Create models 142 | likelihood = GaussianLikelihood(noise_constraint=noise_constraint).to(device=train_x.device, dtype=train_y.dtype) 143 | ard_dims = train_x.shape[1] if use_ard else None 144 | if use_cylinder: 145 | model = CustomCylindricalGP( 146 | train_X=train_x, 147 | train_Y=train_y, 148 | likelihood=likelihood, 149 | dim=dim, 150 | lengthscale_constraint=lengthscale_constraint, 151 | outputscale_constraint=outputscale_constraint, 152 | ard_dims=ard_dims, 153 | ).to(device=train_x.device, dtype=train_x.dtype) 154 | else: 155 | model = GP( 156 | train_x=train_x, 157 | train_y=train_y, 158 | likelihood=likelihood, 159 | lengthscale_constraint=lengthscale_constraint, 160 | outputscale_constraint=outputscale_constraint, 161 | ard_dims=ard_dims, 162 | ).to(device=train_x.device, dtype=train_x.dtype) 163 | 164 | 165 | # Find optimal model hyperparameters 166 | model.train() 167 | likelihood.train() 168 | 169 | # "Loss" for GPs - the marginal log likelihood 170 | mll = ExactMarginalLogLikelihood(likelihood, model) 171 | 172 | # Initialize model hypers 173 | if hypers: 174 | model.load_state_dict(hypers) 175 | else: 176 | hypers = {} 177 | if not use_cylinder: 
178 | hypers["covar_module.outputscale"] = 1.0 179 | hypers["covar_module.base_kernel.lengthscale"] = 0.5 180 | hypers["likelihood.noise"] = 0.005 181 | model.initialize(**hypers) 182 | 183 | # Use the adam optimizer 184 | optimizer = torch.optim.Adam([{"params": model.parameters()}], lr=0.1) 185 | 186 | for _ in range(num_steps): 187 | optimizer.zero_grad() 188 | output = model(train_x) 189 | loss = -mll(output, train_y) 190 | loss.backward() 191 | optimizer.step() 192 | 193 | # Switch to eval mode 194 | model.eval() 195 | likelihood.eval() 196 | 197 | return model 198 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | `submissions/space-decay/gp.py` and 2 | `submissions/space-decay/turbo1.py` are derived 3 | from https://github.com/uber-research/TuRBO on October 15, 2020, hence 4 | it is distributed under LICENSE.turbo. The rest of the files are 5 | distributed under Apache License Version 2.0. 6 | 7 | 8 | Apache License 9 | Version 2.0, January 2004 10 | http://www.apache.org/licenses/ 11 | 12 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 13 | 14 | 1. Definitions. 15 | 16 | "License" shall mean the terms and conditions for use, reproduction, 17 | and distribution as defined by Sections 1 through 9 of this document. 18 | 19 | "Licensor" shall mean the copyright owner or entity authorized by 20 | the copyright owner that is granting the License. 21 | 22 | "Legal Entity" shall mean the union of the acting entity and all 23 | other entities that control, are controlled by, or are under common 24 | control with that entity. For the purposes of this definition, 25 | "control" means (i) the power, direct or indirect, to cause the 26 | direction or management of such entity, whether by contract or 27 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 28 | outstanding shares, or (iii) beneficial ownership of such entity. 29 | 30 | "You" (or "Your") shall mean an individual or Legal Entity 31 | exercising permissions granted by this License. 32 | 33 | "Source" form shall mean the preferred form for making modifications, 34 | including but not limited to software source code, documentation 35 | source, and configuration files. 36 | 37 | "Object" form shall mean any form resulting from mechanical 38 | transformation or translation of a Source form, including but 39 | not limited to compiled object code, generated documentation, 40 | and conversions to other media types. 41 | 42 | "Work" shall mean the work of authorship, whether in Source or 43 | Object form, made available under the License, as indicated by a 44 | copyright notice that is included in or attached to the work 45 | (an example is provided in the Appendix below). 46 | 47 | "Derivative Works" shall mean any work, whether in Source or Object 48 | form, that is based on (or derived from) the Work and for which the 49 | editorial revisions, annotations, elaborations, or other modifications 50 | represent, as a whole, an original work of authorship. For the purposes 51 | of this License, Derivative Works shall not include works that remain 52 | separable from, or merely link (or bind by name) to the interfaces of, 53 | the Work and Derivative Works thereof. 
54 | 55 | "Contribution" shall mean any work of authorship, including 56 | the original version of the Work and any modifications or additions 57 | to that Work or Derivative Works thereof, that is intentionally 58 | submitted to Licensor for inclusion in the Work by the copyright owner 59 | or by an individual or Legal Entity authorized to submit on behalf of 60 | the copyright owner. For the purposes of this definition, "submitted" 61 | means any form of electronic, verbal, or written communication sent 62 | to the Licensor or its representatives, including but not limited to 63 | communication on electronic mailing lists, source code control systems, 64 | and issue tracking systems that are managed by, or on behalf of, the 65 | Licensor for the purpose of discussing and improving the Work, but 66 | excluding communication that is conspicuously marked or otherwise 67 | designated in writing by the copyright owner as "Not a Contribution." 68 | 69 | "Contributor" shall mean Licensor and any individual or Legal Entity 70 | on behalf of whom a Contribution has been received by Licensor and 71 | subsequently incorporated within the Work. 72 | 73 | 2. Grant of Copyright License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | copyright license to reproduce, prepare Derivative Works of, 77 | publicly display, publicly perform, sublicense, and distribute the 78 | Work and such Derivative Works in Source or Object form. 79 | 80 | 3. Grant of Patent License. Subject to the terms and conditions of 81 | this License, each Contributor hereby grants to You a perpetual, 82 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 83 | (except as stated in this section) patent license to make, have made, 84 | use, offer to sell, sell, import, and otherwise transfer the Work, 85 | where such license applies only to those patent claims licensable 86 | by such Contributor that are necessarily infringed by their 87 | Contribution(s) alone or by combination of their Contribution(s) 88 | with the Work to which such Contribution(s) was submitted. If You 89 | institute patent litigation against any entity (including a 90 | cross-claim or counterclaim in a lawsuit) alleging that the Work 91 | or a Contribution incorporated within the Work constitutes direct 92 | or contributory patent infringement, then any patent licenses 93 | granted to You under this License for that Work shall terminate 94 | as of the date such litigation is filed. 95 | 96 | 4. Redistribution. 
You may reproduce and distribute copies of the 97 | Work or Derivative Works thereof in any medium, with or without 98 | modifications, and in Source or Object form, provided that You 99 | meet the following conditions: 100 | 101 | (a) You must give any other recipients of the Work or 102 | Derivative Works a copy of this License; and 103 | 104 | (b) You must cause any modified files to carry prominent notices 105 | stating that You changed the files; and 106 | 107 | (c) You must retain, in the Source form of any Derivative Works 108 | that You distribute, all copyright, patent, trademark, and 109 | attribution notices from the Source form of the Work, 110 | excluding those notices that do not pertain to any part of 111 | the Derivative Works; and 112 | 113 | (d) If the Work includes a "NOTICE" text file as part of its 114 | distribution, then any Derivative Works that You distribute must 115 | include a readable copy of the attribution notices contained 116 | within such NOTICE file, excluding those notices that do not 117 | pertain to any part of the Derivative Works, in at least one 118 | of the following places: within a NOTICE text file distributed 119 | as part of the Derivative Works; within the Source form or 120 | documentation, if provided along with the Derivative Works; or, 121 | within a display generated by the Derivative Works, if and 122 | wherever such third-party notices normally appear. The contents 123 | of the NOTICE file are for informational purposes only and 124 | do not modify the License. You may add Your own attribution 125 | notices within Derivative Works that You distribute, alongside 126 | or as an addendum to the NOTICE text from the Work, provided 127 | that such additional attribution notices cannot be construed 128 | as modifying the License. 129 | 130 | You may add Your own copyright statement to Your modifications and 131 | may provide additional or different license terms and conditions 132 | for use, reproduction, or distribution of Your modifications, or 133 | for any such Derivative Works as a whole, provided Your use, 134 | reproduction, and distribution of the Work otherwise complies with 135 | the conditions stated in this License. 136 | 137 | 5. Submission of Contributions. Unless You explicitly state otherwise, 138 | any Contribution intentionally submitted for inclusion in the Work 139 | by You to the Licensor shall be under the terms and conditions of 140 | this License, without any additional terms or conditions. 141 | Notwithstanding the above, nothing herein shall supersede or modify 142 | the terms of any separate license agreement you may have executed 143 | with Licensor regarding such Contributions. 144 | 145 | 6. Trademarks. This License does not grant permission to use the trade 146 | names, trademarks, service marks, or product names of the Licensor, 147 | except as required for reasonable and customary use in describing the 148 | origin of the Work and reproducing the content of the NOTICE file. 149 | 150 | 7. Disclaimer of Warranty. Unless required by applicable law or 151 | agreed to in writing, Licensor provides the Work (and each 152 | Contributor provides its Contributions) on an "AS IS" BASIS, 153 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 154 | implied, including, without limitation, any warranties or conditions 155 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 156 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 157 | appropriateness of using or redistributing the Work and assume any 158 | risks associated with Your exercise of permissions under this License. 159 | 160 | 8. Limitation of Liability. In no event and under no legal theory, 161 | whether in tort (including negligence), contract, or otherwise, 162 | unless required by applicable law (such as deliberate and grossly 163 | negligent acts) or agreed to in writing, shall any Contributor be 164 | liable to You for damages, including any direct, indirect, special, 165 | incidental, or consequential damages of any character arising as a 166 | result of this License or out of the use or inability to use the 167 | Work (including but not limited to damages for loss of goodwill, 168 | work stoppage, computer failure or malfunction, or any and all 169 | other commercial damages or losses), even if such Contributor 170 | has been advised of the possibility of such damages. 171 | 172 | 9. Accepting Warranty or Additional Liability. While redistributing 173 | the Work or Derivative Works thereof, You may choose to offer, 174 | and charge a fee for, acceptance of support, warranty, indemnity, 175 | or other liability obligations and/or rights consistent with this 176 | License. However, in accepting such obligations, You may act only 177 | on Your own behalf and on Your sole responsibility, not on behalf 178 | of any other Contributor, and only if You agree to indemnify, 179 | defend, and hold each Contributor harmless for any liability 180 | incurred by, or claims asserted against, such Contributor by reason 181 | of your accepting any such warranty or additional liability. 182 | 183 | END OF TERMS AND CONDITIONS 184 | 185 | APPENDIX: How to apply the Apache License to your work. 186 | 187 | To apply the Apache License to your work, attach the following 188 | boilerplate notice, with the fields enclosed by brackets "[]" 189 | replaced with your own identifying information. (Don't include 190 | the brackets!) The text should be enclosed in the appropriate 191 | comment syntax for the file format. We also recommend that a 192 | file or class name and description of purpose be included on the 193 | same "printed page" as the copyright notice for easier 194 | identification within third-party archives. 195 | 196 | Copyright 2020 JetBrains Research 197 | 198 | Licensed under the Apache License, Version 2.0 (the "License"); 199 | you may not use this file except in compliance with the License. 200 | You may obtain a copy of the License at 201 | 202 | http://www.apache.org/licenses/LICENSE-2.0 203 | 204 | Unless required by applicable law or agreed to in writing, software 205 | distributed under the License is distributed on an "AS IS" BASIS, 206 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 207 | See the License for the specific language governing permissions and 208 | limitations under the License. 209 | -------------------------------------------------------------------------------- /submissions/space-decay/turbo1.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2019 Uber Technologies, Inc. # 3 | # # 4 | # Licensed under the Uber Non-Commercial License (the "License"); # 5 | # you may not use this file except in compliance with the License. # 6 | # You may obtain a copy of the License at the root directory of this project. 
# 7 | # # 8 | # See the License for the specific language governing permissions and # 9 | # limitations under the License. # 10 | ############################################################################### 11 | 12 | import math 13 | import sys 14 | from copy import deepcopy 15 | 16 | import gpytorch 17 | import numpy as np 18 | import torch 19 | from torch.quasirandom import SobolEngine 20 | 21 | from gp import train_gp 22 | from turbo.utils import from_unit_cube, latin_hypercube, to_unit_cube 23 | 24 | 25 | class Turbo1: 26 | """The TuRBO-1 algorithm. 27 | 28 | Parameters 29 | ---------- 30 | f : function handle 31 | lb : Lower variable bounds, numpy.array, shape (d,). 32 | ub : Upper variable bounds, numpy.array, shape (d,). 33 | n_init : Number of initial points (2*dim is recommended), int. 34 | max_evals : Total evaluation budget, int. 35 | batch_size : Number of points in each batch, int. 36 | verbose : If you want to print information about the optimization progress, bool. 37 | use_ard : If you want to use ARD for the GP kernel. 38 | max_cholesky_size : Largest number of training points where we use Cholesky, int 39 | n_training_steps : Number of training steps for learning the GP hypers, int 40 | min_cuda : We use float64 on the CPU if we have this or fewer datapoints 41 | device : Device to use for GP fitting ("cpu" or "cuda") 42 | dtype : Dtype to use for GP fitting ("float32" or "float64") 43 | 44 | Example usage: 45 | turbo1 = Turbo1(f=f, lb=lb, ub=ub, n_init=n_init, max_evals=max_evals) 46 | turbo1.optimize() # Run optimization 47 | X, fX = turbo1.X, turbo1.fX # Evaluated points 48 | """ 49 | 50 | def __init__( 51 | self, 52 | f, 53 | lb, 54 | ub, 55 | n_init, 56 | max_evals, 57 | batch_size=1, 58 | verbose=True, 59 | use_ard=True, 60 | max_cholesky_size=2000, 61 | n_training_steps=50, 62 | min_cuda=1024, 63 | device="cpu", 64 | dtype="float64", 65 | use_cylinder=False, 66 | budget=16*8, 67 | use_decay=False, 68 | decay_threshold=0.5, 69 | decay_alpha=0.8, 70 | use_pull=0, 71 | use_lcb=0, 72 | kappa=2.0, 73 | length_min=0.5**7, 74 | length_max=1.8, 75 | length_init=0.8, 76 | length_multiplier=2.0, 77 | used_budget=0 78 | ): 79 | 80 | # Very basic input checks 81 | assert lb.ndim == 1 and ub.ndim == 1 82 | assert len(lb) == len(ub) 83 | assert np.all(ub > lb) 84 | assert max_evals > 0 and isinstance(max_evals, int) 85 | assert n_init > 0 and isinstance(n_init, int) 86 | assert batch_size > 0 and isinstance(batch_size, int) 87 | assert isinstance(verbose, bool) and isinstance(use_ard, bool) 88 | assert max_cholesky_size >= 0 and isinstance(batch_size, int) 89 | assert n_training_steps >= 30 and isinstance(n_training_steps, int) 90 | assert max_evals > n_init and max_evals > batch_size 91 | assert device == "cpu" or device == "cuda" 92 | assert dtype == "float32" or dtype == "float64" 93 | if device == "cuda": 94 | assert torch.cuda.is_available(), "can't use cuda if it's not available" 95 | 96 | # Save function information 97 | self.f = f 98 | self.dim = len(lb) 99 | self.lb = lb 100 | self.ub = ub 101 | 102 | # Settings 103 | self.n_init = n_init 104 | self.max_evals = max_evals 105 | self.batch_size = batch_size 106 | self.verbose = verbose 107 | self.use_ard = use_ard 108 | self.max_cholesky_size = max_cholesky_size 109 | self.n_training_steps = n_training_steps 110 | 111 | #cylinder 112 | self.use_cylinder = use_cylinder 113 | 114 | #decay 115 | self.budget = budget 116 | self.used_budget = used_budget 117 | self.use_decay = use_decay 118 | self.decay_alpha = decay_alpha 
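# (Decay, as implemented in _adjust_length below: when use_decay is set and used_budget exceeds decay_threshold * budget, the trust-region length is additionally multiplied by decay_alpha, so the local search region keeps shrinking as the evaluation budget is spent.)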
119 | self.decay_threshold = decay_threshold 120 | 121 | # Hyperparameters 122 | self.mean = np.zeros((0, 1)) 123 | self.signal_var = np.zeros((0, 1)) 124 | self.noise_var = np.zeros((0, 1)) 125 | self.lengthscales = np.zeros((0, self.dim)) if self.use_ard else np.zeros((0, 1)) 126 | 127 | # pull 128 | self.use_pull = use_pull 129 | self.prob_pull = np.ones((0, self.dim)) / self.dim 130 | 131 | #lcb 132 | self.use_lcb = use_lcb 133 | self.kappa = kappa 134 | 135 | # Tolerances and counters 136 | self.n_cand = min(100 * self.dim, 5000) 137 | self.failtol = np.ceil(np.max([4.0 / batch_size, self.dim / batch_size])) 138 | self.succtol = 3 139 | self.n_evals = 0 140 | 141 | # Trust region sizes 142 | self.length_min = length_min 143 | self.length_max = length_max 144 | self.length_init = length_init 145 | self.length_multiplier = length_multiplier 146 | 147 | # Save the full history 148 | self.X = np.zeros((0, self.dim)) 149 | self.fX = np.zeros((0, 1)) 150 | 151 | # Device and dtype for GPyTorch 152 | self.min_cuda = min_cuda 153 | self.dtype = torch.float32 if dtype == "float32" else torch.float64 154 | self.device = torch.device("cuda") if device == "cuda" else torch.device("cpu") 155 | if self.verbose: 156 | print("Using dtype = %s \nUsing device = %s" % (self.dtype, self.device)) 157 | sys.stdout.flush() 158 | 159 | # Initialize parameters 160 | self._restart() 161 | 162 | def _restart(self): 163 | self._X = [] 164 | self._fX = [] 165 | self._predictions = [] 166 | self.failcount = 0 167 | self.succcount = 0 168 | self.initial = 1 169 | self.pull = 1 170 | self.length = self.length_init 171 | self.prob_pull = np.ones(self.dim) / self.dim 172 | self.prob_push = np.ones(self.dim) / self.dim 173 | self.init_iter = True 174 | #print(self.prob_pull) 175 | 176 | def _adjust_length(self, fX_next): 177 | if np.min(fX_next) < np.min(self._fX) - 1e-3 * math.fabs(np.min(self._fX)): 178 | self.succcount += 1 179 | self.failcount = 0 180 | else: 181 | self.succcount = 0 182 | self.failcount += 1 183 | if self.succcount == self.succtol: # Expand trust region 184 | self.length = min([self.length_multiplier * self.length, self.length_max]) 185 | self.succcount = 0 186 | self.pull = 0 187 | elif self.failcount == self.failtol: # Shrink trust region 188 | self.length /= self.length_multiplier 189 | self.failcount = 0 190 | self.pull = 1 191 | print('Use or not decay: ', self.use_decay) 192 | if self.use_decay: 193 | print(self.used_budget) 194 | if self.used_budget > self.decay_threshold * self.budget: 195 | print("Applying decay...") 196 | self.length *= self.decay_alpha #* min(np.random.lognormal(1, 2, 1), 1) 197 | 198 | diff_std = np.std(self.X - self.X[np.argmin(self.fX)], axis=0) 199 | self.prob_pull = np.exp(diff_std) / np.exp( 200 | np.std(self.X - self.X[np.argmin(self.fX)], axis=0)).sum() 201 | c = 0.1 # regularizer 202 | self.prob_push = np.exp(diff_std.max() - diff_std) / np.exp( 203 | diff_std.max() - diff_std).sum() 204 | 205 | def _create_candidates(self, X, fX, length, n_training_steps, hypers, used_budget=None): 206 | """Generate candidates assuming X has been scaled to [0,1]^d.""" 207 | # Pick the center as the point with the smallest function values 208 | # NOTE: This may not be robust to noise, in which case the posterior mean of the GP can be used instead 209 | if used_budget is not None: 210 | self.used_budget = used_budget 211 | assert X.min() >= 0.0 and X.max() <= 1.0 212 | 213 | # Standardize function values. 
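# (The center used below is the median rather than the mean, which is less sensitive to outliers among the observed objective values; a near-zero standard deviation is replaced by 1.0.)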
214 | mu, sigma = np.median(fX), fX.std() 215 | sigma = 1.0 if sigma < 1e-6 else sigma 216 | fX = (deepcopy(fX) - mu) / sigma 217 | 218 | # Figure out what device we are running on 219 | if len(X) < self.min_cuda: 220 | device, dtype = torch.device("cpu"), torch.float64 221 | else: 222 | device, dtype = self.device, self.dtype 223 | 224 | # We use CG + Lanczos for training if we have enough data 225 | with gpytorch.settings.max_cholesky_size(self.max_cholesky_size): 226 | X_torch = torch.tensor(X).to(device=device, dtype=dtype) 227 | y_torch = torch.tensor(fX).to(device=device, dtype=dtype) 228 | gp = train_gp( 229 | train_x=X_torch, train_y=y_torch, use_ard=self.use_ard, num_steps=n_training_steps, hypers=hypers, 230 | use_cylinder=self.use_cylinder, dim=self.dim 231 | ) 232 | 233 | # Save state dict 234 | hypers = gp.state_dict() 235 | self._errors = self.fX - np.array(self._predictions) 236 | # Create the trust region boundaries 237 | x_center = X[fX.argmin().item(), :][None, :] 238 | if not self.use_cylinder: 239 | weights = gp.covar_module.base_kernel.lengthscale.cpu().detach().numpy().ravel() 240 | else: 241 | #weights = gp.covar_module.base_kernel.radial_base_kernel.lengthscale.cpu().detach().numpy().ravel() 242 | weights = gp.covar_module.base_kernel.angular_weights.cpu().detach().numpy().ravel() 243 | weights = weights / weights.mean() # This will make the next line more stable 244 | weights = weights / np.prod(np.power(weights, 1.0 / len(weights))) # We now have weights.prod() = 1 245 | #print('weights', weights) 246 | # TODO: REMOVE 247 | #prob_pert = np.log(self.budget - len(self.fX)) / np.log(self.budget) 248 | #print('prob of pulling appliance:', prob_pert) 249 | # appliance = np.random.choice((1, 0), p=(prob_pert, 1 - prob_pert)) 250 | # print('pull or not: ', appliance) 251 | if self.use_pull == 1: 252 | print("Applying pulling...") 253 | if self.pull: 254 | print('Prob of pulling:', self.prob_pull) 255 | to_pull = np.random.choice(range(0,self.dim), size=min(self.dim, 2), p=self.prob_pull.flatten()) 256 | weights[to_pull] *= 2 257 | else: 258 | print('Prob of pushing:', self.prob_push) 259 | to_push = np.random.choice(range(0, self.dim), size=min(self.dim, 2), p=self.prob_push.flatten()) 260 | weights[to_push] /= 2 261 | lb = np.clip(x_center - weights * length / 2.0, 0.0, 1.0) 262 | ub = np.clip(x_center + weights * length / 2.0, 0.0, 1.0) 263 | #print('lb', lb) 264 | #print('ub', ub) 265 | self.cand_lb = lb 266 | self.cand_ub = ub 267 | 268 | # Draw a Sobolev sequence in [lb, ub] 269 | seed = np.random.randint(int(1e6)) 270 | sobol = SobolEngine(self.dim, scramble=True, seed=seed) 271 | pert = sobol.draw(self.n_cand).to(dtype=dtype, device=device).cpu().detach().numpy() 272 | pert = lb + (ub - lb) * pert 273 | 274 | # Create a perturbation mask 275 | prob_perturb = min(20.0 / self.dim, 1.0) 276 | mask = np.random.rand(self.n_cand, self.dim) <= prob_perturb 277 | ind = np.where(np.sum(mask, axis=1) == 0)[0] 278 | mask[ind, np.random.randint(0, self.dim - 1, size=len(ind))] = 1 279 | 280 | # Create candidate points 281 | X_cand = x_center.copy() * np.ones((self.n_cand, self.dim)) 282 | X_cand[mask] = pert[mask] 283 | 284 | # Figure out what device we are running on 285 | if len(X_cand) < self.min_cuda: 286 | device, dtype = torch.device("cpu"), torch.float64 287 | else: 288 | device, dtype = self.device, self.dtype 289 | 290 | # We may have to move the GP to a new device 291 | gp = gp.to(dtype=dtype, device=device) 292 | 293 | # We use Lanczos for sampling if we have 
enough data 294 | with torch.no_grad(), gpytorch.settings.max_cholesky_size(self.max_cholesky_size): 295 | X_cand_torch = torch.tensor(X_cand).to(device=device, dtype=dtype) 296 | f_preds = gp.likelihood(gp(X_cand_torch)) 297 | self.f_var = f_preds.variance.cpu().detach().numpy() 298 | #print(self.f_var.shape) 299 | y_cand = f_preds.sample(torch.Size([self.batch_size])).t().cpu().detach().numpy() 300 | #print(y_cand.shape) 301 | self.gp = deepcopy(gp) 302 | self.init_iter = False 303 | # Remove the torch variables 304 | del X_torch, y_torch, X_cand_torch, gp 305 | 306 | # De-standardize the sampled values 307 | y_cand = mu + sigma * y_cand 308 | #print(y_cand.shape) 309 | return X_cand, y_cand, hypers 310 | 311 | def _select_candidates(self, X_cand, y_cand): 312 | """Select candidates.""" 313 | X_next = np.ones((self.batch_size, self.dim)) 314 | _y_cand = deepcopy(y_cand) 315 | if self.use_lcb: 316 | print("Applying LCB...") 317 | f_var = np.expand_dims(np.sqrt(self.f_var), 1).repeat(self.batch_size, axis=1) 318 | #print(f_var.shape) 319 | #print(_y_cand.shape) 320 | _y_cand = y_cand - self.kappa * f_var 321 | for i in range(self.batch_size): 322 | # Pick the best point and make sure we never pick it again 323 | indbest = np.argmin(_y_cand[:, i]) 324 | self._predictions.append(_y_cand[indbest, i]) 325 | X_next[i, :] = deepcopy(X_cand[indbest, :]) 326 | _y_cand[indbest, :] = np.inf 327 | return X_next 328 | -------------------------------------------------------------------------------- /submissions/space-decay/sampler.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from skopt.space import Categorical, Integer, Real 4 | from skopt.sampler import InitialPointGenerator, Sobol, Halton, Lhs, Hammersly, Grid 5 | from skopt.space import Space 6 | from scipy.special import binom 7 | from scipy.optimize import minimize 8 | import numpy as np 9 | from sklearn.utils import check_random_state 10 | from scipy.interpolate import interp1d 11 | 12 | def fix_sampler_seed(seed): 13 | if seed is not None: 14 | random.seed(seed) 15 | np.random.seed(seed) 16 | 17 | def cook_initial_point_generator(generator, **kwargs): 18 | """Cook a default initial point generator. 19 | For the special generator called "random" the return value is None. 20 | Parameters 21 | ---------- 22 | generator : "lhs", "sobol", "halton", "hammersly", "grid", "random" \ 23 | or InitialPointGenerator instance" 24 | Should inherit from `skopt.sampler.InitialPointGenerator`. 25 | kwargs : dict 26 | Extra parameters provided to the generator at init time. 27 | """ 28 | if generator is None: 29 | generator = "random" 30 | elif isinstance(generator, str): 31 | generator = generator.lower() 32 | if generator not in ["sobol", "halton", "hammersly", "lhs", "random", 33 | "grid", "maxpro", "maxpro-gd"]: 34 | raise ValueError("Valid strings for the generator parameter " 35 | " are: 'sobol', 'lhs', 'halton', 'hammersly'," 36 | "'random', 'maxpro','maxpro-gd', or 'grid' not " 37 | "%s." % generator) 38 | elif not isinstance(generator, InitialPointGenerator): 39 | raise ValueError("generator has to be an InitialPointGenerator." 
40 | "Got %s" % (str(type(generator)))) 41 | 42 | if isinstance(generator, str): 43 | if generator == "sobol": 44 | generator = Sobol() 45 | elif generator == "halton": 46 | generator = Halton() 47 | elif generator == "hammersly": 48 | generator = Hammersly() 49 | elif generator == "lhs": 50 | generator = Lhs() 51 | elif generator == "grid": 52 | generator = Grid() 53 | elif generator == "random": 54 | return None 55 | elif generator == "maxpro": 56 | generator = MaxPro(use_gradient=False) 57 | elif generator == "maxpro-gd": 58 | generator = MaxPro(use_gradient=True) 59 | generator.set_params(**kwargs) 60 | return generator 61 | 62 | def _random_permute_matrix(h, random_state=None): 63 | rng = check_random_state(random_state) 64 | h_rand_perm = np.zeros_like(h) 65 | samples, n = h.shape 66 | for j in range(n): 67 | order = rng.permutation(range(samples)) 68 | h_rand_perm[:, j] = h[order, j] 69 | return h_rand_perm 70 | 71 | 72 | class MaxPro(InitialPointGenerator): 73 | """Latin hypercube sampling 74 | Parameters 75 | ---------- 76 | lhs_type : str, default='classic' 77 | - 'classic' - a small random number is added 78 | - 'centered' - points are set uniformly in each interval 79 | criterion : str or None, default='maximin' 80 | When set to None, the LHS is not optimized 81 | - 'correlation' : optimized LHS by minimizing the correlation 82 | - 'maximin' : optimized LHS by maximizing the minimal pdist 83 | - 'ratio' : optimized LHS by minimizing the ratio 84 | `max(pdist) / min(pdist)` 85 | iterations : int 86 | Defines the number of iterations for optimizing LHS 87 | """ 88 | def __init__(self, 89 | iterations=1000, use_gradient=True, lhs_type = "classic"): 90 | self.iterations = iterations 91 | self.use_gradient = use_gradient 92 | self.lhs_type = lhs_type 93 | 94 | def generate(self, dimensions, n_samples, random_state=None): 95 | """Creates latin hypercube samples with maxpro criterion. 96 | Parameters 97 | ---------- 98 | dimensions : list, shape (n_dims,) 99 | List of search space dimensions. 100 | Each search dimension can be defined either as 101 | - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer` 102 | dimensions), 103 | - a `(lower_bound, upper_bound, "prior")` tuple (for `Real` 104 | dimensions), 105 | - as a list of categories (for `Categorical` dimensions), or 106 | - an instance of a `Dimension` object (`Real`, `Integer` or 107 | `Categorical`). 108 | n_samples : int 109 | The order of the LHS sequence. Defines the number of samples. 110 | random_state : int, RandomState instance, or None (default) 111 | Set random state to something other than None for reproducible 112 | results. 
113 | Returns 114 | ------- 115 | np.array, shape=(n_dim, n_samples) 116 | LHS set 117 | """ 118 | rng = check_random_state(random_state) 119 | space = Space(dimensions) 120 | transformer = space.get_transformer() 121 | n_dim = space.n_dims 122 | space.set_transformer("normalize") 123 | h = self._lhs_normalized(n_dim, n_samples, rng) 124 | 125 | self.num_pts = n_samples 126 | self.dim = n_dim 127 | if self.use_gradient: 128 | print('Using gradient descent') 129 | bounds = [(0,1)] * len(dimensions) * self.num_pts 130 | h_opt = minimize(self.maxpro_criter, h, jac=self.maxpro_grad, bounds=bounds) 131 | h_opt = h_opt['x'].reshape(n_samples, n_dim) 132 | else: 133 | print('Using naive method') 134 | best = 1e+6 135 | for i in range(self.iterations): 136 | h = self._lhs_normalized(n_dim, n_samples, i*rng) 137 | criter = self.maxpro_criter(h) 138 | if best > criter: 139 | best = criter 140 | h_opt = h.copy() 141 | h_opt = space.inverse_transform(h_opt) 142 | space.set_transformer(transformer) 143 | return h_opt 144 | 145 | def maxpro_criter(self, X): 146 | """ 147 | :param X: all x data 148 | :return: value of MaxPro criterion 149 | """ 150 | x = X.copy() 151 | #print(x.shape) 152 | if x.ndim < 2: 153 | x = x.reshape(self.num_pts, self.dim) 154 | #print(x.shape, self.dim) 155 | res = 1 / binom(self.num_pts, 2) 156 | sum_part = 0 157 | for i in range(self.num_pts-1): 158 | for j in range(i+1, self.num_pts): 159 | #print(np.prod(((x[i] - x[j]) ** 2))) 160 | #print(np.prod(((x[i] - x[j]) ** 2)) ** (-1)) 161 | sum_part += (np.prod(((x[i] - x[j]) ** 2)) + 1e-8)** (-1) 162 | res *= sum_part ** (1/self.dim) 163 | return res 164 | 165 | def maxpro_deriv(self, x, r, s): 166 | """Returns derivative of maxpro criterion for Z_rs 167 | 168 | :param X: all x points 169 | :param r: number of point to get derivative 170 | :param s: number of coordinate to get derivative 171 | :return: derivative value 172 | """ 173 | res = 2 / binom(self.num_pts, 2) 174 | sum_part = 0 175 | for i in range(len(x)): 176 | if i != r: 177 | sum_part += (np.prod(((x[i] - x[r]) ** 2)) + 1e-8) ** (-1) * (x[i][s] - x[r][s] + 1e-8) ** (-1) 178 | res *= sum_part 179 | return res 180 | 181 | def maxpro_grad(self, X): 182 | x = X.copy() 183 | if x.ndim < 2: 184 | x = x.reshape(self.num_pts, self.dim) 185 | grad_val = np.zeros(x.shape) 186 | for r in range(self.num_pts): 187 | for i in range(self.dim): 188 | grad_val[r][i] = self.maxpro_deriv(x, r, i) 189 | return grad_val.flatten() 190 | 191 | def _lhs_normalized(self, n_dim, n_samples, random_state): 192 | rng = check_random_state(random_state) 193 | x = np.linspace(0, 1, n_samples + 1) 194 | u = rng.rand(n_samples, n_dim) 195 | h = np.zeros_like(u) 196 | if self.lhs_type == "centered": 197 | for j in range(n_dim): 198 | h[:, j] = np.diff(x) / 2.0 + x[:n_samples] 199 | elif self.lhs_type == "classic": 200 | for j in range(n_dim): 201 | h[:, j] = u[:, j] * np.diff(x) + x[:n_samples] 202 | else: 203 | raise ValueError("Wrong lhs_type. 
Got {}".format(self.lhs_type)) 204 | return _random_permute_matrix(h, random_state=rng) 205 | 206 | class Sampler: 207 | def __init__(self, method, api_config, n_points=8, generator_kwargs=None): 208 | if generator_kwargs is None: 209 | generator_kwargs = {} 210 | self.method = cook_initial_point_generator(method, **generator_kwargs) 211 | 212 | self.dimensions, self.round_to_values = Sampler.get_sk_dimensions(api_config) 213 | self.dimensions_list = tuple(dd.name for dd in self.dimensions) 214 | 215 | self.n_points = n_points 216 | 217 | @staticmethod 218 | def get_sk_dimensions(api_config, transform="normalize"): 219 | """Help routine to setup skopt search space in constructor. 220 | 221 | Take api_config as argument so this can be static. 222 | """ 223 | # The ordering of iteration probably makes no difference, but just to be 224 | # safe and consistent with space.py, I will make it sorted. 225 | param_list = sorted(api_config.keys()) 226 | 227 | sk_dims = [] 228 | round_to_values = {} 229 | for param_name in param_list: 230 | param_config = api_config[param_name] 231 | 232 | param_type = param_config["type"] 233 | 234 | param_space = param_config.get("space", None) 235 | param_range = param_config.get("range", None) 236 | param_values = param_config.get("values", None) 237 | 238 | # Some setup for the case that a whitelist of values is provided: 239 | values_only_type = param_type in ("cat", "ordinal") 240 | if (param_values is not None) and (not values_only_type): 241 | assert param_range is None 242 | param_values = np.unique(param_values) 243 | param_range = (param_values[0], param_values[-1]) 244 | round_to_values[param_name] = interp1d( 245 | param_values, param_values, kind="nearest", fill_value="extrapolate" 246 | ) 247 | 248 | if param_type == "int": 249 | # Integer space in sklearn does not support any warping => Need 250 | # to leave the warping as linear in skopt. 251 | sk_dims.append(Integer(param_range[0], param_range[-1], transform=transform, name=param_name)) 252 | elif param_type == "bool": 253 | assert param_range is None 254 | assert param_values is None 255 | sk_dims.append(Integer(0, 1, transform=transform, name=param_name)) 256 | elif param_type in ("cat", "ordinal"): 257 | assert param_range is None 258 | # Leave x-form to one-hot as per skopt default 259 | sk_dims.append(Categorical(param_values, name=param_name)) 260 | elif param_type == "real": 261 | # Skopt doesn't support all our warpings, so need to pick 262 | # closest substitute it does support. 263 | prior = "log-uniform" if param_space in ("log", "logit") else "uniform" 264 | sk_dims.append(Real(param_range[0], param_range[-1], prior=prior, transform=transform, name=param_name)) 265 | else: 266 | assert False, "type %s not handled in API" % param_type 267 | return sk_dims, round_to_values 268 | 269 | def generate(self, random_state): 270 | # First get list of lists from the sampling method. 271 | next_guess = self.method.generate(dimensions=self.dimensions, 272 | n_samples=self.n_points, 273 | random_state=random_state) 274 | # Then convert to list of dicts 275 | next_guess = [dict(zip(self.dimensions_list, x)) for x in next_guess] 276 | 277 | # Now do the rounding, custom rounding is not supported in skopt. Note 278 | # that there is not necessarily a round function for each dimension here.
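# (round_to_values, filled in by get_sk_dimensions above, maps each parameter that came with an explicit whitelist of values to a nearest-value interp1d, so every suggestion is snapped back onto the allowed grid.)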
279 | for param_name, round_f in self.round_to_values.items(): 280 | for xx in next_guess: 281 | xx[param_name] = round_f(xx[param_name]) 282 | return next_guess 283 | 284 | 285 | if __name__ == "__main__": 286 | api_config = { 287 | "max_depth": { 288 | "type": "int", 289 | "space": "linear", 290 | "range": (1, 15) 291 | }, 292 | "min_samples_split": { 293 | "type": "real", 294 | "space": "logit", 295 | "range": (0.01, 0.99) 296 | }, 297 | "min_samples_leaf": { 298 | "type": "real", 299 | "space": "logit", 300 | "range": (0.01, 0.49) 301 | }, 302 | "min_weight_fraction_leaf": { 303 | "type": "real", 304 | "space": "logit", 305 | "range": (0.01, 0.49) 306 | }, 307 | "max_features": { 308 | "type": "real", 309 | "space": "logit", 310 | "range": (0.01, 0.99) 311 | }, 312 | "min_impurity_decrease": { 313 | "type": "real", 314 | "space": "linear", 315 | "range": (0.0, 0.5) 316 | }, 317 | } 318 | n_points = 8 319 | 320 | sobol_points = Sampler(method='sobol', api_config=api_config, n_points=n_points).generate(random_state=42) 321 | halton_points = Sampler(method='halton', api_config=api_config, n_points=n_points).generate(random_state=42) 322 | hammersly_points = Sampler(method='hammersly', api_config=api_config, n_points=n_points).generate(random_state=42) 323 | lhs_classic_points = Sampler(method='lhs', api_config=api_config, n_points=n_points, generator_kwargs={'lhs_type': 'classic', 'criterion': 'maximin'}).generate(random_state=42) 324 | lhs_centered_points = Sampler(method='lhs', api_config=api_config, n_points=n_points, generator_kwargs={'lhs_type': 'centered'}).generate(random_state=42) 325 | grid_points = Sampler(method='grid', api_config=api_config, n_points=n_points).generate(random_state=42) 326 | 327 | t = 0 328 | -------------------------------------------------------------------------------- /submissions/space-decay/optimizer.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | from copy import deepcopy 4 | from typing import Optional 5 | 6 | import torch 7 | import numpy as np 8 | from sklearn.cluster import KMeans 9 | from sklearn.neighbors import KNeighborsClassifier 10 | from sklearn.svm import SVC 11 | from turbo.utils import from_unit_cube, latin_hypercube, to_unit_cube 12 | 13 | from bayesmark.abstract_optimizer import AbstractOptimizer 14 | from bayesmark.experiment import experiment_main 15 | from bayesmark.space import JointSpace 16 | 17 | # It depends on scikit-optimize==0.8.dev0, which is not in the default environment. 
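# sampler.Sampler builds the initial experimental designs (sobol, halton, lhs, ...) on top
# of skopt's cook_initial_point_generator, which is why the pinned scikit-optimize build
# has to be bundled with the submission for this import to resolve.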
18 | import sampler 19 | from turbo1 import Turbo1 20 | from util import copula_standardize 21 | 22 | try: 23 | import open3d 24 | DEBUG = True 25 | except ImportError as _: 26 | DEBUG = False 27 | 28 | 29 | def fix_optimizer_seed(seed): 30 | if seed is not None: 31 | random.seed(seed) 32 | np.random.seed(seed) 33 | torch.manual_seed(seed) 34 | 35 | 36 | def _add_pcd(pcds, points, color): 37 | if len(points) == 0: 38 | return 39 | if points.shape[1] == 2: 40 | extended_points = np.zeros((len(points), 3)) 41 | extended_points[:, :2] = points[:, :] 42 | points = extended_points 43 | elif points.shape[1] != 3: 44 | raise ValueError('The points for the DEBUG should either be 2D or 3D.') 45 | pcd = open3d.geometry.PointCloud() 46 | pcd.points = open3d.utility.Vector3dVector(points) 47 | pcd.colors = open3d.utility.Vector3dVector(np.tile(color, (len(points), 1))) 48 | pcds.append(pcd) 49 | 50 | 51 | class SpacePartitioningOptimizer(AbstractOptimizer): 52 | primary_import = 'scikit-learn' 53 | 54 | def __init__(self, api_config, **kwargs): 55 | AbstractOptimizer.__init__(self, api_config) 56 | 57 | print('api_config:', api_config) 58 | self.api_config = api_config 59 | 60 | self.space_x = JointSpace(api_config) 61 | self.bounds = self.space_x.get_bounds() 62 | self.lb, self.ub = self.bounds[:, 0], self.bounds[:, 1] 63 | self.dim = len(self.bounds) 64 | 65 | self.X = np.zeros((0, self.dim)) 66 | self.y = np.zeros((0, 1)) 67 | 68 | self.X_init = None 69 | self.batch_size = None 70 | self.turbo = None 71 | self.split_used = 0 72 | self.node = None 73 | self.best_values = [] 74 | 75 | self.config = self._read_config() 76 | print('config:', self.config) 77 | optimizer_seed = self.config.get('optimizer_seed') 78 | fix_optimizer_seed(optimizer_seed) 79 | self.sampler_seed = self.config.get('sampler_seed') 80 | sampler.fix_sampler_seed(self.sampler_seed) 81 | 82 | self.is_init_batch = False 83 | self.init_batches = [] 84 | 85 | def _read_config(self): 86 | return {'turbo_training_steps': 100, 'turbo_length_retries': 10, 'turbo_length_init_method': 'default', 'experimental_design': 'lhs_classic_ratio', 'n_init_points': 24, 'max_tree_depth': 5, 'kmeans_resplits': 10, 'split_model': {'type': 'SVC', 'args': {'kernel': 'poly', 'gamma': 'scale', 'C': 745.3227447730735}}, 'reset_no_improvement': 8, 'reset_split_after': 4, 'turbo': {'budget': 128, 'use_cylinder': 0, 'use_pull': 0, 'use_lcb': 0, 'kappa': 2.0, 'use_decay': 1, 'decay_alpha': 0.49937937259674076, 'decay_threshold': 0.5, 'length_min': 1e-06, 'length_max': 2.0, 'length_init': 0.8, 'length_multiplier': 2.0}, 'sampler_seed': 42, 'optimizer_seed': 578330} 87 | 88 | def _init(self, n_suggestions): 89 | self.batch_size = n_suggestions 90 | n_init_points = self.config['n_init_points'] 91 | if n_init_points == -1: 92 | # Special value to use the default 2*D+1 number. 
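# (The shipped config above sets n_init_points to 24, so this branch only fires if the
# special value -1 is configured; a 6-dimensional problem would then get 2 * 6 + 1 = 13
# initial points.)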
93 | n_init_points = 2 * self.dim + 1 94 | self.n_init = max(self.batch_size, n_init_points) 95 | exp_design = self.config['experimental_design'] 96 | if exp_design == 'latin_hypercube': 97 | X_init = latin_hypercube(self.n_init, self.dim) 98 | elif exp_design == 'halton': 99 | halton_sampler = sampler.Sampler(method='halton', api_config=self.api_config, n_points=self.n_init) 100 | X_init = halton_sampler.generate(random_state=self.sampler_seed) 101 | X_init = self.space_x.warp(X_init) 102 | X_init = to_unit_cube(X_init, self.lb, self.ub) 103 | elif exp_design == 'lhs_classic_ratio': 104 | lhs_sampler = sampler.Sampler( 105 | method='lhs', 106 | api_config=self.api_config, 107 | n_points=self.n_init, 108 | generator_kwargs={'lhs_type': 'classic', 'criterion': 'ratio'}) 109 | X_init = lhs_sampler.generate(random_state=self.sampler_seed) 110 | X_init = self.space_x.warp(X_init) 111 | X_init = to_unit_cube(X_init, self.lb, self.ub) 112 | else: 113 | raise ValueError(f'Unknown experimental design: {exp_design}.') 114 | self.X_init = X_init 115 | if DEBUG: 116 | print(f'Initialized the method with {self.n_init} points by {exp_design}:') 117 | print(X_init) 118 | 119 | def _get_split_model(self, X, kmeans_labels): 120 | split_model_config = self.config['split_model'] 121 | model_type = split_model_config['type'] 122 | args = split_model_config['args'] 123 | if model_type == 'SVC': 124 | split_model = SVC(**args, max_iter=10**7) 125 | elif model_type == 'KNeighborsClassifier': 126 | split_model = KNeighborsClassifier(**args) 127 | else: 128 | raise ValueError(f'Unknown split model type in the config: {model_type}.') 129 | 130 | split_model.fit(X, kmeans_labels) 131 | split_model_predictions = split_model.predict(X) 132 | split_model_matches = np.sum(split_model_predictions == kmeans_labels) 133 | split_model_mismatches = np.sum(split_model_predictions != kmeans_labels) 134 | print('Labels for the split model:', kmeans_labels) 135 | print('Predictions of the split model:', split_model_predictions) 136 | print(f'Split model matches {split_model_matches} and mismatches {split_model_mismatches}') 137 | return split_model 138 | 139 | def _find_split(self, X, y) -> Optional: 140 | max_margin = None 141 | max_margin_labels = None 142 | for _ in range(self.config['kmeans_resplits']): 143 | kmeans = KMeans(n_clusters=2).fit(y) 144 | kmeans_labels = kmeans.labels_ 145 | if np.count_nonzero(kmeans_labels == 1) > 0 and np.count_nonzero(kmeans_labels == 0) > 0: 146 | if np.mean(y[kmeans_labels == 1]) > np.mean(y[kmeans_labels == 0]): 147 | # Reverse labels if the entries with 1s have a higher mean error, since 1s go to the left branch. 
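# After this flip, label 1 always marks the cluster with the lower mean objective, i.e. the
# "good" region that _build_tree keeps recursing into, while label 0 marks the side that
# gets cut away.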
148 | kmeans_labels = 1 - kmeans_labels 149 | margin = -(np.mean(y[kmeans_labels == 1]) - np.mean(y[kmeans_labels == 0])) 150 | if DEBUG: 151 | print('MARGIN is', margin, np.count_nonzero(kmeans_labels == 1), np.count_nonzero(kmeans_labels == 0)) 152 | if max_margin is None or margin > max_margin: 153 | max_margin = margin 154 | max_margin_labels = kmeans_labels 155 | if DEBUG: 156 | print('MAX MARGIN is', max_margin) 157 | if max_margin_labels is None: 158 | return None 159 | else: 160 | return self._get_split_model(X, max_margin_labels) 161 | 162 | def _build_tree(self, X, y, depth=0): 163 | print('len(X) in _build_tree is', len(X)) 164 | if depth == self.config['max_tree_depth']: 165 | return [] 166 | split = self._find_split(X, y) 167 | if split is None: 168 | return [] 169 | in_region_points = split.predict(X) 170 | left_subtree_size = np.count_nonzero(in_region_points == 1) 171 | right_subtree_size = np.count_nonzero(in_region_points == 0) 172 | print(f'{len(X)} points would be split {left_subtree_size}/{right_subtree_size}.') 173 | if left_subtree_size < self.n_init: 174 | return [] 175 | idx = (in_region_points == 1) 176 | splits = self._build_tree(X[idx], y[idx], depth + 1) 177 | return [split] + splits 178 | 179 | def _get_in_node_region(self, points, splits): 180 | in_region = np.ones(len(points)) 181 | for split in splits: 182 | split_in_region = split.predict(points) 183 | in_region *= split_in_region 184 | return in_region 185 | 186 | def _suggest(self, n_suggestions): 187 | X = to_unit_cube(deepcopy(self.X), self.lb, self.ub) 188 | y = deepcopy(self.y) 189 | if not self.node: 190 | self.split_used = 0 191 | self.node = self._build_tree(X, y) 192 | used_budget = len(y) 193 | idx = (self._get_in_node_region(X, self.node) == 1) 194 | X = X[idx] 195 | y = y[idx] 196 | print(f'Rebuilt the tree of depth {len(self.node)}') 197 | model_config = self.config['turbo'] 198 | #print('CONFIG!!!!!', model_config) 199 | self.turbo = Turbo1( 200 | f=None, 201 | lb=self.bounds[:, 0], 202 | ub=self.bounds[:, 1], 203 | n_init=len(X), 204 | max_evals=np.iinfo(np.int32).max, 205 | batch_size=self.batch_size, 206 | verbose=False, 207 | use_cylinder=model_config['use_cylinder'], 208 | budget=model_config['budget'], 209 | use_decay=model_config['use_decay'], 210 | decay_threshold=model_config['decay_threshold'], 211 | decay_alpha=model_config['decay_alpha'], 212 | use_pull=model_config['use_pull'], 213 | use_lcb=model_config['use_lcb'], 214 | kappa=model_config['kappa'], 215 | length_min=model_config['length_min'], 216 | length_max=model_config['length_max'], 217 | length_init=model_config['length_init'], 218 | length_multiplier=model_config['length_multiplier'], 219 | used_budget=used_budget 220 | ) 221 | self.turbo._X = np.array(X, copy=True) 222 | self.turbo._fX = np.array(y, copy=True) 223 | self.turbo.X = np.array(X, copy=True) 224 | self.turbo.fX = np.array(y, copy=True) 225 | print('Initialized TURBO') 226 | else: 227 | idx = (self._get_in_node_region(X, self.node) == 1) 228 | X = X[idx] 229 | y = y[idx] 230 | self.split_used += 1 231 | 232 | length_init_method = self.config['turbo_length_init_method'] 233 | if length_init_method == 'default': 234 | length = self.turbo.length 235 | elif length_init_method == 'length_init': 236 | length = self.turbo.length_init 237 | elif length_init_method == 'length_max': 238 | length = self.turbo.length_max 239 | elif length_init_method == 'infinity': 240 | length = np.iinfo(np.int32).max 241 | else: 242 | raise ValueError(f'Unknown init method for turbo\'s 
length: {length_init_method}.') 243 | length_reties = self.config['turbo_length_retries'] 244 | for retry in range(length_reties): 245 | XX = X 246 | yy = copula_standardize(y.ravel()) 247 | X_cand, y_cand, _ = self.turbo._create_candidates( 248 | XX, yy, length=length, n_training_steps=self.config['turbo_training_steps'], hypers={}) 249 | in_region_predictions = self._get_in_node_region(X_cand, self.node) 250 | in_region_idx = in_region_predictions == 1 251 | if DEBUG: 252 | print(f'In region: {np.sum(in_region_idx)} out of {len(X_cand)}') 253 | if np.sum(in_region_idx) >= n_suggestions: 254 | X_cand, y_cand = X_cand[in_region_idx], y_cand[in_region_idx] 255 | self.turbo.f_var = self.turbo.f_var[in_region_idx] 256 | if DEBUG: 257 | print('Found a suitable set of candidates.') 258 | break 259 | else: 260 | length /= 2 261 | if DEBUG: 262 | print(f'Retrying {retry + 1}/{length_reties} time') 263 | 264 | X_cand = self.turbo._select_candidates(X_cand, y_cand)[:n_suggestions, :] 265 | if DEBUG: 266 | if X.shape[1] == 3: 267 | tx = np.arange(0.0, 1.0 + 1e-6, 0.1) 268 | ty = np.arange(0.0, 1.0 + 1e-6, 0.1) 269 | tz = np.arange(0.0, 1.0 + 1e-6, 0.1) 270 | p = np.array([[x, y, z] for x in tx for y in ty for z in tz]) 271 | elif X.shape[1] == 2: 272 | tx = np.arange(0.0, 1.0 + 1e-6, 0.1) 273 | ty = np.arange(0.0, 1.0 + 1e-6, 0.1) 274 | p = np.array([[x, y] for x in tx for y in ty]) 275 | else: 276 | raise ValueError('The points for the DEBUG should either be 2D or 3D.') 277 | p_predictions = self._get_in_node_region(p, self.node) 278 | in_turbo_bounds = np.logical_and( 279 | np.all(self.turbo.cand_lb <= p, axis=1), 280 | np.all(p <= self.turbo.cand_ub, axis=1)) 281 | pcds = [] 282 | _add_pcd(pcds, p[p_predictions == 0], (1.0, 0.0, 0.0)) 283 | _add_pcd(pcds, p[np.logical_and(p_predictions == 1, np.logical_not(in_turbo_bounds))], (0.0, 1.0, 0.0)) 284 | _add_pcd(pcds, p[np.logical_and(p_predictions == 1, in_turbo_bounds)], (0.0, 0.5, 0.0)) 285 | _add_pcd(pcds, X_cand, (0.0, 0.0, 0.0)) 286 | open3d.visualization.draw_geometries(pcds) 287 | return X_cand 288 | 289 | def suggest(self, n_suggestions=1): 290 | X_suggestions = np.zeros((n_suggestions, self.dim)) 291 | # Initialize the design if it is the first call 292 | if self.X_init is None: 293 | self._init(n_suggestions) 294 | if self.init_batches: 295 | print('REUSING INITIALIZATION:') 296 | for X, Y in self.init_batches: 297 | print('Re-observing a batch!') 298 | self.observe(X, Y) 299 | self.X_init = [] 300 | 301 | # Pick from the experimental design 302 | n_init = min(len(self.X_init), n_suggestions) 303 | if n_init > 0: 304 | X_suggestions[:n_init] = self.X_init[:n_init] 305 | self.X_init = self.X_init[n_init:] 306 | self.is_init_batch = True 307 | else: 308 | self.is_init_batch = False 309 | 310 | # Pick from the model based on the already received observations 311 | n_suggest = n_suggestions - n_init 312 | if n_suggest > 0: 313 | X_cand = self._suggest(n_suggest) 314 | X_suggestions[-n_suggest:] = X_cand 315 | 316 | # Map into the continuous space with the api bounds and unwarp the suggestions 317 | X_min_bound = 0.0 318 | X_max_bound = 1.0 319 | X_suggestions_min = X_suggestions.min() 320 | X_suggestions_max = X_suggestions.max() 321 | if X_suggestions_min < X_min_bound or X_suggestions_max > X_max_bound: 322 | print(f'Some suggestions are out of the bounds in suggest(): {X_suggestions_min}, {X_suggestions_max}') 323 | print('Clipping everything...') 324 | X_suggestions = np.clip(X_suggestions, X_min_bound, X_max_bound) 325 | X_suggestions = 
from_unit_cube(X_suggestions, self.lb, self.ub) 326 | X_suggestions = self.space_x.unwarp(X_suggestions) 327 | return X_suggestions 328 | 329 | def observe(self, X_observed, Y_observed): 330 | if self.is_init_batch: 331 | self.init_batches.append([X_observed, Y_observed]) 332 | X, Y = [], [] 333 | for x, y in zip(X_observed, Y_observed): 334 | if np.isfinite(y): 335 | X.append(x) 336 | Y.append(y) 337 | else: 338 | # Ignore for now; could potentially substitute with an upper bound. 339 | continue 340 | if not X: 341 | return 342 | X, Y = self.space_x.warp(X), np.array(Y)[:, None] 343 | self.X = np.vstack((self.X, deepcopy(X))) 344 | self.y = np.vstack((self.y, deepcopy(Y))) 345 | self.best_values.append(Y.min()) 346 | 347 | if self.turbo: 348 | if len(self.turbo._X) >= self.turbo.n_init: 349 | self.turbo._adjust_length(Y) 350 | print('TURBO length:', self.turbo.length) 351 | self.turbo._X = np.vstack((self.turbo._X, deepcopy(X))) 352 | self.turbo._fX = np.vstack((self.turbo._fX, deepcopy(Y))) 353 | self.turbo.X = np.vstack((self.turbo.X, deepcopy(X))) 354 | self.turbo.fX = np.vstack((self.turbo.fX, deepcopy(Y))) 355 | 356 | N = self.config['reset_no_improvement'] 357 | if len(self.best_values) > N and np.min(self.best_values[:-N]) <= np.min(self.best_values[-N:]): 358 | print('########## RESETTING COMPLETELY! ##########') 359 | self.X = np.zeros((0, self.dim)) 360 | self.y = np.zeros((0, 1)) 361 | self.best_values = [] 362 | self.X_init = None 363 | self.node = None 364 | self.turbo = None 365 | self.split_used = 0 366 | 367 | if self.split_used >= self.config['reset_split_after']: 368 | print('########## REBUILDING THE SPLIT! ##########') 369 | self.node = None 370 | self.turbo = None 371 | self.split_used = 0 372 | 373 | 374 | if __name__ == '__main__': 375 | experiment_main(SpacePartitioningOptimizer) 376 | --------------------------------------------------------------------------------
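The split step in `_find_split` above (KMeans with two clusters over the observed objective values, a label flip so that 1 marks the lower-error cluster, then an SVC trained on the inputs to predict that region) can be exercised in isolation. The sketch below is a minimal, hypothetical reproduction of that idea using only NumPy and scikit-learn: `toy_objective` and its constants are invented for illustration, and the SVC hyperparameters are simplified stand-ins rather than the tuned values from `_read_config`.

```python
# Minimal sketch of the space-partitioning split used above: cluster the observed
# objective values into "good"/"bad" with KMeans, relabel so 1 = lower mean error,
# then train a classifier on the inputs to predict the good region.
# Hypothetical toy setup; not the tuned configuration from optimizer.py.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.svm import SVC


def toy_objective(x):
    # Simple bowl with its minimum at (0.25, 0.75); stands in for a black-box metric.
    return (x[:, 0] - 0.25) ** 2 + (x[:, 1] - 0.75) ** 2


rng = np.random.RandomState(0)
X = rng.rand(200, 2)                      # observations in the unit cube
y = toy_objective(X).reshape(-1, 1)       # lower is better, as in Bayesmark

kmeans = KMeans(n_clusters=2, n_init=10, random_state=0).fit(y)
labels = kmeans.labels_
if np.mean(y[labels == 1]) > np.mean(y[labels == 0]):
    labels = 1 - labels                   # ensure label 1 = lower mean objective

split = SVC(kernel="poly", gamma="scale", C=100.0, max_iter=10**7)
split.fit(X, labels)

in_good_region = split.predict(X) == 1
print("good-region points:", in_good_region.sum(), "of", len(X))
print("mean y inside :", float(y[in_good_region].mean()))
print("mean y outside:", float(y[~in_good_region].mean()))
```

In the optimizer itself, only the points predicted as 1 are handed to the local TuRBO model in `_suggest`, which is what concentrates the remaining evaluation budget on the most promising part of the search space.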