├── images └── finals.png ├── submissions └── space-decay │ ├── requirements.txt │ ├── util.py │ ├── gp.py │ ├── turbo1.py │ ├── sampler.py │ └── optimizer.py ├── prepare_upload.sh ├── README.md ├── run_local.sh ├── environment.txt ├── .gitignore ├── LICENSE.turbo └── LICENSE /images/finals.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jbr-ai-labs/bbo-challenge-jetbrains-research/HEAD/images/finals.png -------------------------------------------------------------------------------- /submissions/space-decay/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/uber-research/TuRBO.git@master 2 | scikit-optimize==0.8.dev0 3 | pyaml>=16.9 -------------------------------------------------------------------------------- /submissions/space-decay/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.stats as ss 3 | 4 | 5 | def order_stats(X): 6 | _, idx, cnt = np.unique(X, return_inverse=True, return_counts=True) 7 | obs = np.cumsum(cnt) # Need to do it this way due to ties 8 | o_stats = obs[idx] 9 | return o_stats 10 | 11 | 12 | def copula_standardize(X): 13 | X = np.nan_to_num(np.asarray(X)) # Replace inf by something large 14 | assert X.ndim == 1 and np.all(np.isfinite(X)) 15 | o_stats = order_stats(X) 16 | quantile = np.true_divide(o_stats, len(X) + 1) 17 | X_ss = ss.norm.ppf(quantile) 18 | return X_ss 19 | -------------------------------------------------------------------------------- /prepare_upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | set -o pipefail 5 | 6 | # Input args 7 | CODE_DIR=$1 8 | NAME=$2 9 | 10 | # Eliminate final slash 11 | CODE_DIR=$(dirname $CODE_DIR)/$(basename $CODE_DIR) 12 | 13 | # Copy in provided files 14 | cp -r -n $CODE_DIR ./$NAME 15 | 16 | # Make a blank req file if none provided 17 | REQ_FILE=./$NAME/requirements.txt 18 | touch $REQ_FILE 19 | 20 | # Download all the wheels/tarballs with our docker as the target 21 | pip download -r $REQ_FILE -d ./$NAME --python-version 36 --implementation cp --platform manylinux1_x86_64 --abi cp36m --no-deps 22 | 23 | # Test zip does not exist yet to avoid clobber 24 | ! test -f $NAME.zip 25 | 26 | # Build the zip with correct directory structure 27 | (cd $NAME && zip -r ../$NAME.zip ./*) 28 | 29 | # Display final output for user at end 30 | set +x 31 | 32 | echo "----------------------------------------------------------------" 33 | echo "Built archive for upload" 34 | unzip -l ./$NAME.zip 35 | 36 | echo "For scoring, upload $NAME.zip at address:" 37 | echo "https://bbochallenge.com/my-submissions" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JetBrains Research's Solution for Black-Box Optimization Challenge 2 | 3 | This is the code for our solution to the [NeurIPS 2020 Black-Box Optimization Challenge](https://bbochallenge.com/). 4 | 5 | Our solution is described in the "Solving Black-Box Optimization Challenge via Learning Search Space Partition for Local Bayesian Optimization" paper. 6 | 7 | ## Final Results 8 | 9 | Our approach scored 92.509 in the finals and ranked 3rd overall!
10 | 11 | ![finals](./images/finals.png) 12 | 13 | ## Team Members 14 | 15 | * Mikita Sazanovich (github: [@niksaz](https://github.com/niksaz)) 16 | * Anastasiya Nikolskaya (github: [@nuvard](https://github.com/nuvard)) 17 | * Yury Belousov (github: [@bruce-willis](https://github.com/bruce-willis)) 18 | * Aleksei Shpilman 19 | 20 | ## Citing us 21 | 22 | The paper is available at: https://arxiv.org/pdf/2012.10335.pdf (extended version from Proceedings of Machine Learning Research at: http://proceedings.mlr.press/v133/sazanovich21a.html). 23 | 24 | If you want to cite this code, please use the following: 25 | 26 | ``` 27 | @misc{sazanovich2020solving, 28 | title={Solving Black-Box Optimization Challenge via Learning Search Space Partition for Local Bayesian Optimization}, 29 | author={Mikita Sazanovich and Anastasiya Nikolskaya and Yury Belousov and Aleksei Shpilman}, 30 | year={2020}, 31 | eprint={2012.10335}, 32 | archivePrefix={arXiv}, 33 | primaryClass={cs.LG} 34 | } 35 | ``` 36 | 37 | ## License 38 | 39 | Our implementation is released under [Apache License 2.0](./LICENSE) license except for the code derived from TuRBO. 40 | -------------------------------------------------------------------------------- /run_local.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SECONDS=0 4 | 5 | set -ex 6 | set -o pipefail 7 | 8 | # Up-to-date competition settings 9 | N_STEP=16 10 | N_BATCH=8 11 | 12 | # Outdated competition settings 13 | # N_STEP=32 14 | # N_BATCH=8 15 | 16 | # For a fast experiment 17 | # N_STEP=15 18 | # N_BATCH=1 19 | 20 | # Input args 21 | CODE_DIR=$1 22 | N_REPEAT=$2 23 | 24 | # Where output goes 25 | DB_ROOT=./output 26 | DBID=run_$(date +"%Y%m%d_%H%M%S") 27 | 28 | # Setup vars 29 | OPT=$(basename $CODE_DIR) 30 | OPT_ROOT=$(dirname $CODE_DIR) 31 | 32 | # Check that bayesmark is installed in this environment 33 | which bayesmark-init 34 | which bayesmark-launch 35 | which bayesmark-exp 36 | which bayesmark-agg 37 | which bayesmark-anal 38 | 39 | # Ensure output folder exists 40 | mkdir -p $DB_ROOT 41 | 42 | # Copy the baseline file in, we can skip this but we must include RandomSearch in the -o list 43 | ! 
test -d $DB_ROOT/$DBID/ # Check the folder does not yet exist 44 | bayesmark-init -dir $DB_ROOT -b $DBID 45 | cp ./input/baseline-$N_STEP-$N_BATCH.json $DB_ROOT/$DBID/derived/baseline.json 46 | 47 | # By default, runs on all models (-c), data (-d), metrics (-m) 48 | bayesmark-launch -dir $DB_ROOT -b $DBID -n $N_STEP -r $N_REPEAT -p $N_BATCH -o $OPT --opt-root $OPT_ROOT -v -c SVM DT -d boston wine 49 | # To run on all problems use instead (slower): 50 | # bayesmark-launch -dir $DB_ROOT -b $DBID -n $N_STEP -r $N_REPEAT -p $N_BATCH -o $OPT --opt-root $OPT_ROOT -v 51 | 52 | # Now aggregate the results 53 | bayesmark-agg -dir $DB_ROOT -b $DBID 54 | # And analyze the scores 55 | bayesmark-anal -dir $DB_ROOT -b $DBID -v 56 | 57 | echo "Time spent:" $SECONDS 58 | -------------------------------------------------------------------------------- /environment.txt: -------------------------------------------------------------------------------- 1 | # Pinned requirements used in the docker image (valohai/bbochallenge:20200821-57e60f9) that executes submissions (Python 3.6.12) 2 | absl-py==0.9.0 3 | astunparse==1.6.3 4 | attrs==19.3.0 5 | bayesian-optimization==0.6.0 6 | bayesmark==0.0.7 7 | botorch==0.2.1 8 | cachetools==4.1.0 9 | certifi==2020.4.5.1 10 | chardet==3.0.4 11 | cma==3.0.3 12 | coverage==5.1 13 | cycler==0.10.0 14 | decorator==4.4.2 15 | dill==0.3.1.1 16 | fn==0.4.3 17 | future==0.18.2 18 | gast==0.3.3 19 | genty==1.3.2 20 | gitdb==4.0.5 21 | GitPython==3.1.3 22 | google-auth==1.14.3 23 | google-auth-oauthlib==0.4.1 24 | google-pasta==0.2.0 25 | gpytorch==1.1.1 26 | grpcio==1.29.0 27 | h5py==2.10.0 28 | hyperopt==0.1.1 29 | idna==2.9 30 | importlib-metadata==1.6.0 31 | joblib==0.14.1 32 | Keras==2.3.1 33 | Keras-Applications==1.0.8 34 | Keras-Preprocessing==1.1.2 35 | kiwisolver==1.2.0 36 | lightgbm==2.3.1 37 | Markdown==3.2.2 38 | matplotlib==3.2.1 39 | more-itertools==8.2.0 40 | mypy==0.770 41 | mypy-extensions==0.4.3 42 | networkx==2.4 43 | nevergrad==0.1.4 44 | nose==1.3.7 45 | nose-timer==1.0.0 46 | numpy==1.18.5 47 | oauthlib==3.1.0 48 | opentuner==0.8.2 49 | opt-einsum==3.2.1 50 | packaging==20.3 51 | pandas==1.0.5 52 | pathvalidate==2.3.0 53 | pluggy==0.13.1 54 | POAP==0.1.26 55 | protobuf==3.12.0 56 | py==1.8.1 57 | pyasn1==0.4.8 58 | pyasn1-modules==0.2.8 59 | pyDOE2==1.3.0 60 | pymongo==3.10.1 61 | pyparsing==2.4.7 62 | pySOT==0.2.3 63 | pytest==5.4.2 64 | python-dateutil==2.8.1 65 | pytz==2020.1 66 | PyYAML==5.3.1 67 | requests==2.23.0 68 | requests-oauthlib==1.3.0 69 | rsa==4.0 70 | scikit-learn==0.20.2 71 | scikit-optimize==0.5.2 72 | scipy==1.4.1 73 | six==1.14.0 74 | smmap==3.0.4 75 | SQLAlchemy==1.3.16 76 | tensorboard==2.2.1 77 | tensorboard-plugin-wit==1.6.0.post3 78 | tensorflow==2.2.0 79 | tensorflow-estimator==2.2.0 80 | tensorflow-hub==0.8.0 81 | termcolor==1.1.0 82 | torch==1.5.0 83 | typed-ast==1.4.1 84 | typing-extensions==3.7.4.2 85 | urllib3==1.25.9 86 | wcwidth==0.1.9 87 | Werkzeug==1.0.1 88 | wrapt==1.12.1 89 | xarray==0.15.1 90 | xgboost==1.1.0 91 | xlrd==1.2.0 92 | xlwt==1.3.0 93 | zipp==3.1.0 94 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Specific to this repo 2 | output/ 3 | archives/ 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | 
eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | pip-wheel-metadata/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | .idea/ 136 | -------------------------------------------------------------------------------- /LICENSE.turbo: -------------------------------------------------------------------------------- 1 | "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by the text below. 2 | 3 | "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 4 | 5 | "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 6 | 7 | "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 8 | 9 | "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
10 | 11 | "Work" shall mean the work of authorship, whether in Source or Object form, made available under this License. 12 | 13 | This License governs use of the accompanying Work, and your use of the Work constitutes acceptance of this License. 14 | 15 | You may use this Work for any non-commercial purpose, subject to the restrictions in this License. Some purposes which can be non-commercial are teaching, academic research, and personal experimentation. You may also distribute this Work with books or other teaching materials, or publish the Work on websites, that are intended to teach the use of the Work. 16 | 17 | You may not use or distribute this Work, or any derivative works, outputs, or results from the Work, in any form for commercial purposes. Non-exhaustive examples of commercial purposes would be running business operations, licensing, leasing, or selling the Work, or distributing the Work for use with commercial products. 18 | 19 | You may modify this Work and distribute the modified Work for non-commercial purposes, however, you may not grant rights to the Work or derivative works that are broader than or in conflict with those provided by this License. For example, you may not distribute modifications of the Work under terms that would permit commercial use, or under terms that purport to require the Work or derivative works to be sublicensed to others. 20 | 21 | In return, we require that you agree: 22 | 23 | 1. Not to remove any copyright or other notices from the Work. 24 | 25 | 2. That if you distribute the Work in Source or Object form, you will include a verbatim copy of this License. 26 | 27 | 3. That if you distribute derivative works of the Work in Source form, you do so only under a license that includes all of the provisions of this License and is not in conflict with this License, and if you distribute derivative works of the Work solely in Object form you do so only under a license that complies with this License. 28 | 29 | 4. That if you have modified the Work or created derivative works from the Work, and distribute such modifications or derivative works, you will cause the modified files to carry prominent notices so that recipients know that they are not receiving the original Work. Such notices must state: (i) that you have changed the Work; and (ii) the date of any changes. 30 | 31 | 5. If you publicly use the Work or any output or result of the Work, you will provide a notice with such use that provides any person who uses, views, accesses, interacts with, or is otherwise exposed to the Work (i) with information of the nature of the Work, (ii) with a link to the Work, and (iii) a notice that the Work is available under this License. 32 | 33 | 6. THAT THE WORK COMES "AS IS", WITH NO WARRANTIES. THIS MEANS NO EXPRESS, IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE OR ANY WARRANTY OF TITLE OR NON-INFRINGEMENT. ALSO, YOU MUST PASS THIS DISCLAIMER ON WHENEVER YOU DISTRIBUTE THE WORK OR DERIVATIVE WORKS. 34 | 35 | 7. THAT NEITHER UBER TECHNOLOGIES, INC. NOR ANY OF ITS AFFILIATES, SUPPLIERS, SUCCESSORS, NOR ASSIGNS WILL BE LIABLE FOR ANY DAMAGES RELATED TO THE WORK OR THIS LICENSE, INCLUDING DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT THE LAW PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUST PASS THIS LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE WORK OR DERIVATIVE WORKS. 36 | 37 | 8. 
That if you sue anyone over patents that you think may apply to the Work or anyone's use of the Work, your license to the Work ends automatically. 38 | 39 | 9. That your rights under the License end automatically if you breach it in any way. 40 | 41 | 10. Uber Technologies, Inc. reserves all rights not expressly granted to you in this License. 42 | -------------------------------------------------------------------------------- /submissions/space-decay/gp.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2019 Uber Technologies, Inc. # 3 | # # 4 | # Licensed under the Uber Non-Commercial License (the "License"); # 5 | # you may not use this file except in compliance with the License. # 6 | # You may obtain a copy of the License at the root directory of this project. # 7 | # # 8 | # See the License for the specific language governing permissions and # 9 | # limitations under the License. # 10 | ############################################################################### 11 | 12 | import math 13 | 14 | import gpytorch 15 | import numpy as np 16 | import torch 17 | from gpytorch.constraints.constraints import Interval 18 | from gpytorch.distributions import MultivariateNormal 19 | from gpytorch.kernels import MaternKernel, ScaleKernel, CylindricalKernel 20 | from gpytorch.likelihoods import GaussianLikelihood 21 | from gpytorch.means import ConstantMean 22 | from gpytorch.mlls import ExactMarginalLogLikelihood 23 | from gpytorch.models import ExactGP 24 | #from botorch.models import SingleTaskGP, FixedNoiseGP 25 | 26 | 27 | # GP Model 28 | class GP(ExactGP): 29 | def __init__(self, train_x, train_y, likelihood, lengthscale_constraint, outputscale_constraint, ard_dims): 30 | super(GP, self).__init__(train_x, train_y, likelihood) 31 | self.ard_dims = ard_dims 32 | self.mean_module = ConstantMean() 33 | base_kernel = MaternKernel(lengthscale_constraint=lengthscale_constraint, ard_num_dims=ard_dims, nu=2.5) 34 | self.covar_module = ScaleKernel(base_kernel, outputscale_constraint=outputscale_constraint) 35 | 36 | def forward(self, x): 37 | mean_x = self.mean_module(x) 38 | covar_x = self.covar_module(x) 39 | return MultivariateNormal(mean_x, covar_x) 40 | 41 | 42 | def map_box_ball(x, dim): 43 | #dim = x.shape[1] 44 | # from borders to [-1, 1]^d 45 | x = (x - 0.5) * 2 46 | # from [-1, 1]^d to Ball(0, 1) 47 | x = x / np.sqrt(dim) 48 | return x 49 | 50 | 51 | def map_ball_box(x, dim): 52 | #dim = len(borders) 53 | # from Ball(0, 1) to [-1, 1]^d 54 | x = np.sqrt(dim) * x 55 | # from [-1, 1]^d to borders 56 | x = x * 0.5 + 0.5 57 | return x 58 | 59 | 60 | class KumaAlphaPrior(gpytorch.priors.Prior): 61 | def __init__(self): 62 | super(KumaAlphaPrior, self).__init__() 63 | self.log_a_max = np.log(2) 64 | pass 65 | 66 | def log_prob(self, x): 67 | x = torch.log(x) 68 | loc = torch.tensor(0.).to(x) 69 | scale = torch.tensor(0.01).to(x) 70 | return torch.sum(torch.log( 71 | torch.distributions.Normal(loc=loc, scale=scale).log_prob(x).exp() + 0.5 / self.log_a_max 72 | )) 73 | 74 | 75 | class KumaBetaPrior(gpytorch.priors.Prior): 76 | def __init__(self): 77 | super(KumaBetaPrior, self).__init__() 78 | self.log_b_max = np.log(2) 79 | pass 80 | 81 | def log_prob(self, x): 82 | x = torch.log(x) 83 | loc = torch.tensor(0.).to(x) 84 | scale = torch.tensor(0.01).to(x) 85 | return torch.sum(torch.log( 86 | torch.distributions.Normal(loc=loc, scale=scale).log_prob(x).exp() + 
0.5 / self.log_b_max 87 | )) 88 | 89 | 90 | class AngularWeightsPrior(gpytorch.priors.Prior): 91 | def __init__(self): 92 | super(AngularWeightsPrior, self).__init__() 93 | 94 | def log_prob(self, x): 95 | x = torch.log(x) 96 | loc = torch.tensor(0.).to(x) 97 | scale = torch.tensor(2.).to(x) 98 | return torch.distributions.Normal(loc=loc, scale=scale).log_prob(x).sum() 99 | 100 | 101 | class CustomCylindricalGP(ExactGP): # FixedNoiseGP SingleTaskGP 102 | def __init__(self, train_X, train_Y, likelihood, dim, lengthscale_constraint, outputscale_constraint, ard_dims): 103 | # squeeze output dim before passing train_Y to ExactGP 104 | super().__init__(train_X, train_Y, likelihood) # GaussianLikelihood()) # GaussianLikelihood() noise.squeeze(-1) 105 | self.dim = dim 106 | self.mean_module = ConstantMean() 107 | self.covar_module = ScaleKernel(CylindricalKernel( 108 | num_angular_weights=ard_dims, 109 | alpha_prior=KumaAlphaPrior(), 110 | alpha_constraint=gpytorch.constraints.constraints.Interval(lower_bound=0.5, upper_bound=1.), 111 | beta_prior=KumaBetaPrior(), 112 | beta_constraint=gpytorch.constraints.constraints.Interval(lower_bound=1., upper_bound=2.), 113 | radial_base_kernel=MaternKernel(lengthscale_constraint=lengthscale_constraint, ard_num_dims=1, nu=2.5), 114 | # angular_weights_constraint=gpytorch.constraints.constraints.Interval(lower_bound=np.exp(-12.), 115 | # upper_bound=np.exp(20.)), 116 | angular_weights_prior=AngularWeightsPrior() 117 | )) 118 | self.to(train_X) # make sure we're on the right device/dtype 119 | 120 | def forward(self, x): 121 | x = map_box_ball(x, self.dim) 122 | mean_x = self.mean_module(x) 123 | covar_x = self.covar_module(x) 124 | return MultivariateNormal(mean_x, covar_x) 125 | 126 | 127 | def train_gp(train_x, train_y, use_ard, num_steps, hypers={}, use_cylinder=True, dim=1): 128 | """Fit a GP model where train_x is in [0, 1]^d and train_y is standardized.""" 129 | assert train_x.ndim == 2 130 | assert train_y.ndim == 1 131 | assert train_x.shape[0] == train_y.shape[0] 132 | 133 | # Create hyper parameter bounds 134 | noise_constraint = Interval(5e-4, 0.2) 135 | if use_ard: 136 | lengthscale_constraint = Interval(0.005, 2.0) 137 | else: 138 | lengthscale_constraint = Interval(0.005, math.sqrt(train_x.shape[1])) # [0.005, sqrt(dim)] 139 | outputscale_constraint = Interval(0.05, 20.0) 140 | 141 | # Create models 142 | likelihood = GaussianLikelihood(noise_constraint=noise_constraint).to(device=train_x.device, dtype=train_y.dtype) 143 | ard_dims = train_x.shape[1] if use_ard else None 144 | if use_cylinder: 145 | model = CustomCylindricalGP( 146 | train_X=train_x, 147 | train_Y=train_y, 148 | likelihood=likelihood, 149 | dim=dim, 150 | lengthscale_constraint=lengthscale_constraint, 151 | outputscale_constraint=outputscale_constraint, 152 | ard_dims=ard_dims, 153 | ).to(device=train_x.device, dtype=train_x.dtype) 154 | else: 155 | model = GP( 156 | train_x=train_x, 157 | train_y=train_y, 158 | likelihood=likelihood, 159 | lengthscale_constraint=lengthscale_constraint, 160 | outputscale_constraint=outputscale_constraint, 161 | ard_dims=ard_dims, 162 | ).to(device=train_x.device, dtype=train_x.dtype) 163 | 164 | 165 | # Find optimal model hyperparameters 166 | model.train() 167 | likelihood.train() 168 | 169 | # "Loss" for GPs - the marginal log likelihood 170 | mll = ExactMarginalLogLikelihood(likelihood, model) 171 | 172 | # Initialize model hypers 173 | if hypers: 174 | model.load_state_dict(hypers) 175 | else: 176 | hypers = {} 177 | if not use_cylinder: 
178 | hypers["covar_module.outputscale"] = 1.0 179 | hypers["covar_module.base_kernel.lengthscale"] = 0.5 180 | hypers["likelihood.noise"] = 0.005 181 | model.initialize(**hypers) 182 | 183 | # Use the adam optimizer 184 | optimizer = torch.optim.Adam([{"params": model.parameters()}], lr=0.1) 185 | 186 | for _ in range(num_steps): 187 | optimizer.zero_grad() 188 | output = model(train_x) 189 | loss = -mll(output, train_y) 190 | loss.backward() 191 | optimizer.step() 192 | 193 | # Switch to eval mode 194 | model.eval() 195 | likelihood.eval() 196 | 197 | return model 198 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | `submissions/space-decay/gp.py` and 2 | `submissions/space-decay/turbo1.py` are derived 3 | from https://github.com/uber-research/TuRBO on October 15, 2020, hence 4 | it is distributed under LICENSE.turbo. The rest of the files are 5 | distributed under Apache License Version 2.0. 6 | 7 | 8 | Apache License 9 | Version 2.0, January 2004 10 | http://www.apache.org/licenses/ 11 | 12 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 13 | 14 | 1. Definitions. 15 | 16 | "License" shall mean the terms and conditions for use, reproduction, 17 | and distribution as defined by Sections 1 through 9 of this document. 18 | 19 | "Licensor" shall mean the copyright owner or entity authorized by 20 | the copyright owner that is granting the License. 21 | 22 | "Legal Entity" shall mean the union of the acting entity and all 23 | other entities that control, are controlled by, or are under common 24 | control with that entity. For the purposes of this definition, 25 | "control" means (i) the power, direct or indirect, to cause the 26 | direction or management of such entity, whether by contract or 27 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 28 | outstanding shares, or (iii) beneficial ownership of such entity. 29 | 30 | "You" (or "Your") shall mean an individual or Legal Entity 31 | exercising permissions granted by this License. 32 | 33 | "Source" form shall mean the preferred form for making modifications, 34 | including but not limited to software source code, documentation 35 | source, and configuration files. 36 | 37 | "Object" form shall mean any form resulting from mechanical 38 | transformation or translation of a Source form, including but 39 | not limited to compiled object code, generated documentation, 40 | and conversions to other media types. 41 | 42 | "Work" shall mean the work of authorship, whether in Source or 43 | Object form, made available under the License, as indicated by a 44 | copyright notice that is included in or attached to the work 45 | (an example is provided in the Appendix below). 46 | 47 | "Derivative Works" shall mean any work, whether in Source or Object 48 | form, that is based on (or derived from) the Work and for which the 49 | editorial revisions, annotations, elaborations, or other modifications 50 | represent, as a whole, an original work of authorship. For the purposes 51 | of this License, Derivative Works shall not include works that remain 52 | separable from, or merely link (or bind by name) to the interfaces of, 53 | the Work and Derivative Works thereof. 
54 | 55 | "Contribution" shall mean any work of authorship, including 56 | the original version of the Work and any modifications or additions 57 | to that Work or Derivative Works thereof, that is intentionally 58 | submitted to Licensor for inclusion in the Work by the copyright owner 59 | or by an individual or Legal Entity authorized to submit on behalf of 60 | the copyright owner. For the purposes of this definition, "submitted" 61 | means any form of electronic, verbal, or written communication sent 62 | to the Licensor or its representatives, including but not limited to 63 | communication on electronic mailing lists, source code control systems, 64 | and issue tracking systems that are managed by, or on behalf of, the 65 | Licensor for the purpose of discussing and improving the Work, but 66 | excluding communication that is conspicuously marked or otherwise 67 | designated in writing by the copyright owner as "Not a Contribution." 68 | 69 | "Contributor" shall mean Licensor and any individual or Legal Entity 70 | on behalf of whom a Contribution has been received by Licensor and 71 | subsequently incorporated within the Work. 72 | 73 | 2. Grant of Copyright License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | copyright license to reproduce, prepare Derivative Works of, 77 | publicly display, publicly perform, sublicense, and distribute the 78 | Work and such Derivative Works in Source or Object form. 79 | 80 | 3. Grant of Patent License. Subject to the terms and conditions of 81 | this License, each Contributor hereby grants to You a perpetual, 82 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 83 | (except as stated in this section) patent license to make, have made, 84 | use, offer to sell, sell, import, and otherwise transfer the Work, 85 | where such license applies only to those patent claims licensable 86 | by such Contributor that are necessarily infringed by their 87 | Contribution(s) alone or by combination of their Contribution(s) 88 | with the Work to which such Contribution(s) was submitted. If You 89 | institute patent litigation against any entity (including a 90 | cross-claim or counterclaim in a lawsuit) alleging that the Work 91 | or a Contribution incorporated within the Work constitutes direct 92 | or contributory patent infringement, then any patent licenses 93 | granted to You under this License for that Work shall terminate 94 | as of the date such litigation is filed. 95 | 96 | 4. Redistribution. 
You may reproduce and distribute copies of the 97 | Work or Derivative Works thereof in any medium, with or without 98 | modifications, and in Source or Object form, provided that You 99 | meet the following conditions: 100 | 101 | (a) You must give any other recipients of the Work or 102 | Derivative Works a copy of this License; and 103 | 104 | (b) You must cause any modified files to carry prominent notices 105 | stating that You changed the files; and 106 | 107 | (c) You must retain, in the Source form of any Derivative Works 108 | that You distribute, all copyright, patent, trademark, and 109 | attribution notices from the Source form of the Work, 110 | excluding those notices that do not pertain to any part of 111 | the Derivative Works; and 112 | 113 | (d) If the Work includes a "NOTICE" text file as part of its 114 | distribution, then any Derivative Works that You distribute must 115 | include a readable copy of the attribution notices contained 116 | within such NOTICE file, excluding those notices that do not 117 | pertain to any part of the Derivative Works, in at least one 118 | of the following places: within a NOTICE text file distributed 119 | as part of the Derivative Works; within the Source form or 120 | documentation, if provided along with the Derivative Works; or, 121 | within a display generated by the Derivative Works, if and 122 | wherever such third-party notices normally appear. The contents 123 | of the NOTICE file are for informational purposes only and 124 | do not modify the License. You may add Your own attribution 125 | notices within Derivative Works that You distribute, alongside 126 | or as an addendum to the NOTICE text from the Work, provided 127 | that such additional attribution notices cannot be construed 128 | as modifying the License. 129 | 130 | You may add Your own copyright statement to Your modifications and 131 | may provide additional or different license terms and conditions 132 | for use, reproduction, or distribution of Your modifications, or 133 | for any such Derivative Works as a whole, provided Your use, 134 | reproduction, and distribution of the Work otherwise complies with 135 | the conditions stated in this License. 136 | 137 | 5. Submission of Contributions. Unless You explicitly state otherwise, 138 | any Contribution intentionally submitted for inclusion in the Work 139 | by You to the Licensor shall be under the terms and conditions of 140 | this License, without any additional terms or conditions. 141 | Notwithstanding the above, nothing herein shall supersede or modify 142 | the terms of any separate license agreement you may have executed 143 | with Licensor regarding such Contributions. 144 | 145 | 6. Trademarks. This License does not grant permission to use the trade 146 | names, trademarks, service marks, or product names of the Licensor, 147 | except as required for reasonable and customary use in describing the 148 | origin of the Work and reproducing the content of the NOTICE file. 149 | 150 | 7. Disclaimer of Warranty. Unless required by applicable law or 151 | agreed to in writing, Licensor provides the Work (and each 152 | Contributor provides its Contributions) on an "AS IS" BASIS, 153 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 154 | implied, including, without limitation, any warranties or conditions 155 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 156 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 157 | appropriateness of using or redistributing the Work and assume any 158 | risks associated with Your exercise of permissions under this License. 159 | 160 | 8. Limitation of Liability. In no event and under no legal theory, 161 | whether in tort (including negligence), contract, or otherwise, 162 | unless required by applicable law (such as deliberate and grossly 163 | negligent acts) or agreed to in writing, shall any Contributor be 164 | liable to You for damages, including any direct, indirect, special, 165 | incidental, or consequential damages of any character arising as a 166 | result of this License or out of the use or inability to use the 167 | Work (including but not limited to damages for loss of goodwill, 168 | work stoppage, computer failure or malfunction, or any and all 169 | other commercial damages or losses), even if such Contributor 170 | has been advised of the possibility of such damages. 171 | 172 | 9. Accepting Warranty or Additional Liability. While redistributing 173 | the Work or Derivative Works thereof, You may choose to offer, 174 | and charge a fee for, acceptance of support, warranty, indemnity, 175 | or other liability obligations and/or rights consistent with this 176 | License. However, in accepting such obligations, You may act only 177 | on Your own behalf and on Your sole responsibility, not on behalf 178 | of any other Contributor, and only if You agree to indemnify, 179 | defend, and hold each Contributor harmless for any liability 180 | incurred by, or claims asserted against, such Contributor by reason 181 | of your accepting any such warranty or additional liability. 182 | 183 | END OF TERMS AND CONDITIONS 184 | 185 | APPENDIX: How to apply the Apache License to your work. 186 | 187 | To apply the Apache License to your work, attach the following 188 | boilerplate notice, with the fields enclosed by brackets "[]" 189 | replaced with your own identifying information. (Don't include 190 | the brackets!) The text should be enclosed in the appropriate 191 | comment syntax for the file format. We also recommend that a 192 | file or class name and description of purpose be included on the 193 | same "printed page" as the copyright notice for easier 194 | identification within third-party archives. 195 | 196 | Copyright 2020 JetBrains Research 197 | 198 | Licensed under the Apache License, Version 2.0 (the "License"); 199 | you may not use this file except in compliance with the License. 200 | You may obtain a copy of the License at 201 | 202 | http://www.apache.org/licenses/LICENSE-2.0 203 | 204 | Unless required by applicable law or agreed to in writing, software 205 | distributed under the License is distributed on an "AS IS" BASIS, 206 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 207 | See the License for the specific language governing permissions and 208 | limitations under the License. 209 | -------------------------------------------------------------------------------- /submissions/space-decay/turbo1.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2019 Uber Technologies, Inc. # 3 | # # 4 | # Licensed under the Uber Non-Commercial License (the "License"); # 5 | # you may not use this file except in compliance with the License. # 6 | # You may obtain a copy of the License at the root directory of this project. 
# 7 | # # 8 | # See the License for the specific language governing permissions and # 9 | # limitations under the License. # 10 | ############################################################################### 11 | 12 | import math 13 | import sys 14 | from copy import deepcopy 15 | 16 | import gpytorch 17 | import numpy as np 18 | import torch 19 | from torch.quasirandom import SobolEngine 20 | 21 | from gp import train_gp 22 | from turbo.utils import from_unit_cube, latin_hypercube, to_unit_cube 23 | 24 | 25 | class Turbo1: 26 | """The TuRBO-1 algorithm. 27 | 28 | Parameters 29 | ---------- 30 | f : function handle 31 | lb : Lower variable bounds, numpy.array, shape (d,). 32 | ub : Upper variable bounds, numpy.array, shape (d,). 33 | n_init : Number of initial points (2*dim is recommended), int. 34 | max_evals : Total evaluation budget, int. 35 | batch_size : Number of points in each batch, int. 36 | verbose : If you want to print information about the optimization progress, bool. 37 | use_ard : If you want to use ARD for the GP kernel. 38 | max_cholesky_size : Largest number of training points where we use Cholesky, int 39 | n_training_steps : Number of training steps for learning the GP hypers, int 40 | min_cuda : We use float64 on the CPU if we have this or fewer datapoints 41 | device : Device to use for GP fitting ("cpu" or "cuda") 42 | dtype : Dtype to use for GP fitting ("float32" or "float64") 43 | 44 | Example usage: 45 | turbo1 = Turbo1(f=f, lb=lb, ub=ub, n_init=n_init, max_evals=max_evals) 46 | turbo1.optimize() # Run optimization 47 | X, fX = turbo1.X, turbo1.fX # Evaluated points 48 | """ 49 | 50 | def __init__( 51 | self, 52 | f, 53 | lb, 54 | ub, 55 | n_init, 56 | max_evals, 57 | batch_size=1, 58 | verbose=True, 59 | use_ard=True, 60 | max_cholesky_size=2000, 61 | n_training_steps=50, 62 | min_cuda=1024, 63 | device="cpu", 64 | dtype="float64", 65 | use_cylinder=False, 66 | budget=16*8, 67 | use_decay=False, 68 | decay_threshold=0.5, 69 | decay_alpha=0.8, 70 | use_pull=0, 71 | use_lcb=0, 72 | kappa=2.0, 73 | length_min=0.5**7, 74 | length_max=1.8, 75 | length_init=0.8, 76 | length_multiplier=2.0, 77 | used_budget=0 78 | ): 79 | 80 | # Very basic input checks 81 | assert lb.ndim == 1 and ub.ndim == 1 82 | assert len(lb) == len(ub) 83 | assert np.all(ub > lb) 84 | assert max_evals > 0 and isinstance(max_evals, int) 85 | assert n_init > 0 and isinstance(n_init, int) 86 | assert batch_size > 0 and isinstance(batch_size, int) 87 | assert isinstance(verbose, bool) and isinstance(use_ard, bool) 88 | assert max_cholesky_size >= 0 and isinstance(batch_size, int) 89 | assert n_training_steps >= 30 and isinstance(n_training_steps, int) 90 | assert max_evals > n_init and max_evals > batch_size 91 | assert device == "cpu" or device == "cuda" 92 | assert dtype == "float32" or dtype == "float64" 93 | if device == "cuda": 94 | assert torch.cuda.is_available(), "can't use cuda if it's not available" 95 | 96 | # Save function information 97 | self.f = f 98 | self.dim = len(lb) 99 | self.lb = lb 100 | self.ub = ub 101 | 102 | # Settings 103 | self.n_init = n_init 104 | self.max_evals = max_evals 105 | self.batch_size = batch_size 106 | self.verbose = verbose 107 | self.use_ard = use_ard 108 | self.max_cholesky_size = max_cholesky_size 109 | self.n_training_steps = n_training_steps 110 | 111 | #cylinder 112 | self.use_cylinder = use_cylinder 113 | 114 | #decay 115 | self.budget = budget 116 | self.used_budget = used_budget 117 | self.use_decay = use_decay 118 | self.decay_alpha = decay_alpha 
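# (Decay, as implemented in _adjust_length below: when use_decay is set and used_budget exceeds decay_threshold * budget, the trust-region length is additionally multiplied by decay_alpha, so the local search region keeps shrinking as the evaluation budget is spent.)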
119 | self.decay_threshold = decay_threshold 120 | 121 | # Hyperparameters 122 | self.mean = np.zeros((0, 1)) 123 | self.signal_var = np.zeros((0, 1)) 124 | self.noise_var = np.zeros((0, 1)) 125 | self.lengthscales = np.zeros((0, self.dim)) if self.use_ard else np.zeros((0, 1)) 126 | 127 | # pull 128 | self.use_pull = use_pull 129 | self.prob_pull = np.ones((0, self.dim)) / self.dim 130 | 131 | #lcb 132 | self.use_lcb = use_lcb 133 | self.kappa = kappa 134 | 135 | # Tolerances and counters 136 | self.n_cand = min(100 * self.dim, 5000) 137 | self.failtol = np.ceil(np.max([4.0 / batch_size, self.dim / batch_size])) 138 | self.succtol = 3 139 | self.n_evals = 0 140 | 141 | # Trust region sizes 142 | self.length_min = length_min 143 | self.length_max = length_max 144 | self.length_init = length_init 145 | self.length_multiplier = length_multiplier 146 | 147 | # Save the full history 148 | self.X = np.zeros((0, self.dim)) 149 | self.fX = np.zeros((0, 1)) 150 | 151 | # Device and dtype for GPyTorch 152 | self.min_cuda = min_cuda 153 | self.dtype = torch.float32 if dtype == "float32" else torch.float64 154 | self.device = torch.device("cuda") if device == "cuda" else torch.device("cpu") 155 | if self.verbose: 156 | print("Using dtype = %s \nUsing device = %s" % (self.dtype, self.device)) 157 | sys.stdout.flush() 158 | 159 | # Initialize parameters 160 | self._restart() 161 | 162 | def _restart(self): 163 | self._X = [] 164 | self._fX = [] 165 | self._predictions = [] 166 | self.failcount = 0 167 | self.succcount = 0 168 | self.initial = 1 169 | self.pull = 1 170 | self.length = self.length_init 171 | self.prob_pull = np.ones(self.dim) / self.dim 172 | self.prob_push = np.ones(self.dim) / self.dim 173 | self.init_iter = True 174 | #print(self.prob_pull) 175 | 176 | def _adjust_length(self, fX_next): 177 | if np.min(fX_next) < np.min(self._fX) - 1e-3 * math.fabs(np.min(self._fX)): 178 | self.succcount += 1 179 | self.failcount = 0 180 | else: 181 | self.succcount = 0 182 | self.failcount += 1 183 | if self.succcount == self.succtol: # Expand trust region 184 | self.length = min([self.length_multiplier * self.length, self.length_max]) 185 | self.succcount = 0 186 | self.pull = 0 187 | elif self.failcount == self.failtol: # Shrink trust region 188 | self.length /= self.length_multiplier 189 | self.failcount = 0 190 | self.pull = 1 191 | print('Use or not decay: ', self.use_decay) 192 | if self.use_decay: 193 | print(self.used_budget) 194 | if self.used_budget > self.decay_threshold * self.budget: 195 | print("Applying decay...") 196 | self.length *= self.decay_alpha #* min(np.random.lognormal(1, 2, 1), 1) 197 | 198 | diff_std = np.std(self.X - self.X[np.argmin(self.fX)], axis=0) 199 | self.prob_pull = np.exp(diff_std) / np.exp( 200 | np.std(self.X - self.X[np.argmin(self.fX)], axis=0)).sum() 201 | c = 0.1 # regularizer 202 | self.prob_push = np.exp(diff_std.max() - diff_std) / np.exp( 203 | diff_std.max() - diff_std).sum() 204 | 205 | def _create_candidates(self, X, fX, length, n_training_steps, hypers, used_budget=None): 206 | """Generate candidates assuming X has been scaled to [0,1]^d.""" 207 | # Pick the center as the point with the smallest function values 208 | # NOTE: This may not be robust to noise, in which case the posterior mean of the GP can be used instead 209 | if used_budget is not None: 210 | self.used_budget = used_budget 211 | assert X.min() >= 0.0 and X.max() <= 1.0 212 | 213 | # Standardize function values. 
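# (The center used below is the median rather than the mean, which is less sensitive to outliers among the observed objective values; a near-zero standard deviation is replaced by 1.0.)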
214 | mu, sigma = np.median(fX), fX.std() 215 | sigma = 1.0 if sigma < 1e-6 else sigma 216 | fX = (deepcopy(fX) - mu) / sigma 217 | 218 | # Figure out what device we are running on 219 | if len(X) < self.min_cuda: 220 | device, dtype = torch.device("cpu"), torch.float64 221 | else: 222 | device, dtype = self.device, self.dtype 223 | 224 | # We use CG + Lanczos for training if we have enough data 225 | with gpytorch.settings.max_cholesky_size(self.max_cholesky_size): 226 | X_torch = torch.tensor(X).to(device=device, dtype=dtype) 227 | y_torch = torch.tensor(fX).to(device=device, dtype=dtype) 228 | gp = train_gp( 229 | train_x=X_torch, train_y=y_torch, use_ard=self.use_ard, num_steps=n_training_steps, hypers=hypers, 230 | use_cylinder=self.use_cylinder, dim=self.dim 231 | ) 232 | 233 | # Save state dict 234 | hypers = gp.state_dict() 235 | self._errors = self.fX - np.array(self._predictions) 236 | # Create the trust region boundaries 237 | x_center = X[fX.argmin().item(), :][None, :] 238 | if not self.use_cylinder: 239 | weights = gp.covar_module.base_kernel.lengthscale.cpu().detach().numpy().ravel() 240 | else: 241 | #weights = gp.covar_module.base_kernel.radial_base_kernel.lengthscale.cpu().detach().numpy().ravel() 242 | weights = gp.covar_module.base_kernel.angular_weights.cpu().detach().numpy().ravel() 243 | weights = weights / weights.mean() # This will make the next line more stable 244 | weights = weights / np.prod(np.power(weights, 1.0 / len(weights))) # We now have weights.prod() = 1 245 | #print('weights', weights) 246 | # TODO: REMOVE 247 | #prob_pert = np.log(self.budget - len(self.fX)) / np.log(self.budget) 248 | #print('prob of pulling appliance:', prob_pert) 249 | # appliance = np.random.choice((1, 0), p=(prob_pert, 1 - prob_pert)) 250 | # print('pull or not: ', appliance) 251 | if self.use_pull == 1: 252 | print("Applying pulling...") 253 | if self.pull: 254 | print('Prob of pulling:', self.prob_pull) 255 | to_pull = np.random.choice(range(0,self.dim), size=min(self.dim, 2), p=self.prob_pull.flatten()) 256 | weights[to_pull] *= 2 257 | else: 258 | print('Prob of pushing:', self.prob_push) 259 | to_push = np.random.choice(range(0, self.dim), size=min(self.dim, 2), p=self.prob_push.flatten()) 260 | weights[to_push] /= 2 261 | lb = np.clip(x_center - weights * length / 2.0, 0.0, 1.0) 262 | ub = np.clip(x_center + weights * length / 2.0, 0.0, 1.0) 263 | #print('lb', lb) 264 | #print('ub', ub) 265 | self.cand_lb = lb 266 | self.cand_ub = ub 267 | 268 | # Draw a Sobolev sequence in [lb, ub] 269 | seed = np.random.randint(int(1e6)) 270 | sobol = SobolEngine(self.dim, scramble=True, seed=seed) 271 | pert = sobol.draw(self.n_cand).to(dtype=dtype, device=device).cpu().detach().numpy() 272 | pert = lb + (ub - lb) * pert 273 | 274 | # Create a perturbation mask 275 | prob_perturb = min(20.0 / self.dim, 1.0) 276 | mask = np.random.rand(self.n_cand, self.dim) <= prob_perturb 277 | ind = np.where(np.sum(mask, axis=1) == 0)[0] 278 | mask[ind, np.random.randint(0, self.dim - 1, size=len(ind))] = 1 279 | 280 | # Create candidate points 281 | X_cand = x_center.copy() * np.ones((self.n_cand, self.dim)) 282 | X_cand[mask] = pert[mask] 283 | 284 | # Figure out what device we are running on 285 | if len(X_cand) < self.min_cuda: 286 | device, dtype = torch.device("cpu"), torch.float64 287 | else: 288 | device, dtype = self.device, self.dtype 289 | 290 | # We may have to move the GP to a new device 291 | gp = gp.to(dtype=dtype, device=device) 292 | 293 | # We use Lanczos for sampling if we have 
enough data 294 | with torch.no_grad(), gpytorch.settings.max_cholesky_size(self.max_cholesky_size): 295 | X_cand_torch = torch.tensor(X_cand).to(device=device, dtype=dtype) 296 | f_preds = gp.likelihood(gp(X_cand_torch)) 297 | self.f_var = f_preds.variance.cpu().detach().numpy() 298 | #print(self.f_var.shape) 299 | y_cand = f_preds.sample(torch.Size([self.batch_size])).t().cpu().detach().numpy() 300 | #print(y_cand.shape) 301 | self.gp = deepcopy(gp) 302 | self.init_iter = False 303 | # Remove the torch variables 304 | del X_torch, y_torch, X_cand_torch, gp 305 | 306 | # De-standardize the sampled values 307 | y_cand = mu + sigma * y_cand 308 | #print(y_cand.shape) 309 | return X_cand, y_cand, hypers 310 | 311 | def _select_candidates(self, X_cand, y_cand): 312 | """Select candidates.""" 313 | X_next = np.ones((self.batch_size, self.dim)) 314 | _y_cand = deepcopy(y_cand) 315 | if self.use_lcb: 316 | print("Applying LCB...") 317 | f_var = np.expand_dims(np.sqrt(self.f_var), 1).repeat(self.batch_size, axis=1) 318 | #print(f_var.shape) 319 | #print(_y_cand.shape) 320 | _y_cand = y_cand - self.kappa * f_var 321 | for i in range(self.batch_size): 322 | # Pick the best point and make sure we never pick it again 323 | indbest = np.argmin(_y_cand[:, i]) 324 | self._predictions.append(_y_cand[indbest, i]) 325 | X_next[i, :] = deepcopy(X_cand[indbest, :]) 326 | _y_cand[indbest, :] = np.inf 327 | return X_next 328 | -------------------------------------------------------------------------------- /submissions/space-decay/sampler.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from skopt.space import Categorical, Integer, Real 4 | from skopt.sampler import InitialPointGenerator, Sobol, Halton, Lhs, Hammersly, Grid 5 | from skopt.space import Space 6 | from scipy.special import binom 7 | from scipy.optimize import minimize 8 | import numpy as np 9 | from sklearn.utils import check_random_state 10 | from scipy.interpolate import interp1d 11 | 12 | def fix_sampler_seed(seed): 13 | if seed is not None: 14 | random.seed(seed) 15 | np.random.seed(seed) 16 | 17 | def cook_initial_point_generator(generator, **kwargs): 18 | """Cook a default initial point generator. 19 | For the special generator called "random" the return value is None. 20 | Parameters 21 | ---------- 22 | generator : "lhs", "sobol", "halton", "hammersly", "grid", "random" \ 23 | or InitialPointGenerator instance" 24 | Should inherit from `skopt.sampler.InitialPointGenerator`. 25 | kwargs : dict 26 | Extra parameters provided to the generator at init time. 27 | """ 28 | if generator is None: 29 | generator = "random" 30 | elif isinstance(generator, str): 31 | generator = generator.lower() 32 | if generator not in ["sobol", "halton", "hammersly", "lhs", "random", 33 | "grid", "maxpro", "maxpro-gd"]: 34 | raise ValueError("Valid strings for the generator parameter " 35 | " are: 'sobol', 'lhs', 'halton', 'hammersly'," 36 | "'random', 'maxpro','maxpro-gd', or 'grid' not " 37 | "%s." % generator) 38 | elif not isinstance(generator, InitialPointGenerator): 39 | raise ValueError("generator has to be an InitialPointGenerator." 
40 | "Got %s" % (str(type(generator)))) 41 | 42 | if isinstance(generator, str): 43 | if generator == "sobol": 44 | generator = Sobol() 45 | elif generator == "halton": 46 | generator = Halton() 47 | elif generator == "hammersly": 48 | generator = Hammersly() 49 | elif generator == "lhs": 50 | generator = Lhs() 51 | elif generator == "grid": 52 | generator = Grid() 53 | elif generator == "random": 54 | return None 55 | elif generator == "maxpro": 56 | generator = MaxPro(use_gradient=False) 57 | elif generator == "maxpro-gd": 58 | generator = MaxPro(use_gradient=True) 59 | generator.set_params(**kwargs) 60 | return generator 61 | 62 | def _random_permute_matrix(h, random_state=None): 63 | rng = check_random_state(random_state) 64 | h_rand_perm = np.zeros_like(h) 65 | samples, n = h.shape 66 | for j in range(n): 67 | order = rng.permutation(range(samples)) 68 | h_rand_perm[:, j] = h[order, j] 69 | return h_rand_perm 70 | 71 | 72 | class MaxPro(InitialPointGenerator): 73 | """Latin hypercube sampling 74 | Parameters 75 | ---------- 76 | lhs_type : str, default='classic' 77 | - 'classic' - a small random number is added 78 | - 'centered' - points are set uniformly in each interval 79 | criterion : str or None, default='maximin' 80 | When set to None, the LHS is not optimized 81 | - 'correlation' : optimized LHS by minimizing the correlation 82 | - 'maximin' : optimized LHS by maximizing the minimal pdist 83 | - 'ratio' : optimized LHS by minimizing the ratio 84 | `max(pdist) / min(pdist)` 85 | iterations : int 86 | Defines the number of iterations for optimizing LHS 87 | """ 88 | def __init__(self, 89 | iterations=1000, use_gradient=True, lhs_type = "classic"): 90 | self.iterations = iterations 91 | self.use_gradient = use_gradient 92 | self.lhs_type = lhs_type 93 | 94 | def generate(self, dimensions, n_samples, random_state=None): 95 | """Creates latin hypercube samples with maxpro criterion. 96 | Parameters 97 | ---------- 98 | dimensions : list, shape (n_dims,) 99 | List of search space dimensions. 100 | Each search dimension can be defined either as 101 | - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer` 102 | dimensions), 103 | - a `(lower_bound, upper_bound, "prior")` tuple (for `Real` 104 | dimensions), 105 | - as a list of categories (for `Categorical` dimensions), or 106 | - an instance of a `Dimension` object (`Real`, `Integer` or 107 | `Categorical`). 108 | n_samples : int 109 | The order of the LHS sequence. Defines the number of samples. 110 | random_state : int, RandomState instance, or None (default) 111 | Set random state to something other than None for reproducible 112 | results. 
113 | Returns 114 | ------- 115 | np.array, shape=(n_dim, n_samples) 116 | LHS set 117 | """ 118 | rng = check_random_state(random_state) 119 | space = Space(dimensions) 120 | transformer = space.get_transformer() 121 | n_dim = space.n_dims 122 | space.set_transformer("normalize") 123 | h = self._lhs_normalized(n_dim, n_samples, rng) 124 | 125 | self.num_pts = n_samples 126 | self.dim = n_dim 127 | if self.use_gradient: 128 | print('Using gradient descent') 129 | bounds = [(0,1)] * len(dimensions) * self.num_pts 130 | h_opt = minimize(self.maxpro_criter, h, jac=self.maxpro_grad, bounds=bounds) 131 | h_opt = h_opt['x'].reshape(n_samples, n_dim) 132 | else: 133 | print('Using naive method') 134 | best = 1e+6 135 | for i in range(self.iterations): 136 | h = self._lhs_normalized(n_dim, n_samples, i*rng) 137 | criter = self.maxpro_criter(h) 138 | if best > criter: 139 | best = criter 140 | h_opt = h.copy() 141 | h_opt = space.inverse_transform(h_opt) 142 | space.set_transformer(transformer) 143 | return h_opt 144 | 145 | def maxpro_criter(self, X): 146 | """ 147 | :param X: all x data 148 | :return: value of MaxPro criterion 149 | """ 150 | x = X.copy() 151 | #print(x.shape) 152 | if x.ndim < 2: 153 | x = x.reshape(self.num_pts, self.dim) 154 | #print(x.shape, self.dim) 155 | res = 1 / binom(self.num_pts, 2) 156 | sum_part = 0 157 | for i in range(self.num_pts-1): 158 | for j in range(i+1, self.num_pts): 159 | #print(np.prod(((x[i] - x[j]) ** 2))) 160 | #print(np.prod(((x[i] - x[j]) ** 2)) ** (-1)) 161 | sum_part += (np.prod(((x[i] - x[j]) ** 2)) + 1e-8)** (-1) 162 | res *= sum_part ** (1/self.dim) 163 | return res 164 | 165 | def maxpro_deriv(self, x, r, s): 166 | """Returns derivative of maxpro criterion for Z_rs 167 | 168 | :param X: all x points 169 | :param r: number of point to get derivative 170 | :param s: number of coordinate to get derivative 171 | :return: derivative value 172 | """ 173 | res = 2 / binom(self.num_pts, 2) 174 | sum_part = 0 175 | for i in range(len(x)): 176 | if i != r: 177 | sum_part += (np.prod(((x[i] - x[r]) ** 2)) + 1e-8) ** (-1) * (x[i][s] - x[r][s] + 1e-8) ** (-1) 178 | res *= sum_part 179 | return res 180 | 181 | def maxpro_grad(self, X): 182 | x = X.copy() 183 | if x.ndim < 2: 184 | x = x.reshape(self.num_pts, self.dim) 185 | grad_val = np.zeros(x.shape) 186 | for r in range(self.num_pts): 187 | for i in range(self.dim): 188 | grad_val[r][i] = self.maxpro_deriv(x, r, i) 189 | return grad_val.flatten() 190 | 191 | def _lhs_normalized(self, n_dim, n_samples, random_state): 192 | rng = check_random_state(random_state) 193 | x = np.linspace(0, 1, n_samples + 1) 194 | u = rng.rand(n_samples, n_dim) 195 | h = np.zeros_like(u) 196 | if self.lhs_type == "centered": 197 | for j in range(n_dim): 198 | h[:, j] = np.diff(x) / 2.0 + x[:n_samples] 199 | elif self.lhs_type == "classic": 200 | for j in range(n_dim): 201 | h[:, j] = u[:, j] * np.diff(x) + x[:n_samples] 202 | else: 203 | raise ValueError("Wrong lhs_type. 
Got {}".format(self.lhs_type)) 204 | return _random_permute_matrix(h, random_state=rng) 205 | 206 | class Sampler: 207 | def __init__(self, method, api_config, n_points=8, generator_kwargs=None): 208 | if generator_kwargs is None: 209 | generator_kwargs = {} 210 | self.method = cook_initial_point_generator(method, **generator_kwargs) 211 | 212 | self.dimensions, self.round_to_values = Sampler.get_sk_dimensions(api_config) 213 | self.dimensions_list = tuple(dd.name for dd in self.dimensions) 214 | 215 | self.n_points = n_points 216 | 217 | @staticmethod 218 | def get_sk_dimensions(api_config, transform="normalize"): 219 | """Help routine to setup skopt search space in constructor. 220 | 221 | Take api_config as argument so this can be static. 222 | """ 223 | # The ordering of iteration probably makes no difference, but just to be 224 | # safe and consistent with space.py, I will make it sorted. 225 | param_list = sorted(api_config.keys()) 226 | 227 | sk_dims = [] 228 | round_to_values = {} 229 | for param_name in param_list: 230 | param_config = api_config[param_name] 231 | 232 | param_type = param_config["type"] 233 | 234 | param_space = param_config.get("space", None) 235 | param_range = param_config.get("range", None) 236 | param_values = param_config.get("values", None) 237 | 238 | # Some setup for the case that a whitelist of values is provided: 239 | values_only_type = param_type in ("cat", "ordinal") 240 | if (param_values is not None) and (not values_only_type): 241 | assert param_range is None 242 | param_values = np.unique(param_values) 243 | param_range = (param_values[0], param_values[-1]) 244 | round_to_values[param_name] = interp1d( 245 | param_values, param_values, kind="nearest", fill_value="extrapolate" 246 | ) 247 | 248 | if param_type == "int": 249 | # Integer space in sklearn does not support any warping => Need 250 | # to leave the warping as linear in skopt. 251 | sk_dims.append(Integer(param_range[0], param_range[-1], transform=transform, name=param_name)) 252 | elif param_type == "bool": 253 | assert param_range is None 254 | assert param_values is None 255 | sk_dims.append(Integer(0, 1, transform=transform, name=param_name)) 256 | elif param_type in ("cat", "ordinal"): 257 | assert param_range is None 258 | # Leave x-form to one-hot as per skopt default 259 | sk_dims.append(Categorical(param_values, name=param_name)) 260 | elif param_type == "real": 261 | # Skopt doesn't support all our warpings, so need to pick 262 | # closest substitute it does support. 263 | prior = "log-uniform" if param_space in ("log", "logit") else "uniform" 264 | sk_dims.append(Real(param_range[0], param_range[-1], prior=prior, transform=transform, name=param_name)) 265 | else: 266 | assert False, "type %s not handled in API" % param_type 267 | return sk_dims, round_to_values 268 | 269 | def generate(self, random_state): 270 | # First get list of lists from the sampling method. 271 | next_guess = self.method.generate(dimensions=self.dimensions, 272 | n_samples=self.n_points, 273 | random_state=random_state) 274 | # Then convert to list of dicts 275 | next_guess = [dict(zip(self.dimensions_list, x)) for x in next_guess] 276 | 277 | # Now do the rounding, custom rounding is not supported in skopt. Note 278 | # that there is not necessarily a round function for each dimension here.
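# (round_to_values, filled in by get_sk_dimensions above, maps each parameter that came with an explicit whitelist of values to a nearest-value interp1d, so every suggestion is snapped back onto the allowed grid.)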
279 | for param_name, round_f in self.round_to_values.items(): 280 | for xx in next_guess: 281 | xx[param_name] = round_f(xx[param_name]) 282 | return next_guess 283 | 284 | 285 | if __name__ == "__main__": 286 | api_config = { 287 | "max_depth": { 288 | "type": "int", 289 | "space": "linear", 290 | "range": (1, 15) 291 | }, 292 | "min_samples_split": { 293 | "type": "real", 294 | "space": "logit", 295 | "range": (0.01, 0.99) 296 | }, 297 | "min_samples_leaf": { 298 | "type": "real", 299 | "space": "logit", 300 | "range": (0.01, 0.49) 301 | }, 302 | "min_weight_fraction_leaf": { 303 | "type": "real", 304 | "space": "logit", 305 | "range": (0.01, 0.49) 306 | }, 307 | "max_features": { 308 | "type": "real", 309 | "space": "logit", 310 | "range": (0.01, 0.99) 311 | }, 312 | "min_impurity_decrease": { 313 | "type": "real", 314 | "space": "linear", 315 | "range": (0.0, 0.5) 316 | }, 317 | } 318 | n_points = 8 319 | 320 | sobol_points = Sampler(method='sobol', api_config=api_config, n_points=n_points).generate(random_state=42) 321 | halton_points = Sampler(method='halton', api_config=api_config, n_points=n_points).generate(random_state=42) 322 | hammersly_points = Sampler(method='hammersly', api_config=api_config, n_points=n_points).generate(random_state=42) 323 | lhs_classic_points = Sampler(method='lhs', api_config=api_config, n_points=n_points, generator_kwargs={'lhs_type': 'classic', 'criterion': 'maximin'}).generate(random_state=42) 324 | lhs_centered_points = Sampler(method='lhs', api_config=api_config, n_points=n_points, generator_kwargs={'lhs_type': 'centered'}).generate(random_state=42) 325 | grid_points = Sampler(method='grid', api_config=api_config, n_points=n_points).generate(random_state=42) 326 | 327 | t = 0 328 | -------------------------------------------------------------------------------- /submissions/space-decay/optimizer.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | from copy import deepcopy 4 | from typing import Optional 5 | 6 | import torch 7 | import numpy as np 8 | from sklearn.cluster import KMeans 9 | from sklearn.neighbors import KNeighborsClassifier 10 | from sklearn.svm import SVC 11 | from turbo.utils import from_unit_cube, latin_hypercube, to_unit_cube 12 | 13 | from bayesmark.abstract_optimizer import AbstractOptimizer 14 | from bayesmark.experiment import experiment_main 15 | from bayesmark.space import JointSpace 16 | 17 | # It depends on scikit-optimize==0.8.dev0, which is not in the default environment. 
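# sampler.Sampler builds the initial experimental designs (sobol, halton, lhs, ...) on top
# of skopt's cook_initial_point_generator, which is why the pinned scikit-optimize build
# has to be bundled with the submission for this import to resolve.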
18 | import sampler 19 | from turbo1 import Turbo1 20 | from util import copula_standardize 21 | 22 | try: 23 | import open3d 24 | DEBUG = True 25 | except ImportError as _: 26 | DEBUG = False 27 | 28 | 29 | def fix_optimizer_seed(seed): 30 | if seed is not None: 31 | random.seed(seed) 32 | np.random.seed(seed) 33 | torch.manual_seed(seed) 34 | 35 | 36 | def _add_pcd(pcds, points, color): 37 | if len(points) == 0: 38 | return 39 | if points.shape[1] == 2: 40 | extended_points = np.zeros((len(points), 3)) 41 | extended_points[:, :2] = points[:, :] 42 | points = extended_points 43 | elif points.shape[1] != 3: 44 | raise ValueError('The points for the DEBUG should either be 2D or 3D.') 45 | pcd = open3d.geometry.PointCloud() 46 | pcd.points = open3d.utility.Vector3dVector(points) 47 | pcd.colors = open3d.utility.Vector3dVector(np.tile(color, (len(points), 1))) 48 | pcds.append(pcd) 49 | 50 | 51 | class SpacePartitioningOptimizer(AbstractOptimizer): 52 | primary_import = 'scikit-learn' 53 | 54 | def __init__(self, api_config, **kwargs): 55 | AbstractOptimizer.__init__(self, api_config) 56 | 57 | print('api_config:', api_config) 58 | self.api_config = api_config 59 | 60 | self.space_x = JointSpace(api_config) 61 | self.bounds = self.space_x.get_bounds() 62 | self.lb, self.ub = self.bounds[:, 0], self.bounds[:, 1] 63 | self.dim = len(self.bounds) 64 | 65 | self.X = np.zeros((0, self.dim)) 66 | self.y = np.zeros((0, 1)) 67 | 68 | self.X_init = None 69 | self.batch_size = None 70 | self.turbo = None 71 | self.split_used = 0 72 | self.node = None 73 | self.best_values = [] 74 | 75 | self.config = self._read_config() 76 | print('config:', self.config) 77 | optimizer_seed = self.config.get('optimizer_seed') 78 | fix_optimizer_seed(optimizer_seed) 79 | self.sampler_seed = self.config.get('sampler_seed') 80 | sampler.fix_sampler_seed(self.sampler_seed) 81 | 82 | self.is_init_batch = False 83 | self.init_batches = [] 84 | 85 | def _read_config(self): 86 | return {'turbo_training_steps': 100, 'turbo_length_retries': 10, 'turbo_length_init_method': 'default', 'experimental_design': 'lhs_classic_ratio', 'n_init_points': 24, 'max_tree_depth': 5, 'kmeans_resplits': 10, 'split_model': {'type': 'SVC', 'args': {'kernel': 'poly', 'gamma': 'scale', 'C': 745.3227447730735}}, 'reset_no_improvement': 8, 'reset_split_after': 4, 'turbo': {'budget': 128, 'use_cylinder': 0, 'use_pull': 0, 'use_lcb': 0, 'kappa': 2.0, 'use_decay': 1, 'decay_alpha': 0.49937937259674076, 'decay_threshold': 0.5, 'length_min': 1e-06, 'length_max': 2.0, 'length_init': 0.8, 'length_multiplier': 2.0}, 'sampler_seed': 42, 'optimizer_seed': 578330} 87 | 88 | def _init(self, n_suggestions): 89 | self.batch_size = n_suggestions 90 | n_init_points = self.config['n_init_points'] 91 | if n_init_points == -1: 92 | # Special value to use the default 2*D+1 number. 
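# (The shipped config above sets n_init_points to 24, so this branch only fires if the
# special value -1 is configured; a 6-dimensional problem would then get 2 * 6 + 1 = 13
# initial points.)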
93 | n_init_points = 2 * self.dim + 1 94 | self.n_init = max(self.batch_size, n_init_points) 95 | exp_design = self.config['experimental_design'] 96 | if exp_design == 'latin_hypercube': 97 | X_init = latin_hypercube(self.n_init, self.dim) 98 | elif exp_design == 'halton': 99 | halton_sampler = sampler.Sampler(method='halton', api_config=self.api_config, n_points=self.n_init) 100 | X_init = halton_sampler.generate(random_state=self.sampler_seed) 101 | X_init = self.space_x.warp(X_init) 102 | X_init = to_unit_cube(X_init, self.lb, self.ub) 103 | elif exp_design == 'lhs_classic_ratio': 104 | lhs_sampler = sampler.Sampler( 105 | method='lhs', 106 | api_config=self.api_config, 107 | n_points=self.n_init, 108 | generator_kwargs={'lhs_type': 'classic', 'criterion': 'ratio'}) 109 | X_init = lhs_sampler.generate(random_state=self.sampler_seed) 110 | X_init = self.space_x.warp(X_init) 111 | X_init = to_unit_cube(X_init, self.lb, self.ub) 112 | else: 113 | raise ValueError(f'Unknown experimental design: {exp_design}.') 114 | self.X_init = X_init 115 | if DEBUG: 116 | print(f'Initialized the method with {self.n_init} points by {exp_design}:') 117 | print(X_init) 118 | 119 | def _get_split_model(self, X, kmeans_labels): 120 | split_model_config = self.config['split_model'] 121 | model_type = split_model_config['type'] 122 | args = split_model_config['args'] 123 | if model_type == 'SVC': 124 | split_model = SVC(**args, max_iter=10**7) 125 | elif model_type == 'KNeighborsClassifier': 126 | split_model = KNeighborsClassifier(**args) 127 | else: 128 | raise ValueError(f'Unknown split model type in the config: {model_type}.') 129 | 130 | split_model.fit(X, kmeans_labels) 131 | split_model_predictions = split_model.predict(X) 132 | split_model_matches = np.sum(split_model_predictions == kmeans_labels) 133 | split_model_mismatches = np.sum(split_model_predictions != kmeans_labels) 134 | print('Labels for the split model:', kmeans_labels) 135 | print('Predictions of the split model:', split_model_predictions) 136 | print(f'Split model matches {split_model_matches} and mismatches {split_model_mismatches}') 137 | return split_model 138 | 139 | def _find_split(self, X, y) -> Optional: 140 | max_margin = None 141 | max_margin_labels = None 142 | for _ in range(self.config['kmeans_resplits']): 143 | kmeans = KMeans(n_clusters=2).fit(y) 144 | kmeans_labels = kmeans.labels_ 145 | if np.count_nonzero(kmeans_labels == 1) > 0 and np.count_nonzero(kmeans_labels == 0) > 0: 146 | if np.mean(y[kmeans_labels == 1]) > np.mean(y[kmeans_labels == 0]): 147 | # Reverse labels if the entries with 1s have a higher mean error, since 1s go to the left branch. 
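# After this flip, label 1 always marks the cluster with the lower mean objective, i.e. the
# "good" region that _build_tree keeps recursing into, while label 0 marks the side that
# gets cut away.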
148 | kmeans_labels = 1 - kmeans_labels 149 | margin = -(np.mean(y[kmeans_labels == 1]) - np.mean(y[kmeans_labels == 0])) 150 | if DEBUG: 151 | print('MARGIN is', margin, np.count_nonzero(kmeans_labels == 1), np.count_nonzero(kmeans_labels == 0)) 152 | if max_margin is None or margin > max_margin: 153 | max_margin = margin 154 | max_margin_labels = kmeans_labels 155 | if DEBUG: 156 | print('MAX MARGIN is', max_margin) 157 | if max_margin_labels is None: 158 | return None 159 | else: 160 | return self._get_split_model(X, max_margin_labels) 161 | 162 | def _build_tree(self, X, y, depth=0): 163 | print('len(X) in _build_tree is', len(X)) 164 | if depth == self.config['max_tree_depth']: 165 | return [] 166 | split = self._find_split(X, y) 167 | if split is None: 168 | return [] 169 | in_region_points = split.predict(X) 170 | left_subtree_size = np.count_nonzero(in_region_points == 1) 171 | right_subtree_size = np.count_nonzero(in_region_points == 0) 172 | print(f'{len(X)} points would be split {left_subtree_size}/{right_subtree_size}.') 173 | if left_subtree_size < self.n_init: 174 | return [] 175 | idx = (in_region_points == 1) 176 | splits = self._build_tree(X[idx], y[idx], depth + 1) 177 | return [split] + splits 178 | 179 | def _get_in_node_region(self, points, splits): 180 | in_region = np.ones(len(points)) 181 | for split in splits: 182 | split_in_region = split.predict(points) 183 | in_region *= split_in_region 184 | return in_region 185 | 186 | def _suggest(self, n_suggestions): 187 | X = to_unit_cube(deepcopy(self.X), self.lb, self.ub) 188 | y = deepcopy(self.y) 189 | if not self.node: 190 | self.split_used = 0 191 | self.node = self._build_tree(X, y) 192 | used_budget = len(y) 193 | idx = (self._get_in_node_region(X, self.node) == 1) 194 | X = X[idx] 195 | y = y[idx] 196 | print(f'Rebuilt the tree of depth {len(self.node)}') 197 | model_config = self.config['turbo'] 198 | #print('CONFIG!!!!!', model_config) 199 | self.turbo = Turbo1( 200 | f=None, 201 | lb=self.bounds[:, 0], 202 | ub=self.bounds[:, 1], 203 | n_init=len(X), 204 | max_evals=np.iinfo(np.int32).max, 205 | batch_size=self.batch_size, 206 | verbose=False, 207 | use_cylinder=model_config['use_cylinder'], 208 | budget=model_config['budget'], 209 | use_decay=model_config['use_decay'], 210 | decay_threshold=model_config['decay_threshold'], 211 | decay_alpha=model_config['decay_alpha'], 212 | use_pull=model_config['use_pull'], 213 | use_lcb=model_config['use_lcb'], 214 | kappa=model_config['kappa'], 215 | length_min=model_config['length_min'], 216 | length_max=model_config['length_max'], 217 | length_init=model_config['length_init'], 218 | length_multiplier=model_config['length_multiplier'], 219 | used_budget=used_budget 220 | ) 221 | self.turbo._X = np.array(X, copy=True) 222 | self.turbo._fX = np.array(y, copy=True) 223 | self.turbo.X = np.array(X, copy=True) 224 | self.turbo.fX = np.array(y, copy=True) 225 | print('Initialized TURBO') 226 | else: 227 | idx = (self._get_in_node_region(X, self.node) == 1) 228 | X = X[idx] 229 | y = y[idx] 230 | self.split_used += 1 231 | 232 | length_init_method = self.config['turbo_length_init_method'] 233 | if length_init_method == 'default': 234 | length = self.turbo.length 235 | elif length_init_method == 'length_init': 236 | length = self.turbo.length_init 237 | elif length_init_method == 'length_max': 238 | length = self.turbo.length_max 239 | elif length_init_method == 'infinity': 240 | length = np.iinfo(np.int32).max 241 | else: 242 | raise ValueError(f'Unknown init method for turbo\'s 
length: {length_init_method}.') 243 | length_reties = self.config['turbo_length_retries'] 244 | for retry in range(length_reties): 245 | XX = X 246 | yy = copula_standardize(y.ravel()) 247 | X_cand, y_cand, _ = self.turbo._create_candidates( 248 | XX, yy, length=length, n_training_steps=self.config['turbo_training_steps'], hypers={}) 249 | in_region_predictions = self._get_in_node_region(X_cand, self.node) 250 | in_region_idx = in_region_predictions == 1 251 | if DEBUG: 252 | print(f'In region: {np.sum(in_region_idx)} out of {len(X_cand)}') 253 | if np.sum(in_region_idx) >= n_suggestions: 254 | X_cand, y_cand = X_cand[in_region_idx], y_cand[in_region_idx] 255 | self.turbo.f_var = self.turbo.f_var[in_region_idx] 256 | if DEBUG: 257 | print('Found a suitable set of candidates.') 258 | break 259 | else: 260 | length /= 2 261 | if DEBUG: 262 | print(f'Retrying {retry + 1}/{length_reties} time') 263 | 264 | X_cand = self.turbo._select_candidates(X_cand, y_cand)[:n_suggestions, :] 265 | if DEBUG: 266 | if X.shape[1] == 3: 267 | tx = np.arange(0.0, 1.0 + 1e-6, 0.1) 268 | ty = np.arange(0.0, 1.0 + 1e-6, 0.1) 269 | tz = np.arange(0.0, 1.0 + 1e-6, 0.1) 270 | p = np.array([[x, y, z] for x in tx for y in ty for z in tz]) 271 | elif X.shape[1] == 2: 272 | tx = np.arange(0.0, 1.0 + 1e-6, 0.1) 273 | ty = np.arange(0.0, 1.0 + 1e-6, 0.1) 274 | p = np.array([[x, y] for x in tx for y in ty]) 275 | else: 276 | raise ValueError('The points for the DEBUG should either be 2D or 3D.') 277 | p_predictions = self._get_in_node_region(p, self.node) 278 | in_turbo_bounds = np.logical_and( 279 | np.all(self.turbo.cand_lb <= p, axis=1), 280 | np.all(p <= self.turbo.cand_ub, axis=1)) 281 | pcds = [] 282 | _add_pcd(pcds, p[p_predictions == 0], (1.0, 0.0, 0.0)) 283 | _add_pcd(pcds, p[np.logical_and(p_predictions == 1, np.logical_not(in_turbo_bounds))], (0.0, 1.0, 0.0)) 284 | _add_pcd(pcds, p[np.logical_and(p_predictions == 1, in_turbo_bounds)], (0.0, 0.5, 0.0)) 285 | _add_pcd(pcds, X_cand, (0.0, 0.0, 0.0)) 286 | open3d.visualization.draw_geometries(pcds) 287 | return X_cand 288 | 289 | def suggest(self, n_suggestions=1): 290 | X_suggestions = np.zeros((n_suggestions, self.dim)) 291 | # Initialize the design if it is the first call 292 | if self.X_init is None: 293 | self._init(n_suggestions) 294 | if self.init_batches: 295 | print('REUSING INITIALIZATION:') 296 | for X, Y in self.init_batches: 297 | print('Re-observing a batch!') 298 | self.observe(X, Y) 299 | self.X_init = [] 300 | 301 | # Pick from the experimental design 302 | n_init = min(len(self.X_init), n_suggestions) 303 | if n_init > 0: 304 | X_suggestions[:n_init] = self.X_init[:n_init] 305 | self.X_init = self.X_init[n_init:] 306 | self.is_init_batch = True 307 | else: 308 | self.is_init_batch = False 309 | 310 | # Pick from the model based on the already received observations 311 | n_suggest = n_suggestions - n_init 312 | if n_suggest > 0: 313 | X_cand = self._suggest(n_suggest) 314 | X_suggestions[-n_suggest:] = X_cand 315 | 316 | # Map into the continuous space with the api bounds and unwarp the suggestions 317 | X_min_bound = 0.0 318 | X_max_bound = 1.0 319 | X_suggestions_min = X_suggestions.min() 320 | X_suggestions_max = X_suggestions.max() 321 | if X_suggestions_min < X_min_bound or X_suggestions_max > X_max_bound: 322 | print(f'Some suggestions are out of the bounds in suggest(): {X_suggestions_min}, {X_suggestions_max}') 323 | print('Clipping everything...') 324 | X_suggestions = np.clip(X_suggestions, X_min_bound, X_max_bound) 325 | X_suggestions = 
from_unit_cube(X_suggestions, self.lb, self.ub) 326 | X_suggestions = self.space_x.unwarp(X_suggestions) 327 | return X_suggestions 328 | 329 | def observe(self, X_observed, Y_observed): 330 | if self.is_init_batch: 331 | self.init_batches.append([X_observed, Y_observed]) 332 | X, Y = [], [] 333 | for x, y in zip(X_observed, Y_observed): 334 | if np.isfinite(y): 335 | X.append(x) 336 | Y.append(y) 337 | else: 338 | # Ignore for now; could potentially substitute with an upper bound. 339 | continue 340 | if not X: 341 | return 342 | X, Y = self.space_x.warp(X), np.array(Y)[:, None] 343 | self.X = np.vstack((self.X, deepcopy(X))) 344 | self.y = np.vstack((self.y, deepcopy(Y))) 345 | self.best_values.append(Y.min()) 346 | 347 | if self.turbo: 348 | if len(self.turbo._X) >= self.turbo.n_init: 349 | self.turbo._adjust_length(Y) 350 | print('TURBO length:', self.turbo.length) 351 | self.turbo._X = np.vstack((self.turbo._X, deepcopy(X))) 352 | self.turbo._fX = np.vstack((self.turbo._fX, deepcopy(Y))) 353 | self.turbo.X = np.vstack((self.turbo.X, deepcopy(X))) 354 | self.turbo.fX = np.vstack((self.turbo.fX, deepcopy(Y))) 355 | 356 | N = self.config['reset_no_improvement'] 357 | if len(self.best_values) > N and np.min(self.best_values[:-N]) <= np.min(self.best_values[-N:]): 358 | print('########## RESETTING COMPLETELY! ##########') 359 | self.X = np.zeros((0, self.dim)) 360 | self.y = np.zeros((0, 1)) 361 | self.best_values = [] 362 | self.X_init = None 363 | self.node = None 364 | self.turbo = None 365 | self.split_used = 0 366 | 367 | if self.split_used >= self.config['reset_split_after']: 368 | print('########## REBUILDING THE SPLIT! ##########') 369 | self.node = None 370 | self.turbo = None 371 | self.split_used = 0 372 | 373 | 374 | if __name__ == '__main__': 375 | experiment_main(SpacePartitioningOptimizer) 376 | --------------------------------------------------------------------------------
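The split step in `_find_split` above (KMeans with two clusters over the observed objective values, a label flip so that 1 marks the lower-error cluster, then an SVC trained on the inputs to predict that region) can be exercised in isolation. The sketch below is a minimal, hypothetical reproduction of that idea using only NumPy and scikit-learn: `toy_objective` and its constants are invented for illustration, and the SVC hyperparameters are simplified stand-ins rather than the tuned values from `_read_config`.

```python
# Minimal sketch of the space-partitioning split used above: cluster the observed
# objective values into "good"/"bad" with KMeans, relabel so 1 = lower mean error,
# then train a classifier on the inputs to predict the good region.
# Hypothetical toy setup; not the tuned configuration from optimizer.py.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.svm import SVC


def toy_objective(x):
    # Simple bowl with its minimum at (0.25, 0.75); stands in for a black-box metric.
    return (x[:, 0] - 0.25) ** 2 + (x[:, 1] - 0.75) ** 2


rng = np.random.RandomState(0)
X = rng.rand(200, 2)                      # observations in the unit cube
y = toy_objective(X).reshape(-1, 1)       # lower is better, as in Bayesmark

kmeans = KMeans(n_clusters=2, n_init=10, random_state=0).fit(y)
labels = kmeans.labels_
if np.mean(y[labels == 1]) > np.mean(y[labels == 0]):
    labels = 1 - labels                   # ensure label 1 = lower mean objective

split = SVC(kernel="poly", gamma="scale", C=100.0, max_iter=10**7)
split.fit(X, labels)

in_good_region = split.predict(X) == 1
print("good-region points:", in_good_region.sum(), "of", len(X))
print("mean y inside :", float(y[in_good_region].mean()))
print("mean y outside:", float(y[~in_good_region].mean()))
```

In the optimizer itself, only the points predicted as 1 are handed to the local TuRBO model in `_suggest`, which is what concentrates the remaining evaluation budget on the most promising part of the search space.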