├── .gitignore ├── Dockerfile ├── LICENSE ├── LICENSE.turbo ├── README.md ├── environment.txt ├── img ├── leaderboard.png └── plot-cv-generalization.png ├── notebooks └── Analyzing Bayesmark Results for Each ML Model.ipynb ├── prepare_upload.sh ├── run_benchmark.py ├── run_local.sh └── submissions └── mksturbo ├── optimizer.py ├── requirements.txt └── turbolib.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Specific to this repo 2 | output/ 3 | out/ 4 | tmp/ 5 | *.zip 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | pip-wheel-metadata/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .cache 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Jupyter Notebook 51 | .ipynb_checkpoints 52 | 53 | # Environments 54 | .env 55 | .venv 56 | env/ 57 | venv/ 58 | ENV/ 59 | env.bak/ 60 | venv.bak/ 61 | 62 | # mypy 63 | .mypy_cache/ 64 | .dmypy.json 65 | dmypy.json 66 | 67 | *.stats 68 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6 2 | 3 | RUN pip install --upgrade pip 4 | 5 | ADD ./environment.txt /usr/src/environment.txt 6 | RUN pip install --no-cache-dir -r /usr/src/environment.txt 7 | 8 | # Installs google cloud sdk, this is mostly for using gsutil to export model. 
9 | # See https://cloud.google.com/ai-platform/training/docs/custom-containers-training 10 | RUN wget -nv \ 11 | https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.tar.gz && \ 12 | mkdir /root/tools && \ 13 | tar xvzf google-cloud-sdk.tar.gz -C /root/tools && \ 14 | rm google-cloud-sdk.tar.gz && \ 15 | /root/tools/google-cloud-sdk/install.sh --usage-reporting=false \ 16 | --path-update=false --bash-completion=false \ 17 | --disable-installation-options && \ 18 | rm -rf /root/.config/* && \ 19 | ln -s /root/.config /config && \ 20 | rm -rf /root/tools/google-cloud-sdk/.install/.backup 21 | ENV PATH $PATH:/root/tools/google-cloud-sdk/bin 22 | RUN echo '[GoogleCompute]\nservice_account = default' > /etc/boto.cfg 23 | 24 | # Install dependencies for slack notification 25 | RUN pip install --no-cache-dir requests 26 | 27 | ADD ./submissions/mksturbo/requirements.txt /usr/src/requirements-mksturbo.txt 28 | RUN pip install --no-cache-dir -U -r /usr/src/requirements-mksturbo.txt 29 | 30 | ADD ./input /usr/src/input 31 | ADD ./run_local.sh /usr/src/run_local.sh 32 | ADD ./run_benchmark.py /usr/src/run_benchmark.py 33 | ADD ./submissions /usr/src/submissions 34 | WORKDIR /usr/src 35 | 36 | CMD ["python", "run_benchmark.py"] 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | `submissions/mksturbo/turbolib.py` is derived 2 | from https://github.com/uber-research/TuRBO on October 14, 2020, hence 3 | it is distributed under LICENSE.turbo. The rest of the files are 4 | distributed under Apache License Version 2.0. 5 | 6 | Apache License 7 | Version 2.0, January 2004 8 | http://www.apache.org/licenses/ 9 | 10 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 11 | 12 | 1. Definitions. 
13 | 14 | "License" shall mean the terms and conditions for use, reproduction, 15 | and distribution as defined by Sections 1 through 9 of this document. 16 | 17 | "Licensor" shall mean the copyright owner or entity authorized by 18 | the copyright owner that is granting the License. 19 | 20 | "Legal Entity" shall mean the union of the acting entity and all 21 | other entities that control, are controlled by, or are under common 22 | control with that entity. For the purposes of this definition, 23 | "control" means (i) the power, direct or indirect, to cause the 24 | direction or management of such entity, whether by contract or 25 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 26 | outstanding shares, or (iii) beneficial ownership of such entity. 27 | 28 | "You" (or "Your") shall mean an individual or Legal Entity 29 | exercising permissions granted by this License. 30 | 31 | "Source" form shall mean the preferred form for making modifications, 32 | including but not limited to software source code, documentation 33 | source, and configuration files. 34 | 35 | "Object" form shall mean any form resulting from mechanical 36 | transformation or translation of a Source form, including but 37 | not limited to compiled object code, generated documentation, 38 | and conversions to other media types. 39 | 40 | "Work" shall mean the work of authorship, whether in Source or 41 | Object form, made available under the License, as indicated by a 42 | copyright notice that is included in or attached to the work 43 | (an example is provided in the Appendix below). 44 | 45 | "Derivative Works" shall mean any work, whether in Source or Object 46 | form, that is based on (or derived from) the Work and for which the 47 | editorial revisions, annotations, elaborations, or other modifications 48 | represent, as a whole, an original work of authorship. 
For the purposes 49 | of this License, Derivative Works shall not include works that remain 50 | separable from, or merely link (or bind by name) to the interfaces of, 51 | the Work and Derivative Works thereof. 52 | 53 | "Contribution" shall mean any work of authorship, including 54 | the original version of the Work and any modifications or additions 55 | to that Work or Derivative Works thereof, that is intentionally 56 | submitted to Licensor for inclusion in the Work by the copyright owner 57 | or by an individual or Legal Entity authorized to submit on behalf of 58 | the copyright owner. For the purposes of this definition, "submitted" 59 | means any form of electronic, verbal, or written communication sent 60 | to the Licensor or its representatives, including but not limited to 61 | communication on electronic mailing lists, source code control systems, 62 | and issue tracking systems that are managed by, or on behalf of, the 63 | Licensor for the purpose of discussing and improving the Work, but 64 | excluding communication that is conspicuously marked or otherwise 65 | designated in writing by the copyright owner as "Not a Contribution." 66 | 67 | "Contributor" shall mean Licensor and any individual or Legal Entity 68 | on behalf of whom a Contribution has been received by Licensor and 69 | subsequently incorporated within the Work. 70 | 71 | 2. Grant of Copyright License. Subject to the terms and conditions of 72 | this License, each Contributor hereby grants to You a perpetual, 73 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 74 | copyright license to reproduce, prepare Derivative Works of, 75 | publicly display, publicly perform, sublicense, and distribute the 76 | Work and such Derivative Works in Source or Object form. 77 | 78 | 3. Grant of Patent License. 
Subject to the terms and conditions of 79 | this License, each Contributor hereby grants to You a perpetual, 80 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 81 | (except as stated in this section) patent license to make, have made, 82 | use, offer to sell, sell, import, and otherwise transfer the Work, 83 | where such license applies only to those patent claims licensable 84 | by such Contributor that are necessarily infringed by their 85 | Contribution(s) alone or by combination of their Contribution(s) 86 | with the Work to which such Contribution(s) was submitted. If You 87 | institute patent litigation against any entity (including a 88 | cross-claim or counterclaim in a lawsuit) alleging that the Work 89 | or a Contribution incorporated within the Work constitutes direct 90 | or contributory patent infringement, then any patent licenses 91 | granted to You under this License for that Work shall terminate 92 | as of the date such litigation is filed. 93 | 94 | 4. Redistribution. 
You may reproduce and distribute copies of the 95 | Work or Derivative Works thereof in any medium, with or without 96 | modifications, and in Source or Object form, provided that You 97 | meet the following conditions: 98 | 99 | (a) You must give any other recipients of the Work or 100 | Derivative Works a copy of this License; and 101 | 102 | (b) You must cause any modified files to carry prominent notices 103 | stating that You changed the files; and 104 | 105 | (c) You must retain, in the Source form of any Derivative Works 106 | that You distribute, all copyright, patent, trademark, and 107 | attribution notices from the Source form of the Work, 108 | excluding those notices that do not pertain to any part of 109 | the Derivative Works; and 110 | 111 | (d) If the Work includes a "NOTICE" text file as part of its 112 | distribution, then any Derivative Works that You distribute must 113 | include a readable copy of the attribution notices contained 114 | within such NOTICE file, excluding those notices that do not 115 | pertain to any part of the Derivative Works, in at least one 116 | of the following places: within a NOTICE text file distributed 117 | as part of the Derivative Works; within the Source form or 118 | documentation, if provided along with the Derivative Works; or, 119 | within a display generated by the Derivative Works, if and 120 | wherever such third-party notices normally appear. The contents 121 | of the NOTICE file are for informational purposes only and 122 | do not modify the License. You may add Your own attribution 123 | notices within Derivative Works that You distribute, alongside 124 | or as an addendum to the NOTICE text from the Work, provided 125 | that such additional attribution notices cannot be construed 126 | as modifying the License. 
127 | 128 | You may add Your own copyright statement to Your modifications and 129 | may provide additional or different license terms and conditions 130 | for use, reproduction, or distribution of Your modifications, or 131 | for any such Derivative Works as a whole, provided Your use, 132 | reproduction, and distribution of the Work otherwise complies with 133 | the conditions stated in this License. 134 | 135 | 5. Submission of Contributions. Unless You explicitly state otherwise, 136 | any Contribution intentionally submitted for inclusion in the Work 137 | by You to the Licensor shall be under the terms and conditions of 138 | this License, without any additional terms or conditions. 139 | Notwithstanding the above, nothing herein shall supersede or modify 140 | the terms of any separate license agreement you may have executed 141 | with Licensor regarding such Contributions. 142 | 143 | 6. Trademarks. This License does not grant permission to use the trade 144 | names, trademarks, service marks, or product names of the Licensor, 145 | except as required for reasonable and customary use in describing the 146 | origin of the Work and reproducing the content of the NOTICE file. 147 | 148 | 7. Disclaimer of Warranty. Unless required by applicable law or 149 | agreed to in writing, Licensor provides the Work (and each 150 | Contributor provides its Contributions) on an "AS IS" BASIS, 151 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 152 | implied, including, without limitation, any warranties or conditions 153 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 154 | PARTICULAR PURPOSE. You are solely responsible for determining the 155 | appropriateness of using or redistributing the Work and assume any 156 | risks associated with Your exercise of permissions under this License. 157 | 158 | 8. Limitation of Liability. 
In no event and under no legal theory, 159 | whether in tort (including negligence), contract, or otherwise, 160 | unless required by applicable law (such as deliberate and grossly 161 | negligent acts) or agreed to in writing, shall any Contributor be 162 | liable to You for damages, including any direct, indirect, special, 163 | incidental, or consequential damages of any character arising as a 164 | result of this License or out of the use or inability to use the 165 | Work (including but not limited to damages for loss of goodwill, 166 | work stoppage, computer failure or malfunction, or any and all 167 | other commercial damages or losses), even if such Contributor 168 | has been advised of the possibility of such damages. 169 | 170 | 9. Accepting Warranty or Additional Liability. While redistributing 171 | the Work or Derivative Works thereof, You may choose to offer, 172 | and charge a fee for, acceptance of support, warranty, indemnity, 173 | or other liability obligations and/or rights consistent with this 174 | License. However, in accepting such obligations, You may act only 175 | on Your own behalf and on Your sole responsibility, not on behalf 176 | of any other Contributor, and only if You agree to indemnify, 177 | defend, and hold each Contributor harmless for any liability 178 | incurred by, or claims asserted against, such Contributor by reason 179 | of your accepting any such warranty or additional liability. 180 | 181 | END OF TERMS AND CONDITIONS 182 | 183 | APPENDIX: How to apply the Apache License to your work. 184 | 185 | To apply the Apache License to your work, attach the following 186 | boilerplate notice, with the fields enclosed by brackets "[]" 187 | replaced with your own identifying information. (Don't include 188 | the brackets!) The text should be enclosed in the appropriate 189 | comment syntax for the file format. 
We also recommend that a 190 | file or class name and description of purpose be included on the 191 | same "printed page" as the copyright notice for easier 192 | identification within third-party archives. 193 | 194 | Copyright 2020 Optuna Developers (Preferred Networks & CyberAgent) 195 | 196 | Licensed under the Apache License, Version 2.0 (the "License"); 197 | you may not use this file except in compliance with the License. 198 | You may obtain a copy of the License at 199 | 200 | http://www.apache.org/licenses/LICENSE-2.0 201 | 202 | Unless required by applicable law or agreed to in writing, software 203 | distributed under the License is distributed on an "AS IS" BASIS, 204 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 205 | See the License for the specific language governing permissions and 206 | limitations under the License. 207 | -------------------------------------------------------------------------------- /LICENSE.turbo: -------------------------------------------------------------------------------- 1 | "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by the text below. 2 | 3 | "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 4 | 5 | "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 6 | 7 | "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
8 | 9 | "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 10 | 11 | "Work" shall mean the work of authorship, whether in Source or Object form, made available under this License. 12 | 13 | This License governs use of the accompanying Work, and your use of the Work constitutes acceptance of this License. 14 | 15 | You may use this Work for any non-commercial purpose, subject to the restrictions in this License. Some purposes which can be non-commercial are teaching, academic research, and personal experimentation. You may also distribute this Work with books or other teaching materials, or publish the Work on websites, that are intended to teach the use of the Work. 16 | 17 | You may not use or distribute this Work, or any derivative works, outputs, or results from the Work, in any form for commercial purposes. Non-exhaustive examples of commercial purposes would be running business operations, licensing, leasing, or selling the Work, or distributing the Work for use with commercial products. 18 | 19 | You may modify this Work and distribute the modified Work for non-commercial purposes, however, you may not grant rights to the Work or derivative works that are broader than or in conflict with those provided by this License. For example, you may not distribute modifications of the Work under terms that would permit commercial use, or under terms that purport to require the Work or derivative works to be sublicensed to others. 20 | 21 | In return, we require that you agree: 22 | 23 | 1. Not to remove any copyright or other notices from the Work. 24 | 25 | 2. That if you distribute the Work in Source or Object form, you will include a verbatim copy of this License. 26 | 27 | 3. 
That if you distribute derivative works of the Work in Source form, you do so only under a license that includes all of the provisions of this License and is not in conflict with this License, and if you distribute derivative works of the Work solely in Object form you do so only under a license that complies with this License. 28 | 29 | 4. That if you have modified the Work or created derivative works from the Work, and distribute such modifications or derivative works, you will cause the modified files to carry prominent notices so that recipients know that they are not receiving the original Work. Such notices must state: (i) that you have changed the Work; and (ii) the date of any changes. 30 | 31 | 5. If you publicly use the Work or any output or result of the Work, you will provide a notice with such use that provides any person who uses, views, accesses, interacts with, or is otherwise exposed to the Work (i) with information of the nature of the Work, (ii) with a link to the Work, and (iii) a notice that the Work is available under this License. 32 | 33 | 6. THAT THE WORK COMES "AS IS", WITH NO WARRANTIES. THIS MEANS NO EXPRESS, IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE OR ANY WARRANTY OF TITLE OR NON-INFRINGEMENT. ALSO, YOU MUST PASS THIS DISCLAIMER ON WHENEVER YOU DISTRIBUTE THE WORK OR DERIVATIVE WORKS. 34 | 35 | 7. THAT NEITHER UBER TECHNOLOGIES, INC. NOR ANY OF ITS AFFILIATES, SUPPLIERS, SUCCESSORS, NOR ASSIGNS WILL BE LIABLE FOR ANY DAMAGES RELATED TO THE WORK OR THIS LICENSE, INCLUDING DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT THE LAW PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUST PASS THIS LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE WORK OR DERIVATIVE WORKS. 36 | 37 | 8. 
That if you sue anyone over patents that you think may apply to the Work or anyone's use of the Work, your license to the Work ends automatically. 38 | 39 | 9. That your rights under the License end automatically if you breach it in any way. 40 | 41 | 10. Uber Technologies, Inc. reserves all rights not expressly granted to you in this License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Optuna Developers' Solution for Black-Box Optimization Challenge 2 | 3 | First of all, we would like to thank BBO Challenge Organizers for this interesting competition. And congratulations to all winners. 4 | Here is the code of Optuna Developers' solution for [NeurIPS 2020 Black-Box Optimization Challenge](https://bbochallenge.com/). 5 | 6 | ## Final Submissions 7 | 8 | ### Results 9 | 10 | Our solution achieved 96.939 for public and also 91.806 for private. 11 | We ranked 9th place in public and 5th place in private. 12 | 13 | ![leaderboard](./img/leaderboard.png) 14 | 15 | ### Solution Overview 16 | 17 | Our solution is based on TuRBO, a batch Bayesian optimization algorithm which restricts the search space to certain trust regions. 18 | We made following improvements: 19 | 20 | 1. **Selection from multiple kernels**: Matern kernels (with `nu=[0.5, 1.5, 2.5]`) and RBF kernel. 21 | 2. **Stagnation Driven Trust Region Restart Strategy**: The trust region update is restarted when all the solutions in the same batch have the same evaluation value. 22 | 3. **Masking Length for the Trust Region**: Avoid to shrink the trust region for categorical and logit variables up to 10 iterations. 23 | 4. **Exhaustive Search**: We do an exhaustive search if the cardinality of the search space is smaller than or equal to the evaluation budge. 24 | 5. 
**Initialization with Sobol Sequences**: we employ Sobol sequences instead of Latin hypercube sampling, which is used by the original TuRBO. 25 | 26 | 27 | ### How to build the final submission 28 | 29 | The final code is placed to `./submissions/mksturbo`. 30 | You can prepare the submission using the `prepare_upload.sh` script. 31 | 32 | ``` 33 | $ ./prepare_upload.sh ./submissions/mksturbo/ 34 | ``` 35 | 36 | 37 | ### How to run local benchmarks using Bayesmark 38 | 39 | You can run local benchamarks on publicly available problems using [Bayesmark](https://github.com/uber/bayesmark) library. 40 | These problems using scikit-learn's classifiers/regressors and its built-in datasets. 41 | See the [Bayesmark documentation](https://bayesmark.readthedocs.io/en/latest/index.html) for the details. 42 | 43 | ``` 44 | $ python3 -m venv venv # Please use Python 3.6.10. 45 | $ source venv/bin/activate 46 | $ pip install -r environment.txt -r submissions/mksturbo/requirements.txt 47 | $ ./run_local.sh ./submissions/mksturbo/ 3 48 | ``` 49 | 50 |
51 | 52 | Faster local benchmarking 53 | 54 | You can also use [run_benchmark.py](./run_benchmark.py) to run local benchmarks. 55 | This script is faster than `run_local.sh` because it runs benchmarks in parallel. 56 | 57 | ``` 58 | $ python run_benchmark.py --task large --repeat 3 --parallelism 16 --out ./output/ --optimizer ./submissions/mksturbo/ 59 | ``` 60 | 61 |
62 | 63 | ## Notebooks 64 | 65 | ### [Analyzing Bayesmark Results for Each ML Model](./notebooks/Analyzing%20Bayesmark%20Results%20for%20Each%20ML%20Model.ipynb) 66 | 67 | ![plot-cv-generalization](./img/plot-cv-generalization.png) 68 | 69 | This notebook demonstrates how to visualize benchmark results of Bayesmark. 70 | 71 | 72 | ## Team Members 73 | 74 | * Masashi Shibata (GitHub: [@c-bata](https://github.com/c-bata)) 75 | * Toshihiko Yanase (GitHub: [@toshihikoyanase](https://github.com/toshihikoyanase)) 76 | * Hideaki Imamura (GitHub: [@HideakiImamura](https://github.com/HideakiImamura)) 77 | * Masahiro Nomura (GitHub: [@nmasahiro](https://github.com/nmasahiro)) 78 | * Takeru Ohta (GitHub: [@sile](https://github.com/sile)) 79 | * Shotaro Sano (GitHub: [@g-votte](https://github.com/g-votte/)) 80 | * Hiroyuki Vincent Yamazaki (GitHub: [@hvy](https://github.com/hvy/)) 81 | 82 | 83 | ## LICENSE 84 | 85 | Our implementation is released under [Apache License 2.0](./LICENSE) license except for the code derived from TuRBO. 
86 | -------------------------------------------------------------------------------- /environment.txt: -------------------------------------------------------------------------------- 1 | # Pinned requirements used in the docker image (0.88) that executes submissions (Python 3.6.10) 2 | absl-py==0.9.0 3 | astunparse==1.6.3 4 | attrs==19.3.0 5 | bayesian-optimization==0.6.0 6 | bayesmark==0.0.6 7 | cachetools==4.1.0 8 | certifi==2020.4.5.1 9 | chardet==3.0.4 10 | cma==3.0.3 11 | coverage==5.1 12 | cycler==0.10.0 13 | decorator==4.4.2 14 | dill==0.3.1.1 15 | fn==0.4.3 16 | future==0.18.2 17 | gast==0.3.3 18 | genty==1.3.2 19 | gitdb==4.0.5 20 | GitPython==3.1.3 21 | google-auth==1.14.3 22 | google-auth-oauthlib==0.4.1 23 | google-pasta==0.2.0 24 | gpytorch==1.1.1 25 | grpcio==1.29.0 26 | h5py==2.10.0 27 | hyperopt==0.1.1 28 | idna==2.9 29 | importlib-metadata==1.6.0 30 | joblib==0.14.1 31 | Keras==2.3.1 32 | Keras-Applications==1.0.8 33 | Keras-Preprocessing==1.1.2 34 | kiwisolver==1.2.0 35 | lightgbm==2.3.1 36 | Markdown==3.2.2 37 | matplotlib==3.2.1 38 | more-itertools==8.2.0 39 | mypy==0.770 40 | mypy-extensions==0.4.3 41 | networkx==2.4 42 | nevergrad==0.1.4 43 | nose==1.3.7 44 | nose-timer==1.0.0 45 | numpy==1.18.5 46 | oauthlib==3.1.0 47 | opentuner==0.8.2 48 | opt-einsum==3.2.1 49 | packaging==20.3 50 | pandas==1.0.5 51 | pathvalidate==2.3.0 52 | pluggy==0.13.1 53 | POAP==0.1.26 54 | protobuf==3.12.0 55 | py==1.8.1 56 | pyasn1==0.4.8 57 | pyasn1-modules==0.2.8 58 | pyDOE2==1.3.0 59 | pymongo==3.10.1 60 | pyparsing==2.4.7 61 | pySOT==0.2.2 62 | pytest==5.4.2 63 | python-dateutil==2.8.1 64 | pytz==2020.1 65 | PyYAML==5.3.1 66 | requests==2.23.0 67 | requests-oauthlib==1.3.0 68 | rsa==4.0 69 | setuptools>=41.2 70 | scikit-learn==0.20.2 71 | scikit-optimize==0.5.2 72 | scipy==1.4.1 73 | six==1.14.0 74 | smmap==3.0.4 75 | SQLAlchemy==1.3.16 76 | tensorboard==2.2.1 77 | tensorboard-plugin-wit==1.6.0.post3 78 | tensorflow==2.2.0 79 | tensorflow-estimator==2.2.0 80 | 
tensorflow-hub==0.8.0 81 | termcolor==1.1.0 82 | threadpoolctl==2.1.0 83 | torch==1.5.0 84 | typed-ast==1.4.1 85 | typing-extensions==3.7.4.2 86 | urllib3==1.25.9 87 | wcwidth==0.1.9 88 | Werkzeug==1.0.1 89 | wrapt==1.12.1 90 | xarray==0.15.1 91 | xgboost==1.1.0 92 | xlrd==1.2.0 93 | xlwt==1.3.0 94 | zipp==3.1.0 95 | -------------------------------------------------------------------------------- /img/leaderboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/optuna/bboc-optuna-developers/e1ceb9da39a1f1819c267c83bf2742e1e0ac6480/img/leaderboard.png -------------------------------------------------------------------------------- /img/plot-cv-generalization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/optuna/bboc-optuna-developers/e1ceb9da39a1f1819c267c83bf2742e1e0ac6480/img/plot-cv-generalization.png -------------------------------------------------------------------------------- /prepare_upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | set -o pipefail 5 | 6 | # Input args 7 | CODE_DIR=$1 8 | 9 | # Setup vars 10 | NAME=upload_$(basename $CODE_DIR) 11 | # Eliminate final slash 12 | CODE_DIR=$(dirname $CODE_DIR)/$(basename $CODE_DIR) 13 | 14 | # Copy in provided files 15 | cp -r -n $CODE_DIR ./$NAME 16 | 17 | # Make a blank req file if none provided 18 | REQ_FILE=./$NAME/requirements.txt 19 | touch $REQ_FILE 20 | 21 | # Download all the wheels/tar balls with our docker as the target 22 | pip download -r $REQ_FILE -d ./$NAME --python-version 37 --implementation cp --platform manylinux1_x86_64 --abi cp37m --no-deps 23 | 24 | # Test zip does not exist yet to avoid clobber 25 | ! 
class BenchmarkLauncher(object):
    """Callable that runs one ``bayesmark-launch`` study for a (models, data) pair.

    An instance captures the launch configuration once; each call builds the
    command line for a single model/dataset combination and blocks until the
    launched process finishes.  Instances are submitted to a process pool, so
    they must stay picklable (plain attributes only).
    """

    def __init__(
        self, db_root, db_id, n_step, n_repeat, n_batch, opt, opt_root, verbose=False
    ):
        # Static launch configuration, shared by every __call__ invocation.
        self.db_root = db_root
        self.db_id = db_id
        self.n_step = n_step
        self.n_repeat = n_repeat
        self.n_batch = n_batch
        self.opt = opt
        self.opt_root = opt_root
        self.verbose = verbose

    def __call__(self, models, data):
        """Run the benchmark for *models* x *data* (whitespace-separated names)."""
        base = [
            "bayesmark-launch",
            "-dir", self.db_root,
            "-b", self.db_id,
            "-n", str(self.n_step),
            "-r", str(self.n_repeat),
            "-p", str(self.n_batch),
            "-o", self.opt,
            "--opt-root", self.opt_root,
        ]
        flags = ["-v"] if self.verbose else []
        cmd = base + flags + ["-c"] + models.split() + ["-d"] + data.split()
        # check=True surfaces a failed launch as CalledProcessError to the caller.
        subprocess.run(cmd, check=True)
def notify_slack(msg, url, channel):
    """Post *msg* to a Slack channel via an incoming webhook.

    Empty-string *url* / *channel* arguments fall back to the
    ``WEBHOOK_URL`` / ``WEBHOOK_SLACK_CHANNEL`` environment variables.
    When the destination is still unknown, the message is printed to
    stdout instead of being sent, so local runs work without Slack
    credentials.
    """
    if url == "":
        url = os.getenv("WEBHOOK_URL", None)
    if channel == "":
        channel = os.getenv("WEBHOOK_SLACK_CHANNEL", None)

    if url is None or channel is None:
        # No webhook configured: degrade gracefully to console output.
        print(msg)
        return

    requests.post(
        url,
        data=json.dumps(
            {
                "channel": channel,
                "text": msg,
                "username": "BBO Challenge Bayesmark Report",
                "link_names": 1,
            }
        ),
        # requests has no default timeout; an unresponsive Slack endpoint
        # must not hang the benchmark run forever.
        timeout=30,
    )
def main():
    """Entry point: run the Bayesmark benchmark suite for one optimizer.

    Parses CLI arguments, launches one ``bayesmark-launch`` process per
    (model, dataset) pair (optionally in parallel), aggregates and analyzes
    the results, reports a summary to Slack, and optionally copies the
    output directory to Google Cloud Storage.

    Raises:
        ValueError: if Bayesmark is not installed, the DBID already exists,
            or custom models/data are invalid.
    """
    parser = argparse.ArgumentParser(description="Run a benchmark of BBO Challenge.")
    parser.add_argument(
        "--task",
        type=str,
        required=True,
        choices=["small", "large", "custom"],
        help="Size of the benchmark task.",
    )
    parser.add_argument(
        "--optimizer", type=str, required=True, help="Path of the optimizer."
    )
    parser.add_argument("--repeat", type=int, default=10, help="Number of repeat.")
    parser.add_argument(
        "--parallelism",
        type=int,
        default=1,
        help="Number of jobs that parallelize the benchmark.",
    )
    parser.add_argument(
        "--out", type=str, default="./out", help="Path of the output directory."
    )
    parser.add_argument(
        "--custom-models",
        type=str,
        default=[],
        nargs="+",
        help="Models to be specified with the custom task type.",
    )
    parser.add_argument(
        "--custom-data",
        type=str,
        default=[],
        nargs="+",
        help="Data to be specified with the custom task type.",
    )
    parser.add_argument("--slack-url", type=str, default="", help="Slack Webhook URL")
    parser.add_argument("--slack-channel", type=str, default="", help="Slack channel")
    parser.add_argument("--job-id", type=str, default="unknown", help="Job ID")
    parser.add_argument("--gcs-path", type=str, default="", help="Path of GCS")

    args = parser.parse_args()

    # Fixed by the challenge setup: 16 suggest/observe steps, 8 suggestions each.
    n_step = 16
    n_batch = 8

    task = args.task
    n_repeat = args.repeat

    slack_url = args.slack_url
    slack_channel = args.slack_channel
    gcs_path = args.gcs_path
    job_id = args.job_id

    # Bayesmark wants the optimizer name and its parent directory separately.
    code_dir = os.path.normpath(args.optimizer)
    opt = os.path.split(code_dir)[1]
    opt_root = os.path.split(code_dir)[0]

    db_root = args.out
    db_id = "run_{}_{}_{}".format(
        task, opt, datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    )

    try:
        subprocess.run(["which", "bayesmark-init"], check=True)
    except subprocess.CalledProcessError:
        raise ValueError(
            "Bayesmark has not been installed. Please try: pip install bayesmark."
        )

    os.makedirs(db_root, exist_ok=True)
    if os.path.exists(os.path.join(db_root, db_id)):
        raise ValueError("The DBID {} already exists.".format(db_id))

    # check=True for consistency with the other bayesmark invocations below;
    # continuing after a failed init would only produce confusing errors later.
    subprocess.run(["bayesmark-init", "-dir", db_root, "-b", db_id], check=True)

    # Reuse the pre-computed random-search baseline instead of re-deriving it.
    name = "baseline-{}-{}.json".format(n_step, n_batch)
    src = os.path.join(os.path.dirname(__file__), "input", name)
    dst = os.path.join(db_root, db_id, "derived", "baseline.json")
    shutil.copy(src, dst)

    if task == "small":
        models = ["DT", "SVM"]
        data = ["boston", "wine"]
    elif task == "large":
        models = FULL_MODELS
        data = FULL_DATA
    elif task == "custom":
        models = args.custom_models
        data = args.custom_data

        for m in models:
            if m not in FULL_MODELS:
                raise ValueError(
                    "Unknown model is specified in `--custom-models`: {}".format(m)
                )
        for d in data:
            if d not in FULL_DATA:
                raise ValueError(
                    "Unknown data is specified in `--custom-data`: {}".format(d)
                )

        if len(models) == 0 and len(data) == 0:
            raise ValueError(
                "Please specify `--custom-models` or `--custom-data` when using the custom task type."
            )
        # An omitted dimension falls back to the full benchmark set.
        if len(models) == 0:
            models = FULL_MODELS
        if len(data) == 0:
            data = FULL_DATA
    else:
        # Unreachable: argparse restricts --task via `choices`, but fail loudly.
        raise ValueError("Unknown task type: {}".format(task))

    launcher = BenchmarkLauncher(
        db_root=db_root,
        db_id=db_id,
        n_step=n_step,
        n_repeat=n_repeat,
        n_batch=n_batch,
        opt=opt,
        opt_root=opt_root,
    )

    failure_count = 0
    # `with` guarantees worker processes are reaped even if we raise below.
    with ProcessPoolExecutor(args.parallelism) as pool:
        futures = [
            pool.submit(launcher, *arg) for arg in itertools.product(models, data)
        ]
        for future in as_completed(futures):
            try:
                future.result()
            except Exception as e:
                # One failed (model, data) pair must not abort the rest.
                failure_count += 1
                print(e)

    cmd = ["bayesmark-agg", "-dir", db_root, "-b", db_id]
    subprocess.run(cmd, check=True)

    # `bayesmark-anal -v` writes its summary report to stderr.
    cmd = ["bayesmark-anal", "-dir", db_root, "-b", db_id, "-v"]
    anal_output = subprocess.run(cmd, stderr=subprocess.PIPE, check=True)
    anal_text_stderr = anal_output.stderr.decode("utf-8")
    print(anal_text_stderr)
    print("\nFailure count: {}".format(failure_count))

    anal_summary = anal_text_stderr.split('----------\n')[1]
    output_path = os.path.abspath(os.path.join(db_root, db_id))
    print("\nOutput path: {}".format(output_path))

    notify_slack(
        f"Job finished: {job_id}\n"
        f"Output Path: {output_path}\n"
        f"bayesmark-anal: {anal_summary}\n"
        f"failure_count: {failure_count}\n",
        slack_url,
        slack_channel,
    )

    if gcs_path:
        db_folders = os.listdir(db_root)
        print("DB Folders: ", " ".join(db_folders))

        # The upload assumes a fresh --out directory holding exactly this run.
        assert len(db_folders) == 1
        cmd = [
            "gsutil",
            "-m",
            "cp",
            "-r",
            os.path.join(db_root, db_folders[0]),
            os.path.join(gcs_path, db_folders[0]),
        ]
        subprocess.run(
            cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
            check=True, timeout=10 * 60,
        )
#!/bin/bash
# Run the challenge benchmark locally for a single optimizer submission.
# Usage: ./run_local.sh <path/to/optimizer-dir> <n_repeat>

set -ex
set -o pipefail

# Default number of steps and batch size for the challenge
N_STEP=16
N_BATCH=8

# Input args
CODE_DIR=$1
N_REPEAT=$2

# Where output goes
DB_ROOT=./output
DBID=run_$(date +"%Y%m%d_%H%M%S")

# Setup vars: bayesmark wants the optimizer name and its parent dir separately
OPT=$(basename $CODE_DIR)
OPT_ROOT=$(dirname $CODE_DIR)

# Check that bayesmark is installed in this environment
which bayesmark-init
which bayesmark-launch
which bayesmark-exp
which bayesmark-agg
which bayesmark-anal

# Ensure output folder exists
mkdir -p $DB_ROOT

# Copy the baseline file in, we can skip this but we must include RandomSearch in the -o list
! test -d $DB_ROOT/$DBID/  # Check the folder does not yet exist
bayesmark-init -dir $DB_ROOT -b $DBID

# By default, runs on all models (-c), data (-d), metrics (-m)
bayesmark-launch -dir $DB_ROOT -b $DBID -n $N_STEP -r $N_REPEAT -p $N_BATCH -o RandomSearch $OPT --opt-root $OPT_ROOT -v -c SVM DT -d boston wine
# To run on all problems use instead (slower):
# bayesmark-launch -dir $DB_ROOT -b $DBID -n $N_STEP -r $N_REPEAT -p $N_BATCH -o $OPT --opt-root $OPT_ROOT -v

# Now aggregate the results
bayesmark-agg -dir $DB_ROOT -b $DBID
# And analyze the scores
bayesmark-anal -dir $DB_ROOT -b $DBID -v
def order_stats(X):
    """Return the rank of every element of ``X`` (ties share the highest rank of their group)."""
    _, inverse, counts = np.unique(X, return_inverse=True, return_counts=True)
    cumulative = np.cumsum(counts)  # Cumulative counts handle ties correctly
    return cumulative[inverse]


def copula_standardize(X):
    """Map a 1-D array to standard-normal space through its empirical quantiles (Gaussian copula)."""
    X = np.nan_to_num(np.asarray(X))  # Replace inf/nan by large finite values
    assert X.ndim == 1 and np.all(np.isfinite(X))
    ranks = order_stats(X)
    quantiles = np.true_divide(ranks, len(X) + 1)
    return ss.norm.ppf(quantiles)


def sobol_sequence(n_pts, sobol_engine):
    """Draw ``n_pts`` quasi-random points from ``sobol_engine`` as a float64 numpy array."""
    draws = sobol_engine.draw(n_pts)
    draws = draws.to(dtype=torch.float64, device=torch.device("cpu"))
    return draws.cpu().detach().numpy()
50 | """ 51 | AbstractOptimizer.__init__(self, api_config) 52 | 53 | self.space_x = JointSpace(api_config) 54 | self.bounds = self.space_x.get_bounds() 55 | self.lb, self.ub = self.bounds[:, 0], self.bounds[:, 1] 56 | self.dim = len(self.bounds) 57 | self.max_evals = np.iinfo(np.int32).max # NOTE: Largest possible int 58 | self.batch_size = None 59 | self.history = [] 60 | 61 | self.turbo = Turbo1( 62 | f=None, 63 | lb=self.bounds[:, 0], 64 | ub=self.bounds[:, 1], 65 | n_init=2 * self.dim + 1, 66 | max_evals=self.max_evals, 67 | batch_size=1, # We need to update this later 68 | verbose=False, 69 | ) 70 | 71 | # todo: add some option or config to switch grid search 72 | self.enable_grid_search = True 73 | self._grid_suggestion = False 74 | if self.enable_grid_search: 75 | self.init_grid() 76 | 77 | self.sobol_engine = torch.quasirandom.SobolEngine(self.turbo.dim, scramble=True, seed=None) 78 | self.create_mask(api_config) 79 | self._suggest_counter = 0 80 | 81 | # Configuration to search using optuna 82 | self._select_cand_kernelwise = False 83 | # self._use_rbf_kernel = True 84 | # self._use_matern_1_2 = True 85 | # self._use_matern_3_2 = True 86 | # self._use_matern_5_2 = True 87 | # self._use_length_mask = True 88 | 89 | def create_mask(self, api_config): 90 | self._length_mask = [] 91 | for _, conf in api_config.items(): 92 | param_type = conf["type"] 93 | param_space = conf.get("space", None) 94 | if param_type in {"bool", "cat"}: 95 | self._length_mask.append(self.turbo.length_max) 96 | elif param_type in {"real", "int"} and param_space == "logit": 97 | self._length_mask.append(self.turbo.length_max) 98 | else: 99 | self._length_mask.append(self.turbo.length_min) 100 | self._length_mask = np.array(self._length_mask) 101 | 102 | def init_grid(self): 103 | use_grid = True 104 | self.grid_keys = None 105 | self.grids = None 106 | self.grid_id = 0 107 | param_value = collections.OrderedDict() 108 | for param in self.space_x.param_list: 109 | space = 
self.space_x.spaces[param] 110 | print(space) 111 | if isinstance(space, bayesmark.space.Integer): 112 | param_value[param] = list(range(space.lower, space.upper + 1)) 113 | elif isinstance(space, bayesmark.space.Categorical): 114 | param_value[param] = list(space.values) 115 | elif isinstance(space, bayesmark.space.Boolean): 116 | param_value[param] = [True, False] 117 | else: 118 | use_grid = False 119 | break 120 | 121 | if use_grid: 122 | n_grids = 1 123 | for v in param_value.values(): 124 | n_grids *= len(v) 125 | 126 | if n_grids <= 8 * 16: 127 | self.grid_keys = list(param_value.keys()) 128 | self.grids = list(itertools.product(*param_value.values())) 129 | 130 | def get_grid_suggestions(self, n_suggestions): 131 | self._grid_suggestion = True 132 | suggestions = [] 133 | for _ in range(n_suggestions): 134 | if self.grid_id >= len(self.grids): 135 | _n_suggestions = n_suggestions- len(suggestions) 136 | suggestions += bayesmark.random_search.suggest_dict([], [], self.api_config, n_suggestions=_n_suggestions) 137 | return suggestions 138 | 139 | suggestion = {} 140 | grid = self.grids[self.grid_id] 141 | for i, k in enumerate(self.grid_keys): 142 | suggestion[k] = grid[i] 143 | self.grid_id += 1 144 | suggestions.append(suggestion) 145 | return suggestions 146 | 147 | def restart(self): 148 | self.turbo._restart() 149 | self.turbo._X = np.zeros((0, self.turbo.dim)) 150 | self.turbo._fX = np.zeros((0, 1)) 151 | X_init = sobol_sequence(self.turbo.n_init, self.sobol_engine) 152 | self.X_init = from_unit_cube(X_init, self.lb, self.ub) 153 | self._suggest_counter = 0 154 | 155 | def suggest(self, n_suggestions=1): 156 | try: 157 | v = self._suggest(n_suggestions) 158 | except Exception as e: 159 | import sys 160 | import traceback 161 | print("Exception:", e) 162 | print("Stacktrace:") 163 | stacktrace = '\n'.join(traceback.format_tb(e.__traceback__)) 164 | print(stacktrace) 165 | sys.exit(1) 166 | return v 167 | 168 | def _suggest(self, n_suggestions=1): 169 | 
    def _suggest(self, n_suggestions=1):
        """Produce ``n_suggestions`` points: pending Sobol initial points first,
        then Thompson-sampled candidates from four GP kernels (one RBF, three
        Matern with nu in {2.5, 1.5, 0.5}).

        Returns the suggestions unwarped back into the original search space.
        """
        if self.batch_size is None:  # Remember the batch size on the first call to suggest
            self.batch_size = n_suggestions
            self.turbo.batch_size = n_suggestions
            # failtol scales with dimensionality per batch, as in TuRBO.
            self.turbo.failtol = np.ceil(np.max([4.0 / self.batch_size, self.dim / self.batch_size]))
            self.turbo.n_init = max([self.turbo.n_init, self.batch_size])
            self.restart()

        # Fully-discrete small spaces are enumerated exhaustively instead.
        if self.grid_keys is not None and self.enable_grid_search:
            return self.get_grid_suggestions(n_suggestions)

        X_next = np.zeros((n_suggestions, self.dim))

        # Pick from the initial points
        n_init = min(len(self.X_init), n_suggestions)
        if n_init > 0:
            X_next[:n_init] = deepcopy(self.X_init[:n_init, :])
            self.X_init = self.X_init[n_init:, :]  # Remove these pending points

        # Get remaining points from TuRBO
        n_adapt = n_suggestions - n_init
        # Matern smoothness per candidate batch; index 0 is replaced by RBF below.
        nus = [2.5, 2.5, 1.5, 0.5]

        if n_adapt > 0:
            # NOTE(review): if self.turbo._X is empty here the adaptive rows of
            # X_next stay all-zero (no random fallback despite the comment) —
            # in practice n_init >= batch size covers the early calls; confirm.
            if len(self.turbo._X) > 0:  # Use random points if we can't fit a GP
                X = to_unit_cube(deepcopy(self.turbo._X), self.lb, self.ub)
                fX = copula_standardize(deepcopy(self.turbo._fX).ravel())  # Use Copula

                # Early on, widen the trust region along masked (discrete/logit)
                # dimensions; after 10 suggests use TuRBO's scalar length.
                if self._suggest_counter < 10:
                    _length = np.array([self.turbo.length] * self.turbo.dim)
                    _length = np.maximum(_length, self._length_mask)
                else:
                    _length = self.turbo.length

                if self._select_cand_kernelwise:
                    # Take the top-2 candidates from each kernel separately.
                    _X_next = np.zeros((0, self.turbo.dim))
                    for i, nu in enumerate(nus):
                        X_cand, y_cand, _ = turbolib._create_candidates(
                            self, X, fX, length=_length, n_training_steps=100, hypers={},
                            is_rbf= i == 0, nu=nu
                        )
                        _X_next = np.vstack((_X_next, self.turbo._select_candidates(X_cand, y_cand)[:2, :]))

                    X_next[-n_adapt:, :] = _X_next[:n_adapt, :]
                    X_next[-n_adapt:, :] = from_unit_cube(X_next[-n_adapt:, :], self.lb, self.ub)
                else:
                    # Pool all kernels' candidates and pick greedily across them.
                    X_cand = np.zeros((len(nus), self.turbo.n_cand, self.turbo.dim))
                    y_cand = np.inf * np.ones((len(nus), self.turbo.n_cand, self.turbo.batch_size))
                    for i, nu in enumerate(nus):
                        X_cand[i, :, :], y_cand[i, :, :], _ = turbolib._create_candidates(
                            self, X, fX, length=_length, n_training_steps=100, hypers={},
                            is_rbf= i == 0, nu=nu
                        )
                    _X_next = turbolib._select_candidates(self, X_cand, y_cand)

                    X_next[-n_adapt:, :] = _X_next[:n_adapt, :]
                    X_next[-n_adapt:, :] = from_unit_cube(X_next[-n_adapt:, :], self.lb, self.ub)

        # Unwarp the suggestions
        suggestions = self.space_x.unwarp(X_next)
        self._suggest_counter += 1
        return suggestions


    def observe(self, X, y):
        """Send an observation of a suggestion back to the optimizer.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated
        """
        # Grid-search runs do not maintain TuRBO state, so nothing to record.
        if self._grid_suggestion:
            return

        assert len(X) == len(y)
        XX, yy = self.space_x.warp(X), np.array(y)[:, None]

        # Only adjust the trust-region length once the initial design is done.
        if len(self.turbo._fX) >= self.turbo.n_init:
            self.turbo._adjust_length(yy)

        self.turbo.n_evals += self.batch_size

        # _X/_fX hold the current trust-region's data; X/fX the full history.
        self.turbo._X = np.vstack((self.turbo._X, deepcopy(XX)))
        self.turbo._fX = np.vstack((self.turbo._fX, deepcopy(yy)))
        self.turbo.X = np.vstack((self.turbo.X, deepcopy(XX)))
        self.turbo.fX = np.vstack((self.turbo.fX, deepcopy(yy)))

        # Check for a restart
        if self.turbo.length < self.turbo.length_min:
            self.restart()

        # Restart if all observation is the same.
        # NOTE(review): for a batch of size 1 the zip below is empty, so this
        # restarts on every observation — presumably only batch sizes > 1 are
        # used here (the challenge uses 8); confirm before reusing elsewhere.
        for y1, y2 in zip(yy.tolist(), yy.tolist()[1:]):
            if y1 != y2:
                return
        self.restart()
265 | for y1, y2 in zip(yy.tolist(), yy.tolist()[1:]): 266 | if y1 != y2: 267 | return 268 | self.restart() 269 | 270 | 271 | if __name__ == "__main__": 272 | experiment_main(TurboOptimizer) 273 | -------------------------------------------------------------------------------- /submissions/mksturbo/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/uber-research/TuRBO.git@master 2 | -------------------------------------------------------------------------------- /submissions/mksturbo/turbolib.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2019 Uber Technologies, Inc. # 3 | # # 4 | # Licensed under the Uber Non-Commercial License (the "License"); # 5 | # you may not use this file except in compliance with the License. # 6 | # You may obtain a copy of the License at the root directory of this project. # 7 | # # 8 | # See the License for the specific language governing permissions and # 9 | # limitations under the License. 
class GP(ExactGP):
    """Exact GP with a constant mean and a scaled Matern kernel of selectable smoothness ``nu``."""

    def __init__(self, train_x, train_y, likelihood, lengthscale_constraint, outputscale_constraint, ard_dims, nu=2.5):
        super(GP, self).__init__(train_x, train_y, likelihood)
        self.ard_dims = ard_dims
        self.mean_module = ConstantMean()
        matern = MaternKernel(lengthscale_constraint=lengthscale_constraint, ard_num_dims=ard_dims, nu=nu)
        self.covar_module = ScaleKernel(matern, outputscale_constraint=outputscale_constraint)

    def forward(self, x):
        # Prior at x: constant mean with the scaled-Matern covariance.
        return MultivariateNormal(self.mean_module(x), self.covar_module(x))


class RBFGP(ExactGP):
    """Exact GP with a constant mean and a scaled RBF (squared-exponential) kernel."""

    def __init__(self, train_x, train_y, likelihood, lengthscale_constraint, outputscale_constraint, ard_dims):
        super(RBFGP, self).__init__(train_x, train_y, likelihood)
        self.ard_dims = ard_dims
        self.mean_module = ConstantMean()
        rbf = RBFKernel(lengthscale_constraint=lengthscale_constraint, ard_num_dims=ard_dims)
        self.covar_module = ScaleKernel(rbf, outputscale_constraint=outputscale_constraint)

    def forward(self, x):
        # Prior at x: constant mean with the scaled-RBF covariance.
        return MultivariateNormal(self.mean_module(x), self.covar_module(x))
def train_gp(train_x, train_y, use_ard, num_steps, hypers=None, is_rbf=False, nu=2.5):
    """Fit an exact GP to ``(train_x, train_y)`` by maximizing the marginal likelihood.

    Parameters
    ----------
    train_x : torch.Tensor, shape (n, d)
        Training inputs (assumed already mapped to the unit cube upstream).
    train_y : torch.Tensor, shape (n,)
        Training targets.
    use_ard : bool
        Use one lengthscale per dimension (ARD) instead of a shared one.
    num_steps : int
        Number of Adam steps on the negative marginal log likelihood.
    hypers : dict or None
        A previously saved ``state_dict`` to warm-start from; when falsy,
        a fixed default initialization is used instead.
        (Default changed from ``{}`` to ``None`` — a mutable default is an
        anti-pattern; a falsy value behaves identically.)
    is_rbf : bool
        Build an RBF kernel instead of a Matern kernel.
    nu : float
        Matern smoothness (ignored when ``is_rbf`` is True).

    Returns
    -------
    The trained model, switched to eval mode.
    """
    assert train_x.ndim == 2
    assert train_y.ndim == 1
    assert train_x.shape[0] == train_y.shape[0]

    # Create hyper parameter bounds
    noise_constraint = Interval(5e-4, 0.2)
    if use_ard:
        lengthscale_constraint = Interval(0.005, 2.0)
    else:
        lengthscale_constraint = Interval(0.005, math.sqrt(train_x.shape[1]))  # [0.005, sqrt(dim)]
    outputscale_constraint = Interval(0.05, 20.0)

    # Create models
    likelihood = GaussianLikelihood(noise_constraint=noise_constraint).to(device=train_x.device, dtype=train_y.dtype)
    ard_dims = train_x.shape[1] if use_ard else None
    if is_rbf:
        model = RBFGP(
            train_x=train_x,
            train_y=train_y,
            likelihood=likelihood,
            lengthscale_constraint=lengthscale_constraint,
            outputscale_constraint=outputscale_constraint,
            ard_dims=ard_dims,
        ).to(device=train_x.device, dtype=train_x.dtype)
    else:
        model = GP(
            train_x=train_x,
            train_y=train_y,
            likelihood=likelihood,
            lengthscale_constraint=lengthscale_constraint,
            outputscale_constraint=outputscale_constraint,
            ard_dims=ard_dims,
            nu=nu,
        ).to(device=train_x.device, dtype=train_x.dtype)

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # "Loss" for GPs - the marginal log likelihood
    mll = ExactMarginalLogLikelihood(likelihood, model)

    # Initialize model hypers: warm-start from a saved state dict if given,
    # otherwise use fixed defaults.
    if hypers:
        model.load_state_dict(hypers)
    else:
        hypers = {
            "covar_module.outputscale": 1.0,
            "covar_module.base_kernel.lengthscale": 0.5,
            "likelihood.noise": 0.005,
        }
        model.initialize(**hypers)

    # Use the adam optimizer
    optimizer = torch.optim.Adam([{"params": model.parameters()}], lr=0.1)

    for _ in range(num_steps):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    # Switch to eval mode
    model.eval()
    likelihood.eval()

    return model
| model.eval() 119 | likelihood.eval() 120 | 121 | return model 122 | 123 | 124 | def _select_candidates(optimizer, X_cand, y_cand): 125 | assert X_cand.shape == (4, optimizer.turbo.n_cand, optimizer.turbo.dim) 126 | assert y_cand.shape == (4, optimizer.turbo.n_cand, optimizer.turbo.batch_size) 127 | assert X_cand.min() >= 0.0 and X_cand.max() <= 1.0 and np.all(np.isfinite(y_cand)) 128 | 129 | X_next = np.zeros((optimizer.turbo.batch_size, optimizer.turbo.dim)) 130 | for k in range(optimizer.turbo.batch_size): 131 | i, j = np.unravel_index(np.argmin(y_cand[:, :, k]), (4, optimizer.turbo.n_cand)) 132 | assert y_cand[:, :, k].min() == y_cand[i, j, k] 133 | X_next[k, :] = deepcopy(X_cand[i, j, :]) 134 | assert np.isfinite(y_cand[i, j, k]) # Just to make sure we never select nan or inf 135 | 136 | # Make sure we never pick this point again 137 | y_cand[i, j, :] = np.inf 138 | 139 | return X_next 140 | 141 | 142 | def _create_candidates(optimizer, X, fX, length, n_training_steps, hypers, is_rbf=False, nu=2.5): 143 | # Pick the center as the point with the smallest function values 144 | # NOTE: This may not be robust to noise, in which case the posterior mean of the GP can be used instead 145 | assert X.min() >= 0.0 and X.max() <= 1.0 146 | 147 | # Standardize function values. 
def _create_candidates(optimizer, X, fX, length, n_training_steps, hypers, is_rbf=False, nu=2.5):
    """Fit a GP to ``(X, fX)`` and Thompson-sample candidate points in a trust region.

    Variant of TuRBO's ``_create_candidates`` that takes the wrapping
    ``optimizer`` (reads ``optimizer.turbo`` settings) and supports an RBF
    kernel (``is_rbf``) or a Matern kernel of smoothness ``nu``.
    ``length`` may be a scalar or a per-dimension array (it broadcasts below).
    Returns ``(X_cand, y_cand, hypers)`` where ``hypers`` is the fitted GP's
    state dict, suitable for warm-starting a later call.
    """
    # Pick the center as the point with the smallest function values
    # NOTE: This may not be robust to noise, in which case the posterior mean of the GP can be used instead
    assert X.min() >= 0.0 and X.max() <= 1.0

    # Standardize function values.
    mu, sigma = np.median(fX), fX.std()
    sigma = 1.0 if sigma < 1e-6 else sigma
    fX = (deepcopy(fX) - mu) / sigma

    # Figure out what device we are running on
    if len(X) < optimizer.turbo.min_cuda:
        device, dtype = torch.device("cpu"), torch.float64
    else:
        device, dtype = optimizer.turbo.device, optimizer.turbo.dtype

    # We use CG + Lanczos for training if we have enough data
    with gpytorch.settings.max_cholesky_size(optimizer.turbo.max_cholesky_size):
        X_torch = torch.tensor(X).to(device=device, dtype=dtype)
        y_torch = torch.tensor(fX).to(device=device, dtype=dtype)
        gp = train_gp(
            train_x=X_torch, train_y=y_torch, use_ard=optimizer.turbo.use_ard, num_steps=n_training_steps, hypers=hypers,
            is_rbf=is_rbf, nu=nu,
        )

        # Save state dict
        hypers = gp.state_dict()

    # Create the trust region boundaries: a box around the incumbent, scaled
    # per-dimension by the (normalized) learned lengthscales.
    x_center = X[fX.argmin().item(), :][None, :]
    weights = gp.covar_module.base_kernel.lengthscale.cpu().detach().numpy().ravel()
    weights = weights / weights.mean()  # This will make the next line more stable
    weights = weights / np.prod(np.power(weights, 1.0 / len(weights)))  # We now have weights.prod() = 1
    lb = np.clip(x_center - weights * length / 2.0, 0.0, 1.0)
    ub = np.clip(x_center + weights * length / 2.0, 0.0, 1.0)

    # Draw a Sobol sequence in [lb, ub]
    seed = np.random.randint(int(1e6))
    sobol = torch.quasirandom.SobolEngine(optimizer.turbo.dim, scramble=True, seed=seed)
    pert = sobol.draw(optimizer.turbo.n_cand).to(dtype=dtype, device=device).cpu().detach().numpy()
    pert = lb + (ub - lb) * pert

    # Create a perturbation mask: in high dimension, each candidate only
    # perturbs ~20 coordinates from the center on average.
    prob_perturb = min(20.0 / optimizer.turbo.dim, 1.0)
    mask = np.random.rand(optimizer.turbo.n_cand, optimizer.turbo.dim) <= prob_perturb
    ind = np.where(np.sum(mask, axis=1) == 0)[0]
    # NOTE(review): np.random.randint's upper bound is exclusive, so rows with
    # an empty mask never get their LAST dimension forced on — inherited from
    # upstream TuRBO; confirm whether this is intentional.
    mask[ind, np.random.randint(0, optimizer.turbo.dim - 1, size=len(ind))] = 1

    # Create candidate points
    X_cand = x_center.copy() * np.ones((optimizer.turbo.n_cand, optimizer.turbo.dim))
    X_cand[mask] = pert[mask]

    # Figure out what device we are running on (candidate count may differ
    # from the training-set size, so re-check the CUDA threshold)
    if len(X_cand) < optimizer.turbo.min_cuda:
        device, dtype = torch.device("cpu"), torch.float64
    else:
        device, dtype = optimizer.turbo.device, optimizer.turbo.dtype

    # We may have to move the GP to a new device
    gp = gp.to(dtype=dtype, device=device)

    # We use Lanczos for sampling if we have enough data
    with torch.no_grad(), gpytorch.settings.max_cholesky_size(optimizer.turbo.max_cholesky_size):
        X_cand_torch = torch.tensor(X_cand).to(device=device, dtype=dtype)
        y_cand = gp.likelihood(gp(X_cand_torch)).sample(torch.Size([optimizer.turbo.batch_size])).t().cpu().detach().numpy()

    # Remove the torch variables
    del X_torch, y_torch, X_cand_torch, gp

    # De-standardize the sampled values
    y_cand = mu + sigma * y_cand

    return X_cand, y_cand, hypers