├── test ├── __init__.py ├── resnet │ ├── __init__.py │ ├── benchmark_resnet.py │ ├── benchmark_resnet_runtest.py │ ├── benchmark_resnet_utils.py │ ├── resnet_obj.py │ └── resnet_dataset.py ├── awd_lstm_lm │ ├── __init__.py │ ├── .gitignore │ ├── locked_dropout.py │ ├── utils.py │ ├── embed_regularize.py │ ├── LICENSE │ ├── getdata.sh │ ├── data.py │ ├── benchmark_lstm.py │ ├── generate.py │ ├── weight_drop.py │ ├── model.py │ ├── benchmark_lstm_utils.py │ ├── pointer.py │ └── benchmark_lstm_runtest.py ├── autogluon_abohb │ ├── __init__.py │ ├── process_csv.py │ └── process_history.py ├── math_benchmarks │ ├── __init__.py │ └── math_utils.py ├── nas_benchmarks │ ├── __init__.py │ ├── nasbench201_utils.py │ └── benchmark_nasbench201.py ├── benchmark_show_runtest.py ├── benchmark_xgb_worker.py ├── benchmark_xgb.py ├── preprocess_data.py ├── benchmark_plot_test.py ├── benchmark_xgb_runtest.py ├── utils.py ├── benchmark_process_record.py └── benchmark_plot.py ├── tuner ├── surrogate │ ├── __init__.py │ ├── rf_ensemble.py │ ├── gp_ensemble.py │ └── mf_gp.py ├── acq_maximizer │ ├── __init__.py │ └── ei_optimization.py ├── mq_mf_worker_gpu.py ├── async_mq_mf_worker_gpu.py ├── __init__.py ├── async_mq_random.py ├── xgb_model.py ├── mq_random_search.py ├── async_mq_hb.py ├── async_mq_hb_v0.py ├── utils.py ├── async_mq_bo.py ├── mq_mf_worker.py ├── mq_bohb_v0.py ├── async_mq_mf_worker.py ├── async_mq_ea.py ├── async_mq_bohb.py ├── mq_bo.py ├── mq_sh.py ├── mq_hb.py └── async_mq_base_facade.py ├── requirements.txt └── .gitignore /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/resnet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/awd_lstm_lm/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tuner/surrogate/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/autogluon_abohb/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/math_benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/nas_benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tuner/acq_maximizer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/awd_lstm_lm/.gitignore: -------------------------------------------------------------------------------- 1 | *.pt 2 | __pycache__/ 3 | *.data -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn==0.21.3 2 | ConfigSpace==0.4.18 3 | pyrfr==0.8.0 4 | openbox==0.7.9 5 | xgboost==1.3.1 6 | torch==1.7.1 7 | torchvision 8 | nas-bench-201 9 | 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # setup.py 2 | build 3 | dist 4 | *.egg* 5 | 6 | # Python 7 | *.pyc 8 | *.pyo 9 | 10 | # IDEs 11 | .idea 12 | 13 | # MACOSX 14 | .DS_Store 15 | 16 | 17 | # Other 
class LockedDropout(nn.Module):
    """Dropout whose mask is sampled once and reused along dimension 0.

    The mask has shape ``(1, x.size(1), x.size(2))`` and is broadcast over the
    first dimension of ``x``, so the input must be 3-dimensional.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x, dropout=0.5):
        """Apply the shared dropout mask to ``x``.

        Args:
            x: 3-D tensor to be masked.
            dropout: probability of zeroing an element of the mask; a falsy
                value disables dropout.

        Returns:
            ``x`` itself when the module is in eval mode or ``dropout`` is
            falsy, otherwise ``x`` multiplied by the rescaled mask.
        """
        if not self.training or not dropout:
            return x
        keep_prob = 1 - dropout
        # new_empty() allocates on x's device/dtype and is not tracked by
        # autograd, replacing the deprecated Variable(x.data.new(...)) idiom.
        mask = x.new_empty(1, x.size(1), x.size(2)).bernoulli_(keep_prob)
        # Rescale kept entries so the expected activation is unchanged.
        mask = mask / keep_prob
        return mask.expand_as(x) * x
def repackage_hidden(h):
    """Detach hidden state(s) from the autograd graph.

    A tensor is returned detached; anything else is iterated, each element
    is processed recursively, and the results are packed into a tuple.
    """
    if not isinstance(h, torch.Tensor):
        detached = [repackage_hidden(item) for item in h]
        return tuple(detached)
    return h.detach()
def embedded_dropout(embed, words, dropout=0.1, scale=None):
    """Look up ``words`` in ``embed`` with whole embedding rows dropped out.

    Args:
        embed: a ``torch.nn.Embedding``; its ``weight``, ``padding_idx``,
            ``max_norm``, ``norm_type``, ``scale_grad_by_freq`` and ``sparse``
            attributes are read.
        words: index tensor passed to ``torch.nn.functional.embedding``.
        dropout: probability of zeroing an entire row of the weight matrix;
            a falsy value disables dropout.
        scale: optional tensor, expanded to the weight's shape and multiplied
            into the (masked) weight.

    Returns:
        The embedded ``words`` computed from the masked weight matrix.
    """
    weight = embed.weight
    if dropout:
        keep_prob = 1 - dropout
        # One Bernoulli draw per vocabulary row, broadcast across the
        # embedding dimension so a word is kept or dropped as a whole.
        row_mask = weight.detach().new_empty((weight.size(0), 1)).bernoulli_(keep_prob)
        masked_embed_weight = row_mask.expand_as(weight) / keep_prob * weight
    else:
        masked_embed_weight = weight
    if scale:
        masked_embed_weight = scale.expand_as(masked_embed_weight) * masked_embed_weight

    # Forward the module's own padding_idx unchanged.  Substituting -1 for
    # None makes F.embedding treat the LAST vocabulary row as padding, so that
    # row would never receive a gradient.
    return torch.nn.functional.embedding(
        words, masked_embed_weight,
        embed.padding_idx, embed.max_norm, embed.norm_type,
        embed.scale_grad_by_freq, embed.sparse,
    )
5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /test/awd_lstm_lm/getdata.sh: -------------------------------------------------------------------------------- 1 | echo "=== Acquiring datasets ===" 2 | echo "---" 3 | mkdir -p save 4 | 5 | mkdir -p data 6 | cd data 7 | 8 | echo "- Downloading WikiText-2 (WT2)" 9 | wget --quiet --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip 10 | unzip -q wikitext-2-v1.zip 11 | cd wikitext-2 12 | mv wiki.train.tokens train.txt 13 | mv wiki.valid.tokens valid.txt 14 | mv wiki.test.tokens test.txt 15 | cd .. 16 | 17 | echo "- Downloading WikiText-103 (WT2)" 18 | wget --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip 19 | unzip -q wikitext-103-v1.zip 20 | cd wikitext-103 21 | mv wiki.train.tokens train.txt 22 | mv wiki.valid.tokens valid.txt 23 | mv wiki.test.tokens test.txt 24 | cd .. 25 | 26 | echo "- Downloading enwik8 (Character)" 27 | mkdir -p enwik8 28 | cd enwik8 29 | wget --continue http://mattmahoney.net/dc/enwik8.zip 30 | python prep_enwik8.py 31 | cd .. 32 | 33 | echo "- Downloading Penn Treebank (PTB)" 34 | wget --quiet --continue http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz 35 | tar -xzf simple-examples.tgz 36 | 37 | mkdir -p penn 38 | cd penn 39 | mv ../simple-examples/data/ptb.train.txt train.txt 40 | mv ../simple-examples/data/ptb.test.txt test.txt 41 | mv ../simple-examples/data/ptb.valid.txt valid.txt 42 | cd .. 43 | 44 | echo "- Downloading Penn Treebank (Character)" 45 | mkdir -p pennchar 46 | cd pennchar 47 | mv ../simple-examples/data/ptb.char.train.txt train.txt 48 | mv ../simple-examples/data/ptb.char.test.txt test.txt 49 | mv ../simple-examples/data/ptb.char.valid.txt valid.txt 50 | cd .. 
class Dictionary(object):
    """Bidirectional word <-> index vocabulary with occurrence counts."""

    def __init__(self):
        self.word2idx = {}        # word -> integer id
        self.idx2word = []        # integer id -> word
        self.counter = Counter()  # integer id -> number of add_word() calls
        self.total = 0            # total number of add_word() calls

    def add_word(self, word):
        """Register one occurrence of ``word`` and return its integer id."""
        if word not in self.word2idx:
            self.idx2word.append(word)
            self.word2idx[word] = len(self.idx2word) - 1
        token_id = self.word2idx[word]
        self.counter[token_id] += 1
        self.total += 1
        return token_id

    def __len__(self):
        return len(self.idx2word)


class Corpus(object):
    """Tokenized train/valid/test splits sharing a single ``Dictionary``.

    ``path`` is a directory that must contain ``train.txt``, ``valid.txt``
    and ``test.txt``.
    """

    def __init__(self, path):
        self.dictionary = Dictionary()
        self.train = self.tokenize(os.path.join(path, 'train.txt'))
        self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
        self.test = self.tokenize(os.path.join(path, 'test.txt'))

    def tokenize(self, path):
        """Tokenize a text file into a 1-D ``torch.long`` tensor of word ids.

        Every line is split on whitespace and followed by one end-of-line
        marker token.  All tokens are added to ``self.dictionary``.

        Raises:
            FileNotFoundError: if ``path`` does not exist.
        """
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if not os.path.exists(path):
            raise FileNotFoundError(path)

        # Single pass: add_word() already returns the id, so the file does
        # not need to be re-read, and one small tensor per line avoids slow
        # per-element tensor writes.
        chunks = []
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                # NOTE(review): the end-of-line marker is the empty string
                # here; it may originally have been '<eos>' and been lost in
                # extraction -- confirm before relying on it.
                words = line.split() + ['']
                line_ids = [self.dictionary.add_word(word) for word in words]
                chunks.append(torch.tensor(line_ids, dtype=torch.long))

        if not chunks:
            return torch.empty(0, dtype=torch.long)
        return torch.cat(chunks)
test/benchmark_show_runtest.py --model xgb --datasets covtype --mths hyperband-n8 5 | 6 | 7 | """ 8 | import os 9 | import sys 10 | import time 11 | import argparse 12 | import numpy as np 13 | import pickle as pkl 14 | 15 | sys.path.insert(0, ".") 16 | sys.path.insert(1, "../open-box") # for dependency 17 | from utils import setup_exp 18 | 19 | default_mths = 'random-n8,hyperband-n8' 20 | 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('--datasets', type=str) 23 | parser.add_argument('--mths', type=str, default=default_mths) 24 | parser.add_argument('--runtime_limit', type=int) # if you don't want to use default setup 25 | parser.add_argument('--model', type=str, default='xgb') 26 | parser.add_argument('--std_scale', type=float, default=0.5) 27 | 28 | args = parser.parse_args() 29 | test_datasets = args.datasets.split(',') 30 | mths = args.mths.split(',') 31 | model = args.model 32 | std_scale = args.std_scale 33 | 34 | print(test_datasets) 35 | if std_scale != 1: 36 | print('=== Caution: std_scale:', std_scale) 37 | 38 | 39 | for dataset in test_datasets: 40 | # setup 41 | _, runtime_limit, _ = setup_exp(dataset, 1, 1, 1) 42 | if args.runtime_limit is not None: 43 | runtime_limit = args.runtime_limit 44 | for mth in mths: 45 | perfs = [] 46 | dir_path = 'data/benchmark_%s/%s-%d/%s/' % (model, dataset, runtime_limit, mth) 47 | for file in os.listdir(dir_path): 48 | if file.startswith('incumbent_test_perf_%s-%s-' % (mth, dataset)) and file.endswith('.pkl'): 49 | with open(os.path.join(dir_path, file), 'rb') as f: 50 | perf = pkl.load(f) 51 | perfs.append(perf) 52 | m = np.mean(perfs).item() 53 | s = np.std(perfs).item() 54 | if dataset in ['cifar10-valid', 'cifar100', 'ImageNet16-120', 'penn']: 55 | print(dataset, mth, perfs, u'%.2f\u00B1%.2f' % (m, s)) 56 | else: 57 | print(dataset, mth, perfs, u'%.4f\u00B1%.4f' % (m, s)) 58 | -------------------------------------------------------------------------------- /tuner/__init__.py: 
-------------------------------------------------------------------------------- 1 | from tuner.mq_random_search import mqRandomSearch 2 | from tuner.mq_bo import mqBO 3 | from tuner.mq_sh import mqSuccessiveHalving 4 | from tuner.mq_hb import mqHyperband 5 | from tuner.mq_bohb_v0 import mqBOHB_v0 6 | from tuner.mq_bohb_v2 import mqBOHB_v2 7 | from tuner.mq_mfes import mqMFES 8 | from tuner.async_mq_random import async_mqRandomSearch 9 | from tuner.async_mq_bo import async_mqBO 10 | from tuner.async_mq_ea import async_mqEA 11 | from tuner.async_mq_sh import async_mqSuccessiveHalving 12 | from tuner.async_mq_sh_v0 import async_mqSuccessiveHalving_v0 13 | from tuner.async_mq_hb import async_mqHyperband 14 | from tuner.async_mq_hb_v0 import async_mqHyperband_v0 15 | from tuner.async_mq_bohb import async_mqBOHB 16 | from tuner.async_mq_mfes import async_mqMFES 17 | 18 | mth_dict = dict( 19 | # random=(mqRandomSearch, 'sync'), # sync random (not used) 20 | bo=(mqBO, 'sync'), # batch BO 21 | sh=(mqSuccessiveHalving, 'sync'), # Successive Halving 22 | hyperband=(mqHyperband, 'sync'), # Hyperband 23 | bohb=(mqBOHB_v0, 'sync'), # BOHB 24 | bohbv2=(mqBOHB_v2, 'sync'), # tpe 25 | mfeshb=(mqMFES, 'sync'), # MFES-HB 26 | arandom=(async_mqRandomSearch, 'async'), # A-Random 27 | abo=(async_mqBO, 'async'), # async batch BO, A-BO 28 | area=(async_mqEA, 'async', dict(strategy='oldest')), # Asynchronous Evolutionary Algorithm 29 | areav2=(async_mqEA, 'async', dict(strategy='worst')), # Asynchronous Evolutionary Algorithm 30 | asha=(async_mqSuccessiveHalving_v0, 'async'), # original asha 31 | asha_delayed=(async_mqSuccessiveHalving, 'async'), # delayed asha 32 | ahyperband=(async_mqHyperband_v0, 'async'), # A-Hyperband with original asha 33 | ahyperband_delayed=(async_mqHyperband, 'async'), # A-Hyperband with delayed asha 34 | abohb=(async_mqBOHB, 'async'), # A-BOHB*: our implementation version. 
class async_mqRandomSearch(async_mqBaseFacade):
    """
    Asynchronous Random Search: every job is a freshly sampled configuration
    evaluated with resource ``R``.
    """

    def __init__(self, objective_func,
                 config_space: ConfigurationSpace,
                 R,
                 random_state=1,
                 method_id='mqAsyncRandomSearch',
                 restart_needed=True,
                 time_limit_per_trial=600,
                 runtime_limit=None,
                 ip='',
                 port=13579,
                 authkey=b'abc',
                 **kwargs):
        super().__init__(
            objective_func,
            method_name=method_id,
            restart_needed=restart_needed,
            time_limit_per_trial=time_limit_per_trial,
            runtime_limit=runtime_limit,
            max_queue_len=1000,  # conservative design
            ip=ip,
            port=port,
            authkey=authkey,
        )
        self.R = R
        self.seed = random_state
        self.config_space = config_space
        self.config_space.seed(self.seed)

        # Configurations handed out so far; passed to the sampler as exclusions.
        self.all_configs = set()

        # Observed (config, perf) pairs, kept as two parallel lists.
        self.incumbent_configs = []
        self.incumbent_perfs = []

        self.logger.info('Unused kwargs: %s' % kwargs)

    def get_job(self):
        """
        Sample a configuration not handed out before and return it together
        with the resource ``R`` and the extra conf ``{'initial_run': True}``.
        """
        config = sample_configuration(self.config_space, excluded_configs=self.all_configs)
        self.all_configs.add(config)
        return config, self.R, dict(initial_run=True)

    def update_observation(self, config, perf, n_iteration):
        """Record one finished evaluation; ``n_iteration`` must equal ``R``."""
        assert int(n_iteration) == self.R
        self.incumbent_configs.append(config)
        self.incumbent_perfs.append(perf)

    def get_incumbent(self, num_inc=1):
        """Return the ``num_inc`` observations with the smallest perf as (configs, perfs)."""
        assert len(self.incumbent_perfs) == len(self.incumbent_configs)
        best = np.argsort(self.incumbent_perfs)[:num_inc]
        configs = [self.incumbent_configs[k] for k in best]
        perfs = [self.incumbent_perfs[k] for k in best]
        return configs, perfs
def mf_objective_func_math(config, n_resource, extra_conf, total_resource, eta, problem, continue_training):
    """Evaluate ``problem`` on ``config`` at fidelity ``n_resource / total_resource``.

    Returns a dict with ``objective_value`` (to be minimized) and a simulated
    ``elapsed_time``.  When ``continue_training`` is set and this is not the
    initial run, the simulated time is reduced by the ``1 / eta`` share.
    """
    print('objective extra conf:', extra_conf)

    perf = problem.evaluate_config(config, fidelity=n_resource / total_resource)
    print('config: %s, resource: %f/%f, perf=%f'
          % (config, n_resource, total_resource, perf))

    elapsed = 270 * n_resource / total_resource
    resumed = continue_training and not extra_conf['initial_run']
    if resumed:
        elapsed -= 270 * n_resource / eta / total_resource

    return {
        'objective_value': perf,  # minimize
        'elapsed_time': elapsed,
    }
def worker_run(i):
    """Build the worker matching the module-level ``parallel_strategy`` and run it.

    Reads the module-level ``objective_function``, ``ip`` and ``port``.
    ``i`` is only used in the exit message.
    """
    for strategy, worker_cls in (('sync', mqmfWorker), ('async', async_mqmfWorker)):
        if parallel_strategy == strategy:
            break
    else:
        raise ValueError('Error parallel_strategy: %s.' % parallel_strategy)

    worker = worker_cls(objective_function, ip, port)
    worker.run()
    print("Worker %d exit." % (i,))
"""
example cmdline:

python test/awd_lstm_lm/benchmark_lstm.py --mth hyperband --R 27 --n_workers 4 --rep 1 --start_id 0

"""

import os
import sys
import time
import argparse
import numpy as np
import pickle as pkl

sys.path.insert(0, ".")
sys.path.insert(1, "../open-box")    # for dependency
from tuner import mth_dict
from benchmark_lstm_utils import run_exp

parser = argparse.ArgumentParser()
parser.add_argument('--mth', type=str, default='hyperband')
parser.add_argument('--dataset', type=str, default='penn')
parser.add_argument('--R', type=int, default=27)
parser.add_argument('--eta', type=int, default=3)
parser.add_argument('--n_jobs', type=int, default=4)

parser.add_argument('--ip', type=str, default='127.0.0.1')
parser.add_argument('--port', type=int, default=0)
parser.add_argument('--n_workers', type=int)        # must set

parser.add_argument('--runtime_limit', type=int, default=172800)
parser.add_argument('--time_limit_per_trial', type=int, default=999999)

parser.add_argument('--rep', type=int, default=1)
parser.add_argument('--start_id', type=int, default=0)

args = parser.parse_args()
algo_name = args.mth
dataset = args.dataset
R = args.R
eta = args.eta
n_jobs = args.n_jobs                                # changed according to dataset

ip = args.ip
port = args.port
n_workers = args.n_workers  # Caution: must set for saving result to different dirs

runtime_limit = args.runtime_limit                  # changed according to dataset
time_limit_per_trial = args.time_limit_per_trial    # changed according to dataset

rep = args.rep
start_id = args.start_id

print(ip, port, n_jobs, n_workers, dataset)
print(R, eta)
print(runtime_limit)
for para in (ip, port, n_jobs, R, eta, n_workers, runtime_limit):
    assert para is not None

# Each mth_dict entry is (class, strategy) or (class, strategy, extra kwargs).
mth_info = mth_dict[algo_name]
if len(mth_info) == 2:
    algo_class, parallel_strategy = mth_info
    algo_kwargs = dict()
elif len(mth_info) == 3:
    algo_class, parallel_strategy, algo_kwargs = mth_info
    # Copy before adding run-specific keys so the dict stored in the shared
    # mth_dict registry is not mutated.
    algo_kwargs = dict(algo_kwargs)
else:
    raise ValueError('error mth info: %s' % mth_info)
# objective_func, config_space, random_state, method_id, runtime_limit, time_limit_per_trial, ip, port
# are filled in run_exp()
algo_kwargs['R'] = R
algo_kwargs['eta'] = eta
algo_kwargs['restart_needed'] = True

from tuner.mq_random_search import mqRandomSearch
from tuner.mq_bo import mqBO
if algo_class in (mqRandomSearch, mqBO):
    print('set algo_class n_workers:', n_workers)
    algo_kwargs['n_workers'] = n_workers

run_exp(dataset, algo_class, algo_kwargs, algo_name, n_workers, parallel_strategy,
        R, n_jobs, runtime_limit, time_limit_per_trial, start_id, rep, ip, port,
        eta=eta, pre_sample=False, run_test=True)
18 | parser.add_argument('--data', type=str, default='./data/penn', 19 | help='location of the data corpus') 20 | parser.add_argument('--model', type=str, default='LSTM', 21 | help='type of recurrent net (LSTM, QRNN)') 22 | parser.add_argument('--checkpoint', type=str, default='./model.pt', 23 | help='model checkpoint to use') 24 | parser.add_argument('--outf', type=str, default='generated.txt', 25 | help='output file for generated text') 26 | parser.add_argument('--words', type=int, default='1000', 27 | help='number of words to generate') 28 | parser.add_argument('--seed', type=int, default=1111, 29 | help='random seed') 30 | parser.add_argument('--cuda', action='store_true', 31 | help='use CUDA') 32 | parser.add_argument('--temperature', type=float, default=1.0, 33 | help='temperature - higher will increase diversity') 34 | parser.add_argument('--log-interval', type=int, default=100, 35 | help='reporting interval') 36 | args = parser.parse_args() 37 | 38 | # Set the random seed manually for reproducibility. 
class XGBoost:
    """Holder for XGBoost hyperparameters with a lazily built classifier.

    The constructor only records the hyperparameters; the underlying
    ``xgboost.XGBClassifier`` is created when :meth:`fit` is called.
    """

    def __init__(self, n_estimators, learning_rate, max_depth, min_child_weight,
                 subsample, colsample_bytree, gamma=None, reg_alpha=None, reg_lambda=None,
                 n_jobs=4, seed=47):
        # get_cs() samples n_estimators as a float, so coerce it to int here.
        self.n_estimators = int(n_estimators)
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.subsample = subsample
        self.min_child_weight = min_child_weight
        self.colsample_bytree = colsample_bytree
        self.gamma = gamma
        self.reg_alpha = reg_alpha
        self.reg_lambda = reg_lambda

        self.n_jobs = n_jobs
        self.random_state = np.random.RandomState(seed)
        # Stays None until fit() has been called.
        self.estimator = None

    def fit(self, X, y):
        """Build an ``XGBClassifier`` from the stored settings, fit it, return self."""
        from xgboost import XGBClassifier

        # The sklearn interface of xgboost picks the objective automatically.
        params = dict(
            use_label_encoder=False,
            max_depth=self.max_depth,
            learning_rate=self.learning_rate,
            n_estimators=self.n_estimators,
            min_child_weight=self.min_child_weight,
            subsample=self.subsample,
            colsample_bytree=self.colsample_bytree,
            gamma=self.gamma,
            reg_alpha=self.reg_alpha,
            reg_lambda=self.reg_lambda,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
        )
        self.estimator = XGBClassifier(**params)
        self.estimator.fit(X, y)
        return self

    def predict(self, X):
        """Return the fitted estimator's predictions for ``X``.

        Raises NotImplementedError when fit() has not been called yet.
        """
        if self.estimator is not None:
            return self.estimator.predict(X)
        raise NotImplementedError()

    @staticmethod
    def get_cs():
        """Return the ConfigurationSpace describing the tunable hyperparameters."""
        hyperparameters = [
            UniformFloatHyperparameter("n_estimators", 100, 1000, q=50, default_value=500),
            UniformIntegerHyperparameter("max_depth", 1, 12),
            UniformFloatHyperparameter("learning_rate", 1e-3, 0.9, log=True, default_value=0.1),
            UniformFloatHyperparameter("min_child_weight", 0, 10, q=0.1, default_value=1),
            UniformFloatHyperparameter("subsample", 0.1, 1, q=0.1, default_value=1),
            UniformFloatHyperparameter("colsample_bytree", 0.1, 1, q=0.1, default_value=1),
            UniformFloatHyperparameter("gamma", 0, 10, q=0.1, default_value=0),
            UniformFloatHyperparameter("reg_alpha", 0, 10, q=0.1, default_value=0),
            UniformFloatHyperparameter("reg_lambda", 1, 10, q=0.1, default_value=1),
        ]
        space = ConfigurationSpace()
        space.add_hyperparameters(hyperparameters)
        return space
class mqRandomSearch(mqBaseFacade):
    """Random search driven through the message-queue facade.

    Each iteration samples ``n_workers`` configurations, evaluates them with
    resource ``R`` via ``run_in_parallel`` and records the returned losses.
    """

    def __init__(self, objective_func,
                 config_space: ConfigurationSpace,
                 R,
                 n_workers,
                 num_iter=10000,
                 random_state=1,
                 method_id='mqRandomSearch',
                 restart_needed=True,
                 time_limit_per_trial=600,
                 runtime_limit=None,
                 ip='',
                 port=13579,
                 authkey=b'abc',
                 **kwargs):
        # Conservative design: never let the queue be shorter than 1000.
        queue_len = max(1000, 3 * n_workers)
        super().__init__(objective_func, method_name=method_id,
                         restart_needed=restart_needed, time_limit_per_trial=time_limit_per_trial,
                         runtime_limit=runtime_limit,
                         max_queue_len=queue_len, ip=ip, port=port, authkey=authkey)
        self.seed = random_state
        self.config_space = config_space
        self.config_space.seed(self.seed)
        self.R = R
        self.n_workers = n_workers
        self.num_iter = num_iter
        # Parallel lists: evaluated configurations and their validation losses.
        self.incumbent_configs = []
        self.incumbent_perfs = []
        self.logger.info('Unused kwargs: %s' % kwargs)

    def run(self):
        """Run ``num_iter`` iterations; any exception is printed and logged, not re-raised."""
        try:
            for round_no in range(1, self.num_iter + 1):
                self.logger.info('-' * 50)
                self.logger.info("Random Search algorithm: %d/%d iteration starts" % (round_no, self.num_iter))
                began = time.time()
                self.iterate()
                minutes = (time.time() - began) / 60
                self.logger.info("iteration took %.2f min." % minutes)
                self.save_intermediate_statistics()
        except Exception as e:
            print(e)
            print(traceback.format_exc())
            self.logger.error(traceback.format_exc())

    def iterate(self):
        """Sample one batch of configurations, evaluate it and record the losses."""
        batch = sample_configurations(self.config_space, self.n_workers)
        results, _early_stops = self.run_in_parallel(batch, self.R, None, initial_run=True)

        self.incumbent_configs.extend(batch)
        self.incumbent_perfs.extend([entry['loss'] for entry in results])
        self.add_stage_history(self.stage_id, self.global_incumbent)
        self.stage_id += 1

    def get_incumbent(self, num_inc=1):
        """Return the ``num_inc`` lowest-loss configurations and their losses."""
        assert (len(self.incumbent_perfs) == len(self.incumbent_configs))
        best = np.argsort(self.incumbent_perfs)[0:num_inc]
        configs = [self.incumbent_configs[i] for i in best]
        targets = [self.incumbent_perfs[i] for i in best]
        return configs, targets
if model not in ('nasbench101', 'nasbench201', 'math'): 29 | simulation_factor = 1 30 | else: 31 | print('simulation factor:', simulation_factor) 32 | for para in (dataset, runtime_limit): 33 | assert para is not None 34 | 35 | 36 | for mth in mths: 37 | dir_path = 'data/benchmark_%s/%s-%d/%s/' % (model, dataset, runtime_limit, mth) 38 | for file in os.listdir(dir_path): 39 | if file.startswith('result_%s-%s-' % (mth, dataset)) and file.endswith('.csv'): 40 | new_file = 'new_record_' + file[7:-4] + '.pkl' 41 | # history.pkl has higher priority 42 | if new_file in os.listdir(dir_path): 43 | print('%s already exists. pass.' % new_file) 44 | continue 45 | 46 | df = pd.read_csv(os.path.join(dir_path, file), header='infer', sep=',') 47 | 48 | recorder = [] 49 | for idx, row in df.iterrows(): # for row in df.itertuples(): getattr(row, 'col_name') 50 | epoch = row['epoch'] 51 | if epoch < R: 52 | continue 53 | elif epoch > R: 54 | raise ValueError('please check R in settings.', R, mth, idx, row) 55 | 56 | # time_step = row['time_step'] 57 | runtime = row['runtime'] 58 | performance = row['performance'] 59 | test_perf = row['test_perf'] if 'test_perf' in row else None 60 | eval_time = row['eval_time'] 61 | 62 | if runtime > runtime_limit: 63 | print('abandon record by runtime:', runtime, runtime_limit) 64 | continue 65 | 66 | record = { 67 | 'time_consumed': eval_time * simulation_factor, 68 | 'configuration': None, # todo 69 | 'global_time': runtime * simulation_factor, 70 | 'n_iteration': epoch, 71 | 'return_info': { 72 | 'loss': -performance, # minimize 73 | 'test_perf': test_perf, # already processed 74 | }, 75 | } 76 | recorder.append(record) 77 | 78 | recorder.sort(key=lambda rec: rec['global_time']) 79 | # write new 80 | with open(os.path.join(dir_path, new_file), 'wb') as f: 81 | pkl.dump(recorder, f) 82 | print('recorder len:', mth, len(recorder), df.shape[0]) 83 | -------------------------------------------------------------------------------- 
OP_NUM = 6
MAX_IEPOCH = 200


def load_nasbench201(path='../nas_data/NAS-Bench-201-v1_1-096897.pth'):
    """Load the NAS-Bench-201 API from ``path`` and print how long loading took."""
    started = time.time()
    api = API(path, verbose=False)
    print('nas-bench-201 load time: %.2fs' % (time.time() - started))
    return api


def get_nasbench201_configspace():
    """Return a ConfigurationSpace with OP_NUM categorical operation choices (op0..)."""
    op_list = ['none', 'skip_connect', 'nor_conv_1x1', 'nor_conv_3x3', 'avg_pool_3x3']
    cs = ConfigurationSpace()
    for idx in range(OP_NUM):
        name = 'op%d' % idx
        cs.add_hyperparameter(CategoricalHyperparameter(name, choices=op_list, default_value='none'))
    return cs


def objective_func(config, n_resource, extra_conf, total_resource, eta, api, dataset):
    """Query the benchmark for ``config`` at the epoch implied by ``n_resource``.

    Returns a dict with ``objective_value`` (negated validation accuracy),
    ``test_perf`` (negated test accuracy or None) and ``elapsed_time`` (the
    training time minus the time already spent at the previous rung unless
    ``extra_conf['initial_run']`` is true).
    """
    assert dataset in ['cifar10-valid', 'cifar10', 'cifar100', 'ImageNet16-120']
    print('objective extra conf:', extra_conf)
    iepoch = int(MAX_IEPOCH * n_resource / total_resource) - 1

    # Encode the six operations as a NAS-Bench-201 architecture string.
    ops = tuple(config['op%d' % k] for k in range(6))
    arch = '|%s~0|+|%s~0|%s~1|+|%s~0|%s~1|%s~2|' % ops

    info = api.get_more_info(arch, dataset, iepoch=iepoch, hp='200', is_random=False)
    train_time = info['train-all-time']

    # dataset -> (key holding the validation metric, whether test accuracy is reported)
    metric_table = {
        'cifar10-valid': ('valid-accuracy', True),
        'cifar10': ('test-accuracy', False),
        'cifar100': ('valtest-accuracy', True),
        'ImageNet16-120': ('valtest-accuracy', True),
    }
    entry = metric_table.get(dataset)
    if entry is None:
        raise ValueError
    val_key, report_test = entry
    val_perf = info[val_key]
    test_perf = info.get('test-accuracy', None) if report_test else None

    # Time already accounted for by the checkpoint of the previous rung.
    if extra_conf['initial_run']:
        last_train_time = 0.0
    else:
        last_iepoch = int(MAX_IEPOCH * (n_resource / eta) / total_resource) - 1
        last_info = api.get_more_info(arch, dataset, iepoch=last_iepoch, hp='200', is_random=False)
        last_train_time = last_info['train-all-time']

    return dict(
        objective_value=-val_perf,  # minimize
        test_perf=None if test_perf is None else -test_perf,  # minimize
        elapsed_time=train_time - last_train_time,
    )


if __name__ == '__main__':
    cs = get_nasbench201_configspace()
    for i in range(3):
        conf = cs.sample_configuration()
        print(conf)

    test_load = False
    if test_load:
        api = load_nasbench201('../nas_data/NAS-Bench-201-v1_1-096897.pth')
        conf = cs.sample_configuration()
        print(conf)
        dataset = 'cifar10-valid'
        extra_conf = dict(initial_run=True)
        result = objective_func(conf, 3, extra_conf, total_resource=27, eta=3, api=api, dataset=dataset)
        print(result)
22 | runtime_limit=None, 23 | ip='', 24 | port=13579, 25 | authkey=b'abc',): 26 | super().__init__(objective_func, config_space, R, eta=eta, 27 | random_state=random_state, method_id=method_id, restart_needed=restart_needed, 28 | time_limit_per_trial=time_limit_per_trial, runtime_limit=runtime_limit, 29 | ip=ip, port=port, authkey=authkey) 30 | 31 | self.skip_outer_loop = skip_outer_loop 32 | 33 | # construct hyperband iteration list for initial configs 34 | self.hb_bracket_id = 0 # index the chosen bracket in self.hb_bracket_list 35 | self.hb_bracket_list = list() # record iteration lists of all brackets 36 | self.hb_iter_id = 0 # index the current chosen n_iteration in self.hb_iter_list 37 | self.hb_iter_list = list() # record current iteration list 38 | self.B = (self.s_max + 1) * self.R 39 | for s in reversed(range(self.skip_outer_loop, self.s_max + 1)): 40 | # Initial number of configurations 41 | n = int(ceil(self.B / self.R / (s + 1) * self.eta ** s)) 42 | # Initial number of iterations per config 43 | r = self.R * self.eta ** (-s) 44 | # construct iteration list 45 | self.hb_bracket_list.append([r] * n) 46 | self.hb_iter_list = self.hb_bracket_list[0] 47 | self.logger.info('hyperband iteration lists of all brackets: %s. init bracket: %s.' 48 | % (self.hb_bracket_list, self.hb_iter_list)) 49 | 50 | def choose_next(self): 51 | """ 52 | sample a random config. give iterations according to Hyperband strategy. 
def load_dataset(dataset, data_dir):
    """Load ``<data_dir>/<dataset>.csv`` and split it into features and labels.

    No label encoding is applied. The label column position, header handling
    and separator are chosen per dataset name. Rows that are entirely empty
    and rows whose label is missing are removed.

    Returns:
        (train_X, train_y): a DataFrame without the label column and a numpy
        array holding the label values.
    """
    data_path = os.path.join(data_dir, "%s.csv" % dataset)

    # Position of the label column depends on the dataset.
    if dataset in ['higgs', 'amazon_employee', 'spectf', 'usps', 'vehicle_sensIT', 'codrna', 'HIGGS']:
        label_col = 0
    elif dataset in ['rmftsa_sleepdata(1)']:
        label_col = 1
    else:
        label_col = -1

    # These files ship without a header row.
    if dataset in ['spambase', 'messidor_features', 'covtype', 'HIGGS']:
        header = None
    else:
        header = 'infer'

    if dataset in ['winequality_white', 'winequality_red']:
        sep = ';'
    else:
        sep = ','

    na_values = ["n/a", "na", "--", "-", "?"]
    keep_default_na = True
    df = pd.read_csv(data_path, keep_default_na=keep_default_na,
                     na_values=na_values, header=header, sep=sep)

    # Drop the rows with all NaNs. dropna returns a new frame, so the result
    # must be kept (the previous call discarded it and had no effect).
    df = df.dropna(how='all')

    label_colname = df.columns[label_col]

    # Delete the rows with a missing label. notnull() works for any dtype,
    # whereas np.isnan raises TypeError on string labels.
    df = df[df[label_colname].notnull()]

    train_y = df[label_colname].values

    # Delete the label column.
    train_X = df.drop(label_colname, axis=1)
    return train_X, train_y
class async_mqHyperband_v0(async_mqSuccessiveHalving_v0):
    """
    The implementation of Asynchronous Hyperband (extended of ASHA)
    origin version

    Initial resource amounts for new configurations are handed out by cycling
    through per-bracket iteration lists built in the constructor.
    """
    def __init__(self, objective_func,
                 config_space: ConfigurationSpace,
                 R,
                 eta=3,
                 skip_outer_loop=0,
                 random_state=1,
                 method_id='mqAsyncHyperband',
                 restart_needed=True,
                 time_limit_per_trial=600,
                 runtime_limit=None,
                 ip='',
                 port=13579,
                 authkey=b'abc',):
        super().__init__(objective_func, config_space, R, eta=eta,
                         random_state=random_state, method_id=method_id, restart_needed=restart_needed,
                         time_limit_per_trial=time_limit_per_trial, runtime_limit=runtime_limit,
                         ip=ip, port=port, authkey=authkey)

        self.skip_outer_loop = skip_outer_loop

        # Cursor state: which bracket is active and the position inside it.
        self.hb_bracket_id = 0
        self.hb_iter_id = 0
        self.B = (self.s_max + 1) * self.R

        # One iteration list per bracket, from s_max down to skip_outer_loop.
        brackets = []
        for s in range(self.s_max, self.skip_outer_loop - 1, -1):
            # Initial number of configurations in this bracket.
            n_configs = int(ceil(self.B / self.R / (s + 1) * self.eta ** s))
            # Initial number of iterations per configuration.
            n_iters = self.R * self.eta ** (-s)
            brackets.append([n_iters] * n_configs)
        self.hb_bracket_list = brackets
        self.hb_iter_list = self.hb_bracket_list[0]
        self.logger.info('hyperband iteration lists of all brackets: %s. init bracket: %s.'
                         % (self.hb_bracket_list, self.hb_iter_list))

    def choose_next(self):
        """
        sample a random config. give iterations according to Hyperband strategy.
        """
        n_iteration = self.get_next_n_iteration()
        rung_id = self.get_rung_id(self.bracket, n_iteration)
        already_tried = self.bracket[rung_id]['configs']

        config = sample_configuration(self.config_space, excluded_configs=already_tried)
        extra_conf = {}
        return config, n_iteration, extra_conf

    def get_next_n_iteration(self):
        """Return the next initial resource and advance the bracket cursor."""
        chosen = self.hb_iter_list[self.hb_iter_id]
        self.hb_iter_id += 1
        if self.hb_iter_id != len(self.hb_iter_list):
            return chosen

        # Current bracket exhausted: move on, wrapping after the last bracket.
        self.hb_iter_id = 0
        self.hb_bracket_id += 1
        if self.hb_bracket_id == len(self.hb_bracket_list):
            self.hb_bracket_id = 0
        self.hb_iter_list = self.hb_bracket_list[self.hb_bracket_id]
        self.logger.info('iteration list of next bracket: %s' % self.hb_iter_list)
        return chosen
args.default_value 32 | 33 | print('start', dataset) 34 | # setup 35 | _, runtime_limit, _ = setup_exp(dataset, 1, 1, 1) 36 | if args.runtime_limit is not None: 37 | runtime_limit = args.runtime_limit 38 | point_num = 300 39 | 40 | 41 | result = dict() 42 | for mth in mths: 43 | stats = [] 44 | dir_path = 'data/benchmark_%s/%s-%d/%s/' % (model, dataset, runtime_limit, mth) 45 | for file in os.listdir(dir_path): 46 | if file.startswith('new_record_%s-%s-' % (mth, dataset)) and file.endswith('.pkl'): 47 | with open(os.path.join(dir_path, file), 'rb') as f: 48 | raw_recorder = pkl.load(f) 49 | recorder = [] 50 | for record in raw_recorder: 51 | # if record.get('n_iteration') is not None and record['n_iteration'] < R: 52 | # print('error abandon record by n_iteration:', R, mth, record) 53 | # continue 54 | if record['global_time'] > runtime_limit: 55 | print('abandon record by runtime_limit:', runtime_limit, mth, record) 56 | continue 57 | recorder.append(record) 58 | recorder.sort(key=lambda rec: rec['global_time']) 59 | # print([(rec['global_time'], rec['return_info']['loss']) for rec in recorder]) 60 | print('new recorder len:', mth, len(recorder), len(raw_recorder)) 61 | 62 | best_val_perf = recorder[0]['return_info']['loss'] 63 | timestamps = [recorder[0]['global_time']] 64 | test_perfs = [recorder[0]['return_info'].get('test_perf', None)] 65 | if test_perfs[0] is None: 66 | raise ValueError('%s\n%s does not have test_perf!' 
class WeightDrop(torch.nn.Module):
    """Apply dropout to selected weights of a wrapped module on every forward pass.

    For each name in ``weights`` the original parameter is re-registered on the
    wrapped module as ``<name>_raw``; before each forward call a dropped-out
    version is computed from the raw parameter and assigned under the
    original name.

    Args:
        module: the module whose weights are dropped.
        weights: iterable of parameter names on ``module``.
        dropout: dropout probability applied to the weights.
        variational: if true, one mask entry per row (first dimension) is
            drawn and broadcast over the row; this mask is applied even in
            eval mode.
    """

    def __init__(self, module, weights, dropout=0, variational=False):
        super(WeightDrop, self).__init__()
        self.module = module
        self.weights = weights
        self.dropout = dropout
        self.variational = variational
        self._setup()

    def widget_demagnetizer_y2k_edition(*args, **kwargs):
        # Replacement for flatten_parameters that does nothing.
        # It must be a function rather than a lambda as otherwise pickling explodes.
        return

    def _setup(self):
        # Temporary workaround for an issue with compacting weights in CUDNN RNNs.
        if issubclass(type(self.module), torch.nn.RNNBase):
            self.module.flatten_parameters = self.widget_demagnetizer_y2k_edition

        for name_w in self.weights:
            print('Applying weight drop of {} to {}'.format(self.dropout, name_w))
            w = getattr(self.module, name_w)
            del self.module._parameters[name_w]
            self.module.register_parameter(name_w + '_raw', Parameter(w.data))

    def _setweights(self):
        for name_w in self.weights:
            raw_w = getattr(self.module, name_w + '_raw')
            if self.variational:
                # new_ones keeps the mask on the same device/dtype as the weight.
                mask = raw_w.new_ones(raw_w.size(0), 1)
                mask = torch.nn.functional.dropout(mask, p=self.dropout, training=True)
                w = mask.expand_as(raw_w) * raw_w
            else:
                w = torch.nn.functional.dropout(raw_w, p=self.dropout, training=self.training)
            # Do NOT wrap w in a new Parameter: that creates a detached leaf and
            # the raw weight would never receive a gradient.
            # When dropout is a no-op, w may be the raw Parameter itself and gets
            # registered under name_w; clear that entry so a plain tensor can be
            # assigned on a later call.
            self.module._parameters.pop(name_w, None)
            setattr(self.module, name_w, w)

    def forward(self, *args):
        """Refresh the dropped weights, then delegate to the wrapped module."""
        self._setweights()
        return self.module.forward(*args)
| 84 | wdrnn = WeightDrop(torch.nn.LSTM(10, 10), ['weight_hh_l0'], dropout=0.9) 85 | wdrnn.cuda() 86 | 87 | run1 = [x.sum() for x in wdrnn(x, h0)[0].data] 88 | run2 = [x.sum() for x in wdrnn(x, h0)[0].data] 89 | 90 | print('First timesteps should be equal, all others should differ') 91 | print('Run 1:', run1) 92 | print('Run 2:', run2) 93 | 94 | # First time step, not influenced by hidden to hidden weights, should be equal 95 | assert run1[0] == run2[0] 96 | # Second step should not 97 | assert run1[1] != run2[1] 98 | 99 | print('---') 100 | -------------------------------------------------------------------------------- /test/autogluon_abohb/process_history.py: -------------------------------------------------------------------------------- 1 | """ 2 | example cmdline: 3 | 4 | python test/autogluon_abohb/process_history.py --model xgb --dataset pokerhand --time 7200 --mths abohb_aws-n8 --R 27 5 | 6 | """ 7 | import argparse 8 | import os 9 | import numpy as np 10 | import pickle as pkl 11 | from collections import OrderedDict 12 | 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--dataset', type=str) 15 | parser.add_argument('--mths', type=str, default='abohb_aws-n8') 16 | parser.add_argument('--R', type=int, default=27) 17 | parser.add_argument('--time', type=int) 18 | parser.add_argument('--model', type=str, default='xgb') 19 | parser.add_argument('--simulation_factor', type=int, default=1) # simulation sleep time factor for nasbench 20 | 21 | args = parser.parse_args() 22 | dataset = args.dataset 23 | mths = args.mths.split(',') 24 | R = args.R 25 | runtime_limit = args.time 26 | model = args.model 27 | simulation_factor = args.simulation_factor 28 | if model not in ('nasbench101', 'nasbench201', 'math'): 29 | simulation_factor = 1 30 | else: 31 | print('simulation factor:', simulation_factor) 32 | for para in (dataset, runtime_limit): 33 | assert para is not None 34 | 35 | 36 | for mth in mths: 37 | dir_path = 'data/benchmark_%s/%s-%d/%s/' % 
(model, dataset, runtime_limit, mth) 38 | for file in os.listdir(dir_path): 39 | if file.startswith('history_%s-%s-' % (mth, dataset)) and file.endswith('.pkl'): 40 | with open(os.path.join(dir_path, file), 'rb') as f: 41 | save_item = pkl.load(f) 42 | if isinstance(save_item, tuple): 43 | start_time, history = save_item 44 | elif isinstance(save_item, OrderedDict): 45 | print('Warning: no start time in history file.') 46 | history = save_item 47 | start_time = history['0'][0]['time_step'] - 60 48 | else: 49 | raise ValueError('Unknown save type: %s' % (type(save_item))) 50 | 51 | recorder = [] 52 | cnt = 0 53 | for history_list in history.values(): 54 | for history_dict in history_list: 55 | cnt += 1 56 | epoch = history_dict.pop('epoch') 57 | if epoch < R: 58 | continue 59 | elif epoch > R: 60 | raise ValueError('please check R in settings.', R, mth, history_list) 61 | 62 | time_step = history_dict.pop('time_step') 63 | performance = history_dict.pop('performance') 64 | test_perf = history_dict.pop('test_perf', None) 65 | eval_time = history_dict.pop('eval_time') 66 | history_dict.pop('terminated') 67 | history_dict.pop('bracket') 68 | 69 | runtime = (time_step - start_time) 70 | if runtime > runtime_limit: 71 | print('abandon record by runtime:', runtime, runtime_limit) 72 | continue 73 | 74 | record = { 75 | 'time_consumed': eval_time * simulation_factor, 76 | 'configuration': history_dict, 77 | 'global_time': runtime * simulation_factor, 78 | 'n_iteration': epoch, 79 | 'return_info': { 80 | 'loss': -performance, # minimize 81 | 'test_perf': test_perf, # already processed 82 | }, 83 | } 84 | recorder.append(record) 85 | 86 | recorder.sort(key=lambda rec: rec['global_time']) 87 | # write new 88 | new_file = 'new_record_' + file[8:] 89 | with open(os.path.join(dir_path, new_file), 'wb') as f: 90 | pkl.dump(recorder, f) 91 | print('recorder len:', mth, len(recorder), cnt) 92 | -------------------------------------------------------------------------------- 
/test/resnet/benchmark_resnet_runtest.py: -------------------------------------------------------------------------------- 1 | """ 2 | example cmdline: 3 | 4 | python test/resnet/benchmark_resnet_runtest.py --mth hyperband-n4 --rep 1 --start_id 0 5 | 6 | """ 7 | import os 8 | import sys 9 | import time 10 | import argparse 11 | import numpy as np 12 | import pickle as pkl 13 | 14 | sys.path.insert(0, ".") 15 | sys.path.insert(1, "../open-box") # for dependency 16 | from test.utils import seeds 17 | 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument('--dataset', type=str, default='cifar10') 20 | parser.add_argument('--mth', type=str, default='hyperband-n4') 21 | parser.add_argument('--rep', type=int, default=1) 22 | parser.add_argument('--start_id', type=int, default=0) 23 | parser.add_argument('--runtime_limit', type=int, default=172800) 24 | 25 | args = parser.parse_args() 26 | dataset = args.dataset 27 | mth = args.mth 28 | rep = args.rep 29 | start_id = args.start_id 30 | runtime_limit = args.runtime_limit 31 | model = 'resnet' 32 | 33 | try: 34 | from sklearn.metrics.scorer import accuracy_scorer 35 | except ModuleNotFoundError: 36 | from sklearn.metrics._scorer import accuracy_scorer 37 | print('from sklearn.metrics._scorer import accuracy_scorer') 38 | from resnet_model import get_estimator 39 | from resnet_util import get_transforms 40 | from resnet_dataset import ImageDataset 41 | from resnet_obj import dl_holdout_validation 42 | 43 | from openbox.utils.constants import MAXINT 44 | 45 | # Constant 46 | max_epoch = 200 47 | scorer = accuracy_scorer 48 | image_size = 32 49 | data_dir = './datasets/img_datasets/cifar10/' 50 | image_data = ImageDataset(data_path=data_dir, train_val_split=True, image_size=image_size) 51 | 52 | 53 | def test_func(config, device='cuda'): # device='cuda' 'cuda:0' 54 | 55 | data_transforms = get_transforms(image_size=image_size) 56 | image_data.load_data(data_transforms['train'], data_transforms['val']) 57 | # load 
test 58 | image_data.set_test_path(data_dir) 59 | image_data.load_test_data(data_transforms['val']) 60 | start_time = time.time() 61 | 62 | config_dict = config.get_dictionary().copy() 63 | 64 | estimator = get_estimator(config_dict, max_epoch, device=device) 65 | 66 | estimator.epoch_num = estimator.max_epoch 67 | 68 | try: 69 | score = dl_holdout_validation(estimator, scorer, image_data, random_state=1, run_test=True) 70 | except Exception as e: 71 | import traceback 72 | traceback.print_exc() 73 | score = -MAXINT 74 | print('Evaluation | Score: %.4f | Time cost: %.2f seconds' % 75 | (scorer._sign * score, 76 | time.time() - start_time)) 77 | print(str(config)) 78 | 79 | # Turn it into a minimization problem. 80 | return -score 81 | 82 | 83 | print('===== start test %s %s: rep=%d' % (mth, dataset, rep)) 84 | for i in range(start_id, start_id + rep): 85 | seed = seeds[i] 86 | 87 | dir_path = 'data/benchmark_%s/%s-%d/%s/' % (model, dataset, runtime_limit, mth) 88 | for file in os.listdir(dir_path): 89 | if file.startswith('incumbent_new_record_%s-%s-%d-' % (mth, dataset, seed)) \ 90 | and file.endswith('.pkl'): 91 | # load config 92 | with open(os.path.join(dir_path, file), 'rb') as f: 93 | record = pkl.load(f) 94 | print(model, dataset, mth, seed, 'loaded!', record, flush=True) 95 | 96 | # run test 97 | config = record['configuration'] 98 | perf = test_func(config, device='cuda') 99 | print(model, dataset, mth, seed, 'perf =', perf) 100 | 101 | # save perf 102 | timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) 103 | method_id = mth + '-%s-%d-%s' % (dataset, seed, timestamp) 104 | perf_file_name = 'incumbent_test_perf_%s.pkl' % (method_id,) 105 | with open(os.path.join(dir_path, perf_file_name), 'wb') as f: 106 | pkl.dump(perf, f) 107 | print(dir_path, perf_file_name, 'saved!', flush=True) 108 | -------------------------------------------------------------------------------- /tuner/utils.py: 
-------------------------------------------------------------------------------- 1 | from typing import List 2 | import numpy as np 3 | from openbox.utils.config_space import Configuration, ConfigurationSpace 4 | 5 | WAITING = 'waiting' 6 | RUNNING = 'running' 7 | COMPLETED = 'completed' 8 | PROMOTED = 'promoted' 9 | 10 | 11 | def sample_configuration(configuration_space: ConfigurationSpace, excluded_configs=None, 12 | max_sample_cnt=1000): 13 | """ 14 | sample one config not in excluded_configs 15 | """ 16 | if excluded_configs is None: 17 | excluded_configs = set() 18 | if isinstance(excluded_configs, set): 19 | excluded_configs_set = excluded_configs 20 | else: 21 | excluded_configs_set = set(excluded_configs) 22 | 23 | sample_cnt = 0 24 | while True: 25 | config = configuration_space.sample_configuration() 26 | sample_cnt += 1 27 | if config not in excluded_configs_set: 28 | break 29 | if sample_cnt >= max_sample_cnt: 30 | raise ValueError('Cannot sample non duplicate configuration after %d iterations. ' 31 | 'len of excluded configs set/list = %d/%d.' 
32 | % (max_sample_cnt, len(excluded_configs_set), len(excluded_configs))) 33 | return config 34 | 35 | 36 | def sample_configurations(configuration_space: ConfigurationSpace, num, 37 | excluded_configs=None, max_sample_cnt=1000) -> List[Configuration]: 38 | if excluded_configs is None: 39 | excluded_configs = set() 40 | if isinstance(excluded_configs, set): 41 | excluded_configs_set = excluded_configs 42 | else: 43 | excluded_configs_set = set(excluded_configs) 44 | 45 | result = [] 46 | result_set = set() # speedup checking 47 | max_sample_cnt = max(max_sample_cnt, 3 * num) 48 | sample_cnt = 0 49 | while len(result) < num: 50 | config = configuration_space.sample_configuration(1) 51 | sample_cnt += 1 52 | if config not in result_set and config not in excluded_configs_set: 53 | result.append(config) 54 | result_set.add(config) 55 | if sample_cnt >= max_sample_cnt: 56 | raise ValueError('Cannot sample non duplicate configuration after %d iterations. ' 57 | 'len of excluded configs set/list = %d/%d.' 58 | % (max_sample_cnt, len(excluded_configs_set), len(excluded_configs))) 59 | return result 60 | 61 | 62 | def expand_configurations(configs: List[Configuration], configuration_space: ConfigurationSpace, num: int, 63 | excluded_configs=None, max_sample_cnt=1000): 64 | if excluded_configs is None: 65 | excluded_configs = set() 66 | if isinstance(excluded_configs, set): 67 | excluded_configs_set = excluded_configs 68 | else: 69 | excluded_configs_set = set(excluded_configs) 70 | 71 | max_sample_cnt = max(max_sample_cnt, 3 * num) 72 | sample_cnt = 0 73 | while len(configs) < num: 74 | config = configuration_space.sample_configuration(1) 75 | sample_cnt += 1 76 | if config not in configs and config not in excluded_configs_set: 77 | configs.append(config) 78 | if sample_cnt >= max_sample_cnt: 79 | raise ValueError('Cannot sample non duplicate configuration after %d iterations. ' 80 | 'len of excluded configs set/list = %d/%d.' 
81 | % (max_sample_cnt, len(excluded_configs_set), len(excluded_configs))) 82 | return configs 83 | 84 | 85 | def minmax_normalization(x): 86 | min_value = min(x) 87 | delta = max(x) - min(x) 88 | if delta == 0: 89 | return [1.0] * len(x) 90 | return [(float(item) - min_value) / float(delta) for item in x] 91 | 92 | 93 | def std_normalization(x): 94 | _mean = np.mean(x) 95 | _std = np.std(x) 96 | if _std == 0: 97 | return np.array([0.] * len(x)) 98 | return (np.array(x) - _mean) / _std 99 | 100 | 101 | def norm2_normalization(x): 102 | z = np.array(x) 103 | normalized_z = z / np.linalg.norm(z) 104 | return normalized_z 105 | -------------------------------------------------------------------------------- /tuner/surrogate/rf_ensemble.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from openbox.surrogate.base.base_model import AbstractModel 3 | from openbox.surrogate.base.rf_with_instances import RandomForestWithInstances 4 | 5 | 6 | class RandomForestEnsemble(AbstractModel): 7 | def __init__(self, types: np.ndarray, 8 | bounds: np.ndarray, s_max, eta, weight_list, fusion_method, **kwargs): 9 | super().__init__(types=types, bounds=bounds, **kwargs) 10 | 11 | self.s_max = s_max 12 | self.eta = eta 13 | self.fusion = fusion_method 14 | self.surrogate_weight = dict() 15 | self.surrogate_container = dict() 16 | self.surrogate_r = list() 17 | self.weight_list = weight_list 18 | for index, item in enumerate(np.logspace(0, self.s_max, self.s_max + 1, base=self.eta)): 19 | r = int(item) 20 | self.surrogate_r.append(r) 21 | self.surrogate_weight[r] = self.weight_list[index] 22 | self.surrogate_container[r] = RandomForestWithInstances(types=types, bounds=bounds) 23 | 24 | def train(self, X: np.ndarray, Y: np.ndarray, r) -> 'AbstractModel': 25 | """Trains the Model on X and Y. 26 | 27 | Parameters 28 | ---------- 29 | X : np.ndarray [n_samples, n_features (config + instance features)] 30 | Input data points. 
31 | Y : np.ndarray [n_samples, n_objectives] 32 | The corresponding target values. n_objectives must match the 33 | number of target names specified in the constructor. 34 | r : int 35 | Determine which surrogate in self.surrogate_container to train. 36 | 37 | Returns 38 | ------- 39 | self : AbstractModel 40 | """ 41 | self.types = self._initial_types.copy() 42 | 43 | if len(X.shape) != 2: 44 | raise ValueError('Expected 2d array, got %dd array!' % len(X.shape)) 45 | if X.shape[1] != len(self.types): 46 | raise ValueError('Feature mismatch: X should have %d features, but has %d' % (X.shape[1], len(self.types))) 47 | if X.shape[0] != Y.shape[0]: 48 | raise ValueError('X.shape[0] (%s) != y.shape[0] (%s)' % (X.shape[0], Y.shape[0])) 49 | 50 | self.n_params = X.shape[1] - self.n_feats 51 | 52 | # reduce dimensionality of features of larger than PCA_DIM 53 | if self.pca and X.shape[0] > self.pca.n_components: 54 | X_feats = X[:, -self.n_feats:] 55 | # scale features 56 | X_feats = self.scaler.fit_transform(X_feats) 57 | X_feats = np.nan_to_num(X_feats) # if features with max == min 58 | # PCA 59 | X_feats = self.pca.fit_transform(X_feats) 60 | X = np.hstack((X[:, :self.n_params], X_feats)) 61 | if hasattr(self, "types"): 62 | # for RF, adapt types list 63 | # if X_feats.shape[0] < self.pca, X_feats.shape[1] == 64 | # X_feats.shape[0] 65 | self.types = np.array( 66 | np.hstack((self.types[:self.n_params], np.zeros((X_feats.shape[1])))), 67 | dtype=np.uint, 68 | ) 69 | 70 | return self._train(X, Y, r) 71 | 72 | def _train(self, X: np.ndarray, y: np.ndarray, r): 73 | self.surrogate_container[r].train(X, y) 74 | 75 | def _predict(self, X: np.ndarray): 76 | if len(X.shape) != 2: 77 | raise ValueError( 78 | 'Expected 2d array, got %dd array!' % len(X.shape)) 79 | if X.shape[1] != self.types.shape[0]: 80 | raise ValueError('Rows in X should have %d entries but have %d!' 
% 81 | (self.types.shape[0], X.shape[1])) 82 | if self.fusion == 'idp': 83 | means, vars = np.zeros((X.shape[0], 1)), np.zeros((X.shape[0], 1)) 84 | for r in self.surrogate_r: 85 | mean, var = self.surrogate_container[r].predict(X) 86 | means += self.surrogate_weight[r] * mean 87 | vars += self.surrogate_weight[r] * self.surrogate_weight[r] * var 88 | return means.reshape((-1, 1)), vars.reshape((-1, 1)) 89 | else: 90 | raise ValueError('Undefined Fusion Method: %s!' % self.fusion) 91 | -------------------------------------------------------------------------------- /tuner/surrogate/gp_ensemble.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from openbox.surrogate.base.base_model import AbstractModel 3 | from openbox.surrogate.base.build_gp import create_gp_model 4 | 5 | 6 | class GaussianProcessEnsemble(AbstractModel): 7 | def __init__(self, config_space, types: np.ndarray, 8 | bounds: np.ndarray, s_max, eta, weight_list, fusion_method, rng, **kwargs): 9 | super().__init__(types=types, bounds=bounds, **kwargs) 10 | 11 | self.s_max = s_max 12 | self.eta = eta 13 | self.fusion = fusion_method 14 | self.surrogate_weight = dict() 15 | self.surrogate_container = dict() 16 | self.surrogate_r = list() 17 | self.weight_list = weight_list 18 | self.rng = rng 19 | for index, item in enumerate(np.logspace(0, self.s_max, self.s_max + 1, base=self.eta)): 20 | r = int(item) 21 | self.surrogate_r.append(r) 22 | self.surrogate_weight[r] = self.weight_list[index] 23 | self.surrogate_container[r] = create_gp_model('gp', config_space, types, bounds, self.rng) 24 | 25 | def train(self, X: np.ndarray, Y: np.ndarray, r) -> 'AbstractModel': 26 | """Trains the Model on X and Y. 27 | 28 | Parameters 29 | ---------- 30 | X : np.ndarray [n_samples, n_features (config + instance features)] 31 | Input data points. 32 | Y : np.ndarray [n_samples, n_objectives] 33 | The corresponding target values. 
n_objectives must match the 34 | number of target names specified in the constructor. 35 | r : int 36 | Determine which surrogate in self.surrogate_container to train. 37 | 38 | Returns 39 | ------- 40 | self : AbstractModel 41 | """ 42 | self.types = self._initial_types.copy() 43 | 44 | if len(X.shape) != 2: 45 | raise ValueError('Expected 2d array, got %dd array!' % len(X.shape)) 46 | if X.shape[1] != len(self.types): 47 | raise ValueError('Feature mismatch: X should have %d features, but has %d' % (X.shape[1], len(self.types))) 48 | if X.shape[0] != Y.shape[0]: 49 | raise ValueError('X.shape[0] (%s) != y.shape[0] (%s)' % (X.shape[0], Y.shape[0])) 50 | 51 | self.n_params = X.shape[1] - self.n_feats 52 | 53 | # reduce dimensionality of features of larger than PCA_DIM 54 | if self.pca and X.shape[0] > self.pca.n_components: 55 | X_feats = X[:, -self.n_feats:] 56 | # scale features 57 | X_feats = self.scaler.fit_transform(X_feats) 58 | X_feats = np.nan_to_num(X_feats) # if features with max == min 59 | # PCA 60 | X_feats = self.pca.fit_transform(X_feats) 61 | X = np.hstack((X[:, :self.n_params], X_feats)) 62 | if hasattr(self, "types"): 63 | # for RF, adapt types list 64 | # if X_feats.shape[0] < self.pca, X_feats.shape[1] == 65 | # X_feats.shape[0] 66 | self.types = np.array( 67 | np.hstack((self.types[:self.n_params], np.zeros((X_feats.shape[1])))), 68 | dtype=np.uint, 69 | ) 70 | 71 | return self._train(X, Y, r) 72 | 73 | def _train(self, X: np.ndarray, y: np.ndarray, r): 74 | self.surrogate_container[r].train(X, y) 75 | 76 | def _predict(self, X: np.ndarray): 77 | if len(X.shape) != 2: 78 | raise ValueError( 79 | 'Expected 2d array, got %dd array!' % len(X.shape)) 80 | if X.shape[1] != self.types.shape[0]: 81 | raise ValueError('Rows in X should have %d entries but have %d!' 
% 82 | (self.types.shape[0], X.shape[1])) 83 | if self.fusion == 'idp': 84 | means, vars = np.zeros((X.shape[0], 1)), np.zeros((X.shape[0], 1)) 85 | for r in self.surrogate_r: 86 | mean, var = self.surrogate_container[r].predict(X) 87 | means += self.surrogate_weight[r] * mean 88 | vars += self.surrogate_weight[r] * self.surrogate_weight[r] * var 89 | return means.reshape((-1, 1)), vars.reshape((-1, 1)) 90 | else: 91 | raise ValueError('Undefined Fusion Method: %s!' % self.fusion) 92 | -------------------------------------------------------------------------------- /test/benchmark_xgb_runtest.py: -------------------------------------------------------------------------------- 1 | """ 2 | example cmdline: 3 | 4 | python test/benchmark_xgb_runtest.py --datasets covtype --mths hyperband-n1 --rep 1 --start_id 0 5 | 6 | python test/benchmark_xgb_runtest.py --datasets covtype --show_mode 1 7 | 8 | """ 9 | import os 10 | import sys 11 | import time 12 | import argparse 13 | import numpy as np 14 | import pickle as pkl 15 | from sklearn.metrics import balanced_accuracy_score 16 | 17 | sys.path.insert(0, ".") 18 | sys.path.insert(1, "../open-box") # for dependency 19 | from utils import load_data, setup_exp, check_datasets, seeds 20 | 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('--datasets', type=str) 23 | parser.add_argument('--mths', type=str) 24 | parser.add_argument('--rep', type=int, default=1) 25 | parser.add_argument('--start_id', type=int, default=0) 26 | parser.add_argument('--show_mode', type=int, default=0) 27 | parser.add_argument('--runtime_limit', type=int) # if you don't want to use default setup 28 | 29 | args = parser.parse_args() 30 | test_datasets = args.datasets.split(',') 31 | mths = args.mths.split(',') 32 | rep = args.rep 33 | start_id = args.start_id 34 | show_mode = args.show_mode 35 | 36 | print(test_datasets) 37 | 38 | 39 | def test_func(config, x_train, x_test, y_train, y_test): 40 | from tuner.xgb_model import XGBoost 41 | 
conf_dict = config.get_dictionary() 42 | model = XGBoost(**conf_dict, n_jobs=n_jobs, seed=47) 43 | model.fit(x_train, y_train) 44 | # test 45 | y_pred = model.predict(x_test) 46 | perf = balanced_accuracy_score(y_test, y_pred) 47 | return perf 48 | 49 | 50 | if show_mode == 1: 51 | for dataset in test_datasets: 52 | # setup 53 | _, runtime_limit, _ = setup_exp(dataset, 1, 1, 1) 54 | if args.runtime_limit is not None: 55 | runtime_limit = args.runtime_limit 56 | for mth in mths: 57 | perfs = [] 58 | dir_path = 'data/benchmark_xgb/%s-%d/%s/' % (dataset, runtime_limit, mth) 59 | for file in os.listdir(dir_path): 60 | if file.startswith('incumbent_test_perf_%s-%s-' % (mth, dataset)) and file.endswith('.pkl'): 61 | with open(os.path.join(dir_path, file), 'rb') as f: 62 | perf = pkl.load(f) 63 | perfs.append(perf) 64 | m = np.mean(perfs).item() 65 | s = np.std(perfs).item() 66 | print(dataset, mth, perfs, u'%.4f\u00B1%.4f' % (m, s)) 67 | exit() 68 | 69 | 70 | check_datasets(test_datasets) 71 | for dataset in test_datasets: 72 | # setup 73 | n_jobs, runtime_limit, _ = setup_exp(dataset, 4, 1, 1) 74 | if args.runtime_limit is not None: 75 | runtime_limit = args.runtime_limit 76 | x_train, x_val, x_test, y_train, y_val, y_test = load_data(dataset) 77 | for mth in mths: 78 | print('===== start test %s %s: rep=%d, n_jobs=%d' % (mth, dataset, rep, n_jobs)) 79 | for i in range(start_id, start_id + rep): 80 | seed = seeds[i] 81 | 82 | dir_path = 'data/benchmark_xgb/%s-%d/%s/' % (dataset, runtime_limit, mth) 83 | for file in os.listdir(dir_path): 84 | if file.startswith('incumbent_new_record_%s-%s-%d-' % (mth, dataset, seed)) \ 85 | and file.endswith('.pkl'): 86 | # load config 87 | with open(os.path.join(dir_path, file), 'rb') as f: 88 | record = pkl.load(f) 89 | print(dataset, mth, seed, 'loaded!', record, flush=True) 90 | 91 | # run test 92 | config = record['configuration'] 93 | perf = test_func(config, x_train, x_test, y_train, y_test) 94 | print(dataset, mth, seed, 'perf 
=', perf) 95 | 96 | # save perf 97 | timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) 98 | method_id = mth + '-%s-%d-%s' % (dataset, seed, timestamp) 99 | perf_file_name = 'incumbent_test_perf_%s.pkl' % (method_id,) 100 | with open(os.path.join(dir_path, perf_file_name), 'wb') as f: 101 | pkl.dump(perf, f) 102 | print(dir_path, perf_file_name, 'saved!', flush=True) 103 | -------------------------------------------------------------------------------- /tuner/async_mq_bo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tuner.async_mq_base_facade import async_mqBaseFacade 3 | from tuner.utils import sample_configuration 4 | 5 | from openbox.utils.config_space import ConfigurationSpace 6 | from openbox.core.async_batch_advisor import AsyncBatchAdvisor, SUCCESS 7 | from openbox.core.base import Observation 8 | 9 | 10 | class async_mqBO(async_mqBaseFacade): 11 | """ 12 | The implementation of Asynchronous Parallel Bayesian Optimization (using OpenBox) 13 | """ 14 | def __init__(self, objective_func, 15 | config_space: ConfigurationSpace, 16 | R, 17 | bo_init_num=3, 18 | random_state=1, 19 | method_id='mqAsyncBO', 20 | restart_needed=True, 21 | time_limit_per_trial=600, 22 | runtime_limit=None, 23 | ip='', 24 | port=13579, 25 | authkey=b'abc', 26 | **kwargs): 27 | max_queue_len = 1000 # conservative design 28 | super().__init__(objective_func, method_name=method_id, 29 | restart_needed=restart_needed, time_limit_per_trial=time_limit_per_trial, 30 | runtime_limit=runtime_limit, 31 | max_queue_len=max_queue_len, ip=ip, port=port, authkey=authkey) 32 | self.seed = random_state 33 | self.config_space = config_space 34 | self.config_space.seed(self.seed) 35 | self.R = R 36 | 37 | self.incumbent_configs = list() 38 | self.incumbent_perfs = list() 39 | 40 | self.logger.info('Unused kwargs: %s' % kwargs) 41 | 42 | self.bo_init_num = bo_init_num 43 | # using median_imputation 
batch_strategy implemented in OpenBox to generate BO suggestions 44 | if 'task_info' in AsyncBatchAdvisor.__init__.__code__.co_varnames: 45 | # old version OpenBox 46 | task_info = {'num_constraints': 0, 'num_objs': 1} 47 | task_kwargs = dict(task_info=task_info) 48 | else: 49 | task_kwargs = dict(num_objs=1, num_constraints=0) 50 | self.config_advisor = AsyncBatchAdvisor(config_space, 51 | **task_kwargs, 52 | batch_size=None, 53 | batch_strategy='median_imputation', 54 | initial_trials=self.bo_init_num, 55 | init_strategy='random', 56 | optimization_strategy='bo', 57 | surrogate_type='prf', 58 | acq_type='ei', 59 | acq_optimizer_type='local_random', 60 | task_id=self.method_name, 61 | output_dir=self.log_directory, 62 | random_state=random_state, 63 | ) 64 | 65 | def get_job(self): 66 | """ 67 | sample config 68 | """ 69 | next_config = self.config_advisor.get_suggestion() 70 | next_n_iteration = self.R 71 | next_extra_conf = dict(initial_run=True) 72 | 73 | return next_config, next_n_iteration, next_extra_conf 74 | 75 | def update_observation(self, config, perf, n_iteration): 76 | assert int(n_iteration) == self.R 77 | self.incumbent_configs.append(config) 78 | self.incumbent_perfs.append(perf) 79 | 80 | # update bo advisor 81 | objs = [perf] 82 | observation = Observation( 83 | config=config, objs=objs, constraints=None, 84 | trial_state=SUCCESS, elapsed_time=None, 85 | ) 86 | self.config_advisor.update_observation(observation) 87 | self.logger.info('update BO observation: config=%s, perf=%f' % (str(config), perf)) 88 | 89 | def get_incumbent(self, num_inc=1): 90 | assert (len(self.incumbent_perfs) == len(self.incumbent_configs)) 91 | indices = np.argsort(self.incumbent_perfs) 92 | configs = [self.incumbent_configs[i] for i in indices[0:num_inc]] 93 | perfs = [self.incumbent_perfs[i] for i in indices[0: num_inc]] 94 | return configs, perfs 95 | -------------------------------------------------------------------------------- 
/tuner/acq_maximizer/ei_optimization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from openbox.utils.config_space import get_one_exchange_neighbourhood 3 | from openbox.utils.constants import MAXINT 4 | 5 | from tuner.surrogate.mf_gp import convert_configurations_to_resource_array 6 | 7 | 8 | class RandomSampling(object): 9 | 10 | def __init__(self, acquisition_function, config_space, n_samples=5000, rng=None): 11 | """ 12 | Samples candidates uniformly at random and returns the point with the highest objective value. 13 | 14 | Parameters 15 | ---------- 16 | acquisition_function: 17 | The acquisition function which will be maximized 18 | n_samples: int 19 | Number of candidates that are samples 20 | """ 21 | self.config_space = config_space 22 | self.acquisition_function = acquisition_function 23 | if rng is None: 24 | self.rng = np.random.RandomState(1357) 25 | else: 26 | self.rng = rng 27 | self.n_samples = n_samples 28 | 29 | def maximize(self, best_config, batch_size=1): 30 | """ 31 | Maximizes the given acquisition function. 32 | 33 | Parameters 34 | ---------- 35 | batch_size: number of maximizer returned. 36 | 37 | Returns 38 | ------- 39 | np.ndarray(N,D) 40 | Point with highest acquisition value. 
41 | """ 42 | 43 | incs_configs = list(get_one_exchange_neighbourhood(best_config, seed=self.rng.randint(MAXINT))) 44 | 45 | # Sample random points uniformly over the whole space 46 | # rand_configs = self.config_space.sample_configuration(max(self.n_samples, batch_size) - len(incs_configs)) 47 | rand_configs = self.config_space.sample_configuration(max(self.n_samples, batch_size)) 48 | 49 | configs_list = incs_configs + rand_configs 50 | 51 | y = self.acquisition_function(configs_list) 52 | y = y.reshape(-1) 53 | assert y.shape[0] == len(configs_list) 54 | 55 | candidates = [configs_list[int(i)] for i in np.argsort(-y)[:batch_size]] # maximize 56 | return candidates 57 | 58 | 59 | class mf_RandomSampling(object): 60 | 61 | def __init__(self, acquisition_function, config_space, max_resource, n_samples=5000, rng=None): 62 | """ 63 | Samples candidates uniformly at random and returns the point with the highest objective value. 64 | 65 | Parameters 66 | ---------- 67 | acquisition_function: 68 | The acquisition function which will be maximized 69 | n_samples: int 70 | Number of candidates that are samples 71 | """ 72 | self.config_space = config_space 73 | self.acquisition_function = acquisition_function 74 | if rng is None: 75 | self.rng = np.random.RandomState(1357) 76 | else: 77 | self.rng = rng 78 | self.n_samples = n_samples 79 | self.max_resource = max_resource 80 | 81 | def maximize(self, resource, best_config, batch_size=1): 82 | """ 83 | Maximizes the given acquisition function. 84 | 85 | Parameters 86 | ---------- 87 | resource: 88 | 89 | best_config: 90 | 91 | batch_size: number of maximizer returned. 92 | 93 | Returns 94 | ------- 95 | np.ndarray(N,D) 96 | Point with highest acquisition value. 
class mqmfWorker(object):
    """
    Message queue worker for multi-fidelity optimization.

    The worker repeatedly receives a job message from the master, evaluates
    ``objective_function(config, n_iteration, extra_conf)`` under the configured
    time limit and sends the observation back.
    """

    def __init__(self, objective_function,
                 ip="127.0.0.1", port=13579, authkey=b'abc',
                 sleep_time=0.1,
                 no_time_limit=False,
                 logger=None):
        """
        :param objective_function: callable invoked as ``f(config, n_iteration, extra_conf)``.
        :param ip: address handed to ``WorkerMessager``.
        :param port: port handed to ``WorkerMessager``.
        :param authkey: authentication key handed to ``WorkerMessager``.
        :param sleep_time: seconds to sleep when no message is available.
        :param no_time_limit: if true, the objective is called directly without a time limit.
        :param logger: optional logger; its ``info`` method is used, otherwise ``print``.
        """
        self.objective_function = objective_function
        self.worker_messager = WorkerMessager(ip, port, authkey=authkey)
        self.sleep_time = sleep_time

        # Pick the evaluation wrapper and the logging sink once, up front.
        self.time_limit = no_time_limit_func if no_time_limit else time_limit
        self.logging = print if logger is None else logger.info

    def run(self):
        """Serve jobs until receiving or sending a message fails."""
        while True:
            # Get config
            try:
                msg = self.worker_messager.receive_message()
            except Exception as recv_err:
                self.logging("Worker receive message error: %s" % str(recv_err))
                return
            if msg is None:
                # Nothing queued yet: wait and poll again.
                time.sleep(self.sleep_time)
                continue
            self.logging("Worker: get msg: %s. start working." % msg)
            config, extra_conf, time_limit_per_trial, n_iteration, trial_id = msg

            # Start working
            start_time = time.time()
            trial_state, ref_id, early_stop, test_perf = SUCCESS, None, False, None
            try:
                timed_out, outcome = self.time_limit(self.objective_function,
                                                     time_limit_per_trial,
                                                     args=(config, n_iteration, extra_conf),
                                                     kwargs=dict())
                if timed_out:
                    raise TimeoutException(
                        'Timeout: time limit for this evaluation is %.1fs' % time_limit_per_trial)
                if isinstance(outcome, dict):
                    perf = outcome['objective_value']
                    if perf is None:
                        perf = MAXINT
                    ref_id = outcome.get('ref_id', None)
                    early_stop = outcome.get('early_stop', False)
                    test_perf = outcome.get('test_perf', None)
                else:
                    # A missing result is reported as the worst possible loss.
                    perf = MAXINT if outcome is None else outcome
            except Exception as eval_err:
                hit_timeout = isinstance(eval_err, TimeoutException)
                if not hit_timeout:
                    traceback.print_exc(file=sys.stdout)
                trial_state = TIMEOUT if hit_timeout else FAILED
                perf = MAXINT

            time_taken = time.time() - start_time
            observation = [
                dict(loss=perf,
                     n_iteration=n_iteration,
                     ref_id=ref_id,
                     early_stop=early_stop,
                     trial_state=trial_state,
                     test_perf=test_perf,
                     extra_conf=extra_conf),
                time_taken,
                trial_id,
                config,
            ]

            # Send result
            self.logging("Worker: perf=%f. time=%.2fs. sending result." % (perf, time_taken))
            try:
                self.worker_messager.send_message(observation)
            except Exception as send_err:
                self.logging("Worker send message error: %s" % str(send_err))
                return
16 | """ 17 | 18 | def __init__(self, objective_func, 19 | config_space: ConfigurationSpace, 20 | R, 21 | eta=3, 22 | num_iter=10000, 23 | rand_prob=0.3, 24 | bo_init_num=3, 25 | random_state=1, 26 | method_id='mqBOHB', 27 | restart_needed=True, 28 | time_limit_per_trial=600, 29 | runtime_limit=None, 30 | ip='', 31 | port=13579, 32 | authkey=b'abc',): 33 | super().__init__(objective_func, config_space, R, eta=eta, num_iter=num_iter, 34 | random_state=random_state, method_id=method_id, 35 | restart_needed=restart_needed, time_limit_per_trial=time_limit_per_trial, 36 | runtime_limit=runtime_limit, 37 | ip=ip, port=port, authkey=authkey) 38 | 39 | self.rand_prob = rand_prob 40 | self.bo_init_num = bo_init_num 41 | types, bounds = get_types(config_space) 42 | self.surrogate = RandomForestWithInstances(types=types, bounds=bounds) 43 | self.acquisition_function = EI(model=self.surrogate) 44 | self.acq_optimizer = RandomSampling(self.acquisition_function, config_space, 45 | n_samples=max(5000, 50 * len(bounds))) 46 | self.rng = np.random.RandomState(self.seed) 47 | 48 | def choose_next(self, num_config): 49 | # Sample n configurations according to BOHB strategy. 50 | self.logger.info('Sample %d configs in choose_next. rand_prob is %f.' % (num_config, self.rand_prob)) 51 | 52 | if len(self.incumbent_configs) < self.bo_init_num: 53 | self.logger.info('len(self.incumbent_configs) = %d. Return all random configs.' 
54 | % (len(self.incumbent_configs), )) 55 | return sample_configurations(self.config_space, num_config, excluded_configs=self.incumbent_configs) 56 | 57 | config_candidates = [] 58 | 59 | # BO 60 | num_bo_config = num_config - int(num_config * self.rand_prob) 61 | self.surrogate.train(convert_configurations_to_array(self.incumbent_configs), 62 | np.array(self.incumbent_perfs, dtype=np.float64)) 63 | best_index = np.argmin(self.incumbent_perfs) 64 | best_config = self.incumbent_configs[best_index] 65 | best_perf = self.incumbent_perfs[best_index] 66 | # Update surrogate model in acquisition function. 67 | self.acquisition_function.update(model=self.surrogate, eta=best_perf, 68 | num_data=len(self.incumbent_configs)) 69 | bo_candidates = self.acq_optimizer.maximize(best_config=best_config, batch_size=5000) 70 | for config in bo_candidates: 71 | if config not in config_candidates + self.incumbent_configs: 72 | config_candidates.append(config) 73 | if len(config_candidates) == num_bo_config: 74 | break 75 | self.logger.info('len bo configs = %d.' % len(config_candidates)) 76 | 77 | # sample random configs 78 | config_candidates = expand_configurations(config_candidates, self.config_space, num_config, 79 | excluded_configs=self.incumbent_configs) 80 | self.logger.info('len total configs = %d.' % len(config_candidates)) 81 | assert len(config_candidates) == num_config 82 | return config_candidates 83 | 84 | def update_incumbent_before_reduce(self, T, val_losses, n_iteration): 85 | if int(n_iteration) < self.R: 86 | return 87 | self.incumbent_configs.extend(T) 88 | self.incumbent_perfs.extend(val_losses) 89 | self.logger.info('%d observations updated. %d incumbent configs total.' 
class async_mqmfWorker(object):
    """
    Async message queue worker for multi-fidelity optimization.

    After announcing itself to the master, the worker repeatedly receives a job
    message, evaluates ``objective_function(config, n_iteration, extra_conf)``
    under the configured time limit and sends the observation back.
    """

    def __init__(self, objective_function,
                 ip="127.0.0.1", port=13579, authkey=b'abc',
                 sleep_time=0.1,
                 no_time_limit=False,
                 logger=None):
        """
        :param objective_function: callable invoked as ``f(config, n_iteration, extra_conf)``.
        :param ip: address handed to ``WorkerMessager``.
        :param port: port handed to ``WorkerMessager``.
        :param authkey: authentication key handed to ``WorkerMessager``.
        :param sleep_time: seconds to sleep when no message is available.
        :param no_time_limit: if true, the objective is called directly without a time limit.
        :param logger: optional logger; its ``info`` method is used, otherwise ``print``.
        """
        self.objective_function = objective_function
        self.worker_messager = WorkerMessager(ip, port, authkey=authkey)
        self.sleep_time = sleep_time

        # Pick the evaluation wrapper and the logging sink once, up front.
        self.time_limit = no_time_limit_func if no_time_limit else time_limit
        self.logging = print if logger is None else logger.info

    def run(self):
        """Announce readiness, then serve jobs until a message transfer fails."""
        # tell master worker is ready
        try:
            self.worker_messager.send_message([None, None, None, None])
        except Exception as init_err:
            self.logging("Worker send init message error: %s" % str(init_err))
            return

        while True:
            # Get config
            try:
                msg = self.worker_messager.receive_message()
            except Exception as recv_err:
                self.logging("Worker receive message error: %s" % str(recv_err))
                return
            if msg is None:
                # Nothing queued yet: wait and poll again.
                time.sleep(self.sleep_time)
                continue
            self.logging("Worker: get msg: %s. start working." % msg)
            config, extra_conf, time_limit_per_trial, n_iteration, trial_id = msg

            # Start working
            start_time = time.time()
            trial_state, ref_id, early_stop, test_perf = SUCCESS, None, False, None
            try:
                timed_out, outcome = self.time_limit(self.objective_function,
                                                     time_limit_per_trial,
                                                     args=(config, n_iteration, extra_conf),
                                                     kwargs=dict())
                if timed_out:
                    raise TimeoutException(
                        'Timeout: time limit for this evaluation is %.1fs' % time_limit_per_trial)
                if isinstance(outcome, dict):
                    perf = outcome['objective_value']
                    if perf is None:
                        perf = MAXINT
                    ref_id = outcome.get('ref_id', None)
                    early_stop = outcome.get('early_stop', False)
                    test_perf = outcome.get('test_perf', None)
                else:
                    # A missing result is reported as the worst possible loss.
                    perf = MAXINT if outcome is None else outcome
            except Exception as eval_err:
                hit_timeout = isinstance(eval_err, TimeoutException)
                if not hit_timeout:
                    traceback.print_exc(file=sys.stdout)
                trial_state = TIMEOUT if hit_timeout else FAILED
                perf = MAXINT

            time_taken = time.time() - start_time
            observation = [
                dict(loss=perf,
                     n_iteration=n_iteration,
                     ref_id=ref_id,
                     early_stop=early_stop,
                     trial_state=trial_state,
                     test_perf=test_perf,
                     extra_conf=extra_conf),
                time_taken,
                trial_id,
                config,
            ]

            # Send result
            self.logging("Worker: perf=%f. time=%.2fs. sending result." % (perf, time_taken))
            try:
                self.worker_messager.send_message(observation)
            except Exception as send_err:
                self.logging("Worker send message error: %s" % str(send_err))
                return
def create_resource_gp_model(model_type, config_space, types, bounds, rng):
    """
    Construct the Gaussian process surrogate that is capable of dealing with categorical hyperparameters.

    One extra continuous feature with bounds [0, 1] (the resource feature) is
    appended to ``types`` and ``bounds`` before the kernel is assembled.

    Parameters
    ----------
    model_type : str
        'gp' builds a ``GaussianProcess``, 'gp_mcmc' a ``GaussianProcessMCMC``.
    config_space : ConfigurationSpace
        Forwarded to the surrogate as ``configspace``.
    types : array-like
        Type code per hyperparameter; 0 is handled by the Matern kernel, any
        other value by the Hamming kernel.
    bounds : array-like
        Bounds per hyperparameter, without the resource feature.
    rng :
        Random generator used for the kernel priors and, through
        ``rng.randint(low=0, high=10000)``, for the surrogate seed.

    Returns
    -------
    The constructed surrogate model.

    Raises
    ------
    ValueError
        If ``model_type`` is neither 'gp' nor 'gp_mcmc'.
    """
    cov_amp = ConstantKernel(
        2.0,
        constant_value_bounds=(np.exp(-10), np.exp(2)),
        prior=LognormalPrior(mean=0.0, sigma=1.0, rng=rng),
    )

    # resource feature
    types = np.hstack((types, [0])).astype(int)
    bounds = np.vstack((bounds, [[0.0, 1.0]])).astype(float)

    # The ``np.int`` alias was removed in NumPy 1.24; the builtin ``int`` is the
    # documented drop-in replacement and yields the same dtype.
    cont_dims = np.nonzero(types == 0)[0].astype(int)
    cat_dims = np.nonzero(types != 0)[0].astype(int)

    length_scale_bounds = (np.exp(-6.754111155189306), np.exp(0.0858637988771976))

    if len(cont_dims) > 0:
        exp_kernel = Matern(
            np.ones([len(cont_dims)]),
            [length_scale_bounds for _ in range(len(cont_dims))],
            nu=2.5,
            operate_on=cont_dims,
        )

    if len(cat_dims) > 0:
        ham_kernel = HammingKernel(
            np.ones([len(cat_dims)]),
            [length_scale_bounds for _ in range(len(cat_dims))],
            operate_on=cat_dims,
        )

    noise_kernel = WhiteKernel(
        noise_level=1e-8,
        noise_level_bounds=(np.exp(-25), np.exp(2)),
        prior=HorseshoePrior(scale=0.1, rng=rng),
    )

    if len(cont_dims) > 0 and len(cat_dims) > 0:
        # both
        kernel = cov_amp * (exp_kernel * ham_kernel) + noise_kernel
    elif len(cont_dims) > 0 and len(cat_dims) == 0:
        # only cont
        kernel = cov_amp * exp_kernel + noise_kernel
    elif len(cont_dims) == 0 and len(cat_dims) > 0:
        # only cat
        kernel = cov_amp * ham_kernel + noise_kernel
    else:
        # Defensive only: the appended resource feature is always continuous.
        raise ValueError('No hyperparameter dimensions available to build a kernel.')

    if model_type == 'gp_mcmc':
        # Use an even number of MCMC walkers.
        n_mcmc_walkers = 3 * len(kernel.theta)
        if n_mcmc_walkers % 2 == 1:
            n_mcmc_walkers += 1
        model = GaussianProcessMCMC(
            configspace=config_space,
            types=types,
            bounds=bounds,
            kernel=kernel,
            n_mcmc_walkers=n_mcmc_walkers,
            chain_length=250,
            burnin_steps=250,
            normalize_y=True,
            seed=rng.randint(low=0, high=10000),
        )
    elif model_type == 'gp':
        model = GaussianProcess(
            configspace=config_space,
            types=types,
            bounds=bounds,
            kernel=kernel,
            normalize_y=True,
            seed=rng.randint(low=0, high=10000),
        )
    else:
        raise ValueError("Invalid surrogate str %s!" % model_type)
    return model
restart_needed=restart_needed, time_limit_per_trial=time_limit_per_trial, 33 | runtime_limit=runtime_limit, 34 | max_queue_len=max_queue_len, ip=ip, port=port, authkey=authkey) 35 | self.seed = random_state 36 | self.config_space = config_space 37 | self.config_space.seed(self.seed) 38 | self.R = R 39 | 40 | self.incumbent_configs = list() 41 | self.incumbent_perfs = list() 42 | 43 | self.all_configs = set() 44 | 45 | self.age = 0 46 | self.population = list() 47 | self.population_size = population_size 48 | self.subset_size = subset_size 49 | assert 0 < self.subset_size <= self.population_size 50 | self.epsilon = epsilon 51 | self.strategy = strategy 52 | assert self.strategy in ['worst', 'oldest'] 53 | self.rng = np.random.RandomState(self.seed) 54 | 55 | self.logger.info('Unused kwargs: %s' % kwargs) 56 | 57 | def get_job(self): 58 | """ 59 | sample a random config 60 | """ 61 | if len(self.population) < self.population_size: 62 | # Initialize population 63 | next_config = sample_configuration(self.config_space, excluded_configs=self.all_configs) 64 | else: 65 | # Select a parent by subset tournament and epsilon greedy 66 | if self.rng.random() < self.epsilon: 67 | parent_config = random.sample(self.population, 1)[0]['config'] 68 | else: 69 | subset = random.sample(self.population, self.subset_size) 70 | subset.sort(key=lambda x: x['perf']) # minimize 71 | parent_config = subset[0]['config'] 72 | 73 | # Mutation to 1-step neighbors 74 | next_config = None 75 | neighbors_gen = get_one_exchange_neighbourhood(parent_config, seed=1) 76 | for neighbor in neighbors_gen: 77 | if neighbor not in self.all_configs: 78 | next_config = neighbor 79 | break 80 | if next_config is None: # If all the neighors are evaluated, sample randomly! 
class RNNModel(nn.Module):
    """Container module with an encoder, a recurrent module, and a decoder."""

    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False):
        """
        Build the embedding encoder, a stack of single-layer RNNs and the linear decoder.

        :param rnn_type: one of 'LSTM', 'QRNN', 'GRU'.
        :param ntoken: vocabulary size (embedding rows / decoder outputs).
        :param ninp: embedding size.
        :param nhid: hidden size of the recurrent layers.
        :param nlayers: number of stacked recurrent layers.
        :param dropout: locked dropout applied to the final RNN output.
        :param dropouth: locked dropout applied between recurrent layers.
        :param dropouti: locked dropout applied to the embedded input.
        :param dropoute: dropout passed to ``embedded_dropout`` while training.
        :param wdrop: weight-drop rate for the recurrent weights.
        :param tie_weights: share the decoder weight with the encoder weight.
        """
        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'

        # The last layer must emit what the decoder consumes: ninp features when the
        # decoder weight is tied to the embedding, nhid otherwise.
        last_size = ninp if tie_weights else nhid
        in_sizes = [ninp if l == 0 else nhid for l in range(nlayers)]
        out_sizes = [nhid if l != nlayers - 1 else last_size for l in range(nlayers)]

        if rnn_type == 'LSTM':
            self.rnns = [torch.nn.LSTM(in_sizes[l], out_sizes[l], 1, dropout=0) for l in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
        elif rnn_type == 'GRU':
            # Previously the last GRU layer was always ninp wide, which disagreed with
            # init_hidden() and the decoder whenever tie_weights was False and ninp != nhid.
            self.rnns = [torch.nn.GRU(in_sizes[l], out_sizes[l], 1, dropout=0) for l in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [QRNNLayer(input_size=in_sizes[l], hidden_size=out_sizes[l], save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
        print(self.rnns)
        self.rnns = torch.nn.ModuleList(self.rnns)
        self.decoder = nn.Linear(nhid, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.tie_weights = tie_weights

    def reset(self):
        """Reset every layer's state; only QRNN layers are reset."""
        if self.rnn_type == 'QRNN':
            for r in self.rnns:
                r.reset()

    def init_weights(self):
        """Initialise encoder/decoder weights uniformly in [-0.1, 0.1] and zero the decoder bias."""
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.fill_(0)
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, input, hidden, return_h=False):
        """
        Run the embedded input through the recurrent stack.

        :param input: token indices fed to the embedding.
        :param hidden: per-layer hidden states, indexed by layer.
        :param return_h: also return the raw and dropped-out per-layer outputs.
        :return: ``(result, hidden)`` or ``(result, hidden, raw_outputs, outputs)``, where
            ``result`` is the final output with its first two dimensions merged.
        """
        emb = embedded_dropout(self.encoder, input, dropout=self.dropoute if self.training else 0)
        emb = self.lockdrop(emb, self.dropouti)

        raw_output = emb
        new_hidden = []
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                # Locked dropout between layers only; the last layer uses self.dropout below.
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output, self.dropout)
        outputs.append(output)

        result = output.view(output.size(0)*output.size(1), output.size(2))
        if return_h:
            return result, hidden, raw_outputs, outputs
        return result, hidden

    def _hidden_size(self, l):
        """Return the hidden width of layer ``l`` (matches the sizes used in __init__)."""
        if l != self.nlayers - 1:
            return self.nhid
        return self.ninp if self.tie_weights else self.nhid

    def init_hidden(self, bsz):
        """
        Create zeroed initial hidden states for batch size ``bsz``.

        :return: a list with one entry per layer: an ``(h, c)`` tuple for LSTM,
            a single tensor for QRNN/GRU.
        """
        weight = next(self.parameters()).data
        if self.rnn_type == 'LSTM':
            return [(weight.new(1, bsz, self._hidden_size(l)).zero_(),
                     weight.new(1, bsz, self._hidden_size(l)).zero_())
                    for l in range(self.nlayers)]
        elif self.rnn_type == 'QRNN' or self.rnn_type == 'GRU':
            return [weight.new(1, bsz, self._hidden_size(l)).zero_()
                    for l in range(self.nlayers)]
def load_data(dataset, data_dir='datasets', **kwargs):
    """
    Load the train/val/test splits of a dataset from ``.npy`` files.

    Files are expected at ``<data_dir>/<dataset>-<x|y>_<train|val|test>.npy``.

    :param dataset: dataset name used as the file-name prefix.
    :param data_dir: directory containing the ``.npy`` files.
    :param kwargs: accepted and ignored.
    :return: ``(x_train, x_val, x_test, y_train, y_val, y_test)``
    """
    def _load(part):
        # e.g. part='x_train' -> '<data_dir>/<dataset>-x_train.npy'
        return np.load(os.path.join(data_dir, dataset + '-' + part + '.npy'))

    # Same load order as the file layout: all features first, then all labels.
    splits = ('train', 'val', 'test')
    x_train, x_val, x_test = [_load('x_' + split) for split in splits]
    y_train, y_val, y_test = [_load('y_' + split) for split in splits]
    print(dataset, 'loaded. x shape =', x_train.shape, x_val.shape, x_test.shape)
    return x_train, x_val, x_test, y_train, y_val, y_test
def descending(x):
    """
    Return the running minimum of ``x`` as a list.

    Element ``i`` of the result is ``min(x[0], ..., x[i])``, so the returned
    curve never increases. An empty input yields an empty list.

    :param x: iterable of comparable values.
    :return: list with the same length as ``x``.
    """
    from itertools import accumulate

    return list(accumulate(x, min))
# step 1: cut off
def cut_off(model, dataset, mths, old_runtime_limit, new_runtime_limit):
    """
    Copy each method's record files into the directory for the new runtime
    limit, dropping records whose ``global_time`` exceeds that limit.

    Nothing is copied when both limits are equal.

    :param model: model name used in the ``data/benchmark_<model>`` path.
    :param dataset: dataset name used in directory and file names.
    :param mths: iterable of method names, one sub-directory each.
    :param old_runtime_limit: runtime limit of the existing records.
    :param new_runtime_limit: runtime limit to truncate the records to.
    :raises Exception: if a target directory already exists.
    """
    print('===== step 1: cut off')
    print(old_runtime_limit, new_runtime_limit)
    if old_runtime_limit == new_runtime_limit:
        return

    dir_pattern = 'data/benchmark_%s/%s-%d/%s/'
    for mth in mths:
        src_dir = dir_pattern % (model, dataset, old_runtime_limit, mth)
        dst_dir = dir_pattern % (model, dataset, new_runtime_limit, mth)
        # Refuse to write into an existing target directory.
        if os.path.exists(dst_dir):
            raise Exception('please checkout. new dir already exists!')
        os.makedirs(dst_dir)

        prefix = 'record_%s-%s-' % (mth, dataset)
        for file_name in os.listdir(src_dir):
            if not (file_name.startswith(prefix) and file_name.endswith('.pkl')):
                continue
            with open(os.path.join(src_dir, file_name), 'rb') as src:
                raw_recorder = pkl.load(src)
            recorder = [record for record in raw_recorder
                        if not record['global_time'] > new_runtime_limit]
            # write new
            with open(os.path.join(dst_dir, file_name), 'wb') as dst:
                pkl.dump(recorder, dst)
            print('recorder len:', mth, len(recorder), len(raw_recorder))
if __name__ == '__main__':
    # Command-line entry point: run the three post-processing steps in order.
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, required=True)
    # Comma-separated method names, e.g. "hyperband,bohb". This was previously
    # unchecked, so omitting it crashed with AttributeError on ``None.split``.
    parser.add_argument('--mths', type=str, required=True)
    parser.add_argument('--R', type=int, default=27)
    parser.add_argument('--old', type=int, required=True)
    parser.add_argument('--new', type=int, required=True)
    parser.add_argument('--model', type=str, default='xgb')

    args = parser.parse_args()
    dataset = args.dataset
    # Drop empty entries so "a,,b" or a trailing comma does not yield a '' method.
    mths = [mth.strip() for mth in args.mths.split(',') if mth.strip()]
    if not mths:
        parser.error('--mths must name at least one method')
    R = args.R
    old_runtime_limit = args.old
    new_runtime_limit = args.new
    model = args.model

    # process
    cut_off(model, dataset, mths, old_runtime_limit, new_runtime_limit)
    remove_partial(model, dataset, mths, new_runtime_limit, R)
    get_incumbent(model, dataset, mths, new_runtime_limit)
class async_mqBOHB(async_mqHyperband):
    """
    Asynchronous BOHB: the asynchronous Hyperband scheduling of the parent class
    combined with BOHB-style model-based sampling of new configurations.
    No median imputation is performed.
    """

    def __init__(self, objective_func,
                 config_space: ConfigurationSpace,
                 R,
                 eta=3,
                 skip_outer_loop=0,
                 rand_prob=0.3,
                 bo_init_num=3,
                 random_state=1,
                 method_id='mqAsyncBOHB',
                 restart_needed=True,
                 time_limit_per_trial=600,
                 runtime_limit=None,
                 ip='',
                 port=13579,
                 authkey=b'abc'):
        """
        Forward the scheduling options to the parent class, then build the BO parts.

        :param rand_prob: probability of sampling a random configuration instead of
            asking the BO model (see choose_next).
        :param bo_init_num: number of full-resource observations required before
            the BO model is consulted at all.
        All remaining parameters are passed unchanged to async_mqHyperband.
        """
        parent_options = dict(
            eta=eta, skip_outer_loop=skip_outer_loop,
            random_state=random_state, method_id=method_id, restart_needed=restart_needed,
            time_limit_per_trial=time_limit_per_trial, runtime_limit=runtime_limit,
            ip=ip, port=port, authkey=authkey,
        )
        super().__init__(objective_func, config_space, R, **parent_options)

        self.rand_prob = rand_prob
        self.bo_init_num = bo_init_num

        # random forest surrogate + EI acquisition, maximized by random sampling
        types, bounds = get_types(config_space)
        self.surrogate = RandomForestWithInstances(types=types, bounds=bounds)
        self.acquisition_function = EI(model=self.surrogate)
        n_acq_samples = max(5000, 50 * len(bounds))
        self.acq_optimizer = RandomSampling(self.acquisition_function, config_space,
                                            n_samples=n_acq_samples)
        # self.seed is not assigned here; presumably set by the parent constructor
        self.rng = np.random.RandomState(self.seed)

    def update_observation(self, config, perf, n_iteration):
        """
        Mark the running job of `config` in the matching rung as completed and,
        for full-resource results, retrain the surrogate.

        :param config: configuration whose evaluation finished.
        :param perf: observed performance, stored in the job entry.
        :param n_iteration: resource the configuration was evaluated with.
        """
        rung_id = self.get_rung_id(self.bracket, n_iteration)

        for job in self.bracket[rung_id]['jobs']:
            status, job_config, _, _ = job
            if job_config == config:
                assert status == RUNNING
                job[0] = COMPLETED
                job[2] = perf
                break
        else:
            # the finished config must have a job entry in this rung
            assert False
        # print('=== bracket after update_observation:', self.get_bracket_status(self.bracket))

        if n_iteration != self.R:
            return

        # only full-resource observations feed the BO model
        self.incumbent_configs.append(config)
        self.incumbent_perfs.append(perf)
        # train BO surrogate
        train_perfs = np.array(std_normalization(self.incumbent_perfs), dtype=np.float64)
        train_configs = convert_configurations_to_array(self.incumbent_configs)
        self.surrogate.train(train_configs, train_perfs)

    def choose_next(self):
        """
        sample a config according to BOHB. give iterations according to Hyperband strategy.

        :return: tuple (next_config, next_n_iteration, next_extra_conf), where
            next_extra_conf is always an empty dict.
        """
        next_n_iteration = self.get_next_n_iteration()
        next_rung_id = self.get_rung_id(self.bracket, next_n_iteration)

        # configs already present in the target rung must not be proposed again
        excluded_configs = self.bracket[next_rung_id]['configs']

        # random sampling: too few observations yet, or with probability rand_prob
        if len(self.incumbent_configs) < self.bo_init_num or self.rng.random() < self.rand_prob:
            random_config = sample_configuration(self.config_space, excluded_configs=excluded_configs)
            return random_config, next_n_iteration, {}

        # BO
        start_time = time.time()
        best_config = self.incumbent_configs[np.argmin(self.incumbent_perfs)]
        std_incumbent_value = np.min(std_normalization(self.incumbent_perfs))
        # Update surrogate model in acquisition function.
        self.acquisition_function.update(model=self.surrogate, eta=std_incumbent_value,
                                         num_data=len(self.incumbent_configs))
        candidates = self.acq_optimizer.maximize(best_config=best_config, batch_size=5000)
        time1 = time.time()

        # first candidate that is not a duplicate within the target rung
        next_config = next(
            (candidate for candidate in candidates if candidate not in excluded_configs),
            None,
        )
        if next_config is None:
            self.logger.warning('Cannot get a non duplicate configuration from bo candidates. '
                                'Sample a random one.')
            next_config = sample_configuration(self.config_space, excluded_configs=excluded_configs)

        time2 = time.time()
        if time2 - start_time > 1:
            self.logger.info('BO opt cost %.2fs. check duplication cost %.2fs. len of incumbent_configs: %d.'
                             % (time1-start_time, time2-time1, len(self.incumbent_configs)))

        return next_config, next_n_iteration, {}
algo_class, algo_kwargs): 35 | algo_kwargs['ip'] = '' 36 | algo_kwargs['port'] = port 37 | algo = algo_class(**algo_kwargs) 38 | 39 | tmp_path = os.path.join(dir_path, 'tmp') 40 | algo.set_save_intermediate_record(tmp_path, file_name) 41 | 42 | algo.run() 43 | try: 44 | algo.logger.info('===== bracket status: %s' % algo.get_bracket_status(algo.bracket)) 45 | except Exception as e: 46 | pass 47 | try: 48 | algo.logger.info('===== brackets status: %s' % algo.get_brackets_status(algo.brackets)) 49 | except Exception as e: 50 | pass 51 | return_list.extend(algo.recorder) # send to return list 52 | 53 | def worker_run(i): 54 | device = 'cuda:%d' % i # gpu 55 | if parallel_strategy == 'sync': 56 | worker = mqmfWorker_gpu(objective_function_gpu, device, ip, port) 57 | elif parallel_strategy == 'async': 58 | worker = async_mqmfWorker_gpu(objective_function_gpu, device, ip, port) 59 | else: 60 | raise ValueError('Error parallel_strategy: %s.' % parallel_strategy) 61 | worker.run() 62 | print("Worker %d exit." 
def run_exp(dataset, algo_class, algo_kwargs, algo_name, n_workers, parallel_strategy,
            R, n_jobs, runtime_limit, time_limit_per_trial, start_id, rep, ip, port,
            eta=3, pre_sample=False, run_test=False):
    """
    Run `rep` repetitions of one tuning algorithm on the ResNet / cifar10 benchmark.

    Each repetition uses seeds[i] for i in [start_id, start_id + rep), runs
    evaluate_parallel(), and pickles the returned recorder to
    'data/benchmark_resnet/<dataset>-<runtime_limit>/<algo_name>-n<n_workers>/record_<method_id>.pkl'.
    After the repetitions the saved records are post-processed with
    remove_partial() and get_incumbent(); a failure there is printed, not raised.

    :param dataset: must be 'cifar10'.
    :param algo_class: tuner class, instantiated inside evaluate_parallel().
    :param algo_kwargs: keyword arguments for algo_class. NOTE: this dict is
        updated in place (objective_func, config_space, random_state, method_id,
        runtime_limit, time_limit_per_trial).
    :param algo_name: method name used in directory and file names.
    :param n_workers: number of workers.
    :param parallel_strategy: forwarded to evaluate_parallel().
    :param R: full resource, forwarded to evaluate_parallel() and remove_partial().
    :param n_jobs: ignored apart from the start-up message.
    :param runtime_limit: runtime limit, stored in algo_kwargs and in the directory name.
    :param time_limit_per_trial: per-trial time limit, stored in algo_kwargs.
    :param start_id: index of the first seed to use.
    :param rep: number of repetitions.
    :param ip: forwarded to evaluate_parallel().
    :param port: forwarded to evaluate_parallel().
    :param eta: forwarded to evaluate_parallel().
    :param pre_sample: ignored.
    :param run_test: forwarded to evaluate_parallel().
    """
    # n_jobs / pre_sample are ignored
    assert dataset == 'cifar10'
    model = 'resnet'

    # setup
    print('===== start eval %s: rep=%d, n_jobs=%d, runtime_limit=%d, time_limit_per_trial=%d'
          % (dataset, rep, n_jobs, runtime_limit, time_limit_per_trial))
    for i in range(start_id, start_id + rep):
        seed = seeds[i]

        timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
        method_str = '%s-n%d' % (algo_name, n_workers)
        method_id = method_str + '-%s-%d-%s' % (dataset, seed, timestamp)

        # ip, port are filled in evaluate_parallel()
        algo_kwargs['objective_func'] = None
        algo_kwargs['config_space'] = ResNet32Classifier.get_hyperparameter_search_space()
        algo_kwargs['random_state'] = seed
        algo_kwargs['method_id'] = method_id
        algo_kwargs['runtime_limit'] = runtime_limit
        algo_kwargs['time_limit_per_trial'] = time_limit_per_trial

        dir_path = 'data/benchmark_%s/%s-%d/%s/' % (model, dataset, runtime_limit, method_str)
        file_name = 'record_%s.pkl' % (method_id,)

        recorder = evaluate_parallel(
            algo_class, algo_kwargs, method_id, n_workers, dataset, seed, ip, port,
            parallel_strategy, n_jobs, R, eta=eta, run_test=run_test,
            dir_path=dir_path, file_name=file_name,
        )

        # exist_ok=True replaces the exists()/makedirs()/except FileExistsError dance:
        # it is safe when another process creates the directory concurrently.
        os.makedirs(dir_path, exist_ok=True)
        with open(os.path.join(dir_path, file_name), 'wb') as f:
            pkl.dump(recorder, f)
        print(dir_path, file_name, 'saved!', flush=True)

        # NOTE(review): one-hour pause after every repetition when rep > 1;
        # presumably a cool-down between runs - confirm it is still wanted.
        if rep > 1:
            time.sleep(3600)

    # post-processing is best-effort: report the traceback but do not fail the run
    try:
        remove_partial(model, dataset, [method_str], runtime_limit, R)
        get_incumbent(model, dataset, [method_str], runtime_limit)
    except Exception:
        print('benchmark process record failed: %s' % (traceback.format_exc(),))
'./test/awd_lstm_lm/data/penn' 30 | corpus = get_corpus(data_path) 31 | 32 | model_dir = os.path.join('./data/lstm_save_models', method_id) 33 | objective_function_gpu = partial(mf_objective_func_gpu, total_resource=R, run_test=run_test, 34 | model_dir=model_dir, eta=eta, corpus=corpus) 35 | 36 | def master_run(return_list, algo_class, algo_kwargs): 37 | algo_kwargs['ip'] = '' 38 | algo_kwargs['port'] = port 39 | algo = algo_class(**algo_kwargs) 40 | 41 | tmp_path = os.path.join(dir_path, 'tmp') 42 | algo.set_save_intermediate_record(tmp_path, file_name) 43 | 44 | algo.run() 45 | try: 46 | algo.logger.info('===== bracket status: %s' % algo.get_bracket_status(algo.bracket)) 47 | except Exception as e: 48 | pass 49 | try: 50 | algo.logger.info('===== brackets status: %s' % algo.get_brackets_status(algo.brackets)) 51 | except Exception as e: 52 | pass 53 | return_list.extend(algo.recorder) # send to return list 54 | 55 | def worker_run(i): 56 | device = 'cuda:%d' % i # gpu 57 | if parallel_strategy == 'sync': 58 | worker = mqmfWorker_gpu(objective_function_gpu, device, ip, port) 59 | elif parallel_strategy == 'async': 60 | worker = async_mqmfWorker_gpu(objective_function_gpu, device, ip, port) 61 | else: 62 | raise ValueError('Error parallel_strategy: %s.' % parallel_strategy) 63 | worker.run() 64 | print("Worker %d exit." 
def run_exp(dataset, algo_class, algo_kwargs, algo_name, n_workers, parallel_strategy,
            R, n_jobs, runtime_limit, time_limit_per_trial, start_id, rep, ip, port,
            eta=3, pre_sample=False, run_test=False):
    """
    Run `rep` repetitions of one tuning algorithm on the LSTM / penn benchmark.

    Each repetition uses seeds[i] for i in [start_id, start_id + rep), runs
    evaluate_parallel(), and pickles the returned recorder to
    'data/benchmark_lstm/<dataset>-<runtime_limit>/<algo_name>-n<n_workers>/record_<method_id>.pkl'.
    After the repetitions the saved records are post-processed with
    remove_partial() and get_incumbent(); a failure there is printed, not raised.

    :param dataset: must be 'penn'.
    :param algo_class: tuner class, instantiated inside evaluate_parallel().
    :param algo_kwargs: keyword arguments for algo_class. NOTE: this dict is
        updated in place (objective_func, config_space, random_state, method_id,
        runtime_limit, time_limit_per_trial).
    :param algo_name: method name used in directory and file names.
    :param n_workers: number of workers.
    :param parallel_strategy: forwarded to evaluate_parallel().
    :param R: full resource, forwarded to evaluate_parallel() and remove_partial().
    :param n_jobs: ignored apart from the start-up message.
    :param runtime_limit: runtime limit, stored in algo_kwargs and in the directory name.
    :param time_limit_per_trial: per-trial time limit, stored in algo_kwargs.
    :param start_id: index of the first seed to use.
    :param rep: number of repetitions.
    :param ip: forwarded to evaluate_parallel().
    :param port: forwarded to evaluate_parallel().
    :param eta: forwarded to evaluate_parallel().
    :param pre_sample: ignored.
    :param run_test: forwarded to evaluate_parallel().
    """
    # n_jobs / pre_sample are ignored
    assert dataset == 'penn'
    model = 'lstm'

    # setup
    print('===== start eval %s: rep=%d, n_jobs=%d, runtime_limit=%d, time_limit_per_trial=%d'
          % (dataset, rep, n_jobs, runtime_limit, time_limit_per_trial))
    for i in range(start_id, start_id + rep):
        seed = seeds[i]

        timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
        method_str = '%s-n%d' % (algo_name, n_workers)
        method_id = method_str + '-%s-%d-%s' % (dataset, seed, timestamp)

        # ip, port are filled in evaluate_parallel()
        algo_kwargs['objective_func'] = None
        algo_kwargs['config_space'] = get_lstm_configspace()
        algo_kwargs['random_state'] = seed
        algo_kwargs['method_id'] = method_id
        algo_kwargs['runtime_limit'] = runtime_limit
        algo_kwargs['time_limit_per_trial'] = time_limit_per_trial

        dir_path = 'data/benchmark_%s/%s-%d/%s/' % (model, dataset, runtime_limit, method_str)
        file_name = 'record_%s.pkl' % (method_id,)

        recorder = evaluate_parallel(
            algo_class, algo_kwargs, method_id, n_workers, dataset, seed, ip, port,
            parallel_strategy, n_jobs, R, eta=eta, run_test=run_test,
            dir_path=dir_path, file_name=file_name,
        )

        # exist_ok=True replaces the exists()/makedirs()/except FileExistsError dance:
        # it is safe when another process creates the directory concurrently.
        os.makedirs(dir_path, exist_ok=True)
        with open(os.path.join(dir_path, file_name), 'wb') as f:
            pkl.dump(recorder, f)
        print(dir_path, file_name, 'saved!', flush=True)

        # NOTE(review): one-hour pause after every repetition when rep > 1;
        # presumably a cool-down between runs - confirm it is still wanted.
        if rep > 1:
            time.sleep(3600)

    # post-processing is best-effort: report the traceback but do not fail the run
    try:
        remove_partial(model, dataset, [method_str], runtime_limit, R)
        get_incumbent(model, dataset, [method_str], runtime_limit)
    except Exception:
        print('benchmark process record failed: %s' % (traceback.format_exc(),))
print('extra_conf:', extra_conf) 33 | initial_run = extra_conf['initial_run'] 34 | try: 35 | if not os.path.exists(model_dir): 36 | os.makedirs(model_dir) 37 | except FileExistsError: 38 | pass 39 | 40 | data_transforms = get_transforms(image_size=image_size) 41 | image_data.load_data(data_transforms['train'], data_transforms['val']) 42 | start_time = time.time() 43 | 44 | config_dict = config.get_dictionary().copy() 45 | 46 | estimator = get_estimator(config_dict, max_epoch, device=device) 47 | 48 | epoch_ratio = float(n_resource) / float(total_resource) 49 | 50 | config_model_path = os.path.join(model_dir, 51 | 'tmp_' + get_path_by_config(config) + '_%d' % int(n_resource / eta) + '.pt') 52 | save_path = os.path.join(model_dir, 53 | 'tmp_' + get_path_by_config(config) + '_%d' % int(n_resource) + '.pt') 54 | 55 | # Continue training if initial_run=False 56 | if not initial_run: 57 | if not os.path.exists(config_model_path): 58 | raise ValueError('not initial_run but config_model_path not exists. 
check if exists duplicated configs ' 59 | 'and saved model were removed.') 60 | estimator.epoch_num = ceil(estimator.max_epoch * epoch_ratio) - ceil( 61 | estimator.max_epoch * epoch_ratio / eta) 62 | estimator.load_path = config_model_path 63 | print(estimator.epoch_num) 64 | else: 65 | estimator.epoch_num = ceil(estimator.max_epoch * epoch_ratio) 66 | 67 | try: 68 | score = dl_holdout_validation(estimator, scorer, image_data, random_state=1) 69 | except Exception as e: 70 | import traceback 71 | traceback.print_exc() 72 | score = -MAXINT 73 | print('Evaluation | Score: %.4f | Time cost: %.2f seconds' % 74 | (scorer._sign * score, 75 | time.time() - start_time)) 76 | print(str(config)) 77 | 78 | # Save low-resource models 79 | if np.isfinite(score) and epoch_ratio != 1.0: 80 | state = {'model': estimator.model.state_dict(), 81 | 'optimizer': estimator.optimizer_.state_dict(), 82 | 'scheduler': estimator.scheduler.state_dict(), 83 | 'cur_epoch_num': estimator.cur_epoch_num} 84 | torch.save(state, save_path) 85 | 86 | try: 87 | if epoch_ratio == 1: 88 | s_max = int(log(total_resource) / log(eta)) 89 | for i in range(0, s_max + 1): 90 | if os.path.exists(os.path.join(model_dir, 91 | 'tmp_' + get_path_by_config(config) + '_%d' % int(eta ** i) + '.pt')): 92 | os.remove(os.path.join(model_dir, 93 | 'tmp_' + get_path_by_config(config) + '_%d' % int(eta ** i) + '.pt')) 94 | except Exception as e: 95 | print('unexpected exception!') 96 | import traceback 97 | traceback.print_exc() 98 | 99 | # if np.isfinite(score): 100 | # save_flag, model_path, delete_flag, model_path_deleted = self.topk_model_saver.add(config, score) 101 | # if save_flag is True: 102 | # state = {'model': estimator.model.state_dict(), 103 | # 'optimizer': estimator.optimizer_.state_dict(), 104 | # 'scheduler': estimator.scheduler.state_dict(), 105 | # 'cur_epoch_num': estimator.cur_epoch_num, 106 | # 'early_stop': estimator.early_stop} 107 | # torch.save(state, model_path) 108 | # print("Model saved to 
def dl_holdout_validation(estimator, scorer, dataset, random_state=1, run_test=False, **kwargs):
    """
    Fit `estimator` on `dataset` and return either its signed score or the fit time.

    :param estimator: object providing fit(dataset, **kwargs) and
        score(dataset, score_func, run_test=...).
    :param scorer: object providing `_sign` and `_score_func`.
    :param dataset: passed unchanged to fit() and score().
    :param random_state: accepted but not used by this function.
    :param run_test: forwarded to estimator.score().
    :param kwargs: forwarded to estimator.fit(); when it contains 'profile_epoch'
        or 'profile_iter', the elapsed wall-clock time is returned instead of a score.
    :return: elapsed seconds (profiling mode) or scorer._sign * estimator.score(...).
    """
    began_at = time.time()
    profiling = 'profile_epoch' in kwargs or 'profile_iter' in kwargs
    with warnings.catch_warnings():
        # ignore all caught warnings
        warnings.filterwarnings("ignore")
        estimator.fit(dataset, **kwargs)
        if profiling:
            return time.time() - began_at
        raw_score = estimator.score(dataset, scorer._score_func, run_test=run_test)
        return scorer._sign * raw_score
def one_hot(idx, size, cuda=True):
    """
    Build a (1, size) float32 one-hot row with a 1 at position `idx`.

    :param idx: index of the entry set to 1.
    :param size: length of the one-hot row.
    :param cuda: when true, the result is moved to the GPU with .cuda().
    :return: torch Variable wrapping the one-hot row.
    """
    encoded = np.zeros((1, size), np.float32)
    first_row = encoded[0]  # view into `encoded`, so the write below lands in it
    first_row[idx] = 1
    tensor = Variable(torch.from_numpy(encoded))
    return tensor.cuda() if cuda else tensor
61 | if args.model == 'QRNN': model.reset() 62 | model.eval() 63 | total_loss = 0 64 | ntokens = len(corpus.dictionary) 65 | hidden = model.init_hidden(batch_size) 66 | next_word_history = None 67 | pointer_history = None 68 | for i in range(0, data_source.size(0) - 1, args.bptt): 69 | if i > 0: print(i, len(data_source), math.exp(total_loss / i)) 70 | data, targets = get_batch(data_source, i, evaluation=True, args=args) 71 | output, hidden, rnn_outs, _ = model(data, hidden, return_h=True) 72 | rnn_out = rnn_outs[-1].squeeze() 73 | output_flat = output.view(-1, ntokens) 74 | ### 75 | # Fill pointer history 76 | start_idx = len(next_word_history) if next_word_history is not None else 0 77 | next_word_history = torch.cat([one_hot(t.data[0], ntokens) for t in targets]) if next_word_history is None else torch.cat([next_word_history, torch.cat([one_hot(t.data[0], ntokens) for t in targets])]) 78 | #print(next_word_history) 79 | pointer_history = Variable(rnn_out.data) if pointer_history is None else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0) 80 | #print(pointer_history) 81 | ### 82 | # Built-in cross entropy 83 | # total_loss += len(data) * criterion(output_flat, targets).data[0] 84 | ### 85 | # Manual cross entropy 86 | # softmax_output_flat = torch.nn.functional.softmax(output_flat) 87 | # soft = torch.gather(softmax_output_flat, dim=1, index=targets.view(-1, 1)) 88 | # entropy = -torch.log(soft) 89 | # total_loss += len(data) * entropy.mean().data[0] 90 | ### 91 | # Pointer manual cross entropy 92 | loss = 0 93 | softmax_output_flat = torch.nn.functional.softmax(output_flat) 94 | for idx, vocab_loss in enumerate(softmax_output_flat): 95 | p = vocab_loss 96 | if start_idx + idx > window: 97 | valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx] 98 | valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx] 99 | logits = torch.mv(valid_pointer_history, rnn_out[idx]) 100 | theta = args.theta 101 | 
ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1) 102 | ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze() 103 | lambdah = args.lambdasm 104 | p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss 105 | ### 106 | target_loss = p[targets[idx].data] 107 | loss += (-torch.log(target_loss)).data[0] 108 | total_loss += loss / batch_size 109 | ### 110 | hidden = repackage_hidden(hidden) 111 | next_word_history = next_word_history[-window:] 112 | pointer_history = pointer_history[-window:] 113 | return total_loss / len(data_source) 114 | 115 | # Load the best saved model. 116 | with open(args.save, 'rb') as f: 117 | if not args.cuda: 118 | model = torch.load(f, map_location=lambda storage, loc: storage) 119 | else: 120 | model = torch.load(f) 121 | print(model) 122 | 123 | # Run on val data. 124 | val_loss = evaluate(val_data, test_batch_size) 125 | print('=' * 89) 126 | print('| End of pointer | val loss {:5.2f} | val ppl {:8.2f}'.format( 127 | val_loss, math.exp(val_loss))) 128 | print('=' * 89) 129 | 130 | # Run on test data. 
def fetch_color_marker(m_list):
    """
    Assign a plot color and marker to every method name in `m_list`.

    Names starting with 'hyperband' get entry 2 of the palettes; every other name
    is reported on stdout as undefined and falls back to entry 1.

    :param m_list: iterable of method names.
    :return: tuple (color_dict, marker_dict), both keyed by method name.
    """
    palette = ['purple', 'royalblue', 'green', 'brown', 'red', 'orange', 'yellowgreen', 'black', 'yellow']
    symbols = ['s', '^', '*', 'v', 'o', 'p', '2', 'x', 'd']

    colors = {}
    shapes = {}
    for method in m_list:
        if method.startswith('hyperband'):
            slot = 2
        else:
            print('color not defined:', method)
            slot = 1
        colors[method] = palette[slot]
        shapes[method] = symbols[slot]
    return colors, shapes
def plot_setup(_dataset):
    """
    Set the axis limits of the current matplotlib figure for `_dataset`.

    The y-range is dataset specific (no y-limit is set for an unknown dataset);
    the x-range always spans 0 .. runtime_limit, read from the module-level
    variable `runtime_limit`.

    :param _dataset: dataset name.
    """
    # (matcher, y-limits) pairs, checked in order; the first match wins
    y_limit_rules = (
        (lambda name: name == 'covtype', (-0.940, -0.880)),
        (lambda name: name == 'pokerhand', (-1.001, -0.951)),
        (lambda name: name.startswith('HIGGS'), (-0.756, -0.746)),
        (lambda name: name.startswith('hepmass'), (-0.8755, -0.8725)),
        (lambda name: name == 'cifar10-valid', (-91.65, -90.85)),
        (lambda name: name == 'cifar100', (-73.7, -70.7)),
        (lambda name: name == 'ImageNet16-120', (-47.0, -45.0)),
    )
    for matches, (y_low, y_high) in y_limit_rules:
        if matches(_dataset):
            plt.ylim(y_low, y_high)
            break
    plt.xlim(0, runtime_limit)
[rec['global_time'] for rec in recorder] 111 | perf = descending([rec['return_info']['loss'] for rec in recorder]) 112 | stats.append((timestamp, perf)) 113 | x, m, s = create_plot_points(stats, 0, runtime_limit, point_num=point_num, default=default_value) 114 | result[mth] = (x, m, s) 115 | # plot 116 | plt.plot(x, m, lw=lw, label=get_mth_legend(mth), 117 | #color=color_dict[mth], marker=marker_dict[mth], 118 | markersize=markersize, markevery=markevery) 119 | #plt.fill_between(x, m - s, m + s, alpha=alpha, facecolor=color_dict[mth]) 120 | 121 | # calculate speedup 122 | speedup_algo = 1 123 | print('===== mth - baseline - speedup ===== speedup_algo =', speedup_algo) 124 | for mth in mths: 125 | for baseline in mths: 126 | baseline_perf = result[baseline][1][-1] 127 | if speedup_algo == 1: # algo 1 128 | baseline_time = None 129 | x, m, s = result[baseline] 130 | x, m, s = x.tolist(), m.tolist(), s.tolist() 131 | for xi, mi, si in zip(x, m, s): 132 | if mi <= baseline_perf: 133 | baseline_time = xi 134 | break 135 | assert baseline_time is not None 136 | else: 137 | raise ValueError 138 | x, m, s = result[mth] 139 | x, m, s = x.tolist(), m.tolist(), s.tolist() 140 | mth_time = baseline_time 141 | for xi, mi, si in zip(x, m, s): 142 | if mi <= baseline_perf: 143 | mth_time = xi 144 | break 145 | speedup = baseline_time / mth_time 146 | print("%s %s %.2f" % (mth, baseline, speedup)) 147 | 148 | # print last val perf 149 | print('===== mth - last val perf =====') 150 | for mth in mths: 151 | x, m, s = result[mth] 152 | m = m[-1] 153 | s = s[-1] 154 | perfs = None 155 | if dataset in ['cifar10', 'cifar10-valid', 'cifar100', 'ImageNet16-120']: 156 | print(dataset, mth, perfs, u'%.2f\u00B1%.2f' % (m, s)) 157 | else: 158 | print(dataset, mth, perfs, u'%.4f\u00B1%.4f' % (m, s)) 159 | 160 | # show plot 161 | plt.legend(loc='upper right') 162 | plt.title("%s on %s" % (model, dataset), fontsize=16) 163 | plt.xlabel("Wall Clock Time (sec)", fontsize=16) 164 | 
plt.ylabel("Validation Error", fontsize=16) 165 | plt.tight_layout() 166 | plt.grid() 167 | plt.show() 168 | -------------------------------------------------------------------------------- /tuner/mq_bo.py: -------------------------------------------------------------------------------- 1 | import time 2 | import traceback 3 | import numpy as np 4 | from tuner.mq_base_facade import mqBaseFacade 5 | from tuner.utils import sample_configurations, expand_configurations 6 | 7 | from openbox.core.sync_batch_advisor import SyncBatchAdvisor, SUCCESS 8 | from openbox.utils.config_space import ConfigurationSpace 9 | from openbox.core.base import Observation 10 | 11 | 12 | class mqBO(mqBaseFacade): 13 | """ 14 | synchronous parallel Bayesian Optimization (using OpenBox) 15 | """ 16 | def __init__(self, objective_func, 17 | config_space: ConfigurationSpace, 18 | R, 19 | n_workers, 20 | num_iter=10000, 21 | bo_init_num=3, 22 | random_state=1, 23 | method_id='mqBO', 24 | restart_needed=True, 25 | time_limit_per_trial=600, 26 | runtime_limit=None, 27 | ip='', 28 | port=13579, 29 | authkey=b'abc', 30 | **kwargs): 31 | max_queue_len = max(1000, 3 * n_workers) # conservative design 32 | super().__init__(objective_func, method_name=method_id, 33 | restart_needed=restart_needed, time_limit_per_trial=time_limit_per_trial, 34 | runtime_limit=runtime_limit, 35 | max_queue_len=max_queue_len, ip=ip, port=port, authkey=authkey) 36 | self.seed = random_state 37 | self.config_space = config_space 38 | self.config_space.seed(self.seed) 39 | 40 | self.R = R 41 | self.n_workers = n_workers 42 | self.bo_init_num = bo_init_num 43 | # using median_imputation batch_strategy implemented in OpenBox to generate BO suggestions 44 | if 'task_info' in SyncBatchAdvisor.__init__.__code__.co_varnames: 45 | # old version OpenBox 46 | task_info = {'num_constraints': 0, 'num_objs': 1} 47 | task_kwargs = dict(task_info=task_info) 48 | else: 49 | task_kwargs = dict(num_objs=1, num_constraints=0) 50 | 
def run(self):
    """Drive the synchronous BO loop for ``self.num_iter`` rounds.

    Every round logs a separator and a progress line, calls
    ``self.iterate()``, logs the elapsed wall-clock time in minutes and then
    calls ``self.save_intermediate_statistics()``.

    Any ``Exception`` raised along the way ends the loop: the exception and
    its traceback are printed, the traceback is also sent to
    ``self.logger.error``, and the method returns normally (``None``).
    """
    try:
        for offset in range(self.num_iter):
            round_no = offset + 1  # rounds are reported 1-based
            self.logger.info('-' * 50)
            progress = "%s: %d/%d iteration starts" % (self.method_name, round_no, self.num_iter)
            self.logger.info(progress)

            began = time.time()
            self.iterate()
            minutes = (time.time() - began) / 60
            self.logger.info("iteration took %.2f min." % minutes)
            self.save_intermediate_statistics()
    except Exception as err:
        # Best-effort driver: report the failure on stdout and in the log,
        # then fall through instead of propagating.
        trace = traceback.format_exc()
        print(err)
        print(trace)
        self.logger.error(trace)
def iterate(self):
    """Evaluate one batch of candidates at full budget and report it to the advisor.

    The batch comes from ``self.get_bo_candidates()`` and is run through
    ``self.run_in_parallel`` with budget ``self.R``.  The configurations and
    their ``'loss'`` values are appended to ``self.incumbent_configs`` /
    ``self.incumbent_perfs``, one stage-history entry is recorded with the
    current ``self.global_incumbent``, and every (config, loss) pair is sent
    to ``self.config_advisor`` as a ``SUCCESS`` observation.
    """
    batch = self.get_bo_candidates()
    # The early-stop flags returned alongside the results are not used here.
    results, _early_stops = self.run_in_parallel(batch, self.R, None, initial_run=True)

    losses = []
    for result in results:
        losses.append(result['loss'])

    self.incumbent_configs.extend(batch)
    self.incumbent_perfs.extend(losses)
    self.add_stage_history(self.stage_id, self.global_incumbent)
    self.stage_id += 1

    # Feed the finished trials back into the BO advisor, one observation each.
    for cfg, loss in zip(batch, losses):
        self.config_advisor.update_observation(
            Observation(
                config=cfg, objs=[loss], constraints=None,
                trial_state=SUCCESS, elapsed_time=None,
            )
        )
        self.logger.info('update BO observation: config=%s, perf=%f' % (str(cfg), loss))
class mqSuccessiveHalving(mqBaseFacade):
    """Successive Halving Algorithm (SHA) on top of ``mqBaseFacade``.

    One call to :meth:`iterate` samples ``int(R)`` configurations, evaluates
    them with the smallest budget ``R * eta ** (-s_max)`` and then repeatedly
    keeps the best ``1 / eta`` fraction while multiplying the budget by
    ``eta``.
    """

    def __init__(self, objective_func,
                 config_space: ConfigurationSpace,
                 R,
                 eta=3,
                 num_iter=10000,
                 random_state=1,
                 method_id='mqSuccessiveHalving',
                 restart_needed=True,
                 time_limit_per_trial=600,
                 runtime_limit=None,
                 ip='',
                 port=13579,
                 authkey=b'abc',):
        """Set up the facade and the SHA schedule.

        :param objective_func: forwarded unchanged to ``mqBaseFacade``.
        :param config_space: search space; seeded with ``random_state``.
        :param R: maximum iterations per configuration.
        :param eta: configuration downsampling rate (default 3).
        :param num_iter: number of :meth:`iterate` calls made by :meth:`run`.
        :param random_state: seed applied to ``config_space``.
        :param method_id: passed to the base class as ``method_name``.
        :param restart_needed: when false, later stages are only charged the
            budget increment over the previous stage (see :meth:`iterate`).
        :param time_limit_per_trial: forwarded to the base class.
        :param runtime_limit: forwarded to the base class.
        :param ip: forwarded to the base class.
        :param port: forwarded to the base class.
        :param authkey: forwarded to the base class.
        """
        max_queue_len = 1000  # conservative design
        super().__init__(objective_func, method_name=method_id,
                         restart_needed=restart_needed, time_limit_per_trial=time_limit_per_trial,
                         runtime_limit=runtime_limit,
                         max_queue_len=max_queue_len, ip=ip, port=port, authkey=authkey)
        self.seed = random_state
        self.config_space = config_space
        self.config_space.seed(self.seed)

        self.num_iter = num_iter
        self.R = R      # Maximum iterations per configuration
        self.eta = eta  # Define configuration downsampling rate (default = 3)
        self.logeta = lambda x: log(x) / log(self.eta)
        self.s_max = int(self.logeta(self.R))
        # log(R) / log(eta) can land just below an integer when R is an exact
        # power of eta (e.g. R=243, eta=3 -> 4.999...), and int() would then
        # drop the largest stage.  Bump s_max when the next power still fits.
        if self.eta ** (self.s_max + 1) <= self.R:
            self.s_max += 1

        self.incumbent_configs = list()
        self.incumbent_perfs = list()

    # This function can be called multiple times
    def iterate(self, skip_last=0):
        """Run one SHA bracket.

        :param skip_last: number of final (largest-budget) stages to skip.
        """
        s = self.s_max
        # Initial number of configurations
        n = int(self.R)
        # Initial number of iterations per config
        r = self.R * self.eta ** (-s)

        # Choose next n configurations.
        T = self.choose_next(n)
        incumbent_loss = np.inf
        extra_info = None
        last_run_num = None
        initial_run = True
        for i in range((s + 1) - int(skip_last)):  # Changed from s + 1
            # Run each of the n configs for the stage budget
            # and keep best (n_configs / eta) configurations.
            n_configs = n * self.eta ** (-i)
            n_iteration = r * self.eta ** (i)
            n_iter = n_iteration
            if last_run_num is not None and not self.restart_needed:
                # Trials resume from the previous stage, so only the budget
                # increment has to be run.
                n_iter -= last_run_num
            last_run_num = n_iteration

            self.logger.info("%s: %d configurations x %d iterations each"
                             % (self.method_name, int(n_configs), int(n_iteration)))

            ret_val, early_stops = self.run_in_parallel(T, n_iter, extra_info, initial_run)
            initial_run = False
            val_losses = [item['loss'] for item in ret_val]
            ref_list = [item['ref_id'] for item in ret_val]

            self.update_incumbent_before_reduce(T, val_losses, n_iteration)

            # select a number of best configurations for the next loop
            # filter out early stops, if any
            indices = np.argsort(val_losses)
            if len(T) == sum(early_stops):
                break
            if len(T) >= self.eta:
                indices = [idx for idx in indices if not early_stops[idx]]
                T = [T[idx] for idx in indices]
                extra_info = [ref_list[idx] for idx in indices]
                reduced_num = int(n_configs / self.eta)
                T = T[0:reduced_num]
                extra_info = extra_info[0:reduced_num]
            else:
                T = [T[indices[0]]]  # todo: confirm no filter early stops?
                extra_info = [ref_list[indices[0]]]
            val_losses = [val_losses[idx] for idx in indices][0:len(T)]  # update: sorted
            incumbent_loss = val_losses[0]
            self.add_stage_history(self.stage_id, min(self.global_incumbent, incumbent_loss))
            self.stage_id += 1
        # NOTE(review): the source dump lost its indentation; the incumbent
        # update is assumed to run once per bracket, after the stage loop.
        self.update_incumbent_after_reduce(T, incumbent_loss)
        # self.remove_immediate_model()

    def run(self, skip_last=0):
        """Call :meth:`iterate` ``self.num_iter`` times, then log every incumbent.

        Exceptions are printed and logged with their traceback instead of
        being propagated.

        :param skip_last: forwarded to :meth:`iterate`.
        """
        try:
            for iteration in range(1, 1 + self.num_iter):
                self.logger.info('-' * 50)
                self.logger.info("%s algorithm: %d/%d iteration starts"
                                 % (self.method_name, iteration, self.num_iter))
                start_time = time.time()
                self.iterate(skip_last=skip_last)
                time_elapsed = (time.time() - start_time) / 60
                self.logger.info("Iteration took %.2f min." % time_elapsed)
                self.save_intermediate_statistics()
            # NOTE(review): assumed to be a one-off summary after all
            # iterations (indentation was lost in the dump) - confirm.
            for rank, (config, perf) in enumerate(zip(self.incumbent_configs, self.incumbent_perfs), start=1):
                self.logger.info('%d-th config: %s, obj: %f.' % (rank, str(config), perf))
        except Exception as e:
            print(e)
            print(traceback.format_exc())
            self.logger.error(traceback.format_exc())
            # Clean the immediate results.
            # self.remove_immediate_model()

    def choose_next(self, num_config):
        """Return ``num_config`` configurations from ``sample_configurations``."""
        # Sample n configurations uniformly.
        return sample_configurations(self.config_space, num_config)

    def update_incumbent_before_reduce(self, T, val_losses, n_iteration):
        """Hook called after each stage's evaluations; a no-op in this class."""
        return

    def update_incumbent_after_reduce(self, T, incumbent_loss):
        """Record ``T[0]`` with ``incumbent_loss`` unless the loss is NaN.

        ``T`` is expected to be sorted by loss (best first).
        """
        if not np.isnan(incumbent_loss):
            self.incumbent_configs.append(T[0])
            self.incumbent_perfs.append(incumbent_loss)

    def get_incumbent(self, num_inc=1):
        """Return the ``num_inc`` best recorded (configs, perfs), lowest loss first."""
        assert (len(self.incumbent_perfs) == len(self.incumbent_configs))
        indices = np.argsort(self.incumbent_perfs)
        configs = [self.incumbent_configs[i] for i in indices[0:num_inc]]
        perfs = [self.incumbent_perfs[i] for i in indices[0: num_inc]]
        return configs, perfs
def __init__(self, objective_func,
             config_space: ConfigurationSpace,
             R,
             eta=3,
             num_iter=10000,
             random_state=1,
             method_id='mqHyperband',
             restart_needed=True,
             time_limit_per_trial=600,
             runtime_limit=None,
             ip='',
             port=13579,
             authkey=b'abc',):
    """Set up the facade and the Hyperband schedule.

    :param objective_func: forwarded unchanged to ``mqBaseFacade``.
    :param config_space: search space; seeded with ``random_state``.
    :param R: maximum iterations per configuration.
    :param eta: configuration downsampling rate (default 3).
    :param num_iter: stored as ``self.num_iter``.
    :param random_state: seed applied to ``config_space``.
    :param method_id: passed to the base class as ``method_name``.
    :param restart_needed: forwarded to the base class.
    :param time_limit_per_trial: forwarded to the base class.
    :param runtime_limit: forwarded to the base class.
    :param ip: forwarded to the base class.
    :param port: forwarded to the base class.
    :param authkey: forwarded to the base class.
    """
    max_queue_len = 1000  # conservative design
    super().__init__(objective_func, method_name=method_id,
                     restart_needed=restart_needed, time_limit_per_trial=time_limit_per_trial,
                     runtime_limit=runtime_limit,
                     max_queue_len=max_queue_len, ip=ip, port=port, authkey=authkey)
    self.seed = random_state
    self.config_space = config_space
    self.config_space.seed(self.seed)

    self.num_iter = num_iter
    self.R = R      # Maximum iterations per configuration
    self.eta = eta  # Define configuration downsampling rate (default = 3)
    self.logeta = lambda x: log(x) / log(self.eta)
    self.s_max = int(self.logeta(self.R))
    # log(R) / log(eta) can land just below an integer when R is an exact
    # power of eta (e.g. R=243, eta=3 -> 4.999...), and int() would then
    # drop the most aggressive bracket.  Bump s_max when the next power
    # of eta still fits into R.
    if self.eta ** (self.s_max + 1) <= self.R:
        self.s_max += 1
    self.B = (self.s_max + 1) * self.R

    self.incumbent_configs = list()
    self.incumbent_perfs = list()
64 | 65 | n_configs = n * self.eta ** (-i) 66 | n_iteration = r * self.eta ** (i) 67 | n_iter = n_iteration 68 | if last_run_num is not None and not self.restart_needed: 69 | n_iter -= last_run_num 70 | last_run_num = n_iteration 71 | 72 | self.logger.info("%s: %d configurations x %d iterations each" 73 | % (self.method_name, int(n_configs), int(n_iteration))) 74 | 75 | ret_val, early_stops = self.run_in_parallel(T, n_iter, extra_info, initial_run) 76 | initial_run = False 77 | val_losses = [item['loss'] for item in ret_val] 78 | ref_list = [item['ref_id'] for item in ret_val] 79 | 80 | self.update_incumbent_before_reduce(T, val_losses, n_iteration) 81 | 82 | # select a number of best configurations for the next loop 83 | # filter out early stops, if any 84 | indices = np.argsort(val_losses) 85 | if len(T) == sum(early_stops): 86 | break 87 | if len(T) >= self.eta: 88 | indices = [i for i in indices if not early_stops[i]] 89 | T = [T[i] for i in indices] 90 | extra_info = [ref_list[i] for i in indices] 91 | reduced_num = int(n_configs / self.eta) 92 | T = T[0:reduced_num] 93 | extra_info = extra_info[0:reduced_num] 94 | else: 95 | T = [T[indices[0]]] # todo: confirm no filter early stops? 96 | extra_info = [ref_list[indices[0]]] 97 | val_losses = [val_losses[i] for i in indices][0:len(T)] # update: sorted 98 | incumbent_loss = val_losses[0] 99 | self.add_stage_history(self.stage_id, min(self.global_incumbent, incumbent_loss)) 100 | self.stage_id += 1 101 | self.update_incumbent_after_reduce(T, incumbent_loss) 102 | # self.remove_immediate_model() 103 | 104 | def run(self, skip_last=0): 105 | try: 106 | for iter in range(1, 1 + self.num_iter): 107 | self.logger.info('-' * 50) 108 | self.logger.info("%s algorithm: %d/%d iteration starts" % (self.method_name, iter, self.num_iter)) 109 | start_time = time.time() 110 | self.iterate(skip_last=skip_last) 111 | time_elapsed = (time.time() - start_time) / 60 112 | self.logger.info("Iteration took %.2f min." 
class async_mqBaseFacade(object):
    """Base class of the asynchronous message-queue master.

    The master receives observations from workers through a
    ``MasterMessager``, passes them to :meth:`update_observation` and answers
    every message with a new job from :meth:`get_job`.  Subclasses implement
    those two methods.
    """

    def __init__(self, objective_func,
                 restart_needed=False,
                 need_lc=False,
                 method_name='default_method_name',
                 log_directory='logs',
                 data_directory='data',
                 time_limit_per_trial=600,
                 runtime_limit=None,
                 max_queue_len=1000,
                 ip='',
                 port=13579,
                 authkey=b'abc',
                 sleep_time=0.1,):
        """Create directories, logger, bookkeeping state and the messager.

        :param objective_func: stored as ``self.objective_func``.
        :param restart_needed: stored as ``self.restart_needed``.
        :param need_lc: stored as ``self.record_lc``.
        :param method_name: used for the log file name; must not be None.
        :param log_directory: directory for log files (created if missing).
        :param data_directory: data directory (created if missing).
        :param time_limit_per_trial: sent to workers with every job.
        :param runtime_limit: wall-clock budget in seconds for :meth:`run`;
            must not be None.
        :param max_queue_len: queue length for the messager (at least 1000).
        :param ip: passed to ``MasterMessager``.
        :param port: passed to ``MasterMessager``.
        :param authkey: passed to ``MasterMessager``.
        :param sleep_time: seconds to sleep when no message is waiting.
        :raises ValueError: if ``method_name`` or ``runtime_limit`` is None.
        """
        # Validate first so that a bad call leaves no directories/log files
        # behind; explicit raises also survive ``python -O`` (unlike assert).
        if method_name is None:
            raise ValueError('Method name must be specified! NOT NONE.')
        if runtime_limit is None:
            raise ValueError('runtime_limit must be specified! NOT NONE.')

        self.log_directory = log_directory
        os.makedirs(self.log_directory, exist_ok=True)  # race-free create
        self.data_directory = data_directory
        os.makedirs(self.data_directory, exist_ok=True)

        self.logger = self._get_logger(method_name)

        self.objective_func = objective_func
        self.trial_statistics = list()
        self.recorder = list()

        self.global_start_time = time.time()
        self._history = {"time_elapsed": list(), "performance": list(),
                         "best_trial_id": list(), "configuration": list()}
        self.global_incumbent = 1e10
        self.global_incumbent_configuration = None
        self.global_trial_counter = 0
        self.restart_needed = restart_needed
        self.record_lc = need_lc
        self.method_name = method_name
        # evaluation metrics
        self.stage_id = 1
        self.stage_history = {'stage_id': list(), 'performance': list()}
        self.grid_search_perf = list()

        self.save_intermediate_record = False
        self.save_intermediate_record_id = 0
        self.save_intermediate_record_path = None

        self.time_limit_per_trial = time_limit_per_trial
        self.runtime_limit = runtime_limit

        max_queue_len = max(1000, max_queue_len)
        self.master_messager = MasterMessager(ip, port, authkey, max_queue_len, max_queue_len)
        self.sleep_time = sleep_time

    def set_restart(self):
        """Set ``self.restart_needed`` to True."""
        self.restart_needed = True

    def set_method_name(self, name):
        """Replace ``self.method_name``."""
        self.method_name = name

    def add_stage_history(self, stage_id, performance):
        """Append one (stage_id, performance) entry to ``self.stage_history``."""
        self.stage_history['stage_id'].append(stage_id)
        self.stage_history['performance'].append(performance)

    def add_history(self, time_elapsed, performance, trial_id, config):
        """Append one entry to each list of ``self._history``."""
        self._history['time_elapsed'].append(time_elapsed)
        self._history['performance'].append(performance)
        self._history['best_trial_id'].append(trial_id)
        self._history['configuration'].append(config)

    def run(self):
        """Serve workers until the runtime budget is exhausted.

        Each received message is a ``(return_info, time_taken, trial_id,
        config)`` tuple.  A message whose ``config`` is None announces a new
        worker; any other message is recorded and passed to
        :meth:`update_observation`.  Either way the worker is sent a new job.
        Exceptions are printed and logged, not propagated.
        """
        try:
            worker_num = 0
            while True:
                if self.runtime_limit is not None and time.time() - self.global_start_time > self.runtime_limit:
                    self.logger.info('RUNTIME BUDGET is RUNNING OUT.')
                    return

                # Get observation from worker
                observation = self.master_messager.receive_message()  # return_info, time_taken, trial_id, config
                if observation is None:
                    # Wait for workers.
                    time.sleep(self.sleep_time)
                    continue

                return_info, time_taken, trial_id, config = observation
                # worker init
                if config is None:
                    worker_num += 1
                    self.logger.info("Worker %d init." % (worker_num, ))
                # update observation
                else:
                    global_time = time.time() - self.global_start_time
                    self.logger.info('Master get observation: %s. Global time=%.2fs.'
                                     % (str(observation), global_time))
                    n_iteration = return_info['n_iteration']
                    perf = return_info['loss']
                    t = time.time()
                    self.update_observation(config, perf, n_iteration)
                    self.logger.info('update_observation() cost %.2fs.' % (time.time() - t,))
                    self.recorder.append({'trial_id': trial_id, 'time_consumed': time_taken,
                                          'configuration': config, 'n_iteration': n_iteration,
                                          'return_info': return_info, 'global_time': global_time})
                    # Snapshot after every observation when the subclass has
                    # no ``R`` attribute, else only for n_iteration == R.
                    if (not hasattr(self, 'R')) or n_iteration == self.R:
                        self.save_intermediate_statistics()

                # Send new job
                t = time.time()
                config, n_iteration, extra_conf = self.get_job()
                self.logger.info('get_job() cost %.2fs.' % (time.time()-t, ))
                msg = [config, extra_conf, self.time_limit_per_trial, n_iteration, self.global_trial_counter]
                self.master_messager.send_message(msg)
                self.global_trial_counter += 1
                self.logger.info('Master send job: %s.' % (msg,))

        except Exception as e:
            print(e)
            print(traceback.format_exc())
            self.logger.error(traceback.format_exc())

    def get_job(self):
        """Return ``(config, n_iteration, extra_conf)`` for the next job (abstract)."""
        raise NotImplementedError

    def update_observation(self, config, perf, n_iteration):
        """Consume one finished trial (abstract)."""
        raise NotImplementedError

    def set_save_intermediate_record(self, dir_path, file_name):
        """Enable intermediate snapshots written below ``dir_path``.

        A trailing ``.pkl`` on ``file_name`` is dropped because
        :meth:`save_intermediate_statistics` appends ``_<id>.pkl`` itself.
        """
        # exist_ok covers both "already there" and a concurrent creator.
        os.makedirs(dir_path, exist_ok=True)
        self.save_intermediate_record = True
        if file_name.endswith('.pkl'):
            file_name = file_name[:-4]
        self.save_intermediate_record_path = os.path.join(dir_path, file_name)
        self.logger.info('set save_intermediate_record to True. path: %s.'
                         % (self.save_intermediate_record_path,))

    def save_intermediate_statistics(self):
        """Pickle ``self.recorder`` to the next numbered snapshot file, if enabled."""
        if self.save_intermediate_record:
            self.save_intermediate_record_id += 1
            path = '%s_%d.pkl' % (self.save_intermediate_record_path, self.save_intermediate_record_id)
            with open(path, 'wb') as f:
                pkl.dump(self.recorder, f)
            global_time = time.time() - self.global_start_time
            self.logger.info('Intermediate record %s saved! global_time=%.2fs.' % (path, global_time))

    def _get_logger(self, name):
        """Set up ``<log_directory>/<name>.log`` and return the class-named logger."""
        logger_name = name
        setup_logger(os.path.join(self.log_directory, '%s.log' % str(logger_name)), None)
        return get_logger(self.__class__.__name__)
def test_func(config, device='cuda'):     # device='cuda' 'cuda:0'
    """Train an LSTM language model with ``config`` and return its final perplexity.

    NOTE(review): nesting below was reconstructed from a flattened source
    dump - confirm against the original file.

    :param config: mapping with the keys read below (``dropout``,
        ``dropouth``, ``dropouti``, ``dropoute``, ``wdrop``, ``emsize``,
        ``hidden_size``, ``wdecay``, ``batch_size``, ``lr``).
    :param device: passed to ``torch.device``.
    :return: ``math.exp`` of the loss from the last evaluated epoch, or
        ``1e10`` if no epoch ran.
    """

    device = torch.device(device)

    criterion = None
    # Unpack the hyper-parameters of the configuration under test.
    dropout = config['dropout']
    dropouth = config['dropouth']
    dropouti = config['dropouti']
    dropoute = config['dropoute']
    wdrop = config['wdrop']
    emsize = config['emsize']
    hidden_size = config['hidden_size']
    weight_decay = config['wdecay']
    batch_size = config['batch_size']
    lr = config['lr']
    print('worker receive config:', config)

    # ``corpus`` and the constants used below (n_layers, tied, max_epoch,
    # bptt, decay_epoch, eval_batch_size) are module-level names.
    ntokens = len(corpus.dictionary)
    model = RNNModel('LSTM', ntokens, emsize, hidden_size, n_layers, dropout, dropouth,
                     dropouti, dropoute, wdrop, tied)

    train_data = batchify(corpus.train, batch_size, device)
    # val_data = batchify(corpus.valid, eval_batch_size, device)
    # Evaluation runs on the test split; the variable keeps its "val" name.
    val_data = batchify(corpus.test, eval_batch_size, device)  # test

    init_epoch_num = 1
    epoch_num = max_epoch
    print('epoch_num', epoch_num)
    ###
    # criterion is always None at this point, so the loss is always built here.
    if not criterion:
        splits = []
        if ntokens > 500000:
            # One Billion
            # This produces fairly even matrix mults for the buckets:
            # 0: 11723136, 1: 10854630, 2: 11270961, 3: 11219422
            splits = [4200, 35000, 180000]
        elif ntokens > 75000:
            # WikiText-103
            splits = [2800, 20000, 76000]
        print('Using splits:', splits)
        criterion = SplitCrossEntropyLoss(emsize, splits=splits, verbose=False)
    ###

    model = model.to(device)
    criterion = criterion.to(device)
    ###
    params = list(model.parameters()) + list(criterion.parameters())
    # Count parameters: first two dims multiplied for tensors of rank > 1,
    # first dim otherwise; tensors with an empty size are skipped.
    total_params = sum(x.size()[0] * x.size()[1] if len(x.size()) > 1 else x.size()[0] for x in params if x.size())
    print('Model total parameters:', total_params)
    # Loop over epochs.
    best_val_loss = []
    stored_loss = 100000000  # not read again in this function

    # Ensure the optimizer is optimizing params, which includes both the model's weights as well as the criterion's weight (i.e. Adaptive Softmax)
    optimizer = torch.optim.SGD(params, lr=lr, weight_decay=weight_decay)

    return_pp = 1e10
    for epoch in range(init_epoch_num, init_epoch_num + epoch_num):
        epoch_start_time = time.time()
        train(corpus, model, criterion, optimizer, epoch, batch_size, train_data, bptt)
        if 't0' in optimizer.param_groups[0]:
            # Back up the current weights and, where the optimizer state has
            # an 'ax' entry, evaluate with that tensor instead.
            tmp = {}
            for prm in model.parameters():
                tmp[prm] = prm.data.clone()
                if 'ax' in optimizer.state[prm]:
                    prm.data = optimizer.state[prm]['ax'].clone()

            val_loss2 = evaluate(corpus, model, criterion, val_data)
            print('-' * 89)
            print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                  'valid ppl {:8.2f} | valid bpc {:8.3f}'.format(
                epoch, (time.time() - epoch_start_time), val_loss2, math.exp(val_loss2), val_loss2 / math.log(2)))
            print('-' * 89)

            # Restore the backed-up weights before training continues.
            for prm in model.parameters():
                if prm in tmp:
                    prm.data = tmp[prm].clone()
            return_pp = math.exp(val_loss2)

        else:
            val_loss = evaluate(corpus, model, criterion, val_data, eval_batch_size)
            print('-' * 89)
            print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                  'valid ppl {:8.2f} | valid bpc {:8.3f}'.format(
                epoch, (time.time() - epoch_start_time), val_loss, math.exp(val_loss), val_loss / math.log(2)))
            print('-' * 89)

            # Step decay of the learning rate at the epochs in decay_epoch.
            if epoch in decay_epoch:
                print('Dividing learning rate by 10')
                optimizer.param_groups[0]['lr'] /= 10.

            best_val_loss.append(val_loss)
            return_pp = math.exp(val_loss)

    # Turn it into a minimization problem.
    return return_pp
class BaseDataset(object):
    """Holder for the train / validation / test datasets and the test data path."""

    def __init__(self):
        self.train_dataset = None
        self.test_dataset = None
        self.val_dataset = None
        self.test_data_path = None

    def load_data(self):
        """Load training (and validation) data; implemented by subclasses."""
        raise NotImplementedError()

    def load_test_data(self):
        """Load test data; implemented by subclasses."""
        raise NotImplementedError()

    def set_test_path(self, test_data_path):
        """Remember where the test data is located."""
        self.test_data_path = test_data_path


class DLDataset(BaseDataset):
    """Dataset holder that can split the training set via index samplers."""

    def __init__(self):
        super().__init__()
        self.train_sampler = None
        self.val_sampler = None
        self.subset_sampler_used = False
        self.train_indices = None
        self.val_indices = None

    def create_train_val_split(self, dataset: Dataset, train_val_split=0.2, shuffle=True):
        """Split ``dataset`` indices into a validation head and a training tail.

        The first ``floor(train_val_split * len(dataset))`` indices become the
        validation set and the remainder the training set.  With ``shuffle``
        the indices are permuted first, after seeding NumPy's global RNG
        with 1.
        """
        total = len(dataset)
        order = list(range(total))
        cut = int(np.floor(train_val_split * total))

        if shuffle:
            np.random.seed(1)
            np.random.shuffle(order)

        self.val_indices = order[:cut]
        self.train_indices = order[cut:]

        self.train_sampler = SubsetRandomSampler(self.train_indices)
        self.val_sampler = SubsetSequentialampler(self.val_indices)
        self.subset_sampler_used = True

    def get_train_samples_num(self):
        """Return the number of training samples; implemented by subclasses."""
        raise NotImplementedError()

    def get_train_val_indices(self):
        """Return the ``(train_indices, val_indices)`` pair."""
        return self.train_indices, self.val_indices

    def get_loader_labels(self, loader: DataLoader):
        """Collect the second element of every batch yielded by ``loader``.

        Raises ``ValueError`` when a batch does not have exactly two elements.
        """
        collected = []
        for batch in loader:
            if len(batch) != 2:
                raise ValueError('No labels found!')
            collected += list(batch[1])
        return np.asarray(collected)

    def get_labels(self, mode='val'):
        """Return the labels of the ``'val'``, ``'train'`` or (any other mode) test data.

        When the index-based split is active, validation and training labels
        are both read from ``train_dataset`` through the matching sampler.
        """
        if mode == 'val':
            if self.subset_sampler_used:
                options = dict(dataset=self.train_dataset, sampler=self.val_sampler)
            else:
                options = dict(dataset=self.val_dataset, shuffle=False, sampler=None)
        elif mode == 'train':
            if self.subset_sampler_used:
                options = dict(dataset=self.train_dataset, sampler=self.train_sampler)
            else:
                options = dict(dataset=self.train_dataset, shuffle=False, sampler=None)
        else:
            options = dict(dataset=self.test_dataset, shuffle=False)

        loader = DataLoader(batch_size=32, num_workers=4, **options)
        return self.get_loader_labels(loader)
def get_train_samples_num(self):
    """Return the number of samples used for training, as an ``int``.

    Without the index-based split this is the size of the training folder
    dataset (loaded on demand when ``self.train_dataset`` is still None).
    With the split active it is the number of training indices, so the
    result agrees with the floor-based cut made by
    ``create_train_val_split`` instead of the fractional
    ``size * (1 - val_split_size)``.
    """
    if self.train_dataset is None:
        # Dataset not loaded yet: count the training folder directly.
        total = len(get_folder_dataset(os.path.join(self.data_path, 'train'),
                                       udf_transforms=None,
                                       grayscale=self.grayscale))
    else:
        total = len(self.train_dataset)

    if not self.subset_sampler_used:
        return total

    train_indices = getattr(self, 'train_indices', None)
    if train_indices is not None:
        return len(train_indices)
    # Fallback when no indices are stored: the validation part is the floor
    # of the requested fraction and the rest is training data.
    return total - int(self.val_split_size * total)
import time
import argparse
import traceback
import numpy as np
import pickle as pkl
from functools import partial

sys.path.insert(0, ".")
sys.path.insert(1, "../open-box")    # for dependency
from test.nas_benchmarks.nasbench201_utils import load_nasbench201, get_nasbench201_configspace, objective_func
# NOTE(review): no simulation_utils.py is listed under test/nas_benchmarks in the
# repository tree that accompanies this file -- confirm where these helpers live.
from test.nas_benchmarks.simulation_utils import run_in_parallel, run_async
from test.utils import seeds, timeit
from test.benchmark_process_record import remove_partial, get_incumbent
from tuner import mth_dict

# ---------------------------------------------------------------------------
# Command-line interface
# ---------------------------------------------------------------------------
dataset_choices = ['cifar10-valid', 'cifar10', 'cifar100', 'ImageNet16-120', 'all']
parser = argparse.ArgumentParser()
# Comma-separated list of method names; each must be a key of mth_dict.
parser.add_argument('--mths', type=str, default='hyperband')
# Dataset names are validated after splitting (see below). Using argparse
# `choices` here would reject a comma-separated value as a whole, which made
# the split(',') further down ineffective.
parser.add_argument('--dataset', type=str, default='all',
                    help='comma-separated list of: ' + ', '.join(dataset_choices))
parser.add_argument('--R', type=int, default=27)
parser.add_argument('--eta', type=int, default=3)
parser.add_argument('--n_workers', type=int)        # must set
# 0 means "pick the per-dataset default" later in the script.
parser.add_argument('--runtime_limit', type=int, default=0)
parser.add_argument('--time_limit_per_trial', type=int, default=999999)
parser.add_argument('--rep', type=int, default=1)
parser.add_argument('--start_id', type=int, default=0)
parser.add_argument('--data_path', type=str, default='../nas_data/NAS-Bench-201-v1_1-096897.pth')

args = parser.parse_args()
mths = args.mths.split(',')
print("mths:", mths)

datasets = args.dataset.split(',')
if datasets[0] == 'all':
    # 'all' expands to these three datasets; plain 'cifar10' is not included.
    datasets = ['cifar10-valid', 'cifar100', 'ImageNet16-120']
invalid_datasets = [name for name in datasets if name == 'all' or name not in dataset_choices]
if invalid_datasets:
    parser.error('invalid --dataset value(s): %s (choose from %s)'
                 % (', '.join(invalid_datasets), ', '.join(dataset_choices)))

R = args.R
eta = args.eta
n_workers = args.n_workers  # Caution: must set for saving result to different dirs
time_limit_per_trial = args.time_limit_per_trial
rep = args.rep
start_id = args.start_id
data_path = args.data_path

print(n_workers)
print(R, eta)
# Explicit check instead of `assert`, which is stripped under `python -O`.
for para_name, para in (('R', R), ('eta', eta), ('n_workers', n_workers)):
    if para is None:
        parser.error('--%s must be set' % para_name)


for algo_name in mths:
    # Fail fast on unknown method names; an explicit raise survives `python -O`.
    if algo_name not in mth_dict:
        raise ValueError('unknown method: %s' % (algo_name,))


def evaluate_simulation(algo_class, algo_kwargs, method_id, n_workers, seed, parallel_strategy):
    """Instantiate one tuner, run it once and return its recorder.

    Before instantiation the tuner *class* is patched with a simulation
    helper: ``run_in_parallel`` for the ``'sync'`` strategy, ``run`` for
    ``'async'``. Construction is retried with a new random port for as long as
    it raises ``EOFError``.

    Besides its arguments the function reads these module-level names, which
    the driver below assigns before calling it: ``objective_function``, ``cs``,
    ``R``, ``eta``, ``time_limit_per_trial`` and ``runtime_limit``.

    Args:
        algo_class: Tuner class to instantiate (it is modified in place).
        algo_kwargs: Extra keyword arguments forwarded to the constructor.
        method_id: Identifier passed to the tuner as ``method_id``.
        n_workers: Value assigned to ``algo.n_workers`` after construction.
        seed: Passed to the tuner as ``random_state``.
        parallel_strategy: Either ``'sync'`` or ``'async'``.

    Returns:
        The ``recorder`` attribute of the tuner after ``run()`` has returned.

    Raises:
        ValueError: If ``parallel_strategy`` is neither ``'sync'`` nor ``'async'``.
    """
    print(method_id, n_workers, seed)

    if parallel_strategy == 'sync':
        algo_class.run_in_parallel = run_in_parallel
    elif parallel_strategy == 'async':
        algo_class.run = run_async
    else:
        raise ValueError('parallel_strategy must be sync or async, got: %s' % (parallel_strategy,))

    while True:
        # Port in [13579, 15578], drawn from a generator seeded by the clock.
        port = 13579 + np.random.RandomState(int(time.time() * 10000 % 10000)).randint(2000)
        print('port =', port)
        try:
            algo = algo_class(
                objective_func=objective_function,  # must set for simulation
                config_space=cs,
                R=R,
                eta=eta,
                random_state=seed,
                method_id=method_id,
                restart_needed=True,
                time_limit_per_trial=time_limit_per_trial,
                runtime_limit=runtime_limit,
                port=port,
                **algo_kwargs,
            )
        except EOFError:
            print('EOFError: try next port.')
        else:
            break

    algo.n_workers = n_workers  # must set for simulation
    algo.run()

    # Best-effort status logging: any failure here is deliberately ignored
    # (presumably because not every tuner exposes these members -- confirm).
    for label, getter_name, attr_name in (('bracket', 'get_bracket_status', 'bracket'),
                                          ('brackets', 'get_brackets_status', 'brackets')):
        try:
            status = getattr(algo, getter_name)(getattr(algo, attr_name))
            algo.logger.info('===== %s status: %s' % (label, status))
        except Exception:
            pass
    return algo.recorder


with timeit('load nasbench201'):
    model_name = 'nasbench201'
    cs = get_nasbench201_configspace()
    api = load_nasbench201(path=data_path)

# Runtime limits applied per dataset when --runtime_limit is left at 0.
default_runtime_limits = {
    'cifar10': 86400,
    'cifar10-valid': 86400,
    'cifar100': 172800,
    'ImageNet16-120': 432000,
}

from tuner.mq_random_search import mqRandomSearch
from tuner.mq_bo import mqBO

# NOTE: runtime_limit and objective_function must stay module-level names,
# because evaluate_simulation reads them as globals.
with timeit('%s all' % datasets):
    for dataset in datasets:
        # set runtime_limit
        if args.runtime_limit == 0:
            if dataset not in default_runtime_limits:
                raise ValueError('no default runtime_limit for dataset: %s' % (dataset,))
            runtime_limit = default_runtime_limits[dataset]
        else:
            runtime_limit = args.runtime_limit

        # set dataset
        objective_function = partial(objective_func, total_resource=R, eta=eta, api=api, dataset=dataset)

        with timeit('%s all' % dataset):
            for algo_name in mths:
                with timeit('%s %d %d' % (algo_name, start_id, rep)):
                    mth_info = mth_dict[algo_name]
                    if len(mth_info) == 2:
                        algo_class, parallel_strategy = mth_info
                        algo_kwargs = dict()
                    elif len(mth_info) == 3:
                        algo_class, parallel_strategy, algo_kwargs = mth_info
                        # Copy so the entry stored in mth_dict is not modified below.
                        algo_kwargs = dict(algo_kwargs)
                    else:
                        # Wrap in a 1-tuple: a tuple operand would otherwise be
                        # unpacked by % and raise TypeError instead.
                        raise ValueError('error mth info: %s' % (mth_info,))

                    if algo_class in (mqRandomSearch, mqBO):
                        print('set algo_class n_workers:', n_workers)
                        algo_kwargs['n_workers'] = n_workers

                    # Independent of the repetition, and needed after the loop
                    # even when rep is 0.
                    if R != 27:
                        method_str = '%s-%d-n%d' % (algo_name, R, n_workers)
                    else:
                        method_str = '%s-n%d' % (algo_name, n_workers)

                    print('===== start eval %s: rep=%d, runtime_limit=%d, time_limit_per_trial=%d'
                          % (dataset, rep, runtime_limit, time_limit_per_trial))
                    for i in range(start_id, start_id + rep):
                        seed = seeds[i]

                        timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
                        method_id = method_str + '-%s-%d-%s' % (dataset, seed, timestamp)

                        with timeit('%d %s' % (i, method_id)):
                            recorder = evaluate_simulation(
                                algo_class, algo_kwargs, method_id, n_workers, seed, parallel_strategy
                            )

                        dir_path = 'data/benchmark_%s/%s-%d/%s/' % (model_name, dataset, runtime_limit, method_str)
                        file_name = 'record_%s.pkl' % (method_id,)
                        # exist_ok avoids the check-then-create race between parallel runs.
                        os.makedirs(dir_path, exist_ok=True)
                        with open(os.path.join(dir_path, file_name), 'wb') as f:
                            pkl.dump(recorder, f)
                        print(dir_path, file_name, 'saved!', flush=True)

                    # Post-processing is best effort: report the failure and go on
                    # with the next method.
                    try:
                        remove_partial(model_name, dataset, [method_str], runtime_limit, R)
                        get_incumbent(model_name, dataset, [method_str], runtime_limit)
                    except Exception:
                        print('benchmark process record failed: %s' % (traceback.format_exc(),))