├── .gitignore ├── LICENSE ├── README.md ├── baselines ├── __init__.py ├── core │ ├── __init__.py │ ├── multiobjective_experiment.py │ └── pareto.py ├── methods │ ├── __init__.py │ ├── bulkandcut │ │ ├── __init__.py │ │ ├── bayesian_optimization │ │ │ ├── __init__.py │ │ │ ├── constrained_bayesian_optimizer.py │ │ │ ├── optimizer_one_two.py │ │ │ └── optimizer_three.py │ │ ├── data_augmentation.py │ │ ├── genetic_algorithm │ │ │ ├── __init__.py │ │ │ ├── evolution.py │ │ │ └── individual.py │ │ ├── model │ │ │ ├── BNCmodel.py │ │ │ ├── __init__.py │ │ │ ├── average_meter.py │ │ │ ├── ax_adapter.py │ │ │ ├── blind_model.py │ │ │ ├── conv_cell.py │ │ │ ├── cross_entropy_with_probs.py │ │ │ ├── linear_cell.py │ │ │ ├── model_head.py │ │ │ ├── model_section.py │ │ │ └── skip_connection.py │ │ └── plot │ │ │ ├── __init__.py │ │ │ ├── learning_curve.py │ │ │ └── pareto.py │ ├── mobananas │ │ ├── __init__.py │ │ ├── member.py │ │ ├── mobananas.py │ │ ├── moshbananas.py │ │ └── neural_predictor.py │ ├── mobohb │ │ ├── __init__.py │ │ ├── hpbandster │ │ │ ├── __init__.py │ │ │ ├── core │ │ │ │ ├── __init__.py │ │ │ │ ├── base_config_generator.py │ │ │ │ ├── base_iteration.py │ │ │ │ ├── dispatcher.py │ │ │ │ ├── master.py │ │ │ │ ├── nameserver.py │ │ │ │ ├── result.py │ │ │ │ └── worker.py │ │ │ ├── examples │ │ │ │ ├── README.txt │ │ │ │ ├── __init__.py │ │ │ │ ├── commons.py │ │ │ │ ├── example_1_local_sequential.py │ │ │ │ ├── example_2_local_parallel_threads.py │ │ │ │ ├── example_3_local_parallel_processes.py │ │ │ │ ├── example_4_cluster.py │ │ │ │ ├── example_5_keras_worker.py │ │ │ │ ├── example_5_mnist.py │ │ │ │ ├── example_5_pytorch_worker.py │ │ │ │ ├── example_5_run │ │ │ │ │ ├── configs.json │ │ │ │ │ └── results.json │ │ │ │ ├── example_8_mnist_continued.py │ │ │ │ ├── plot_example_6_analysis.py │ │ │ │ └── plot_example_7_interactive_plot.py │ │ │ ├── optimizers │ │ │ │ ├── __init__.py │ │ │ │ ├── bohb.py │ │ │ │ ├── config_generators │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── bohb.py │ │ │ │ │ ├── h2bo.py │ │ │ │ │ ├── kde.py │ │ │ │ │ ├── lcnet.py │ │ │ │ │ ├── mobohb.py │ │ │ │ │ ├── mobohb_utils.py │ │ │ │ │ ├── parego.py │ │ │ │ │ └── random_sampling.py │ │ │ │ ├── h2bo.py │ │ │ │ ├── hyperband.py │ │ │ │ ├── iterations │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── successivehalving.py │ │ │ │ │ ├── successivehalvingparego.py │ │ │ │ │ ├── successiveresampling.py │ │ │ │ │ └── sucessivehalvingmobohb.py │ │ │ │ ├── kde │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── kernels.py │ │ │ │ │ └── mvkde.py │ │ │ │ ├── lcnet.py │ │ │ │ ├── learning_curve_models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── arif.py │ │ │ │ │ ├── base.py │ │ │ │ │ └── lcnet.py │ │ │ │ ├── mobohb.py │ │ │ │ ├── parego.py │ │ │ │ └── randomsearch.py │ │ │ ├── utils.py │ │ │ ├── visualization.py │ │ │ └── workers │ │ │ │ ├── __init__.py │ │ │ │ └── hpolibbenchmark.py │ │ ├── mobohb_worker.py │ │ └── run_mobohb.py │ ├── msehvi │ │ ├── __init__.py │ │ └── msehvi.py │ └── shemoa │ │ ├── __init__.py │ │ ├── member.py │ │ └── shemoa.py └── problems │ ├── __init__.py │ ├── fashion │ ├── __init__.py │ ├── data │ │ ├── x_test.npy │ │ ├── x_train.npy │ │ ├── x_val.npy │ │ ├── y_test.npy │ │ ├── y_train.npy │ │ └── y_val.npy │ ├── fashionnet.py │ ├── problem.py │ ├── search_space.py │ └── utils.py │ ├── flowers │ ├── __init__.py │ ├── data │ │ ├── x_test.npy │ │ ├── x_train.npy │ │ ├── x_val.npy │ │ ├── y_test.npy │ │ ├── y_train.npy │ │ └── y_val.npy │ ├── flowernet.py │ ├── problem.py │ ├── search_space.py │ └── utils.py │ └── 
simple_problems.py └── examples ├── bulkandcut.py ├── mobohb.py ├── moshbananas.py ├── msehvi.py ├── random_search.py └── shemoa.py /.gitignore: -------------------------------------------------------------------------------- 1 | # PyCharm 2 | .idea/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bag of Baselines 2 | 3 | Bag of Baselines implements several multi-objective optimisation methods to create a performance benchmark on two small datasets. To learn more about this work, check out the [publication](https://arxiv.org/abs/2105.01015). 4 | 5 | ### Methods 6 | 7 | The following methods are proposed and implemented: 8 | 9 | 1. **SH-EMOA**: Speeding up Evolutionary Multi-Objective Algorithms 10 | 11 | 2. **MO-BOHB**: Generalization of BOHB to an Arbitrary Number of Objectives 12 | 13 | 3. **MS-EHVI**: Mixed Surrogate Expected Hypervolume Improvement 14 | 15 | 4. **MO-BANANAS** 16 | 17 | 5. 
**BULK & CUT** 18 | 19 | ### Datasets 20 | 21 | Performance of the methods was evaluated using the following datasets: [Oxford-Flowers dataset](https://www.robots.ox.ac.uk/~vgg/data/flowers/) and [Fashion-MNIST](https://github.com/zalandoresearch/fashion-mnist). 22 | 23 | 24 | ### Organization 25 | 26 | * The specific code for each of the methods (the main logic of each algorithm) is stored in the [methods](https://github.com/automl/multi-obj-baselines/tree/main/baselines/methods) folder. 27 | 28 | * In the [examples](https://github.com/automl/multi-obj-baselines/tree/main/examples) folder you will find a small Python script to run each of the available methods (for the "Fashion-MNIST" or the "flowers" dataset). 29 | 30 | * Code defining the search space and the evaluation function of the two different problems are defined in the [problems](https://github.com/automl/multi-obj-baselines/tree/main/baselines/problems) folder. 31 | 32 | -------------------------------------------------------------------------------- /baselines/__init__.py: -------------------------------------------------------------------------------- 1 | from .core.multiobjective_experiment import MultiObjectiveSimpleExperiment 2 | from .core.multiobjective_experiment import save_experiment, load_experiment 3 | from .core.pareto import pareto, nDS, computeHV2D, nDS_index, crowdingDist 4 | -------------------------------------------------------------------------------- /baselines/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/core/__init__.py -------------------------------------------------------------------------------- /baselines/core/multiobjective_experiment.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Dict, Any, List 2 | import pickle 3 | 4 | from ax import Arm 5 | from ax import Metric 6 | from ax import Experiment 7 | from ax import SearchSpace 8 | from ax import SimpleExperiment 9 | from ax import OptimizationConfig 10 | 11 | from ax.core.simple_experiment import TEvaluationFunction 12 | from ax.core.simple_experiment import unimplemented_evaluation_function 13 | 14 | class MultiObjectiveSimpleExperiment(SimpleExperiment): 15 | 16 | def __init__( 17 | self, 18 | search_space: SearchSpace, 19 | optimization_config: OptimizationConfig, 20 | name: Optional[str] = None, 21 | eval_function: TEvaluationFunction = unimplemented_evaluation_function, 22 | status_quo: Optional[Arm] = None, 23 | properties: Optional[Dict[str, Any]] = None, 24 | extra_metrics: Optional[List[Metric]] = None, 25 | ): 26 | super(MultiObjectiveSimpleExperiment, self).__init__( 27 | search_space=search_space, 28 | name=name, 29 | evaluation_function=eval_function, 30 | status_quo=status_quo, 31 | properties=properties 32 | ) 33 | 34 | self.optimization_config = optimization_config 35 | 36 | if extra_metrics is not None: 37 | for metric in extra_metrics: 38 | Experiment.add_tracking_metric(self, metric) 39 | 40 | 41 | def save_experiment(experiment: Experiment, filename: str): 42 | with open(filename, 'wb') as file: 43 | pickle.dump(experiment, file) 44 | 45 | 46 | def load_experiment(filename: str): 47 | with open(filename, 'rb') as file: 48 | return pickle.load(file) 49 | -------------------------------------------------------------------------------- /baselines/core/pareto.py: 
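A minimal, illustrative sketch of how the Pareto helpers defined in this file (and re-exported through baselines/__init__.py above) might be used on a toy two-objective minimization problem; the cost values and the reference point below are made up for the example and are not taken from the repository:

import numpy as np
from baselines.core.pareto import pareto, computeHV2D

# Four candidates scored by two costs (lower is better in both),
# e.g. (1 - validation accuracy, normalized model size).
costs = np.array([[0.2, 9.0],
                  [0.5, 4.0],
                  [0.9, 1.0],
                  [0.6, 6.0]])

mask = pareto(costs)   # -> [True, True, True, False]: the last point is dominated
front = costs[mask]    # the non-dominated (Pareto-optimal) points

# computeHV2D assumes the front is already ordered as a staircase; sorting by
# descending first objective does that for a 2-D minimization front. The
# reference point must be worse than every front point in both objectives.
front = front[np.argsort(-front[:, 0])]
hv = computeHV2D(front, ref=[1.0, 10.0])   # area dominated inside the reference box (3.6 here)

nDS and crowdingDist below extend the same machinery to full non-dominated sorting and crowding-distance computation.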
-------------------------------------------------------------------------------- 1 | from typing import List 2 | import numpy as np 3 | 4 | 5 | def pareto(costs: np.ndarray): 6 | """ 7 | Find the pareto-optimal points 8 | :param costs: (n_points, m_cost_values) array 9 | :return: (n_points, 1) indicator if point is on pareto front or not. 10 | """ 11 | assert type(costs) == np.ndarray 12 | assert costs.ndim == 2 13 | 14 | # first assume all points are pareto optimal 15 | is_pareto = np.ones(costs.shape[0], dtype=bool) 16 | for i, c in enumerate(costs): 17 | if is_pareto[i]: 18 | # determine all points that have a smaller cost 19 | all_with_lower_costs = np.any(costs < c, axis=1) 20 | keep_on_front = np.logical_and(all_with_lower_costs, is_pareto) 21 | is_pareto = keep_on_front 22 | is_pareto[i] = True # keep self 23 | return is_pareto 24 | 25 | def pareto_index(costs: np.ndarray, index_list): 26 | """ 27 | Find the pareto-optimal points 28 | :param costs: (n_points, m_cost_values) array 29 | :return: (n_points, 1) indicator if point is on pareto front or not. 30 | """ 31 | # first assume all points are pareto optimal 32 | is_pareto = np.ones(costs.shape[0], dtype=bool) 33 | 34 | for i, c in enumerate(costs): 35 | 36 | if is_pareto[i]: 37 | # determine all points that have a smaller cost 38 | all_with_lower_costs = np.any(costs < c, axis=1) 39 | keep_on_front = np.logical_and(all_with_lower_costs, is_pareto) 40 | is_pareto = keep_on_front 41 | is_pareto[i] = True # keep self 42 | 43 | index_return = index_list[is_pareto] 44 | 45 | return is_pareto, index_return 46 | 47 | def nDS_index(costs, index_list): 48 | """ 49 | Implementation of the non-dominated sorting method 50 | :param costs: (n_points, m_cost_values) array 51 | :list of indeces 52 | :return: list of all fronts, sorted indeces 53 | """ 54 | 55 | dominating_list = [] 56 | index_return_list = [] 57 | fronts = [] 58 | while costs.size > 0: 59 | dominating, index_return = pareto_index(costs, index_list) 60 | fronts.append(costs[dominating]) 61 | costs = costs[~dominating] 62 | index_list = index_list[~dominating] 63 | dominating_list.append(dominating) 64 | index_return_list.append(index_return) 65 | 66 | return fronts, index_return_list 67 | 68 | 69 | def crowdingDist(fronts, index_list): 70 | """ 71 | Implementation of the crowding distance 72 | :param front: (n_points, m_cost_values) array 73 | :return: sorted_front and corresponding distance value of each element in the sorted_front 74 | """ 75 | dist_list = [] 76 | index_return_list = [] 77 | 78 | for g in range(len(fronts)): 79 | front = fronts[g] 80 | index_ = index_list[g] 81 | 82 | sorted_front = np.sort(front.view([('', front.dtype)] * front.shape[1]), 83 | axis=0).view(np.float) 84 | 85 | _, sorted_index = (list(t) for t in zip(*sorted(zip([f[0] for f in front], index_)))) 86 | 87 | normalized_front = np.copy(sorted_front) 88 | 89 | for column in range(normalized_front.shape[1]): 90 | ma, mi = np.max(normalized_front[:, column]), np.min(normalized_front[:, column]) 91 | normalized_front[:, column] -= mi 92 | normalized_front[:, column] /= (ma - mi) 93 | 94 | dists = np.empty((sorted_front.shape[0], ), dtype=np.float) 95 | dists[0] = np.inf 96 | dists[-1] = np.inf 97 | 98 | for elem_idx in range(1, dists.shape[0] - 1): 99 | dist_left = np.linalg.norm(normalized_front[elem_idx] - normalized_front[elem_idx - 1]) 100 | dist_right = np.linalg.norm(normalized_front[elem_idx + 1] - normalized_front[elem_idx]) 101 | dists[elem_idx] = dist_left + dist_right 102 | 103 | 
dist_list.append((sorted_front, dists)) 104 | _, index_sorted_max = (list(t) for t in zip(*sorted(zip(dists, sorted_index)))) 105 | index_sorted_max.reverse() 106 | 107 | index_return_list.append(index_sorted_max) 108 | 109 | return dist_list, index_return_list 110 | 111 | 112 | 113 | def nDS(costs: np.ndarray): 114 | """ 115 | Implementation of the non-dominated sorting method 116 | :param costs: (n_points, m_cost_values) array 117 | :return: list of all fronts 118 | """ 119 | assert type(costs) == np.ndarray 120 | assert costs.ndim == 2 121 | 122 | # Stepwise compute the pareto front without all prior dominating points 123 | my_costs = costs.copy() 124 | remain = np.ones(len(my_costs), dtype=np.bool) 125 | fronts = [] 126 | while np.any(remain): 127 | front_i = pareto(my_costs) 128 | fronts.append(my_costs[front_i, :]) 129 | my_costs[front_i, :] = np.inf 130 | remain = np.logical_and(remain, np.logical_not(front_i)) 131 | return fronts 132 | 133 | 134 | def computeHV2D(front: np.ndarray, ref: List[float]): 135 | """ 136 | Compute the Hypervolume for the pareto front (only implement it for 2D) 137 | :param front: (n_points, m_cost_values) array for which to compute the volume 138 | :param ref: coordinates of the reference point 139 | :returns: Hypervolume of the polygon spanned by all points in the front + the reference point 140 | """ 141 | 142 | front = np.asarray(front) 143 | assert front.ndim == 2 144 | assert len(ref) == 2 145 | 146 | 147 | # We assume all points already sorted 148 | list_ = [ref] 149 | for x in front: 150 | elem_at = len(list_) -1 151 | list_.append([list_[elem_at][0], x[1]]) # add intersection points by keeping the x constant 152 | list_.append(x) 153 | list_.append([list_[-1][0], list_[0][1]]) 154 | sorted_front = np.array(list_) 155 | 156 | def shoelace(x_y): # taken from https://stackoverflow.com/a/58515054 157 | x_y = np.array(x_y) 158 | x_y = x_y.reshape(-1,2) 159 | 160 | x = x_y[:, 0] 161 | y = x_y[:, 1] 162 | 163 | S1 = np.sum(x*np.roll(y,-1)) 164 | S2 = np.sum(y*np.roll(x,-1)) 165 | 166 | area = .5*np.absolute(S1 - S2) 167 | 168 | return area 169 | return shoelace(sorted_front) 170 | -------------------------------------------------------------------------------- /baselines/methods/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/methods/__init__.py -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/__init__.py: -------------------------------------------------------------------------------- 1 | # Randomness control: 2 | from pathlib import Path 3 | import torch 4 | import numpy as np 5 | 6 | 7 | global_seed = 42 8 | #torch.manual_seed(global_seed) 9 | rng = np.random.default_rng() 10 | 11 | # Pytorch device: 12 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 13 | 14 | # Convenience imports: 15 | from .genetic_algorithm.evolution import Evolution # noqa 16 | from .model.BNCmodel import BNCmodel # noqa 17 | from .plot.pareto import generate_pareto_animation # noqa 18 | from .plot.pareto import Benchmark # noqa 19 | -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/bayesian_optimization/__init__.py: -------------------------------------------------------------------------------- 
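The bayesian_optimization package that follows provides the constrained Bayesian optimizer used inside Bulk & Cut. Its acquisition function is a lower confidence bound (LCB) over a Gaussian-process surrogate with a Matern(nu=2.5) kernel; the sketch below illustrates that idea on made-up one-dimensional data and is not code from the repository:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern

# Toy history of (configuration, validation loss) observations.
X = np.array([[-4.0], [-3.0], [-1.5], [-0.5]])   # e.g. log10 of the learning rate
y = np.array([0.9, 0.4, 0.6, 1.1])

gp = GaussianProcessRegressor(kernel=Matern(nu=2.5), alpha=1e-6, normalize_y=True)
gp.fit(X, y)

def lcb(x, alpha=2.5):
    # Lower confidence bound: an optimistic estimate of the loss at x.
    mean, std = gp.predict(x.reshape(-1, 1), return_std=True)
    return mean - alpha * std

# Cheap acquisition minimization: evaluate the LCB on random candidates and keep
# the best one (the optimizer below additionally refines this with L-BFGS-B and
# collapses the bounds of any "dictated" dimensions so they stay fixed).
candidates = np.random.uniform(low=-5.0, high=0.0, size=1000)
next_x = candidates[np.argmin(lcb(candidates))]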
https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/methods/bulkandcut/bayesian_optimization/__init__.py -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/bayesian_optimization/constrained_bayesian_optimizer.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import csv 3 | from typing import List 4 | 5 | import numpy as np 6 | from sklearn.gaussian_process.kernels import Matern 7 | from sklearn.gaussian_process import GaussianProcessRegressor 8 | from scipy.optimize import minimize 9 | 10 | from baselines.methods.bulkandcut import global_seed, rng 11 | 12 | 13 | class ConstrainedBayesianOptimizer(): 14 | """ 15 | I minimize a target function while an arbitrary subset of the search dimensions is held fixed (dictated) for each suggestion. 16 | """ 17 | def __init__(self, par_bounds: dict): 18 | self.surrogate_model = GaussianProcessRegressor( 19 | kernel=Matern(nu=2.5), 20 | alpha=1e-6, 21 | normalize_y=True, 22 | n_restarts_optimizer=5, 23 | random_state=global_seed, 24 | ) 25 | self.par_bounds = par_bounds 26 | self.par_names = list(par_bounds.keys()) # To have a fixed reference order 27 | 28 | self.par_values = [] 29 | self.par_targets = [] 30 | 31 | @property 32 | def incumbent(self): 33 | return self.par_values[np.argmin(self.par_targets)] 34 | 35 | @property 36 | def n_pars(self): 37 | return len(self.par_bounds) 38 | 39 | def register_target(self, par_values: dict, target: float): 40 | # TODO check bounds of par_values, if out of bounds, raise warning 41 | self.par_targets.append(target) 42 | self.par_values.append([par_values[pname] for pname in self.par_names]) 43 | 44 | def next_pars(self, dictated_pars: dict): 45 | # check validity of dictated pars: 46 | for dpar_k, dpar_v in dictated_pars.items(): 47 | if dpar_v < self.par_bounds[dpar_k][0] or dpar_v > self.par_bounds[dpar_k][1]: 48 | print(f"WARNING: Dictated parameter {dpar_k} is out of bounds. It has value " 49 | f"{dpar_v}, but it should be between {self.par_bounds[dpar_k]}") 50 | 51 | lowb, highb = self._get_constrained_bounds(dpars=dictated_pars) 52 | 53 | if len(self.par_targets) < 2: 54 | # Return a random point if we've seen fewer than two points. 55 | suggestion = rng.uniform(low=lowb, high=highb) 56 | else: 57 | # Otherwise first we fit the Gaussian Process 58 | print('Values:', self.par_values) 59 | print('Targets:', self.par_targets) 60 | with warnings.catch_warnings(): # TODO: can I get rid of these warnings some other way? 
61 | warnings.simplefilter("ignore") 62 | self.surrogate_model.fit( 63 | X=np.array(self.par_values), 64 | y=self.par_targets, 65 | ) 66 | # Then we return the LCB minimizer 67 | suggestion = self._minimize_lcb(lowb, highb) 68 | 69 | # Wrap the suggestion in a dictionary: 70 | suggestion = {pname: suggestion[n] for n, pname in enumerate(self.par_names)} 71 | return suggestion 72 | 73 | def _get_constrained_bounds(self, dpars: dict): 74 | low_bound, high_bound = [], [] 75 | for pname in self.par_names: 76 | if pname in dpars: 77 | low_bound.append(dpars[pname]) 78 | high_bound.append(dpars[pname]) 79 | else: 80 | low_bound.append(self.par_bounds[pname][0]) 81 | high_bound.append(self.par_bounds[pname][1]) 82 | 83 | return np.array(low_bound), np.array(high_bound) 84 | 85 | def _minimize_lcb(self, 86 | lowb: "np.array", 87 | highb: "np.array", 88 | n_random: int = 10000, 89 | n_solver: int = 10, 90 | ): 91 | """ 92 | A function to find the minimum of the acquisition function It uses a combination of random 93 | sampling (cheap) and the 'L-BFGS-B' optimization method. First by sampling `n_random` points 94 | at random, and then running L-BFGS-B for `n_solver` random starting points. 95 | 96 | This function was inspired on (a.k.a. plagiarized from) 97 | https://github.com/fmfn/BayesianOptimization 98 | """ 99 | 100 | def lcb(x, alpha=2.5): 101 | """ LCB: lower confidence bound """ 102 | x = x.reshape(1, -1) if x.ndim == 1 else x 103 | mean, std = self.surrogate_model.predict(X=x, return_std=True) 104 | return mean - alpha * std 105 | 106 | # Warm up with random points 107 | x_guesses = rng.uniform(low=lowb, high=highb, size=(n_random, self.n_pars)) 108 | ys = lcb(x=x_guesses) 109 | best_x = x_guesses[ys.argmin()] 110 | min_lcb = ys.min() 111 | 112 | # Then use the scipy minimizer solver 113 | x_guesses = rng.uniform(low=lowb, high=highb, size=(n_solver, self.n_pars)) 114 | x_guesses = np.vstack((best_x, x_guesses)) 115 | scikit_bounds = np.vstack((lowb, highb)).T 116 | for x0 in x_guesses: 117 | with warnings.catch_warnings(): # TODO: can I get rid of these warnings some other way? 118 | warnings.simplefilter("ignore") 119 | res = minimize( 120 | fun=lcb, 121 | x0=x0.reshape(1, -1), 122 | bounds=scikit_bounds, 123 | method="L-BFGS-B", 124 | ) 125 | if not res.success: 126 | continue 127 | 128 | # Store it if better than previous minimum. 129 | if res.fun[0] < min_lcb: 130 | best_x = res.x 131 | min_lcb = res.fun[0] 132 | 133 | # Clip output to make sure it lies within the bounds. Due to floating 134 | # point technicalities this is not always the case. 
135 | return np.clip(best_x, lowb, highb) 136 | 137 | def save_csv(self, csv_path: str): 138 | # Write configurations and their respective targets on a csv file 139 | fieldnames = ["order", "target"] + self.par_names 140 | with open(csv_path, 'w', newline='') as csvfile: 141 | writer = csv.DictWriter(csvfile, fieldnames=fieldnames) 142 | writer.writeheader() 143 | for i in range(len(self.par_targets)): 144 | new_row = { 145 | "order": i, 146 | "target": self.par_targets[i], 147 | } 148 | for p, pname in enumerate(self.par_names): 149 | new_row[pname] = self.par_values[i][p] 150 | writer.writerow(new_row) 151 | -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/bayesian_optimization/optimizer_one_two.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from baselines.methods.bulkandcut.bayesian_optimization.constrained_bayesian_optimizer \ 4 | import ConstrainedBayesianOptimizer 5 | 6 | 7 | class OptimizerOneTwo(ConstrainedBayesianOptimizer): 8 | """ 9 | Optimizer used on phases 1 and 2 (initialization and bulk-up) 10 | """ 11 | 12 | def __init__(self, log_dir: str): 13 | self.log_path = os.path.join(log_dir, "BO_OneTwo.csv") 14 | parameter_bounds = { 15 | "lr_exp": (-5., -0.), # LR = 10^lr_exp 16 | #"w_decay_exp": (-4., -1.), # weight_decay = 10^w_decay_exp 17 | #"lr_sched_gamma": (1., 1.), # 1. is equivalent to no schedule 18 | #"lr_sched_step_size": (2., 50.), 19 | # The parameters below are observed but not controlled by the optimizer. 20 | "depth": (1., 15.), # Depth of the network 21 | "log_npars": (0., 8.), # log10 of the number of parameters of the network 22 | } 23 | # The baseline (default configuration) is included in the search space. 24 | # default conf = { 25 | # "lr_exp" : math.log10(2.244958736283895e-05), 26 | # "w_decay_exp" : -2, 27 | # "lr_sched_gamma" : 1., # No schedule 28 | # "lr_sched_step_size" : 25., # This is irrelevant, because lr_sched_gamma=1. 
29 | # } 30 | super().__init__(par_bounds=parameter_bounds) 31 | 32 | def register_target(self, config, learning_curves): 33 | valid_loss = learning_curves["validation_loss"][-1] 34 | super().register_target( 35 | par_values=config, 36 | target=valid_loss 37 | ) 38 | self.save_csv(self.log_path) 39 | -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/bayesian_optimization/optimizer_three.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from baselines.methods.bulkandcut.bayesian_optimization.constrained_bayesian_optimizer \ 4 | import ConstrainedBayesianOptimizer 5 | 6 | 7 | class OptimizerThree(ConstrainedBayesianOptimizer): 8 | """ 9 | Optimizer used on phase 3 (slim-down) 10 | """ 11 | 12 | def __init__(self, log_dir: str): 13 | self.log_path = os.path.join(log_dir, "BO_Three.csv") 14 | parameter_bounds = { 15 | "lr_exp": (-5., -2.), 16 | # "w_decay_exp": (-4., -1.), # weight_decay = 10^w_decay_exp 17 | # The parameters below are observed but not controlled by the optimizer: 18 | "depth": (1., 15.), # Depth of the network 19 | "log_npars": (0., 8.), # log10 of the number of parameters of the network 20 | } 21 | super().__init__(par_bounds=parameter_bounds) 22 | 23 | def register_target(self, config, learning_curves): 24 | valid_loss = learning_curves["train_loss"][-1] 25 | super().register_target( 26 | par_values=config, 27 | target=valid_loss 28 | ) 29 | self.save_csv(self.log_path) 30 | -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/data_augmentation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision.transforms.functional import hflip 3 | 4 | from baselines.methods.bulkandcut import rng 5 | 6 | 7 | class DataAugmentation(): 8 | def __init__(self, n_classes: int, mixup_alpha: float = .25): 9 | self.n_classes = n_classes 10 | self.alpha = mixup_alpha 11 | 12 | def __call__(self, data, targets): 13 | data = self._hflip(data) 14 | targets = self._onehot(targets) 15 | data, targets = self._mixup(data, targets) 16 | return data, targets 17 | 18 | def _hflip(self, data): 19 | batch_size = data.size(0) 20 | mask = torch.rand(size=(batch_size,)) > .5 21 | data[mask] = hflip(data[mask]) 22 | return data 23 | 24 | def _mixup(self, data, targets): 25 | """ 26 | This function was adapted from: 27 | https://github.com/hysts/pytorch_mixup/blob/master/utils.py. 28 | To the author my gratitude. 
:-) 29 | """ 30 | batch_size = data.size(0) 31 | indices = torch.randperm(n=batch_size) 32 | data2 = data[indices] 33 | targets2 = targets[indices] 34 | 35 | # Original code: 36 | # lambda_ = torch.FloatTensor([rng.beta(a=alpha, b=alpha)]) 37 | # data = data * lambda_ + data2 * (1 - lambda_) 38 | # targets = targets * lambda_ + targets2 * (1 - lambda_) 39 | 40 | # My modification: 41 | lambda_ = torch.FloatTensor(rng.beta(a=self.alpha, b=self.alpha, size=batch_size)) 42 | lamb_data = lambda_.reshape((-1, 1, 1, 1)) 43 | lamb_targ = lambda_.reshape((-1, 1)) 44 | data = data * lamb_data + data2 * (1 - lamb_data) 45 | targets = targets * lamb_targ + targets2 * (1 - lamb_targ) 46 | 47 | return data, targets 48 | 49 | def _onehot(self, label): 50 | template = torch.zeros(label.size(0), self.n_classes) 51 | ohe = template.scatter_(1, label.view(-1, 1), 1) 52 | return ohe 53 | -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/genetic_algorithm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/methods/bulkandcut/genetic_algorithm/__init__.py -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/genetic_algorithm/individual.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | 4 | 5 | class Individual(): 6 | def __init__(self, 7 | indv_id: int, 8 | path_to_model: str, 9 | summary: str, 10 | depth: int, 11 | birth_time: datetime, 12 | parent_id: int, 13 | bulk_counter: int, 14 | cut_counter: int, 15 | bulk_offsprings: int, 16 | cut_offsprings: int, 17 | optimizer_config: dict, 18 | learning_curves: list, 19 | n_parameters: int, 20 | parameters: dict 21 | ): 22 | # TODO: receive the whole model as argument and read here what is necessary 23 | # (similar to what I did with learning curves) 24 | self.indv_id = indv_id 25 | self.path_to_model = path_to_model 26 | self.summary = summary 27 | self.depth = depth 28 | self.birth_time = birth_time 29 | self.parent_id = parent_id 30 | self.bulk_counter = bulk_counter 31 | self.cut_counter = cut_counter 32 | self.bulk_offsprings = bulk_offsprings 33 | self.cut_offsprings = cut_offsprings 34 | self.optimizer_config = optimizer_config 35 | self.pre_training_loss = learning_curves["validation_loss"][0] 36 | self.post_training_loss = learning_curves["validation_loss"][-1] 37 | # We want to optimize these last two guys: 38 | self.post_training_accuracy = learning_curves["validation_accuracy"][-1] 39 | self.n_parameters = n_parameters 40 | self._parameters_dict = parameters 41 | 42 | def to_dict(self): 43 | # TODO: Too much boilerplate. 
Maybe store everything in a dict from the 44 | # start and make instance indexable 45 | return { 46 | "id": self.indv_id, 47 | "accuracy": self.post_training_accuracy, 48 | "n_parameters": self.n_parameters, 49 | "depth": self.depth, 50 | "birth": self.birth_time, 51 | "parent_id": self.parent_id, 52 | "bulk_counter": self.bulk_counter, 53 | "cut_counter": self.cut_counter, 54 | "bulk_offsprings": self.bulk_offsprings, 55 | "cut_offsprings": self.cut_offsprings, 56 | "loss_before_training": self.pre_training_loss, 57 | "loss_after_training": self.post_training_loss, 58 | } 59 | 60 | def __str__(self): 61 | n_ljust = 25 62 | thestring = f"Model {self.indv_id}\n\n" 63 | thestring += self.summary + "\n\n" 64 | for k, v in self.optimizer_config.items(): 65 | thestring += str(k).ljust(n_ljust) + str(v) + "\n" 66 | thestring += "\n" 67 | for k, v in self.to_dict().items(): 68 | thestring += str(k).ljust(n_ljust) + str(v) + "\n" 69 | return thestring 70 | 71 | def save_info(self): 72 | if not os.path.exists(self.path_to_model): 73 | raise Exception("Save model first, then its info") 74 | model_dir = os.path.dirname(self.path_to_model) 75 | info_path = os.path.join(model_dir, "..", str(self.indv_id).rjust(4, "0") + "_info.txt") 76 | with open(info_path, "x") as info_file: 77 | info_file.write(str(self)) 78 | -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/methods/bulkandcut/model/__init__.py -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/model/average_meter.py: -------------------------------------------------------------------------------- 1 | class AverageMeter(): 2 | 3 | def __init__(self): 4 | self.reset() 5 | 6 | def __call__(self): 7 | return self.sum / self.cnt 8 | 9 | def reset(self): 10 | self.sum = 0. 11 | self.cnt = 0. 
12 | 13 | def update(self, val, n=1): 14 | self.sum += val * n 15 | self.cnt += n 16 | -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/model/ax_adapter.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def adapt_to_ex(model): 4 | 5 | 6 | self.conv_sections = conv_sections 7 | self.glob_av_pool = torch.nn.AdaptiveAvgPool2d(output_size=1) 8 | self.linear_sections = linear_sections 9 | self.head = head 10 | self.input_shape = input_shape 11 | self.n_classes = head.out_elements 12 | 13 | self.data_augment = DataAugmentation(n_classes=self.n_classes) 14 | self.loss_func_CE_soft = CrossEntropyWithProbs().to(device) 15 | self.loss_func_CE_hard = torch.nn.CrossEntropyLoss().to(device) 16 | self.loss_func_MSE = torch.nn.MSELoss().to(device) 17 | self.creation_time = datetime.now() 18 | 19 | config = {} 20 | 21 | config['n_conv_l'] = len(model.conv_sections) 22 | config['n_conv_0'] = model.conv_sections[0].out_elements 23 | config['n_conv_1'] = model.conv_sections[1].out_elements if len(model.conv_sections) > 1 else 16 24 | config['n_conv_2'] = model.conv_sections[2].out_elements if len(model.conv_sections) > 2 else 16 25 | 26 | # Dense 27 | config['n_fc_l'] = len(model.linear_sections) 28 | config['n_fc_0'] = model.linear_sections[0].out_elements 29 | config['n_fc_1'] = model.linear_sections[1].out_elements if len(model.linear_sections) > 1 else 16 30 | config['n_fc_2'] = model.linear_sections[2].out_elements if len(model.linear_sections) > 2 else 16 31 | 32 | # Kernel Size 33 | config['kernel_size'] = model.linear_sections[0].kernel_size 34 | 35 | # Learning Rate 36 | lr = RangeParameter('lr_init', ParameterType.FLOAT, 0.00001, 1.0, True) 37 | 38 | # Use Batch Normalization 39 | bn = ChoiceParameter('batch_norm', ParameterType.BOOL, values=[True, False]) 40 | 41 | # Batch size 42 | bs = RangeParameter('batch_size', ParameterType.INT, 1, 512, True) 43 | 44 | # Global Avg Pooling 45 | ga = ChoiceParameter('global_avg_pooling', ParameterType.BOOL, values=[True, False]) 46 | 47 | b = FixedParameter('budget', ParameterType.INT, 25) 48 | 49 | i = FixedParameter('id', ParameterType.STRING, 'dummy') 50 | -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/model/blind_model.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from collections import defaultdict 3 | 4 | import numpy as np 5 | import torch 6 | import torchsummary 7 | import tqdm 8 | 9 | from baselines.methods.bulkandcut.model.average_meter import AverageMeter 10 | from baselines.methods.bulkandcut import device 11 | 12 | 13 | class BlindModel(torch.nn.Module): 14 | 15 | def __init__(self, n_classes: int, super_stupid: bool = False): 16 | super(BlindModel, self).__init__() 17 | n_pars = 1 if super_stupid else n_classes 18 | self.bias = torch.nn.Parameter(data=torch.rand(n_pars) * 1E-6, requires_grad=True) 19 | self.loss_func_CE_hard = torch.nn.CrossEntropyLoss() 20 | self.optimizer = torch.optim.AdamW(params=self.parameters(), lr=2.244958736283895e-05) 21 | self.n_classes = n_classes 22 | self.creation_time = datetime.now() 23 | 24 | @property 25 | def n_parameters(self): 26 | return np.sum(par.numel() for par in self.parameters()) 27 | 28 | @property 29 | def summary(self): 30 | model_stats = torchsummary.summary( 31 | model=self, 32 | input_data=(1,), 33 | device=device, 34 | verbose=0, 35 | ) 36 | return 
str(model_stats) 37 | 38 | def save(self, file_path): 39 | torch.save(obj=self, f=file_path) 40 | 41 | def forward(self, x): 42 | batch_size = x.size(0) 43 | ones = torch.ones((batch_size, self.n_classes)).to(device) 44 | x = self.bias * ones # The blind model doesn't care about the input 45 | return x 46 | 47 | def start_training(self, 48 | train_dataset: "torch.utils.data.Dataset", 49 | valid_dataset: "torch.utils.data.Dataset", 50 | ): 51 | learning_curves = defaultdict(list) 52 | 53 | # Create Dataloaders: 54 | train_data_loader = torch.utils.data.DataLoader( 55 | dataset=train_dataset, 56 | batch_size=282, 57 | shuffle=True, 58 | ) 59 | valid_data_loader = torch.utils.data.DataLoader( 60 | dataset=valid_dataset, 61 | batch_size=282, 62 | shuffle=False, 63 | ) 64 | 65 | # Pre-training validation loss: 66 | print("Pre-training evaluation:") 67 | initial_loss, _ = self.evaluate( 68 | data_loader=valid_data_loader, 69 | split_name="validation", 70 | ) 71 | learning_curves["validation_loss"].append(initial_loss) 72 | 73 | train_batch_losses = self._train_one_epoch(train_data_loader=train_data_loader) 74 | learning_curves["train_loss"].append(train_batch_losses()) 75 | _, valid_accuracy = self.evaluate(data_loader=valid_data_loader, split_name="validation") 76 | learning_curves["validation_accuracy"].append(valid_accuracy) 77 | 78 | status_str = f"training loss: {learning_curves['train_loss'][-1]:.3f}, " 79 | status_str += f"validation accuracy: {valid_accuracy:.3f}\n" 80 | print(status_str) 81 | 82 | return learning_curves 83 | 84 | def _train_one_epoch(self, train_data_loader): 85 | self.train() 86 | 87 | batch_losses = AverageMeter() 88 | tqdm_ = tqdm.tqdm(train_data_loader) 89 | for images, targets in tqdm_: 90 | batch_size = images.size(0) 91 | images = images.to(device) 92 | targets = targets.to(device) 93 | 94 | # Forward- and backprop: 95 | self.optimizer.zero_grad() 96 | logits = self(images) 97 | loss = self.loss_func_CE_hard(input=logits, target=targets) 98 | loss.backward() 99 | self.optimizer.step() 100 | 101 | # Register training loss of the current batch: 102 | loss_value = loss.item() 103 | batch_losses.update(val=loss_value, n=batch_size) 104 | tqdm_.set_description(desc=f"Training loss: {loss_value:.3f}") 105 | 106 | return batch_losses 107 | 108 | @torch.no_grad() 109 | def evaluate(self, data_loader, split_name): 110 | self.eval() 111 | 112 | average_loss = AverageMeter() 113 | average_accuracy = AverageMeter() 114 | tqdm_ = tqdm.tqdm(data_loader) 115 | for images, labels in tqdm_: 116 | batch_size = images.size(0) 117 | images = images.to(device) 118 | labels = labels.to(device) 119 | 120 | # Loss: 121 | logits = self(images) 122 | loss_value = self.loss_func_CE_hard(input=logits, target=labels) 123 | average_loss.update(val=loss_value.item(), n=batch_size) 124 | 125 | # Top-3 accuracy: 126 | top3_accuracy = self._accuracy(outputs=logits, targets=labels, topk=(3,)) 127 | average_accuracy.update(val=top3_accuracy[0], n=batch_size) 128 | 129 | tqdm_.set_description(f"Evaluating on the {split_name} split:") 130 | 131 | return average_loss(), average_accuracy() 132 | 133 | @torch.no_grad() 134 | def _accuracy(self, outputs, targets, topk=(1,)): 135 | maxk = max(topk) 136 | batch_size = targets.size(0) 137 | 138 | _, pred = outputs.topk(k=maxk, dim=1, largest=True, sorted=True) 139 | pred = pred.T 140 | correct = pred.eq(targets.view(1, -1).expand_as(pred)) 141 | 142 | accuracies = [] 143 | for k in topk: 144 | correct_k = correct[:k].reshape(-1).float().sum(0) 145 | 
accuracies.append(correct_k.mul_(100.0/batch_size).item()) 146 | return accuracies 147 | -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/model/conv_cell.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | import torch 4 | 5 | 6 | class ConvCell(torch.nn.Module): 7 | 8 | @classmethod 9 | def NEW(cls, index, parameters, in_elements: int): 10 | # sample 11 | out_elements = parameters[f'n_conv_{index}'] 12 | kernel_size = parameters['kernel_size'] 13 | conv = torch.nn.Conv2d( 14 | in_channels=in_elements, 15 | out_channels=out_elements, 16 | kernel_size=kernel_size, 17 | padding=(kernel_size - 1) // 2, 18 | ) 19 | bnorm = torch.nn.BatchNorm2d(num_features=out_elements) if parameters['batch_norm'] else torch.nn.Identity() 20 | 21 | return cls(conv_layer=conv, batch_norm=bnorm) 22 | 23 | def __init__(self, conv_layer, batch_norm, dropout_p: float = .5, is_first_cell: bool = False): 24 | super(ConvCell, self).__init__() 25 | self.conv = conv_layer 26 | self.act = torch.nn.ReLU() 27 | self.bnorm = batch_norm 28 | self.is_first_cell = is_first_cell # This changes how the cell is pruned 29 | 30 | def forward(self, x): 31 | x = self.conv(x) 32 | x = self.act(x) 33 | x = self.bnorm(x) 34 | return x 35 | 36 | def downstream_morphism(self): 37 | identity_layer = torch.nn.Conv2d( 38 | in_channels=self.out_elements, 39 | out_channels=self.out_elements, 40 | kernel_size=self.kernel_size, 41 | padding=self.padding, 42 | ) 43 | 44 | with torch.no_grad(): 45 | # Initiate weights and biases with the identity function 46 | torch.nn.init.dirac_(identity_layer.weight) 47 | torch.nn.init.zeros_(identity_layer.bias) 48 | 49 | # And add some noise to break the symmetry 50 | identity_layer.weight += torch.rand_like(identity_layer.weight) * 1E-5 51 | identity_layer.bias += torch.rand_like(identity_layer.bias) * 1E-5 52 | 53 | # Batch-norm morphism (is this the best way?): 54 | if isinstance(self.bnorm, torch.nn.BatchNorm2d): 55 | bnorm = torch.nn.BatchNorm2d(num_features=self.out_elements) 56 | bnorm.weight = torch.nn.Parameter(deepcopy(self.bnorm.weight)) 57 | bnorm.running_var = torch.square(deepcopy(self.bnorm.weight).detach()) - self.bnorm.eps 58 | bnorm.bias = torch.nn.Parameter(deepcopy(self.bnorm.bias)) 59 | bnorm.running_mean = deepcopy(self.bnorm.bias).detach() 60 | else: 61 | bnorm = torch.nn.Identity() 62 | 63 | return ConvCell(conv_layer=identity_layer, batch_norm=bnorm) 64 | 65 | @torch.no_grad() 66 | def prune(self, out_selected, amount: float = .1): 67 | 68 | num_out_elements = len(out_selected) 69 | conv_weight = self.conv.weight[out_selected] 70 | conv_bias = self.conv.bias[out_selected] 71 | 72 | if self.is_first_cell: 73 | num_in_elements = self.in_elements 74 | in_selected = None # should be ignored by the calling code 75 | else: 76 | # Upstream filters with the lowest L1 norms will be pruned 77 | elements_to_prune = int(amount * self.in_elements) # implicit floor 78 | num_in_elements = self.in_elements - elements_to_prune 79 | w_l1norm = torch.sum( 80 | input=torch.abs(self.conv.weight), 81 | dim=[0, 2, 3], 82 | ) 83 | candidates = torch.argsort(w_l1norm)[:2 * elements_to_prune] 84 | idx_to_prune = torch.randperm(candidates.size(0))[:elements_to_prune] 85 | in_selected = torch.arange(self.in_elements) 86 | for kill in idx_to_prune: 87 | in_selected = torch.cat((in_selected[:kill], in_selected[kill + 1:])) 88 | conv_weight = conv_weight[:, in_selected] 89 | 90 | 91 | 
# Pruning the convolution: 92 | pruned_conv = torch.nn.Conv2d( 93 | in_channels=num_in_elements, 94 | out_channels=num_out_elements, 95 | kernel_size=self.kernel_size, 96 | padding=self.padding, 97 | ) 98 | pruned_conv.weight = torch.nn.Parameter(deepcopy(conv_weight)) 99 | pruned_conv.bias = torch.nn.Parameter(deepcopy(conv_bias)) 100 | 101 | if isinstance(self.bnorm, torch.nn.BatchNorm2d): 102 | # Pruning the batch norm: 103 | bnorm_weight = self.bnorm.weight[out_selected] 104 | bnorm_bias = self.bnorm.bias[out_selected] 105 | bnorm_running_var = self.bnorm.running_var[out_selected] 106 | bnorm_running_mean = self.bnorm.running_mean[out_selected] 107 | pruned_bnorm = torch.nn.BatchNorm2d(num_features=num_out_elements) 108 | pruned_bnorm.weight = torch.nn.Parameter(deepcopy(bnorm_weight)) 109 | pruned_bnorm.bias = torch.nn.Parameter(deepcopy(bnorm_bias)) 110 | pruned_bnorm.running_var = deepcopy(bnorm_running_var) 111 | pruned_bnorm.bnorm_running_mean = deepcopy(bnorm_running_mean) 112 | else: 113 | pruned_bnorm = torch.nn.Identity() 114 | 115 | # "Pruning" dropout: 116 | # drop_p = self.drop.p * (1. - amount) 117 | 118 | # Wrapping it all up: 119 | pruned_cell = ConvCell( 120 | conv_layer=pruned_conv, 121 | batch_norm=pruned_bnorm, 122 | # dropout_p=drop_p, 123 | is_first_cell=self.is_first_cell, 124 | ) 125 | return pruned_cell, in_selected 126 | 127 | @property 128 | def in_elements(self): 129 | return self.conv.in_channels 130 | 131 | @property 132 | def out_elements(self): 133 | return self.conv.out_channels 134 | 135 | @property 136 | def kernel_size(self): 137 | return self.conv.kernel_size 138 | 139 | @property 140 | def padding(self): 141 | return self.conv.padding 142 | -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/model/cross_entropy_with_probs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | class CrossEntropyWithProbs(torch.nn.Module): 6 | """Cross entropy for soft labels. PyTorch, unlike TensorFlow or Keras, requires this 7 | workaround because CrossEntropyLoss demands that labels are given in a LongTensor. 
8 | 9 | This code was shamelessly copied from Snorkel: 10 | https://github.com/snorkel-team/snorkel/blob/master/snorkel/classification/loss.py 11 | """ 12 | 13 | def __init__(self, weight: "torch.Tensor" = None, reduction: str = "mean"): 14 | super().__init__() 15 | self.weight = weight 16 | self.reduction = reduction 17 | 18 | def forward(self, input: torch.Tensor, target: torch.Tensor): 19 | num_points, num_classes = input.shape 20 | 21 | cum_losses = input.new_zeros(num_points) 22 | for y in range(num_classes): 23 | target_temp = input.new_full((num_points,), y, dtype=torch.long) 24 | y_loss = F.cross_entropy(input, target_temp, reduction="none") 25 | if self.weight is not None: 26 | y_loss = y_loss * self.weight[y] 27 | cum_losses += target[:, y].float() * y_loss 28 | 29 | if self.reduction == "none": 30 | return cum_losses 31 | elif self.reduction == "mean": 32 | return cum_losses.mean() 33 | elif self.reduction == "sum": 34 | return cum_losses.sum() 35 | else: 36 | raise ValueError("Keyword 'reduction' must be one of ['none', 'mean', 'sum']") 37 | -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/model/linear_cell.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | import torch 4 | from ax import SearchSpace 5 | 6 | 7 | class LinearCell(torch.nn.Module): 8 | 9 | @classmethod 10 | def NEW(cls, index, parameters, in_elements: int): 11 | 12 | 13 | out_elements = parameters[f'n_fc_{index}'] 14 | ll = torch.nn.Linear(in_features=in_elements, out_features=out_elements) 15 | return cls(linear_layer=ll) 16 | 17 | def __init__(self, linear_layer, dropout_p=.5): 18 | super(LinearCell, self).__init__() 19 | self.linear = linear_layer 20 | self.act = torch.nn.ReLU() 21 | 22 | def forward(self, x): 23 | x = self.linear(x) 24 | x = self.act(x) 25 | return x 26 | 27 | def downstream_morphism(self): 28 | identity_layer = torch.nn.Linear( 29 | in_features=self.out_elements, 30 | out_features=self.out_elements, 31 | ) 32 | 33 | with torch.no_grad(): 34 | # Initiate weights and biases with the identity function 35 | torch.nn.init.eye_(identity_layer.weight) 36 | torch.nn.init.zeros_(identity_layer.bias) 37 | 38 | # And add some noise to break the symmetry 39 | identity_layer.weight += torch.rand_like(identity_layer.weight) * 1E-5 40 | identity_layer.bias += torch.rand_like(identity_layer.bias) * 1E-5 41 | 42 | return LinearCell(linear_layer=identity_layer) 43 | 44 | @torch.no_grad() 45 | def prune(self, out_selected, amount: float): 46 | # TODO: improve commentary 47 | 48 | elements_to_prune = int(amount * self.in_elements) # implicit floor 49 | num_in_elements = self.in_elements - elements_to_prune 50 | num_out_elements = self.out_elements if out_selected is None else len(out_selected) 51 | 52 | # Upstream units with the lowest L1 norms will be pruned 53 | w_l1norm = torch.sum( 54 | input=torch.abs(self.linear.weight), 55 | dim=0, 56 | ) 57 | candidates = torch.argsort(w_l1norm)[:2 * elements_to_prune] 58 | idx_to_prune = torch.randperm(candidates.size(0))[:elements_to_prune] 59 | in_selected = torch.arange(self.in_elements) 60 | for kill in idx_to_prune: 61 | in_selected = torch.cat((in_selected[:kill], in_selected[kill + 1:])) 62 | 63 | pruned_layer = torch.nn.Linear( 64 | in_features=num_in_elements, 65 | out_features=num_out_elements, 66 | ) 67 | 68 | weight = self.linear.weight[:, in_selected] 69 | bias = self.linear.bias 70 | if out_selected is not None: 71 | weight 
= weight[out_selected] 72 | bias = bias[out_selected] 73 | pruned_layer.weight = torch.nn.Parameter(deepcopy(weight)) 74 | pruned_layer.bias = torch.nn.Parameter(deepcopy(bias)) 75 | 76 | 77 | # Wrapping it up: 78 | pruned_cell = LinearCell( 79 | linear_layer=pruned_layer, 80 | ) 81 | 82 | return pruned_cell, in_selected 83 | 84 | @property 85 | def in_elements(self): 86 | return self.linear.in_features 87 | 88 | @property 89 | def out_elements(self): 90 | return self.linear.out_features 91 | -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/model/model_head.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | import torch 4 | 5 | 6 | class ModelHead(torch.nn.Module): 7 | 8 | @classmethod 9 | def NEW(cls, in_elements, out_elements): 10 | linear_layer = torch.nn.Linear( 11 | in_features=in_elements, 12 | out_features=out_elements, 13 | ) 14 | return ModelHead(linear_layer=linear_layer) 15 | 16 | def __init__(self, linear_layer): 17 | super(ModelHead, self).__init__() 18 | self.layer = linear_layer 19 | 20 | @property 21 | def in_elements(self): 22 | return self.layer.in_features 23 | 24 | @property 25 | def out_elements(self): 26 | return self.layer.out_features 27 | 28 | def forward(self, x): 29 | return self.layer(x) 30 | 31 | def bulkup(self): 32 | return deepcopy(self) 33 | 34 | @torch.no_grad() 35 | def slimdown(self, amount: float): 36 | 37 | elements_to_prune = int(amount * self.in_elements) 38 | num_in_elements = self.in_elements - elements_to_prune 39 | new_layer = torch.nn.Linear( 40 | in_features=num_in_elements, 41 | out_features=self.out_elements, 42 | ) 43 | 44 | # Upstream units with the lowest L1 norms will be pruned 45 | w_l1norm = torch.sum( 46 | input=torch.abs(self.layer.weight), 47 | dim=0, 48 | ) 49 | candidates = torch.argsort(w_l1norm)[:2 * elements_to_prune] 50 | idx_to_prune = torch.randperm(candidates.size(0))[:elements_to_prune] 51 | in_selected = torch.arange(self.in_elements) 52 | for kill in idx_to_prune: 53 | in_selected = torch.cat((in_selected[:kill], in_selected[kill + 1:])) 54 | 55 | weight = deepcopy(self.layer.weight.data[:, in_selected]) 56 | bias = deepcopy(self.layer.bias) 57 | new_layer.weight = torch.nn.Parameter(weight) 58 | new_layer.bias = torch.nn.Parameter(bias) 59 | 60 | narrower_head = ModelHead(linear_layer=new_layer) 61 | 62 | return narrower_head, in_selected 63 | -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/model/model_section.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | import torch 4 | from ax import SearchSpace 5 | 6 | from baselines.methods.bulkandcut.model.linear_cell import LinearCell 7 | from baselines.methods.bulkandcut.model.conv_cell import ConvCell 8 | from baselines.methods.bulkandcut.model.skip_connection import SkipConnection 9 | from baselines.methods.bulkandcut import rng, device 10 | 11 | 12 | class ModelSection(torch.nn.Module): 13 | 14 | @classmethod 15 | def NEW(cls, index: int, parameters, in_elements: int, section_type: str): 16 | if section_type not in ["linear", "conv"]: 17 | raise Exception(f"Unknown section type: {section_type}") 18 | 19 | if section_type == "linear": 20 | first_cell = LinearCell.NEW(index, parameters, in_elements=in_elements) 21 | last_op = torch.nn.Identity() 22 | else: 23 | first_cell = ConvCell.NEW(index, parameters, 
in_elements=in_elements) 24 | last_op = torch.nn.MaxPool2d(kernel_size=2, stride=2) 25 | cells = torch.nn.ModuleList([first_cell]) 26 | return ModelSection(cells=cells, last_op=last_op) 27 | 28 | def __init__(self, 29 | cells: "torch.nn.ModuleList", 30 | last_op: "torch.nn.Module", 31 | skip_cnns: "torch.nn.ModuleList" = None 32 | ): 33 | super(ModelSection, self).__init__() 34 | self.cells = cells 35 | self.last_op = last_op 36 | self.skip_cnns = skip_cnns if skip_cnns is not None else torch.nn.ModuleList() 37 | 38 | def __len__(self): 39 | return len(self.cells) 40 | 41 | def __iter__(self): 42 | return self.cells.__iter__() 43 | 44 | @property 45 | def in_elements(self): 46 | return self.cells[0].in_elements 47 | 48 | @property 49 | def out_elements(self): 50 | return self.cells[-1].out_elements 51 | 52 | @property 53 | def skip_connections_summary(self): 54 | if len(self.skip_cnns) == 0: 55 | return "\t None\n" 56 | summary = "" 57 | for sc in self.skip_cnns: 58 | summary += "\t" + str(sc) + "\n" 59 | return summary 60 | 61 | def mark_as_first_section(self): 62 | self.cells[0].is_first_cell = True 63 | 64 | def forward(self, x): 65 | n_cells = len(self.cells) 66 | x = self.cells[0](x) 67 | x_buffer = self._build_forward_buffer(buffer_shape=x.shape) 68 | 69 | for i in range(1, len(self.cells)): 70 | if i in x_buffer: 71 | x += x_buffer[i] 72 | for sk in self.skip_cnns: 73 | if sk.source == i: 74 | x_buffer[sk.destiny] += sk(x) 75 | x = self.cells[i](x) 76 | if n_cells + 1 in x_buffer: 77 | x += x_buffer[n_cells + 1] 78 | x = self.last_op(x) 79 | return x 80 | 81 | def _build_forward_buffer(self, buffer_shape): 82 | addresses = {skcnn.destiny for skcnn in self.skip_cnns} # a set 83 | buffer = {addr: torch.zeros(size=buffer_shape).to(device) for addr in addresses} # a dict 84 | return buffer 85 | 86 | def bulkup(self): 87 | # Adds a new cell 88 | sel_cell = rng.integers(low=0, high=len(self.cells)) 89 | identity_cell = self.cells[sel_cell].downstream_morphism() 90 | new_cell_set = deepcopy(self.cells) 91 | new_cell_set.insert(index=sel_cell + 1, module=identity_cell) 92 | 93 | # Adjust skip connection addressing 94 | new_skip_cnns = deepcopy(self.skip_cnns) 95 | for skcnn in new_skip_cnns: 96 | skcnn.adjust_addressing(inserted_cell=sel_cell + 1) 97 | 98 | # Stochastically add a skip connection 99 | if rng.random() < .7: 100 | candidates = self._skip_connection_candidates() 101 | if len(candidates) > 0: 102 | chosen = rng.choice(candidates) 103 | new_skip_connection = SkipConnection(source=chosen[0], destiny=chosen[1]) 104 | new_skip_cnns.append(new_skip_connection) 105 | 106 | deeper_section = ModelSection( 107 | cells=new_cell_set, 108 | skip_cnns=new_skip_cnns, 109 | last_op=deepcopy(self.last_op), 110 | ) 111 | return deeper_section 112 | 113 | def _skip_connection_candidates(self): 114 | n_cells = len(self.cells) 115 | if (n_cells) < 3: 116 | return [] 117 | 118 | already_connected = [(sk.source, sk.destiny) for sk in self.skip_cnns] 119 | candidates = [] 120 | for source in range(1, n_cells - 1): 121 | for destiny in range(source + 2, n_cells + 1): 122 | if (source, destiny) not in already_connected: 123 | candidates.append((source, destiny)) 124 | 125 | return candidates 126 | 127 | def slimdown(self, out_selected, amount: float): 128 | narrower_cells = torch.nn.ModuleList() 129 | for cell in self.cells[::-1]: 130 | pruned_cell, out_selected = cell.prune( 131 | out_selected=out_selected, 132 | amount=amount, 133 | ) 134 | narrower_cells.append(pruned_cell) 135 | 136 | 
narrower_section = ModelSection( 137 | cells=narrower_cells[::-1], 138 | skip_cnns=deepcopy(self.skip_cnns), 139 | last_op=deepcopy(self.last_op), 140 | ) 141 | return narrower_section, out_selected 142 | -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/model/skip_connection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class SkipConnection(torch.nn.Module): 5 | 6 | def __init__(self, source: int, destiny: int): 7 | super(SkipConnection, self).__init__() 8 | initial_gain = torch.rand(1) * 1E-6 9 | self.weight = torch.nn.Parameter(data=initial_gain, requires_grad=True) 10 | 11 | self.source = source 12 | self.destiny = destiny 13 | 14 | def __str__(self): 15 | summary_str = f"from {self.source} to {self.destiny}" 16 | summary_str += f" with weight {self.weight.item():.4e}" 17 | return summary_str 18 | 19 | def forward(self, x): 20 | x = self.weight * x 21 | return x 22 | 23 | def adjust_addressing(self, inserted_cell: int): 24 | if self.source > inserted_cell: 25 | self.source += 1 26 | if self.destiny > inserted_cell: 27 | self.destiny += 1 28 | -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/plot/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/methods/bulkandcut/plot/__init__.py -------------------------------------------------------------------------------- /baselines/methods/bulkandcut/plot/learning_curve.py: -------------------------------------------------------------------------------- 1 | import os 2 | import matplotlib.pyplot as plt 3 | 4 | fig_h = 6.2 # 6.2 inches - the default Libre-office slide height 5 | fig_w = fig_h * 16. / 9. / 2. 
# half a widescreen (16:9) 6 | 7 | 8 | def plot_learning_curves(ind_id: int, 9 | n_pars: int, 10 | curves: dict, 11 | model_path: str, 12 | parent_loss: float = None, 13 | parent_accuracy: float = None): 14 | 15 | plt.style.use("ggplot") 16 | fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(fig_w, fig_h)) 17 | fig.suptitle(f"Model {ind_id} \n {n_pars:,d} parameters") 18 | 19 | epochs = list(range(len(curves["validation_loss"]))) 20 | 21 | color1 = "tab:red" 22 | ax1.yaxis.tick_right() 23 | ax1.yaxis.set_label_position("right") 24 | ax1.set_ylabel("loss") 25 | ax1.plot(epochs, curves["validation_loss"], label="valid", color=color1) 26 | ax1.plot(epochs[1:], curves["train_loss"], label="train", color="tab:orange") 27 | 28 | if parent_loss is not None: 29 | ax1.axhline(parent_loss, color=color1, linestyle="--") 30 | ax1.legend() 31 | 32 | color2 = "tab:blue" 33 | ax2.yaxis.tick_right() 34 | ax2.yaxis.set_label_position("right") 35 | ax2.set_ylabel("accuracy (%)") 36 | ax2.set_xlabel("epoch") 37 | ax2.plot(epochs[1:], curves["validation_accuracy"], label="valid", color=color2) 38 | ax2.plot(epochs[1:], curves["train_accuracy"], label="train", color="b") 39 | if parent_accuracy is not None: 40 | ax2.axhline(parent_accuracy, color=color2, linestyle="--") 41 | ax2.legend() 42 | 43 | fig_path = os.path.join( 44 | os.path.dirname(model_path), 45 | "..", 46 | os.path.basename(model_path).split(".")[0] + "_learning_curves.png", 47 | ) 48 | fig.tight_layout() 49 | plt.savefig(fig_path) 50 | plt.close(fig) 51 | -------------------------------------------------------------------------------- /baselines/methods/mobananas/__init__.py: -------------------------------------------------------------------------------- 1 | from .mobananas import get_MOBANANAS 2 | from .moshbananas import get_MOSHBANANAS -------------------------------------------------------------------------------- /baselines/methods/mobananas/member.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import uuid 3 | from copy import deepcopy 4 | import numpy as np 5 | import pandas as pd 6 | from typing import Dict, Optional 7 | from ax import Experiment, Data, GeneratorRun, Arm 8 | from scipy.stats import truncnorm 9 | 10 | 11 | class Mutation(enum.IntEnum): 12 | NONE = -1 # Can be used when only recombination is required 13 | UNIFORM = 0 # Uniform mutation 14 | GAUSSIAN = 1 # Gaussian mutation 15 | 16 | 17 | class Member: 18 | """ 19 | Class to handle member. 
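Fitness is evaluated lazily: the `fitness` property runs a new trial only when the configuration or the budget has changed, and otherwise returns the cached [accuracy, n_params] pair.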
20 | """ 21 | 22 | 23 | def __init__(self, search_space, 24 | mutation: Mutation, 25 | budget = 25, 26 | experiment: Experiment = None, 27 | x_coordinate: Optional[Dict] = None 28 | ) -> None: 29 | """ 30 | Init 31 | :param search_space: search_space of the given problem 32 | :param x_coordinate: Initial coordinate of the member 33 | :param target_function: The target function that determines the fitness value 34 | :param mutation: hyperparameter that determines which mutation type use 35 | :budget number of epochs 36 | :param experiment: axi experiment to run 37 | """ 38 | self._space = search_space 39 | self._budget = budget 40 | self._id = uuid.uuid4() 41 | self._x = search_space.sample_configuration().get_dictionary() if not x_coordinate else x_coordinate 42 | self._age = 0 43 | self._mutation = mutation 44 | self._x_changed = True 45 | self._fit = None 46 | self._experiment = experiment 47 | self._num_evals = 0 48 | 49 | 50 | @property 51 | def fitness(self): 52 | if self._x_changed: # Only if budget or architecture has changed we need to evaluate the fitness. 53 | self._x_changed = False 54 | 55 | 56 | params = deepcopy(self._x) 57 | params['budget'] = int(self._budget) 58 | 59 | params['n_conv_0'] = params['n_conv_0'] if 'n_conv_0' in params else 16 60 | params['n_conv_1'] = params['n_conv_1'] if 'n_conv_1' in params else 16 61 | params['n_conv_2'] = params['n_conv_2'] if 'n_conv_2' in params else 16 62 | 63 | params['n_fc_0'] = params['n_fc_0'] if 'n_fc_0' in params else 16 64 | params['n_fc_1'] = params['n_fc_1'] if 'n_fc_1' in params else 16 65 | params['n_fc_2'] = params['n_fc_2'] if 'n_fc_2' in params else 16 66 | 67 | params['batch_norm'] = bool(params['batch_norm']) 68 | params['global_avg_pooling'] = bool(params['global_avg_pooling']) 69 | 70 | trial_name = '{}-{}'.format(self._id, self._num_evals) 71 | params['id'] = trial_name 72 | 73 | 74 | trial = self._experiment.new_trial(GeneratorRun([Arm(params, name=trial_name)])) 75 | data = self._experiment.eval_trial(trial) 76 | self._num_evals += 1 77 | 78 | acc = float(data.df[data.df['metric_name'] == 'val_acc_1']['mean']) 79 | len = float(data.df[data.df['metric_name'] == 'num_params']['mean']) 80 | 81 | self._fit =[acc, len] 82 | 83 | return self._fit # evaluate or return save variable 84 | 85 | @property 86 | def x_coordinate(self): 87 | return self._x 88 | 89 | @x_coordinate.setter 90 | def x_coordinate(self, value): 91 | self._x_changed = True 92 | self._x = value 93 | 94 | @property 95 | def budget(self): 96 | return self._budget 97 | 98 | @budget.setter 99 | def budget(self, value): 100 | self._x_changed = True 101 | self._budget = value 102 | 103 | @property 104 | def id(self): 105 | return self._id 106 | 107 | 108 | def get_truncated_normal(self,mean=0, sd=1, low=0, upp=10): 109 | return truncnorm( 110 | (low - mean) / sd, (upp - mean) / sd, loc=mean, scale=sd) 111 | 112 | def return_train_data(self): 113 | 114 | params = deepcopy(self.x_coordinate) 115 | hyperparameter_dict = self._space.get_hyperparameters_dict() 116 | 117 | params['n_conv_0'] = params['n_conv_0'] if 'n_conv_0' in params else 0 118 | params['n_conv_1'] = params['n_conv_1'] if 'n_conv_1' in params else 0 119 | params['n_conv_2'] = params['n_conv_2'] if 'n_conv_2' in params else 0 120 | 121 | params['n_fc_0'] = params['n_fc_0'] if 'n_fc_0' in params else 0 122 | params['n_fc_1'] = params['n_fc_1'] if 'n_fc_1' in params else 0 123 | params['n_fc_2'] = params['n_fc_2'] if 'n_fc_2' in params else 0 124 | 125 | train_data = [] 126 | for key in 
params.keys(): 127 | 128 | if params[key] == True: 129 | param = 1 130 | elif params[key] == False: 131 | param = 0 132 | else: 133 | 134 | try: 135 | param = params[key] / hyperparameter_dict[key].upper 136 | except: 137 | param = params[key]/ (np.sort(hyperparameter_dict[key].choices)[-1]) 138 | 139 | train_data.append(param) 140 | 141 | return train_data 142 | 143 | def mutate(self): 144 | """ 145 | Mutation to create a new offspring 146 | :return: new member that is based on this member 147 | """ 148 | new_x = self.x_coordinate.copy() 149 | hyperparameter_dict = self._space.get_hyperparameters_dict() 150 | 151 | if self._mutation == Mutation.GAUSSIAN: 152 | keys = np.random.choice(list(new_x.keys()), 3, replace=False) 153 | for k in keys: 154 | 155 | if self._space.is_mutable_hyperparameter(str(k)): 156 | 157 | try: 158 | 159 | mean = new_x[k] 160 | upper = hyperparameter_dict[k].upper 161 | lower = hyperparameter_dict[k].lower 162 | sd = (upper - lower) / 3 163 | X = self.get_truncated_normal(mean=mean, sd=sd, low=lower, upp=upper) 164 | 165 | 166 | if str(k) == "lr_init": 167 | new_x[k] = X.rvs() 168 | else: 169 | new_x[k] = int(X.rvs()) 170 | 171 | except: 172 | 173 | new_x[k] = self._space.sample_hyperparameter(str(k)) 174 | 175 | 176 | elif self._mutation != Mutation.NONE: 177 | # We won't consider any other mutation types 178 | raise NotImplementedError 179 | 180 | child = Member(self._space, self._mutation, self.budget, 181 | self._experiment, new_x) 182 | 183 | self._age += 1 184 | return child 185 | 186 | 187 | -------------------------------------------------------------------------------- /baselines/methods/mobananas/mobananas.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import sys 5 | from .member import Member 6 | from .neural_predictor import Neural_Predictor 7 | import numpy as np 8 | from .member import Member 9 | from .member import Mutation 10 | from baselines import nDS_index, crowdingDist 11 | 12 | 13 | 14 | class BANANAS: 15 | """ 16 | Class to group an ensemble of neural predictors 17 | """ 18 | 19 | def __init__(self, neural_predictor, target_function, experiment, search_space, 20 | initial_samples, num_arch, budget, select_models, function_evaluations, mutation_type = Mutation.GAUSSIAN): 21 | 22 | self.num_arch = num_arch 23 | self.num_function_evaluations = function_evaluations 24 | self.target_function = target_function 25 | self.search_space = search_space 26 | self.experiment = experiment 27 | self.initial_samples = initial_samples 28 | self.neural_predictor = neural_predictor 29 | self.budget = budget 30 | np.random.seed(0) 31 | self.select = select_models 32 | 33 | 34 | self.architecture_list = [Member(self.search_space, mutation_type, self.budget, 35 | experiment=self.experiment) for _ in range(self.initial_samples)] 36 | 37 | [member.fitness for member in self.architecture_list]  # evaluate the initial population 38 | 39 | 40 | self.iterations = (self.num_function_evaluations - self.initial_samples) // self.select 41 | 42 | def steps(self): 43 | 44 | it = 0 45 | 46 | while it < self.iterations: 47 | 48 | it = it + 1 49 | train_data = [member.return_train_data() for member in self.architecture_list] 50 | y_train_data = [member.fitness for member in self.architecture_list] 51 | train_data = [[train_data[i], [-y_train_data[i][0]/10, y_train_data[i][1]]] for i in range(len(train_data))] 52 | self.neural_predictor.train_models(train_data) 53 | 54 | # choose best configs 55 | best_configs =
self._select_best_architectures_mo(self.num_arch) 56 | mutated_configs = [member.mutate() for member in best_configs] 57 | test_data = [member.return_train_data() for member in mutated_configs] 58 | chosen_models = self.neural_predictor.choose_models(mutated_configs,test_data, self.select) 59 | [member.fitness for member in chosen_models] 60 | 61 | 62 | 63 | self.architecture_list.extend(chosen_models) 64 | 65 | return 66 | 67 | def sort_pop(self,list1, list2): 68 | 69 | z = [list1[int(m)] for m in list2] 70 | 71 | return z 72 | 73 | def _select_best_architectures_mo(self, num_arch): 74 | 75 | index_list = np.array(list(range(len(self.architecture_list)))) 76 | fitness = [ member.fitness for member in self.architecture_list] 77 | a, index_return_list = nDS_index(np.array(fitness), index_list) 78 | b, sort_index = crowdingDist(a, index_return_list) 79 | 80 | sorted = [] 81 | 82 | for x in sort_index: 83 | sorted.extend(x) 84 | 85 | self.architecture_list = self.sort_pop(self.architecture_list, sorted) 86 | 87 | 88 | return self.architecture_list[0:num_arch] 89 | 90 | 91 | 92 | 93 | def get_MOBANANAS(experiment, search_space, evaluate_network, budget = 25, initial_samples = 20, 94 | num_arch = 8, select_models = 4,function_evaluations = 100): 95 | 96 | # save models and dict so it can be picked up later on 97 | 98 | 99 | neural_predictor = Neural_Predictor(num_epochs = 80, num_ensamble_nets = 5) 100 | 101 | banana = BANANAS(neural_predictor, evaluate_network, experiment, search_space, initial_samples, num_arch, budget, select_models, function_evaluations) 102 | banana.steps() 103 | 104 | return 105 | 106 | 107 | -------------------------------------------------------------------------------- /baselines/methods/mobananas/moshbananas.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import sys 5 | from .member import Member 6 | from .neural_predictor import Neural_Predictor 7 | import numpy as np 8 | from .member import Member 9 | from .member import Mutation 10 | from baselines import nDS_index, crowdingDist 11 | import math 12 | 13 | 14 | class BANANAS: 15 | """ 16 | Class to group ensamble of NN 17 | """ 18 | 19 | def __init__(self,neural_predictor, experiment, search_space, 20 | initial_samples, num_arch, max_budget, min_budget, eta, select_models, function_evaluations, mutation_type = Mutation.GAUSSIAN): 21 | 22 | self.num_arch = num_arch 23 | self.num_function_evaluations = function_evaluations 24 | self.search_space = search_space 25 | self.experiment = experiment 26 | self.initial_samples = initial_samples 27 | self.neural_predictor = neural_predictor 28 | self.max_budget = max_budget 29 | self.min_budget = min_budget 30 | np.random.seed(0) 31 | self.select = select_models 32 | self.eta = eta 33 | 34 | self.architecture_list = [Member(self.search_space, mutation_type, self.max_budget, 35 | experiment=self.experiment) for _ in range(self.initial_samples)] 36 | 37 | [Member.fitness for Member in self.architecture_list] 38 | 39 | 40 | self.iterations = (self.num_function_evaluations - self.initial_samples)// (self.select//self.eta) 41 | 42 | def steps(self): 43 | 44 | it = 0 45 | 46 | while it < self.iterations: 47 | 48 | it = it + 1 49 | train_data = [member.return_train_data() for member in self.architecture_list] 50 | y_train_data = [member.fitness for member in self.architecture_list] 51 | train_data = [[train_data[i], [-y_train_data[i][0]/10, y_train_data[i][1]]] for i in 
range(len(train_data))] 52 | self.neural_predictor.train_models(train_data) 53 | 54 | # choose best configs 55 | best_configs = self._select_best_architectures_mo(self.num_arch) 56 | mutated_configs = [member.mutate() for member in best_configs] 57 | test_data = [member.return_train_data() for member in mutated_configs] 58 | chosen_models = self.neural_predictor.choose_models(mutated_configs,test_data, self.select) 59 | chosen_models = self.successive_halving(chosen_models, self.min_budget, self.max_budget, eta=3) 60 | 61 | self.architecture_list.extend(chosen_models) 62 | 63 | return 64 | 65 | 66 | def get_budgets(self, min_budget, max_budget,eta = 3): 67 | 68 | budgets = [] 69 | b = max_budget 70 | while b > min_budget: 71 | budgets.append(b) 72 | b = math.ceil(b / eta) 73 | 74 | return budgets 75 | 76 | 77 | def successive_halving(self, members, min_budget, max_budget, eta = 3): 78 | 79 | budgets = self.get_budgets(min_budget, max_budget, eta) 80 | print(budgets) 81 | print(members) 82 | for b in budgets[::-1]: 83 | members[0].budget = b 84 | for member in members: 85 | member.budget = b 86 | 87 | fit = [member.fitness for member in members] 88 | members = self.sort_architectures(members) 89 | members = members[0:len(members)//eta] 90 | 91 | return members 92 | 93 | def sort_architectures(self, architectures): 94 | 95 | index_list = np.array(list(range(len(architectures)))) 96 | fitness = [member.fitness for member in architectures] 97 | a, index_return_list = nDS_index(np.array(fitness), index_list) 98 | b, sort_index = crowdingDist(a, index_return_list) 99 | 100 | sorted = [] 101 | 102 | for x in sort_index: 103 | sorted.extend(x) 104 | 105 | architectures = self.sort_pop(architectures, sorted) 106 | 107 | return architectures 108 | 109 | 110 | def sort_pop(self,list1, list2): 111 | 112 | z = [list1[int(m)] for m in list2] 113 | 114 | return z 115 | 116 | def _select_best_architectures_mo(self, num_arch): 117 | 118 | index_list = np.array(list(range(len(self.architecture_list)))) 119 | fitness = [ member.fitness for member in self.architecture_list] 120 | a, index_return_list = nDS_index(np.array(fitness), index_list) 121 | b, sort_index = crowdingDist(a, index_return_list) 122 | 123 | sorted = [] 124 | 125 | for x in sort_index: 126 | sorted.extend(x) 127 | 128 | self.architecture_list = self.sort_pop(self.architecture_list, sorted) 129 | 130 | 131 | return self.architecture_list[0:num_arch] 132 | 133 | 134 | 135 | 136 | def get_MOSHBANANAS(experiment, search_space, 137 | initial_samples=20, select_models = 10, num_arch = 20, 138 | min_budget = 5, max_budget = 25, function_evaluations = 100,eta = 3): 139 | 140 | # save models and dict so it can be picked up later on 141 | 142 | neural_predictor = Neural_Predictor(num_epochs = 80, num_ensamble_nets = 5) 143 | banana = BANANAS(neural_predictor, experiment, search_space, initial_samples, num_arch, max_budget,min_budget, eta, select_models, function_evaluations) 144 | banana.steps() 145 | 146 | return 147 | 148 | 149 | -------------------------------------------------------------------------------- /baselines/methods/mobananas/neural_predictor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.model_selection import StratifiedKFold 3 | from torch.utils.data import DataLoader, Subset 4 | from sklearn.model_selection import StratifiedKFold # We use 3-fold stratified cross-validation 5 | import torch 6 | import torchvision.transforms as transforms 7 | from 
torch.utils.data import DataLoader, Subset 8 | from baselines import nDS_index, crowdingDist 9 | import torch.nn.functional as F 10 | 11 | 12 | def sort_array(fit): 13 | index_list = np.array(list(range(len(fit)))) 14 | 15 | a, index_return_list = nDS_index(np.array(fit), index_list) 16 | b, sort_index = crowdingDist(a, index_return_list) 17 | 18 | sorted_ = [] 19 | for i, x in enumerate(sort_index): 20 | sorted_.extend(x) 21 | 22 | sorted_ = [sorted_.index(i) for i in range((len(fit)))] 23 | 24 | return sorted_ 25 | 26 | 27 | 28 | class Net(torch.nn.Module): 29 | 30 | def __init__(self): 31 | super(Net, self).__init__() 32 | 33 | self.fc2 = torch.nn.Linear(13, 10) 34 | torch.nn.init.normal_(self.fc2.weight) 35 | torch.nn.init.normal_(self.fc2.bias) 36 | 37 | self.fc3 = torch.nn.Linear(10, 2) 38 | torch.nn.init.normal_(self.fc3.weight) 39 | torch.nn.init.normal_(self.fc3.bias) 40 | 41 | def forward(self, x): 42 | 43 | x = self.fc2(x) 44 | x = F.relu(x) 45 | x = self.fc3(x) 46 | 47 | return x 48 | 49 | 50 | 51 | def train_fn(self, train_data, num_epochs): 52 | """ 53 | Training method 54 | :param optimizer: optimization algorithm 55 | """ 56 | self.train() 57 | batch_size = 32 58 | 59 | train_loader = DataLoader(dataset=train_data, 60 | batch_size=batch_size, 61 | shuffle=True) 62 | 63 | loss_criterion = torch.nn.MSELoss() 64 | optimizer = torch.optim.Adam(self.parameters(), lr=0.1) 65 | 66 | for epoch in range(num_epochs): # loop over the dataset multiple times 67 | 68 | running_loss = 0.0 69 | i = 0 70 | for data in train_loader: 71 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 72 | 73 | inputs = torch.stack(data[0]).to(device) 74 | inputs = torch.transpose(inputs, 0, 1) 75 | inputs.type(torch.FloatTensor) 76 | 77 | # get the inputs; data is a list of [inputs, labels] 78 | y_value = torch.stack(data[1]) 79 | y_value = torch.transpose(y_value, 0, 1) 80 | y_value.type(torch.FloatTensor) 81 | 82 | # zero the parameter gradients 83 | optimizer.zero_grad() 84 | # forward + backward + optimize 85 | self.double() 86 | outputs = self(inputs) 87 | loss = loss_criterion(outputs, y_value) 88 | loss.backward() 89 | optimizer.step() 90 | 91 | # print statistics 92 | running_loss += loss.item() 93 | 94 | if (epoch + 1) % 20 == 0: 95 | print('[%d] loss: %.2f' % 96 | (epoch + 1, running_loss)) 97 | running_loss = 0.0 98 | 99 | return 100 | 101 | 102 | 103 | def predict(self, x): 104 | 105 | self.eval() 106 | self.double() 107 | 108 | x = [float(m) for m in x] 109 | 110 | train_loader = DataLoader(dataset=[x], 111 | shuffle=True) 112 | 113 | for d in train_loader: 114 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 115 | inputs = torch.stack(d).to(device) 116 | inputs = torch.transpose(inputs, 0, 1) 117 | 118 | output = self(inputs) 119 | 120 | 121 | return output 122 | 123 | 124 | 125 | class Neural_Predictor: 126 | """ 127 | Class to group ensamble of NN 128 | """ 129 | 130 | def __init__(self, num_epochs, num_ensamble_nets): 131 | 132 | self.num_epochs = num_epochs 133 | self.num_ensamble_nets = num_ensamble_nets 134 | self.networks = [Net() for i in range(self.num_ensamble_nets)] 135 | self.all_architecture = [] 136 | 137 | def train_models(self, x): 138 | 139 | 140 | for model in self.networks: 141 | model.train_fn(x, self.num_epochs) 142 | 143 | 144 | 145 | def ensamble_predict(self, x): 146 | 147 | predictions = [model.predict(x).tolist()[0] for model in self.networks] 148 | predictions = [ [- pred[0]*10, pred[1]] for pred in predictions] 149 | 
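# The predictor nets are trained on [-accuracy/10, n_params] targets (see the
# train_data construction in mobananas.py), so pred[0] is rescaled back to an
# accuracy-like value above before the per-objective ensemble means are taken below.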
150 | 151 | mean1 = np.mean([pred[0] for pred in predictions]) 152 | mean2 = np.mean([pred[1] for pred in predictions]) 153 | return [mean1, mean2], predictions 154 | 155 | 156 | 157 | def independent_thompson_sampling_for_mo(self, x, arches_in, num_models): 158 | 159 | arches = arches_in.copy() 160 | mean_list = [] 161 | prediction_list = [[] for _ in range(num_models)] 162 | 163 | for arch in range(len(arches)): 164 | 165 | mean, predictions = self.ensamble_predict(x[arch]) 166 | mean_list.append(mean) 167 | 168 | for i in range(num_models): 169 | prediction_list[i].extend([predictions[i]]) 170 | 171 | sorted_mean = sort_array(mean_list) 172 | 173 | fit = [] 174 | for i in range(num_models): 175 | fit.append(sort_array(prediction_list[i])) 176 | 177 | 178 | prob_ = [] 179 | for i in range(len(arches_in)): 180 | prob1 = self.independent_thompson_sampling(sorted_mean[i], [f[i] for f in fit]) 181 | prob_.append(prob1) 182 | 183 | return prob_ 184 | 185 | def sort_pop(self,list1, list2): 186 | 187 | z = [] 188 | for m in list2: 189 | z.append(list1[int(m)]) 190 | 191 | return z 192 | 193 | def independent_thompson_sampling(self, mean, predictions_fixed): 194 | 195 | M = self.num_ensamble_nets 196 | squared_differences = np.sum([np.square(np.abs(predictions_fixed[i]) - mean) for i in range(len(predictions_fixed))]) 197 | var = np.sqrt( (squared_differences) / (M - 1)) 198 | prob = np.random.normal(mean, var) 199 | 200 | return prob 201 | 202 | def choose_models(self, architectures, test_data, select_models): 203 | 204 | architectures = architectures.copy() 205 | 206 | arch_lists = [] 207 | probs = self.independent_thompson_sampling_for_mo(test_data, architectures, self.num_ensamble_nets) 208 | 209 | 210 | for _ in range(select_models): 211 | max_index = probs.index(min(probs)) 212 | arch_lists.append(architectures[max_index]) 213 | probs.pop(max_index) 214 | architectures.pop(max_index) 215 | 216 | return arch_lists 217 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/__init__.py: -------------------------------------------------------------------------------- 1 | from .run_mobohb import get_MOBOHB -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/methods/mobohb/hpbandster/core/__init__.py -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/core/base_config_generator.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import traceback 3 | 4 | class base_config_generator(object): 5 | """ 6 | The config generator determines how new configurations are sampled. This can take very different levels of 7 | complexity, from random sampling to the construction of complex empirical prediction models for promising 8 | configurations. 9 | """ 10 | def __init__(self, logger=None): 11 | """ 12 | Parameters 13 | ---------- 14 | 15 | directory: string 16 | where the results are logged 17 | logger: hpbandster.utils.result_logger_v?? 
18 | the logger to store the data, defaults to v1 19 | overwrite: bool 20 | whether or not existing data will be overwritten 21 | logger: logging.logger 22 | for some debug output 23 | 24 | """ 25 | 26 | if logger is None: 27 | self.logger=logging.getLogger('hpbandster') 28 | else: 29 | self.logger=logger 30 | 31 | def get_config(self, budget): 32 | """ 33 | function to sample a new configuration 34 | 35 | This function is called inside Hyperband to query a new configuration 36 | 37 | Parameters 38 | ---------- 39 | budget: float 40 | the budget for which this configuration is scheduled 41 | 42 | returns: (config, info_dict) 43 | must return a valid configuration and a (possibly empty) info dict 44 | """ 45 | raise NotImplementedError('This function needs to be overwritten in %s.'%(self.__class__.__name__)) 46 | 47 | def new_result(self, job, update_model=True): 48 | """ 49 | registers finished runs 50 | 51 | Every time a run has finished, this function should be called 52 | to register it with the result logger. If overwritten, make 53 | sure to call this method from the base class to ensure proper 54 | logging. 55 | 56 | 57 | Parameters 58 | ---------- 59 | job: instance of hpbandster.distributed.dispatcher.Job 60 | contains all necessary information about the job 61 | update_model: boolean 62 | determines whether a model inside the config_generator should be updated 63 | """ 64 | if not job.exception is None: 65 | self.logger.warning("job {} failed with exception\n{}".format(job.id, job.exception)) 66 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/core/nameserver.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import json 4 | import threading 5 | 6 | import Pyro4.naming 7 | 8 | 9 | def nic_name_to_host(nic_name): 10 | """ helper function to translate the name of a network card into a valid host name""" 11 | from netifaces import ifaddresses, AF_INET 12 | host = ifaddresses(nic_name).setdefault(AF_INET, [{'addr': 'No IP addr'}] )[0]['addr'] 13 | return(host) 14 | 15 | 16 | class NameServer(object): 17 | """ 18 | The nameserver serves as a phonebook-like lookup table for your workers. Unique names are created so the workers 19 | can work in parallel and register their results without creating racing conditions. The implementation uses 20 | `PYRO4 `_ as a backend and this class is basically a wrapper. 21 | """ 22 | def __init__(self, run_id, working_directory=None, host=None, port=0, nic_name=None): 23 | """ 24 | Parameters 25 | ---------- 26 | run_id: str 27 | unique run_id associated with the HPB run 28 | working_directory: str 29 | path to the working directory of the HPB run to store the nameservers credentials. 30 | If None, no config file will be written. 31 | host: str 32 | the hostname to use for the nameserver 33 | port: int 34 | the port to be used. 
Default (=0) means a random port 35 | nic_name: str 36 | name of the network interface to use (only used if host is not given) 37 | """ 38 | self.run_id = run_id 39 | self.host = host 40 | self.nic_name = nic_name 41 | self.port = port 42 | self.dir = working_directory 43 | self.conf_fn = None 44 | self.pyro_ns = None 45 | 46 | 47 | 48 | def start(self): 49 | """ 50 | starts a Pyro4 nameserver in a separate thread 51 | 52 | Returns 53 | ------- 54 | tuple (str, int): 55 | the host name and the used port 56 | """ 57 | 58 | if self.host is None: 59 | if self.nic_name is None: 60 | self.host = 'localhost' 61 | else: 62 | self.host = nic_name_to_host(self.nic_name) 63 | 64 | uri, self.pyro_ns, _ = Pyro4.naming.startNS(host=self.host, port=self.port) 65 | 66 | self.host, self.port = self.pyro_ns.locationStr.split(':') 67 | self.port = int(self.port) 68 | 69 | thread = threading.Thread(target=self.pyro_ns.requestLoop, name='Pyro4 nameserver started by HpBandSter') 70 | thread.start() 71 | 72 | if not self.dir is None: 73 | os.makedirs(self.dir, exist_ok=True) 74 | self.conf_fn = os.path.join(self.dir, 'HPB_run_%s_pyro.pkl'%self.run_id) 75 | 76 | with open(self.conf_fn, 'wb') as fh: 77 | pickle.dump((self.host, self.port), fh) 78 | 79 | return(self.host, self.port) 80 | 81 | 82 | def shutdown(self): 83 | """ 84 | clean shutdown of the nameserver and the config file (if written) 85 | """ 86 | if not self.pyro_ns is None: 87 | self.pyro_ns.shutdown() 88 | self.pyro_ns = None 89 | 90 | if not self.conf_fn is None: 91 | os.remove(self.conf_fn) 92 | self.conf_fn = None 93 | 94 | 95 | def __del__(self): 96 | self.shutdown() 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/examples/README.txt: -------------------------------------------------------------------------------- 1 | Examples - How to use HpBandSter 2 | ================================ 3 | 4 | This is a collection of examples of how to use the HpBandster Module. 5 | 6 | 7 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/examples/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/examples/commons.py: -------------------------------------------------------------------------------- 1 | """ 2 | Worker for Examples 1-4 3 | ======================= 4 | 5 | This class implements a very simple worker used in the firt examples. 6 | """ 7 | 8 | import numpy 9 | import time 10 | 11 | import ConfigSpace as CS 12 | from hpbandster.core.worker import Worker 13 | 14 | 15 | class MyWorker(Worker): 16 | 17 | def __init__(self, *args, sleep_interval=0, **kwargs): 18 | super().__init__(*args, **kwargs) 19 | 20 | self.sleep_interval = sleep_interval 21 | 22 | def compute(self, config, budget, **kwargs): 23 | """ 24 | Simple example for a compute function 25 | The loss is just a the config + some noise (that decreases with the budget) 26 | 27 | For dramatization, the function can sleep for a given interval to emphasizes 28 | the speed ups achievable with parallel workers. 29 | 30 | Args: 31 | config: dictionary containing the sampled configurations by the optimizer 32 | budget: (float) amount of time/epochs/etc. 
the model can use to train 33 | 34 | Returns: 35 | dictionary with mandatory fields: 36 | 'loss' (scalar) 37 | 'info' (dict) 38 | """ 39 | 40 | res = numpy.clip(config['x'] + numpy.random.randn()/budget, config['x']/2, 1.5*config['x']) 41 | time.sleep(self.sleep_interval) 42 | 43 | return({ 44 | 'loss': float(res), # this is the a mandatory field to run hyperband 45 | 'info': res # can be used for any user-defined information - also mandatory 46 | }) 47 | 48 | @staticmethod 49 | def get_configspace(): 50 | config_space = CS.ConfigurationSpace() 51 | config_space.add_hyperparameter(CS.UniformFloatHyperparameter('x', lower=0, upper=1)) 52 | return(config_space) 53 | 54 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/examples/example_1_local_sequential.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 1 - Local and Sequential 3 | ================================ 4 | 5 | """ 6 | import logging 7 | logging.basicConfig(level=logging.WARNING) 8 | 9 | import argparse 10 | 11 | import hpbandster.core.nameserver as hpns 12 | import hpbandster.core.result as hpres 13 | 14 | from hpbandster.optimizers import BOHB as BOHB 15 | from hpbandster.examples.commons import MyWorker 16 | 17 | 18 | 19 | parser = argparse.ArgumentParser(description='Example 1 - sequential and local execution.') 20 | parser.add_argument('--min_budget', type=float, help='Minimum budget used during the optimization.', default=9) 21 | parser.add_argument('--max_budget', type=float, help='Maximum budget used during the optimization.', default=243) 22 | parser.add_argument('--n_iterations', type=int, help='Number of iterations performed by the optimizer', default=4) 23 | args=parser.parse_args() 24 | 25 | 26 | # Step 1: Start a nameserver 27 | # Every run needs a nameserver. It could be a 'static' server with a 28 | # permanent address, but here it will be started for the local machine with the default port. 29 | # The nameserver manages the concurrent running workers across all possible threads or clusternodes. 30 | # Note the run_id argument. This uniquely identifies a run of any HpBandSter optimizer. 31 | NS = hpns.NameServer(run_id='example1', host='127.0.0.1', port=None) 32 | NS.start() 33 | 34 | # Step 2: Start a worker 35 | # Now we can instantiate a worker, providing the mandatory information 36 | # Besides the sleep_interval, we need to define the nameserver information and 37 | # the same run_id as above. After that, we can start the worker in the background, 38 | # where it will wait for incoming configurations to evaluate. 39 | w = MyWorker(sleep_interval = 0, nameserver='127.0.0.1',run_id='example1') 40 | w.run(background=True) 41 | 42 | # Step 3: Run an optimizer 43 | # Now we can create an optimizer object and start the run. 44 | # Here, we run BOHB, but that is not essential. 45 | # The run method will return the `Result` that contains all runs performed. 46 | bohb = BOHB( configspace = w.get_configspace(), 47 | run_id = 'example1', nameserver='127.0.0.1', 48 | min_budget=args.min_budget, max_budget=args.max_budget 49 | ) 50 | res = bohb.run(n_iterations=args.n_iterations) 51 | 52 | # Step 4: Shutdown 53 | # After the optimizer run, we must shutdown the master and the nameserver. 54 | bohb.shutdown(shutdown_workers=True) 55 | NS.shutdown() 56 | 57 | # Step 5: Analysis 58 | # Each optimizer returns a hpbandster.core.result.Result object. 
59 | # It holds informations about the optimization run like the incumbent (=best) configuration. 60 | # For further details about the Result object, see its documentation. 61 | # Here we simply print out the best config and some statistics about the performed runs. 62 | id2config = res.get_id2config_mapping() 63 | incumbent = res.get_incumbent_id() 64 | 65 | print('Best found configuration:', id2config[incumbent]['config']) 66 | print('A total of %i unique configurations where sampled.' % len(id2config.keys())) 67 | print('A total of %i runs where executed.' % len(res.get_all_runs())) 68 | print('Total budget corresponds to %.1f full function evaluations.'%(sum([r.budget for r in res.get_all_runs()])/args.max_budget)) 69 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/examples/example_2_local_parallel_threads.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 2 - Local and Parallel (using threads) 3 | ============================================== 4 | 5 | This example extends example 1 to multiple parallel workers. 6 | For demonstrational purposes, the workers are started in separate threads. 7 | This is probably not the most common use case but serves as a nice example. 8 | The setup can be useful if the workers do their computations outside Python's Global Interpreter Lock, e.g. in some 3rd party extension in C/C++. 9 | In that case, all workers can truely work in parallel. 10 | 11 | """ 12 | import logging 13 | logging.basicConfig(level=logging.WARNING) 14 | 15 | import argparse 16 | 17 | import hpbandster.core.nameserver as hpns 18 | import hpbandster.core.result as hpres 19 | 20 | from hpbandster.optimizers import BOHB as BOHB 21 | from hpbandster.examples.commons import MyWorker 22 | 23 | 24 | 25 | parser = argparse.ArgumentParser(description='Example 1 - sequential and local execution.') 26 | parser.add_argument('--min_budget', type=float, help='Minimum budget used during the optimization.', default=9) 27 | parser.add_argument('--max_budget', type=float, help='Maximum budget used during the optimization.', default=243) 28 | parser.add_argument('--n_iterations', type=int, help='Number of iterations performed by the optimizer', default=4) 29 | parser.add_argument('--n_workers', type=int, help='Number of workers to run in parallel.', default=2) 30 | 31 | args=parser.parse_args() 32 | 33 | 34 | # Step 1: Start a nameserver (see example_1) 35 | NS = hpns.NameServer(run_id='example2', host='127.0.0.1', port=None) 36 | NS.start() 37 | 38 | # Step 2: Start the workers 39 | # Now we can instantiate the specified number of workers. To emphasize the effect, 40 | # we introduce a sleep_interval of one second, which makes every function evaluation 41 | # take a bit of time. Note the additional id argument that helps separating the 42 | # individual workers. This is necessary because every worker uses its processes 43 | # ID which is the same for all threads here. 44 | workers=[] 45 | for i in range(args.n_workers): 46 | w = MyWorker(sleep_interval = 0.5, nameserver='127.0.0.1',run_id='example2', id=i) 47 | w.run(background=True) 48 | workers.append(w) 49 | 50 | # Step 3: Run an optimizer 51 | # Now we can create an optimizer object and start the run. 52 | # We add the min_n_workers argument to the run methods to make the optimizer wait 53 | # for all workers to start. 
This is not mandatory, and workers can be added 54 | # at any time, but if the timing of the run is essential, this can be used to 55 | # synchronize all workers right at the start. 56 | bohb = BOHB( configspace = w.get_configspace(), 57 | run_id = 'example2', 58 | min_budget=args.min_budget, max_budget=args.max_budget 59 | ) 60 | res = bohb.run(n_iterations=args.n_iterations, min_n_workers=args.n_workers) 61 | 62 | # Step 4: Shutdown 63 | # After the optimizer run, we must shutdown the master and the nameserver. 64 | bohb.shutdown(shutdown_workers=True) 65 | NS.shutdown() 66 | 67 | # Step 5: Analysis 68 | # Each optimizer returns a hpbandster.core.result.Result object. 69 | # It holds informations about the optimization run like the incumbent (=best) configuration. 70 | # For further details about the Result object, see its documentation. 71 | # Here we simply print out the best config and some statistics about the performed runs. 72 | id2config = res.get_id2config_mapping() 73 | incumbent = res.get_incumbent_id() 74 | 75 | all_runs = res.get_all_runs() 76 | 77 | print('Best found configuration:', id2config[incumbent]['config']) 78 | print('A total of %i unique configurations where sampled.' % len(id2config.keys())) 79 | print('A total of %i runs where executed.' % len(res.get_all_runs())) 80 | print('Total budget corresponds to %.1f full function evaluations.'%(sum([r.budget for r in all_runs])/args.max_budget)) 81 | print('Total budget corresponds to %.1f full function evaluations.'%(sum([r.budget for r in all_runs])/args.max_budget)) 82 | print('The run took %.1f seconds to complete.'%(all_runs[-1].time_stamps['finished'] - all_runs[0].time_stamps['started'])) 83 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/examples/example_3_local_parallel_processes.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 3 - Local and Parallel (using processes) 3 | ================================================ 4 | 5 | Getting closer to a distributed setup, this examples shows how to connect a nameserver, an optimizer and several workers running in different processes. 6 | This would also allow true parallelism if the workers do all the computation in Python, such that the thread based paralelization of example 2 would not work. 
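Run this script once without flags to start the nameserver and the optimizer, and launch the workers as separate processes by running it again with the --worker flag.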
7 | 8 | """ 9 | import logging 10 | logging.basicConfig(level=logging.INFO) 11 | 12 | import argparse 13 | 14 | import hpbandster.core.nameserver as hpns 15 | import hpbandster.core.result as hpres 16 | 17 | from hpbandster.optimizers import BOHB as BOHB 18 | from hpbandster.examples.commons import MyWorker 19 | 20 | 21 | 22 | parser = argparse.ArgumentParser(description='Example 3 - Local and Parallel Execution.') 23 | parser.add_argument('--min_budget', type=float, help='Minimum budget used during the optimization.', default=9) 24 | parser.add_argument('--max_budget', type=float, help='Maximum budget used during the optimization.', default=243) 25 | parser.add_argument('--n_iterations', type=int, help='Number of iterations performed by the optimizer', default=4) 26 | parser.add_argument('--n_workers', type=int, help='Number of workers to run in parallel.', default=2) 27 | parser.add_argument('--worker', help='Flag to turn this into a worker process', action='store_true') 28 | 29 | args=parser.parse_args() 30 | 31 | 32 | 33 | if args.worker: 34 | w = MyWorker(sleep_interval = 0.5, nameserver='127.0.0.1',run_id='example3') 35 | w.run(background=False) 36 | exit(0) 37 | 38 | # Start a nameserver (see example_1) 39 | NS = hpns.NameServer(run_id='example3', host='127.0.0.1', port=None) 40 | NS.start() 41 | 42 | 43 | # Run an optimizer (see example_2) 44 | bohb = BOHB( configspace = MyWorker.get_configspace(), 45 | run_id = 'example3', 46 | min_budget=args.min_budget, max_budget=args.max_budget 47 | ) 48 | res = bohb.run(n_iterations=args.n_iterations, min_n_workers=args.n_workers) 49 | 50 | # Step 4: Shutdown 51 | # After the optimizer run, we must shutdown the master and the nameserver. 52 | bohb.shutdown(shutdown_workers=True) 53 | NS.shutdown() 54 | 55 | # Step 5: Analysis 56 | # Each optimizer returns a hpbandster.core.result.Result object. 57 | # It holds informations about the optimization run like the incumbent (=best) configuration. 58 | # For further details about the Result object, see its documentation. 59 | # Here we simply print out the best config and some statistics about the performed runs. 60 | id2config = res.get_id2config_mapping() 61 | incumbent = res.get_incumbent_id() 62 | 63 | all_runs = res.get_all_runs() 64 | 65 | print('Best found configuration:', id2config[incumbent]['config']) 66 | print('A total of %i unique configurations where sampled.' % len(id2config.keys())) 67 | print('A total of %i runs where executed.' % len(res.get_all_runs())) 68 | print('Total budget corresponds to %.1f full function evaluations.'%(sum([r.budget for r in all_runs])/args.max_budget)) 69 | print('Total budget corresponds to %.1f full function evaluations.'%(sum([r.budget for r in all_runs])/args.max_budget)) 70 | print('The run took %.1f seconds to complete.'%(all_runs[-1].time_stamps['finished'] - all_runs[0].time_stamps['started'])) 71 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/examples/example_4_cluster.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 4 - on the cluster 3 | ========================== 4 | 5 | This example shows how to run HpBandster in a cluster environment. 
6 | The actual python code does differ substantially from example 3, except for a 7 | shared directory that is used to communicate the location of the nameserver to 8 | every worker, and the fact that the communication is done over the network instead 9 | of just the loop back interface. 10 | 11 | 12 | To actually run it as a batch job, usually a shell script is required. 13 | Those differer slightly from scheduler to scheduler. 14 | Here we provide an example script for the Sun Grid Engine (SGE), but adapting that to 15 | any other scheduler should be easy. 16 | The script simply specifies the logging files for output (`-o`) and error `-e`), 17 | loads a virtual environment, and then executes the master for the first array task 18 | and a worker otherwise. 19 | Array jobs execute the same source multiple times and are bundled together into one job, 20 | where each task gets a unique task ID. 21 | For SGE those IDs are positive integers and we simply say the first task is the master. 22 | 23 | 24 | .. code-block:: bash 25 | 26 | # submit via qsub -t 1-4 -q test_core.q example_4_cluster_submit_me.sh 27 | 28 | #$ -cwd 29 | #$ -o $JOB_ID-$TASK_ID.o 30 | #$ -e $JOB_ID-$TASK_ID.e 31 | 32 | # enter the virtual environment 33 | source ~sfalkner/virtualenvs/HpBandSter_tests/bin/activate 34 | 35 | 36 | if [ $SGE_TASK_ID -eq 1] 37 | then python3 example_4_cluster.py --run_id $JOB_ID --nic_name eth0 --working_dir . 38 | else 39 | python3 example_4_cluster.py --run_id $JOB_ID --nic_name eth0 --working_dir . --worker 40 | fi 41 | 42 | You can simply copy the above code into a file, say submit_me.sh, and tell SGE to run it via: 43 | 44 | .. code-block:: bash 45 | 46 | qsub -t 1-4 -q your_queue_name submit_me.sh 47 | 48 | 49 | Now to the actual python source: 50 | """ 51 | import logging 52 | logging.basicConfig(level=logging.INFO) 53 | 54 | import argparse 55 | import pickle 56 | import time 57 | 58 | import hpbandster.core.nameserver as hpns 59 | import hpbandster.core.result as hpres 60 | 61 | from hpbandster.optimizers import BOHB as BOHB 62 | from hpbandster.examples.commons import MyWorker 63 | 64 | 65 | 66 | parser = argparse.ArgumentParser(description='Example 1 - sequential and local execution.') 67 | parser.add_argument('--min_budget', type=float, help='Minimum budget used during the optimization.', default=9) 68 | parser.add_argument('--max_budget', type=float, help='Maximum budget used during the optimization.', default=243) 69 | parser.add_argument('--n_iterations', type=int, help='Number of iterations performed by the optimizer', default=4) 70 | parser.add_argument('--n_workers', type=int, help='Number of workers to run in parallel.', default=2) 71 | parser.add_argument('--worker', help='Flag to turn this into a worker process', action='store_true') 72 | parser.add_argument('--run_id', type=str, help='A unique run id for this optimization run. An easy option is to use the job id of the clusters scheduler.') 73 | parser.add_argument('--nic_name',type=str, help='Which network interface to use for communication.') 74 | parser.add_argument('--shared_directory',type=str, help='A directory that is accessible for all processes, e.g. 
a NFS share.') 75 | 76 | 77 | args=parser.parse_args() 78 | 79 | # Every process has to lookup the hostname 80 | host = hpns.nic_name_to_host(args.nic_name) 81 | 82 | 83 | if args.worker: 84 | time.sleep(5) # short artificial delay to make sure the nameserver is already running 85 | w = MyWorker(sleep_interval = 0.5,run_id=args.run_id, host=host) 86 | w.load_nameserver_credentials(working_directory=args.shared_directory) 87 | w.run(background=False) 88 | exit(0) 89 | 90 | # Start a nameserver: 91 | # We now start the nameserver with the host name from above and a random open port (by setting the port to 0) 92 | NS = hpns.NameServer(run_id=args.run_id, host=host, port=0, working_directory=args.shared_directory) 93 | ns_host, ns_port = NS.start() 94 | 95 | # Most optimizers are so computationally inexpensive that we can affort to run a 96 | # worker in parallel to it. Note that this one has to run in the background to 97 | # not plock! 98 | w = MyWorker(sleep_interval = 0.5,run_id=args.run_id, host=host, nameserver=ns_host, nameserver_port=ns_port) 99 | w.run(background=True) 100 | 101 | # Run an optimizer 102 | # We now have to specify the host, and the nameserver information 103 | bohb = BOHB( configspace = MyWorker.get_configspace(), 104 | run_id = args.run_id, 105 | host=host, 106 | nameserver=ns_host, 107 | nameserver_port=ns_port, 108 | min_budget=args.min_budget, max_budget=args.max_budget 109 | ) 110 | res = bohb.run(n_iterations=args.n_iterations, min_n_workers=args.n_workers) 111 | 112 | 113 | # In a cluster environment, you usually want to store the results for later analysis. 114 | # One option is to simply pickle the Result object 115 | with open(os.path.join(args.shared_directory, 'results.pkl'), 'wb') as fh: 116 | pickle.dump(res, fh) 117 | 118 | 119 | # Step 4: Shutdown 120 | # After the optimizer run, we must shutdown the master and the nameserver. 121 | bohb.shutdown(shutdown_workers=True) 122 | NS.shutdown() 123 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/examples/example_5_mnist.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 5 - MNIST 3 | ================= 4 | 5 | Small CNN for MNIST implementet in both Keras and PyTorch. 6 | This example also shows how to log results to disk during the optimization 7 | which is useful for long runs, because intermediate results are directly available 8 | for analysis. It also contains a more realistic search space with different types 9 | of variables to be optimized. 10 | 11 | """ 12 | import os 13 | import pickle 14 | import argparse 15 | 16 | import hpbandster.core.nameserver as hpns 17 | import hpbandster.core.result as hpres 18 | 19 | from hpbandster.optimizers import BOHB 20 | 21 | import logging 22 | logging.basicConfig(level=logging.DEBUG) 23 | 24 | 25 | 26 | parser = argparse.ArgumentParser(description='Example 5 - CNN on MNIST') 27 | parser.add_argument('--min_budget', type=float, help='Minimum number of epochs for training.', default=1) 28 | parser.add_argument('--max_budget', type=float, help='Maximum number of epochs for training.', default=9) 29 | parser.add_argument('--n_iterations', type=int, help='Number of iterations performed by the optimizer', default=16) 30 | parser.add_argument('--worker', help='Flag to turn this into a worker process', action='store_true') 31 | parser.add_argument('--run_id', type=str, help='A unique run id for this optimization run. 
An easy option is to use the job id of the clusters scheduler.') 32 | parser.add_argument('--nic_name',type=str, help='Which network interface to use for communication.', default='lo') 33 | parser.add_argument('--shared_directory',type=str, help='A directory that is accessible for all processes, e.g. a NFS share.', default='.') 34 | parser.add_argument('--backend',help='Toggles which worker is used. Choose between a pytorch and a keras implementation.', choices=['pytorch', 'keras'], default='keras') 35 | 36 | args=parser.parse_args() 37 | 38 | 39 | if args.backend == 'pytorch': 40 | from example_5_pytorch_worker import PyTorchWorker as worker 41 | else: 42 | from example_5_keras_worker import KerasWorker as worker 43 | 44 | 45 | # Every process has to lookup the hostname 46 | host = hpns.nic_name_to_host(args.nic_name) 47 | 48 | 49 | if args.worker: 50 | import time 51 | time.sleep(5) # short artificial delay to make sure the nameserver is already running 52 | w = worker(run_id=args.run_id, host=host, timeout=120) 53 | w.load_nameserver_credentials(working_directory=args.shared_directory) 54 | w.run(background=False) 55 | exit(0) 56 | 57 | 58 | # This example shows how to log live results. This is most useful 59 | # for really long runs, where intermediate results could already be 60 | # interesting. The core.result submodule contains the functionality to 61 | # read the two generated files (results.json and configs.json) and 62 | # create a Result object. 63 | result_logger = hpres.json_result_logger(directory=args.shared_directory, overwrite=False) 64 | 65 | 66 | # Start a nameserver: 67 | NS = hpns.NameServer(run_id=args.run_id, host=host, port=0, working_directory=args.shared_directory) 68 | ns_host, ns_port = NS.start() 69 | 70 | # Start local worker 71 | w = worker(run_id=args.run_id, host=host, nameserver=ns_host, nameserver_port=ns_port, timeout=120) 72 | w.run(background=True) 73 | 74 | # Run an optimizer 75 | bohb = BOHB( configspace = worker.get_configspace(), 76 | run_id = args.run_id, 77 | host=host, 78 | nameserver=ns_host, 79 | nameserver_port=ns_port, 80 | result_logger=result_logger, 81 | min_budget=args.min_budget, max_budget=args.max_budget, 82 | ) 83 | res = bohb.run(n_iterations=args.n_iterations) 84 | 85 | # store results 86 | with open(os.path.join(args.shared_directory, 'results.pkl'), 'wb') as fh: 87 | pickle.dump(res, fh) 88 | 89 | # shutdown 90 | bohb.shutdown(shutdown_workers=True) 91 | NS.shutdown() 92 | 93 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/examples/example_8_mnist_continued.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 8 - Warmstarting for MNIST 3 | ================================== 4 | 5 | Sometimes it is desired to continue an already finished run because the optimization 6 | requires more function evaluations. In other cases, one might wish to use results 7 | from previous runs to speed up the optimization. This might be useful if initial 8 | runs were done with relatively small budgets, or on only a subset of the data to 9 | get an initial understanding of the problem. 10 | 11 | Here we shall see how to use the results from example 5 to initialize BOHB's model. 
12 | What changed are 13 | - the number of training points is increased from 8192 to 32768 14 | - the number of validation points is increased from 1024 to 16384 15 | - the mimum budget is now 3 instead of 1 because we have already quite a few runs for a small number of epochs 16 | 17 | Note that the loaded runs will show up in the results of the new run. They are all 18 | combined into an iteration with the index -1 and their time stamps are manipulated 19 | such that the last run finishes at time 0 with all other times being negative. 20 | That info can be used to filter those runs when analysing the run. 21 | 22 | """ 23 | import os 24 | import pickle 25 | import argparse 26 | 27 | import hpbandster.core.nameserver as hpns 28 | import hpbandster.core.result as hpres 29 | 30 | from hpbandster.optimizers import BOHB 31 | 32 | import logging 33 | logging.basicConfig(level=logging.DEBUG) 34 | 35 | 36 | 37 | parser = argparse.ArgumentParser(description='Example 1 - sequential and local execution.') 38 | parser.add_argument('--min_budget', type=float, help='Minimum number of epochs for training.', default=3) 39 | parser.add_argument('--max_budget', type=float, help='Maximum number of epochs for training.', default=9) 40 | parser.add_argument('--n_iterations', type=int, help='Number of iterations performed by the optimizer', default=4) 41 | parser.add_argument('--worker', help='Flag to turn this into a worker process', action='store_true') 42 | parser.add_argument('--run_id', type=str, help='A unique run id for this optimization run. An easy option is to use the job id of the clusters scheduler.') 43 | parser.add_argument('--nic_name',type=str, help='Which network interface to use for communication.', default='lo') 44 | parser.add_argument('--shared_directory',type=str, help='A directory that is accessible for all processes, e.g. a NFS share.', default='.') 45 | parser.add_argument('--backend',help='Toggles which worker is used. Choose between a pytorch and a keras implementation.', choices=['pytorch', 'keras'], default='keras') 46 | parser.add_argument('--previous_run_dir',type=str, help='A directory that contains a config.json and results.json for the same configuration space.', default='./example_5_run/') 47 | 48 | args=parser.parse_args() 49 | 50 | 51 | if args.backend == 'pytorch': 52 | from example_5_pytorch_worker import PyTorchWorker as worker 53 | else: 54 | from example_5_keras_worker import KerasWorker as worker 55 | 56 | 57 | # Every process has to lookup the hostname 58 | host = hpns.nic_name_to_host(args.nic_name) 59 | 60 | 61 | if args.worker: 62 | import time 63 | time.sleep(5) # short artificial delay to make sure the nameserver is already running 64 | w = worker(run_id=args.run_id, host=host, timeout=120) 65 | w.load_nameserver_credentials(working_directory=args.shared_directory) 66 | w.run(background=False) 67 | exit(0) 68 | 69 | 70 | # This example shows how to log live results. This is most useful 71 | # for really long runs, where intermediate results could already be 72 | # interesting. The core.result submodule contains the functionality to 73 | # read the two generated files (results.json and configs.json) and 74 | # create a Result object. 
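# The companion function hpres.logged_results_to_HBS_result(directory) rebuilds
# a Result object from these files; it is used further below to load the
# previous run for warmstarting.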
75 | result_logger = hpres.json_result_logger(directory=args.shared_directory, overwrite=False) 76 | 77 | 78 | # Start a nameserver: 79 | NS = hpns.NameServer(run_id=args.run_id, host=host, port=0, working_directory=args.shared_directory) 80 | ns_host, ns_port = NS.start() 81 | 82 | # Start local worker 83 | w = worker(run_id=args.run_id, host=host, nameserver=ns_host, nameserver_port=ns_port, timeout=120) 84 | w.run(background=True) 85 | 86 | 87 | # Let us load the old run now to use its results to warmstart a new run with slightly 88 | # different budgets in terms of datapoints and epochs. 89 | # Note that the search space has to be identical though! 90 | previous_run = hpres.logged_results_to_HBS_result(args.previous_run_dir) 91 | 92 | 93 | # Run an optimizer 94 | bohb = BOHB( configspace = worker.get_configspace(), 95 | run_id = args.run_id, 96 | host=host, 97 | nameserver=ns_host, 98 | nameserver_port=ns_port, 99 | result_logger=result_logger, 100 | min_budget=args.min_budget, max_budget=args.max_budget, 101 | previous_result = previous_run, # this is how you tell any optimizer about previous runs 102 | ) 103 | res = bohb.run(n_iterations=args.n_iterations) 104 | 105 | # store results 106 | with open(os.path.join(args.shared_directory, 'results.pkl'), 'wb') as fh: 107 | pickle.dump(res, fh) 108 | 109 | # shutdown 110 | bohb.shutdown(shutdown_workers=True) 111 | NS.shutdown() 112 | 113 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/examples/plot_example_6_analysis.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 6 - Analysis of a Run 3 | ============================== 4 | 5 | This example takes a run from example 5 and performs some analysis of it. 6 | It shows how to get the best performing configuration, and its attributes. 7 | More advanced analysis plots provide some insights into a run and the problem. 8 | 9 | """ 10 | 11 | import matplotlib.pyplot as plt 12 | import hpbandster.core.result as hpres 13 | import hpbandster.visualization as hpvis 14 | 15 | 16 | 17 | # load the example run from the log files 18 | result = hpres.logged_results_to_HBS_result('example_5_run/') 19 | 20 | # get all executed runs 21 | all_runs = result.get_all_runs() 22 | 23 | # get the 'dict' that translates config ids to the actual configurations 24 | id2conf = result.get_id2config_mapping() 25 | 26 | 27 | # Here is how you get he incumbent (best configuration) 28 | inc_id = result.get_incumbent_id() 29 | 30 | # let's grab the run on the highest budget 31 | inc_runs = result.get_runs_by_id(inc_id) 32 | inc_run = inc_runs[-1] 33 | 34 | 35 | # We have access to all information: the config, the loss observed during 36 | #optimization, and all the additional information 37 | inc_loss = inc_run.loss 38 | inc_config = id2conf[inc_id]['config'] 39 | inc_test_loss = inc_run.info['test accuracy'] 40 | 41 | print('Best found configuration:') 42 | print(inc_config) 43 | print('It achieved accuracies of %f (validation) and %f (test).'%(1-inc_loss, inc_test_loss)) 44 | 45 | 46 | # Let's plot the observed losses grouped by budget, 47 | hpvis.losses_over_time(all_runs) 48 | 49 | # the number of concurent runs, 50 | hpvis.concurrent_runs_over_time(all_runs) 51 | 52 | # and the number of finished runs. 53 | hpvis.finished_runs_over_time(all_runs) 54 | 55 | # This one visualizes the spearman rank correlation coefficients of the losses 56 | # between different budgets. 
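# (a high rank correlation means that results on small budgets are good
# predictors of the ranking on the full budget)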
57 | hpvis.correlation_across_budgets(result) 58 | 59 | # For model based optimizers, one might wonder how much the model actually helped. 60 | # The next plot compares the performance of configs picked by the model vs. random ones 61 | hpvis.performance_histogram_model_vs_random(all_runs, id2conf) 62 | 63 | plt.show() 64 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/examples/plot_example_7_interactive_plot.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example 7 - Interactive Exploration of the Results 3 | ================================================== 4 | 5 | This example takes a run from example 5 and allows you to interactively look at 6 | all runs performed and the associated loss. 7 | The plot allows you to only include certain iterations (selected by the checkboxes on the left). 8 | By hovering over the learning curves (all runs of a single configuration on all respective budets) 9 | you see some information about the configuration and its performance. 10 | Clicking on it will make the tool tip persistent.Clicking on the curve again, will remove it again. 11 | 12 | This tool is not very mature, but maybe it can help you to explore the structure hidden in your 13 | results. Please refer to the documentation of the visualization submodule to see all 14 | options. 15 | """ 16 | 17 | import matplotlib.pyplot as plt 18 | import hpbandster.core.result as hpres 19 | import hpbandster.visualization as hpvis 20 | 21 | 22 | 23 | # load the example run from the log files 24 | result = hpres.logged_results_to_HBS_result('example_5_run/') 25 | 26 | # get all executed runs 27 | all_runs = result.get_all_runs() 28 | 29 | # get the 'dict' that translates config ids to the actual configurations 30 | id2conf = result.get_id2config_mapping() 31 | 32 | lcs = result.get_learning_curves() 33 | 34 | hpvis.interactive_HBS_plot(lcs, tool_tip_strings=hpvis.default_tool_tips(result, lcs)) 35 | 36 | 37 | 38 | 39 | def realtime_learning_curves(runs): 40 | """ 41 | example how to extract a different kind of learning curve. 42 | 43 | The x values are now the time the runs finished, not the budget anymore. 44 | We no longer plot the validation loss on the y axis, but now the test accuracy. 45 | 46 | This is just to show how to get different information into the interactive plot. 
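    The extractor returns a list of curves (here just one), each curve being a list of
    (x, y) tuples; a budget-based variant could be built analogously, e.g.
        lc = [(r.budget, r.loss) for r in sr if r.loss is not None]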
47 | 48 | """ 49 | sr = sorted(runs, key=lambda r: r.budget) 50 | lc = list(filter(lambda t: not t[1] is None, [(r.time_stamps['finished'], r.info['test accuracy']) for r in sr])) 51 | return([lc,]) 52 | 53 | lcs = result.get_learning_curves(lc_extractor=realtime_learning_curves) 54 | 55 | hpvis.interactive_HBS_plot(lcs, tool_tip_strings=hpvis.default_tool_tips(result, lcs)) 56 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | from hpbandster.optimizers.randomsearch import RandomSearch 2 | from hpbandster.optimizers.hyperband import HyperBand 3 | from hpbandster.optimizers.bohb import BOHB 4 | from hpbandster.optimizers.h2bo import H2BO 5 | from hpbandster.optimizers.mobohb import MOBOHB 6 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/bohb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import math 4 | import copy 5 | import logging 6 | 7 | import numpy as np 8 | 9 | 10 | import ConfigSpace as CS 11 | 12 | from hpbandster.core.master import Master 13 | from hpbandster.optimizers.iterations import SuccessiveHalving 14 | from hpbandster.optimizers.config_generators.bohb import BOHB as CG_BOHB 15 | 16 | class BOHB(Master): 17 | def __init__(self, configspace = None, 18 | eta=3, min_budget=0.01, max_budget=1, 19 | min_points_in_model = None, top_n_percent=15, 20 | num_samples = 64, random_fraction=1/3, bandwidth_factor=3, 21 | min_bandwidth=1e-3, 22 | **kwargs ): 23 | """ 24 | BOHB performs robust and efficient hyperparameter optimization 25 | at scale by combining the speed of Hyperband searches with the 26 | guidance and guarantees of convergence of Bayesian 27 | Optimization. Instead of sampling new configurations at random, 28 | BOHB uses kernel density estimators to select promising candidates. 29 | 30 | .. highlight:: none 31 | 32 | For reference: :: 33 | 34 | @InProceedings{falkner-icml-18, 35 | title = {{BOHB}: Robust and Efficient Hyperparameter Optimization at Scale}, 36 | author = {Falkner, Stefan and Klein, Aaron and Hutter, Frank}, 37 | booktitle = {Proceedings of the 35th International Conference on Machine Learning}, 38 | pages = {1436--1445}, 39 | year = {2018}, 40 | } 41 | 42 | Parameters 43 | ---------- 44 | configspace: ConfigSpace object 45 | valid representation of the search space 46 | eta : float 47 | In each iteration, a complete run of sequential halving is executed. In it, 48 | after evaluating each configuration on the same subset size, only a fraction of 49 | 1/eta of them 'advances' to the next round. 50 | Must be greater or equal to 2. 51 | min_budget : float 52 | The smallest budget to consider. Needs to be positive! 53 | max_budget : float 54 | The largest budget to consider. Needs to be larger than min_budget! 55 | The budgets will be geometrically distributed 56 | :math:`a^2 + b^2 = c^2 \sim \eta^k` for :math:`k\in [0, 1, ... , num\_subsets - 1]`. 57 | min_points_in_model: int 58 | number of observations to start building a KDE. Default 'None' means 59 | dim+1, the bare minimum. 60 | top_n_percent: int 61 | percentage ( between 1 and 99, default 15) of the observations that are considered good. 
62 | num_samples: int 63 | number of samples to optimize EI (default 64) 64 | random_fraction: float 65 | fraction of purely random configurations that are sampled from the 66 | prior without the model. 67 | bandwidth_factor: float 68 | to encourage diversity, the points proposed to optimize EI, are sampled 69 | from a 'widened' KDE where the bandwidth is multiplied by this factor (default: 3) 70 | min_bandwidth: float 71 | to keep diversity, even when all (good) samples have the same value for one of the parameters, 72 | a minimum bandwidth (Default: 1e-3) is used instead of zero. 73 | iteration_kwargs: dict 74 | kwargs to be added to the instantiation of each iteration 75 | """ 76 | 77 | 78 | 79 | # TODO: Propper check for ConfigSpace object! 80 | if configspace is None: 81 | raise ValueError("You have to provide a valid CofigSpace object") 82 | 83 | 84 | 85 | cg = CG_BOHB( configspace = configspace, 86 | min_points_in_model = min_points_in_model, 87 | top_n_percent=top_n_percent, 88 | num_samples = num_samples, 89 | random_fraction=random_fraction, 90 | bandwidth_factor=bandwidth_factor, 91 | min_bandwidth = min_bandwidth 92 | ) 93 | 94 | super().__init__(config_generator=cg, **kwargs) 95 | 96 | # Hyperband related stuff 97 | self.eta = eta 98 | self.min_budget = min_budget 99 | self.max_budget = max_budget 100 | 101 | # precompute some HB stuff 102 | self.max_SH_iter = -int(np.log(min_budget/max_budget)/np.log(eta)) + 1 103 | self.budgets = max_budget * np.power(eta, -np.linspace(self.max_SH_iter-1, 0, self.max_SH_iter)) 104 | 105 | self.config.update({ 106 | 'eta' : eta, 107 | 'min_budget' : min_budget, 108 | 'max_budget' : max_budget, 109 | 'budgets' : self.budgets, 110 | 'max_SH_iter': self.max_SH_iter, 111 | 'min_points_in_model' : min_points_in_model, 112 | 'top_n_percent' : top_n_percent, 113 | 'num_samples' : num_samples, 114 | 'random_fraction' : random_fraction, 115 | 'bandwidth_factor' : bandwidth_factor, 116 | 'min_bandwidth': min_bandwidth 117 | }) 118 | 119 | def get_next_iteration(self, iteration, iteration_kwargs={}): 120 | """ 121 | BO-HB uses (just like Hyperband) SuccessiveHalving for each iteration. 122 | See Li et al. (2016) for reference. 
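        For illustration: with eta=3, min_budget=1 and max_budget=9 the precomputed
        budgets are [1, 3, 9] and max_SH_iter is 3, so iteration 0 starts 9
        configurations on budget 1 and promotes the best third at each stage
        (ns = [9, 3, 1]).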
123 | 124 | Parameters 125 | ---------- 126 | iteration: int 127 | the index of the iteration to be instantiated 128 | 129 | Returns 130 | ------- 131 | SuccessiveHalving: the SuccessiveHalving iteration with the 132 | corresponding number of configurations 133 | """ 134 | 135 | # number of 'SH rungs' 136 | s = self.max_SH_iter - 1 - (iteration%self.max_SH_iter) 137 | # number of configurations in that bracket 138 | n0 = int(np.floor((self.max_SH_iter)/(s+1)) * self.eta**s) 139 | ns = [max(int(n0*(self.eta**(-i))), 1) for i in range(s+1)] 140 | 141 | return(SuccessiveHalving(HPB_iter=iteration, num_configs=ns, budgets=self.budgets[(-s-1):], config_sampler=self.config_generator.get_config, **iteration_kwargs)) 142 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/config_generators/__init__.py: -------------------------------------------------------------------------------- 1 | from hpbandster.optimizers.config_generators.random_sampling import RandomSampling 2 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/config_generators/kde.py: -------------------------------------------------------------------------------- 1 | import ConfigSpace 2 | import numpy as np 3 | import scipy.stats as sps 4 | import statsmodels.api as sm 5 | 6 | from hpbandster.config_generators.base import base_config_generator 7 | 8 | 9 | class KernelDensityEstimator(base_config_generator): 10 | 11 | def __init__(self, configspace, top_n_percent=10, update_after_n_points=50, 12 | min_points_in_model = None, 13 | *kwargs): 14 | """ 15 | Fits for each given budget a kernel density estimator on the best N percent of the 16 | evaluated configurations on this budget. 17 | 18 | 19 | Parameters: 20 | ----------- 21 | configspace: ConfigSpace 22 | Configuration space object 23 | top_n_percent: int 24 | Determines the percentile of configurations that will be used as training data 25 | for the kernel density estimator, e.g if set to 10 the 10% best configurations will be considered 26 | for training. 27 | update_after_n_points: int 28 | Specifies after how many new observed points the kernel density will be retrained. 
29 | min_points_in_model: int 30 | minimum number of datapoints needed to fit a model 31 | 32 | """ 33 | super(KernelDensityEstimator, self).__init__(**kwargs) 34 | 35 | self.top_n_percent = top_n_percent 36 | self.update_after_n_points = update_after_n_points 37 | self.configspace = configspace 38 | 39 | self.min_points_in_model = min_points_in_model 40 | if min_points_in_model is None: 41 | self.min_points_in_model = len(self.configspace.get_hyperparameters())+1 42 | 43 | 44 | # TODO: so far we only consider continuous configuration spaces 45 | self.var_type = "c" * len(self.configspace.get_hyperparameters()) 46 | self.configs = dict() 47 | self.losses = dict() 48 | self.kde_models = dict() 49 | 50 | def get_config(self, budget): 51 | """ 52 | Function to sample a new configuration 53 | 54 | This function is called inside Hyperband to query a new configuration 55 | 56 | 57 | Parameters: 58 | ----------- 59 | budget: float 60 | the budget for which this configuration is scheduled 61 | 62 | returns: config 63 | should return a valid configuration 64 | 65 | """ 66 | # No observations available for this budget sample from the prior 67 | if len(self.kde_models.keys()) == 0: 68 | return self.configspace.sample_configuration().get_dictionary() 69 | # If we haven't seen anything with this budget, we sample from the kde trained on the highest budget 70 | if budget not in self.kde_models.keys(): 71 | budget = sorted(self.kde_models.keys())[-1] 72 | # TODO: This only works in continuous space and with gaussian kernels 73 | kde = self.kde_models[budget] 74 | idx = np.random.randint(0, len(self.kde_models[budget].data)) 75 | 76 | vector = [sps.truncnorm.rvs(-m/bw,(1-m)/bw, loc=m, scale=bw) for m,bw in zip(self.kde_models[budget].data[idx], kde.bw)] 77 | 78 | if np.any(np.array(vector)>1) or np.any(np.array(vector)<0): 79 | raise RuntimeError("truncated normal sampling problems!") 80 | 81 | sample = ConfigSpace.Configuration(self.configspace, vector=vector) 82 | return sample.get_dictionary(), {} 83 | 84 | def new_result(self, job): 85 | """ 86 | function to register finished runs 87 | 88 | Every time a run has finished, this function should be called 89 | to register it with the result logger. If overwritten, make 90 | sure to call this method from the base class to ensure proper 91 | logging. 92 | 93 | 94 | Parameters: 95 | ----------- 96 | job_id: dict 97 | a dictionary containing all the info about the run 98 | job_result: dict 99 | contains all the results of the job, i.e. it's a dict with 100 | the keys 'loss' and 'info' 101 | 102 | """ 103 | 104 | super(KernelDensityEstimator, self).new_result(job) 105 | budget = job.kwargs["budget"] 106 | if budget not in self.configs.keys(): 107 | self.configs[budget] = [] 108 | self.losses[budget] = [] 109 | 110 | # We want to get a numerical representation of the configuration in the original space 111 | conf = ConfigSpace.Configuration(self.configspace, job.kwargs['config']) 112 | self.configs[budget].append(conf.get_array()) 113 | self.losses[budget].append(job.result['result']["loss"]) 114 | 115 | 116 | # Check if we have enough data points to fit a KDE 117 | if len(self.configs[budget]) % self.update_after_n_points == 0: 118 | train_configs, train_losses = [], [] 119 | 120 | train_configs.extend(self.configs[budget]) 121 | train_losses.extend(self.losses[budget]) 122 | 123 | n = int(self.top_n_percent * len(train_configs) / 100.) 
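            # n is the number of 'good' observations that would be used to fit the KDE
            # for this budget; if it is still below min_points_in_model, observations
            # from the other budgets (highest budget first) are mixed in below until
            # enough data points are available.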
124 | 125 | remaining_budgets = list(self.configs.keys()) 126 | remaining_budgets.remove(budget) 127 | remaining_budgets.sort(reverse=True) 128 | 129 | 130 | for b in remaining_budgets: 131 | if n >= self.min_points_in_model: break 132 | train_configs.extend(self.configs[b]) 133 | train_losses.extend(self.losses[b]) 134 | n = int(self.top_n_percent * len(train_configs) / 100.) 135 | 136 | if len(train_losses) < self.min_points_in_model: 137 | return 138 | 139 | n = max(self.min_points_in_model, n) 140 | 141 | # Refit KDE for the current budget 142 | idx = np.argsort(train_losses) 143 | 144 | train_data = (np.array(train_configs)[idx])[:n] 145 | self.kde_models[budget] = sm.nonparametric.KDEMultivariate(data=train_data, 146 | var_type=self.var_type, 147 | bw='cv_ls') 148 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/config_generators/lcnet.py: -------------------------------------------------------------------------------- 1 | import ConfigSpace 2 | import numpy as np 3 | import threading 4 | 5 | from robo.models.lcnet import LCNet, get_lc_net 6 | 7 | from hpbandster.core.base_config_generator import base_config_generator 8 | 9 | 10 | def smoothing(lc): 11 | new_lc = [] 12 | curr_best = np.inf 13 | for i in range(len(lc)): 14 | if lc[i] < curr_best: 15 | curr_best = lc[i] 16 | new_lc.append(curr_best) 17 | return new_lc 18 | 19 | 20 | class LCNetWrapper(base_config_generator): 21 | def __init__(self, 22 | configspace, 23 | max_budget, 24 | n_points=2000, 25 | delta=1.0, 26 | n_candidates=1024, 27 | **kwargs): 28 | """ 29 | Parameters: 30 | ----------- 31 | 32 | directory: string 33 | where the results are logged 34 | logger: hpbandster.utils.result_logger_v?? 35 | the logger to store the data, defaults to v1 36 | overwrite: bool 37 | whether or not existing data will be overwritten 38 | 39 | """ 40 | 41 | super(LCNetWrapper, self).__init__(**kwargs) 42 | 43 | self.n_candidates = n_candidates 44 | self.model = LCNet(sampling_method="sghmc", 45 | l_rate=np.sqrt(1e-4), 46 | mdecay=.05, 47 | n_nets=100, 48 | burn_in=500, 49 | n_iters=3000, 50 | get_net=get_lc_net, 51 | precondition=True) 52 | 53 | self.config_space = configspace 54 | self.max_budget = max_budget 55 | self.train = None 56 | self.train_targets = None 57 | self.n_points = n_points 58 | self.is_trained = False 59 | self.counter = 0 60 | self.delta = delta 61 | self.lock = threading.Lock() 62 | 63 | def get_config(self, budget): 64 | """ 65 | function to sample a new configuration 66 | 67 | This function is called inside Hyperband to query a new configuration 68 | 69 | 70 | Parameters: 71 | ----------- 72 | budget: float 73 | the budget for which this configuration is scheduled 74 | 75 | returns: config 76 | should return a valid configuration 77 | 78 | """ 79 | self.lock.acquire() 80 | if not self.is_trained: 81 | c = self.config_space.sample_configuration().get_array() 82 | else: 83 | candidates = np.array([self.config_space.sample_configuration().get_array() 84 | for _ in range(self.n_candidates)]) 85 | 86 | # We are only interested on the asymptotic value 87 | projected_candidates = np.concatenate((candidates, np.ones([self.n_candidates, 1])), axis=1) 88 | 89 | # Compute the upper confidence bound of the function at the asymptote 90 | m, v = self.model.predict(projected_candidates) 91 | 92 | ucb_values = m + self.delta * np.sqrt(v) 93 | print(ucb_values) 94 | # Sample a configuration based on the ucb values 95 | p = np.ones(self.n_candidates) * 
(ucb_values / np.sum(ucb_values)) 96 | idx = np.random.choice(self.n_candidates, 1, False, p) 97 | 98 | c = candidates[idx][0] 99 | 100 | config = ConfigSpace.Configuration(self.config_space, vector=c) 101 | 102 | self.lock.release() 103 | return config.get_dictionary(), {} 104 | 105 | def new_result(self, job): 106 | """ 107 | function to register finished runs 108 | 109 | Every time a run has finished, this function should be called 110 | to register it with the result logger. If overwritten, make 111 | sure to call this method from the base class to ensure proper 112 | logging. 113 | 114 | 115 | Parameters: 116 | ----------- 117 | job_id: dict 118 | a dictionary containing all the info about the run 119 | job_result: dict 120 | contains all the results of the job, i.e. it's a dict with 121 | the keys 'loss' and 'info' 122 | 123 | """ 124 | super().new_result(job) 125 | 126 | conf = ConfigSpace.Configuration(self.config_space, job.kwargs['config']).get_array() 127 | 128 | epochs = len(job.result["info"]["learning_curve"]) 129 | budget = int(job.kwargs["budget"]) 130 | 131 | t_idx = np.linspace(budget / epochs, budget, epochs) / self.max_budget 132 | x_new = np.repeat(conf[None, :], t_idx.shape[0], axis=0) 133 | 134 | x_new = np.concatenate((x_new, t_idx[:, None]), axis=1) 135 | 136 | # Smooth learning curve 137 | lc = smoothing(job.result["info"]["learning_curve"]) 138 | 139 | # Flip learning curves since LC-Net wants increasing curves 140 | lc_new = [1 - y for y in lc] 141 | 142 | if self.train is None: 143 | self.train = x_new 144 | self.train_targets = lc_new 145 | else: 146 | self.train = np.append(self.train, x_new, axis=0) 147 | self.train_targets = np.append(self.train_targets, lc_new, axis=0) 148 | 149 | if self.counter >= self.n_points: 150 | 151 | self.lock.acquire() 152 | y_min = np.min(self.train_targets) 153 | y_max = np.max(self.train_targets) 154 | 155 | train_targets = (self.train_targets - y_min) / (y_max - y_min) 156 | 157 | self.model.train(self.train, train_targets) 158 | self.is_trained = True 159 | self.counter = 0 160 | self.lock.release() 161 | 162 | else: 163 | self.counter += epochs 164 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/config_generators/random_sampling.py: -------------------------------------------------------------------------------- 1 | from hpbandster.core.base_config_generator import base_config_generator 2 | 3 | 4 | 5 | 6 | class RandomSampling(base_config_generator): 7 | """ 8 | class to implement random sampling from a ConfigSpace 9 | """ 10 | 11 | def __init__(self, configspace, **kwargs): 12 | """ 13 | 14 | Parameters: 15 | ----------- 16 | 17 | configspace: ConfigSpace.ConfigurationSpace 18 | The configuration space to sample from. 
It contains the full 19 | specification of the Hyperparameters with their priors 20 | **kwargs: 21 | see hyperband.config_generators.base.base_config_generator for additional arguments 22 | """ 23 | 24 | super().__init__(**kwargs) 25 | self.configspace = configspace 26 | 27 | 28 | def get_config(self, budget): 29 | return(self.configspace.sample_configuration().get_dictionary(), {}) 30 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/h2bo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import math 4 | import copy 5 | import logging 6 | 7 | import numpy as np 8 | 9 | 10 | import ConfigSpace as CS 11 | 12 | from hpbandster.core.master import Master 13 | from hpbandster.optimizers.iterations import SuccessiveHalving 14 | from hpbandster.optimizers.config_generators.h2bo import H2BO as CG_H2BO 15 | 16 | class H2BO(Master): 17 | def __init__(self, 18 | configspace = None, 19 | eta=3, min_budget=0.01, max_budget=1, 20 | min_points_in_model = None, top_n_percent=15, 21 | num_samples = 32, random_fraction=1/3, bandwidth_factor=1, 22 | min_bandwidth=1e-3,bw_estimator='scott',fully_dimensional=True, 23 | **kwargs 24 | ): 25 | """ 26 | 27 | Parameters 28 | ---------- 29 | configspace: ConfigSpace object 30 | valid representation of the search space 31 | eta : float 32 | In each iteration, a complete run of sequential halving is executed. In it, 33 | after evaluating each configuration on the same subset size, only a fraction of 34 | 1/eta of them 'advances' to the next round. 35 | Must be greater or equal to 2. 36 | min_budget : float 37 | The smallest budget to consider. Needs to be positive! 38 | max_budget : float 39 | the largest budget to consider. Needs to be larger than min_budget! 40 | The budgets will be geometrically distributed $\sim \eta^k$ for 41 | $k\in [0, 1, ... , num_subsets - 1]$. 42 | min_points_in_model: int 43 | number of observations to start building a KDE. Default 'None' means 44 | dim+1, the bare minimum. 45 | top_n_percent: int 46 | percentage ( between 1 and 99, default 15) of the observations that are considered good. 47 | num_samples: int 48 | number of samples to optimize EI (default 64) 49 | random_fraction: float 50 | fraction of purely random configurations that are sampled from the 51 | prior without the model. 52 | bw_estimator: str 53 | controls the way the bandwidths are estimator. For 'scott' a quick rule of thumb based 54 | on the empirical variance is used, for 'mlvc' the likelihood based on 55 | leave on out cross validation is maximized. 56 | min_bandwidth: float 57 | to keep diversity, even when all (good) samples have the same value for one of the parameters, 58 | a minimum bandwidth (Default: 1e-3) is used instead of zero. 59 | iteration_kwargs: dict 60 | kwargs to be added to the instantiation of each iteration 61 | fully_dimensional: bool 62 | if true, the KDE is uses factored kernel across all dimensions, otherwise the PDF is a product of 1d PDFs 63 | """ 64 | 65 | 66 | 67 | # TODO: Propper check for ConfigSpace object! 
68 | if configspace is None: 69 | raise ValueError("You have to provide a valid CofigSpace object") 70 | 71 | 72 | 73 | cg = CG_H2BO( configspace = configspace, 74 | min_points_in_model = min_points_in_model, 75 | top_n_percent=top_n_percent, 76 | num_samples = num_samples, 77 | random_fraction=random_fraction, 78 | bw_estimator=bw_estimator, 79 | min_bandwidth = min_bandwidth, 80 | fully_dimensional=fully_dimensional 81 | ) 82 | 83 | super().__init__(config_generator=cg, **kwargs) 84 | 85 | # Hyperband related stuff 86 | self.eta = eta 87 | self.min_budget = min_budget 88 | self.max_budget = max_budget 89 | 90 | 91 | # precompute some HB stuff 92 | self.max_SH_iter = -int(np.log(min_budget/max_budget)/np.log(eta)) + 1 93 | self.budgets = max_budget * np.power(eta, -np.linspace(self.max_SH_iter-1, 0, self.max_SH_iter)) 94 | 95 | 96 | 97 | # max total budget for one iteration 98 | self.budget_per_iteration = sum([b*self.eta**i for i, b in enumerate(self.budgets[::-1])]) 99 | 100 | self.config.update({ 101 | 'eta' : eta, 102 | 'min_budget' : min_budget, 103 | 'max_budget' : max_budget, 104 | 'budgets' : self.budgets, 105 | 'max_SH_iter': self.max_SH_iter, 106 | 'min_points_in_model' : min_points_in_model, 107 | 'top_n_percent' : top_n_percent, 108 | 'num_samples' : num_samples, 109 | 'random_fraction' : random_fraction, 110 | 'min_bandwidth': min_bandwidth, 111 | 'bw_estimator': bw_estimator, 112 | 'fully_dimensional': fully_dimensional, 113 | }) 114 | 115 | def get_next_iteration(self, iteration, iteration_kwargs={}): 116 | """ 117 | BO-HB uses (just like Hyperband) SuccessiveHalving for each iteration. 118 | See Li et al. (2016) for reference. 119 | 120 | Parameters: 121 | ----------- 122 | iteration: int 123 | the index of the iteration to be instantiated 124 | 125 | Returns: 126 | -------- 127 | SuccessiveHalving: the SuccessiveHalving iteration with the 128 | corresponding number of configurations 129 | """ 130 | 131 | min_budget = max( self.min_budget, self.config_generator.largest_budget_with_model()) 132 | max_budget = self.max_budget 133 | eta = self.eta 134 | 135 | # precompute some HB stuff 136 | max_SH_iter = -int(np.log(min_budget/max_budget)/np.log(eta)) + 1 137 | budgets = max_budget * np.power(eta, -np.linspace(max_SH_iter-1, 0, max_SH_iter)) 138 | 139 | 140 | # number of 'SH rungs' 141 | s = max_SH_iter - 1 142 | # number of configurations in that bracket 143 | n0 = int(np.floor((self.max_SH_iter)/(s+1)) * eta**s) 144 | ns = np.array([max(int(n0*(eta**(-i))), 1) for i in range(s+1)]) 145 | 146 | while (ns * budgets[-s-1:]).sum() <= self.budget_per_iteration: 147 | n0 += 1 148 | ns = np.array([max(int(n0*(eta**(-i))), 1) for i in range(s+1)]) 149 | 150 | n0 -= 1 151 | ns = np.array([max(int(n0*(eta**(-i))), 1) for i in range(s+1)]) 152 | 153 | assert (ns * budgets[-s-1:]).sum() <= self.budget_per_iteration, 'Sampled iteration exceeds the budget per iteration!' 
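        # n0 is now the largest number of starting configurations whose complete
        # SuccessiveHalving bracket still fits into self.budget_per_iteration.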
154 | 155 | return(SuccessiveHalving(HPB_iter=iteration, num_configs=ns, budgets=budgets, config_sampler=self.config_generator.get_config, **iteration_kwargs)) 156 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/hyperband.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import math 4 | import pdb 5 | import copy 6 | import logging 7 | 8 | import numpy as np 9 | 10 | 11 | import ConfigSpace as CS 12 | 13 | from hpbandster.core.master import Master 14 | from hpbandster.optimizers.iterations import SuccessiveHalving 15 | from hpbandster.optimizers.config_generators import RandomSampling 16 | 17 | class HyperBand(Master): 18 | def __init__(self, configspace = None, 19 | eta=3, min_budget=0.01, max_budget=1, 20 | **kwargs ): 21 | """ 22 | Hyperband implements hyperparameter optimization by sampling 23 | candidates at random and "trying" them first, running them for 24 | a specific budget. The approach is iterative, promising 25 | candidates are run for a longer time, increasing the fidelity 26 | for their performance. While this is a very efficient racing 27 | approach, random sampling makes no use of the knowledge gained 28 | about the candidates during optimization. 29 | 30 | Parameters 31 | ---------- 32 | configspace: ConfigSpace object 33 | valid representation of the search space 34 | eta : float 35 | In each iteration, a complete run of sequential halving is executed. In it, 36 | after evaluating each configuration on the same subset size, only a fraction of 37 | 1/eta of them 'advances' to the next round. 38 | Must be greater or equal to 2. 39 | min_budget : float 40 | The smallest budget to consider. Needs to be positive! 41 | max_budget : float 42 | the largest budget to consider. Needs to be larger than min_budget! 43 | The budgets will be geometrically distributed $\sim \eta^k$ for 44 | $k\in [0, 1, ... , num_subsets - 1]$. 45 | """ 46 | 47 | 48 | # TODO: Propper check for ConfigSpace object! 49 | if configspace is None: 50 | raise ValueError("You have to provide a valid CofigSpace object") 51 | 52 | super().__init__(config_generator=RandomSampling(configspace), **kwargs) 53 | 54 | # Hyperband related stuff 55 | self.eta = eta 56 | self.min_budget = min_budget 57 | self.max_budget = max_budget 58 | 59 | # precompute some HB stuff 60 | self.max_SH_iter = -int(np.log(min_budget/max_budget)/np.log(eta)) + 1 61 | self.budgets = max_budget * np.power(eta, -np.linspace(self.max_SH_iter-1, 0, self.max_SH_iter)) 62 | 63 | self.config.update({ 64 | 'eta' : eta, 65 | 'min_budget' : min_budget, 66 | 'max_budget' : max_budget, 67 | 'budgets' : self.budgets, 68 | 'max_SH_iter': self.max_SH_iter, 69 | }) 70 | 71 | 72 | 73 | def get_next_iteration(self, iteration, iteration_kwargs={}): 74 | """ 75 | Hyperband uses SuccessiveHalving for each iteration. 76 | See Li et al. (2016) for reference. 
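        Successive calls cycle through the different brackets via iteration % max_SH_iter:
        the first bracket of a cycle starts many configurations on the smallest budget,
        while the last one runs only a few configurations directly on max_budget.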
77 | 78 | Parameters 79 | ---------- 80 | iteration: int 81 | the index of the iteration to be instantiated 82 | 83 | Returns 84 | ------- 85 | SuccessiveHalving: the SuccessiveHalving iteration with the 86 | corresponding number of configurations 87 | """ 88 | 89 | # number of 'SH rungs' 90 | s = self.max_SH_iter - 1 - (iteration%self.max_SH_iter) 91 | # number of configurations in that bracket 92 | n0 = int(np.floor((self.max_SH_iter)/(s+1)) * self.eta**s) 93 | ns = [max(int(n0*(self.eta**(-i))), 1) for i in range(s+1)] 94 | 95 | return(SuccessiveHalving(HPB_iter=iteration, num_configs=ns, budgets=self.budgets[(-s-1):], config_sampler=self.config_generator.get_config, **iteration_kwargs)) 96 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/iterations/__init__.py: -------------------------------------------------------------------------------- 1 | from hpbandster.optimizers.iterations.successivehalving import SuccessiveHalving 2 | from hpbandster.optimizers.iterations.sucessivehalvingmobohb import SuccessiveHalvingMOBOHB 3 | from hpbandster.optimizers.iterations.successivehalvingparego import SuccessiveHalvingParEGO 4 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/iterations/successivehalving.py: -------------------------------------------------------------------------------- 1 | from hpbandster.core.base_iteration import BaseIteration 2 | import numpy as np 3 | 4 | 5 | class SuccessiveHalving(BaseIteration): 6 | 7 | def _advance_to_next_stage(self, config_ids, losses): 8 | """ 9 | SuccessiveHalving simply continues the best based on the current loss. 10 | """ 11 | ranks = np.argsort(np.argsort(losses)) 12 | return(ranks < self.num_configs[self.stage]) 13 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/iterations/successivehalvingparego.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from hpbandster.core.base_iteration import BaseIteration 3 | 4 | class SuccessiveHalvingParEGO(BaseIteration): 5 | 6 | def parEG0_scalarization(self, cost): 7 | w = np.random.random_sample(2) 8 | w /= np.sum(w) 9 | 10 | w_f = w * cost 11 | max_k = np.max(w_f) 12 | rho_sum_wf = self.rho * np.sum(w_f) 13 | return max_k + rho_sum_wf 14 | 15 | def _advance_to_next_stage(self, config_ids, losses): 16 | """ 17 | SuccessiveHalving MOBOHB simply continues the best based on the current multi-objective loss. 18 | """ 19 | losses = self.parEG0_scalarization(losses) 20 | ranks = np.argsort(np.argsort(losses)) 21 | return (ranks < self.num_configs[self.stage]) -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/iterations/successiveresampling.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from hpbandster.iterations.base import BaseIteration 4 | 5 | import numpy as np 6 | 7 | 8 | class SuccessiveResampling(BaseIteration): 9 | 10 | def __init__(self, *args, resampling_rate = 0.5, min_samples_advance = 1, **kwargs): 11 | """ 12 | Iteration class to resample new configurations along side keeping the good ones 13 | in SuccessiveHalving. 
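        Only a (1 - resampling_rate) fraction of the next stage's slots (but at least
        min_samples_advance of them) is filled with the best configurations from the
        current stage; the remaining slots are intended for freshly sampled
        configurations. With resampling_rate=0.5 and 8 slots, for example, only the 4
        best configurations would advance.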
14 | 15 | Parameters: 16 | ----------- 17 | resampling_rate: float 18 | fraction of configurations that are resampled at each stage 19 | min_samples_advance:int 20 | number of samples that are guaranteed to proceed to the next 21 | stage regardless of the fraction. 22 | 23 | """ 24 | self.resampling_rate = resampling_rate 25 | self.min_samples_advance = min_samples_advance 26 | 27 | 28 | def _advance_to_next_stage(self, config_ids, losses): 29 | """ 30 | SuccessiveHalving simply continues the best based on the current loss. 31 | """ 32 | 33 | ranks = np.argsort(np.argsort(losses)) 34 | return(ranks < max(self.min_samples_advance, self.num_configs[self.stage] * (1-self.resampling_rate)) ) 35 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/iterations/sucessivehalvingmobohb.py: -------------------------------------------------------------------------------- 1 | from hpbandster.core.base_iteration import BaseIteration 2 | import numpy as np 3 | import sys 4 | from pygmo import hypervolume 5 | 6 | eps = sys.float_info.epsilon 7 | 8 | def nondominated_sort(points): 9 | points = points.copy() 10 | ranks = np.zeros(len(points)) 11 | r = 0 12 | c = len(points) 13 | while c > 0: 14 | extended = np.tile(points, (points.shape[0], 1, 1)) 15 | dominance = np.sum(np.logical_and( 16 | np.all(extended <= np.swapaxes(extended, 0, 1), axis=2), 17 | np.any(extended < np.swapaxes(extended, 0, 1), axis=2)), axis=1) 18 | points[dominance == 0] = 1e9 # mark as used 19 | ranks[dominance == 0] = r 20 | r += 1 21 | c -= np.sum(dominance == 0) 22 | return ranks 23 | 24 | class SuccessiveHalvingMOBOHB(BaseIteration): 25 | 26 | def _advance_to_next_stage(self, config_ids, losses): 27 | """ 28 | SuccessiveHalving MOBOHB simply continues the best based on the current multi-objective loss. 
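        Configurations are ranked by nondominated sorting of their loss vectors and
        whole Pareto fronts are kept, front by front, until the capacity of the stage
        is reached; the commented-out block below sketches a hypervolume-contribution
        criterion for selecting within the last front instead.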
29 | """ 30 | rank = nondominated_sort(losses) 31 | indices = np.array(range(len(losses))) 32 | keep_indices = np.array([], dtype=int) 33 | 34 | # nondominance rank-based selection 35 | i = 0 36 | while len(keep_indices) + sum(rank == i) <= self.num_configs[self.stage]: 37 | keep_indices = np.append(keep_indices, indices[rank == i]) 38 | i += 1 39 | keep_indices = np.append(keep_indices, indices[rank == i]) 40 | 41 | # hypervolume contribution-based selection 42 | #ys_r = losses[rank == i] 43 | #indices_r = indices[rank == i] 44 | #worst_point = np.max(losses, axis=0) 45 | #reference_point = np.maximum( 46 | # np.maximum( 47 | # 1.1 * worst_point, # case: value > 0 48 | # 0.9 * worst_point # case: value < 0 49 | # ), 50 | # np.full(len(worst_point), eps) # case: value = 0 51 | #) 52 | 53 | #S = [] 54 | #contributions = [] 55 | #for j in range(len(ys_r)): 56 | # contributions.append(hypervolume([ys_r[j]]).compute(reference_point)) 57 | #while len(keep_indices) + 1 <= self.num_configs[self.stage]: 58 | # hv_S = 0 59 | # if len(S) > 0: 60 | # hv_S = hypervolume(S).compute(reference_point) 61 | # index = np.argmax(contributions) 62 | # contributions[index] = -1e9 # mark as already selected 63 | # for j in range(len(contributions)): 64 | # if j == index: 65 | # continue 66 | # p_q = np.max([ys_r[index], ys_r[j]], axis=0) 67 | # contributions[j] = contributions[j] - (hypervolume(S + [p_q]).compute(reference_point) - hv_S) 68 | # S = S + [ys_r[index]] 69 | # keep_indices = np.append(keep_indices, indices_r[index]) 70 | 71 | return_stat = np.zeros((len(losses))).astype(bool) 72 | return_stat[keep_indices] = True 73 | return return_stat 74 | 75 | # ranks = np.argsort(np.argsort(losses)) 76 | # return (ranks < self.num_configs[self.stage]) 77 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/kde/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/methods/mobohb/hpbandster/optimizers/kde/__init__.py -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/lcnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import math 4 | import copy 5 | import logging 6 | 7 | import numpy as np 8 | 9 | import ConfigSpace as CS 10 | 11 | from hpbandster.core.master import Master 12 | from hpbandster.optimizers.iterations import SuccessiveHalving 13 | from hpbandster.optimizers.config_generators.lcnet import LCNetWrapper as CG_LCNet 14 | # from hpbandster.optimizers.config_generators import RandomSampling 15 | 16 | 17 | class LCNet(Master): 18 | def __init__(self, configspace=None, 19 | eta=3, min_budget=0.01, max_budget=1, 20 | min_points_in_model=None, top_n_percent=15, 21 | num_samples=64, random_fraction=1 / 3, bandwidth_factor=3, 22 | min_bandwidth=1e-3, n_points=200, n_candidates=1024, delta=1, 23 | **kwargs): 24 | """ 25 | 26 | Parameters 27 | ---------- 28 | configspace: ConfigSpace object 29 | valid representation of the search space 30 | eta : float 31 | In each iteration, a complete run of sequential halving is executed. In it, 32 | after evaluating each configuration on the same subset size, only a fraction of 33 | 1/eta of them 'advances' to the next round. 34 | Must be greater or equal to 2. 
35 | min_budget : float 36 | The smallest budget to consider. Needs to be positive! 37 | max_budget : float 38 | the largest budget to consider. Needs to be larger than min_budget! 39 | The budgets will be geometrically distributed $\sim \eta^k$ for 40 | $k\in [0, 1, ... , num_subsets - 1]$. 41 | min_points_in_model: int 42 | number of observations to start building a KDE. Default 'None' means 43 | dim+1, the bare minimum. 44 | top_n_percent: int 45 | percentage ( between 1 and 99, default 15) of the observations that are considered good. 46 | num_samples: int 47 | number of samples to optimize EI (default 64) 48 | random_fraction: float 49 | fraction of purely random configurations that are sampled from the 50 | prior without the model. 51 | bandwidth_factor: float 52 | to encourage diversity, the points proposed to optimize EI, are sampled 53 | from a 'widened' KDE where the bandwidth is multiplied by this factor (default: 3) 54 | min_bandwidth: float 55 | to keep diversity, even when all (good) samples have the same value for one of the parameters, 56 | a minimum bandwidth (Default: 1e-3) is used instead of zero. 57 | iteration_kwargs: dict 58 | kwargs to be added to the instantiation of each iteration 59 | """ 60 | 61 | # TODO: Propper check for ConfigSpace object! 62 | if configspace is None: 63 | raise ValueError("You have to provide a valid CofigSpace object") 64 | 65 | cg = CG_LCNet(configspace=configspace, 66 | max_budget=max_budget, 67 | n_points=n_points, 68 | n_candidates=n_candidates, 69 | delta=delta) 70 | 71 | # cg = RandomSampling(configspace) 72 | 73 | super().__init__(config_generator=cg, **kwargs) 74 | 75 | # Hyperband related stuff 76 | self.eta = eta 77 | self.min_budget = min_budget 78 | self.max_budget = max_budget 79 | 80 | # precompute some HB stuff 81 | self.max_SH_iter = -int(np.log(min_budget / max_budget) / np.log(eta)) + 1 82 | self.budgets = max_budget * np.power(eta, -np.linspace(self.max_SH_iter - 1, 0, self.max_SH_iter)) 83 | 84 | self.config.update({ 85 | 'eta': eta, 86 | 'min_budget': min_budget, 87 | 'max_budget': max_budget, 88 | 'budgets': self.budgets, 89 | 'max_SH_iter': self.max_SH_iter, 90 | 'min_points_in_model': min_points_in_model, 91 | 'top_n_percent': top_n_percent, 92 | 'num_samples': num_samples, 93 | 'random_fraction': random_fraction, 94 | 'bandwidth_factor': bandwidth_factor, 95 | 'min_bandwidth': min_bandwidth 96 | }) 97 | 98 | def get_next_iteration(self, iteration, iteration_kwargs={}): 99 | """ 100 | BO-HB uses (just like Hyperband) SuccessiveHalving for each iteration. 101 | See Li et al. (2016) for reference. 
102 | 103 | Parameters: 104 | ----------- 105 | iteration: int 106 | the index of the iteration to be instantiated 107 | 108 | Returns: 109 | -------- 110 | SuccessiveHalving: the SuccessiveHalving iteration with the 111 | corresponding number of configurations 112 | """ 113 | 114 | # number of 'SH rungs' 115 | s = self.max_SH_iter - 1 - (iteration % self.max_SH_iter) 116 | # number of configurations in that bracket 117 | n0 = int(np.floor((self.max_SH_iter) / (s + 1)) * self.eta ** s) 118 | ns = [max(int(n0 * (self.eta ** (-i))), 1) for i in range(s + 1)] 119 | 120 | return (SuccessiveHalving(HPB_iter=iteration, num_configs=ns, budgets=self.budgets[(-s - 1):], 121 | config_sampler=self.config_generator.get_config, **iteration_kwargs)) 122 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/learning_curve_models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/methods/mobohb/hpbandster/optimizers/learning_curve_models/__init__.py -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/learning_curve_models/arif.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("../../") 3 | 4 | import numpy as np 5 | from sklearn.ensemble import RandomForestRegressor as rfr 6 | 7 | from hpbandster.learning_curve_models.base import LCModel as lcm_base 8 | 9 | from IPython import embed 10 | 11 | class ARIF(lcm_base): 12 | """ 13 | An 'Auto Regressive Integrated (Random) Forest' 14 | """ 15 | def __init__(self, order=2, diff_order=0): 16 | """ 17 | Parameters: 18 | ----------- 19 | 20 | order: int 21 | the order of the 'autoregressiveness' 22 | diff_order: int 23 | the differencing order used 24 | TODO: Not used so far! 
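        With order=2, for example, each (differenced) loss value is predicted by a
        random forest from the two preceding values concatenated with the configuration
        vector.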
25 | """ 26 | self.order = order 27 | self.diff_order = diff_order 28 | 29 | def apply_differencing(self, series, order=None): 30 | 31 | if order is None: order = self.diff_order 32 | 33 | for o in range(order): 34 | series = series[1:]-series[:-1] 35 | return series 36 | 37 | 38 | def invert_differencing(self, initial_part, differenced_rest, order=None): 39 | """ 40 | function to invert the differencing 41 | """ 42 | 43 | if order is None: order = self.diff_order 44 | 45 | # compute the differenced values of the initial part: 46 | starting_points = [ self.apply_differencing(initial_part, order=order)[-1] for order in range(self.diff_order)] 47 | 48 | actual_predictions = differenced_rest 49 | import pdb 50 | pdb.set_trace() 51 | for s in starting_points[::-1]: 52 | actual_predictions = np.cumsum(np.hstack([s, actual_predictions]))[1:] 53 | 54 | return(actual_predictions) 55 | 56 | def fit(self, losses, configs=None): 57 | 58 | if configs is None: 59 | configs = [[]]*len(times) 60 | 61 | # convert learning curves into X and y data 62 | 63 | X = [] 64 | y = [] 65 | 66 | for l,c in zip(losses, configs): 67 | l = self.apply_differencing(l) 68 | 69 | for i in range(self.order, len(l)): 70 | X.append(np.hstack([l[i-self.order:i], c])) 71 | y.append(l[i]) 72 | 73 | self.X = np.array(X) 74 | self.y = np.array(y) 75 | 76 | 77 | self.rfr = rfr().fit(self.X,self.y) 78 | 79 | 80 | def extend_partial(self, obs_losses, num_steps, config=None): 81 | # TODO: add variance predictions 82 | if config is None: 83 | config = [] 84 | 85 | d_losses = self.apply_differencing(obs_losses) 86 | 87 | 88 | for t in range(num_steps): 89 | x = np.hstack([d_losses[-self.order:], config]) 90 | y = self.rfr.predict([x]) 91 | d_losses = np.hstack([d_losses, y]) 92 | 93 | 94 | prediction = self.invert_differencing( obs_losses, d_losses[-num_steps:]) 95 | 96 | return(prediction) 97 | 98 | 99 | if __name__ == "__main__": 100 | 101 | sys.path.append("/home/sfalkner/repositories/bitbucket/learning_curve_prediction") 102 | 103 | 104 | 105 | from lc_prediction.utils import load_configs 106 | 107 | 108 | 109 | data = load_configs("/home/sfalkner/repositories/bitbucket/learning_curve_prediction/data/fc_net_mnist", 1024) 110 | 111 | 112 | data = (data[0], data[1][:,:40]) 113 | 114 | import matplotlib.pyplot as plt 115 | 116 | #plt.plot(data[1].T) 117 | #plt.show() 118 | 119 | 120 | full_lcs = [ lc for lc in data[1]] 121 | 122 | T_max = len(full_lcs[0]) 123 | 124 | learning_curves = [ lc[:np.random.randint(lc.shape[0]-8) + 8]for lc in data[1]] 125 | #learning_curves = [ lc[:4+ int(np.random.exponential(5))] for lc in data[1]] 126 | times = [np.arange(1, lc.shape[0]+1) for lc in learning_curves] 127 | 128 | lc_model = ARIF(order=3, diff_order=2) 129 | 130 | 131 | 132 | test_order = 2 133 | random_sequence = np.random.rand(5) 134 | tmp = lc_model.apply_differencing(random_sequence, order=test_order) 135 | 136 | for i in range(test_order+1): 137 | print(lc_model.apply_differencing(random_sequence, order=i)) 138 | reconstruction = lc_model.invert_differencing(random_sequence[:1+test_order], tmp, order=test_order) 139 | 140 | embed() 141 | 142 | 143 | 144 | lc_model.fit(learning_curves, data[0]) 145 | 146 | for i in range(16): 147 | pred_times = range(times[i][-1]+1, T_max) 148 | #pred = lc_model.extend_partial(learning_curves[i], min(10, T_max - len(learning_curves[i])), config=data[0][i]) 149 | pred = lc_model.extend_partial(learning_curves[i], T_max - len(learning_curves[i]), config=data[0][i]) 150 | plt.plot(full_lcs[i]) 151 | 
plt.plot(range(len(learning_curves[i]), len(learning_curves[i])+ len(pred)), pred, '--') 152 | plt.show() 153 | 154 | embed() 155 | 156 | 157 | 158 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/learning_curve_models/base.py: -------------------------------------------------------------------------------- 1 | class LCModel(): 2 | """ 3 | base class for simple learning curve models 4 | """ 5 | 6 | def fit(self, times, losses, configs=None): 7 | """ 8 | function to train the model on the observed data 9 | 10 | Parameters: 11 | ----------- 12 | 13 | times: list 14 | list of numpy arrays of the timesteps for each curve 15 | losses: list 16 | list of numpy arrays of the loss (the actual learning curve) 17 | configs: list or None 18 | list of the configurations for each sample. Each element 19 | has to be a numpy array. Set to None, if no configuration 20 | information is available. 21 | """ 22 | raise NotImplementedError() 23 | 24 | 25 | def predict_unseen(self, times, config): 26 | """ 27 | predict the loss of an unseen configuration 28 | 29 | Parameters: 30 | ----------- 31 | 32 | times: numpy array 33 | times where to predict the loss 34 | config: numpy array 35 | the numerical representation of the config 36 | 37 | Returns: 38 | -------- 39 | 40 | mean and variance prediction at input times for the given config 41 | """ 42 | raise NotImplementedError() 43 | 44 | 45 | def extend_partial(self, times, obs_times, obs_losses, config=None): 46 | """ 47 | extends a partially observed curve 48 | 49 | Parameters: 50 | ----------- 51 | 52 | times: numpy array 53 | times where to predict the loss 54 | obs_times: numpy array 55 | times where the curve has already been observed 56 | obs_losses: numpy array 57 | corresponding observed losses 58 | config: numpy array 59 | numerical reperesentation of the config; None if no config 60 | information is available 61 | 62 | Returns: 63 | -------- 64 | 65 | mean and variance prediction at input times 66 | 67 | 68 | """ 69 | 70 | 71 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/learning_curve_models/lcnet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from hpbandster.learning_curve_models.base import LCModel 4 | 5 | from robo.models.lcnet import LCNet, get_lc_net 6 | 7 | 8 | class LCNetWrapper(LCModel): 9 | """ 10 | Wrapper around LC-Net 11 | """ 12 | 13 | def __init__(self, max_num_epochs): 14 | self.max_num_epochs = max_num_epochs 15 | self.model = LCNet(sampling_method="sghmc", 16 | l_rate=np.sqrt(1e-4), 17 | mdecay=.05, 18 | n_nets=100, 19 | burn_in=5000, 20 | n_iters=30000, 21 | get_net=get_lc_net, 22 | precondition=True) 23 | 24 | def fit(self, times, losses, configs=None): 25 | """ 26 | function to train the model on the observed data 27 | 28 | Parameters: 29 | ----------- 30 | 31 | times: list 32 | list of numpy arrays of the timesteps for each curve 33 | losses: list 34 | list of numpy arrays of the loss (the actual learning curve) 35 | configs: list or None 36 | list of the configurations for each sample. Each element 37 | has to be a numpy array. Set to None, if no configuration 38 | information is available. 
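        Internally the time steps are rescaled to (0, 1] by dividing by max_num_epochs,
        and the losses are flipped to 1 - loss because LC-Net models increasing
        (accuracy-like) curves.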
39 | """ 40 | 41 | assert np.all(times > 0) and np.all(times <= self.max_num_epochs) 42 | 43 | train = None 44 | targets = None 45 | 46 | for i in range(len(configs)): 47 | 48 | t_idx = times[i] / self.max_num_epochs 49 | 50 | x = np.repeat(np.array(configs[i])[None, :], t_idx.shape[0], axis=0) 51 | x = np.concatenate((x, t_idx[:, None]), axis=1) 52 | 53 | # LCNet assumes increasing curves, if we feed in losses here we have to flip the curves 54 | lc = [1 - l for l in losses[i]] 55 | 56 | if train is None: 57 | train = x 58 | targets = lc 59 | else: 60 | train = np.concatenate((train, x), 0) 61 | targets = np.concatenate((targets, lc), 0) 62 | 63 | self.model.train(train, targets) 64 | 65 | def predict_unseen(self, times, config): 66 | """ 67 | predict the loss of an unseen configuration 68 | 69 | Parameters: 70 | ----------- 71 | 72 | times: numpy array 73 | times where to predict the loss 74 | config: numpy array 75 | the numerical representation of the config 76 | 77 | Returns: 78 | -------- 79 | 80 | mean and variance prediction at input times for the given config 81 | """ 82 | 83 | assert np.all(times > 0) and np.all(times <= self.max_num_epochs) 84 | 85 | x = np.array(config)[None, :] 86 | 87 | idx = times / self.max_num_epochs 88 | x = np.repeat(x, idx.shape[0], axis=0) 89 | 90 | x = np.concatenate((x, idx[:, None]), axis=1) 91 | 92 | mean, var = self.model.predict(x) 93 | return 1 - mean, var 94 | 95 | def extend_partial(self, times, obs_times, obs_losses, config=None): 96 | """ 97 | extends a partially observed curve 98 | 99 | Parameters: 100 | ----------- 101 | 102 | times: numpy array 103 | times where to predict the loss 104 | obs_times: numpy array 105 | times where the curve has already been observed 106 | obs_losses: numpy array 107 | corresponding observed losses 108 | config: numpy array 109 | numerical reperesentation of the config; None if no config 110 | information is available 111 | 112 | Returns: 113 | -------- 114 | 115 | mean and variance prediction at input times 116 | 117 | 118 | """ 119 | return self.predict_unseen(times, config) 120 | 121 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/parego.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import math 4 | import copy 5 | import logging 6 | 7 | import numpy as np 8 | 9 | import ConfigSpace as CS 10 | 11 | from hpbandster.core.master import Master 12 | from hpbandster.optimizers.iterations import SuccessiveHalvingParEGO 13 | from hpbandster.optimizers.config_generators.parego import ParEGO as CG_PAREGO 14 | 15 | 16 | class ParEGO(Master): 17 | def __init__(self, configspace=None, history_dir=None, 18 | eta=3, min_budget=0.01, max_budget=1, 19 | min_points_in_model=None, top_n_percent=10, 20 | num_samples=24, random_fraction=1 / 3, bandwidth_factor=3, 21 | min_bandwidth=1e-3, 22 | **kwargs): 23 | """ 24 | BOHB performs robust and efficient hyperparameter optimization 25 | at scale by combining the speed of Hyperband searches with the 26 | guidance and guarantees of convergence of Bayesian 27 | Optimization. Instead of sampling new configurations at random, 28 | BOHB uses kernel density estimators to select promising candidates. 29 | 30 | .. 
highlight:: none 31 | 32 | For reference: :: 33 | 34 | @InProceedings{falkner-icml-18, 35 | title = {{BOHB}: Robust and Efficient Hyperparameter Optimization at Scale}, 36 | author = {Falkner, Stefan and Klein, Aaron and Hutter, Frank}, 37 | booktitle = {Proceedings of the 35th International Conference on Machine Learning}, 38 | pages = {1436--1445}, 39 | year = {2018}, 40 | } 41 | 42 | Parameters 43 | ---------- 44 | configspace: ConfigSpace object 45 | valid representation of the search space 46 | eta : float 47 | In each iteration, a complete run of sequential halving is executed. In it, 48 | after evaluating each configuration on the same subset size, only a fraction of 49 | 1/eta of them 'advances' to the next round. 50 | Must be greater or equal to 2. 51 | min_budget : float 52 | The smallest budget to consider. Needs to be positive! 53 | max_budget : float 54 | The largest budget to consider. Needs to be larger than min_budget! 55 | The budgets will be geometrically distributed 56 | :math:`a^2 + b^2 = c^2 \sim \eta^k` for :math:`k\in [0, 1, ... , num\_subsets - 1]`. 57 | min_points_in_model: int 58 | number of observations to start building a KDE. Default 'None' means 59 | dim+1, the bare minimum. 60 | top_n_percent: int 61 | percentage ( between 1 and 99, default 15) of the observations that are considered good. 62 | num_samples: int 63 | number of samples to optimize EI (default 64) 64 | random_fraction: float 65 | fraction of purely random configurations that are sampled from the 66 | prior without the model. 67 | bandwidth_factor: float 68 | to encourage diversity, the points proposed to optimize EI, are sampled 69 | from a 'widened' KDE where the bandwidth is multiplied by this factor (default: 3) 70 | min_bandwidth: float 71 | to keep diversity, even when all (good) samples have the same value for one of the parameters, 72 | a minimum bandwidth (Default: 1e-3) is used instead of zero. 73 | iteration_kwargs: dict 74 | kwargs to be added to the instantiation of each iteration 75 | """ 76 | 77 | # TODO: Propper check for ConfigSpace object! 
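        # ParEGO reduces the two objectives to a single scalar loss: each comparison
        # draws a random weight vector w (normalised to sum to 1) and scores a cost
        # vector f with the augmented Tchebycheff scalarisation
        #     max_k(w_k * f_k) + rho * sum_k(w_k * f_k)
        # (see SuccessiveHalvingParEGO.parEG0_scalarization), so the single-objective
        # BOHB-style machinery below can keep working with one scalar loss.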
78 | if configspace is None: 79 | raise ValueError("You have to provide a valid CofigSpace object") 80 | 81 | cg = CG_PAREGO(configspace=configspace, 82 | history_dir=history_dir, 83 | run_id=kwargs['run_id'], 84 | min_points_in_model=min_points_in_model, 85 | top_n_percent=top_n_percent, 86 | num_samples=num_samples, 87 | random_fraction=random_fraction, 88 | bandwidth_factor=bandwidth_factor, 89 | min_bandwidth=min_bandwidth 90 | ) 91 | 92 | super().__init__(config_generator=cg, **kwargs) 93 | 94 | # Hyperband related stuff 95 | self.eta = eta 96 | self.min_budget = min_budget 97 | self.max_budget = max_budget 98 | 99 | self.currently_writting = False 100 | 101 | # precompute some HB stuff 102 | self.max_SH_iter = -int(np.log(min_budget / max_budget) / np.log(eta)) + 1 103 | self.budgets = max_budget * np.power(eta, -np.linspace(self.max_SH_iter - 1, 0, self.max_SH_iter)) 104 | 105 | self.config.update({ 106 | 'eta': eta, 107 | 'min_budget': min_budget, 108 | 'max_budget': max_budget, 109 | 'budgets': self.budgets, 110 | 'max_SH_iter': self.max_SH_iter, 111 | 'min_points_in_model': min_points_in_model, 112 | 'top_n_percent': top_n_percent, 113 | 'num_samples': num_samples, 114 | 'random_fraction': random_fraction, 115 | 'bandwidth_factor': bandwidth_factor, 116 | 'min_bandwidth': min_bandwidth 117 | }) 118 | 119 | def is_write(self): 120 | return self.currently_writting 121 | 122 | def get_next_iteration(self, iteration, iteration_kwargs={}): 123 | """ 124 | BO-HB uses (just like Hyperband) SuccessiveHalving for each iteration. 125 | See Li et al. (2016) for reference. 126 | 127 | Parameters 128 | ---------- 129 | iteration: int 130 | the index of the iteration to be instantiated 131 | 132 | Returns 133 | ------- 134 | SuccessiveHalving: the SuccessiveHalving iteration with the 135 | corresponding number of configurations 136 | """ 137 | 138 | # number of 'SH rungs' 139 | s = self.max_SH_iter - 1 - (iteration % self.max_SH_iter) 140 | # number of configurations in that bracket 141 | n0 = int(np.floor((self.max_SH_iter) / (s + 1)) * self.eta ** s) 142 | ns = [max(int(n0 * (self.eta ** (-i))), 1) for i in range(s + 1)] 143 | 144 | return (SuccessiveHalvingParEGO(HPB_iter=iteration, num_configs=ns, budgets=self.budgets[(-s - 1):], 145 | config_sampler=self.config_generator.get_config, **iteration_kwargs)) 146 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/optimizers/randomsearch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import math 4 | import copy 5 | import logging 6 | 7 | import numpy as np 8 | 9 | 10 | import ConfigSpace as CS 11 | 12 | from hpbandster.core.master import Master 13 | from hpbandster.optimizers.iterations import SuccessiveHalving 14 | from hpbandster.optimizers.config_generators.random_sampling import RandomSampling as RS 15 | 16 | class RandomSearch(Master): 17 | def __init__(self, configspace = None, 18 | eta = 3, min_budget=1, max_budget=1, 19 | **kwargs 20 | ): 21 | """ 22 | Implements a random search across the search space for comparison. 23 | Candidates are sampled at random and run on the maximum budget. 24 | 25 | Parameters 26 | ---------- 27 | configspace: ConfigSpace object 28 | valid representation of the search space 29 | eta : float 30 | In each iteration, a complete run of sequential halving is executed. 
In it, 31 | after evaluating each configuration on the same subset size, only a fraction of 32 | 1/eta of them 'advances' to the next round. 33 | Must be greater than or equal to 2. 34 | budget : float 35 | budget for the evaluation 36 | """ 37 | 38 | # TODO: Proper check for ConfigSpace object! 39 | if configspace is None: 40 | raise ValueError("You have to provide a valid ConfigSpace object") 41 | 42 | 43 | 44 | cg = RS( configspace = configspace ) 45 | 46 | super().__init__(config_generator=cg, **kwargs) 47 | 48 | # Hyperband related stuff 49 | self.eta = eta 50 | self.min_budget = max_budget 51 | self.max_budget = max_budget 52 | 53 | 54 | # precompute some HB stuff 55 | self.max_SH_iter = -int(np.log(min_budget/max_budget)/np.log(eta)) + 1 56 | self.budgets = max_budget * np.power(eta, -np.linspace(self.max_SH_iter-1, 0, self.max_SH_iter)) 57 | 58 | # max total budget for one iteration 59 | self.budget_per_iteration = sum([b*self.eta**i for i, b in enumerate(self.budgets[::-1])]) 60 | 61 | self.config.update({ 62 | 'eta' : eta, 63 | 'min_budget' : max_budget, 64 | 'max_budget' : max_budget, 65 | }) 66 | 67 | def get_next_iteration(self, iteration, iteration_kwargs={}): 68 | """ 69 | Returns a SH iteration with only evaluations on the biggest budget 70 | 71 | Parameters 72 | ---------- 73 | iteration: int 74 | the index of the iteration to be instantiated 75 | 76 | Returns 77 | ------- 78 | SuccessiveHalving: the SuccessiveHalving iteration with the 79 | corresponding number of configurations 80 | """ 81 | 82 | 83 | budgets = [self.max_budget] 84 | ns = [self.budget_per_iteration//self.max_budget] 85 | 86 | return(SuccessiveHalving(HPB_iter=iteration, num_configs=ns, budgets=budgets, config_sampler=self.config_generator.get_config, **iteration_kwargs)) 87 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/utils.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import json 3 | import threading 4 | 5 | import Pyro4 6 | import Pyro4.naming 7 | 8 | 9 | from hpbandster.core.result import Result 10 | from hpbandster.core.base_iteration import Datum 11 | 12 | 13 | 14 | def nic_name_to_host(nic_name): 15 | """ translates the name of a network card into a valid host name""" 16 | from netifaces import ifaddresses, AF_INET 17 | host = ifaddresses(nic_name).setdefault(AF_INET, [{'addr': 'No IP addr'}] )[0]['addr'] 18 | return(host) 19 | 20 | 21 | 22 | def start_local_nameserver(host=None, port=0, nic_name=None): 23 | """ 24 | starts a Pyro4 nameserver in a daemon thread 25 | 26 | Parameters: 27 | ----------- 28 | host: str 29 | the hostname to use for the nameserver 30 | port: int 31 | the port to be used.
Default =0 means a random port 32 | nic_name: str 33 | name of the network interface to use 34 | 35 | Returns: 36 | -------- 37 | tuple (str, int): 38 | the host name and the used port 39 | """ 40 | 41 | if host is None: 42 | if nic_name is None: 43 | host = 'localhost' 44 | else: 45 | host = nic_name_to_host(nic_name) 46 | 47 | uri, ns, _ = Pyro4.naming.startNS(host=host, port=port) 48 | host, port = ns.locationStr.split(':') 49 | 50 | 51 | thread = threading.Thread(target=ns.requestLoop, name='Pyro4 nameserver started by HpBandSter') 52 | thread.daemon=True 53 | 54 | thread.start() 55 | return(host, int(port)) 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/workers/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/hpbandster/workers/hpolibbenchmark.py: -------------------------------------------------------------------------------- 1 | 2 | import ConfigSpace as CS 3 | from hpbandster.core.worker import Worker 4 | 5 | class HPOlib2Worker(Worker): 6 | def __init__(self,benchmark, configspace=None, budget_name='budget', budget_preprocessor=None, measure_test_loss=False, config_as_array=True, **kwargs): 7 | 8 | super().__init__(**kwargs) 9 | self.benchmark = benchmark 10 | 11 | if configspace is None: 12 | self.configspace = benchmark.get_configuration_space() 13 | else: 14 | self.configspace = configspace 15 | 16 | self.budget_name=budget_name 17 | 18 | if budget_preprocessor is None: 19 | self.budget_preprocessor = lambda b: b 20 | else: 21 | self.budget_preprocessor = budget_preprocessor 22 | 23 | self.config_as_array = config_as_array 24 | 25 | self.measure_test_loss = measure_test_loss 26 | 27 | def compute(self, config, budget, **kwargs): 28 | 29 | if self.config_as_array: 30 | c = CS.Configuration(self.configspace, values=config) 31 | else: 32 | c = config 33 | 34 | kwargs = {self.budget_name: self.budget_preprocessor(budget)} 35 | res = self.benchmark.objective_function(c, **kwargs) 36 | if self.measure_test_loss: 37 | del kwargs[self.budget_name] 38 | res['test_loss'] = self.benchmark.objective_function_test(c, **kwargs)['function_value'] 39 | return({ 40 | 'loss': res['function_value'], 41 | 'info': res 42 | }) 43 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/mobohb_worker.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | from ax import Data, GeneratorRun, Arm 4 | import pandas as pd 5 | 6 | from hpbandster.core.worker import Worker 7 | import ConfigSpace as CS 8 | import numpy as np 9 | 10 | 11 | 12 | class MOBOHBWorker(Worker): 13 | def __init__(self, experiment, search_space, eval_function, seed=42, **kwargs): 14 | super().__init__(**kwargs) 15 | 16 | self.experiment = experiment 17 | self.eval_function = eval_function 18 | self.search_space = search_space 19 | self.seed = seed 20 | 21 | def tchebycheff_norm(self, cost, rho=0.05): 22 | w = np.random.random_sample(2) 23 | w /= np.sum(w) 24 | 25 | w_f = w * cost 26 | max_k = np.max(w_f) 27 | rho_sum_wf = rho * np.sum(w_f) 28 | return max_k + rho_sum_wf 29 | 30 | def compute(self, config_id:int, config: CS.Configuration, budget:float, working_directory:str, *args, **kwargs) -> dict: 31 | 32 | 33 | params = deepcopy(config) 34 | params['budget'] = 
int(budget) 35 | 36 | params['n_conv_0'] = params['n_conv_0'] if 'n_conv_0' in params else 16 37 | params['n_conv_1'] = params['n_conv_1'] if 'n_conv_1' in params else 16 38 | params['n_conv_2'] = params['n_conv_2'] if 'n_conv_2' in params else 16 39 | 40 | params['n_fc_0'] = params['n_fc_0'] if 'n_fc_0' in params else 16 41 | params['n_fc_1'] = params['n_fc_1'] if 'n_fc_1' in params else 16 42 | params['n_fc_2'] = params['n_fc_2'] if 'n_fc_2' in params else 16 43 | 44 | params['kernel_size'] = [3, 5, 7][params['kernel_size']] 45 | params['batch_norm'] = bool(params['batch_norm']) 46 | params['global_avg_pooling'] = bool(params['global_avg_pooling']) 47 | params['id'] = str(config_id) 48 | 49 | trial = self.experiment.new_trial(GeneratorRun([Arm(params, name=str(config_id))])) 50 | data = self.experiment.eval_trial(trial) 51 | 52 | acc = float(data.df[data.df['metric_name'] == 'val_acc_1']['mean']) 53 | len = float(data.df[data.df['metric_name'] == 'num_params']['mean']) 54 | 55 | return {'loss': (acc, len)} 56 | -------------------------------------------------------------------------------- /baselines/methods/mobohb/run_mobohb.py: -------------------------------------------------------------------------------- 1 | from time import sleep 2 | from threading import Thread 3 | import os 4 | import shutil 5 | 6 | import argparse 7 | 8 | import sys 9 | from pathlib import Path 10 | 11 | sys.path.append(str(Path(__file__).parent)) 12 | 13 | from hpbandster.core.nameserver import NameServer 14 | from hpbandster.optimizers.mobohb import MOBOHB 15 | from mobohb_worker import MOBOHBWorker 16 | 17 | 18 | def main_mobohb(): 19 | res = mobohb.run(n_iterations=10e20) 20 | 21 | def str2bool(v): 22 | if isinstance(v, bool): 23 | return v 24 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 25 | return True 26 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 27 | return False 28 | else: 29 | raise argparse.ArgumentTypeError('Boolean value expected.') 30 | 31 | 32 | def get_MOBOHB( 33 | experiment, 34 | search_space, 35 | num_initial_samples=10, 36 | num_candidates=24, 37 | gamma=0.10, 38 | seed=0, 39 | num_iterations=2000, 40 | history_dir=os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'history', 'mobohb'), 41 | init_method='random', 42 | budget=25, 43 | min_budget=5, 44 | max_budget=25, 45 | init=True, 46 | ): 47 | 48 | NS = NameServer(run_id=str(seed), host='127.0.0.1', port=0) 49 | ns_host, ns_port = NS.start() 50 | w = MOBOHBWorker(experiment, search_space.as_uniform_space(), None, seed, run_id=str(seed), host='127.0.0.1', nameserver=ns_host, nameserver_port=ns_port) 51 | w.run(background=True) 52 | 53 | motpe_params = { 54 | 'init_method': init_method, 55 | 'num_initial_samples': num_initial_samples, 56 | 'num_candidates': num_candidates, 57 | 'gamma': gamma, 58 | 'budget': budget 59 | } 60 | mobohb = MOBOHB(configspace=search_space.as_uniform_space(), parameters=motpe_params, history_dir=history_dir, init=init, 61 | run_id=str(seed), nameserver=ns_host, nameserver_port=ns_port, 62 | min_budget=min_budget, max_budget=max_budget 63 | ) 64 | 65 | main_mobohb = lambda : mobohb.run(n_iterations=num_iterations) 66 | 67 | t = Thread(target=main_mobohb) 68 | t.daemon = True 69 | t.start() 70 | 71 | snoozeiness = 24 * 3600 72 | mobohb.is_write() 73 | sleep(snoozeiness) 74 | 75 | while mobohb.is_write(): 76 | sleep(2) 77 | 78 | mobohb.shutdown(shutdown_workers=True) 79 | NS.shutdown() 80 | 81 | return experiment 82 | 
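For reference, the augmented Tchebycheff scalarization defined in MOBOHBWorker.tchebycheff_norm above collapses a two-objective cost vector into a single scalar using a randomly drawn weight vector (ParEGO-style). A minimal standalone sketch of the same computation; the cost values in the example call are made up for illustration:

import numpy as np

def tchebycheff_norm(cost, rho=0.05):
    # Random weights on the 2-simplex, one fresh draw per scalarization.
    w = np.random.random_sample(2)
    w /= np.sum(w)
    # Augmented Tchebycheff norm: worst weighted objective plus a small
    # rho-weighted sum, which keeps the scalarization monotone in both objectives.
    w_f = w * cost
    return np.max(w_f) + rho * np.sum(w_f)

# Example with an illustrative (error, model-size) cost pair.
print(tchebycheff_norm(np.array([0.25, 0.60])))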
-------------------------------------------------------------------------------- /baselines/methods/msehvi/__init__.py: -------------------------------------------------------------------------------- 1 | from .msehvi import MSEHVI -------------------------------------------------------------------------------- /baselines/methods/shemoa/__init__.py: -------------------------------------------------------------------------------- 1 | from .shemoa import SHEMOA, Mutation, ParentSelection, Recombination -------------------------------------------------------------------------------- /baselines/problems/__init__.py: -------------------------------------------------------------------------------- 1 | from .simple_problems import get_branin_currin, BraninCurrinEvalFunction 2 | from .fashion import get_fashion 3 | from .flowers import get_flowers -------------------------------------------------------------------------------- /baselines/problems/fashion/__init__.py: -------------------------------------------------------------------------------- 1 | from .fashionnet import evaluate_network 2 | from .fashionnet import extract_num_parameters as discrete_fashion 3 | from .search_space import CustomSearchSpace as FashionSearchSpace 4 | from .problem import get_fashion -------------------------------------------------------------------------------- /baselines/problems/fashion/data/x_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/problems/fashion/data/x_test.npy -------------------------------------------------------------------------------- /baselines/problems/fashion/data/x_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/problems/fashion/data/x_train.npy -------------------------------------------------------------------------------- /baselines/problems/fashion/data/x_val.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/problems/fashion/data/x_val.npy -------------------------------------------------------------------------------- /baselines/problems/fashion/data/y_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/problems/fashion/data/y_test.npy -------------------------------------------------------------------------------- /baselines/problems/fashion/data/y_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/problems/fashion/data/y_train.npy -------------------------------------------------------------------------------- /baselines/problems/fashion/data/y_val.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/problems/fashion/data/y_val.npy -------------------------------------------------------------------------------- /baselines/problems/fashion/problem.py: 
-------------------------------------------------------------------------------- 1 | from ax import Metric 2 | from ax import MultiObjective 3 | from ax import ObjectiveThreshold 4 | from ax import MultiObjectiveOptimizationConfig 5 | 6 | from baselines import MultiObjectiveSimpleExperiment 7 | from .fashionnet import evaluate_network 8 | from .search_space import CustomSearchSpace 9 | 10 | def get_fashion(name=None): 11 | 12 | val_acc_1 = Metric('val_acc_1', True) 13 | val_acc_3 = Metric('val_acc_3', True) 14 | tst_acc_1 = Metric('tst_acc_1', True) 15 | tst_acc_3 = Metric('tst_acc_3', True) 16 | num_params = Metric('num_params', True) 17 | 18 | objective = MultiObjective([val_acc_1, num_params]) 19 | thresholds = [ 20 | ObjectiveThreshold(val_acc_1, 0.0), 21 | ObjectiveThreshold(num_params, 8.0) 22 | ] 23 | optimization_config = MultiObjectiveOptimizationConfig( 24 | objective=objective, 25 | objective_thresholds=thresholds 26 | ) 27 | 28 | return MultiObjectiveSimpleExperiment( 29 | name=name, 30 | search_space=CustomSearchSpace().as_ax_space(), 31 | evaluation_function=evaluate_network, 32 | optimization_config=optimization_config, 33 | extra_metrics=[val_acc_3, tst_acc_1, tst_acc_3] 34 | ) 35 | -------------------------------------------------------------------------------- /baselines/problems/fashion/search_space.py: -------------------------------------------------------------------------------- 1 | """A common search space for all the experiments 2 | """ 3 | 4 | import ConfigSpace as CS 5 | from ConfigSpace.hyperparameters import UniformIntegerHyperparameter 6 | from ConfigSpace.hyperparameters import UniformFloatHyperparameter 7 | from ConfigSpace.hyperparameters import CategoricalHyperparameter 8 | 9 | 10 | class CustomSearchSpace(CS.ConfigurationSpace): 11 | 12 | def __init__(self): 13 | super(CustomSearchSpace, self).__init__() 14 | 15 | # Convolution 16 | n_conv_l = UniformIntegerHyperparameter("n_conv_l", 1, 3, default_value=3) 17 | n_conv_0 = UniformIntegerHyperparameter("n_conv_0", 16, 1024, default_value=128, log=True) 18 | n_conv_1 = UniformIntegerHyperparameter("n_conv_1", 16, 1024, default_value=128, log=True) 19 | n_conv_2 = UniformIntegerHyperparameter("n_conv_2", 16, 1024, default_value=128, log=True) 20 | 21 | # Dense 22 | n_fc_l = UniformIntegerHyperparameter("n_fc_l", 1, 3, default_value=3) 23 | n_fc_0 = UniformIntegerHyperparameter("n_fc_0", 2, 512, default_value=32, log=True) 24 | n_fc_1 = UniformIntegerHyperparameter("n_fc_1", 2, 512, default_value=32, log=True) 25 | n_fc_2 = UniformIntegerHyperparameter("n_fc_2", 2, 512, default_value=32, log=True) 26 | 27 | # Kernel Size 28 | ks = CategoricalHyperparameter("kernel_size", choices=[7, 5, 3], default_value=5) 29 | 30 | # Learning Rate 31 | lr = UniformFloatHyperparameter('lr_init', 0.00001, 1.0, default_value=0.001, log=True) 32 | 33 | # Use Batch Normalization 34 | bn = CategoricalHyperparameter("batch_norm", choices=[False, True], default_value=False) 35 | 36 | # Batch size 37 | bs = UniformIntegerHyperparameter('batch_size', 1, 512, default_value=128, log=True) 38 | 39 | # Global Avg Pooling 40 | ga = CategoricalHyperparameter("global_avg_pooling", choices=[False, True], default_value=True) 41 | 42 | # Conditions 43 | 44 | cond1 = CS.conditions.InCondition(n_conv_2, n_conv_l, [3]) 45 | cond2 = CS.conditions.InCondition(n_conv_1, n_conv_l, [2, 3]) 46 | 47 | cond3 = CS.conditions.InCondition(n_fc_2, n_fc_l, [3]) 48 | cond4 = CS.conditions.InCondition(n_fc_1, n_fc_l, [2, 3]) 49 | cond5 = 
CS.conditions.InCondition(n_fc_0, n_fc_l, [1, 2, 3]) 50 | 51 | self.not_mutables = ['n_conv_l', 'n_fc_l'] 52 | 53 | self.add_hyperparameters([n_conv_l, n_conv_0, n_conv_1, n_conv_2]) 54 | self.add_hyperparameters([n_fc_l, n_fc_0, n_fc_1, n_fc_2]) 55 | self.add_hyperparameters([ks, lr, bn, bs, ga]) 56 | self.add_conditions([cond1, cond2, cond3, cond4, cond5]) 57 | 58 | def as_uniform_space(self): 59 | 60 | # Convolution 61 | n_conv_l = self.get_hyperparameter('n_conv_l') 62 | n_conv_0 = self.get_hyperparameter('n_conv_0') 63 | n_conv_1 = self.get_hyperparameter('n_conv_1') 64 | n_conv_2 = self.get_hyperparameter('n_conv_2') 65 | 66 | # Dense 67 | n_fc_l = self.get_hyperparameter('n_fc_l') 68 | n_fc_0 = self.get_hyperparameter('n_fc_0') 69 | n_fc_1 = self.get_hyperparameter('n_fc_1') 70 | n_fc_2 = self.get_hyperparameter('n_fc_2') 71 | 72 | # Kernel Size 73 | ks = UniformIntegerHyperparameter('kernel_size', 0, 2, default_value=1) 74 | 75 | # Learning Rate 76 | lr = self.get_hyperparameter('lr_init') 77 | 78 | # Use Batch Normalization 79 | bn = UniformIntegerHyperparameter("batch_norm", 0, 1, default_value=1) 80 | 81 | # Batch size 82 | bs = self.get_hyperparameter('batch_size') 83 | 84 | # Global Avg Pooling 85 | ga = UniformIntegerHyperparameter('global_avg_pooling', 0, 1, default_value=1) 86 | 87 | # Conditions 88 | cond1 = CS.conditions.InCondition(n_conv_2, n_conv_l, [3]) 89 | cond2 = CS.conditions.InCondition(n_conv_1, n_conv_l, [2, 3]) 90 | 91 | cond3 = CS.conditions.InCondition(n_fc_2, n_fc_l, [3]) 92 | cond4 = CS.conditions.InCondition(n_fc_1, n_fc_l, [2, 3]) 93 | cond5 = CS.conditions.InCondition(n_fc_0, n_fc_l, [1, 2, 3]) 94 | 95 | cs = CS.ConfigurationSpace() 96 | 97 | cs.add_hyperparameters([n_conv_l, n_conv_0, n_conv_1, n_conv_2]) 98 | cs.add_hyperparameters([n_fc_l, n_fc_0, n_fc_1, n_fc_2]) 99 | cs.add_hyperparameters([ks, lr, bn, bs, ga]) 100 | cs.add_conditions([cond1, cond2, cond3, cond4, cond5]) 101 | return cs 102 | 103 | def as_ax_space(self): 104 | from ax import ParameterType, RangeParameter, FixedParameter, ChoiceParameter, SearchSpace 105 | 106 | # Convolution 107 | n_conv_l = RangeParameter('n_conv_l', ParameterType.INT, 1, 3) 108 | n_conv_0 = RangeParameter('n_conv_0', ParameterType.INT, 16, 1024, True) 109 | n_conv_1 = RangeParameter('n_conv_1', ParameterType.INT, 16, 1024, True) 110 | n_conv_2 = RangeParameter('n_conv_2', ParameterType.INT, 16, 1024, True) 111 | 112 | # Dense 113 | n_fc_l = RangeParameter('n_fc_l', ParameterType.INT, 1, 3) 114 | n_fc_0 = RangeParameter('n_fc_0', ParameterType.INT, 2, 512, True) 115 | n_fc_1 = RangeParameter('n_fc_1', ParameterType.INT, 2, 512, True) 116 | n_fc_2 = RangeParameter('n_fc_2', ParameterType.INT, 2, 512, True) 117 | 118 | # Kernel Size 119 | ks = ChoiceParameter('kernel_size', ParameterType.INT, values=[3, 5, 7]) 120 | 121 | # Learning Rate 122 | lr = RangeParameter('lr_init', ParameterType.FLOAT, 0.00001, 1.0, True) 123 | 124 | # Use Batch Normalization 125 | bn = ChoiceParameter('batch_norm', ParameterType.BOOL, values=[True, False]) 126 | 127 | # Batch size 128 | bs = RangeParameter('batch_size', ParameterType.INT, 1, 512, True) 129 | 130 | # Global Avg Pooling 131 | ga = ChoiceParameter('global_avg_pooling', ParameterType.BOOL, values=[True, False]) 132 | 133 | b = FixedParameter('budget', ParameterType.INT, 25) 134 | 135 | i = FixedParameter('id', ParameterType.STRING, 'dummy') 136 | 137 | return SearchSpace( 138 | parameters=[n_conv_l, n_conv_0, n_conv_1, n_conv_2, n_fc_l, n_fc_0, n_fc_1, n_fc_2, ks, lr, bn, bs, 
ga, b, i], 139 | ) 140 | 141 | 142 | def sample_hyperparameter(self, hp): 143 | if not self.is_mutable_hyperparameter(hp): 144 | raise Exception("Hyperparameter {} is not mutable and must be fixed".format(hp)) 145 | return self.get_hyperparameter(hp).sample(self.random) 146 | 147 | def is_mutable_hyperparameter(self, hp): 148 | return hp not in self.not_mutables -------------------------------------------------------------------------------- /baselines/problems/fashion/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class Accuracy: 4 | 5 | def __init__(self): 6 | self.reset() 7 | 8 | self.sum = 0 9 | self.cnt = 0 10 | 11 | def reset(self): 12 | self.sum = 0 13 | self.cnt = 0 14 | 15 | def __call__(self, y_true, y_pred): 16 | self.sum += torch.sum(y_true == y_pred).to('cpu').numpy() 17 | self.cnt += y_true.size(0) 18 | 19 | return self.sum / self.cnt 20 | 21 | class AccuracyTop1: 22 | 23 | def __init__(self): 24 | self.reset() 25 | 26 | self.sum = 0 27 | self.cnt = 0 28 | 29 | def reset(self): 30 | self.sum = 0 31 | self.cnt = 0 32 | 33 | def __call__(self, y_true, y_pred): 34 | 35 | self.sum += y_pred.topk(1)[1].eq(y_true.argmax(-1).reshape(-1, 1).expand(-1, 1)).float().sum().to('cpu').numpy() 36 | self.cnt += y_pred.size(0) 37 | 38 | return self.sum / self.cnt 39 | 40 | class AccuracyTop3: 41 | 42 | def __init__(self): 43 | self.reset() 44 | 45 | self.sum = 0 46 | self.cnt = 0 47 | 48 | def reset(self): 49 | self.sum = 0 50 | self.cnt = 0 51 | 52 | def __call__(self, y_true, y_pred): 53 | 54 | self.sum += y_pred.topk(3)[1].eq(y_true.argmax(-1).reshape(-1, 1).expand(-1, 3)).float().sum().to('cpu').numpy() 55 | self.cnt += y_pred.size(0) 56 | 57 | return self.sum / self.cnt 58 | -------------------------------------------------------------------------------- /baselines/problems/flowers/__init__.py: -------------------------------------------------------------------------------- 1 | from .flowernet import evaluate_network 2 | from .flowernet import extract_num_parameters as discrete_flowers 3 | from .search_space import CustomSearchSpace as FlowersSearchSpace 4 | from .problem import get_flowers -------------------------------------------------------------------------------- /baselines/problems/flowers/data/x_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/problems/flowers/data/x_test.npy -------------------------------------------------------------------------------- /baselines/problems/flowers/data/x_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/problems/flowers/data/x_train.npy -------------------------------------------------------------------------------- /baselines/problems/flowers/data/x_val.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/problems/flowers/data/x_val.npy -------------------------------------------------------------------------------- /baselines/problems/flowers/data/y_test.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/problems/flowers/data/y_test.npy -------------------------------------------------------------------------------- /baselines/problems/flowers/data/y_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/problems/flowers/data/y_train.npy -------------------------------------------------------------------------------- /baselines/problems/flowers/data/y_val.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/multi-obj-baselines/e02f4118a87384deb3c2ba77c6b58c39c0389554/baselines/problems/flowers/data/y_val.npy -------------------------------------------------------------------------------- /baselines/problems/flowers/problem.py: -------------------------------------------------------------------------------- 1 | from ax import Metric 2 | from ax import MultiObjective 3 | from ax import ObjectiveThreshold 4 | from ax import MultiObjectiveOptimizationConfig 5 | 6 | from baselines import MultiObjectiveSimpleExperiment 7 | from .flowernet import evaluate_network 8 | from .search_space import CustomSearchSpace 9 | 10 | def get_flowers(name=None): 11 | 12 | val_acc_1 = Metric('val_acc_1', True) 13 | val_acc_3 = Metric('val_acc_3', True) 14 | tst_acc_1 = Metric('tst_acc_1', True) 15 | tst_acc_3 = Metric('tst_acc_3', True) 16 | num_params = Metric('num_params', True) 17 | 18 | objective = MultiObjective([val_acc_1, num_params]) 19 | thresholds = [ 20 | ObjectiveThreshold(val_acc_1, 0.0), 21 | ObjectiveThreshold(num_params, 8.0) 22 | ] 23 | optimization_config = MultiObjectiveOptimizationConfig( 24 | objective=objective, 25 | objective_thresholds=thresholds 26 | ) 27 | 28 | return MultiObjectiveSimpleExperiment( 29 | name=name, 30 | search_space=CustomSearchSpace().as_ax_space(), 31 | eval_function=evaluate_network, 32 | optimization_config=optimization_config, 33 | extra_metrics=[val_acc_3, tst_acc_1, tst_acc_3] 34 | ) 35 | -------------------------------------------------------------------------------- /baselines/problems/flowers/search_space.py: -------------------------------------------------------------------------------- 1 | """A common search space for all the experiments 2 | """ 3 | 4 | import ConfigSpace as CS 5 | from ConfigSpace.hyperparameters import UniformIntegerHyperparameter 6 | from ConfigSpace.hyperparameters import UniformFloatHyperparameter 7 | from ConfigSpace.hyperparameters import CategoricalHyperparameter 8 | 9 | 10 | class CustomSearchSpace(CS.ConfigurationSpace): 11 | 12 | def __init__(self): 13 | super(CustomSearchSpace, self).__init__() 14 | 15 | # Convolution 16 | n_conv_l = UniformIntegerHyperparameter("n_conv_l", 1, 3, default_value=3) 17 | n_conv_0 = UniformIntegerHyperparameter("n_conv_0", 16, 1024, default_value=128, log=True) 18 | n_conv_1 = UniformIntegerHyperparameter("n_conv_1", 16, 1024, default_value=128, log=True) 19 | n_conv_2 = UniformIntegerHyperparameter("n_conv_2", 16, 1024, default_value=128, log=True) 20 | 21 | # Dense 22 | n_fc_l = UniformIntegerHyperparameter("n_fc_l", 1, 3, default_value=3) 23 | n_fc_0 = UniformIntegerHyperparameter("n_fc_0", 2, 512, default_value=32, log=True) 24 | n_fc_1 = UniformIntegerHyperparameter("n_fc_1", 2, 512, default_value=32, log=True) 25 | n_fc_2 = UniformIntegerHyperparameter("n_fc_2", 2, 
512, default_value=32, log=True) 26 | 27 | # Kernel Size 28 | ks = CategoricalHyperparameter("kernel_size", choices=[7, 5, 3], default_value=5) 29 | 30 | # Learning Rate 31 | lr = UniformFloatHyperparameter('lr_init', 0.00001, 1.0, default_value=0.001, log=True) 32 | 33 | # Use Batch Normalization 34 | bn = CategoricalHyperparameter("batch_norm", choices=[False, True], default_value=False) 35 | 36 | # Batch size 37 | bs = UniformIntegerHyperparameter('batch_size', 1, 512, default_value=128, log=True) 38 | 39 | # Global Avg Pooling 40 | ga = CategoricalHyperparameter("global_avg_pooling", choices=[False, True], default_value=True) 41 | 42 | # Conditions 43 | 44 | cond1 = CS.conditions.InCondition(n_conv_2, n_conv_l, [3]) 45 | cond2 = CS.conditions.InCondition(n_conv_1, n_conv_l, [2, 3]) 46 | 47 | cond3 = CS.conditions.InCondition(n_fc_2, n_fc_l, [3]) 48 | cond4 = CS.conditions.InCondition(n_fc_1, n_fc_l, [2, 3]) 49 | cond5 = CS.conditions.InCondition(n_fc_0, n_fc_l, [1, 2, 3]) 50 | 51 | self.not_mutables = ['n_conv_l', 'n_fc_l'] 52 | 53 | self.add_hyperparameters([n_conv_l, n_conv_0, n_conv_1, n_conv_2]) 54 | self.add_hyperparameters([n_fc_l, n_fc_0, n_fc_1, n_fc_2]) 55 | self.add_hyperparameters([ks, lr, bn, bs, ga]) 56 | self.add_conditions([cond1, cond2, cond3, cond4, cond5]) 57 | 58 | def as_uniform_space(self): 59 | 60 | # Convolution 61 | n_conv_l = self.get_hyperparameter('n_conv_l') 62 | n_conv_0 = self.get_hyperparameter('n_conv_0') 63 | n_conv_1 = self.get_hyperparameter('n_conv_1') 64 | n_conv_2 = self.get_hyperparameter('n_conv_2') 65 | 66 | # Dense 67 | n_fc_l = self.get_hyperparameter('n_fc_l') 68 | n_fc_0 = self.get_hyperparameter('n_fc_0') 69 | n_fc_1 = self.get_hyperparameter('n_fc_1') 70 | n_fc_2 = self.get_hyperparameter('n_fc_2') 71 | 72 | # Kernel Size 73 | ks = UniformIntegerHyperparameter('kernel_size', 0, 2, default_value=1) 74 | 75 | # Learning Rate 76 | lr = self.get_hyperparameter('lr_init') 77 | 78 | # Use Batch Normalization 79 | bn = UniformIntegerHyperparameter("batch_norm", 0, 1, default_value=1) 80 | 81 | # Batch size 82 | bs = self.get_hyperparameter('batch_size') 83 | 84 | # Global Avg Pooling 85 | ga = UniformIntegerHyperparameter('global_avg_pooling', 0, 1, default_value=1) 86 | 87 | # Conditions 88 | cond1 = CS.conditions.InCondition(n_conv_2, n_conv_l, [3]) 89 | cond2 = CS.conditions.InCondition(n_conv_1, n_conv_l, [2, 3]) 90 | 91 | cond3 = CS.conditions.InCondition(n_fc_2, n_fc_l, [3]) 92 | cond4 = CS.conditions.InCondition(n_fc_1, n_fc_l, [2, 3]) 93 | cond5 = CS.conditions.InCondition(n_fc_0, n_fc_l, [1, 2, 3]) 94 | 95 | cs = CS.ConfigurationSpace() 96 | 97 | cs.add_hyperparameters([n_conv_l, n_conv_0, n_conv_1, n_conv_2]) 98 | cs.add_hyperparameters([n_fc_l, n_fc_0, n_fc_1, n_fc_2]) 99 | cs.add_hyperparameters([ks, lr, bn, bs, ga]) 100 | cs.add_conditions([cond1, cond2, cond3, cond4, cond5]) 101 | return cs 102 | 103 | def as_ax_space(self): 104 | from ax import ParameterType, RangeParameter, FixedParameter, ChoiceParameter, SearchSpace 105 | 106 | # Convolution 107 | n_conv_l = RangeParameter('n_conv_l', ParameterType.INT, 1, 3) 108 | n_conv_0 = RangeParameter('n_conv_0', ParameterType.INT, 16, 1024, True) 109 | n_conv_1 = RangeParameter('n_conv_1', ParameterType.INT, 16, 1024, True) 110 | n_conv_2 = RangeParameter('n_conv_2', ParameterType.INT, 16, 1024, True) 111 | 112 | # Dense 113 | n_fc_l = RangeParameter('n_fc_l', ParameterType.INT, 1, 3) 114 | n_fc_0 = RangeParameter('n_fc_0', ParameterType.INT, 2, 512, True) 115 | n_fc_1 = 
RangeParameter('n_fc_1', ParameterType.INT, 2, 512, True) 116 | n_fc_2 = RangeParameter('n_fc_2', ParameterType.INT, 2, 512, True) 117 | 118 | # Kernel Size 119 | ks = ChoiceParameter('kernel_size', ParameterType.INT, values=[3, 5, 7]) 120 | 121 | # Learning Rate 122 | lr = RangeParameter('lr_init', ParameterType.FLOAT, 0.00001, 1.0, True) 123 | 124 | # Use Batch Normalization 125 | bn = ChoiceParameter('batch_norm', ParameterType.BOOL, values=[True, False]) 126 | 127 | # Batch size 128 | bs = RangeParameter('batch_size', ParameterType.INT, 1, 512, True) 129 | 130 | # Global Avg Pooling 131 | ga = ChoiceParameter('global_avg_pooling', ParameterType.BOOL, values=[True, False]) 132 | 133 | b = FixedParameter('budget', ParameterType.INT, 25) 134 | 135 | i = FixedParameter('id', ParameterType.STRING, 'dummy') 136 | 137 | return SearchSpace( 138 | parameters=[n_conv_l, n_conv_0, n_conv_1, n_conv_2, n_fc_l, n_fc_0, n_fc_1, n_fc_2, ks, lr, bn, bs, ga, b, i], 139 | ) 140 | 141 | 142 | def sample_hyperparameter(self, hp): 143 | if not self.is_mutable_hyperparameter(hp): 144 | raise Exception("Hyperparameter {} is not mutable and must be fixed".format(hp)) 145 | return self.get_hyperparameter(hp).sample(self.random) 146 | 147 | def is_mutable_hyperparameter(self, hp): 148 | return hp not in self.not_mutables -------------------------------------------------------------------------------- /baselines/problems/flowers/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class Accuracy: 4 | 5 | def __init__(self): 6 | self.reset() 7 | 8 | self.sum = 0 9 | self.cnt = 0 10 | 11 | def reset(self): 12 | self.sum = 0 13 | self.cnt = 0 14 | 15 | def __call__(self, y_true, y_pred): 16 | self.sum += torch.sum(y_true == y_pred).to('cpu').numpy() 17 | self.cnt += y_true.size(0) 18 | 19 | return self.sum / self.cnt 20 | 21 | class AccuracyTop1: 22 | 23 | def __init__(self): 24 | self.reset() 25 | 26 | self.sum = 0 27 | self.cnt = 0 28 | 29 | def reset(self): 30 | self.sum = 0 31 | self.cnt = 0 32 | 33 | def __call__(self, y_true, y_pred): 34 | 35 | self.sum += y_pred.topk(1)[1].eq(y_true.argmax(-1).reshape(-1, 1).expand(-1, 1)).float().sum().to('cpu').numpy() 36 | self.cnt += y_pred.size(0) 37 | 38 | return self.sum / self.cnt 39 | 40 | class AccuracyTop3: 41 | 42 | def __init__(self): 43 | self.reset() 44 | 45 | self.sum = 0 46 | self.cnt = 0 47 | 48 | def reset(self): 49 | self.sum = 0 50 | self.cnt = 0 51 | 52 | def __call__(self, y_true, y_pred): 53 | 54 | self.sum += y_pred.topk(3)[1].eq(y_true.argmax(-1).reshape(-1, 1).expand(-1, 3)).float().sum().to('cpu').numpy() 55 | self.cnt += y_pred.size(0) 56 | 57 | return self.sum / self.cnt 58 | -------------------------------------------------------------------------------- /baselines/problems/simple_problems.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from botorch.test_functions.multi_objective import BraninCurrin 5 | 6 | from ax import Metric 7 | from ax.core.search_space import SearchSpace 8 | from ax.core.objective import MultiObjective 9 | from ax.core.parameter import ParameterType, RangeParameter 10 | from ax.core.outcome_constraint import ObjectiveThreshold 11 | from ax.core.optimization_config import MultiObjectiveOptimizationConfig 12 | 13 | from baselines import MultiObjectiveSimpleExperiment 14 | 15 | def get_branin_currin(name=None): 16 | 17 | metric_a = Metric('a', False) 18 | metric_b = Metric('b', 
False) 19 | 20 | objective = MultiObjective([metric_a, metric_b]) 21 | thresholds = [ 22 | ObjectiveThreshold(metric_a, 0.0), 23 | ObjectiveThreshold(metric_b, 8.0) 24 | ] 25 | optimization_config = MultiObjectiveOptimizationConfig( 26 | objective=objective, 27 | objective_thresholds=thresholds 28 | ) 29 | 30 | x1 = RangeParameter( 31 | name="x1", lower=0, upper=1, parameter_type=ParameterType.FLOAT 32 | ) 33 | x2 = RangeParameter( 34 | name="x2", lower=0, upper=1, parameter_type=ParameterType.FLOAT 35 | ) 36 | 37 | search_space = SearchSpace( 38 | parameters=[x1, x2], 39 | ) 40 | 41 | branin_currin = BraninCurrinEvalFunction() 42 | 43 | return MultiObjectiveSimpleExperiment( 44 | name=name, 45 | search_space=search_space, 46 | eval_function=branin_currin, 47 | optimization_config=optimization_config, 48 | ) 49 | 50 | class BraninCurrinEvalFunction: 51 | def __init__(self): 52 | self.branin_currin = BraninCurrin(negate=True).to( 53 | dtype=torch.double, 54 | device= torch.device("cuda" if torch.cuda.is_available() else "cpu"), 55 | ) 56 | 57 | def __call__(self, x): 58 | x = torch.tensor([x['x1'], x['x2']]) 59 | return { 60 | 'a': (float(self.branin_currin(x)[0]), 0.0), 61 | 'b': (float(self.branin_currin(x)[1]), 0.0), 62 | } 63 | 64 | def discrete_call(self, x): 65 | return self(x)['a'][0] 66 | -------------------------------------------------------------------------------- /examples/bulkandcut.py: -------------------------------------------------------------------------------- 1 | import os 2 | import uuid 3 | import pathlib 4 | import numpy as np 5 | import torch 6 | 7 | import baselines.methods.bulkandcut as bnc 8 | from baselines.problems import get_flowers 9 | from baselines.problems import flowers 10 | from baselines.problems import get_fashion 11 | from baselines.problems import fashion 12 | from baselines import save_experiment 13 | 14 | 15 | def get_datasets(path): 16 | x_train = torch.tensor(np.load(path('x_train.npy'))).float() 17 | x_train = x_train.permute(0, 3, 1, 2) 18 | y_train = torch.tensor(np.load(path('y_train.npy'))).long() 19 | 20 | ds_train = torch.utils.data.TensorDataset(x_train, y_train) 21 | 22 | x_val = torch.tensor(np.load(path('x_val.npy'))).float() 23 | x_val = x_val.permute(0, 3, 1, 2) 24 | y_val = torch.tensor(np.load(path('y_val.npy'))).long() 25 | 26 | ds_val = torch.utils.data.TensorDataset(x_val, y_val) 27 | 28 | 29 | x_test = torch.tensor(np.load(path('x_test.npy'))).float() 30 | x_test = x_test.permute(0, 3, 1, 2) 31 | y_test = torch.tensor(np.load(path('y_test.npy'))).long() 32 | 33 | ds_test = torch.utils.data.TensorDataset(x_test, y_test) 34 | 35 | return ds_train, ds_val, ds_test 36 | 37 | 38 | if __name__ == '__main__': 39 | 40 | # Parameters Flowers 41 | input_shape = (3, 16, 16) 42 | num_classes = 17 43 | budget = 24 * 3600 44 | path = lambda x: str( 45 | pathlib.Path(flowers.__file__). 46 | parent.absolute().joinpath('data').joinpath(x) 47 | ) 48 | experiment = get_flowers('BNC') 49 | 50 | # Parameters Fashion 51 | # input_shape = (1, 28, 28) 52 | # num_classes = 10 53 | # budget = 24 * 3600 54 | # path = lambda x: str( 55 | # pathlib.Path(fashion.__file__). 
56 | # parent.absolute().joinpath('data').joinpath(x) 57 | # ) 58 | # experiment = get_fashion('BNC') 59 | 60 | 61 | ############# 62 | #### BNC #### 63 | ############# 64 | # Run a full optimization: 65 | ds_train, ds_val, ds_test = get_datasets(path) 66 | work_dir = os.path.join('bulkandcutoutput', f"{str(uuid.uuid4())}") 67 | evolution = bnc.Evolution( 68 | experiment, 69 | input_shape=input_shape, 70 | n_classes=num_classes, 71 | work_directory=work_dir, 72 | train_dataset=ds_train, 73 | valid_dataset=ds_val, 74 | test_dataset=ds_test, 75 | debugging=False, 76 | ) 77 | evolution.run(time_budget=budget) 78 | 79 | save_experiment(experiment, f'{experiment.name}.pickle') -------------------------------------------------------------------------------- /examples/mobohb.py: -------------------------------------------------------------------------------- 1 | from baselines.methods.mobohb.run_mobohb import get_MOBOHB 2 | from baselines.problems import get_flowers 3 | from baselines.problems.flowers import FlowersSearchSpace 4 | from baselines.problems import get_fashion 5 | from baselines.problems.fashion import FashionSearchSpace 6 | from baselines import save_experiment 7 | 8 | 9 | if __name__ == '__main__': 10 | 11 | # Parameters Flowers 12 | N_init = 50 13 | num_candidates = 24 14 | gamma = 0.10 15 | min_budget = 5 16 | max_budget = 25 17 | max_function_evals = 2000 18 | search_space = FlowersSearchSpace() 19 | experiment = get_flowers('MOBOHB') 20 | 21 | # Parameters Fashion 22 | # N_init = 10 23 | # num_candidates = 24 24 | # gamma = 0.10 25 | # min_budget = 5 26 | # max_budget = 25 27 | # max_function_evals = 150 28 | # search_space = FashionSearchSpace() 29 | # experiment = get_fashion('MOBOHB') 30 | 31 | 32 | ################ 33 | #### MOBOHB #### 34 | ################ 35 | get_MOBOHB( 36 | experiment, 37 | search_space, 38 | num_initial_samples=N_init, 39 | num_candidates=num_candidates, 40 | gamma=gamma, 41 | num_iterations=max_function_evals, 42 | min_budget=min_budget, 43 | max_budget=max_budget, 44 | ) 45 | save_experiment(experiment, f'{experiment.name}.pickle') 46 | -------------------------------------------------------------------------------- /examples/moshbananas.py: -------------------------------------------------------------------------------- 1 | from baselines.problems import get_flowers 2 | from baselines.problems.flowers import FlowersSearchSpace 3 | from baselines.problems import get_fashion 4 | from baselines.problems.fashion import FashionSearchSpace 5 | from baselines import save_experiment 6 | from baselines.methods.mobananas import get_MOSHBANANAS 7 | 8 | 9 | if __name__ == '__main__': 10 | 11 | # Parameters Flowers 12 | N_init = 10 13 | min_budget = 5 14 | max_budget = 25 15 | max_function_evals = 10000 16 | num_arch=20 17 | select_models=10 18 | eta=3 19 | search_space = FlowersSearchSpace() 20 | experiment = get_flowers('MOSHBANANAS') 21 | 22 | # Parameters Fashion 23 | # N_init = 10 24 | # min_budget = 5 25 | # max_budget = 25 26 | # max_function_evals = 400 27 | # num_arch=20 28 | # select_models=10 29 | # eta=3 30 | # search_space = FashionSearchSpace() 31 | # experiment = get_fashion('MOSHBANANAS') 32 | 33 | 34 | ##################### 35 | #### MOSHBANANAS #### 36 | ##################### 37 | get_MOSHBANANAS( 38 | experiment, 39 | search_space, 40 | initial_samples=N_init, 41 | select_models=select_models, 42 | num_arch=num_arch, 43 | min_budget=min_budget, 44 | max_budget=max_budget, 45 | function_evaluations=max_function_evals, 46 | eta=eta 47
| ) 48 | save_experiment(experiment, f'{experiment.name}.pickle') 49 | -------------------------------------------------------------------------------- /examples/msehvi.py: -------------------------------------------------------------------------------- 1 | from baselines.problems.flowers import discrete_flowers 2 | from baselines.problems import get_flowers 3 | from baselines.problems.fashion import discrete_fashion 4 | from baselines.problems import get_fashion 5 | from baselines.problems import get_branin_currin, BraninCurrinEvalFunction 6 | from baselines import save_experiment 7 | from baselines.methods.msehvi.msehvi import MSEHVI 8 | from ax import Models 9 | 10 | 11 | if __name__ == '__main__': 12 | 13 | # Parameters Flowers 14 | N_init = 50 # Number of initial random samples 15 | N = 20000 # Number of MS-EHVI samples (it is not important) 16 | discrete_f = discrete_flowers # Discrete function 17 | discrete_m = 'num_params' # Name of the discrete metric 18 | experiment = get_flowers('MSEHVI') # Function to get the problem 19 | 20 | # Parameters Fashion 21 | # N_init = 10 # Number of initial random samples 22 | # N = 20000 # Number of MS-EHVI samples (it is not important) 23 | # discrete_f = discrete_fashion # Discrete function 24 | # discrete_m = 'num_params' # Name of the discrete metric 25 | # experiment = get_fashion('MSEHVI') # Function to get the problem 26 | 27 | # Parameters Branin Currin 28 | # N_init = 10 29 | # N = 100 30 | # discrete_f = BraninCurrinEvalFunction().discrete_call 31 | # discrete_m = 'a' 32 | # experiment = get_branin_currin('MSEHVI') 33 | 34 | ################# 35 | #### MS-EHVI #### 36 | ################# 37 | 38 | # Random search initialization 39 | for _ in range(N_init): 40 | experiment.new_trial(Models.SOBOL(experiment.search_space).gen(1)) 41 | experiment.fetch_data() 42 | 43 | # Proper guided search 44 | msehvi = MSEHVI(experiment, discrete_m, discrete_f) 45 | for _ in range(N): 46 | msehvi.step() 47 | 48 | save_experiment(experiment, f'{experiment.name}.pickle') 49 | -------------------------------------------------------------------------------- /examples/random_search.py: -------------------------------------------------------------------------------- 1 | from ax import Models 2 | 3 | from baselines.problems import get_flowers 4 | from baselines.problems import get_branin_currin 5 | from baselines.problems import get_fashion 6 | from baselines import save_experiment 7 | 8 | if __name__ == '__main__': 9 | 10 | # Parameters Flowers 11 | N = 20000 # Number of samples (it is not important) 12 | experiment = get_flowers('RandomSearch') # Function to get the problem 13 | 14 | # Parameters Fashion 15 | # N = 20000 # Number of samples (it is not important) 16 | # experiment = get_fashion('RandomSearch') # Function to get the problem 17 | 18 | ####################### 19 | #### Random Search #### 20 | ####################### 21 | for _ in range(N): 22 | experiment.new_trial(Models.SOBOL(experiment.search_space).gen(1)) 23 | experiment.fetch_data() 24 | 25 | print(experiment.fetch_data().df) 26 | save_experiment(experiment, f'{experiment.name}.pickle') 27 | -------------------------------------------------------------------------------- /examples/shemoa.py: -------------------------------------------------------------------------------- 1 | from baselines.problems import get_flowers 2 | from baselines.problems.flowers import FlowersSearchSpace 3 | from baselines.problems import get_fashion 4 | from baselines.problems.fashion import FashionSearchSpace 5 |
from baselines import save_experiment 6 | from baselines.methods.shemoa import SHEMOA 7 | from baselines.methods.shemoa import Mutation, Recombination, ParentSelection 8 | 9 | 10 | if __name__ == '__main__': 11 | 12 | # Parameters Flowers 13 | N_init = 100 14 | min_budget = 5 15 | max_budget = 25 16 | max_function_evals = 15000 17 | mutation_type = Mutation.UNIFORM 18 | recombination_type = Recombination.UNIFORM 19 | selection_type = ParentSelection.TOURNAMENT 20 | search_space = FlowersSearchSpace() 21 | experiment = get_flowers('SHEMOA') 22 | 23 | # Parameters Fashion 24 | # N_init = 10 25 | # min_budget = 5 26 | # max_budget = 25 27 | # max_function_evals = 150 28 | # mutation_type = Mutation.UNIFORM 29 | # recombination_type = Recombination.UNIFORM 30 | # selection_type = ParentSelection.TOURNAMENT 31 | # search_space = FashionSearchSpace() 32 | # experiment = get_fashion('SHEMOA') 33 | 34 | ################# 35 | #### SH-EMOA #### 36 | ################# 37 | ea = SHEMOA( 38 | search_space, 39 | experiment, 40 | N_init, min_budget, max_budget, 41 | mutation_type=mutation_type, 42 | recombination_type=recombination_type, 43 | selection_type=selection_type, 44 | total_number_of_function_evaluations=max_function_evals 45 | ) 46 | ea.optimize() 47 | save_experiment(experiment, f'{experiment.name}.pickle') 48 | --------------------------------------------------------------------------------
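All of the example scripts above finish by calling save_experiment(experiment, f'{experiment.name}.pickle'). Below is a minimal sketch of how such a result could be post-processed into a Pareto front over the two recorded objectives. It assumes the file is a plain pickle of the Ax experiment, that the fetched data uses the standard Ax columns ('arm_name', 'metric_name', 'mean'), and it treats both metrics as minimized, matching how they are declared in the problem definitions above:

import pickle
import numpy as np

# Assumption: save_experiment stores the Ax experiment as a plain pickle.
with open('RandomSearch.pickle', 'rb') as f:
    experiment = pickle.load(f)

# Long-format Ax data -> one (val_acc_1, num_params) row per evaluated arm.
df = experiment.fetch_data().df
costs = df.pivot_table(index='arm_name', columns='metric_name', values='mean')[
    ['val_acc_1', 'num_params']
].to_numpy()

def pareto_mask(costs):
    """Boolean mask of non-dominated rows, assuming every column is minimized."""
    mask = np.ones(len(costs), dtype=bool)
    for i, row in enumerate(costs):
        if mask[i]:
            # Remove every point that 'row' strictly dominates.
            dominated = np.all(costs >= row, axis=1) & np.any(costs > row, axis=1)
            mask[dominated] = False
    return mask

print(costs[pareto_mask(costs)])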