element.
168 | # For black navbar, do "navbar navbar-inverse"
169 | 'navbar_class': "navbar",
170 |
171 | # Fix navigation bar to top of page?
172 | # Values: "true" (default) or "false"
173 | 'navbar_fixed_top': "true",
174 |
175 | # Location of link to source.
176 | # Options are "nav" (default), "footer" or anything else to exclude.
177 | 'source_link_position': "footer",
178 |
179 | # Bootswatch (http://bootswatch.com/) theme.
180 | #
181 | # Options are nothing with "" (default) or the name of a valid theme
182 | # such as "amelia" or "cosmo".
183 | 'bootswatch_theme': "cosmo",
184 |
185 | # Choose Bootstrap version.
186 | # Values: "3" (default) or "2" (in quotes)
187 | 'bootstrap_version': "3",
188 | }
189 |
190 | # Add any paths that contain custom themes here, relative to this directory.
191 | html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()
192 |
193 | # The name for this set of Sphinx documents. If None, it defaults to
194 | # "
v documentation".
195 | #html_title = None
196 |
197 | # A shorter title for the navigation bar. Default is the same as html_title.
198 | #html_short_title = None
199 |
200 | # The name of an image file (relative to this directory) to place at the top
201 | # of the sidebar.
202 | #html_logo = None
203 |
204 | # The name of an image file (within the static path) to use as favicon of the
205 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
206 | # pixels large.
207 | #html_favicon = None
208 |
209 | # Add any paths that contain custom static files (such as style sheets) here,
210 | # relative to this directory. They are copied after the builtin static files,
211 | # so a file named "default.css" will overwrite the builtin "default.css".
212 | html_static_path = ['_static']
213 |
214 | # Add any extra paths that contain custom files (such as robots.txt or
215 | # .htaccess) here, relative to this directory. These files are copied
216 | # directly to the root of the documentation.
217 | #html_extra_path = []
218 |
219 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
220 | # using the given strftime format.
221 | #html_last_updated_fmt = '%b %d, %Y'
222 |
223 | # If true, SmartyPants will be used to convert quotes and dashes to
224 | # typographically correct entities.
225 | #html_use_smartypants = True
226 |
227 | # Custom sidebar templates, maps document names to template names.
228 | html_sidebars = {'**': ['localtoc.html']}
229 |
230 | # Additional templates that should be rendered to pages, maps page names to
231 | # template names.
232 | #html_additional_pages = {}
233 |
234 | # If false, no module index is generated.
235 | #html_domain_indices = True
236 |
237 | # If false, no index is generated.
238 | #html_use_index = True
239 |
240 | # If true, the index is split into individual pages for each letter.
241 | #html_split_index = False
242 |
243 | # If true, links to the reST sources are added to the pages.
244 | #html_show_sourcelink = True
245 |
246 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
247 | #html_show_sphinx = True
248 |
249 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
250 | #html_show_copyright = True
251 |
252 | # If true, an OpenSearch description file will be output, and all pages will
253 | # contain a <link> tag referring to it. The value of this option must be the
254 | # base URL from which the finished HTML is served.
255 | #html_use_opensearch = ''
256 |
257 | # This is the file name suffix for HTML files (e.g. ".xhtml").
258 | #html_file_suffix = None
259 |
260 | # Language to be used for generating the HTML full-text search index.
261 | # Sphinx supports the following languages:
262 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
263 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
264 | #html_search_language = 'en'
265 |
266 | # A dictionary with options for the search language support, empty by default.
267 | # Now only 'ja' uses this config value
268 | #html_search_options = {'type': 'default'}
269 |
270 | # The name of a javascript file (relative to the configuration directory) that
271 | # implements a search results scorer. If empty, the default will be used.
272 | #html_search_scorer = 'scorer.js'
273 |
274 | # Output file base name for HTML help builder.
275 | htmlhelp_basename = 'AutoFoliodoc'
276 |
277 | # -- Options for LaTeX output ---------------------------------------------
278 |
279 | latex_elements = {
280 | # The paper size ('letterpaper' or 'a4paper').
281 | #'papersize': 'letterpaper',
282 |
283 | # The font size ('10pt', '11pt' or '12pt').
284 | #'pointsize': '10pt',
285 |
286 | # Additional stuff for the LaTeX preamble.
287 | #'preamble': '',
288 |
289 | # Latex figure (float) alignment
290 | #'figure_align': 'htbp',
291 | }
292 |
293 | # Grouping the document tree into LaTeX files. List of tuples
294 | # (source start file, target name, title,
295 | # author, documentclass [howto, manual, or own class]).
296 | latex_documents = [
297 | (master_doc, 'AutoFolio.tex', u'AutoFolio Documentation', autofolio.AUTHORS, 'manual'),
298 | ]
299 |
300 | # The name of an image file (relative to this directory) to place at the top of
301 | # the title page.
302 | #latex_logo = None
303 |
304 | # For "manual" documents, if this is true, then toplevel headings are parts,
305 | # not chapters.
306 | #latex_use_parts = False
307 |
308 | # If true, show page references after internal links.
309 | #latex_show_pagerefs = False
310 |
311 | # If true, show URL addresses after external links.
312 | #latex_show_urls = False
313 |
314 | # Documents to append as an appendix to all manuals.
315 | #latex_appendices = []
316 |
317 | # If false, no module index is generated.
318 | #latex_domain_indices = True
319 |
320 |
321 | # -- Options for manual page output ---------------------------------------
322 |
323 | # One entry per manual page. List of tuples
324 | # (source start file, name, description, authors, manual section).
325 | man_pages = [
326 | (master_doc, 'autofolio', u'AutoFolio Documentation',
327 | [author], 1)
328 | ]
329 |
330 | # If true, show URL addresses after external links.
331 | #man_show_urls = False
332 |
333 |
334 | # -- Options for Texinfo output -------------------------------------------
335 |
336 | # Grouping the document tree into Texinfo files. List of tuples
337 | # (source start file, target name, title, author,
338 | # dir menu entry, description, category)
339 | texinfo_documents = [
340 | (master_doc, 'AutoFolio', u'AutoFolio Documentation',
341 | author, 'AutoFolio', 'One line description of project.',
342 | 'Miscellaneous'),
343 | ]
344 |
345 | # Documents to append as an appendix to all manuals.
346 | #texinfo_appendices = []
347 |
348 | # If false, no module index is generated.
349 | #texinfo_domain_indices = True
350 |
351 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
352 | #texinfo_show_urls = 'footnote'
353 |
354 | # If true, do not generate a @detailmenu in the "Top" node's menu.
355 | #texinfo_no_detailmenu = False
356 |
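For context, the html_theme_options above are sphinx_bootstrap_theme options. A minimal sketch of how the theme is typically activated (the html_theme line is not shown in this excerpt; the 'navbar_title' value is purely illustrative):

# Minimal sphinx_bootstrap_theme setup (sketch; values are illustrative)
import sphinx_bootstrap_theme

html_theme = 'bootstrap'
html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()
html_theme_options = {
    'navbar_title': "AutoFolio",   # illustrative; the project's actual title is set elsewhere in conf.py
    'bootswatch_theme': "cosmo",
    'bootstrap_version': "3",
}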
--------------------------------------------------------------------------------
/autofolio/autofolio.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import functools
3 | import traceback
4 | import random
5 | from itertools import tee
6 | import pickle
7 |
8 | import numpy as np
9 | import pandas as pd
10 | import yaml
11 |
12 | from ConfigSpace.configuration_space import Configuration, \
13 | ConfigurationSpace
14 | from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
15 | UniformFloatHyperparameter, UniformIntegerHyperparameter
16 |
17 | # SMAC3
18 | from smac.tae.execute_func import ExecuteTAFuncDict
19 | from smac.scenario.scenario import Scenario
20 | from smac.stats.stats import Stats as AC_Stats
21 | from smac.facade.smac_hpo_facade import SMAC4HPO as SMAC
22 |
23 | from autofolio.io.cmd import CMDParser
24 | from aslib_scenario.aslib_scenario import ASlibScenario
25 |
26 | # feature preprocessing
27 | from autofolio.feature_preprocessing.pca import PCAWrapper
28 | from autofolio.feature_preprocessing.missing_values import ImputerWrapper
29 | from autofolio.feature_preprocessing.feature_group_filtering import FeatureGroupFiltering
30 | from autofolio.feature_preprocessing.standardscaler import StandardScalerWrapper
31 |
32 | # presolving
33 | from autofolio.pre_solving.aspeed_schedule import Aspeed
34 |
35 | # classifiers
36 | from autofolio.selector.classifiers.random_forest import RandomForest
37 | from autofolio.selector.classifiers.xgboost import XGBoost
38 |
39 | # regressors
40 | from autofolio.selector.regressors.random_forest import RandomForestRegressor
41 |
42 | # selectors
43 | from autofolio.selector.pairwise_classification import PairwiseClassifier
44 | from autofolio.selector.multi_classification import MultiClassifier
45 | from autofolio.selector.ind_regression import IndRegression
46 | from autofolio.selector.joint_regression import JointRegression
47 | from autofolio.selector.pairwise_regression import PairwiseRegression
48 |
49 | # validation
50 | from autofolio.validation.validate import Validator, Stats
51 |
52 | __author__ = "Marius Lindauer"
53 | __license__ = "BSD"
54 | __version__ = "2.2.0"
55 |
56 |
57 | class AutoFolio(object):
58 |
59 | def __init__(self, random_seed: int=12345):
60 | ''' Constructor
61 |
62 | Arguments
63 | ---------
64 | random_seed: int
65 | random seed for numpy and random packages
66 | '''
67 |
68 | np.random.seed(random_seed) # fix seed
69 | random.seed(random_seed)
70 |
71 | # For reasons unknown, without an initial logging.info call here,
72 | # we don't get any further log output
73 | logging.info("Init AutoFolio")
74 | self._root_logger = logging.getLogger()
75 | self.logger = logging.getLogger("AutoFolio")
76 | self.cs = None
77 |
78 | self.overwrite_args = None
79 |
80 | def run_cli(self):
81 | '''
82 | main method of AutoFolio based on command line interface
83 | '''
84 |
85 | cmd_parser = CMDParser()
86 | args_, self.overwrite_args = cmd_parser.parse()
87 |
88 | self._root_logger.setLevel(args_.verbose)
89 |
90 | if args_.load:
91 | pred = self.read_model_and_predict(
92 | model_fn=args_.load, feature_vec=list(map(float, args_.feature_vec.split(" "))))
93 | print("Selected Schedule [(algorithm, budget)]: %s" % (pred))
94 |
95 | else:
96 |
97 | scenario = ASlibScenario()
98 | if args_.scenario:
99 | scenario.read_scenario(args_.scenario)
100 | elif args_.performance_csv and args_.feature_csv:
101 | scenario.read_from_csv(perf_fn=args_.performance_csv,
102 | feat_fn=args_.feature_csv,
103 | objective=args_.objective,
104 | runtime_cutoff=args_.runtime_cutoff,
105 | maximize=args_.maximize,
106 | cv_fn=args_.cv_csv)
107 | else:
108 | raise ValueError("Missing inputs to read scenario data.")
109 |
110 | test_scenario = None
111 | if args_.performance_test_csv and args_.feature_test_csv:
112 | test_scenario = ASlibScenario()
113 | test_scenario.read_from_csv(perf_fn=args_.performance_test_csv,
114 | feat_fn=args_.feature_test_csv,
115 | objective=args_.objective,
116 | runtime_cutoff=args_.runtime_cutoff,
117 | maximize=args_.maximize,
118 | cv_fn=None)
119 |
120 | config = {}
121 | if args_.config is not None:
122 | self.logger.info("Reading yaml config file")
123 | config = yaml.safe_load(open(args_.config))
124 | if not config.get("wallclock_limit"):
125 | config["wallclock_limit"] = args_.wallclock_limit
126 | if not config.get("runcount_limit"):
127 | config["runcount_limit"] = args_.runcount_limit
128 | if not config.get("output-dir"):
129 | config["output-dir"] = args_.output_dir
130 |
131 | self.cs = self.get_cs(scenario, config)
132 |
133 | if args_.outer_cv:
134 | self._outer_cv(scenario, config, args_.outer_cv_fold,
135 | args_.out_template, smac_seed=args_.smac_seed)
136 | return 0
137 |
138 | if args_.tune:
139 | config = self.get_tuned_config(scenario,
140 | wallclock_limit=args_.wallclock_limit,
141 | runcount_limit=args_.runcount_limit,
142 | autofolio_config=config,
143 | seed=args_.smac_seed)
144 | else:
145 | config = self.cs.get_default_configuration()
146 | self.logger.debug(config)
147 |
148 | if args_.save:
149 | feature_pre_pipeline, pre_solver, selector = self.fit(
150 | scenario=scenario, config=config)
151 | self._save_model(
152 | args_.save, scenario, feature_pre_pipeline, pre_solver, selector, config)
153 | else:
154 | self.run_cv(config=config, scenario=scenario, folds=int(scenario.cv_data.max().max()))
155 |
156 | if test_scenario is not None:
157 | stats = self.run_fold(config=config,
158 | fold=0,
159 | return_fit=False,
160 | scenario=scenario,
161 | test_scenario=test_scenario)
162 |
163 | def _outer_cv(self, scenario: ASlibScenario, autofolio_config:dict=None,
164 | outer_cv_fold:int=None, out_template:str=None,
165 | smac_seed:int=42):
166 | '''
167 | Evaluate on a scenario using an "outer" cross-fold validation
168 | scheme. In particular, this ensures that SMAC does not use the test
169 | set during hyperparameter optimization.
170 |
171 | Arguments
172 | ---------
173 | scenario: ASlibScenario
174 | ASlib Scenario at hand
175 |
176 | autofolio_config: dict, or None
177 | An optional dictionary of configuration options
178 |
179 | outer_cv_fold: int, or None
180 | If given, then only the single outer-cv fold is processed
181 |
182 | out_template: str, or None
183 | If given, the learned configurations are written to the
184 | specified locations. The string is treated as a string.Template, and
185 | "${fold}" and "${type}" are substituted with the fold and file type.
186 |
187 | smac_seed:int
188 | random seed for SMAC
189 |
190 | Returns
191 | -------
192 | stats: validate.Stats
193 | Performance over all outer-cv folds
194 |
195 | '''
196 | import string
197 |
198 | outer_stats = None
199 |
200 | # For each outer split
201 | outer_cv_folds = range(1, 11)
202 | if outer_cv_fold is not None:
203 | outer_cv_folds = range(outer_cv_fold, outer_cv_fold+1)
204 |
205 | for cv_fold in outer_cv_folds:
206 |
207 | # Use ‘ASlibScenario.get_split()’ to get the outer split
208 | outer_testing, outer_training = scenario.get_split(cv_fold)
209 |
210 | msg = ">>>>> Outer CV fold: {} <<<<<".format(cv_fold)
211 | self.logger.info(msg)
212 |
213 | # Use ASlibScenario.create_cv_splits() to get an inner-cv
214 | outer_training.create_cv_splits(n_folds=10)
215 |
216 | # Use ‘AutoFolio.get_tuned_config()’ to tune on inner-cv
217 | config = self.get_tuned_config(
218 | outer_training,
219 | autofolio_config=autofolio_config,
220 | seed=smac_seed
221 | )
222 |
223 | # Use ‘AutoFolio.run_fold()’ to get the performance on the outer split
224 | stats, fit, schedule = self.run_fold(
225 | config,
226 | scenario,
227 | cv_fold,
228 | return_fit=True
229 | )
230 |
231 | feature_pre_pipeline, pre_solver, selector = fit
232 |
233 | if outer_stats is None:
234 | outer_stats = stats
235 | else:
236 | outer_stats.merge(stats)
237 |
238 | # save the model, if given an output location
239 | if out_template is not None:
240 | out_template_ = string.Template(out_template)
241 | model_fn = out_template_.substitute(fold=cv_fold, type="pkl")
242 |
243 | msg = "Writing model to: {}".format(model_fn)
244 | self.logger.info(msg)
245 |
246 | self._save_model(
247 | model_fn,
248 | scenario,
249 | feature_pre_pipeline,
250 | pre_solver,
251 | selector,
252 | config
253 | )
254 |
255 | # convert the schedule to a data frame
256 | schedule_df = pd.Series(schedule, name="solver")
257 | schedule_df.index.name = "instance"
258 | schedule_df = schedule_df.reset_index()
259 |
260 | # just keep the solver name; we don't care about the time
261 |
262 | # x[0] gets the first pair in the schedule list
263 | # and x[0][0] gets the name of the solver from that pair
264 | schedule_df['solver'] = schedule_df['solver'].apply(lambda x: x[0][0])
265 |
266 | selections_fn = out_template_.substitute(fold=cv_fold, type="csv")
267 |
268 | msg = "Writing solver choices to: {}".format(selections_fn)
269 | self.logger.info(msg)
270 |
271 | schedule_df.to_csv(selections_fn, index=False)
272 |
273 | self.logger.info(">>>>> Final Stats <<<<<")
274 | outer_stats.show()
275 |
276 | def _save_model(self, out_fn: str, scenario: ASlibScenario, feature_pre_pipeline: list, pre_solver: Aspeed, selector, config: Configuration):
277 | '''
278 | save all pipeline objects for predictions
279 |
280 | Arguments
281 | ---------
282 | out_fn: str
283 | filename of output file
284 | scenario: AslibScenario
285 | ASlib scenario with all the data
286 | feature_pre_pipeline: list
287 | list of preprocessing objects
288 | pre_solver: Aspeed
289 | aspeed object with pre-solving schedule
290 | selector: autofolio.selector.*
291 | fitted selector object
292 | config: Configuration
293 | parameter setting configuration
294 | '''
295 | scenario.logger = None
296 | for fpp in feature_pre_pipeline:
297 | fpp.logger = None
298 | if pre_solver:
299 | pre_solver.logger = None
300 | selector.logger = None
301 | model = [scenario, feature_pre_pipeline, pre_solver, selector, config]
302 | with open(out_fn, "bw") as fp:
303 | pickle.dump(model, fp)
304 |
305 | def read_model_and_predict(self, model_fn: str, feature_vec: list):
306 | '''
307 | reads saved model from disk and predicts the selected algorithm schedule for a given feature vector
308 |
309 | Arguments
310 | --------
311 | model_fn: str
312 | file name of saved model
313 | feature_vec: list
314 | instance feature vector as a list of floats
315 |
316 | Returns
317 | -------
318 | list of tuple
319 | Selected schedule [(algorithm, budget)]
320 | '''
321 | with open(model_fn, "br") as fp:
322 | scenario, feature_pre_pipeline, pre_solver, selector, config = pickle.load(
323 | fp)
324 |
325 | for fpp in feature_pre_pipeline:
326 | fpp.logger = logging.getLogger("Feature Preprocessing")
327 | if pre_solver:
328 | pre_solver.logger = logging.getLogger("Aspeed PreSolving")
329 | selector.logger = logging.getLogger("Selector")
330 |
331 | # saved scenario is adapted to given feature vector
332 | feature_vec = np.array([feature_vec])
333 | scenario.feature_data = pd.DataFrame(
334 | feature_vec, index=["pseudo_instance"], columns=scenario.features)
335 | scenario.instances = ["pseudo_instance"]
336 | pred = self.predict(scenario=scenario, config=config,
337 | feature_pre_pipeline=feature_pre_pipeline, pre_solver=pre_solver, selector=selector)
338 |
339 | return pred["pseudo_instance"]
340 |
341 | def get_cs(self, scenario: ASlibScenario, autofolio_config:dict=None):
342 | '''
343 | returns the parameter configuration space of AutoFolio
344 | (based on the automl config space: https://github.com/automl/ConfigSpace)
345 |
346 | Arguments
347 | ---------
348 | scenario: aslib_scenario.aslib_scenario.ASlibScenario
349 | aslib scenario at hand
350 |
351 | autofolio_config: dict, or None
352 | An optional dictionary of configuration options
353 | '''
354 | autofolio_config = autofolio_config or {}
355 | self.cs = ConfigurationSpace()
356 |
357 | # only allow the feature groups specified in the config file
358 | # by default, though, all of the feature groups are allowed.
359 | allowed_feature_groups = autofolio_config.get("allowed_feature_groups",
360 | scenario.feature_steps)
361 |
362 | if len(allowed_feature_groups) == 0:
363 | msg = "Please ensure at least one feature group is allowed"
364 | raise ValueError(msg)
365 |
366 |
367 | if len(allowed_feature_groups) == 1:
368 | choices = [True] # if we only have one feature group, it has to be active
369 | else:
370 | choices = [True, False]
371 | default = True
372 |
373 | for fs in allowed_feature_groups:
374 |
375 | fs_param = CategoricalHyperparameter(name="fgroup_%s" % (fs),
376 | choices=choices, default_value=default)
377 | self.cs.add_hyperparameter(fs_param)
378 |
379 | # preprocessing
380 | if autofolio_config.get("pca", True):
381 | PCAWrapper.add_params(self.cs)
382 |
383 | if autofolio_config.get("impute", True):
384 | ImputerWrapper.add_params(self.cs)
385 |
386 | if autofolio_config.get("scale", True):
387 | StandardScalerWrapper.add_params(self.cs)
388 |
389 | # Pre-Solving
390 | if scenario.performance_type[0] == "runtime":
391 | if autofolio_config.get("presolve", True):
392 | Aspeed.add_params(
393 | cs=self.cs, cutoff=scenario.algorithm_cutoff_time)
394 |
395 | if autofolio_config.get("classifier"):
396 | # fix parameter
397 | cls_choices = [autofolio_config["classifier"]]
398 | cls_def = autofolio_config["classifier"]
399 | else:
400 | cls_choices = ["RandomForest","XGBoost"]
401 | cls_def = "RandomForest"
402 | classifier = CategoricalHyperparameter(
403 | "classifier", choices=cls_choices,
404 | default_value=cls_def)
405 |
406 | self.cs.add_hyperparameter(classifier)
407 |
408 | RandomForest.add_params(self.cs)
409 | XGBoost.add_params(self.cs)
410 |
411 | if autofolio_config.get("regressor"):
412 | # fix parameter
413 | reg_choices = [autofolio_config["regressor"]]
414 | reg_def = autofolio_config["regressor"]
415 | else:
416 | reg_choices = ["RandomForestRegressor"]
417 | reg_def = "RandomForestRegressor"
418 |
419 | regressor = CategoricalHyperparameter(
420 | "regressor", choices=reg_choices, default_value=reg_def)
421 | self.cs.add_hyperparameter(regressor)
422 | RandomForestRegressor.add_params(self.cs)
423 |
424 | # selectors
425 | if autofolio_config.get("selector"):
426 | # fix parameter
427 | sel_choices = [autofolio_config["selector"]]
428 | sel_def = autofolio_config["selector"]
429 | else:
430 | sel_choices = ["PairwiseClassifier","PairwiseRegressor"]
431 | sel_def = "PairwiseClassifier"
432 |
433 | selector = CategoricalHyperparameter(
434 | "selector", choices=sel_choices, default_value=sel_def)
435 | self.cs.add_hyperparameter(selector)
436 | PairwiseClassifier.add_params(self.cs)
437 | PairwiseRegression.add_params(self.cs)
438 |
439 | self.logger.debug(self.cs)
440 |
441 | return self.cs
442 |
443 | def get_tuned_config(self, scenario: ASlibScenario,
444 | runcount_limit:int=42,
445 | wallclock_limit:int=300,
446 | autofolio_config:dict=dict(),
447 | seed:int=42):
448 | '''
449 | uses SMAC3 to determine a well-performing configuration in the configuration space self.cs on the given scenario
450 |
451 | Arguments
452 | ---------
453 | scenario: ASlibScenario
454 | ASlib Scenario at hand
455 | runcount_limit: int
456 | runcount_limit for SMAC scenario
457 | wallclock_limit: int
458 | wallclock limit in sec for SMAC scenario
459 | (overwritten by autofolio_config)
460 | autofolio_config: dict, or None
461 | An optional dictionary of configuration options
462 | seed: int
463 | random seed for SMAC
464 |
465 | Returns
466 | -------
467 | Configuration
468 | best incumbent configuration found by SMAC
469 | '''
470 |
471 | wallclock_limit = autofolio_config.get("wallclock_limit", wallclock_limit)
472 | runcount_limit = autofolio_config.get("runcount_limit", runcount_limit)
473 |
474 | taf = functools.partial(self.called_by_smac, scenario=scenario)
475 | max_fold = scenario.cv_data.max().max()
476 | max_fold = int(max_fold)
477 |
478 | ac_scenario = Scenario({"run_obj": "quality", # we optimize quality
479 | "runcount-limit": runcount_limit,
480 | "cs": self.cs, # configuration space
481 | "deterministic": "true",
482 | "instances": [[str(i)] for i in range(1, max_fold+1)],
483 | "wallclock-limit": wallclock_limit,
484 | "output-dir" : "" if not autofolio_config.get("output-dir",None) else autofolio_config.get("output-dir")
485 | })
486 |
487 | # necessary to use stats options related to scenario information
488 | AC_Stats.scenario = ac_scenario
489 |
490 | # Optimize
491 | self.logger.info(
492 | ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
493 | self.logger.info("Start Configuration")
494 | self.logger.info(
495 | ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
496 | smac = SMAC(scenario=ac_scenario, tae_runner=taf,
497 | rng=np.random.RandomState(seed))
498 | incumbent = smac.optimize()
499 |
500 | self.logger.info("Final Incumbent: %s" % (incumbent))
501 |
502 | return incumbent
503 |
504 | def called_by_smac(self, config: Configuration, scenario: ASlibScenario, instance:str=None, seed:int=1):
505 | '''
506 | run a cross-fold validation (or a single fold, if instance is given) based on the data from cv.arff
507 |
508 | Arguments
509 | ---------
510 | config: Configuration
511 | parameter configuration to use for preprocessing
512 | scenario: aslib_scenario.aslib_scenario.ASlibScenario
513 | aslib scenario at hand
514 | instance: str
515 | cv-fold index
516 | seed: int
517 | random seed (not used)
518 |
519 | Returns
520 | -------
521 | float: average performance
522 | '''
523 |
524 | if instance is None:
525 | perf = self.run_cv(config=config, scenario=scenario)
526 | else:
527 | try:
528 | stats = self.run_fold(config=config, scenario=scenario, fold=int(instance))
529 | perf = stats.show()
530 | except ValueError:
531 | if scenario.performance_type[0] == "runtime":
532 | perf = scenario.algorithm_cutoff_time * 20
533 | else:
534 | # try to impute a worst case perf
535 | perf = scenario.performance_data.max().max()
536 |
537 | if scenario.maximize[0]:
538 | perf *= -1
539 |
540 | return perf
541 |
542 | def run_cv(self, config: Configuration, scenario: ASlibScenario, folds:int=10):
543 | '''
544 | run a cross fold validation based on the given data from cv.arff
545 |
546 | Arguments
547 | ---------
548 | scenario: aslib_scenario.aslib_scenario.ASlibScenario
549 | aslib scenario at hand
550 | config: Configuration
551 | parameter configuration to use for preprocessing
552 | folds: int
553 | number of cv-splits
554 | seed: int
555 | random seed (not used)
556 | '''
557 | #TODO: use seed and instance in an appropriate way
558 | try:
559 | if scenario.performance_type[0] == "runtime":
560 | cv_stat = Stats(runtime_cutoff=scenario.algorithm_cutoff_time)
561 | else:
562 | cv_stat = Stats(runtime_cutoff=0)
563 | for i in range(1, folds + 1):
564 | self.logger.info("CV-Iteration: %d" % (i))
565 | stats = self.run_fold(config=config,
566 | scenario=scenario,
567 | fold=i)
568 | cv_stat.merge(stat=stats)
569 |
570 | self.logger.info(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
571 | self.logger.info("CV Stats")
572 | par10 = cv_stat.show()
573 | except ValueError:
574 | traceback.print_exc()
575 | par10 = scenario.algorithm_cutoff_time * 10
576 |
577 | if scenario.maximize[0]:
578 | par10 *= -1
579 |
580 | return par10
581 |
582 | def run_fold(self, config: Configuration, scenario:ASlibScenario, fold:int, test_scenario=None, return_fit:bool=False):
583 | '''
584 | run a given fold of cross validation
585 |
586 | Arguments
587 | ---------
588 | scenario: aslib_scenario.aslib_scenario.ASlibScenario
589 | aslib scenario at hand
590 | config: Configuration
591 | parameter configuration to use for preprocessing
592 | fold: int
593 | fold id
594 | test_scenario:aslib_scenario.aslib_scenario.ASlibScenario
595 | aslib scenario with test data for validation
596 | generated from the cv split of the given scenario if None
597 |
598 | return_fit: bool
599 | optionally, the learned preprocessing options, presolver and
600 | selector can be returned
601 |
602 | Returns
603 | -------
604 | Stats()
605 |
606 | (pre_pipeline, pre_solver, selector):
607 | only present if return_fit is True
608 | the pipeline components fit with the configuration options
609 |
610 | schedule: dict of string -> list of (solver, cutoff) pairs
611 | only present if return_fit is True
612 | the solver choices for each instance
613 |
614 |
615 | '''
616 |
617 | if test_scenario is None:
618 | self.logger.info("CV-Iteration: %d" % (fold))
619 | test_scenario, training_scenario = scenario.get_split(indx=fold)
620 | else:
621 | self.logger.info("Validation on test data")
622 | training_scenario = scenario
623 |
624 | feature_pre_pipeline, pre_solver, selector = self.fit(
625 | scenario=training_scenario, config=config)
626 |
627 | schedules = self.predict(
628 | test_scenario, config, feature_pre_pipeline, pre_solver, selector)
629 |
630 | val = Validator()
631 | if scenario.performance_type[0] == "runtime":
632 | stats = val.validate_runtime(
633 | schedules=schedules, test_scenario=test_scenario, train_scenario=training_scenario)
634 | elif scenario.performance_type[0] == "solution_quality":
635 | stats = val.validate_quality(
636 | schedules=schedules, test_scenario=test_scenario, train_scenario=training_scenario)
637 | else:
638 | raise ValueError("Unknown: %s" %(scenario.performance_type[0]))
639 |
640 | if return_fit:
641 | return stats, (feature_pre_pipeline, pre_solver, selector), schedules
642 | else:
643 | return stats
644 |
645 | def fit(self, scenario: ASlibScenario, config: Configuration):
646 | '''
647 | fit AutoFolio on given ASlib Scenario
648 |
649 | Arguments
650 | ---------
651 | scenario: aslib_scenario.aslib_scenario.ASlibScenario
652 | aslib scenario at hand
653 | config: Configuration
654 | parameter configuration to use for preprocessing
655 |
656 | Returns
657 | -------
658 | list of fitted feature preprocessing objects
659 | pre-solving object
660 | fitted selector
661 | '''
662 | self.logger.info("Given Configuration: %s" % (config))
663 |
664 | if self.overwrite_args:
665 | config = self._overwrite_configuration(
666 | config=config, overwrite_args=self.overwrite_args)
667 | self.logger.info("Overwritten Configuration: %s" % (config))
668 |
669 | scenario, feature_pre_pipeline = self.fit_transform_feature_preprocessing(
670 | scenario, config)
671 |
672 | pre_solver = self.fit_pre_solving(scenario, config)
673 |
674 | selector = self.fit_selector(scenario, config)
675 |
676 | return feature_pre_pipeline, pre_solver, selector
677 |
678 | def _overwrite_configuration(self, config: Configuration, overwrite_args: list):
679 | '''
680 | overwrites a given configuration with some new settings
681 |
682 | Arguments
683 | ---------
684 | config: Configuration
685 | initial configuration to be adapted
686 | overwrite_args: list
687 | new parameter settings as a list of strings
688 |
689 | Returns
690 | -------
691 | Configuration
692 | '''
693 |
694 | def pairwise(iterable):
695 | a, b = tee(iterable)
696 | next(b, None)
697 | return zip(a, b)
698 |
699 | dict_conf = config.get_dictionary()
700 | for param, value in pairwise(overwrite_args):
701 | try:
702 | ok = self.cs.get_hyperparameter(param)
703 | except KeyError:
704 | ok = None
705 | if ok is not None:
706 | if type(self.cs.get_hyperparameter(param)) is UniformIntegerHyperparameter:
707 | dict_conf[param] = int(value)
708 | elif type(self.cs.get_hyperparameter(param)) is UniformFloatHyperparameter:
709 | dict_conf[param] = float(value)
710 | elif value == "True":
711 | dict_conf[param] = True
712 | elif value == "False":
713 | dict_conf[param] = False
714 | else:
715 | dict_conf[param] = value
716 | else:
717 | self.logger.warning(
718 | "Unknown given parameter: %s %s" % (param, value))
719 | config = Configuration(self.cs, values=dict_conf, allow_inactive_with_values=True)
720 |
721 | return config
722 |
723 | def fit_transform_feature_preprocessing(self, scenario: ASlibScenario, config: Configuration):
724 | '''
725 | performs feature preprocessing on a given ASlib scenario wrt a given configuration
726 |
727 | Arguments
728 | ---------
729 | scenario: aslib_scenario.aslib_scenario.ASlibScenario
730 | aslib scenario at hand
731 | config: Configuration
732 | parameter configuration to use for preprocessing
733 |
734 | Returns
735 | -------
736 | list of fitted feature preprocessing objects
737 | '''
738 |
739 | pipeline = []
740 | fgf = FeatureGroupFiltering()
741 | scenario = fgf.fit_transform(scenario, config)
742 |
743 | imputer = ImputerWrapper()
744 | scenario = imputer.fit_transform(scenario, config)
745 |
746 | scaler = StandardScalerWrapper()
747 | scenario = scaler.fit_transform(scenario, config)
748 |
749 | pca = PCAWrapper()
750 | scenario = pca.fit_transform(scenario, config)
751 |
752 | return scenario, [fgf, imputer, scaler, pca]
753 |
754 | def fit_pre_solving(self, scenario: ASlibScenario, config: Configuration):
755 | '''
756 | fits a pre-solving schedule using Aspeed [Hoos et al., 2015, TPLP]
757 |
758 | Arguments
759 | ---------
760 | scenario: aslib_scenario.aslib_scenario.ASlibScenario
761 | aslib scenario at hand
762 | config: Configuration
763 | parameter configuration to use for preprocessing
764 |
765 | Returns
766 | -------
767 | instance of Aspeed() with a fitted pre-solving schedule if performance_type of scenario is runtime; else None
768 | '''
769 | if scenario.performance_type[0] == "runtime":
770 | aspeed = Aspeed()
771 | aspeed.fit(scenario=scenario, config=config)
772 | return aspeed
773 | else:
774 | return None
775 |
776 | def fit_selector(self, scenario: ASlibScenario, config: Configuration):
777 | '''
778 | fits an algorithm selector for a given scenario wrt a given configuration
779 |
780 | Arguments
781 | ---------
782 | scenario: aslib_scenario.aslib_scenario.ASlibScenario
783 | aslib scenario at hand
784 | config: Configuration
785 | parameter configuration
786 | '''
787 |
788 | if config.get("selector") == "PairwiseClassifier":
789 | clf_class = None
790 | if config.get("classifier") == "RandomForest":
791 | clf_class = RandomForest
792 | if config.get("classifier") == "XGBoost":
793 | clf_class = XGBoost
794 |
795 | selector = PairwiseClassifier(classifier_class=clf_class)
796 | selector.fit(scenario=scenario, config=config)
797 |
798 | if config.get("selector") == "MultiClassifier":
799 | clf_class = None
800 | if config.get("classifier") == "RandomForest":
801 | clf_class = RandomForest
802 | if config.get("classifier") == "XGBoost":
803 | clf_class = XGBoost
804 |
805 | selector = MultiClassifier(classifier_class=clf_class)
806 | selector.fit(scenario=scenario, config=config)
807 |
808 | if config.get("selector") == "IndRegressor":
809 | reg_class = None
810 | if config.get("regressor") == "RandomForestRegressor":
811 | reg_class = RandomForestRegressor
812 |
813 | selector = IndRegression(regressor_class=reg_class)
814 | selector.fit(scenario=scenario, config=config)
815 |
816 | if config.get("selector") == "JointRegressor":
817 | reg_class = None
818 | if config.get("regressor") == "RandomForestRegressor":
819 | reg_class = RandomForestRegressor
820 |
821 | selector = JointRegression(regressor_class=reg_class)
822 | selector.fit(scenario=scenario, config=config)
823 |
824 | if config.get("selector") == "PairwiseRegressor":
825 | reg_class = None
826 | if config.get("regressor") == "RandomForestRegressor":
827 | reg_class = RandomForestRegressor
828 |
829 | selector = PairwiseRegression(regressor_class=reg_class)
830 | selector.fit(scenario=scenario, config=config)
831 |
832 | return selector
833 |
834 | def predict(self, scenario: ASlibScenario, config: Configuration, feature_pre_pipeline: list, pre_solver: Aspeed, selector):
835 | '''
836 | predicts algorithm schedules wrt a given config
837 | and given pipelines
838 |
839 | Arguments
840 | ---------
841 | scenario: aslib_scenario.aslib_scenario.ASlibScenario
842 | aslib scenario at hand
843 | config: Configuration
844 | parameter configuration
845 | feature_pre_pipeline: list
846 | list of fitted feature preprocessors
847 | pre_solver: Aspeed
848 | pre solver object with a saved static schedule
849 | selector: autofolio.selector.*
850 | fitted selector object
851 | '''
852 |
853 | self.logger.info("Predict on Test")
854 | for f_pre in feature_pre_pipeline:
855 | scenario = f_pre.transform(scenario)
856 |
857 | if pre_solver:
858 | pre_solving_schedule = pre_solver.predict(scenario=scenario)
859 | else:
860 | pre_solving_schedule = {}
861 |
862 | pred_schedules = selector.predict(scenario=scenario)
863 |
864 | # combine schedules
865 | if pre_solving_schedule:
866 | return dict((inst, pre_solving_schedule.get(inst, []) + schedule) for inst, schedule in pred_schedules.items())
867 | else:
868 | return pred_schedules
869 |
870 |
871 | def main():
872 | af = AutoFolio()
873 | af.run_cli()
874 |
875 |
876 | if __name__ == "__main__":
877 | main()
878 |
--------------------------------------------------------------------------------
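For orientation, a minimal sketch of driving the AutoFolio class programmatically instead of through run_cli(); the scenario path and the zero feature vector are placeholders, and _save_model() is the same (leading-underscore) helper the CLI uses for --save:

# Programmatic usage sketch; paths and feature values are placeholders.
from autofolio.autofolio import AutoFolio
from aslib_scenario.aslib_scenario import ASlibScenario

af = AutoFolio(random_seed=12345)

scenario = ASlibScenario()
scenario.read_scenario("path/to/aslib/SAT12-INDU")   # hypothetical ASlib scenario directory
n_features = len(scenario.features)                  # captured before fitting, since preprocessing may alter the scenario

# build the configuration space and start from its default configuration
cs = af.get_cs(scenario, autofolio_config={})
config = cs.get_default_configuration()

# optionally, tune with SMAC instead of using the default configuration:
# config = af.get_tuned_config(scenario, runcount_limit=50, wallclock_limit=300,
#                              autofolio_config={}, seed=42)

# fit the full pipeline and persist it, mirroring what run_cli() does with --save
feature_pre_pipeline, pre_solver, selector = af.fit(scenario=scenario, config=config)
af._save_model("af_model.pkl", scenario, feature_pre_pipeline, pre_solver, selector, config)

# later: load the model and predict a schedule for one instance's feature vector
schedule = af.read_model_and_predict(model_fn="af_model.pkl",
                                     feature_vec=[0.0] * n_features)
print(schedule)   # [(algorithm, budget), ...]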