├── autofolio ├── facade │ ├── __init__.py │ └── af_csv_facade.py ├── io │ ├── __init__.py │ └── cmd.py ├── pre_solving │ ├── __init__.py │ └── aspeed_schedule.py ├── selector │ ├── __init__.py │ ├── classifiers │ │ ├── __init__.py │ │ ├── random_forest.py │ │ └── xgboost.py │ ├── regressors │ │ ├── __init__.py │ │ └── random_forest.py │ ├── ind_regression.py │ ├── multi_classification.py │ ├── pairwise_regression.py │ ├── joint_regression.py │ └── pairwise_classification.py ├── validation │ ├── __init__.py │ └── validate.py ├── feature_preprocessing │ ├── __init__.py │ ├── standardscaler.py │ ├── missing_values.py │ ├── pca.py │ └── feature_group_filtering.py ├── __init__.py ├── __version__.py └── autofolio.py ├── aspeed ├── clingo ├── runsolver └── enc1.lp ├── examples ├── toy_example_csv │ ├── feats.csv │ ├── perf.csv │ └── example.py ├── asp-aslib │ ├── data │ │ ├── citation.bib │ │ ├── readme.txt │ │ └── description.txt │ └── README.md └── ttp-csv │ └── README.md ├── requirements.txt ├── doc ├── contact.rst ├── installation.rst ├── license.rst ├── index.rst ├── manual.rst ├── Makefile └── conf.py ├── CITATION.cff ├── scripts ├── autofolio └── plot_config.py ├── .gitignore ├── setup.py ├── LICENSE └── README.md /autofolio/facade/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autofolio/io/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autofolio/pre_solving/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autofolio/selector/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /autofolio/validation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autofolio/feature_preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autofolio/selector/classifiers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autofolio/selector/regressors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autofolio/__init__.py: -------------------------------------------------------------------------------- 1 | __authors__ = 'Marius Lindauer' 2 | -------------------------------------------------------------------------------- /aspeed/clingo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/AutoFolio/HEAD/aspeed/clingo -------------------------------------------------------------------------------- /aspeed/runsolver: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/AutoFolio/HEAD/aspeed/runsolver -------------------------------------------------------------------------------- /autofolio/__version__.py: -------------------------------------------------------------------------------- 1 | """Version information.""" 2 | 3 | # The following line *must* be the last in the module, exactly as formatted: 4 | __version__ = "2.1.2" 5 | -------------------------------------------------------------------------------- 
/examples/toy_example_csv/feats.csv: -------------------------------------------------------------------------------- 1 | ,Feature_1 2 | inst1,2 3 | inst2,1 4 | inst3,2 5 | inst4,0 6 | inst5,1 7 | inst6,2 8 | inst7,2 9 | inst8,1 10 | inst9,2 11 | inst10,0 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Cython 2 | numpy 3 | scipy 4 | scikit-learn>=0.20.0 5 | matplotlib 6 | pandas 7 | xgboost 8 | ConfigSpace 9 | pyrfr 10 | smac>=0.11.1 11 | git+https://github.com/mlindauer/ASlibScenario 12 | 13 | -------------------------------------------------------------------------------- /doc/contact.rst: -------------------------------------------------------------------------------- 1 | Contact 2 | ======= 3 | 4 | AutoFolio v2 is developed by the `ML4AAD Group of the University of Freiburg `_. 5 | 6 | If you found a bug, please report to https://github.com/mlindauer/AutoFolio 7 | 8 | -------------------------------------------------------------------------------- /examples/toy_example_csv/perf.csv: -------------------------------------------------------------------------------- 1 | ,Algorithm_1,Algorithm_2 2 | inst1,1.,2. 3 | inst2,10.,1. 4 | inst3,3.,10. 5 | inst4,4.,4. 6 | inst5,4.,2. 7 | inst6,2.,5. 8 | inst7,2.,10. 9 | inst8,10.,2. 10 | inst9,1.,7. 11 | inst10,2.,2. 12 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | @ARTICLE{lindauer-jair15a, 2 | author = {M. Lindauer and H. Hoos and F. Hutter and T. 
Schaub}, 3 | title = {AutoFolio: An automatically configured Algorithm Selector}, 4 | volume = {53}, 5 | journal = {Journal of Artificial Intelligence Research}, 6 | year = {2015}, 7 | pages = {745-778} } 8 | -------------------------------------------------------------------------------- /examples/asp-aslib/data/citation.bib: -------------------------------------------------------------------------------- 1 | @STRING{tplp = "Theory and Practice of Logic Programming"} 2 | 3 | @Article{holisc14a, 4 | author = "H. Hoos and M. Lindauer and T. Schaub", 5 | title = "claspfolio 2: Advances in Algorithm Selection for Answer Set Programming", 6 | journal = tplp, 7 | year = "2014", 8 | OPTkey = "", 9 | OPTvolume = "", 10 | OPTnumber = "", 11 | OPTpages = "", 12 | OPTmonth = "", 13 | note = "To appear.", 14 | OPTannote = "" 15 | } -------------------------------------------------------------------------------- /doc/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | .. note:: 5 | AutoFolio was written for Python 3.5. It will probably also run well with any later Python 3 version. It is not compatible with Python 2.x. 6 | 7 | .. _manual_installation: 8 | 9 | Manual Installation 10 | ------------------- 11 | | To install AutoFolio, please type the following commands on the command line: 12 | 13 | .. code-block:: bash 14 | 15 | cat requirements.txt | xargs -n 1 -L 1 pip install 16 | python setup.py install 17 | 18 | -------------------------------------------------------------------------------- /examples/asp-aslib/data/readme.txt: -------------------------------------------------------------------------------- 1 | Author: Marius Lindauer 2 | Date: 27.02.2014 3 | 4 | This data set was generated for a publication about claspfolio 2.0, 5 | i.e., an algorithm selector for ASP. 
6 | The algorithm portfolio of clasp (2.1.4) configurations was generated by the Hydra method (see http://www.cs.ubc.ca/labs/beta/Projects/Hydra/) 7 | in combination with SMAC. 8 | To generate the features, I used claspre, a light-weight version of the ASP solver clasp, 9 | with static and dynamic features (4 restarts each after 32 conflicts). 10 | 11 | -------------------------------------------------------------------------------- /scripts/autofolio: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import logging 4 | import sys 5 | import os 6 | import inspect 7 | cmd_folder = os.path.realpath(os.path.abspath(os.path.split(inspect.getfile( inspect.currentframe() ))[0])) 8 | cmd_folder = os.path.realpath(os.path.join(cmd_folder, "..")) 9 | if cmd_folder not in sys.path: 10 | sys.path.insert(0,cmd_folder) 11 | 12 | from autofolio.autofolio import AutoFolio 13 | 14 | if __name__ == "__main__": 15 | 16 | #logging.basicConfig(level=logging.INFO) 17 | 18 | af = AutoFolio() 19 | af.run_cli() 20 | 21 | -------------------------------------------------------------------------------- /examples/asp-aslib/README.md: -------------------------------------------------------------------------------- 1 | # Answer Set Programming Example 2 | 3 | This is an example of using AutoFolio on Answer Set Programming (ASP) with the ASlib format. 4 | Here, we use the data of the original `ASP-POTASSCO` ASlib scenario. 5 | For a full description of the format, please see www.aslib.net. 6 | 7 | By calling: 8 | 9 | `python3 ../../scripts/autofolio --scenario data/` 10 | 11 | AutoFolio will perform a 10-fold cross validation on selecting the algorithm with the smallest runtime (see `data/description.txt`) for each given instance. We expect a performance of roughly 135. 12 | 13 | To get a better performance, please use the option `--tune`. 
14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /examples/ttp-csv/README.md: -------------------------------------------------------------------------------- 1 | # Traveling Thief Problem Example 2 | 3 | This is an example to use AutoFolio on the Traveling Thief Problem using the csv format. 4 | In `data/features.csv`, you find the instance features for each problem instance; 5 | and in `data/perf.csv`, you find the performance of each algorithm on each problem instance. 6 | 7 | By calling: 8 | 9 | `python3 ../../scripts/autofolio --performance_csv data/perf.csv --feature_csv data/features.csv --maximize` 10 | 11 | AutoFolio will perform a 10-fold cross validation on selecting the algorithm with the largest performance value (`--maximize`) for each given instance. We expect a performance of roughly 0.99. 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | #Ipython Notebook 62 | .ipynb_checkpoints 63 | -------------------------------------------------------------------------------- /examples/toy_example_csv/example.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from autofolio.facade.af_csv_facade import AFCsvFacade 4 | 5 | __author__ = "Marius Lindauer" 6 | __license__ = "BSD" 7 | __version__ = "2.1.0" 8 | 9 | perf_fn = "perf.csv" 10 | feat_fn = "feats.csv" 11 | 12 | # will be created (or overwritten) by AutoFolio 13 | model_fn = "af_model.pkl" 14 | 15 | af = AFCsvFacade(perf_fn=perf_fn, feat_fn=feat_fn) 16 | 17 | # fit AutoFolio; will use default hyperparameters of AutoFolio 18 | af.fit() 19 | 20 | # tune AutoFolio's hyperparameter configuration for 4 seconds 21 | config = af.tune(wallclock_limit=4) 22 | 23 | # evaluate configuration using a 10-fold cross validation 24 | score = af.cross_validation(config=config) 25 | 26 | # re-fit AutoFolio using the (hopefully) better configuration 27 | # and save model to disk 28 | af.fit(config=config, save_fn=model_fn) 29 | 30 | # load AutoFolio model and 31 | # get predictions for new meta-feature vector 32 | pred = AFCsvFacade.load_and_predict(vec=np.array([1.]), load_fn=model_fn) 33 | 34 | print(pred) 35 | 36 | 37 | -------------------------------------------------------------------------------- /aspeed/enc1.lp: -------------------------------------------------------------------------------- 1 | #script(python) 2 | 3 | #import gringo 4 | 5 | ts = {} 6 
| def insert(i,s,t): 7 | key = str(s) 8 | if not ts.get(key): 9 | ts[key] = [] 10 | ts[key].append([i,t]) 11 | return 1 12 | 13 | def order(s): 14 | key = str(s) 15 | if not ts.get(key): 16 | ts[key] = [] 17 | ts[key].sort(key=lambda x: int(x[1])) 18 | p = None 19 | r = [] 20 | for i, v in ts[key]: 21 | if p: 22 | r.append((p,i)) 23 | p = i 24 | return r 25 | 26 | #end. 27 | 28 | #const cores=1. 29 | 30 | solver(S) :- time(_,S,_). 31 | time(S,T) :- time(_,S,T). 32 | unit(1..cores). 33 | 34 | insert(@insert(I,S,T)) :- time(I,S,T). 35 | order(I,K,S) :- insert(_), solver(S), (I,K) = @order(S). 36 | 37 | { slice(U,S,T) : time(S,T), T <= K, unit(U) } 1 :- 38 | solver(S), kappa(K). 39 | slice(S,T) :- slice(_,S,T). 40 | 41 | :- not #sum { T,S : slice(U,S,T) } K, kappa(K), unit(U). 42 | 43 | solved(I,S) :- slice(S,T), time(I,S,T). 44 | solved(I,S) :- solved(J,S), order(I,J,S). 45 | solved(I) :- solved(I,_). 46 | 47 | #maximize { 1@2,I: solved(I) }. 48 | #minimize { T*T@1,S : slice(S,T)}. 49 | 50 | #show slice/3. 
51 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import setuptools 3 | 4 | console_scripts = [ 5 | 'autofolio=autofolio.autofolio:main' 6 | ] 7 | 8 | with open("autofolio/__version__.py") as fh: 9 | version = fh.readlines()[-1].split()[-1].strip("\"'") 10 | 11 | setuptools.setup( 12 | name="autofolio", 13 | version=version, 14 | author="Marius Lindauer", 15 | author_email="lindauer@cs.uni-freiburg.de", 16 | description=("AutoFolio 2, an automaticalliy configured algorithm selector."), 17 | license="2-clause BSD", 18 | keywords="algortithm selection", 19 | url="", 20 | packages=setuptools.find_packages(exclude=['test', 'source']), 21 | classifiers=[ 22 | "Development Status :: 3 - Alpha", 23 | "Topic :: Utilities", 24 | "Topic :: Scientific/Engineering", 25 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 26 | "License :: OSI Approved :: 2-clause BSD", 27 | ], 28 | platforms=['Linux'], 29 | tests_require=['mock', 30 | 'nose'], 31 | test_suite='nose.collector', 32 | entry_points = { 33 | 'console_scripts': console_scripts 34 | } 35 | ) 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 
13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | -------------------------------------------------------------------------------- /doc/license.rst: -------------------------------------------------------------------------------- 1 | License 2 | ======= 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the 2-clause BSD license (see below). 6 | 7 | This program is distributed in the hope that it will be useful, 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | BSD 12 | --- 13 | Copyright (c) 2016, Ml4AAD Group (http://www.ml4aad.org/) 14 | All rights reserved. 15 | 16 | Redistribution and use in source and binary forms, with or without 17 | modification, are permitted provided that the following conditions are met: 18 | 19 | * Redistributions of source code must retain the above copyright notice, this 20 | list of conditions and the following disclaimer. 21 | 22 | * Redistributions in binary form must reproduce the above copyright notice, 23 | this list of conditions and the following disclaimer in the documentation 24 | and/or other materials provided with the distribution. 
25 | 26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 27 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 29 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 30 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 32 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 33 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 34 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. AutoFolio documentation master file, created by 2 | sphinx-quickstart on Mon Sep 14 12:36:21 2015. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | AutoFolio documentation! 7 | ================================= 8 | AutoFolio is an algorithm selection tool, 9 | i.e., it selects a well-performing algorithm for a given instance [Rice 1976]. 10 | In contrast to other algorithm selection tools, 11 | users of AutoFolio are not bothered with the decision of which algorithm selection approach to use 12 | and how to set its hyper-parameters. 13 | AutoFolio uses one of the state-of-the-art algorithm configuration tools, namely SMAC [Hutter et al LION'16] 14 | to automatically determine a well-performing algorithm selection approach 15 | and its hyper-parameters for the given algorithm selection data. 16 | Therefore, AutoFolio has a robust performance across different algorithm selection tasks. 17 | 18 | .. 
note:: 19 | 20 | For a detailed description of its main idea, 21 | we refer to 22 | 23 | `JAIR Journal Article `_ 24 | 25 | @ARTICLE{lindauer-jair15a, 26 | author = {M. Lindauer and H. Hoos and F. Hutter and T. Schaub}, 27 | title = {AutoFolio: An automatically configured Algorithm Selector}, 28 | volume = {53}, 29 | journal = {Journal of Artificial Intelligence Research}, 30 | year = {2015}, 31 | pages = {745-778} 32 | } 33 | 34 | 35 | AutoFolio is mainly written in Python 3.5. 36 | 37 | .. note:: 38 | 39 | This version is a re-implementation of the original AutoFolio implementation 40 | and does not have the same configuration space as the original implementation -- 41 | e.g., the clustering approach was not re-implemented because the performance had not met our expectations; 42 | e.g., as a new approach, we implemented a pair-wise performance difference prediction approach. 43 | 44 | 45 | Contents: 46 | --------- 47 | .. toctree:: 48 | :maxdepth: 2 49 | 50 | installation 51 | manual 52 | contact 53 | license 54 | 55 | 56 | 57 | Indices and tables 58 | ------------------ 59 | 60 | * :ref:`genindex` 61 | * :ref:`modindex` 62 | * :ref:`search` 63 | 64 | -------------------------------------------------------------------------------- /autofolio/feature_preprocessing/standardscaler.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | from sklearn.preprocessing import StandardScaler 7 | 8 | from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ 9 | UniformFloatHyperparameter, UniformIntegerHyperparameter 10 | from ConfigSpace.conditions import EqualsCondition, InCondition 11 | from ConfigSpace.configuration_space import ConfigurationSpace 12 | from ConfigSpace import Configuration 13 | 14 | from aslib_scenario.aslib_scenario import ASlibScenario 15 | 16 | __author__ = "Marius Lindauer" 17 | __license__ = "BSD" 18 | 19 | 20 | class 
StandardScalerWrapper(object): 21 | 22 | @staticmethod 23 | def add_params(cs: ConfigurationSpace): 24 | ''' 25 | adds parameters to ConfigurationSpace 26 | ''' 27 | switch = CategoricalHyperparameter( 28 | "StandardScaler", choices=[True, False], default_value=True) 29 | cs.add_hyperparameter(switch) 30 | 31 | def __init__(self): 32 | ''' 33 | Constructor 34 | ''' 35 | self.scaler = None 36 | self.active = False 37 | self.logger = logging.getLogger("StandardScaler") 38 | 39 | def fit(self, scenario: ASlibScenario, config: Configuration): 40 | ''' 41 | fit StandardScaler object to ASlib scenario data 42 | 43 | Arguments 44 | --------- 45 | scenario: data.aslib_scenario.ASlibScenario 46 | ASlib Scenario with all data in pandas 47 | config: ConfigSpace.Configuration 48 | configuration 49 | ''' 50 | 51 | if config.get("StandardScaler"): 52 | self.active = True 53 | self.scaler = StandardScaler() 54 | self.scaler.fit(scenario.feature_data.values) 55 | 56 | def transform(self, scenario: ASlibScenario): 57 | ''' 58 | transform ASLib scenario data 59 | 60 | Arguments 61 | --------- 62 | scenario: data.aslib_scenario.ASlibScenario 63 | ASlib Scenario with all data in pandas 64 | 65 | Returns 66 | ------- 67 | data.aslib_scenario.ASlibScenario 68 | ''' 69 | if self.scaler: 70 | self.logger.debug("Applying StandardScaler") 71 | 72 | values = self.scaler.transform( 73 | np.array(scenario.feature_data.values)) 74 | 75 | scenario.feature_data = pd.DataFrame( 76 | data=values, index=scenario.feature_data.index, columns=scenario.feature_data.columns) 77 | 78 | return scenario 79 | 80 | def fit_transform(self, scenario: ASlibScenario, config: Configuration): 81 | ''' 82 | fit and transform 83 | 84 | Arguments 85 | --------- 86 | scenario: data.aslib_scenario.ASlibScenario 87 | ASlib Scenario with all data in pandas 88 | config: ConfigSpace.Configuration 89 | configuration 90 | 91 | Returns 92 | ------- 93 | data.aslib_scenario.ASlibScenario 94 | ''' 95 | self.fit(scenario, config) 
96 | scenario = self.transform(scenario) 97 | return scenario 98 | -------------------------------------------------------------------------------- /autofolio/facade/af_csv_facade.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import logging 3 | 4 | from ConfigSpace.configuration_space import Configuration 5 | from aslib_scenario.aslib_scenario import ASlibScenario 6 | 7 | from autofolio.autofolio import AutoFolio 8 | 9 | __author__ = "Marius Lindauer" 10 | __license__ = "BSD" 11 | __version__ = "2.1.0" 12 | 13 | 14 | class AFCsvFacade(object): 15 | 16 | def __init__(self, 17 | perf_fn:str, 18 | feat_fn:str, 19 | objective:str = "solution_quality", 20 | runtime_cutoff:float = None, 21 | maximize:bool = True, 22 | cv_fn:str = None, 23 | seed: int = 12345 24 | ): 25 | """ Read the scenario from the given csv files and create an AutoFolio object seeded with seed """ 26 | self.scenario = ASlibScenario() 27 | self.scenario.read_from_csv(perf_fn=perf_fn, 28 | feat_fn=feat_fn, 29 | objective=objective, 30 | runtime_cutoff=runtime_cutoff, 31 | maximize=maximize, 32 | cv_fn=cv_fn) 33 | self.seed = seed 34 | 35 | self.af = AutoFolio(random_seed=seed) 36 | self.logger = logging.getLogger("AF Facade") 37 | 38 | def fit(self, 39 | config:Configuration=None, 40 | save_fn:str = None): 41 | """ Train AutoFolio on data from init; uses the default configuration if config is None and saves the model to save_fn if given""" 42 | self.logger.info("Fit") 43 | if config is None: 44 | cs = self.af.get_cs(self.scenario, {}) 45 | config = cs.get_default_configuration() 46 | feature_pre_pipeline, pre_solver, selector = self.af.fit(scenario=self.scenario, config=config) 47 | 48 | if save_fn: 49 | self.af._save_model(save_fn, self.scenario, feature_pre_pipeline, pre_solver, selector, config) 50 | self.logger.info("AutoFolio model saved to %s" %(save_fn)) 51 | 52 | def tune(self, 53 | wallclock_limit:int = 1200, 54 | runcount_limit:int = np.inf, 55 | ): 56 | """ Get a configuration from AutoFolio.get_tuned_config under the given limits (using the seed from init) and return it""" 57 | config = self.af.get_tuned_config(self.scenario, 58 | wallclock_limit=wallclock_limit, 59 | runcount_limit=runcount_limit, 60 | 
autofolio_config={}, 61 | seed=self.seed) 62 | self.logger.info("Optimized Configuration: %s" %(config)) 63 | return config 64 | 65 | def cross_validation(self, config:Configuration): 66 | """ run a cross validation on given AutoFolio configuration""" 67 | score = -1 * self.af.run_cv(config=config, scenario=self.scenario, folds=int(self.scenario.cv_data.max().max())) 68 | self.logger.info("AF's final performance %f" %(score)) 69 | 70 | return score 71 | 72 | @staticmethod 73 | def load_and_predict(vec: np.ndarray, 74 | load_fn:str): 75 | """ get predicted algorithm for given meta-feature vector""" 76 | af = AutoFolio(random_seed=42) # random seed doesn't matter here 77 | pred = af.read_model_and_predict(model_fn=load_fn, feature_vec=vec) 78 | print("Selected Schedule [(algorithm, budget)]: %s" % (pred)) 79 | return pred[0][0] 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /autofolio/feature_preprocessing/missing_values.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | from sklearn.impute import SimpleImputer 7 | 8 | from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ 9 | UniformFloatHyperparameter, UniformIntegerHyperparameter 10 | from ConfigSpace import Configuration 11 | from ConfigSpace.configuration_space import ConfigurationSpace 12 | 13 | from aslib_scenario.aslib_scenario import ASlibScenario 14 | 15 | __author__ = "Marius Lindauer" 16 | __license__ = "BSD" 17 | 18 | 19 | class ImputerWrapper(object): 20 | 21 | @staticmethod 22 | def add_params(cs: ConfigurationSpace): 23 | ''' 24 | adds parameters to ConfigurationSpace 25 | ''' 26 | stratgey = CategoricalHyperparameter( 27 | "imputer_strategy", choices=["mean", "median", "most_frequent"], default_value="mean") 28 | cs.add_hyperparameter(stratgey) 29 | 30 | def __init__(self): 31 | ''' 32 | Constructor 33 | ''' 34 | 
self.imputer = None 35 | self.active = False 36 | 37 | self.logger = logging.getLogger("MissingValueImputation") 38 | 39 | def fit(self, scenario: ASlibScenario, config: Configuration): 40 | ''' 41 | fit imputer object to ASlib scenario data 42 | 43 | Arguments 44 | --------- 45 | scenario: data.aslib_scenario.ASlibScenario 46 | ASlib Scenario with all data in pandas 47 | config: ConfigSpace.Configuration 48 | configuration 49 | ''' 50 | 51 | self.imputer = SimpleImputer(strategy=config.get("imputer_strategy")) 52 | self.imputer.fit(scenario.feature_data.values) 53 | self.active = True 54 | 55 | def transform(self, scenario: ASlibScenario): 56 | ''' 57 | impute missing feature values in ASLib scenario data; 58 | uses the imputer created by fit() 59 | Arguments 60 | --------- 61 | scenario: data.aslib_scenario.ASlibScenario 62 | ASlib Scenario with all data in pandas 63 | 64 | Returns 65 | ------- 66 | data.aslib_scenario.ASlibScenario 67 | ''' 68 | self.logger.debug("Impute Missing Feature Values") 69 | 70 | values = self.imputer.transform( 71 | np.array(scenario.feature_data.values)) 72 | scenario.feature_data = pd.DataFrame( 73 | data=values, index=scenario.feature_data.index, columns=scenario.feature_data.columns) 74 | 75 | return scenario 76 | 77 | def fit_transform(self, scenario: ASlibScenario, config: Configuration): 78 | ''' 79 | fit and transform 80 | 81 | Arguments 82 | --------- 83 | scenario: data.aslib_scenario.ASlibScenario 84 | ASlib Scenario with all data in pandas 85 | config: ConfigSpace.Configuration 86 | configuration 87 | 88 | Returns 89 | ------- 90 | data.aslib_scenario.ASlibScenario 91 | ''' 92 | self.fit(scenario, config) 93 | scenario = self.transform(scenario) 94 | return scenario 95 | 96 | def get_attributes(self): 97 | ''' 98 | returns the learned attributes as a list of strings 99 | (a single entry "Strategy=<strategy of the fitted imputer>") 100 | 101 | Returns 102 | ------- 103 | list of str 104 | ''' 105 | 
return ["Strategy=%s" %(self.imputer.strategy)] -------------------------------------------------------------------------------- /autofolio/feature_preprocessing/pca.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | from sklearn.decomposition import PCA 7 | 8 | from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ 9 | UniformFloatHyperparameter, UniformIntegerHyperparameter 10 | from ConfigSpace.conditions import EqualsCondition, InCondition 11 | from ConfigSpace.configuration_space import ConfigurationSpace 12 | from ConfigSpace import Configuration 13 | 14 | from aslib_scenario.aslib_scenario import ASlibScenario 15 | 16 | __author__ = "Marius Lindauer" 17 | __license__ = "BSD" 18 | 19 | 20 | class PCAWrapper(object): 21 | 22 | @staticmethod 23 | def add_params(cs: ConfigurationSpace): 24 | ''' 25 | adds parameters to ConfigurationSpace 26 | ''' 27 | pca_switch = CategoricalHyperparameter( 28 | "pca", choices=[True, False], default_value=False) 29 | n_components = UniformIntegerHyperparameter( 30 | "pca_n_components", lower=1, upper=20, default_value=7, log=True) 31 | cs.add_hyperparameter(pca_switch) 32 | cs.add_hyperparameter(n_components) 33 | cond = InCondition( 34 | child=n_components, parent=pca_switch, values=[True]) 35 | cs.add_condition(cond) 36 | 37 | def __init__(self): 38 | ''' 39 | Constructor 40 | ''' 41 | self.pca = None 42 | self.active = False 43 | 44 | self.logger = logging.getLogger("PCA") 45 | 46 | def fit(self, scenario: ASlibScenario, config: Configuration): 47 | ''' 48 | fit pca object to ASlib scenario data 49 | 50 | Arguments 51 | --------- 52 | scenario: data.aslib_scenario.ASlibScenario 53 | ASlib Scenario with all data in pandas 54 | config: ConfigSpace.Configuration 55 | configuration 56 | ''' 57 | 58 | if config.get("pca"): 59 | self.pca = PCA(n_components=config.get("pca_n_components")) 60 | 
self.pca.fit(scenario.feature_data.values) 61 | self.active = True 62 | 63 | def transform(self, scenario: ASlibScenario): 64 | ''' 65 | transform ASLib scenario data 66 | 67 | Arguments 68 | --------- 69 | scenario: data.aslib_scenario.ASlibScenario 70 | ASlib Scenario with all data in pandas 71 | 72 | Returns 73 | ------- 74 | data.aslib_scenario.ASlibScenario 75 | ''' 76 | if self.pca: 77 | self.logger.debug("Applying PCA") 78 | values = self.pca.transform( 79 | np.array(scenario.feature_data.values)) 80 | 81 | scenario.feature_data = pd.DataFrame( 82 | data=values, index=scenario.feature_data.index, columns=["f%d" % (i) for i in range(values.shape[1])]) 83 | 84 | return scenario 85 | 86 | def fit_transform(self, scenario: ASlibScenario, config: Configuration): 87 | ''' 88 | fit and transform 89 | 90 | Arguments 91 | --------- 92 | scenario: data.aslib_scenario.ASlibScenario 93 | ASlib Scenario with all data in pandas 94 | config: ConfigSpace.Configuration 95 | configuration 96 | 97 | Returns 98 | ------- 99 | data.aslib_scenario.ASlibScenario 100 | ''' 101 | self.fit(scenario, config) 102 | scenario = self.transform(scenario) 103 | return scenario 104 | 105 | def get_attributes(self): 106 | ''' 107 | returns a list of tuples of (attribute,value) 108 | for all learned attributes 109 | 110 | Returns 111 | ------- 112 | list of tuples of (attribute,value) 113 | ''' 114 | return ["Dimensions=%s" %(self.pca.n_components)] 115 | -------------------------------------------------------------------------------- /autofolio/selector/ind_regression.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import traceback 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ 8 | UniformFloatHyperparameter, UniformIntegerHyperparameter 9 | from ConfigSpace.conditions import EqualsCondition, InCondition 10 | from ConfigSpace.configuration_space import 
def fit(self, scenario: ASlibScenario, config: Configuration):
    '''
        fit one regressor per algorithm to ASlib scenario data

        Each regressor is trained on the instance features to predict the
        performance column of one algorithm; the regressors are stored in
        the order of scenario.algorithms.

        Arguments
        ---------
        scenario: data.aslib_scenario.ASlibScenario
            ASlib Scenario with all data in pandas
        config: ConfigSpace.Configuration
            configuration
    '''
    self.logger.info("Fit IndRegressor with %s" %
                     (self.regressor_class))

    self.algorithms = scenario.algorithms
    X = scenario.feature_data.values

    # Build a fresh list instead of appending to self.regressors: the
    # regressors are looked up by algorithm position, so models left over
    # from an earlier fit() would shadow the newly trained ones.
    regressors = []
    for algo in scenario.algorithms:
        y = scenario.performance_data[algo].values
        reg = self.regressor_class()
        reg.fit(X, y, config)
        regressors.append(reg)
    self.regressors = regressors
class MultiClassifier(object):
    '''
    Selector that trains a single multi-class classifier which predicts,
    for each instance, the index of the algorithm with the smallest
    performance value.
    '''

    @staticmethod
    def add_params(cs: ConfigurationSpace):
        '''
            adds parameters to ConfigurationSpace

            Makes the "classifier" hyperparameter conditional on the
            selector being "MultiClassifier" (if that choice is offered).
        '''

        selector = cs.get_hyperparameter("selector")
        classifier = cs.get_hyperparameter("classifier")
        if "MultiClassifier" in selector.choices:
            cond = InCondition(child=classifier, parent=selector, values=["MultiClassifier"])
            cs.add_condition(cond)

    def __init__(self, classifier_class):
        '''
            Constructor

            Arguments
            ---------
            classifier_class: class
                wrapper class offering fit(X, y, config, weights),
                predict(X) and get_attributes()
        '''
        self.classifiers = []   # legacy attribute, kept for compatibility
        self.classifier = None  # the fitted classifier, set by fit()
        self.logger = logging.getLogger("MultiClassifier")
        self.classifier_class = classifier_class
        self.normalizer = MinMaxScaler()

    def fit(self, scenario: ASlibScenario, config: Configuration):
        '''
            fit the classifier to ASlib scenario data

            Arguments
            ---------
            scenario: data.aslib_scenario.ASlibScenario
                ASlib Scenario with all data in pandas
            config: ConfigSpace.Configuration
                configuration
        '''
        self.logger.info("Fit MultiClassifier with %s" %
                         (self.classifier_class))

        self.algorithms = scenario.algorithms

        # since sklearn (at least the RFs)
        # uses float32 and we pass float64,
        # the normalization ensures that floats
        # are not converted to inf or -inf
        X = self.normalizer.fit_transform(scenario.feature_data.values)
        # label = column index of the best (minimal) performance value
        y = np.argmin(scenario.performance_data.values, axis=1)
        # instances on which the algorithms differ more get a larger weight
        weights = scenario.performance_data.std(axis=1)
        clf = self.classifier_class()
        clf.fit(X, y, config, weights)
        self.classifier = clf

    def predict(self, scenario: ASlibScenario):
        '''
            predict schedules for all instances in ASLib scenario data

            Arguments
            ---------
            scenario: data.aslib_scenario.ASlibScenario
                ASlib Scenario with all data in pandas

            Returns
            -------
            schedule: {inst -> (solver, time)}
                schedule of solvers with a running time budget
        '''

        if scenario.algorithm_cutoff_time:
            cutoff = scenario.algorithm_cutoff_time
        else:
            cutoff = 2**31

        X = self.normalizer.transform(scenario.feature_data.values)
        algo_indx = self.classifier.predict(X)

        # one single-entry schedule per instance: (predicted algorithm, cutoff + 1)
        return {
            str(inst): [(scenario.algorithms[i], cutoff + 1)]
            for i, inst in zip(algo_indx, scenario.feature_data.index)
        }

    def get_attributes(self):
        '''
            returns the learned attributes of the fitted classifier

            Returns
            -------
            list
                [{<classifier class name>: <classifier attributes>}],
                or an empty list if fit() has not been called yet
        '''
        # fit() stores the model in self.classifier; self.classifiers is never
        # filled, so indexing it (as before) always raised an IndexError
        clf = getattr(self, "classifier", None)
        if clf is None:
            return []
        return [{self.classifier_class.__name__: clf.get_attributes()}]
def fit(self, scenario: ASlibScenario, config: Configuration):
    '''
        fit one regressor per pair of algorithms to ASlib scenario data

        For every pair (i, j) with i < j a regressor is trained to predict
        the performance difference y_i - y_j from the instance features.
        The regressors are stored in the order (0,1), (0,2), ..., (1,2), ...

        Arguments
        ---------
        scenario: data.aslib_scenario.ASlibScenario
            ASlib Scenario with all data in pandas
        config: ConfigSpace.Configuration
            configuration
    '''
    self.logger.info("Fit PairwiseRegressor with %s" %
                     (self.regressor_class))

    self.algorithms = scenario.algorithms

    n_algos = len(scenario.algorithms)
    X = scenario.feature_data.values
    # look up each performance column once instead of once per pair
    perf = [scenario.performance_data[algo].values
            for algo in scenario.algorithms]

    # Build a fresh list instead of appending to self.regressors: the
    # regressors are consumed by running pair index, so models left over
    # from an earlier fit() would be used in place of the new ones.
    regressors = []
    for i in range(n_algos):
        for j in range(i + 1, n_algos):
            reg = self.regressor_class()
            reg.fit(X, perf[i] - perf[j], config)
            regressors.append(reg)
    self.regressors = regressors
class JointRegression(object):
    '''
    Selector that trains ONE regressor for all algorithms: the feature
    vector of an instance is extended by a one-hot encoding of the
    algorithm, and the regressor predicts the performance of that
    algorithm on that instance.
    '''

    @staticmethod
    def add_params(cs: ConfigurationSpace):
        '''
            adds parameters to ConfigurationSpace

            Makes the "regressor" hyperparameter conditional on the
            selector being "JointRegressor" (if that choice is offered).
        '''

        selector = cs.get_hyperparameter("selector")
        regressor = cs.get_hyperparameter("regressor")
        if "JointRegressor" in selector.choices:
            cond = InCondition(child=regressor, parent=selector, values=["JointRegressor"])
            cs.add_condition(cond)

    def __init__(self, regressor_class):
        '''
            Constructor

            Arguments
            ---------
            regressor_class: class
                wrapper class offering fit(X, y, config), predict(X)
                and get_attributes()
        '''
        # empty list until fit() replaces it by the single joint model
        self.regressors = []
        self.logger = logging.getLogger("JointRegressor")
        self.regressor_class = regressor_class

    @staticmethod
    def _with_algo_indicator(X, algo_idx, n_algos):
        ''' appends a one-hot encoding of algorithm algo_idx to every row of X '''
        one_hot = np.zeros((X.shape[0], n_algos))
        one_hot[:, algo_idx] = 1
        return np.hstack([X, one_hot])

    def fit(self, scenario: ASlibScenario, config: Configuration):
        '''
            fit the joint regressor to ASlib scenario data

            Arguments
            ---------
            scenario: data.aslib_scenario.ASlibScenario
                ASlib Scenario with all data in pandas
            config: ConfigSpace.Configuration
                configuration
        '''
        self.logger.info("Fit JointRegressor with %s" %
                         (self.regressor_class))

        self.algorithms = scenario.algorithms

        n_algos = len(scenario.algorithms)
        X = scenario.feature_data.values

        # collect one (features + one-hot, performance) block per algorithm
        # and stack once at the end instead of re-copying inside the loop
        feature_blocks = []
        targets = []
        for i, algo in enumerate(scenario.algorithms):
            feature_blocks.append(self._with_algo_indicator(X, i, n_algos))
            targets.append(scenario.performance_data[algo].values)
        Xs = np.vstack(feature_blocks)
        Ys = np.hstack(targets)

        reg = self.regressor_class()
        reg.fit(Xs, Ys, config)
        self.regressors = reg

    def predict(self, scenario: ASlibScenario):
        '''
            predict schedules for all instances in ASLib scenario data

            Arguments
            ---------
            scenario: data.aslib_scenario.ASlibScenario
                ASlib Scenario with all data in pandas

            Returns
            -------
            schedule: {inst -> (solver, time)}
                schedule of solvers with a running time budget
        '''

        if scenario.algorithm_cutoff_time:
            cutoff = scenario.algorithm_cutoff_time
        else:
            cutoff = 2**31

        n_algos = len(scenario.algorithms)
        X = scenario.feature_data.values
        scores = np.zeros((X.shape[0], n_algos))
        for i in range(n_algos):
            X_i = self._with_algo_indicator(X, i, n_algos)
            scores[:, i] += self.regressors.predict(X_i)

        # smallest predicted performance value wins
        algo_indx = np.argmin(scores, axis=1)

        return {
            str(inst): [(scenario.algorithms[i], cutoff + 1)]
            for i, inst in zip(algo_indx, scenario.feature_data.index)
        }

    def get_attributes(self):
        '''
            returns the learned attributes of the joint regressor

            Returns
            -------
            list
                [{<regressor class name>: <regressor attributes>}],
                or an empty list if fit() has not been called yet
        '''
        # fit() stores a single model (not a list) in self.regressors, so the
        # former self.regressors[0] raised a TypeError after fitting
        model = self.regressors
        if isinstance(model, (list, tuple)):
            if not model:
                return []
            model = model[0]
        return [{self.regressor_class.__name__: model.get_attributes()}]
def transform(self, scenario):
    '''
        transform ASLib scenario data

        Restricts scenario.feature_data to the columns listed in
        self.active_features and records self.active_groups as
        scenario.used_feature_groups.

        Arguments
        ---------
        scenario: data.aslib_scenario.ASlibScenario
            ASlib Scenario with all data in pandas

        Returns
        -------
        data.aslib_scenario.ASlibScenario
    '''
    kept_columns = scenario.feature_data[self.active_features]
    scenario.feature_data = kept_columns
    scenario.used_feature_groups = self.active_groups
    return scenario
class PairwiseClassifier(object):
    '''
    Selector that trains one binary classifier per pair of algorithms
    ("is algorithm i better than algorithm j?") and selects, per instance,
    the algorithm that wins the most pairwise votes.
    '''

    @staticmethod
    def add_params(cs: ConfigurationSpace):
        '''
            adds parameters to ConfigurationSpace

            Makes the "classifier" hyperparameter conditional on the
            selector being "PairwiseClassifier" (if that choice is offered).
        '''

        selector = cs.get_hyperparameter("selector")
        classifier = cs.get_hyperparameter("classifier")
        if "PairwiseClassifier" in selector.choices:
            cond = InCondition(child=classifier, parent=selector, values=["PairwiseClassifier"])
            cs.add_condition(cond)

    def __init__(self, classifier_class):
        '''
            Constructor

            Arguments
            ---------
            classifier_class: class
                wrapper class offering fit(X, y, config, weights),
                predict(X) and get_attributes()
        '''
        self.classifiers = []
        self.logger = logging.getLogger("PairwiseClassifier")
        self.classifier_class = classifier_class
        self.normalizer = MinMaxScaler()

    def fit(self, scenario: ASlibScenario, config: Configuration):
        '''
            fit one classifier per pair of algorithms to ASlib scenario data

            Arguments
            ---------
            scenario: data.aslib_scenario.ASlibScenario
                ASlib Scenario with all data in pandas
            config: ConfigSpace.Configuration
                configuration
        '''
        self.logger.info("Fit PairwiseClassifier with %s" %
                         (self.classifier_class))

        self.algorithms = scenario.algorithms

        n_algos = len(scenario.algorithms)
        # since sklearn (at least the RFs)
        # uses float32 and we pass float64,
        # the normalization ensures that floats
        # are not converted to inf or -inf
        X = self.normalizer.fit_transform(scenario.feature_data.values)
        # look up each performance column once instead of once per pair
        perf = [scenario.performance_data[algo].values
                for algo in scenario.algorithms]

        # Build a fresh list: predict() walks the classifiers by running pair
        # index, so models left over from an earlier fit() would be used in
        # place of the new ones.
        classifiers = []
        for i in range(n_algos):
            for j in range(i + 1, n_algos):
                y = perf[i] < perf[j]
                # the larger the performance gap, the more a mistake costs
                weights = np.abs(perf[i] - perf[j])
                clf = self.classifier_class()
                clf.fit(X, y, config, weights)
                classifiers.append(clf)
        self.classifiers = classifiers

    def predict(self, scenario: ASlibScenario):
        '''
            predict schedules for all instances in ASLib scenario data

            Arguments
            ---------
            scenario: data.aslib_scenario.ASlibScenario
                ASlib Scenario with all data in pandas

            Returns
            -------
            schedule: {inst -> (solver, time)}
                schedule of solvers with a running time budget
        '''

        if scenario.algorithm_cutoff_time:
            cutoff = scenario.algorithm_cutoff_time
        else:
            cutoff = 2**31

        n_algos = len(scenario.algorithms)
        X = self.normalizer.transform(scenario.feature_data.values)
        scores = np.zeros((X.shape[0], n_algos))
        clf_indx = 0
        for i in range(n_algos):
            for j in range(i + 1, n_algos):
                Y = self.classifiers[clf_indx].predict(X)
                # a positive prediction is a vote for i, a negative one for j
                scores[Y == 1, i] += 1
                scores[Y == 0, j] += 1
                clf_indx += 1

        # most votes wins
        algo_indx = np.argmax(scores, axis=1)

        return {
            str(inst): [(scenario.algorithms[i], cutoff + 1)]
            for i, inst in zip(algo_indx, scenario.feature_data.index)
        }

    def get_attributes(self):
        '''
            returns the learned attributes of the first pairwise classifier

            Returns
            -------
            list
                [{<classifier class name>: <classifier attributes>}],
                or an empty list if no classifier has been trained
        '''
        if not self.classifiers:
            # not fitted yet, or fewer than two algorithms
            return []
        class_attr = self.classifiers[0].get_attributes()
        return [{self.classifier_class.__name__: class_attr}]
def visualize(feature_pre_pipeline, pre_solver, selector):
    '''
        visualize all loaded components

        Builds a graphviz digraph that chains the active feature
        preprocessors, the steps of the pre-solving schedule and the
        selector (in this order), attaches the attributes reported by
        get_attributes() to the preprocessors and the selector, and renders
        the graph to 'test-output/autofolio' (opening a viewer).

        Arguments
        ---------
        feature_pre_pipeline: list
            list of fitted feature preprocessors
        pre_solver: Aspeed
            pre solver object with a saved static schedule (may be None)
        selector: autofolio.selector.*
            fitted selector object
    '''

    dot = Digraph(comment='AutoFolio')

    # Name of the most recent node of the main chain, None while the chain
    # is empty. Edges are only drawn from nodes that really exist; the former
    # index arithmetic drew an edge from a never-created 'fpp_0' node.
    last_node = None

    fpp_idx = 0
    for fpp in feature_pre_pipeline:
        if not fpp.active:
            continue
        fpp_idx += 1
        node = 'fpp_%d' %(fpp_idx)
        dot.node(node, fpp.__class__.__name__)
        if last_node is not None:
            dot.edge(last_node, node)
        last_node = node
        try:
            attributes = fpp.get_attributes()
            add_attributes(attributes=attributes, node_name=node, dot=dot)
        except AttributeError:
            # preprocessors without (usable) get_attributes() are drawn bare
            pass

    # no pre-solver (or an empty schedule) simply contributes no nodes
    schedule = (pre_solver.schedule if pre_solver else None) or []
    for idx, presolver in enumerate(schedule):
        node = 'pre_%d' %(idx)
        dot.node(node, "%s for %d sec" %(presolver[0], presolver[1]))
        if last_node is not None:
            dot.edge(last_node, node)
        last_node = node

    dot.node("selector", selector.__class__.__name__)
    if last_node is not None:
        dot.edge(last_node, "selector")
    try:
        attributes = selector.get_attributes()
        add_attributes(attributes=attributes, node_name='selector', dot=dot)
    except AttributeError:
        traceback.print_exc()

    dot.render('test-output/autofolio', view=True)
if __name__ == "__main__":
    # Command line entry point: load a pickled AutoFolio model, print its
    # configuration and plot its components.
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # --load is mandatory: without a file name there is nothing to plot, and
    # the former default of None only failed later when opening the file
    parser.add_argument("--load", type=str, required=True,
                        help="loads model (from --save); other modes are disabled with this options")
    args = parser.parse_args()

    scenario, feature_pre_pipeline, pre_solver, selector, config = load_model(args.load)

    print(config)

    visualize(feature_pre_pipeline, pre_solver, selector)
Manual 2 | ====== 3 | .. role:: bash(code) 4 | :language: bash 5 | 6 | 7 | In the following we will show how to use **AutoFolio**. 8 | 9 | .. _quick: 10 | 11 | Quick Start 12 | ----------- 13 | | If you have not installed *AutoFolio* yet, take a look at the `installation instructions <installation.html>`_ and make sure that all the requirements are fulfilled. 14 | | In the examples folder, you can find examples that illustrate how to read scenario files that allow you to automatically configure an algorithm, as well as examples that show how to directly use *AutoFolio* in Python. 15 | 16 | We will demonstrate the usage of *AutoFolio* on a simple toy example, see `examples/toy_example_csv`. 17 | 18 | To run the example, change into the root-directory of *AutoFolio* and type the following commands: 19 | 20 | .. code-block:: bash 21 | 22 | cd examples/toy_example_csv/ 23 | python ../../scripts/autofolio --perf perf.csv --feature_csv feats.csv 24 | 25 | 26 | AutoFolio will run a 10-fold cross validation on the given data. 27 | The `perf.csv` file is a csv file where each column corresponds to an algorithm 28 | and each row to an instance. Each entry is the performance of an algorithm on a given instance. 29 | The `feats.csv` is a csv file where each column corresponds to an instance feature 30 | and each row to an instance. 31 | By default, AutoFolio assumes that we want to minimize the performance, interpreted as a solution cost metric (in contrast to a runtime metric). 32 | 33 | In the end, AutoFolio prints the aggregated performance across the 10 folds. 34 | 35 | .. code-block:: bash 36 | 37 | INFO:AutoFolio:>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 38 | INFO:AutoFolio:CV Stats 39 | INFO:Stats:Number of instances: 10 40 | INFO:Stats:Average Solution Quality: 6.0000 41 | 42 | Looking into `perf.csv`, we can see that AutoFolio performs quite poorly on this example. 43 | The better of the two algorithms in `perf.csv` has a mean performance of 3.9. 
44 | The issue is that the default values of two hyperparameters of the random forest (`rf:min_samples_leaf` and `rf:bootstrap`) 45 | are a bad choice. 46 | 47 | So far, AutoFolio used only its default parameters. 48 | To automatically optimize its parameters, use the argument `-t, --tune`, e.g., 49 | 50 | .. code-block:: bash 51 | 52 | cd examples/toy_example_csv/ 53 | python ../../scripts/autofolio --perf perf.csv --feature_csv feats.csv -t 54 | 55 | In most cases, AutoFolio should be able to figure out that the previously mentioned parameters have to be changed 56 | such that AutoFolio can achieve a better performance. 57 | 58 | .. code-block:: bash 59 | 60 | INFO:AutoFolio:>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 61 | INFO:AutoFolio:CV Stats 62 | INFO:Stats:Number of instances: 10 63 | INFO:Stats:Average Solution Quality: 2.0000 64 | 65 | Input 66 | ----- 67 | 68 | AutoFolio can read two input formats: 69 | i) A simple interface for beginners based on two csv files (i.e., one for the performance of each algorithm on each instance 70 | and one for the instance features of each instance). 71 | ii) An advanced interface based on the ASlib format specification. 72 | 73 | CSV Input Format 74 | ---------------- 75 | 76 | The csv input format consists of two files: 77 | 78 | 1. A performance file, where each column corresponds to an algorithm and each row to an instance. 79 | 2. A feature file, where each column corresponds to an instance feature and each row to an instance. 80 | 81 | See `examples/toy_example_csv` for a trivial example 82 | and `examples/ttp-csv` for a complex example. 83 | 84 | Furthermore, you can specify three options that control how the performance file is interpreted: 85 | 86 | .. 
code-block:: bash 87 | 88 | --objective {runtime,solution_quality} 89 | Are the objective values in the performance data 90 | runtimes or an arbitrary solution quality (or cost) 91 | value (default: solution_quality) 92 | --runtime_cutoff RUNTIME_CUTOFF 93 | cutoff time for each algorithm run for the performance 94 | data (default: None) 95 | --maximize Set this parameter to indicate maximization of the 96 | performance metric (default: minimization) (default: 97 | False) 98 | 99 | ASlib Input format 100 | ------------------ 101 | 102 | The ASlib input format is more complex, 103 | but it is also more flexible and allows expressing more complex scenarios. 104 | See the ASlib website for examples and a formal specification. 105 | `examples/asp-aslib` also provides a complex scenario as an example in this format. 106 | 107 | To use this format, please use 108 | 109 | .. code-block:: bash 110 | 111 | -s SCENARIO, --scenario SCENARIO 112 | directory with ASlib scenario files (required if not 113 | using --load or csv input files (default: None) 114 | 115 | Modes 116 | ----- 117 | 118 | Cross-Validation Mode 119 | --------------------- 120 | 121 | The default mode of AutoFolio is running a 10-fold cross validation to estimate the performance of AutoFolio. 122 | 123 | Prediction Mode 124 | --------------- 125 | 126 | If you want to use AutoFolio to predict for instances not represented in the given data, 127 | you need to train AutoFolio and save its internal state to disk (use `python3 scripts/autofolio --save [filename]`). 128 | To predict on a new instance, 129 | please run 130 | 131 | .. code-block:: bash 132 | 133 | python3 scripts/autofolio --load [filename] --feature_vec [space-separated feature vector] 134 | 135 | Self-Tuning Mode 136 | ---------------- 137 | 138 | To use algorithm configuration to optimize the performance of AutoFolio, please use the option `--tune`. 
# ---- /autofolio/selector/regressors/random_forest.py ----
import numpy as np
import pandas as pd

from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
    UniformFloatHyperparameter, UniformIntegerHyperparameter
from ConfigSpace.conditions import EqualsCondition, InCondition
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace import Configuration

from aslib_scenario.aslib_scenario import ASlibScenario

import sklearn.ensemble

__author__ = "Marius Lindauer"
__license__ = "BSD"


class RandomForestRegressor(object):
    '''
        wrapper around sklearn.ensemble.RandomForestRegressor;
        all hyperparameters are read from a configuration
        using the prefix "rfreg:"
    '''

    @staticmethod
    def add_params(cs: ConfigurationSpace):
        '''
            adds parameters to ConfigurationSpace

            The "rfreg:*" hyperparameters are only added if <cs> has a
            "regressor" hyperparameter that offers "RandomForestRegressor";
            each of them is only active if this regressor is selected.

            Arguments
            ---------
            cs: ConfigurationSpace
                configuration space to add new parameters and conditions
        '''

        try:
            regressor = cs.get_hyperparameter("regressor")
            if "RandomForestRegressor" not in regressor.choices:
                return

            n_estimators = UniformIntegerHyperparameter(
                name="rfreg:n_estimators", lower=10, upper=100, default_value=10, log=True)
            cs.add_hyperparameter(n_estimators)
            max_features = CategoricalHyperparameter(
                name="rfreg:max_features", choices=["sqrt", "log2", "None"], default_value="sqrt")
            cs.add_hyperparameter(max_features)
            max_depth = UniformIntegerHyperparameter(
                name="rfreg:max_depth", lower=10, upper=2 ** 31, default_value=2 ** 31, log=True)
            cs.add_hyperparameter(max_depth)
            min_samples_split = UniformIntegerHyperparameter(
                name="rfreg:min_samples_split", lower=2, upper=100, default_value=2, log=True)
            cs.add_hyperparameter(min_samples_split)
            min_samples_leaf = UniformIntegerHyperparameter(
                name="rfreg:min_samples_leaf", lower=2, upper=100, default_value=10, log=True)
            cs.add_hyperparameter(min_samples_leaf)
            bootstrap = CategoricalHyperparameter(
                name="rfreg:bootstrap", choices=[True, False], default_value=True)
            cs.add_hyperparameter(bootstrap)

            # every parameter is only active if this regressor is chosen
            for child in (n_estimators, max_features, max_depth,
                          min_samples_split, min_samples_leaf, bootstrap):
                cs.add_condition(InCondition(
                    child=child, parent=regressor, values=["RandomForestRegressor"]))

        except Exception:
            # still best effort (e.g., <cs> has no "regressor" hyperparameter),
            # but KeyboardInterrupt and SystemExit are not swallowed anymore
            return

    def __init__(self):
        '''
            Constructor
        '''

        # sklearn model; None until fit() was called
        self.model = None

    def __str__(self):
        return "RandomForestRegressor"

    def fit(self, X, y, config: Configuration):
        '''
            fit a random forest regressor on the given data

            Arguments
            ---------
            X: numpy.array
                feature matrix
            y: numpy.array
                label vector
            config: ConfigSpace.Configuration
                configuration with all "rfreg:*" parameters

        '''

        # the configuration space encodes "no limit" as the string "None"
        max_features = config["rfreg:max_features"]
        if max_features == "None":
            max_features = None

        self.model = sklearn.ensemble.RandomForestRegressor(
            n_estimators=config["rfreg:n_estimators"],
            max_features=max_features,
            max_depth=config["rfreg:max_depth"],
            min_samples_split=config["rfreg:min_samples_split"],
            min_samples_leaf=config["rfreg:min_samples_leaf"],
            bootstrap=config["rfreg:bootstrap"],
            random_state=12345)
        self.model.fit(X, y)

    def predict(self, X):
        '''
            predict with the fitted model

            Arguments
            ---------
            X: numpy.array
                instance feature matrix

            Returns
            -------
            return value of the predict() method of the fitted sklearn model
        '''

        return self.model.predict(X)

    def get_attributes(self):
        '''
            returns a list of strings "<attribute> = <value>"
            for the hyperparameters of the fitted model

            Returns
            -------
            list of str
        '''
        attr = []
        attr.append("max_depth = %d" % (self.model.max_depth))
        attr.append("min_samples_split = %d" % (self.model.min_samples_split))
        attr.append("min_samples_leaf = %d" % (self.model.min_samples_leaf))
        attr.append("n_estimators = %d" % (self.model.n_estimators))
        attr.append("max_features = %s" % (self.model.max_features))
        return attr


# ---- /autofolio/selector/classifiers/random_forest.py ----
import numpy as np
import pandas as pd

from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
    UniformFloatHyperparameter, UniformIntegerHyperparameter
from ConfigSpace.conditions import EqualsCondition, InCondition
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace import Configuration

from aslib_scenario.aslib_scenario import ASlibScenario

from sklearn.ensemble import RandomForestClassifier

__author__ = "Marius Lindauer"
__license__ = "BSD"


class RandomForest(object):
    '''
        wrapper around sklearn.ensemble.RandomForestClassifier;
        all hyperparameters are read from a configuration
        using the prefix "rf:"
    '''

    @staticmethod
    def add_params(cs: ConfigurationSpace):
        '''
            adds parameters to ConfigurationSpace

            The "rf:*" hyperparameters are only added if <cs> has a
            "classifier" hyperparameter that offers "RandomForest";
            each of them is only active if this classifier is selected.

            Arguments
            ---------
            cs: ConfigurationSpace
                configuration space to add new parameters and conditions
        '''
        try:
            classifier = cs.get_hyperparameter("classifier")
            if "RandomForest" not in classifier.choices:
                return

            n_estimators = UniformIntegerHyperparameter(
                name="rf:n_estimators", lower=10, upper=100, default_value=10, log=True)
            cs.add_hyperparameter(n_estimators)
            criterion = CategoricalHyperparameter(
                name="rf:criterion", choices=["gini", "entropy"], default_value="gini")
            cs.add_hyperparameter(criterion)
            max_features = CategoricalHyperparameter(
                name="rf:max_features", choices=["sqrt", "log2", "None"], default_value="sqrt")
            cs.add_hyperparameter(max_features)
            max_depth = UniformIntegerHyperparameter(
                name="rf:max_depth", lower=10, upper=2**31, default_value=2**31, log=True)
            cs.add_hyperparameter(max_depth)
            min_samples_split = UniformIntegerHyperparameter(
                name="rf:min_samples_split", lower=2, upper=100, default_value=2, log=True)
            cs.add_hyperparameter(min_samples_split)
            min_samples_leaf = UniformIntegerHyperparameter(
                name="rf:min_samples_leaf", lower=2, upper=100, default_value=10, log=True)
            cs.add_hyperparameter(min_samples_leaf)
            bootstrap = CategoricalHyperparameter(
                name="rf:bootstrap", choices=[True, False], default_value=True)
            cs.add_hyperparameter(bootstrap)

            # every parameter is only active if this classifier is chosen
            for child in (n_estimators, criterion, max_features, max_depth,
                          min_samples_split, min_samples_leaf, bootstrap):
                cs.add_condition(InCondition(
                    child=child, parent=classifier, values=["RandomForest"]))
            # (the former debugging output "print(cs)" was removed here)
        except Exception:
            # still best effort (e.g., <cs> has no "classifier" hyperparameter),
            # but KeyboardInterrupt and SystemExit are not swallowed anymore
            return

    def __init__(self):
        '''
            Constructor
        '''

        # sklearn model; None until fit() was called
        self.model = None

    def __str__(self):
        return "RandomForest"

    def fit(self, X, y, config: Configuration, weights=None):
        '''
            fit a random forest classifier on the given data

            Arguments
            ---------
            X: numpy.array
                feature matrix
            y: numpy.array
                label vector
            config: ConfigSpace.Configuration
                configuration with all "rf:*" parameters
            weights: numpy.array
                vector with sample weights (passed to the fit() of the model)

        '''

        # the configuration space encodes "no limit" as the string "None"
        max_features = config["rf:max_features"]
        if max_features == "None":
            max_features = None

        self.model = RandomForestClassifier(
            n_estimators=config["rf:n_estimators"],
            max_features=max_features,
            criterion=config["rf:criterion"],
            max_depth=config["rf:max_depth"],
            min_samples_split=config["rf:min_samples_split"],
            min_samples_leaf=config["rf:min_samples_leaf"],
            bootstrap=config["rf:bootstrap"],
            random_state=12345)
        self.model.fit(X, y, weights)

    def predict(self, X):
        '''
            predict with the fitted model

            Arguments
            ---------
            X: numpy.array
                instance feature matrix

            Returns
            -------
            return value of the predict() method of the fitted sklearn model
        '''

        return self.model.predict(X)

    def get_attributes(self):
        '''
            returns a list of strings "<attribute> = <value>"
            for the hyperparameters of the fitted model

            Returns
            -------
            list of str
        '''
        attr = []
        attr.append("max_depth = %d" % (self.model.max_depth))
        attr.append("min_samples_split = %d" % (self.model.min_samples_split))
        attr.append("min_samples_leaf = %d" % (self.model.min_samples_leaf))
        attr.append("criterion = %s" % (self.model.criterion))
        attr.append("n_estimators = %d" % (self.model.n_estimators))
        attr.append("max_features = %s" % (self.model.max_features))
        return attr
# ---- /autofolio/io/cmd.py ----
import argparse
import sys
import os
import logging

__author__ = "Marius Lindauer"
__version__ = "2.0.0"
__license__ = "BSD"


class CMDParser(object):
    '''
        command line parser of AutoFolio (based on argparse)
    '''

    def __init__(self):
        '''
            Constructor
        '''
        self.logger = logging.getLogger("CMDParser")

        # parsed arguments; None until parse() was called
        self.args_ = None

        self._arg_parser = argparse.ArgumentParser(
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)

        aslib = self._arg_parser.add_argument_group(
            "Reading from ASlib Format")
        aslib.add_argument("-s", "--scenario", default=None,
                           help="directory with ASlib scenario files (required if not using --load or csv input files")

        csv = self._arg_parser.add_argument_group("Reading from CSV Format")
        csv.add_argument("--performance_csv", default=None,
                         help="performance data in csv table (column: algorithm, row: instance, delimeter: ,)")
        csv.add_argument("--feature_csv", default=None,
                         help="instance features data in csv table (column: features, row: instance, delimeter: ,)")
        csv.add_argument("--performance_test_csv", default=None,
                         help="performance *test* data in csv table (column: algorithm, row: instance, delimeter: ,)")
        csv.add_argument("--feature_test_csv", default=None,
                         help="instance *test* features data in csv table (column: features, row: instance, delimeter: ,)")
        csv.add_argument("--cv_csv", default=None,
                         help="cross validation splits in csv table (column: split ID, row: instance, delimeter: ,)")
        csv.add_argument("--objective", default="solution_quality", choices=[
                         "runtime", "solution_quality"], help="Are the objective values in the performance data runtimes or an arbitrary solution quality (or cost) value")
        csv.add_argument("--runtime_cutoff", default=None, type=float,
                         help="cutoff time for each algorithm run for the performance data")
        csv.add_argument("--maximize", default=False, action="store_true", help="Set this parameter to indicate maximization of the performance metric (default: minimization)")

        opt = self._arg_parser.add_argument_group("Optional Options")
        opt.add_argument("-t", "--tune", action="store_true", default=False,
                         help="uses SMAC3 to determine a better hyperparameter configuration")
        opt.add_argument("--smac_seed", default=42, type=int,
                         help="Seed passed to SMAC")
        opt.add_argument("-p", "--pcs", default=None,
                         help="pcs file to be read")
        opt.add_argument("--output_dir", default=None,
                         help="output directory of SMAC")
        opt.add_argument("--runcount_limit", type=int, default=42,
                         help="maximal number of AS evaluations (SMAC budget)")
        opt.add_argument("--wallclock_limit", type=int, default=300,
                         help="wallclock time limit in sec (SMAC budget)")
        opt.add_argument(
            "-v", "--verbose", choices=["INFO", "DEBUG"], default="INFO", help="verbose level")
        opt.add_argument("--save", type=str, default=None,
                         help="trains AutoFolio and saves AutoFolio's state in the given filename")
        opt.add_argument("--load", type=str, default=None,
                         help="loads model (from --save); other modes are disabled with this options")
        opt.add_argument("--feature_vec", default=None, type=str,
                         help="feature vector to predict algorithm to use -- has to be used in combination with --load")

        opt.add_argument("--config", type=str, default=None,
                         help="(yaml) config file with run-specific "
                         "configuration options for autofolio")

        outer_cv = self._arg_parser.add_argument_group("Outer Cross-fold Validation Options")

        outer_cv.add_argument("--outer-cv", action="store_true", default=False,
                              help="Use an \"outer\" cross-fold validation scheme "
                              "for tuning to ensure that SMAC does not peek at "
                              "the test set during hyperparameter optimization.")

        outer_cv.add_argument("--outer-cv-fold", type=int, default=None,
                              help="If this argument is given in --outer-cv "
                              "mode, then only the specified outer-cv fold "
                              "will be processed. Presumably, the learned "
                              "model will be saved using --save and the "
                              "results for all folds will be combined later.")

        outer_cv.add_argument("--out-template", type=str, default=None,
                              help="If given, then the fit model and solver "
                              "choices will be saved to this location. The "
                              "string is considered a template. \"$fold\" "
                              "will be replaced with the fold, and "
                              "\"$type\" will be replaced with the "
                              "appropriate file extension, \"pkl\" for the "
                              "models and \"csv\" for the solver choices. See "
                              "string.Template for more details about valid "
                              "tempaltes.")

    def parse(self, args=None):
        '''
            uses the self._arg_parser object to parse the cmd line arguments

            Arguments
            ---------
            args: list of str
                arguments to parse; if None (default),
                argparse reads them from sys.argv

            Returns
            -------
                parsed arguments
                unknown arguments
        '''

        self.args_, misc_params = self._arg_parser.parse_known_args(args)

        return self.args_, misc_params

    def _check_args(self):
        '''
            checks whether all provides options are ok (e.g., existence of files)

            Only the ASlib scenario directory is checked; the process exits
            with code 1 if it was given but is not a directory. Without a
            scenario (csv input or --load; or parse() not called yet)
            nothing is checked.
        '''

        # --scenario defaults to None and os.path.isdir(None) raises a TypeError
        scenario = getattr(self.args_, "scenario", None)
        if scenario is None:
            return

        if not os.path.isdir(scenario):
            self.logger.error(
                "ASlib Scenario directory not found: %s" % (scenario))
            sys.exit(1)
import os
import sys
import logging
import math

import numpy as np
import pandas as pd
import subprocess

from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
    UniformFloatHyperparameter, UniformIntegerHyperparameter
from ConfigSpace.conditions import EqualsCondition, InCondition
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace import Configuration

from aslib_scenario.aslib_scenario import ASlibScenario

__author__ = "Marius Lindauer"
__license__ = "BSD"


class Aspeed(object):
    '''
        computes a static pre-solving schedule by calling clingo
        (wrapped by runsolver) on an ASP encoding
    '''

    @staticmethod
    def add_params(cs: ConfigurationSpace, cutoff: int):
        '''
            adds parameters to ConfigurationSpace

            Arguments
            ---------
            cs: ConfigurationSpace
                configuration space to add new parameters and conditions
            cutoff: int
                maximal possible time for aspeed
        '''

        pre_solving = CategoricalHyperparameter(
            "presolving", choices=[True, False], default_value=False)
        cs.add_hyperparameter(pre_solving)
        pre_cutoff = UniformIntegerHyperparameter(
            "pre:cutoff", lower=1, upper=cutoff, default_value=math.ceil(cutoff * 0.1), log=True)
        cs.add_hyperparameter(pre_cutoff)
        # the cutoff of the schedule is only active if pre-solving is used
        cond = InCondition(child=pre_cutoff, parent=pre_solving, values=[True])
        cs.add_condition(cond)

    def __init__(self, clingo: str=None, runsolver: str=None, enc_fn: str=None):
        '''
            Constructor

            Paths that are not given default to files in the directory
            "../aspeed" relative to the directory of sys.argv[0].

            Arguments
            ---------
            clingo: str
                path to clingo binary
            runsolver: str
                path to runsolver binary
            enc_fn: str
                path to encoding file name
        '''
        self.logger = logging.getLogger("Aspeed")

        aspeed_dir = os.path.join(
            os.path.dirname(sys.argv[0]), "..", "aspeed")

        self.runsolver = runsolver if runsolver else os.path.join(
            aspeed_dir, "runsolver")
        self.clingo = clingo if clingo else os.path.join(
            aspeed_dir, "clingo")
        self.enc_fn = enc_fn if enc_fn else os.path.join(
            aspeed_dir, "enc1.lp")

        self.mem_limit = 2000  # mb
        self.cutoff = 60

        self.data_threshold = 300  # minimal number of instances to use
        self.data_fraction = 0.3  # fraction of instances to use

        self.schedule = []

    def fit(self, scenario: ASlibScenario, config: Configuration):
        '''
            compute a pre-solving schedule on the performance data of the scenario;
            the schedule stays empty if config["presolving"] is not set

            Arguments
            ---------
            scenario: data.aslib_scenario.ASlibScenario
                ASlib Scenario with all data in pandas
            config: ConfigSpace.Configuration
                configuration
        '''

        # forget the schedule of a previous fit() call
        self.schedule = []

        if config["presolving"]:
            self.logger.info("Compute Presolving Schedule with Aspeed")

            X = scenario.performance_data.values

            # if the instance set is too large, we subsample it
            # NOTE(review): instances are drawn with replacement and without
            # a fixed seed -- confirm that this is intended
            if X.shape[0] > self.data_threshold:
                random_indx = np.random.choice(
                    range(X.shape[0]),
                    size=min(X.shape[0], max(int(X.shape[0] * self.data_fraction), self.data_threshold)),
                    replace=True)
                X = X[random_indx, :]

            self.logger.debug("#Instances for pre-solving schedule: %d" % (X.shape[0]))
            # one fact per (instance, algorithm) pair; times are rounded up to at least 1
            times = ["time(i%d, %d, %d)." % (i, j, max(1, math.ceil(X[i, j])))
                     for i in range(X.shape[0]) for j in range(X.shape[1])]

            kappa = "kappa(%d)." % (config["pre:cutoff"])

            data_in = " ".join(times) + " " + kappa

            # call aspeed and save schedule
            self._call_clingo(data_in=data_in, algorithms=scenario.performance_data.columns)

    def _call_clingo(self, data_in: str, algorithms: list):
        '''
            call clingo on self.enc_fn and facts from data_in
            and save the last schedule found in self.schedule
            (sorted by time budget)

            Arguments
            ---------
            data_in: str
                facts in format time(I,A,T) and kappa(C)
            algorithms: list
                list of algorithm names
        '''
        # argument list instead of a shell string,
        # such that paths with spaces or shell meta characters work
        cmd = [self.runsolver,
               "-C", "%d" % (self.cutoff),
               "-M", "%d" % (self.mem_limit),
               "-w", "/dev/null",
               self.clingo, self.enc_fn, "-"]

        self.logger.info("Call: %s" % (" ".join(cmd)))

        try:
            p = subprocess.Popen(cmd,
                                 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                 universal_newlines=True)
            stdout, _ = p.communicate(input=data_in)
        except OSError as err:
            # a missing binary led to an empty schedule with the shell call;
            # keep this behavior instead of crashing
            self.logger.error("Could not call aspeed: %s" % (err))
            stdout = ""

        self.logger.debug(stdout)

        schedule_dict = {}
        for line in stdout.split("\n"):
            if line.startswith("slice"):
                schedule_dict = {}  # reinitialize for every found schedule
                # split() without argument skips empty tokens
                for fact in line.split():
                    if not fact.startswith("slice("):
                        continue
                    s_tuple = fact.replace("slice(", "").rstrip(")").split(",")
                    algo = algorithms[int(s_tuple[1])]
                    budget = int(s_tuple[2])
                    schedule_dict[algo] = budget

        self.schedule = sorted(schedule_dict.items(), key=lambda x: x[1])

        self.logger.info("Fitted Schedule: %s" % (self.schedule))

    def predict(self, scenario: ASlibScenario):
        '''
            returns the fitted schedule for each instance of the scenario

            Arguments
            ---------
            scenario: data.aslib_scenario.ASlibScenario
                ASlib Scenario with all data in pandas

            Returns
            -------
            schedule:{inst -> (solver, time)}
                schedule of solvers with a running time budget
                (the same schedule for all instances)
        '''

        return dict((inst, self.schedule) for inst in scenario.instances)
# ---- /autofolio/selector/classifiers/xgboost.py ----
import numpy as np
import pandas as pd

from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
    UniformFloatHyperparameter, UniformIntegerHyperparameter
from ConfigSpace.conditions import EqualsCondition, InCondition
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace import Configuration

from aslib_scenario.aslib_scenario import ASlibScenario

import xgboost as xgb

__author__ = "Marius Lindauer"
__license__ = "BSD"


class XGBoost(object):
    '''
        binary classifier based on xgboost;
        all hyperparameters are read from a configuration
        using the prefix "xgb:"
    '''

    @staticmethod
    def add_params(cs: ConfigurationSpace):
        '''
            adds parameters to ConfigurationSpace

            The "xgb:*" hyperparameters are only added if <cs> has a
            "classifier" hyperparameter that offers "XGBoost";
            each of them is only active if this classifier is selected.

            Arguments
            ---------
            cs: ConfigurationSpace
                configuration space to add new parameters and conditions
        '''

        try:
            classifier = cs.get_hyperparameter("classifier")
            if "XGBoost" not in classifier.choices:
                return

            num_round = UniformIntegerHyperparameter(
                name="xgb:num_round", lower=10, upper=100, default_value=50, log=True)
            cs.add_hyperparameter(num_round)
            alpha = UniformFloatHyperparameter(
                name="xgb:alpha", lower=0, upper=10, default_value=1)
            cs.add_hyperparameter(alpha)
            lambda_ = UniformFloatHyperparameter(
                name="xgb:lambda", lower=1, upper=10, default_value=1)
            cs.add_hyperparameter(lambda_)
            colsample_bylevel = UniformFloatHyperparameter(
                name="xgb:colsample_bylevel", lower=0.5, upper=1, default_value=1)
            cs.add_hyperparameter(colsample_bylevel)
            colsample_bytree = UniformFloatHyperparameter(
                name="xgb:colsample_bytree", lower=0.5, upper=1, default_value=1)
            cs.add_hyperparameter(colsample_bytree)
            subsample = UniformFloatHyperparameter(
                name="xgb:subsample", lower=0.01, upper=1, default_value=1)
            cs.add_hyperparameter(subsample)
            max_delta_step = UniformFloatHyperparameter(
                name="xgb:max_delta_step", lower=0, upper=10, default_value=0)
            cs.add_hyperparameter(max_delta_step)
            min_child_weight = UniformFloatHyperparameter(
                name="xgb:min_child_weight", lower=0, upper=20, default_value=1)
            cs.add_hyperparameter(min_child_weight)
            max_depth = UniformIntegerHyperparameter(
                name="xgb:max_depth", lower=1, upper=10, default_value=6)
            cs.add_hyperparameter(max_depth)
            gamma = UniformFloatHyperparameter(
                name="xgb:gamma", lower=0, upper=10, default_value=0)
            cs.add_hyperparameter(gamma)
            eta = UniformFloatHyperparameter(
                name="xgb:eta", lower=0, upper=1, default_value=0.3)
            cs.add_hyperparameter(eta)

            # every parameter is only active if this classifier is chosen
            for child in (num_round, alpha, lambda_, colsample_bylevel,
                          colsample_bytree, subsample, max_delta_step,
                          min_child_weight, max_depth, gamma, eta):
                cs.add_condition(InCondition(
                    child=child, parent=classifier, values=["XGBoost"]))
        except Exception:
            # still best effort (e.g., <cs> has no "classifier" hyperparameter),
            # but KeyboardInterrupt and SystemExit are not swallowed anymore
            return

    def __init__(self):
        '''
            Constructor
        '''

        # xgboost model; None until fit() was called
        self.model = None
        # "<name>=<value>" of all xgb parameters used in the last fit()
        self.attr = []

    def __str__(self):
        return "XGBoost"

    def fit(self, X, y, config: Configuration, weights=None):
        '''
            train an xgboost model (objective binary:logistic) on the given data

            Arguments
            ---------
            X: numpy.array
                feature matrix
            y: numpy.array
                label vector
            config: ConfigSpace.Configuration
                configuration with all "xgb:*" parameters
            weights: numpy.array
                vector with sample weights

        '''

        # start from scratch such that repeated calls of fit()
        # do not accumulate the attributes of earlier calls
        self.attr = []

        xgb_config = {'nthread': 1,
                      'silent': 1,
                      'objective': 'binary:logistic',
                      'seed': 12345}
        for param in config:
            if param.startswith("xgb:") and config[param] is not None:
                self.attr.append("%s=%s" % (param[4:], config[param]))
                # the number of rounds is an argument of xgb.train()
                # and not part of the parameter dictionary
                if param == "xgb:num_round":
                    continue
                xgb_config[param[4:]] = config[param]

        dtrain = xgb.DMatrix(X, label=y, weight=weights)
        self.model = xgb.train(xgb_config, dtrain, config["xgb:num_round"])

    def predict(self, X):
        '''
            predict binary labels with the trained model

            Arguments
            ---------
            X: numpy.array
                instance feature matrix

            Returns
            -------
            numpy.array
                predictions of the model thresholded at 0.5
                (0 if smaller than 0.5 and 1 otherwise)
        '''
        preds = np.array(self.model.predict(xgb.DMatrix(X)))
        preds[preds < 0.5] = 0
        preds[preds >= 0.5] = 1
        return preds

    def get_attributes(self):
        '''
            returns a list of strings "<attribute>=<value>"
            for all xgb parameters used in the last call of fit()

            Returns
            -------
            list of str
        '''
        return self.attr
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
PAPER         =
BUILDDIR      = build

# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif

# Internal variables.
PAPEROPT_a4     = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .

# None of the targets of this Makefile creates a file with its own name.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp applehelp devhelp epub latex latexpdf latexpdfja text man texinfo info changes linkcheck doctest coverage gettext xml pseudoxml

# Print an overview of all available targets.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  applehelp  to make an Apple Help Book"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
	@echo "  coverage   to run coverage check of the documentation (if enabled)"

# Remove everything below the build directory.
clean:
	rm -rf $(BUILDDIR)/*

# Each target below runs sphinx-build with the builder of the same name
# and writes its output to $(BUILDDIR)/<builder>.
html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/AutoFolio.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/AutoFolio.qhc"

applehelp:
	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
	@echo
	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
	@echo "N.B. You won't be able to view it unless you put it in" \
	      "~/Library/Documentation/Help or install it in your application" \
	      "bundle."

devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/AutoFolio"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/AutoFolio"
	@echo "# devhelp"

epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

# $(MAKE) instead of a plain "make" such that command line flags
# (e.g., -j or -n) are passed on to the sub-make, as in latexpdf above.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."
178 | 179 | coverage: 180 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 181 | @echo "Testing of coverage in the sources finished, look at the " \ 182 | "results in $(BUILDDIR)/coverage/python.txt." 183 | 184 | xml: 185 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 186 | @echo 187 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 188 | 189 | pseudoxml: 190 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 191 | @echo 192 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 193 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AutoFolio 2 | 3 | AutoFolio is an algorithm selection tool, 4 | i.e., selecting a well-performing algorithm for a given instance [Rice 1976]. 5 | In contrast to other algorithm selection tools, 6 | users of AutoFolio are not bothered with the decision which algorithm selection approach to use 7 | and how to set its hyper-parameters. 8 | AutoFolio uses one of the state-of-the-art algorithm configuration tools, namely SMAC [Hutter et al LION'16] 9 | to automatically determine a well-performing algorithm selection approach 10 | and its hyper-parameters for the given algorithm selection data. 11 | Therefore, AutoFolio has a robust performance across different algorithm selection tasks. 12 | 13 | ## Version 14 | 15 | This package is a re-implementation of the original AutoFolio. 
16 | It follows the same approach as the original AutoFolio 17 | but it has some crucial differences: 18 | 19 | * instead of SMAC v2, we use the pure Python implementation of SMAC (v3) 20 | * fewer implemented algorithm selection approaches -- focus on promising approaches to avoid wasting time during configuration 21 | * support of solution quality scenarios 22 | 23 | ## License 24 | 25 | This program is free software: you can redistribute it and/or modify it under the terms of the 2-clause BSD license (please see the LICENSE file). 26 | This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 27 | You should have received a copy of the 2-clause BSD license along with this program (see LICENSE file). If not, see https://opensource.org/licenses/BSD-2-Clause. 28 | 29 | ## Installation 30 | 31 | ### Requirements 32 | 33 | NOTE: AutoFolio requires the future SMAC 0.9; currently only available in the [development branch of SMAC](https://github.com/automl/SMAC3/tree/development) 34 | 35 | AutoFolio runs with **Python 3.5**. 36 | 37 | To install (nearly) all requirements, please run: 38 | 39 | `cat requirements.txt | xargs -n 1 -L 1 pip install` 40 | 41 | Many of its dependencies can be fulfilled by using [Anaconda >3.4](https://www.continuum.io/). 42 | If you use Anaconda as your Python environment, you have to install three packages before you can install SMAC (as one of AutoFolio's requirements): 43 | 44 | `conda install gxx_linux-64 gcc_linux-64 swig` 45 | 46 | To use pre-solving schedules, [clingo](http://potassco.sourceforge.net/) is required. We provide a binary compiled under Ubuntu 14.04 which may not work under another OS. Please put a working `clingo` binary with Python support into the folder `aspeed/`. 47 | 48 | ## Usage 49 | 50 | We provide under `scripts` a command-line interface for AutoFolio. 
51 | To get an overview of all options of AutoFolio, simply run: 52 | 53 | `python3 scripts/autofolio --help` 54 | 55 | We provide some examples in `examples/` 56 | 57 | ### Input Formats 58 | 59 | AutoFolio reads two input formats: CSV and [ASlib](http://www.aslib.net). 60 | The CSV format is easier for new users but has some limitations in expressing all kinds of input data. 61 | The ASlib format has a higher expressiveness -- please see [www.aslib.net](http://www.aslib.net) for all details on this input format. 62 | 63 | For the CSV format, simply two files are required. 64 | One file with the performance data of each algorithm on each instance (each row an instance, and each column an algorithm). 65 | And another file with the instance features for each instance (each row an instance and each column a feature). 66 | All other meta-data (such as runtime cutoff) has to be specified by command line options (see `python3 scripts/autofolio --help`). 67 | 68 | ### Configuration file 69 | 70 | A YAML configuration file can be given to control some of the internal AutoFolio 71 | behavior. It is given with the `--config` option. 72 | 73 | The recognized options and their types are as follows. 74 | 75 | * `wallclock_limit`. The amount of time (in seconds) for optimizing 76 | hyperparameters. Type: integer. Default: 300 seconds --- should be increased! 77 | 78 | #### Feature groups 79 | 80 | * `allowed_feature_groups`. A list of the feature groups to consider for 81 | prediction. This must match those specified in the ASlib scenario. Type: list 82 | of strings. Default: all feature sets are allowed. 83 | 84 | #### Preprocessing 85 | 86 | * `pca`. Whether to include PCA as a choice for preprocessing. Type: Boolean. Default: True. 87 | 88 | * `impute`. Whether missing value imputation is a choice for preprocessing. Type: Boolean. Default: True. 89 | 90 | * `scale`. Whether z-score scaling is a choice for preprocessing. Type: Boolean. Default: True. 
91 | 92 | #### Presolving 93 | 94 | * `presolve`. Whether to use a presolver. Type: Boolean. Default: True. 95 | 96 | #### Algorithm selection model classes 97 | 98 | * `random_forest_classifier`. Whether the random forest classifier is a model class choice. Type: Boolean. Default: True. 99 | 100 | * `xgboost_classifier`. Whether the XGBoost classifier is a model class choice. Type: Boolean. Default: True. 101 | 102 | * `random_forest_regressor`. Whether the random forest regressor is a model class choice. Type: Boolean. Default: True. 103 | 104 | ### Cross-Validation Mode 105 | 106 | The default mode of AutoFolio is running a 10-fold cross-validation to estimate the performance of AutoFolio. 107 | 108 | ### "Outer" Cross-Validation Mode 109 | 110 | "Outer" cross-validation again uses a 10-fold cross-validation scheme to 111 | evaluate AutoFolio; in this case, though, the subset for testing is not at all 112 | seen by AutoFolio during training. Internally, the nine training folds are 113 | further used in an "inner" cross-validation to avoid overfitting. 114 | 115 | The `--outer-cv` flag indicates to use this mode. For example: 116 | 117 | ``` 118 | python3 scripts/autofolio -s examples/asp-aslib/data/ --outer-cv 119 | 120 | ``` 121 | #### Saving the outer cross-validation choices 122 | 123 | The learned model and solver choices for each instance can be saved using the 124 | `--out-template` option. If given, the fit model and solver choices will be 125 | saved to this location. The string is considered a template. "${fold}" will be 126 | replaced with the outer cv fold, and "${type}" will be replaced with the 127 | appropriate file extension, "pkl" for the models and "csv" for the solver 128 | choices. See string.Template for more details about valid templates. 129 | 130 | **N.B.** In many shells (such as bash), it is necessary to put the template in 131 | single quotes to avoid shell replacement in the template. (Double quotes will 132 | not typically work.) 
133 | 134 | ``` 135 | python3 scripts/autofolio -s examples/asp-aslib/data/ --outer-cv --out-template 'asp.fold-${fold}.${type}' 136 | 137 | ``` 138 | #### Parallelizing the outer cross-validation 139 | 140 | Optionally, only a single "outer" cv fold can be run. Presumably, this is used 141 | to parallelize the outer cv calls across a cluster. The `--outer-cv-fold` option 142 | specifies which fold is used. Typically, this option would be combined with 143 | `--out-template`, and the results would be combined in post-processing. 144 | 145 | **N.B.** This number should range from 1 to 10 (not 0 to 9). 146 | 147 | ``` 148 | python3 scripts/autofolio -s examples/asp-aslib/data/ --outer-cv --outer-cv-fold 1 --out-template 'asp.fold-${fold}.${type}' 149 | ``` 150 | 151 | 152 | ### Prediction Mode 153 | 154 | If you want to use AutoFolio to predict for instances not represented in the given data, 155 | you need to train AutoFolio and save its internal state to disk (use `python3 scripts/autofolio --save [filename]`). 156 | To predict on a new instance, 157 | please run 158 | 159 | `python3 scripts/autofolio --load [filename] --feature_vec "[space-separated feature vector]"` 160 | 161 | Please note that the quotes around the feature vector are important. 162 | 163 | ### Self-Tuning Mode 164 | 165 | To use algorithm configuration to optimize the performance of AutoFolio please use the option `--tune`. 166 | 167 | ## Reference 168 | 169 | [JAIR Journal Article](http://aad.informatik.uni-freiburg.de/papers/15-JAIR-Autofolio.pdf) 170 | 171 | @ARTICLE{lindauer-jair15a, 172 | author = {M. Lindauer and H. Hoos and F. Hutter and T. 
Schaub}, 173 | title = {AutoFolio: An automatically configured Algorithm Selector}, 174 | volume = {53}, 175 | journal = {Journal of Artificial Intelligence Research}, 176 | year = {2015}, 177 | pages = {745-778} 178 | } 179 | 180 | ## Contact 181 | 182 | Marius Lindauer: lindauer@cs.uni-freiburg.de 183 | -------------------------------------------------------------------------------- /examples/asp-aslib/data/description.txt: -------------------------------------------------------------------------------- 1 | algorithm_cutoff_memory: '?' 2 | algorithm_cutoff_time: 600 3 | default_steps: 4 | - Static 5 | - Dynamic-1 6 | - Dynamic-2 7 | - Dynamic-3 8 | - Dynamic-4 9 | feature_steps: 10 | Dynamic-1: 11 | provides: 12 | - Choices-1 13 | - Conflicts/Choices-1 14 | - Avg_Conflict_Levels-1 15 | - Avg_LBD_Levels-1 16 | - Learnt_from_Conflict-1 17 | - Learnt_from_Loop-1 18 | - Frac_Learnt_from_Conflict-1 19 | - Frac_Learnt_from_Loop-1 20 | - Literals_in_Conflict_Nogoods-1 21 | - Literals_in_Loop_Nogoods-1 22 | - Frac_Literals_in_Conflict_Nogoods-1 23 | - Frac_Literals_in_Loop_Nogoods-1 24 | - Removed_Nogoods-1 25 | - Learnt_Binary-1 26 | - Learnt_Ternary-1 27 | - Learnt_Others-1 28 | - Frac_Removed_Nogood-1 29 | - Frac_Learnt_Binary-1 30 | - Frac_Learnt_Ternary-1 31 | - Frac_Learnt_Others-1 32 | - Skipped_Levels_while_Backjumping-1 33 | - Avg_Skipped_Levels_while_Backjumping-1 34 | - Longest_Backjumping-1 35 | - Running_Avg_Conflictlevel-1 36 | - Running_Avg_LBD-1 37 | requires: 38 | - Static 39 | Dynamic-2: 40 | provides: 41 | - Choices-2 42 | - Conflicts/Choices-2 43 | - Avg_Conflict_Levels-2 44 | - Avg_LBD_Levels-2 45 | - Learnt_from_Conflict-2 46 | - Learnt_from_Loop-2 47 | - Frac_Learnt_from_Conflict-2 48 | - Frac_Learnt_from_Loop-2 49 | - Literals_in_Conflict_Nogoods-2 50 | - Literals_in_Loop_Nogoods-2 51 | - Frac_Literals_in_Conflict_Nogoods-2 52 | - Frac_Literals_in_Loop_Nogoods-2 53 | - Removed_Nogoods-2 54 | - Learnt_Binary-2 55 | - Learnt_Ternary-2 56 | - 
Learnt_Others-2 57 | - Frac_Removed_Nogood-2 58 | - Frac_Learnt_Binary-2 59 | - Frac_Learnt_Ternary-2 60 | - Frac_Learnt_Others-2 61 | - Skipped_Levels_while_Backjumping-2 62 | - Avg_Skipped_Levels_while_Backjumping-2 63 | - Longest_Backjumping-2 64 | - Running_Avg_Conflictlevel-2 65 | - Running_Avg_LBD-2 66 | requires: 67 | - Static 68 | - Dynamic-1 69 | Dynamic-3: 70 | provides: 71 | - Choices-3 72 | - Conflicts/Choices-3 73 | - Avg_Conflict_Levels-3 74 | - Avg_LBD_Levels-3 75 | - Learnt_from_Conflict-3 76 | - Learnt_from_Loop-3 77 | - Frac_Learnt_from_Conflict-3 78 | - Frac_Learnt_from_Loop-3 79 | - Literals_in_Conflict_Nogoods-3 80 | - Literals_in_Loop_Nogoods-3 81 | - Frac_Literals_in_Conflict_Nogoods-3 82 | - Frac_Literals_in_Loop_Nogoods-3 83 | - Removed_Nogoods-3 84 | - Learnt_Binary-3 85 | - Learnt_Ternary-3 86 | - Learnt_Others-3 87 | - Frac_Removed_Nogood-3 88 | - Frac_Learnt_Binary-3 89 | - Frac_Learnt_Ternary-3 90 | - Frac_Learnt_Others-3 91 | - Skipped_Levels_while_Backjumping-3 92 | - Avg_Skipped_Levels_while_Backjumping-3 93 | - Longest_Backjumping-3 94 | - Running_Avg_Conflictlevel-3 95 | - Running_Avg_LBD-3 96 | - Choices-4 97 | requires: 98 | - Static 99 | - Dynamic-1 100 | - Dynamic-2 101 | Dynamic-4: 102 | provides: 103 | - Choices-4 104 | - Conflicts/Choices-4 105 | - Avg_Conflict_Levels-4 106 | - Avg_LBD_Levels-4 107 | - Learnt_from_Conflict-4 108 | - Learnt_from_Loop-4 109 | - Frac_Learnt_from_Conflict-4 110 | - Frac_Learnt_from_Loop-4 111 | - Literals_in_Conflict_Nogoods-4 112 | - Literals_in_Loop_Nogoods-4 113 | - Frac_Literals_in_Conflict_Nogoods-4 114 | - Frac_Literals_in_Loop_Nogoods-4 115 | - Removed_Nogoods-4 116 | - Learnt_Binary-4 117 | - Learnt_Ternary-4 118 | - Learnt_Others-4 119 | - Frac_Removed_Nogood-4 120 | - Frac_Learnt_Binary-4 121 | - Frac_Learnt_Ternary-4 122 | - Frac_Learnt_Others-4 123 | - Skipped_Levels_while_Backjumping-4 124 | - Avg_Skipped_Levels_while_Backjumping-4 125 | - Longest_Backjumping-4 126 | - 
Running_Avg_Conflictlevel-4 127 | - Running_Avg_LBD-4 128 | requires: 129 | - Static 130 | - Dynamic-1 131 | - Dynamic-2 132 | - Dynamic-3 133 | Static: 134 | provides: 135 | - Frac_Neg_Body 136 | - Frac_Pos_Body 137 | - Frac_Unary_Rules 138 | - Frac_Binary_Rules 139 | - Frac_Ternary_Rules 140 | - Frac_Integrity_Rules 141 | - Tight 142 | - Problem_Variables 143 | - Free_Problem_Variables 144 | - Assigned_Problem_Variables 145 | - Constraints 146 | - Constraints/Vars 147 | - Created_Bodies 148 | - Program_Atoms 149 | - SCCS 150 | - Nodes_in_Positive_BADG 151 | - Rules 152 | - Normal_Rules 153 | - Cardinality_Rules 154 | - Choice_Rules 155 | - Weight_Rules 156 | - Frac_Normal_Rules 157 | - Frac_Cardinality_Rules 158 | - Frac_Choice_Rules 159 | - Frac_Weight_Rules 160 | - Equivalences 161 | - Atom-Atom_Equivalences 162 | - Body-Body_Equivalences 163 | - Other_Equivalences 164 | - Frac_Atom-Atom_Equivalences 165 | - Frac_Body-Body_Equivalences 166 | - Frac_Other_Equivalences 167 | - Binary_Constraints 168 | - Ternary_Constraints 169 | - Other_Constraints 170 | - Frac_Binary_Constraints 171 | - Frac_Ternary_Constraints 172 | - Frac_Other_Constraints 173 | features_cutoff_memory: '?' 
174 | features_cutoff_time: 600 175 | features_deterministic: 176 | - Frac_Neg_Body 177 | - Frac_Pos_Body 178 | - Frac_Unary_Rules 179 | - Frac_Binary_Rules 180 | - Frac_Ternary_Rules 181 | - Frac_Integrity_Rules 182 | - Tight 183 | - Problem_Variables 184 | - Free_Problem_Variables 185 | - Assigned_Problem_Variables 186 | - Constraints 187 | - Constraints/Vars 188 | - Created_Bodies 189 | - Program_Atoms 190 | - SCCS 191 | - Nodes_in_Positive_BADG 192 | - Rules 193 | - Normal_Rules 194 | - Cardinality_Rules 195 | - Choice_Rules 196 | - Weight_Rules 197 | - Frac_Normal_Rules 198 | - Frac_Cardinality_Rules 199 | - Frac_Choice_Rules 200 | - Frac_Weight_Rules 201 | - Equivalences 202 | - Atom-Atom_Equivalences 203 | - Body-Body_Equivalences 204 | - Other_Equivalences 205 | - Frac_Atom-Atom_Equivalences 206 | - Frac_Body-Body_Equivalences 207 | - Frac_Other_Equivalences 208 | - Binary_Constraints 209 | - Ternary_Constraints 210 | - Other_Constraints 211 | - Frac_Binary_Constraints 212 | - Frac_Ternary_Constraints 213 | - Frac_Other_Constraints 214 | - Choices-1 215 | - Conflicts/Choices-1 216 | - Avg_Conflict_Levels-1 217 | - Avg_LBD_Levels-1 218 | - Learnt_from_Conflict-1 219 | - Learnt_from_Loop-1 220 | - Frac_Learnt_from_Conflict-1 221 | - Frac_Learnt_from_Loop-1 222 | - Literals_in_Conflict_Nogoods-1 223 | - Literals_in_Loop_Nogoods-1 224 | - Frac_Literals_in_Conflict_Nogoods-1 225 | - Frac_Literals_in_Loop_Nogoods-1 226 | - Removed_Nogoods-1 227 | - Learnt_Binary-1 228 | - Learnt_Ternary-1 229 | - Learnt_Others-1 230 | - Frac_Removed_Nogood-1 231 | - Frac_Learnt_Binary-1 232 | - Frac_Learnt_Ternary-1 233 | - Frac_Learnt_Others-1 234 | - Skipped_Levels_while_Backjumping-1 235 | - Avg_Skipped_Levels_while_Backjumping-1 236 | - Longest_Backjumping-1 237 | - Running_Avg_Conflictlevel-1 238 | - Running_Avg_LBD-1 239 | - Choices-2 240 | - Conflicts/Choices-2 241 | - Avg_Conflict_Levels-2 242 | - Avg_LBD_Levels-2 243 | - Learnt_from_Conflict-2 244 | - Learnt_from_Loop-2 
245 | - Frac_Learnt_from_Conflict-2 246 | - Frac_Learnt_from_Loop-2 247 | - Literals_in_Conflict_Nogoods-2 248 | - Literals_in_Loop_Nogoods-2 249 | - Frac_Literals_in_Conflict_Nogoods-2 250 | - Frac_Literals_in_Loop_Nogoods-2 251 | - Removed_Nogoods-2 252 | - Learnt_Binary-2 253 | - Learnt_Ternary-2 254 | - Learnt_Others-2 255 | - Frac_Removed_Nogood-2 256 | - Frac_Learnt_Binary-2 257 | - Frac_Learnt_Ternary-2 258 | - Frac_Learnt_Others-2 259 | - Skipped_Levels_while_Backjumping-2 260 | - Avg_Skipped_Levels_while_Backjumping-2 261 | - Longest_Backjumping-2 262 | - Running_Avg_Conflictlevel-2 263 | - Running_Avg_LBD-2 264 | - Choices-3 265 | - Conflicts/Choices-3 266 | - Avg_Conflict_Levels-3 267 | - Avg_LBD_Levels-3 268 | - Learnt_from_Conflict-3 269 | - Learnt_from_Loop-3 270 | - Frac_Learnt_from_Conflict-3 271 | - Frac_Learnt_from_Loop-3 272 | - Literals_in_Conflict_Nogoods-3 273 | - Literals_in_Loop_Nogoods-3 274 | - Frac_Literals_in_Conflict_Nogoods-3 275 | - Frac_Literals_in_Loop_Nogoods-3 276 | - Removed_Nogoods-3 277 | - Learnt_Binary-3 278 | - Learnt_Ternary-3 279 | - Learnt_Others-3 280 | - Frac_Removed_Nogood-3 281 | - Frac_Learnt_Binary-3 282 | - Frac_Learnt_Ternary-3 283 | - Frac_Learnt_Others-3 284 | - Skipped_Levels_while_Backjumping-3 285 | - Avg_Skipped_Levels_while_Backjumping-3 286 | - Longest_Backjumping-3 287 | - Running_Avg_Conflictlevel-3 288 | - Running_Avg_LBD-3 289 | - Choices-4 290 | - Conflicts/Choices-4 291 | - Avg_Conflict_Levels-4 292 | - Avg_LBD_Levels-4 293 | - Learnt_from_Conflict-4 294 | - Learnt_from_Loop-4 295 | - Frac_Learnt_from_Conflict-4 296 | - Frac_Learnt_from_Loop-4 297 | - Literals_in_Conflict_Nogoods-4 298 | - Literals_in_Loop_Nogoods-4 299 | - Frac_Literals_in_Conflict_Nogoods-4 300 | - Frac_Literals_in_Loop_Nogoods-4 301 | - Removed_Nogoods-4 302 | - Learnt_Binary-4 303 | - Learnt_Ternary-4 304 | - Learnt_Others-4 305 | - Frac_Removed_Nogood-4 306 | - Frac_Learnt_Binary-4 307 | - Frac_Learnt_Ternary-4 308 | - 
class Stats(object):
    '''
        Accumulator for validation results: summed PAR1/PAR10 scores,
        timeout/solved/unsolvable counters, oracle and single-best sums
        and per-algorithm selection counts.
    '''

    def __init__(self, runtime_cutoff):
        ''' Constructor

            Arguments
            ---------
            runtime_cutoff: int
                maximal running time; a falsy value (e.g., None) makes
                show() report an average solution quality instead of
                runtime statistics
        '''
        self.par1 = 0.0
        self.par10 = 0.0
        self.timeouts = 0
        self.solved = 0
        self.unsolvable = 0
        self.presolved_feats = 0
        self.oracle = 0
        self.sbs = 0

        self.runtime_cutoff = runtime_cutoff

        # algorithm name -> number of times it was selected
        self.selection_freq = {}

        self.logger = logging.getLogger("Stats")

    def show(self, remove_unsolvable: bool=True):
        '''
            shows statistics

            Arguments
            --------
            remove_unsolvable : bool
                remove unsolvable from stats
                (only has an effect if a runtime cutoff is set)

            Returns
            -------
            float
                PAR10 sum divided by the number of counted instances;
                without a runtime cutoff, the average solution quality
        '''

        if remove_unsolvable and self.runtime_cutoff:
            rm_string = "removed"
            n_all = self.timeouts + self.solved
            self.logger.debug("Statistics before removing unsolvable instances")
            self.logger.debug("PAR1: %.4f", self.par1 / n_all)
            self.logger.debug("PAR10: %.4f", self.par10 / n_all)
            self.logger.debug("Timeouts: %d / %d", self.timeouts, n_all)
            # every unsolvable instance is taken out as one timeout
            penalty = self.unsolvable * self.runtime_cutoff
            timeouts = self.timeouts - self.unsolvable
            par1 = self.par1 - penalty
            par10 = self.par10 - penalty * 10
            # NOTE(review): assumes an unsolvable instance contributes
            # 10 * cutoff to the oracle and single-best sums (presumably
            # PAR10-penalized performance data) -- confirm against caller
            oracle = self.oracle - penalty * 10
            sbs = self.sbs - penalty * 10
        else:
            rm_string = "not removed"
            timeouts = self.timeouts
            par1 = self.par1
            par10 = self.par10
            oracle = self.oracle
            sbs = self.sbs

        if self.runtime_cutoff:
            n_samples = timeouts + self.solved
            self.logger.info("PAR1: %.4f", par1 / n_samples)
            self.logger.info("PAR10: %.4f", par10 / n_samples)
            self.logger.info("Timeouts: %d / %d", timeouts, n_samples)
            self.logger.info("Presolved during feature computation: %d / %d",
                             self.presolved_feats, n_samples)
            self.logger.info("Solved: %d / %d", self.solved, n_samples)
            self.logger.info("Unsolvable (%s): %d / %d",
                             rm_string, self.unsolvable, n_samples + self.unsolvable)
        else:
            n_samples = self.solved
            self.logger.info("Number of instances: %d", n_samples)
            self.logger.info("Average Solution Quality: %.4f", par1 / n_samples)
            # no timeouts in quality mode: the returned score is the plain average
            par10 = par1

        self.logger.info("Oracle: %.4f", oracle / n_samples)
        if sbs > 0:
            self.logger.info("Single Best: %.4f", sbs / n_samples)
            if sbs != oracle:
                self.logger.info("Normalized Score: %.4f",
                                 (par10 - oracle) / (sbs - oracle))
            else:
                # no gap between single best and oracle: the score is
                # undefined; do not let a log line abort the evaluation
                self.logger.info("Normalized Score: undefined (single best equals oracle)")

        self.logger.debug("Selection Frequency")
        for algo, n in self.selection_freq.items():
            self.logger.debug("%s: %.2f", algo, n / n_samples)

        return par10 / n_samples

    def merge(self, stat):
        '''
            adds stats from another given Stats objects

            Arguments
            ---------
            stat : Stats
                statistics to add to this object; stat itself is not modified
        '''
        self.par1 += stat.par1
        self.par10 += stat.par10
        self.timeouts += stat.timeouts
        self.solved += stat.solved
        self.unsolvable += stat.unsolvable
        self.presolved_feats += stat.presolved_feats
        self.oracle += stat.oracle
        self.sbs += stat.sbs

        for algo, n in stat.selection_freq.items():
            self.selection_freq[algo] = self.selection_freq.get(algo, 0) + n


class Validator(object):
    '''
        Evaluates per-instance algorithm schedules against the performance
        data of an ASlib scenario and collects the results in a Stats object.
    '''

    def __init__(self):
        ''' Constructor '''
        self.logger = logging.getLogger("Validation")

    def validate_runtime(self, schedules: dict, test_scenario: "ASlibScenario",
                         train_scenario: "ASlibScenario" = None):
        '''
            validate selected schedules on test instances for runtime

            Arguments
            ---------
            schedules: dict {instance name -> tuples [algo, budget]}
                algorithm schedules per instance
            test_scenario: ASlibScenario
                ASlib scenario with test instances
            train_scenario: ASlibScenario
                ASlib scenario with training instances;
                required for SBS score computation

            Returns
            -------
            Stats
                statistics accumulated over all instances in schedules

            Raises
            ------
            ValueError
                if the first performance type of test_scenario is not "runtime"
        '''
        if test_scenario.performance_type[0] != "runtime":
            raise ValueError("Cannot validate non-runtime scenario with runtime validation method")

        cutoff = test_scenario.algorithm_cutoff_time
        stat = Stats(runtime_cutoff=cutoff)

        # feature cost per instance, summed over all used feature groups
        f_times = None
        if test_scenario.feature_cost_data is not None:
            f_times = test_scenario.feature_cost_data[
                test_scenario.used_feature_groups].sum(axis=1)

        feature_stati = test_scenario.feature_runstatus_data[
            test_scenario.used_feature_groups]

        stat.oracle = test_scenario.performance_data.min(axis=1).sum()
        if train_scenario:
            # single best solver: best summed performance on the training data
            sbs = train_scenario.performance_data.sum(axis=0).idxmin()
            stat.sbs = test_scenario.performance_data.sum(axis=0)[sbs]

        # an instance is unsolvable if no algorithm has run status "ok" on it
        ok_status = test_scenario.runstatus_data == "ok"
        unsolvable = ok_status.sum(axis=1) == 0
        stat.unsolvable += unsolvable.sum()

        for inst, schedule in schedules.items():
            self.logger.debug("Validate %s on %s", schedule, inst)
            used_time = 0
            if f_times is not None:
                used_time += f_times[inst]
                self.logger.debug("Used Feature time: %f", used_time)

            presolved = any("presolved" in feature_stati[fg][inst]
                            for fg in test_scenario.used_feature_groups)

            if presolved and used_time < cutoff:
                stat.par1 += used_time
                stat.solved += 1
                stat.presolved_feats += 1
                self.logger.debug("Presolved during feature computation")
                continue
            elif presolved and used_time >= cutoff:
                stat.par1 += cutoff
                stat.timeouts += 1
                continue

            # NOTE(review): if the schedule is exhausted without a solving
            # run and without reaching the cutoff, the instance is counted
            # neither as solved nor as timeout -- confirm this is intended
            for algo, budget in schedule:
                stat.selection_freq[algo] = stat.selection_freq.get(algo, 0) + 1
                time = test_scenario.performance_data[algo][inst]
                used_time += min(time, budget)
                solved = (time <= budget and used_time <= cutoff
                          and test_scenario.runstatus_data[algo][inst] == "ok")
                if solved:
                    stat.par1 += used_time
                    stat.solved += 1
                    self.logger.debug("Solved by %s (budget: %f -- required to solve: %f)",
                                      algo, budget, time)
                    break

                if used_time >= cutoff:
                    stat.par1 += cutoff
                    stat.timeouts += 1
                    self.logger.debug("Timeout after %d", used_time)
                    break

        # PAR10: each timeout costs 10 * cutoff; 1 * cutoff is already in par1
        stat.par10 = stat.par1 + 9 * cutoff * stat.timeouts

        stat.show()

        return stat

    def validate_quality(self, schedules: dict, test_scenario: "ASlibScenario",
                         train_scenario: "ASlibScenario" = None):
        '''
            validate selected schedules on test instances for solution quality

            Arguments
            ---------
            schedules: dict {instance name -> tuples [algo, budget]}
                algorithm schedules per instance
            test_scenario: ASlibScenario
                ASlib scenario with test instances
            train_scenario: ASlibScenario
                ASlib scenario with training instances;
                required for SBS score computation

            Returns
            -------
            Stats
                statistics accumulated over all instances in schedules

            Raises
            ------
            ValueError
                if the first performance type of test_scenario is not
                "solution_quality"
            SystemExit
                with exit code 9 if a schedule has more than one entry
        '''
        # imported locally so that the exit below does not depend on a
        # module-level import of sys
        import sys

        if test_scenario.performance_type[0] != "solution_quality":
            raise ValueError("Cannot validate non-solution_quality scenario with solution_quality validation method")

        self.logger.debug("FYI: Feature costs and algorithm runstatus is ignored")

        stat = Stats(runtime_cutoff=None)

        stat.oracle = test_scenario.performance_data.min(axis=1).sum()
        if train_scenario:
            sbs = train_scenario.performance_data.sum(axis=0).idxmin()
            stat.sbs = test_scenario.performance_data.sum(axis=0)[sbs]

        if test_scenario.maximize[0]:
            # NOTE(review): this flips the sign of the scenario's performance
            # data in place, so validating the same scenario object twice
            # would flip it back -- confirm callers expect this side effect
            test_scenario.performance_data *= -1
            self.logger.debug("Removing *-1 in performance data because of maximization")
            stat.sbs *= -1
            stat.oracle *= -1

        for inst, schedule in schedules.items():
            if len(schedule) > 1:
                self.logger.error("AutoFolio does not support schedules for solution quality")
                sys.exit(9)

            selected_algo = schedule[0][0]
            stat.selection_freq[selected_algo] = stat.selection_freq.get(selected_algo, 0) + 1
            perf = test_scenario.performance_data[selected_algo][inst]

            self.logger.debug("Using %s on %s with performance %f",
                              selected_algo, inst, perf)

            stat.par1 += perf
            stat.solved += 1

        stat.show(remove_unsolvable=False)

        return stat
38 | extensions = [ 39 | 'sphinx.ext.autodoc', 40 | 'sphinx.ext.doctest', 41 | 'sphinx.ext.coverage', 42 | 'sphinx.ext.mathjax', 43 | 'sphinx.ext.viewcode', 44 | ] 45 | 46 | # Add any paths that contain templates here, relative to this directory. 47 | templates_path = ['_templates'] 48 | 49 | # The suffix(es) of source filenames. 50 | # You can specify multiple suffix as a list of string: 51 | # source_suffix = ['.rst', '.md'] 52 | source_suffix = '.rst' 53 | 54 | # The encoding of source files. 55 | #source_encoding = 'utf-8-sig' 56 | 57 | # The master toctree document. 58 | master_doc = 'index' 59 | 60 | # General information about the project. 61 | project = u'AutoFolio' 62 | copyright = '2015-%s, %s' % (datetime.datetime.now().year, autofolio.AUTHORS) 63 | author = autofolio.AUTHORS 64 | 65 | # The version info for the project you're documenting, acts as replacement for 66 | # |version| and |release|, also used in various other places throughout the 67 | # built documents. 68 | # 69 | # The short X.Y version. 70 | version = autofolio.VERSION 71 | # The full version, including alpha/beta/rc tags. 72 | release = autofolio.VERSION 73 | 74 | # The language for content autogenerated by Sphinx. Refer to documentation 75 | # for a list of supported languages. 76 | # 77 | # This is also used if you do content translation via gettext catalogs. 78 | # Usually you set "language" from the command line for these cases. 79 | language = None 80 | 81 | # There are two options for replacing |today|: either, you set today to some 82 | # non-false value, then it is used: 83 | #today = '' 84 | # Else, today_fmt is used as the format for a strftime call. 85 | #today_fmt = '%B %d, %Y' 86 | 87 | # List of patterns, relative to source directory, that match files and 88 | # directories to ignore when looking for source files. 89 | exclude_patterns = ['_static'] 90 | 91 | # The reST default role (used for this markup: `text`) to use for all 92 | # documents. 
93 | #default_role = None 94 | 95 | # If true, '()' will be appended to :func: etc. cross-reference text. 96 | #add_function_parentheses = True 97 | 98 | # If true, the current module name will be prepended to all description 99 | # unit titles (such as .. function::). 100 | #add_module_names = True 101 | 102 | # If true, sectionauthor and moduleauthor directives will be shown in the 103 | # output. They are ignored by default. 104 | #show_authors = False 105 | 106 | # The name of the Pygments (syntax highlighting) style to use. 107 | pygments_style = 'sphinx' 108 | 109 | # A list of ignored prefixes for module index sorting. 110 | #modindex_common_prefix = [] 111 | 112 | # If true, keep warnings as "system message" paragraphs in the built documents. 113 | #keep_warnings = False 114 | 115 | # If true, `todo` and `todoList` produce output, else they produce nothing. 116 | todo_include_todos = False 117 | 118 | 119 | # -- Options for HTML output ---------------------------------------------- 120 | 121 | # The theme to use for HTML and HTML Help pages. See the documentation for 122 | # a list of builtin themes. 123 | html_theme = 'bootstrap' 124 | 125 | # Theme options are theme-specific and customize the look and feel of a theme 126 | # further. For a list of options available for each theme, see the 127 | # documentation. 128 | html_theme_options = { 129 | # Navigation bar title. (Default: ``project`` value) 130 | 'navbar_title': "AutoFolio", 131 | 132 | # Tab name for entire site. (Default: "Site") 133 | # 'navbar_site_name': "Site", 134 | 135 | # A list of tuples containting pages to link to. The value should 136 | # be in the form [(name, page), ..] 137 | 'navbar_links': [ 138 | ('Start', 'index'), 139 | ('Installation', 'installation'), 140 | ('Manual', 'manual'), 141 | ('Contact', 'contact'), 142 | ('License', 'license'), 143 | ], 144 | 145 | # Render the next and previous page links in navbar. 
(Default: true) 146 | 'navbar_sidebarrel': False, 147 | 148 | # Render the current pages TOC in the navbar. (Default: true) 149 | 'navbar_pagenav': False, 150 | 151 | # Tab name for the current pages TOC. (Default: "Page") 152 | 'navbar_pagenav_name': "On this page", 153 | 154 | # Global TOC depth for "site" navbar tab. (Default: 1) 155 | # Switching to -1 shows all levels. 156 | 'globaltoc_depth': 1, 157 | 158 | # Include hidden TOCs in Site navbar? 159 | # 160 | # Note: If this is "false", you cannot have mixed ``:hidden:`` and 161 | # non-hidden ``toctree`` directives in the same page, or else the build 162 | # will break. 163 | # 164 | # Values: "true" (default) or "false" 165 | 'globaltoc_includehidden': "false", 166 | 167 | # HTML navbar class (Default: "navbar") to attach to
element. 168 | # For black navbar, do "navbar navbar-inverse" 169 | 'navbar_class': "navbar", 170 | 171 | # Fix navigation bar to top of page? 172 | # Values: "true" (default) or "false" 173 | 'navbar_fixed_top': "true", 174 | 175 | # Location of link to source. 176 | # Options are "nav" (default), "footer" or anything else to exclude. 177 | 'source_link_position': "footer", 178 | 179 | # Bootswatch (http://bootswatch.com/) theme. 180 | # 181 | # Options are nothing with "" (default) or the name of a valid theme 182 | # such as "amelia" or "cosmo". 183 | 'bootswatch_theme': "cosmo", 184 | 185 | # Choose Bootstrap version. 186 | # Values: "3" (default) or "2" (in quotes) 187 | 'bootstrap_version': "3", 188 | } 189 | 190 | # Add any paths that contain custom themes here, relative to this directory. 191 | html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() 192 | 193 | # The name for this set of Sphinx documents. If None, it defaults to 194 | # " v documentation". 195 | #html_title = None 196 | 197 | # A shorter title for the navigation bar. Default is the same as html_title. 198 | #html_short_title = None 199 | 200 | # The name of an image file (relative to this directory) to place at the top 201 | # of the sidebar. 202 | #html_logo = None 203 | 204 | # The name of an image file (within the static path) to use as favicon of the 205 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 206 | # pixels large. 207 | #html_favicon = None 208 | 209 | # Add any paths that contain custom static files (such as style sheets) here, 210 | # relative to this directory. They are copied after the builtin static files, 211 | # so a file named "default.css" will overwrite the builtin "default.css". 212 | html_static_path = ['_static'] 213 | 214 | # Add any extra paths that contain custom files (such as robots.txt or 215 | # .htaccess) here, relative to this directory. These files are copied 216 | # directly to the root of the documentation. 
217 | #html_extra_path = [] 218 | 219 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 220 | # using the given strftime format. 221 | #html_last_updated_fmt = '%b %d, %Y' 222 | 223 | # If true, SmartyPants will be used to convert quotes and dashes to 224 | # typographically correct entities. 225 | #html_use_smartypants = True 226 | 227 | # Custom sidebar templates, maps document names to template names. 228 | html_sidebars = {'**': ['localtoc.html']} 229 | 230 | # Additional templates that should be rendered to pages, maps page names to 231 | # template names. 232 | #html_additional_pages = {} 233 | 234 | # If false, no module index is generated. 235 | #html_domain_indices = True 236 | 237 | # If false, no index is generated. 238 | #html_use_index = True 239 | 240 | # If true, the index is split into individual pages for each letter. 241 | #html_split_index = False 242 | 243 | # If true, links to the reST sources are added to the pages. 244 | #html_show_sourcelink = True 245 | 246 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 247 | #html_show_sphinx = True 248 | 249 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 250 | #html_show_copyright = True 251 | 252 | # If true, an OpenSearch description file will be output, and all pages will 253 | # contain a tag referring to it. The value of this option must be the 254 | # base URL from which the finished HTML is served. 255 | #html_use_opensearch = '' 256 | 257 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 258 | #html_file_suffix = None 259 | 260 | # Language to be used for generating the HTML full-text search index. 261 | # Sphinx supports the following languages: 262 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 263 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' 264 | #html_search_language = 'en' 265 | 266 | # A dictionary with options for the search language support, empty by default. 
267 | # Now only 'ja' uses this config value 268 | #html_search_options = {'type': 'default'} 269 | 270 | # The name of a javascript file (relative to the configuration directory) that 271 | # implements a search results scorer. If empty, the default will be used. 272 | #html_search_scorer = 'scorer.js' 273 | 274 | # Output file base name for HTML help builder. 275 | htmlhelp_basename = 'AutoFoliodoc' 276 | 277 | # -- Options for LaTeX output --------------------------------------------- 278 | 279 | latex_elements = { 280 | # The paper size ('letterpaper' or 'a4paper'). 281 | #'papersize': 'letterpaper', 282 | 283 | # The font size ('10pt', '11pt' or '12pt'). 284 | #'pointsize': '10pt', 285 | 286 | # Additional stuff for the LaTeX preamble. 287 | #'preamble': '', 288 | 289 | # Latex figure (float) alignment 290 | #'figure_align': 'htbp', 291 | } 292 | 293 | # Grouping the document tree into LaTeX files. List of tuples 294 | # (source start file, target name, title, 295 | # author, documentclass [howto, manual, or own class]). 296 | latex_documents = [ 297 | (master_doc, 'AutoFolio.tex', u'AutoFolio Documentation', autofolio.AUTHORS, 'manual'), 298 | ] 299 | 300 | # The name of an image file (relative to this directory) to place at the top of 301 | # the title page. 302 | #latex_logo = None 303 | 304 | # For "manual" documents, if this is true, then toplevel headings are parts, 305 | # not chapters. 306 | #latex_use_parts = False 307 | 308 | # If true, show page references after internal links. 309 | #latex_show_pagerefs = False 310 | 311 | # If true, show URL addresses after external links. 312 | #latex_show_urls = False 313 | 314 | # Documents to append as an appendix to all manuals. 315 | #latex_appendices = [] 316 | 317 | # If false, no module index is generated. 318 | #latex_domain_indices = True 319 | 320 | 321 | # -- Options for manual page output --------------------------------------- 322 | 323 | # One entry per manual page. 
List of tuples 324 | # (source start file, name, description, authors, manual section). 325 | man_pages = [ 326 | (master_doc, 'autofolio', u'AutoFolio Documentation', 327 | [author], 1) 328 | ] 329 | 330 | # If true, show URL addresses after external links. 331 | #man_show_urls = False 332 | 333 | 334 | # -- Options for Texinfo output ------------------------------------------- 335 | 336 | # Grouping the document tree into Texinfo files. List of tuples 337 | # (source start file, target name, title, author, 338 | # dir menu entry, description, category) 339 | texinfo_documents = [ 340 | (master_doc, 'AutoFolio', u'AutoFolio Documentation', 341 | author, 'AutoFolio', 'One line description of project.', 342 | 'Miscellaneous'), 343 | ] 344 | 345 | # Documents to append as an appendix to all manuals. 346 | #texinfo_appendices = [] 347 | 348 | # If false, no module index is generated. 349 | #texinfo_domain_indices = True 350 | 351 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 352 | #texinfo_show_urls = 'footnote' 353 | 354 | # If true, do not generate a @detailmenu in the "Top" node's menu. 
355 | #texinfo_no_detailmenu = False 356 | -------------------------------------------------------------------------------- /autofolio/autofolio.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import functools 3 | import traceback 4 | import random 5 | from itertools import tee 6 | import pickle 7 | 8 | import numpy as np 9 | import pandas as pd 10 | import yaml 11 | 12 | from ConfigSpace.configuration_space import Configuration, \ 13 | ConfigurationSpace 14 | from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ 15 | UniformFloatHyperparameter, UniformIntegerHyperparameter 16 | 17 | # SMAC3 18 | from smac.tae.execute_func import ExecuteTAFuncDict 19 | from smac.scenario.scenario import Scenario 20 | from smac.stats.stats import Stats as AC_Stats 21 | from smac.facade.smac_hpo_facade import SMAC4HPO as SMAC 22 | 23 | from autofolio.io.cmd import CMDParser 24 | from aslib_scenario.aslib_scenario import ASlibScenario 25 | 26 | # feature preprocessing 27 | from autofolio.feature_preprocessing.pca import PCAWrapper 28 | from autofolio.feature_preprocessing.missing_values import ImputerWrapper 29 | from autofolio.feature_preprocessing.feature_group_filtering import FeatureGroupFiltering 30 | from autofolio.feature_preprocessing.standardscaler import StandardScalerWrapper 31 | 32 | # presolving 33 | from autofolio.pre_solving.aspeed_schedule import Aspeed 34 | 35 | # classifiers 36 | from autofolio.selector.classifiers.random_forest import RandomForest 37 | from autofolio.selector.classifiers.xgboost import XGBoost 38 | 39 | # regressors 40 | from autofolio.selector.regressors.random_forest import RandomForestRegressor 41 | 42 | # selectors 43 | from autofolio.selector.pairwise_classification import PairwiseClassifier 44 | from autofolio.selector.multi_classification import MultiClassifier 45 | from autofolio.selector.ind_regression import IndRegression 46 | from 
def run_cli(self):
    '''
    main method of AutoFolio based on command line interface

    Depending on the parsed command line arguments this either
    (a) loads a saved model and prints the predicted schedule for the
    given feature vector, or
    (b) reads a scenario (ASlib directory or CSV files), builds the
    configuration space and then runs an outer cross validation, or
    tunes / takes the default configuration and finally saves a fitted
    model or runs a cross validation; if test CSV files were given, the
    configuration is additionally validated on them.

    Returns
    -------
    0 after an outer cross validation; None in all other cases
    '''

    cmd_parser = CMDParser()
    args_, self.overwrite_args = cmd_parser.parse()

    self._root_logger.setLevel(args_.verbose)

    if args_.load:
        # split() without a separator tolerates repeated, leading and
        # trailing whitespace, which split(" ") turned into float('')
        feature_vec = [float(value) for value in args_.feature_vec.split()]
        pred = self.read_model_and_predict(
            model_fn=args_.load, feature_vec=feature_vec)
        print("Selected Schedule [(algorithm, budget)]: %s" % (pred))
        return

    scenario = ASlibScenario()
    if args_.scenario:
        scenario.read_scenario(args_.scenario)
    elif args_.performance_csv and args_.feature_csv:
        scenario.read_from_csv(perf_fn=args_.performance_csv,
                               feat_fn=args_.feature_csv,
                               objective=args_.objective,
                               runtime_cutoff=args_.runtime_cutoff,
                               maximize=args_.maximize,
                               cv_fn=args_.cv_csv)
    else:
        raise ValueError("Missing inputs to read scenario data.")

    test_scenario = None
    if args_.performance_test_csv and args_.feature_test_csv:
        test_scenario = ASlibScenario()
        test_scenario.read_from_csv(perf_fn=args_.performance_test_csv,
                                    feat_fn=args_.feature_test_csv,
                                    objective=args_.objective,
                                    runtime_cutoff=args_.runtime_cutoff,
                                    maximize=args_.maximize,
                                    cv_fn=None)

    autofolio_config = {}
    if args_.config is not None:
        self.logger.info("Reading yaml config file")
        # safe_load: yaml.load() without an explicit Loader is rejected by
        # current PyYAML and would construct arbitrary objects on old ones;
        # the context manager makes sure the file handle is closed.
        with open(args_.config) as fp:
            # an empty file is parsed as None
            autofolio_config = yaml.safe_load(fp) or {}

    # command line values only fill in what the config file left unset
    if not autofolio_config.get("wallclock_limit"):
        autofolio_config["wallclock_limit"] = args_.wallclock_limit
    if not autofolio_config.get("runcount_limit"):
        autofolio_config["runcount_limit"] = args_.runcount_limit
    if not autofolio_config.get("output-dir"):
        autofolio_config["output-dir"] = args_.output_dir

    self.cs = self.get_cs(scenario, autofolio_config)

    if args_.outer_cv:
        self._outer_cv(scenario, autofolio_config, args_.outer_cv_fold,
                       args_.out_template, smac_seed=args_.smac_seed)
        return 0

    if args_.tune:
        config = self.get_tuned_config(scenario,
                                       wallclock_limit=args_.wallclock_limit,
                                       runcount_limit=args_.runcount_limit,
                                       autofolio_config=autofolio_config,
                                       seed=args_.smac_seed)
    else:
        config = self.cs.get_default_configuration()
    self.logger.debug(config)

    if args_.save:
        feature_pre_pipeline, pre_solver, selector = self.fit(
            scenario=scenario, config=config)
        self._save_model(
            args_.save, scenario, feature_pre_pipeline, pre_solver, selector, config)
    else:
        self.run_cv(config=config, scenario=scenario,
                    folds=int(scenario.cv_data.max().max()))

    if test_scenario is not None:
        # train on the full scenario and validate on the external test data;
        # the fold index is not used when a test scenario is passed
        self.run_fold(config=config,
                      fold=0,
                      return_fit=False,
                      scenario=scenario,
                      test_scenario=test_scenario)
In particular, this ensures that SMAC does not use the test 169 | set during hyperparameter optimization. 170 | 171 | Arguments 172 | --------- 173 | scenario: ASlibScenario 174 | ASlib Scenario at hand 175 | 176 | autofolio_config: dict, or None 177 | An optional dictionary of configuration options 178 | 179 | outer_cv_fold: int, or None 180 | If given, then only the single outer-cv fold is processed 181 | 182 | out_template: str, or None 183 | If given, the learned configurations are written to the 184 | specified locations. The string is considered a template, and 185 | "%fold%" will be replaced with the fold. 186 | 187 | smac_seed:int 188 | random seed for SMAC 189 | 190 | Returns 191 | ------- 192 | stats: validate.Stats 193 | Performance over all outer-cv folds 194 | 195 | ''' 196 | import string 197 | 198 | outer_stats = None 199 | 200 | # For each outer split 201 | outer_cv_folds = range(1, 11) 202 | if outer_cv_fold is not None: 203 | outer_cv_folds = range(outer_cv_fold, outer_cv_fold+1) 204 | 205 | for cv_fold in outer_cv_folds: 206 | 207 | # Use ‘ASlibScenario.get_split()’ to get the outer split 208 | outer_testing, outer_training = scenario.get_split(cv_fold) 209 | 210 | msg = ">>>>> Outer CV fold: {} <<<<<".format(cv_fold) 211 | self.logger.info(msg) 212 | 213 | # Use ASlibScenario.create_cv_splits() to get an inner-cv 214 | outer_training.create_cv_splits(n_folds=10) 215 | 216 | # Use ‘AutoFolio.get_tuned_config()’ to tune on inner-cv 217 | config = self.get_tuned_config( 218 | outer_training, 219 | autofolio_config=autofolio_config, 220 | seed=smac_seed 221 | ) 222 | 223 | # Use `AutoFolio.run_fold()’ to get the performance on the outer split 224 | stats, fit, schedule = self.run_fold( 225 | config, 226 | scenario, 227 | cv_fold, 228 | return_fit=True 229 | ) 230 | 231 | feature_pre_pipeline, pre_solver, selector = fit 232 | 233 | if outer_stats is None: 234 | outer_stats = stats 235 | else: 236 | outer_stats.merge(stats) 237 | 238 | # save the 
model, if given an output location 239 | if out_template is not None: 240 | out_template_ = string.Template(out_template) 241 | model_fn = out_template_.substitute(fold=cv_fold, type="pkl") 242 | 243 | msg = "Writing model to: {}".format(model_fn) 244 | self.logger.info(msg) 245 | 246 | self._save_model( 247 | model_fn, 248 | scenario, 249 | feature_pre_pipeline, 250 | pre_solver, 251 | selector, 252 | config 253 | ) 254 | 255 | # convert the schedule to a data frame 256 | schedule_df = pd.Series(schedule, name="solver") 257 | schedule_df.index.name = "instance" 258 | schedule_df = schedule_df.reset_index() 259 | 260 | # just keep the solver name; we don't care about the time 261 | 262 | # x[0] gets the first pair in the schedule list 263 | # and x[0][0] gets the name of the solver from that pair 264 | schedule_df['solver'] = schedule_df['solver'].apply(lambda x: x[0][0]) 265 | 266 | selections_fn = out_template_.substitute(fold=cv_fold, type="csv") 267 | 268 | msg = "Writing solver choices to: {}".format(selections_fn) 269 | self.logger.info(msg) 270 | 271 | schedule_df.to_csv(selections_fn, index=False) 272 | 273 | self.logger.info(">>>>> Final Stats <<<<<") 274 | outer_stats.show() 275 | 276 | def _save_model(self, out_fn: str, scenario: ASlibScenario, feature_pre_pipeline: list, pre_solver: Aspeed, selector, config: Configuration): 277 | ''' 278 | save all pipeline objects for predictions 279 | 280 | Arguments 281 | --------- 282 | out_fn: str 283 | filename of output file 284 | scenario: AslibScenario 285 | ASlib scenario with all the data 286 | feature_pre_pipeline: list 287 | list of preprocessing objects 288 | pre_solver: Aspeed 289 | aspeed object with pre-solving schedule 290 | selector: autofolio.selector.* 291 | fitted selector object 292 | config: Configuration 293 | parameter setting configuration 294 | ''' 295 | scenario.logger = None 296 | for fpp in feature_pre_pipeline: 297 | fpp.logger = None 298 | if pre_solver: 299 | pre_solver.logger = None 
def get_cs(self, scenario: ASlibScenario, autofolio_config:dict=None):
    '''
    returns the parameter configuration space of AutoFolio
    (based on the automl config space: https://github.com/automl/ConfigSpace)

    The space is also stored in self.cs.

    Arguments
    ---------
    scenario: aslib_scenario.aslib_scenario.ASlibScenario
        aslib scenario at hand

    autofolio_config: dict, or None
        An optional dictionary of configuration options; the keys read
        here are "allowed_feature_groups", "pca", "impute", "scale",
        "presolve", "classifier", "regressor" and "selector".
        None is treated like an empty dictionary.

    Returns
    -------
    ConfigurationSpace

    Raises
    ------
    ValueError
        if the list of allowed feature groups is empty
    '''

    # the signature advertises None as default, so it must not crash on .get
    if autofolio_config is None:
        autofolio_config = {}

    self.cs = ConfigurationSpace()

    def add_fixed_or_default(name, defaults, default_value):
        # adds the categorical <name>; if the user fixed a value for it in
        # autofolio_config, that value becomes the only choice
        fixed = autofolio_config.get(name)
        if fixed:
            choices, default = [fixed], fixed
        else:
            choices, default = defaults, default_value
        self.cs.add_hyperparameter(CategoricalHyperparameter(
            name, choices=choices, default_value=default))

    # only allow the feature groups specified in the config file
    # by default, though, all of the feature groups are allowed.
    allowed_feature_groups = autofolio_config.get("allowed_feature_groups",
                                                  scenario.feature_steps)

    if len(allowed_feature_groups) == 0:
        msg = "Please ensure at least one feature group is allowed"
        raise ValueError(msg)

    if len(allowed_feature_groups) == 1:
        choices = [True]  # if we only have one feature group, it has to be active
    else:
        choices = [True, False]
    default = True

    for fs in allowed_feature_groups:
        fs_param = CategoricalHyperparameter(name="fgroup_%s" % (fs),
                                             choices=choices, default_value=default)
        self.cs.add_hyperparameter(fs_param)

    # preprocessing
    if autofolio_config.get("pca", True):
        PCAWrapper.add_params(self.cs)

    if autofolio_config.get("impute", True):
        ImputerWrapper.add_params(self.cs)

    if autofolio_config.get("scale", True):
        StandardScalerWrapper.add_params(self.cs)

    # Pre-Solving (only for runtime scenarios)
    if scenario.performance_type[0] == "runtime" and autofolio_config.get("presolve", True):
        Aspeed.add_params(
            cs=self.cs, cutoff=scenario.algorithm_cutoff_time)

    # classifiers
    add_fixed_or_default("classifier", ["RandomForest", "XGBoost"], "RandomForest")
    RandomForest.add_params(self.cs)
    XGBoost.add_params(self.cs)

    # regressors
    add_fixed_or_default("regressor", ["RandomForestRegressor"], "RandomForestRegressor")
    RandomForestRegressor.add_params(self.cs)

    # selectors
    add_fixed_or_default("selector", ["PairwiseClassifier", "PairwiseRegressor"],
                         "PairwiseClassifier")
    PairwiseClassifier.add_params(self.cs)
    PairwiseRegression.add_params(self.cs)

    self.logger.debug(self.cs)

    return self.cs
def called_by_smac(self, config: Configuration, scenario: ASlibScenario, instance:str=None, seed:int=1):
    '''
    target function handed to SMAC: evaluates a configuration either on
    the whole cross validation (no instance given) or on a single cv-fold

    If evaluating the single fold raises a ValueError, a worst-case value
    is imputed: 20 times the algorithm cutoff for runtime scenarios, and
    the maximum entry of the performance data otherwise.

    Arguments
    ---------
    config: Configuration
        parameter configuration to use for preprocessing
    scenario: aslib_scenario.aslib_scenario.ASlibScenario
        aslib scenario at hand
    instance: str
        cv-fold index
    seed: int
        random seed (not used)

    Returns
    -------
    float: average performance (sign flipped if the scenario is maximized)
    '''

    if instance is None:
        # NOTE(review): run_cv() already flips the sign for maximized
        # scenarios, so this path appears to be negated twice -- confirm
        score = self.run_cv(config=config, scenario=scenario)
    else:
        try:
            score = self.run_fold(
                config=config, scenario=scenario, fold=int(instance)).show()
        except ValueError:
            runtime_scenario = scenario.performance_type[0] == "runtime"
            # try to impute a worst case perf
            score = (scenario.algorithm_cutoff_time * 20
                     if runtime_scenario
                     else scenario.performance_data.max().max())

    return score * -1 if scenario.maximize[0] else score
def fit(self, scenario: ASlibScenario, config: Configuration):
    '''
    fit the complete AutoFolio pipeline on a given ASlib Scenario:
    feature preprocessing first, then pre-solving and the selector on the
    preprocessed scenario

    If self.overwrite_args is set, the given configuration is adapted
    with these settings before anything is fitted.

    Arguments
    ---------
    scenario: aslib_scenario.aslib_scenario.ASlibScenario
        aslib scenario at hand
    config: Configuration
        parameter configuration to use for preprocessing

    Returns
    -------
    list of fitted feature preproccessing objects
    pre-solving object
    fitted selector
    '''
    log = self.logger
    log.info("Given Configuration: %s" % (config))

    overrides = self.overwrite_args
    if overrides:
        config = self._overwrite_configuration(
            config=config, overwrite_args=overrides)
        log.info("Overwritten Configuration: %s" % (config))

    # every later stage works on the preprocessed scenario
    preprocessed = self.fit_transform_feature_preprocessing(scenario, config)
    transformed_scenario, fitted_pipeline = preprocessed

    fitted_pre_solver = self.fit_pre_solving(transformed_scenario, config)
    fitted_selector = self.fit_selector(transformed_scenario, config)

    return fitted_pipeline, fitted_pre_solver, fitted_selector
allow_inactive_with_values=True) 720 | 721 | return config 722 | 723 | def fit_transform_feature_preprocessing(self, scenario: ASlibScenario, config: Configuration): 724 | ''' 725 | performs feature preprocessing on a given ASlib scenario wrt to a given configuration 726 | 727 | Arguments 728 | --------- 729 | scenario: aslib_scenario.aslib_scenario.ASlibScenario 730 | aslib scenario at hand 731 | config: Configuration 732 | parameter configuration to use for preprocessing 733 | 734 | Returns 735 | ------- 736 | list of fitted feature preproccessing objects 737 | ''' 738 | 739 | pipeline = [] 740 | fgf = FeatureGroupFiltering() 741 | scenario = fgf.fit_transform(scenario, config) 742 | 743 | imputer = ImputerWrapper() 744 | scenario = imputer.fit_transform(scenario, config) 745 | 746 | scaler = StandardScalerWrapper() 747 | scenario = scaler.fit_transform(scenario, config) 748 | 749 | pca = PCAWrapper() 750 | scenario = pca.fit_transform(scenario, config) 751 | 752 | return scenario, [fgf, imputer, scaler, pca] 753 | 754 | def fit_pre_solving(self, scenario: ASlibScenario, config: Configuration): 755 | ''' 756 | fits an pre-solving schedule using Aspeed [Hoos et al, 2015 TPLP) 757 | 758 | Arguments 759 | --------- 760 | scenario: aslib_scenario.aslib_scenario.ASlibScenario 761 | aslib scenario at hand 762 | config: Configuration 763 | parameter configuration to use for preprocessing 764 | 765 | Returns 766 | ------- 767 | instance of Aspeed() with a fitted pre-solving schedule if performance_type of scenario is runtime; else None 768 | ''' 769 | if scenario.performance_type[0] == "runtime": 770 | aspeed = Aspeed() 771 | aspeed.fit(scenario=scenario, config=config) 772 | return aspeed 773 | else: 774 | return None 775 | 776 | def fit_selector(self, scenario: ASlibScenario, config: Configuration): 777 | ''' 778 | fits an algorithm selector for a given scenario wrt a given configuration 779 | 780 | Arguments 781 | --------- 782 | scenario: 
aslib_scenario.aslib_scenario.ASlibScenario 783 | aslib scenario at hand 784 | config: Configuration 785 | parameter configuration 786 | ''' 787 | 788 | if config.get("selector") == "PairwiseClassifier": 789 | clf_class = None 790 | if config.get("classifier") == "RandomForest": 791 | clf_class = RandomForest 792 | if config.get("classifier") == "XGBoost": 793 | clf_class = XGBoost 794 | 795 | selector = PairwiseClassifier(classifier_class=clf_class) 796 | selector.fit(scenario=scenario, config=config) 797 | 798 | if config.get("selector") == "MultiClassifier": 799 | clf_class = None 800 | if config.get("classifier") == "RandomForest": 801 | clf_class = RandomForest 802 | if config.get("classifier") == "XGBoost": 803 | clf_class = XGBoost 804 | 805 | selector = MultiClassifier(classifier_class=clf_class) 806 | selector.fit(scenario=scenario, config=config) 807 | 808 | if config.get("selector") == "IndRegressor": 809 | reg_class = None 810 | if config.get("regressor") == "RandomForestRegressor": 811 | reg_class = RandomForestRegressor 812 | 813 | selector = IndRegression(regressor_class=reg_class) 814 | selector.fit(scenario=scenario, config=config) 815 | 816 | if config.get("selector") == "JointRegressor": 817 | reg_class = None 818 | if config.get("regressor") == "RandomForestRegressor": 819 | reg_class = RandomForestRegressor 820 | 821 | selector = JointRegression(regressor_class=reg_class) 822 | selector.fit(scenario=scenario, config=config) 823 | 824 | if config.get("selector") == "PairwiseRegressor": 825 | reg_class = None 826 | if config.get("regressor") == "RandomForestRegressor": 827 | reg_class = RandomForestRegressor 828 | 829 | selector = PairwiseRegression(regressor_class=reg_class) 830 | selector.fit(scenario=scenario, config=config) 831 | 832 | return selector 833 | 834 | def predict(self, scenario: ASlibScenario, config: Configuration, feature_pre_pipeline: list, pre_solver: Aspeed, selector): 835 | ''' 836 | predicts algorithm schedules wrt a given 
config 837 | and given pipelines 838 | 839 | Arguments 840 | --------- 841 | scenario: aslib_scenario.aslib_scenario.ASlibScenario 842 | aslib scenario at hand 843 | config: Configuration 844 | parameter configuration 845 | feature_pre_pipeline: list 846 | list of fitted feature preprocessors 847 | pre_solver: Aspeed 848 | pre solver object with a saved static schedule 849 | selector: autofolio.selector.* 850 | fitted selector object 851 | ''' 852 | 853 | self.logger.info("Predict on Test") 854 | for f_pre in feature_pre_pipeline: 855 | scenario = f_pre.transform(scenario) 856 | 857 | if pre_solver: 858 | pre_solving_schedule = pre_solver.predict(scenario=scenario) 859 | else: 860 | pre_solving_schedule = {} 861 | 862 | pred_schedules = selector.predict(scenario=scenario) 863 | 864 | # combine schedules 865 | if pre_solving_schedule: 866 | return dict((inst, pre_solving_schedule.get(inst, []) + schedule) for inst, schedule in pred_schedules.items()) 867 | else: 868 | return pred_schedules 869 | 870 | 871 | def main(): 872 | af = AutoFolio() 873 | af.run_cli() 874 | 875 | 876 | if __name__ == "__main__": 877 | main() 878 | --------------------------------------------------------------------------------