element.
168 | # For black navbar, do "navbar navbar-inverse"
169 | 'navbar_class': "navbar",
170 |
171 | # Fix navigation bar to top of page?
172 | # Values: "true" (default) or "false"
173 | 'navbar_fixed_top': "true",
174 |
175 | # Location of link to source.
176 | # Options are "nav" (default), "footer" or anything else to exclude.
177 | 'source_link_position': "footer",
178 |
179 | # Bootswatch (http://bootswatch.com/) theme.
180 | #
181 | # Options are nothing with "" (default) or the name of a valid theme
182 | # such as "amelia" or "cosmo".
183 | 'bootswatch_theme': "cosmo",
184 |
185 | # Choose Bootstrap version.
186 | # Values: "3" (default) or "2" (in quotes)
187 | 'bootstrap_version': "3",
188 | }
189 |
190 | # Add any paths that contain custom themes here, relative to this directory.
191 | html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()
192 |
193 | # The name for this set of Sphinx documents. If None, it defaults to
194 | # "
v documentation".
195 | #html_title = None
196 |
197 | # A shorter title for the navigation bar. Default is the same as html_title.
198 | #html_short_title = None
199 |
200 | # The name of an image file (relative to this directory) to place at the top
201 | # of the sidebar.
202 | #html_logo = None
203 |
204 | # The name of an image file (within the static path) to use as favicon of the
205 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
206 | # pixels large.
207 | #html_favicon = None
208 |
209 | # Add any paths that contain custom static files (such as style sheets) here,
210 | # relative to this directory. They are copied after the builtin static files,
211 | # so a file named "default.css" will overwrite the builtin "default.css".
212 | html_static_path = ['_static']
213 |
214 | # Add any extra paths that contain custom files (such as robots.txt or
215 | # .htaccess) here, relative to this directory. These files are copied
216 | # directly to the root of the documentation.
217 | #html_extra_path = []
218 |
219 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
220 | # using the given strftime format.
221 | #html_last_updated_fmt = '%b %d, %Y'
222 |
223 | # If true, SmartyPants will be used to convert quotes and dashes to
224 | # typographically correct entities.
225 | #html_use_smartypants = True
226 |
227 | # Custom sidebar templates, maps document names to template names.
228 | html_sidebars = {'**': ['localtoc.html']}
229 |
230 | # Additional templates that should be rendered to pages, maps page names to
231 | # template names.
232 | #html_additional_pages = {}
233 |
234 | # If false, no module index is generated.
235 | #html_domain_indices = True
236 |
237 | # If false, no index is generated.
238 | #html_use_index = True
239 |
240 | # If true, the index is split into individual pages for each letter.
241 | #html_split_index = False
242 |
243 | # If true, links to the reST sources are added to the pages.
244 | #html_show_sourcelink = True
245 |
246 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
247 | #html_show_sphinx = True
248 |
249 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
250 | #html_show_copyright = True
251 |
252 | # If true, an OpenSearch description file will be output, and all pages will
253 | # contain a <link> tag referring to it. The value of this option must be the
254 | # base URL from which the finished HTML is served.
255 | #html_use_opensearch = ''
256 |
257 | # This is the file name suffix for HTML files (e.g. ".xhtml").
258 | #html_file_suffix = None
259 |
260 | # Language to be used for generating the HTML full-text search index.
261 | # Sphinx supports the following languages:
262 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
263 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
264 | #html_search_language = 'en'
265 |
266 | # A dictionary with options for the search language support, empty by default.
267 | # Now only 'ja' uses this config value
268 | #html_search_options = {'type': 'default'}
269 |
270 | # The name of a javascript file (relative to the configuration directory) that
271 | # implements a search results scorer. If empty, the default will be used.
272 | #html_search_scorer = 'scorer.js'
273 |
274 | # Output file base name for HTML help builder.
275 | htmlhelp_basename = 'AutoFoliodoc'
276 |
277 | # -- Options for LaTeX output ---------------------------------------------
278 |
279 | latex_elements = {
280 | # The paper size ('letterpaper' or 'a4paper').
281 | #'papersize': 'letterpaper',
282 |
283 | # The font size ('10pt', '11pt' or '12pt').
284 | #'pointsize': '10pt',
285 |
286 | # Additional stuff for the LaTeX preamble.
287 | #'preamble': '',
288 |
289 | # Latex figure (float) alignment
290 | #'figure_align': 'htbp',
291 | }
292 |
293 | # Grouping the document tree into LaTeX files. List of tuples
294 | # (source start file, target name, title,
295 | # author, documentclass [howto, manual, or own class]).
296 | latex_documents = [
297 | (master_doc, 'AutoFolio.tex', u'AutoFolio Documentation', autofolio.AUTHORS, 'manual'),
298 | ]
299 |
300 | # The name of an image file (relative to this directory) to place at the top of
301 | # the title page.
302 | #latex_logo = None
303 |
304 | # For "manual" documents, if this is true, then toplevel headings are parts,
305 | # not chapters.
306 | #latex_use_parts = False
307 |
308 | # If true, show page references after internal links.
309 | #latex_show_pagerefs = False
310 |
311 | # If true, show URL addresses after external links.
312 | #latex_show_urls = False
313 |
314 | # Documents to append as an appendix to all manuals.
315 | #latex_appendices = []
316 |
317 | # If false, no module index is generated.
318 | #latex_domain_indices = True
319 |
320 |
321 | # -- Options for manual page output ---------------------------------------
322 |
323 | # One entry per manual page. List of tuples
324 | # (source start file, name, description, authors, manual section).
325 | man_pages = [
326 | (master_doc, 'autofolio', u'AutoFolio Documentation',
327 | [author], 1)
328 | ]
329 |
330 | # If true, show URL addresses after external links.
331 | #man_show_urls = False
332 |
333 |
334 | # -- Options for Texinfo output -------------------------------------------
335 |
336 | # Grouping the document tree into Texinfo files. List of tuples
337 | # (source start file, target name, title, author,
338 | # dir menu entry, description, category)
339 | texinfo_documents = [
340 | (master_doc, 'AutoFolio', u'AutoFolio Documentation',
341 | author, 'AutoFolio', 'One line description of project.',
342 | 'Miscellaneous'),
343 | ]
344 |
345 | # Documents to append as an appendix to all manuals.
346 | #texinfo_appendices = []
347 |
348 | # If false, no module index is generated.
349 | #texinfo_domain_indices = True
350 |
351 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
352 | #texinfo_show_urls = 'footnote'
353 |
354 | # If true, do not generate a @detailmenu in the "Top" node's menu.
355 | #texinfo_no_detailmenu = False
356 |
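For context, the html_theme_options above are sphinx_bootstrap_theme options. A minimal sketch of how the theme is typically activated (the html_theme line is not shown in this excerpt; the 'navbar_title' value is purely illustrative):

# Minimal sphinx_bootstrap_theme setup (sketch; values are illustrative)
import sphinx_bootstrap_theme

html_theme = 'bootstrap'
html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()
html_theme_options = {
    'navbar_title': "AutoFolio",   # illustrative; the project's actual title is set elsewhere in conf.py
    'bootswatch_theme': "cosmo",
    'bootstrap_version': "3",
}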
--------------------------------------------------------------------------------
/autofolio/autofolio.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import functools
3 | import traceback
4 | import random
5 | from itertools import tee
6 | import pickle
7 |
8 | import numpy as np
9 | import pandas as pd
10 | import yaml
11 |
12 | from ConfigSpace.configuration_space import Configuration, \
13 | ConfigurationSpace
14 | from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
15 | UniformFloatHyperparameter, UniformIntegerHyperparameter
16 |
17 | # SMAC3
18 | from smac.tae.execute_func import ExecuteTAFuncDict
19 | from smac.scenario.scenario import Scenario
20 | from smac.stats.stats import Stats as AC_Stats
21 | from smac.facade.smac_hpo_facade import SMAC4HPO as SMAC
22 |
23 | from autofolio.io.cmd import CMDParser
24 | from aslib_scenario.aslib_scenario import ASlibScenario
25 |
26 | # feature preprocessing
27 | from autofolio.feature_preprocessing.pca import PCAWrapper
28 | from autofolio.feature_preprocessing.missing_values import ImputerWrapper
29 | from autofolio.feature_preprocessing.feature_group_filtering import FeatureGroupFiltering
30 | from autofolio.feature_preprocessing.standardscaler import StandardScalerWrapper
31 |
32 | # presolving
33 | from autofolio.pre_solving.aspeed_schedule import Aspeed
34 |
35 | # classifiers
36 | from autofolio.selector.classifiers.random_forest import RandomForest
37 | from autofolio.selector.classifiers.xgboost import XGBoost
38 |
39 | # regressors
40 | from autofolio.selector.regressors.random_forest import RandomForestRegressor
41 |
42 | # selectors
43 | from autofolio.selector.pairwise_classification import PairwiseClassifier
44 | from autofolio.selector.multi_classification import MultiClassifier
45 | from autofolio.selector.ind_regression import IndRegression
46 | from autofolio.selector.joint_regression import JointRegression
47 | from autofolio.selector.pairwise_regression import PairwiseRegression
48 |
49 | # validation
50 | from autofolio.validation.validate import Validator, Stats
51 |
52 | __author__ = "Marius Lindauer"
53 | __license__ = "BSD"
54 | __version__ = "2.2.0"
55 |
56 |
57 | class AutoFolio(object):
58 |
59 | def __init__(self, random_seed: int=12345):
60 | ''' Constructor
61 |
62 | Arguments
63 | ---------
64 | random_seed: int
65 | random seed for numpy and random packages
66 | '''
67 |
68 | np.random.seed(random_seed) # fix seed
69 | random.seed(random_seed)
70 |
71 | # For reasons unknown, without an initial logging.info call here,
72 | # we don't get any further log output
73 | logging.info("Init AutoFolio")
74 | self._root_logger = logging.getLogger()
75 | self.logger = logging.getLogger("AutoFolio")
76 | self.cs = None
77 |
78 | self.overwrite_args = None
79 |
80 | def run_cli(self):
81 | '''
82 | main method of AutoFolio based on command line interface
83 | '''
84 |
85 | cmd_parser = CMDParser()
86 | args_, self.overwrite_args = cmd_parser.parse()
87 |
88 | self._root_logger.setLevel(args_.verbose)
89 |
90 | if args_.load:
91 | pred = self.read_model_and_predict(
92 | model_fn=args_.load, feature_vec=list(map(float, args_.feature_vec.split(" "))))
93 | print("Selected Schedule [(algorithm, budget)]: %s" % (pred))
94 |
95 | else:
96 |
97 | scenario = ASlibScenario()
98 | if args_.scenario:
99 | scenario.read_scenario(args_.scenario)
100 | elif args_.performance_csv and args_.feature_csv:
101 | scenario.read_from_csv(perf_fn=args_.performance_csv,
102 | feat_fn=args_.feature_csv,
103 | objective=args_.objective,
104 | runtime_cutoff=args_.runtime_cutoff,
105 | maximize=args_.maximize,
106 | cv_fn=args_.cv_csv)
107 | else:
108 | raise ValueError("Missing inputs to read scenario data.")
109 |
110 | test_scenario = None
111 | if args_.performance_test_csv and args_.feature_test_csv:
112 | test_scenario = ASlibScenario()
113 | test_scenario.read_from_csv(perf_fn=args_.performance_test_csv,
114 | feat_fn=args_.feature_test_csv,
115 | objective=args_.objective,
116 | runtime_cutoff=args_.runtime_cutoff,
117 | maximize=args_.maximize,
118 | cv_fn=None)
119 |
120 | config = {}
121 | if args_.config is not None:
122 | self.logger.info("Reading yaml config file")
123 | config = yaml.safe_load(open(args_.config))
124 | if not config.get("wallclock_limit"):
125 | config["wallclock_limit"] = args_.wallclock_limit
126 | if not config.get("runcount_limit"):
127 | config["runcount_limit"] = args_.runcount_limit
128 | if not config.get("output-dir"):
129 | config["output-dir"] = args_.output_dir
130 |
131 | self.cs = self.get_cs(scenario, config)
132 |
133 | if args_.outer_cv:
134 | self._outer_cv(scenario, config, args_.outer_cv_fold,
135 | args_.out_template, smac_seed=args_.smac_seed)
136 | return 0
137 |
138 | if args_.tune:
139 | config = self.get_tuned_config(scenario,
140 | wallclock_limit=args_.wallclock_limit,
141 | runcount_limit=args_.runcount_limit,
142 | autofolio_config=config,
143 | seed=args_.smac_seed)
144 | else:
145 | config = self.cs.get_default_configuration()
146 | self.logger.debug(config)
147 |
148 | if args_.save:
149 | feature_pre_pipeline, pre_solver, selector = self.fit(
150 | scenario=scenario, config=config)
151 | self._save_model(
152 | args_.save, scenario, feature_pre_pipeline, pre_solver, selector, config)
153 | else:
154 | self.run_cv(config=config, scenario=scenario, folds=int(scenario.cv_data.max().max()))
155 |
156 | if test_scenario is not None:
157 | stats = self.run_fold(config=config,
158 | fold=0,
159 | return_fit=False,
160 | scenario=scenario,
161 | test_scenario=test_scenario)
162 |
163 | def _outer_cv(self, scenario: ASlibScenario, autofolio_config:dict=None,
164 | outer_cv_fold:int=None, out_template:str=None,
165 | smac_seed:int=42):
166 | '''
167 | Evaluate on a scenario using an "outer" cross-fold validation
168 | scheme. In particular, this ensures that SMAC does not use the test
169 | set during hyperparameter optimization.
170 |
171 | Arguments
172 | ---------
173 | scenario: ASlibScenario
174 | ASlib Scenario at hand
175 |
176 | autofolio_config: dict, or None
177 | An optional dictionary of configuration options
178 |
179 | outer_cv_fold: int, or None
180 | If given, then only the single outer-cv fold is processed
181 |
182 | out_template: str, or None
183 | If given, the learned configurations are written to the
184 | specified locations. The string is treated as a string.Template, and
185 | "${fold}" and "${type}" are substituted with the fold and file type.
186 |
187 | smac_seed:int
188 | random seed for SMAC
189 |
190 | Returns
191 | -------
192 | stats: validate.Stats
193 | Performance over all outer-cv folds
194 |
195 | '''
196 | import string
197 |
198 | outer_stats = None
199 |
200 | # For each outer split
201 | outer_cv_folds = range(1, 11)
202 | if outer_cv_fold is not None:
203 | outer_cv_folds = range(outer_cv_fold, outer_cv_fold+1)
204 |
205 | for cv_fold in outer_cv_folds:
206 |
207 | # Use ‘ASlibScenario.get_split()’ to get the outer split
208 | outer_testing, outer_training = scenario.get_split(cv_fold)
209 |
210 | msg = ">>>>> Outer CV fold: {} <<<<<".format(cv_fold)
211 | self.logger.info(msg)
212 |
213 | # Use ASlibScenario.create_cv_splits() to get an inner-cv
214 | outer_training.create_cv_splits(n_folds=10)
215 |
216 | # Use ‘AutoFolio.get_tuned_config()’ to tune on inner-cv
217 | config = self.get_tuned_config(
218 | outer_training,
219 | autofolio_config=autofolio_config,
220 | seed=smac_seed
221 | )
222 |
223 | # Use ‘AutoFolio.run_fold()’ to get the performance on the outer split
224 | stats, fit, schedule = self.run_fold(
225 | config,
226 | scenario,
227 | cv_fold,
228 | return_fit=True
229 | )
230 |
231 | feature_pre_pipeline, pre_solver, selector = fit
232 |
233 | if outer_stats is None:
234 | outer_stats = stats
235 | else:
236 | outer_stats.merge(stats)
237 |
238 | # save the model, if given an output location
239 | if out_template is not None:
240 | out_template_ = string.Template(out_template)
241 | model_fn = out_template_.substitute(fold=cv_fold, type="pkl")
242 |
243 | msg = "Writing model to: {}".format(model_fn)
244 | self.logger.info(msg)
245 |
246 | self._save_model(
247 | model_fn,
248 | scenario,
249 | feature_pre_pipeline,
250 | pre_solver,
251 | selector,
252 | config
253 | )
254 |
255 | # convert the schedule to a data frame
256 | schedule_df = pd.Series(schedule, name="solver")
257 | schedule_df.index.name = "instance"
258 | schedule_df = schedule_df.reset_index()
259 |
260 | # just keep the solver name; we don't care about the time
261 |
262 | # x[0] gets the first pair in the schedule list
263 | # and x[0][0] gets the name of the solver from that pair
264 | schedule_df['solver'] = schedule_df['solver'].apply(lambda x: x[0][0])
265 |
266 | selections_fn = out_template_.substitute(fold=cv_fold, type="csv")
267 |
268 | msg = "Writing solver choices to: {}".format(selections_fn)
269 | self.logger.info(msg)
270 |
271 | schedule_df.to_csv(selections_fn, index=False)
272 |
273 | self.logger.info(">>>>> Final Stats <<<<<")
274 | outer_stats.show()
275 |
276 | def _save_model(self, out_fn: str, scenario: ASlibScenario, feature_pre_pipeline: list, pre_solver: Aspeed, selector, config: Configuration):
277 | '''
278 | save all pipeline objects for predictions
279 |
280 | Arguments
281 | ---------
282 | out_fn: str
283 | filename of output file
284 | scenario: AslibScenario
285 | ASlib scenario with all the data
286 | feature_pre_pipeline: list
287 | list of preprocessing objects
288 | pre_solver: Aspeed
289 | aspeed object with pre-solving schedule
290 | selector: autofolio.selector.*
291 | fitted selector object
292 | config: Configuration
293 | parameter setting configuration
294 | '''
295 | scenario.logger = None
296 | for fpp in feature_pre_pipeline:
297 | fpp.logger = None
298 | if pre_solver:
299 | pre_solver.logger = None
300 | selector.logger = None
301 | model = [scenario, feature_pre_pipeline, pre_solver, selector, config]
302 | with open(out_fn, "bw") as fp:
303 | pickle.dump(model, fp)
304 |
305 | def read_model_and_predict(self, model_fn: str, feature_vec: list):
306 | '''
307 | reads saved model from disk and predicts the selected algorithm schedule for a given feature vector
308 |
309 | Arguments
310 | --------
311 | model_fn: str
312 | file name of saved model
313 | feature_vec: list
314 | instance feature vector as a list of floats
315 |
316 | Returns
317 | -------
318 | list of tuple
319 | Selected schedule [(algorithm, budget)]
320 | '''
321 | with open(model_fn, "br") as fp:
322 | scenario, feature_pre_pipeline, pre_solver, selector, config = pickle.load(
323 | fp)
324 |
325 | for fpp in feature_pre_pipeline:
326 | fpp.logger = logging.getLogger("Feature Preprocessing")
327 | if pre_solver:
328 | pre_solver.logger = logging.getLogger("Aspeed PreSolving")
329 | selector.logger = logging.getLogger("Selector")
330 |
331 | # saved scenario is adapted to given feature vector
332 | feature_vec = np.array([feature_vec])
333 | scenario.feature_data = pd.DataFrame(
334 | feature_vec, index=["pseudo_instance"], columns=scenario.features)
335 | scenario.instances = ["pseudo_instance"]
336 | pred = self.predict(scenario=scenario, config=config,
337 | feature_pre_pipeline=feature_pre_pipeline, pre_solver=pre_solver, selector=selector)
338 |
339 | return pred["pseudo_instance"]
340 |
341 | def get_cs(self, scenario: ASlibScenario, autofolio_config:dict=None):
342 | '''
343 | returns the parameter configuration space of AutoFolio
344 | (based on the automl config space: https://github.com/automl/ConfigSpace)
345 |
346 | Arguments
347 | ---------
348 | scenario: aslib_scenario.aslib_scenario.ASlibScenario
349 | aslib scenario at hand
350 |
351 | autofolio_config: dict, or None
352 | An optional dictionary of configuration options
353 | '''
354 | autofolio_config = autofolio_config or {}
355 | self.cs = ConfigurationSpace()
356 |
357 | # only allow the feature groups specified in the config file
358 | # by default, though, all of the feature groups are allowed.
359 | allowed_feature_groups = autofolio_config.get("allowed_feature_groups",
360 | scenario.feature_steps)
361 |
362 | if len(allowed_feature_groups) == 0:
363 | msg = "Please ensure at least one feature group is allowed"
364 | raise ValueError(msg)
365 |
366 |
367 | if len(allowed_feature_groups) == 1:
368 | choices = [True] # if we only have one feature group, it has to be active
369 | else:
370 | choices = [True, False]
371 | default = True
372 |
373 | for fs in allowed_feature_groups:
374 |
375 | fs_param = CategoricalHyperparameter(name="fgroup_%s" % (fs),
376 | choices=choices, default_value=default)
377 | self.cs.add_hyperparameter(fs_param)
378 |
379 | # preprocessing
380 | if autofolio_config.get("pca", True):
381 | PCAWrapper.add_params(self.cs)
382 |
383 | if autofolio_config.get("impute", True):
384 | ImputerWrapper.add_params(self.cs)
385 |
386 | if autofolio_config.get("scale", True):
387 | StandardScalerWrapper.add_params(self.cs)
388 |
389 | # Pre-Solving
390 | if scenario.performance_type[0] == "runtime":
391 | if autofolio_config.get("presolve", True):
392 | Aspeed.add_params(
393 | cs=self.cs, cutoff=scenario.algorithm_cutoff_time)
394 |
395 | if autofolio_config.get("classifier"):
396 | # fix parameter
397 | cls_choices = [autofolio_config["classifier"]]
398 | cls_def = autofolio_config["classifier"]
399 | else:
400 | cls_choices = ["RandomForest","XGBoost"]
401 | cls_def = "RandomForest"
402 | classifier = CategoricalHyperparameter(
403 | "classifier", choices=cls_choices,
404 | default_value=cls_def)
405 |
406 | self.cs.add_hyperparameter(classifier)
407 |
408 | RandomForest.add_params(self.cs)
409 | XGBoost.add_params(self.cs)
410 |
411 | if autofolio_config.get("regressor"):
412 | # fix parameter
413 | reg_choices = [autofolio_config["regressor"]]
414 | reg_def = autofolio_config["regressor"]
415 | else:
416 | reg_choices = ["RandomForestRegressor"]
417 | reg_def = "RandomForestRegressor"
418 |
419 | regressor = CategoricalHyperparameter(
420 | "regressor", choices=reg_choices, default_value=reg_def)
421 | self.cs.add_hyperparameter(regressor)
422 | RandomForestRegressor.add_params(self.cs)
423 |
424 | # selectors
425 | if autofolio_config.get("selector"):
426 | # fix parameter
427 | sel_choices = [autofolio_config["selector"]]
428 | sel_def = autofolio_config["selector"]
429 | else:
430 | sel_choices = ["PairwiseClassifier","PairwiseRegressor"]
431 | sel_def = "PairwiseClassifier"
432 |
433 | selector = CategoricalHyperparameter(
434 | "selector", choices=sel_choices, default_value=sel_def)
435 | self.cs.add_hyperparameter(selector)
436 | PairwiseClassifier.add_params(self.cs)
437 | PairwiseRegression.add_params(self.cs)
438 |
439 | self.logger.debug(self.cs)
440 |
441 | return self.cs
442 |
443 | def get_tuned_config(self, scenario: ASlibScenario,
444 | runcount_limit:int=42,
445 | wallclock_limit:int=300,
446 | autofolio_config:dict=dict(),
447 | seed:int=42):
448 | '''
449 | uses SMAC3 to determine a well-performing configuration in the configuration space self.cs on the given scenario
450 |
451 | Arguments
452 | ---------
453 | scenario: ASlibScenario
454 | ASlib Scenario at hand
455 | runcount_limit: int
456 | runcount_limit for SMAC scenario
457 | wallclock_limit: int
458 | wallclock limit in sec for SMAC scenario
459 | (overwritten by autofolio_config)
460 | autofolio_config: dict, or None
461 | An optional dictionary of configuration options
462 | seed: int
463 | random seed for SMAC
464 |
465 | Returns
466 | -------
467 | Configuration
468 | best incumbent configuration found by SMAC
469 | '''
470 |
471 | wallclock_limit = autofolio_config.get("wallclock_limit", wallclock_limit)
472 | runcount_limit = autofolio_config.get("runcount_limit", runcount_limit)
473 |
474 | taf = functools.partial(self.called_by_smac, scenario=scenario)
475 | max_fold = scenario.cv_data.max().max()
476 | max_fold = int(max_fold)
477 |
478 | ac_scenario = Scenario({"run_obj": "quality", # we optimize quality
479 | "runcount-limit": runcount_limit,
480 | "cs": self.cs, # configuration space
481 | "deterministic": "true",
482 | "instances": [[str(i)] for i in range(1, max_fold+1)],
483 | "wallclock-limit": wallclock_limit,
484 | "output-dir" : "" if not autofolio_config.get("output-dir",None) else autofolio_config.get("output-dir")
485 | })
486 |
487 | # necessary to use stats options related to scenario information
488 | AC_Stats.scenario = ac_scenario
489 |
490 | # Optimize
491 | self.logger.info(
492 | ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
493 | self.logger.info("Start Configuration")
494 | self.logger.info(
495 | ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
496 | smac = SMAC(scenario=ac_scenario, tae_runner=taf,
497 | rng=np.random.RandomState(seed))
498 | incumbent = smac.optimize()
499 |
500 | self.logger.info("Final Incumbent: %s" % (incumbent))
501 |
502 | return incumbent
503 |
504 | def called_by_smac(self, config: Configuration, scenario: ASlibScenario, instance:str=None, seed:int=1):
505 | '''
506 | run a cross-fold validation (or a single fold, if instance is given) based on the data from cv.arff
507 |
508 | Arguments
509 | ---------
510 | config: Configuration
511 | parameter configuration to use for preprocessing
512 | scenario: aslib_scenario.aslib_scenario.ASlibScenario
513 | aslib scenario at hand
514 | instance: str
515 | cv-fold index
516 | seed: int
517 | random seed (not used)
518 |
519 | Returns
520 | -------
521 | float: average performance
522 | '''
523 |
524 | if instance is None:
525 | perf = self.run_cv(config=config, scenario=scenario)
526 | else:
527 | try:
528 | stats = self.run_fold(config=config, scenario=scenario, fold=int(instance))
529 | perf = stats.show()
530 | except ValueError:
531 | if scenario.performance_type[0] == "runtime":
532 | perf = scenario.algorithm_cutoff_time * 20
533 | else:
534 | # try to impute a worst case perf
535 | perf = scenario.performance_data.max().max()
536 |
537 | if scenario.maximize[0]:
538 | perf *= -1
539 |
540 | return perf
541 |
542 | def run_cv(self, config: Configuration, scenario: ASlibScenario, folds:int=10):
543 | '''
544 | run a cross fold validation based on the given data from cv.arff
545 |
546 | Arguments
547 | ---------
548 | scenario: aslib_scenario.aslib_scenario.ASlibScenario
549 | aslib scenario at hand
550 | config: Configuration
551 | parameter configuration to use for preprocessing
552 | folds: int
553 | number of cv-splits
554 | seed: int
555 | random seed (not used)
556 | '''
557 | #TODO: use seed and instance in an appropriate way
558 | try:
559 | if scenario.performance_type[0] == "runtime":
560 | cv_stat = Stats(runtime_cutoff=scenario.algorithm_cutoff_time)
561 | else:
562 | cv_stat = Stats(runtime_cutoff=0)
563 | for i in range(1, folds + 1):
564 | self.logger.info("CV-Iteration: %d" % (i))
565 | stats = self.run_fold(config=config,
566 | scenario=scenario,
567 | fold=i)
568 | cv_stat.merge(stat=stats)
569 |
570 | self.logger.info(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
571 | self.logger.info("CV Stats")
572 | par10 = cv_stat.show()
573 | except ValueError:
574 | traceback.print_exc()
575 | par10 = scenario.algorithm_cutoff_time * 10
576 |
577 | if scenario.maximize[0]:
578 | par10 *= -1
579 |
580 | return par10
581 |
582 | def run_fold(self, config: Configuration, scenario:ASlibScenario, fold:int, test_scenario=None, return_fit:bool=False):
583 | '''
584 | run a given fold of cross validation
585 |
586 | Arguments
587 | ---------
588 | scenario: aslib_scenario.aslib_scenario.ASlibScenario
589 | aslib scenario at hand
590 | config: Configuration
591 | parameter configuration to use for preprocessing
592 | fold: int
593 | fold id
594 | test_scenario:aslib_scenario.aslib_scenario.ASlibScenario
595 | aslib scenario with test data for validation
596 | generated from the cv split of the given scenario if None
597 |
598 | return_fit: bool
599 | optionally, the learned preprocessing options, presolver and
600 | selector can be returned
601 |
602 | Returns
603 | -------
604 | Stats()
605 |
606 | (pre_pipeline, pre_solver, selector):
607 | only present if return_fit is True
608 | the pipeline components fit with the configuration options
609 |
610 | schedule: dict of string -> list of (solver, cutoff) pairs
611 | only present if return_fit is True
612 | the solver choices for each instance
613 |
614 |
615 | '''
616 |
617 | if test_scenario is None:
618 | self.logger.info("CV-Iteration: %d" % (fold))
619 | test_scenario, training_scenario = scenario.get_split(indx=fold)
620 | else:
621 | self.logger.info("Validation on test data")
622 | training_scenario = scenario
623 |
624 | feature_pre_pipeline, pre_solver, selector = self.fit(
625 | scenario=training_scenario, config=config)
626 |
627 | schedules = self.predict(
628 | test_scenario, config, feature_pre_pipeline, pre_solver, selector)
629 |
630 | val = Validator()
631 | if scenario.performance_type[0] == "runtime":
632 | stats = val.validate_runtime(
633 | schedules=schedules, test_scenario=test_scenario, train_scenario=training_scenario)
634 | elif scenario.performance_type[0] == "solution_quality":
635 | stats = val.validate_quality(
636 | schedules=schedules, test_scenario=test_scenario, train_scenario=training_scenario)
637 | else:
638 | raise ValueError("Unknown: %s" %(scenario.performance_type[0]))
639 |
640 | if return_fit:
641 | return stats, (feature_pre_pipeline, pre_solver, selector), schedules
642 | else:
643 | return stats
644 |
645 | def fit(self, scenario: ASlibScenario, config: Configuration):
646 | '''
647 | fit AutoFolio on given ASlib Scenario
648 |
649 | Arguments
650 | ---------
651 | scenario: aslib_scenario.aslib_scenario.ASlibScenario
652 | aslib scenario at hand
653 | config: Configuration
654 | parameter configuration to use for preprocessing
655 |
656 | Returns
657 | -------
658 | list of fitted feature preprocessing objects
659 | pre-solving object
660 | fitted selector
661 | '''
662 | self.logger.info("Given Configuration: %s" % (config))
663 |
664 | if self.overwrite_args:
665 | config = self._overwrite_configuration(
666 | config=config, overwrite_args=self.overwrite_args)
667 | self.logger.info("Overwritten Configuration: %s" % (config))
668 |
669 | scenario, feature_pre_pipeline = self.fit_transform_feature_preprocessing(
670 | scenario, config)
671 |
672 | pre_solver = self.fit_pre_solving(scenario, config)
673 |
674 | selector = self.fit_selector(scenario, config)
675 |
676 | return feature_pre_pipeline, pre_solver, selector
677 |
678 | def _overwrite_configuration(self, config: Configuration, overwrite_args: list):
679 | '''
680 | overwrites a given configuration with some new settings
681 |
682 | Arguments
683 | ---------
684 | config: Configuration
685 | initial configuration to be adapted
686 | overwrite_args: list
687 | new parameter settings as a list of strings
688 |
689 | Returns
690 | -------
691 | Configuration
692 | '''
693 |
694 | def pairwise(iterable):
695 | a, b = tee(iterable)
696 | next(b, None)
697 | return zip(a, b)
698 |
699 | dict_conf = config.get_dictionary()
700 | for param, value in pairwise(overwrite_args):
701 | try:
702 | ok = self.cs.get_hyperparameter(param)
703 | except KeyError:
704 | ok = None
705 | if ok is not None:
706 | if type(self.cs.get_hyperparameter(param)) is UniformIntegerHyperparameter:
707 | dict_conf[param] = int(value)
708 | elif type(self.cs.get_hyperparameter(param)) is UniformFloatHyperparameter:
709 | dict_conf[param] = float(value)
710 | elif value == "True":
711 | dict_conf[param] = True
712 | elif value == "False":
713 | dict_conf[param] = False
714 | else:
715 | dict_conf[param] = value
716 | else:
717 | self.logger.warning(
718 | "Unknown given parameter: %s %s" % (param, value))
719 | config = Configuration(self.cs, values=dict_conf, allow_inactive_with_values=True)
720 |
721 | return config
722 |
723 | def fit_transform_feature_preprocessing(self, scenario: ASlibScenario, config: Configuration):
724 | '''
725 | performs feature preprocessing on a given ASlib scenario wrt a given configuration
726 |
727 | Arguments
728 | ---------
729 | scenario: aslib_scenario.aslib_scenario.ASlibScenario
730 | aslib scenario at hand
731 | config: Configuration
732 | parameter configuration to use for preprocessing
733 |
734 | Returns
735 | -------
736 | list of fitted feature preprocessing objects
737 | '''
738 |
739 | pipeline = []
740 | fgf = FeatureGroupFiltering()
741 | scenario = fgf.fit_transform(scenario, config)
742 |
743 | imputer = ImputerWrapper()
744 | scenario = imputer.fit_transform(scenario, config)
745 |
746 | scaler = StandardScalerWrapper()
747 | scenario = scaler.fit_transform(scenario, config)
748 |
749 | pca = PCAWrapper()
750 | scenario = pca.fit_transform(scenario, config)
751 |
752 | return scenario, [fgf, imputer, scaler, pca]
753 |
754 | def fit_pre_solving(self, scenario: ASlibScenario, config: Configuration):
755 | '''
756 | fits a pre-solving schedule using Aspeed [Hoos et al., 2015, TPLP]
757 |
758 | Arguments
759 | ---------
760 | scenario: aslib_scenario.aslib_scenario.ASlibScenario
761 | aslib scenario at hand
762 | config: Configuration
763 | parameter configuration to use for preprocessing
764 |
765 | Returns
766 | -------
767 | instance of Aspeed() with a fitted pre-solving schedule if performance_type of scenario is runtime; else None
768 | '''
769 | if scenario.performance_type[0] == "runtime":
770 | aspeed = Aspeed()
771 | aspeed.fit(scenario=scenario, config=config)
772 | return aspeed
773 | else:
774 | return None
775 |
776 | def fit_selector(self, scenario: ASlibScenario, config: Configuration):
777 | '''
778 | fits an algorithm selector for a given scenario wrt a given configuration
779 |
780 | Arguments
781 | ---------
782 | scenario: aslib_scenario.aslib_scenario.ASlibScenario
783 | aslib scenario at hand
784 | config: Configuration
785 | parameter configuration
786 | '''
787 |
788 | if config.get("selector") == "PairwiseClassifier":
789 | clf_class = None
790 | if config.get("classifier") == "RandomForest":
791 | clf_class = RandomForest
792 | if config.get("classifier") == "XGBoost":
793 | clf_class = XGBoost
794 |
795 | selector = PairwiseClassifier(classifier_class=clf_class)
796 | selector.fit(scenario=scenario, config=config)
797 |
798 | if config.get("selector") == "MultiClassifier":
799 | clf_class = None
800 | if config.get("classifier") == "RandomForest":
801 | clf_class = RandomForest
802 | if config.get("classifier") == "XGBoost":
803 | clf_class = XGBoost
804 |
805 | selector = MultiClassifier(classifier_class=clf_class)
806 | selector.fit(scenario=scenario, config=config)
807 |
808 | if config.get("selector") == "IndRegressor":
809 | reg_class = None
810 | if config.get("regressor") == "RandomForestRegressor":
811 | reg_class = RandomForestRegressor
812 |
813 | selector = IndRegression(regressor_class=reg_class)
814 | selector.fit(scenario=scenario, config=config)
815 |
816 | if config.get("selector") == "JointRegressor":
817 | reg_class = None
818 | if config.get("regressor") == "RandomForestRegressor":
819 | reg_class = RandomForestRegressor
820 |
821 | selector = JointRegression(regressor_class=reg_class)
822 | selector.fit(scenario=scenario, config=config)
823 |
824 | if config.get("selector") == "PairwiseRegressor":
825 | reg_class = None
826 | if config.get("regressor") == "RandomForestRegressor":
827 | reg_class = RandomForestRegressor
828 |
829 | selector = PairwiseRegression(regressor_class=reg_class)
830 | selector.fit(scenario=scenario, config=config)
831 |
832 | return selector
833 |
834 | def predict(self, scenario: ASlibScenario, config: Configuration, feature_pre_pipeline: list, pre_solver: Aspeed, selector):
835 | '''
836 | predicts algorithm schedules wrt a given config
837 | and given pipelines
838 |
839 | Arguments
840 | ---------
841 | scenario: aslib_scenario.aslib_scenario.ASlibScenario
842 | aslib scenario at hand
843 | config: Configuration
844 | parameter configuration
845 | feature_pre_pipeline: list
846 | list of fitted feature preprocessors
847 | pre_solver: Aspeed
848 | pre solver object with a saved static schedule
849 | selector: autofolio.selector.*
850 | fitted selector object
851 | '''
852 |
853 | self.logger.info("Predict on Test")
854 | for f_pre in feature_pre_pipeline:
855 | scenario = f_pre.transform(scenario)
856 |
857 | if pre_solver:
858 | pre_solving_schedule = pre_solver.predict(scenario=scenario)
859 | else:
860 | pre_solving_schedule = {}
861 |
862 | pred_schedules = selector.predict(scenario=scenario)
863 |
864 | # combine schedules
865 | if pre_solving_schedule:
866 | return dict((inst, pre_solving_schedule.get(inst, []) + schedule) for inst, schedule in pred_schedules.items())
867 | else:
868 | return pred_schedules
869 |
870 |
871 | def main():
872 | af = AutoFolio()
873 | af.run_cli()
874 |
875 |
876 | if __name__ == "__main__":
877 | main()
878 |
--------------------------------------------------------------------------------
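For orientation, a minimal sketch of driving the AutoFolio class programmatically instead of through run_cli(); the scenario path and the zero feature vector are placeholders, and _save_model() is the same (leading-underscore) helper the CLI uses for --save:

# Programmatic usage sketch; paths and feature values are placeholders.
from autofolio.autofolio import AutoFolio
from aslib_scenario.aslib_scenario import ASlibScenario

af = AutoFolio(random_seed=12345)

scenario = ASlibScenario()
scenario.read_scenario("path/to/aslib/SAT12-INDU")   # hypothetical ASlib scenario directory
n_features = len(scenario.features)                  # captured before fitting, since preprocessing may alter the scenario

# build the configuration space and start from its default configuration
cs = af.get_cs(scenario, autofolio_config={})
config = cs.get_default_configuration()

# optionally, tune with SMAC instead of using the default configuration:
# config = af.get_tuned_config(scenario, runcount_limit=50, wallclock_limit=300,
#                              autofolio_config={}, seed=42)

# fit the full pipeline and persist it, mirroring what run_cli() does with --save
feature_pre_pipeline, pre_solver, selector = af.fit(scenario=scenario, config=config)
af._save_model("af_model.pkl", scenario, feature_pre_pipeline, pre_solver, selector, config)

# later: load the model and predict a schedule for one instance's feature vector
schedule = af.read_model_and_predict(model_fn="af_model.pkl",
                                     feature_vec=[0.0] * n_features)
print(schedule)   # [(algorithm, budget), ...]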