├── boexplain ├── optuna │ ├── __init__.py │ ├── optuna │ │ ├── .DS_Store │ │ ├── pruners │ │ │ ├── .DS_Store │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── nop.py │ │ ├── samplers │ │ │ ├── .DS_Store │ │ │ ├── tpe │ │ │ │ ├── __init__.py │ │ │ │ ├── parzen_estimator.py │ │ │ │ └── sampler.py │ │ │ ├── __init__.py │ │ │ ├── random.py │ │ │ ├── _search_space.py │ │ │ └── base.py │ │ ├── storages │ │ │ ├── __init__.py │ │ │ ├── in_memory.py │ │ │ └── base.py │ │ ├── trial │ │ │ ├── __init__.py │ │ │ ├── _util.py │ │ │ ├── _state.py │ │ │ ├── _base.py │ │ │ ├── _fixed.py │ │ │ └── _frozen.py │ │ ├── _study_direction.py │ │ ├── __init__.py │ │ ├── exceptions.py │ │ ├── progress_bar.py │ │ ├── logging.py │ │ ├── structs.py │ │ ├── distributions.py │ │ └── study.py │ └── setup.py ├── __init__.py └── files │ ├── __init__.py │ ├── cat_xform.py │ ├── search.py │ └── stats.py ├── data ├── credit_test.csv.zip ├── credit_labels.csv.zip ├── credit_record_train.csv.zip └── application_record_train.csv.zip ├── docs ├── source │ ├── api_reference │ │ ├── boexplain.files.search.rst │ │ └── boexplain.rst │ ├── index.rst │ └── conf.py ├── Makefile └── make.bat ├── pyproject.toml ├── LICENSE └── README.md /boexplain/optuna/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boexplain/__init__.py: -------------------------------------------------------------------------------- 1 | from .files import fmin, fmax 2 | 3 | __all__ = ["fmin", "fmax"] -------------------------------------------------------------------------------- /boexplain/files/__init__.py: -------------------------------------------------------------------------------- 1 | from .search import fmin, fmax 2 | 3 | __all__ = ["fmin", "fmax"] -------------------------------------------------------------------------------- /data/credit_test.csv.zip: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-db/BOExplain/HEAD/data/credit_test.csv.zip -------------------------------------------------------------------------------- /data/credit_labels.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-db/BOExplain/HEAD/data/credit_labels.csv.zip -------------------------------------------------------------------------------- /boexplain/optuna/optuna/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-db/BOExplain/HEAD/boexplain/optuna/optuna/.DS_Store -------------------------------------------------------------------------------- /data/credit_record_train.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-db/BOExplain/HEAD/data/credit_record_train.csv.zip -------------------------------------------------------------------------------- /data/application_record_train.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-db/BOExplain/HEAD/data/application_record_train.csv.zip -------------------------------------------------------------------------------- /boexplain/optuna/optuna/pruners/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-db/BOExplain/HEAD/boexplain/optuna/optuna/pruners/.DS_Store -------------------------------------------------------------------------------- /boexplain/optuna/optuna/samplers/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-db/BOExplain/HEAD/boexplain/optuna/optuna/samplers/.DS_Store -------------------------------------------------------------------------------- 
/boexplain/optuna/optuna/samplers/tpe/__init__.py: -------------------------------------------------------------------------------- 1 | # from optuna.samplers.tpe.sampler import TPESampler # NOQA 2 | from .sampler import TPESampler # NOQA -------------------------------------------------------------------------------- /docs/source/api_reference/boexplain.files.search.rst: -------------------------------------------------------------------------------- 1 | .. _boexplain.files.search: 2 | 3 | boexplain.files.search 4 | ========================================= 5 | 6 | .. _boexplain_doc: 7 | 8 | BOExplain API 9 | ------------- 10 | 11 | .. automodule:: boexplain.files.search 12 | :members: 13 | :undoc-members: 14 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/api_reference/boexplain.rst: -------------------------------------------------------------------------------- 1 | .. _api_reference: 2 | 3 | .. _reference: 4 | 5 | ============= 6 | API Reference 7 | ============= 8 | 9 | This section contains the public API reference for DataPrep. It is 10 | auto-generated from the docstrings in the project source code. 11 | 12 | 13 | .. toctree:: 14 | :maxdepth: 2 15 | 16 | boexplain.files.search -------------------------------------------------------------------------------- /boexplain/optuna/optuna/pruners/__init__.py: -------------------------------------------------------------------------------- 1 | # import optuna 2 | # from optuna.pruners.base import BasePruner 3 | # from optuna.pruners.nop import NopPruner 4 | from ... 
import optuna 5 | from .base import BasePruner 6 | from .nop import NopPruner 7 | 8 | 9 | def _filter_study( 10 | study: "optuna.study.Study", trial: "optuna.trial.FrozenTrial" 11 | ) -> "optuna.study.Study": 12 | return study 13 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/storages/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Union # NOQA 2 | 3 | # from optuna.storages.base import BaseStorage 4 | # from optuna.storages.in_memory import InMemoryStorage 5 | from .base import BaseStorage 6 | from .in_memory import InMemoryStorage 7 | 8 | def get_storage(storage): 9 | # type: (Union[None, str, BaseStorage]) -> BaseStorage 10 | if storage is None: 11 | return InMemoryStorage() 12 | else: 13 | return storage 14 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/trial/__init__.py: -------------------------------------------------------------------------------- 1 | # from optuna.trial._base import BaseTrial # NOQA 2 | # from optuna.trial._fixed import FixedTrial # NOQA 3 | # from optuna.trial._frozen import FrozenTrial # NOQA 4 | # from optuna.trial._state import TrialState # NOQA 5 | # from optuna.trial._trial import Trial # NOQA 6 | from ._base import BaseTrial # NOQA 7 | from ._fixed import FixedTrial # NOQA 8 | from ._frozen import FrozenTrial # NOQA 9 | from ._state import TrialState # NOQA 10 | from ._trial import Trial # NOQA -------------------------------------------------------------------------------- /boexplain/optuna/optuna/_study_direction.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | 4 | class StudyDirection(enum.Enum): 5 | """Direction of a :class:`~optuna.study.Study`. 6 | 7 | Attributes: 8 | NOT_SET: 9 | Direction has not been set. 
10 | MINIMIZE: 11 | :class:`~optuna.study.Study` minimizes the objective function. 12 | MAXIMIZE: 13 | :class:`~optuna.study.Study` maximizes the objective function. 14 | """ 15 | 16 | NOT_SET = 0 17 | MINIMIZE = 1 18 | MAXIMIZE = 2 19 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. BOExplain documentation master file, created by 2 | sphinx-quickstart on Mon Feb 8 14:21:13 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to BOExplain's documentation! 7 | ===================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | api_reference/boexplain 14 | 15 | 16 | 17 | Indices and tables 18 | ================== 19 | 20 | * :ref:`genindex` 21 | * :ref:`modindex` 22 | * :ref:`search` 23 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # from optuna.samplers._search_space import intersection_search_space # NOQA 2 | # from optuna.samplers._search_space import IntersectionSearchSpace # NOQA 3 | # from optuna.samplers.base import BaseSampler # NOQA 4 | # from optuna.samplers.random import RandomSampler # NOQA 5 | # from optuna.samplers.tpe import TPESampler # NOQA 6 | from ._search_space import intersection_search_space # NOQA 7 | from ._search_space import IntersectionSearchSpace # NOQA 8 | from .base import BaseSampler # NOQA 9 | from .random import RandomSampler # NOQA 10 | from .tpe import TPESampler # NOQA -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 
| # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /boexplain/files/cat_xform.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | 5 | def individual_contribution(df, objective, cat_cols, **kwargs): 6 | # dictionary of dictionaries, one dictionary for each column 7 | # dictinary keys are the categorical values and the values are the individual contribution 8 | # for each value in the column, compute the individual contribution of that column 9 | # ie, remove tuples satisfying the single-clause predicate 'col=val', 10 | # and compute the objective function with this data 11 | 12 | cat_val_to_indiv_cont = { 13 | col: {val: objective(df[df[col] != val], **kwargs) for val in df[col].unique()} 14 | for col in cat_cols 15 | } 16 | 17 | return cat_val_to_indiv_cont 18 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/trial/_util.py: -------------------------------------------------------------------------------- 1 | import decimal 2 | 3 | # from optuna import logging 4 | from .. 
import logging 5 | 6 | _logger = logging.get_logger(__name__) 7 | 8 | 9 | def _adjust_discrete_uniform_high(name, low, high, q): 10 | # type: (str, float, float, float) -> float 11 | 12 | d_high = decimal.Decimal(str(high)) 13 | d_low = decimal.Decimal(str(low)) 14 | d_q = decimal.Decimal(str(q)) 15 | 16 | d_r = d_high - d_low 17 | 18 | if d_r % d_q != decimal.Decimal("0"): 19 | high = float((d_r // d_q) * d_q + d_low) 20 | _logger.warning( 21 | "The range of parameter `{}` is not divisible by `q`, and is " 22 | "replaced by [{}, {}].".format(name, low, high) 23 | ) 24 | 25 | return high 26 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "boexplain" 3 | version = "0.1.1" 4 | description = "BOExplain" 5 | authors = ["Brandon Lockhart "] 6 | license = "MIT" 7 | readme = "README.md" 8 | repository = "https://github.com/sfu-db/BOExplain" 9 | homepage = "https://github.com/sfu-db/BOExplain" 10 | 11 | [tool.poetry.dependencies] 12 | python = "^3.9" 13 | pandas = "1.2.1" 14 | numpy = "1.20.0" 15 | scipy = "1.6.0" 16 | scikit-learn = "0.24.1" 17 | altair = "4.1.0" 18 | imblearn = "0.0" 19 | tqdm = "4.51.0" 20 | colorlog = "4.4.0" 21 | numpyencoder = "0.3.0" 22 | 23 | [tool.poetry.dev-dependencies] 24 | black = "^19.10b0" 25 | jupyter = "^1" 26 | ipykernel = "^5" 27 | 28 | [tool.black] 29 | line-length = 99 30 | target-version = ['py38'] 31 | 32 | [build-system] 33 | requires = ["poetry>=1"] 34 | build-backend = "poetry.masonry.api" 35 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import types 3 | from typing import Any 4 | 5 | # from optuna import distributions # NOQA 6 | # from optuna import exceptions # NOQA 7 | # from optuna import 
logging # NOQA 8 | # from optuna import pruners # NOQA 9 | # from optuna import samplers # NOQA 10 | # from optuna import storages # NOQA 11 | # from optuna import study # NOQA 12 | # from optuna import trial # NOQA 13 | from . import distributions # NOQA 14 | from . import exceptions # NOQA 15 | from . import logging # NOQA 16 | from . import pruners # NOQA 17 | from . import samplers # NOQA 18 | from . import storages # NOQA 19 | from . import study # NOQA 20 | from . import trial # NOQA 21 | 22 | from .study import create_study # NOQA 23 | from .study import Study # NOQA 24 | from .trial import Trial # NOQA 25 | # from study import create_study # NOQA 26 | # from study import Study # NOQA 27 | # from trial import Trial # NOQA -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/trial/_state.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | 4 | class TrialState(enum.Enum): 5 | """State of a :class:`~optuna.trial.Trial`. 6 | 7 | Attributes: 8 | RUNNING: 9 | The :class:`~optuna.trial.Trial` is running. 10 | COMPLETE: 11 | The :class:`~optuna.trial.Trial` has been finished without any error. 12 | PRUNED: 13 | The :class:`~optuna.trial.Trial` has been pruned with 14 | :class:`~optuna.exceptions.TrialPruned`. 15 | FAIL: 16 | The :class:`~optuna.trial.Trial` has failed due to an uncaught error. 17 | """ 18 | 19 | RUNNING = 0 20 | COMPLETE = 1 21 | PRUNED = 2 22 | FAIL = 3 23 | WAITING = 4 24 | 25 | def __repr__(self): 26 | # type: () -> str 27 | 28 | return str(self) 29 | 30 | def is_finished(self): 31 | # type: () -> bool 32 | 33 | return self != TrialState.RUNNING and self != TrialState.WAITING 34 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/pruners/base.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class BasePruner(object, metaclass=abc.ABCMeta): 5 | """Base class for pruners.""" 6 | 7 | @abc.abstractmethod 8 | def prune(self, study, trial): 9 | # type: (Study, FrozenTrial) -> bool 10 | """Judge whether the trial should be pruned based on the reported values. 11 | 12 | Note that this method is not supposed to be called by library users. 
Instead, 13 | :func:`optuna.trial.Trial.report` and :func:`optuna.trial.Trial.should_prune` provide 14 | user interfaces to implement pruning mechanism in an objective function. 15 | 16 | Args: 17 | study: 18 | Study object of the target study. 19 | trial: 20 | FrozenTrial object of the target trial. 21 | 22 | Returns: 23 | A boolean value representing whether the trial should be pruned. 24 | """ 25 | 26 | raise NotImplementedError 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021, Brandon Lockhart 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /boexplain/optuna/optuna/pruners/nop.py: -------------------------------------------------------------------------------- 1 | # from optuna.pruners import BasePruner 2 | from . 
import BasePruner 3 | 4 | 5 | class NopPruner(BasePruner): 6 | """Pruner which never prunes trials. 7 | 8 | Example: 9 | 10 | .. testsetup:: 11 | 12 | import numpy as np 13 | from sklearn.model_selection import train_test_split 14 | 15 | np.random.seed(seed=0) 16 | X = np.random.randn(200).reshape(-1, 1) 17 | y = np.where(X[:, 0] < 0.5, 0, 1) 18 | X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=0) 19 | classes = np.unique(y) 20 | 21 | .. testcode:: 22 | 23 | import optuna 24 | from sklearn.linear_model import SGDClassifier 25 | 26 | def objective(trial): 27 | alpha = trial.suggest_uniform('alpha', 0.0, 1.0) 28 | clf = SGDClassifier(alpha=alpha) 29 | n_train_iter = 100 30 | 31 | for step in range(n_train_iter): 32 | clf.partial_fit(X_train, y_train, classes=classes) 33 | 34 | intermediate_value = clf.score(X_valid, y_valid) 35 | trial.report(intermediate_value, step) 36 | 37 | if trial.should_prune(): 38 | assert False, "should_prune() should always return False with this pruner." 39 | raise optuna.exceptions.TrialPruned() 40 | 41 | return clf.score(X_valid, y_valid) 42 | 43 | study = optuna.create_study(direction='maximize', 44 | pruner=optuna.pruners.NopPruner()) 45 | study.optimize(objective, n_trials=20) 46 | """ 47 | 48 | def prune(self, study, trial): 49 | # type: (Study, FrozenTrial) -> bool 50 | 51 | return False 52 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. 
For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | sys.path.insert(0, os.path.abspath("../../")) 17 | 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = 'BOExplain' 22 | copyright = '2021, Brandon Lockhart' 23 | author = 'Brandon Lockhart' 24 | 25 | # The full version, including alpha/beta/rc tags 26 | release = '0.1.0' 27 | 28 | 29 | # -- General configuration --------------------------------------------------- 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = [ 'sphinx.ext.autodoc' 35 | ] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ['_templates'] 39 | 40 | # List of patterns, relative to source directory, that match files and 41 | # directories to ignore when looking for source files. 42 | # This pattern also affects html_static_path and html_extra_path. 43 | exclude_patterns = [] 44 | 45 | 46 | # -- Options for HTML output ------------------------------------------------- 47 | 48 | # The theme to use for HTML and HTML Help pages. See the documentation for 49 | # a list of builtin themes. 
50 | # 51 | 52 | html_context = { 53 | 'AUTHOR': author, 54 | 'DESCRIPTION': 'BOExplain, documentation site.', 55 | 'SITEMAP_BASE_URL': 'https://sfu-db.github.io/BOExplain/', # Trailing slash is needed 56 | 'VERSION': release, 57 | } 58 | 59 | html_theme = 'alabaster' 60 | 61 | # Add any paths that contain custom static files (such as style sheets) here, 62 | # relative to this directory. They are copied after the builtin static files, 63 | # so a file named "default.css" will overwrite the builtin "default.css". 64 | html_static_path = ['_static'] -------------------------------------------------------------------------------- /boexplain/optuna/optuna/exceptions.py: -------------------------------------------------------------------------------- 1 | class OptunaError(Exception): 2 | """Base class for Optuna specific errors.""" 3 | 4 | pass 5 | 6 | 7 | class TrialPruned(OptunaError): 8 | """Exception for pruned trials. 9 | 10 | This error tells a trainer that the current :class:`~optuna.trial.Trial` was pruned. It is 11 | supposed to be raised after :func:`optuna.trial.Trial.should_prune` as shown in the following 12 | example. 13 | 14 | Example: 15 | 16 | .. testsetup:: 17 | 18 | import numpy as np 19 | from sklearn.model_selection import train_test_split 20 | 21 | np.random.seed(seed=0) 22 | X = np.random.randn(200).reshape(-1, 1) 23 | y = np.where(X[:, 0] < 0.5, 0, 1) 24 | X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=0) 25 | classes = np.unique(y) 26 | 27 | .. 
testcode:: 28 | 29 | import optuna 30 | from sklearn.linear_model import SGDClassifier 31 | 32 | def objective(trial): 33 | alpha = trial.suggest_uniform('alpha', 0.0, 1.0) 34 | clf = SGDClassifier(alpha=alpha) 35 | n_train_iter = 100 36 | 37 | for step in range(n_train_iter): 38 | clf.partial_fit(X_train, y_train, classes=classes) 39 | 40 | intermediate_value = clf.score(X_valid, y_valid) 41 | trial.report(intermediate_value, step) 42 | 43 | if trial.should_prune(): 44 | raise optuna.exceptions.TrialPruned() 45 | 46 | return clf.score(X_valid, y_valid) 47 | 48 | study = optuna.create_study(direction='maximize') 49 | study.optimize(objective, n_trials=20) 50 | """ 51 | 52 | pass 53 | 54 | 55 | class CLIUsageError(OptunaError): 56 | """Exception for CLI. 57 | 58 | CLI raises this exception when it receives invalid configuration. 59 | """ 60 | 61 | pass 62 | 63 | 64 | class StorageInternalError(OptunaError): 65 | """Exception for storage operation. 66 | 67 | This error is raised when an operation failed in backend DB of storage. 68 | """ 69 | 70 | pass 71 | 72 | 73 | class DuplicatedStudyError(OptunaError): 74 | """Exception for a duplicated study name. 75 | 76 | This error is raised when a specified study name already exists in the storage. 77 | """ 78 | 79 | pass 80 | 81 | 82 | class ExperimentalWarning(Warning): 83 | """Experimental Warning class. 84 | 85 | This implementation exists here because the policy of `FutureWarning` has been changed 86 | since Python 3.7 was released. See the details in 87 | https://docs.python.org/3/library/warnings.html#warning-categories. 
88 | """ 89 | 90 | pass 91 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/progress_bar.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any 3 | from typing import Optional 4 | 5 | from tqdm.auto import tqdm 6 | 7 | # from optuna import logging as optuna_logging 8 | from . import logging as optuna_logging 9 | 10 | _tqdm_handler = None # type: Optional[_TqdmLoggingHandler] 11 | 12 | 13 | # Reference: https://gist.github.com/hvy/8b80c2cedf02b15c24f85d1fa17ebe02 14 | class _TqdmLoggingHandler(logging.StreamHandler): 15 | def emit(self, record: Any) -> None: 16 | try: 17 | msg = self.format(record) 18 | tqdm.write(msg) 19 | self.flush() 20 | except (KeyboardInterrupt, SystemExit): 21 | raise 22 | except Exception: 23 | self.handleError(record) 24 | 25 | 26 | class _ProgressBar(object): 27 | """Progress Bar implementation for `Study.optimize` on the top of `tqdm`. 28 | 29 | Args: 30 | is_valid: 31 | Whether to show progress bars in `Study.optimize`. 32 | n_trials: 33 | The number of trials. 34 | timeout: 35 | Stop study after the given number of second(s). 36 | """ 37 | 38 | def __init__( 39 | self, is_valid: bool, n_trials: Optional[int] = None, timeout: Optional[float] = None, 40 | ) -> None: 41 | self._is_valid = is_valid 42 | self._n_trials = n_trials 43 | self._timeout = timeout 44 | 45 | if self._is_valid: 46 | self._init_valid() 47 | 48 | # TODO(hvy): Remove initialization indirection via this method when the progress bar is no 49 | # longer experimental. 
50 | def _init_valid(self) -> None: 51 | self._progress_bar = tqdm(range(self._n_trials) if self._n_trials is not None else None) 52 | global _tqdm_handler 53 | 54 | _tqdm_handler = _TqdmLoggingHandler() 55 | _tqdm_handler.setLevel(logging.INFO) 56 | _tqdm_handler.setFormatter(optuna_logging.create_default_formatter()) 57 | optuna_logging.disable_default_handler() 58 | optuna_logging._get_library_root_logger().addHandler(_tqdm_handler) 59 | 60 | def update(self, elapsed_seconds: Optional[float]) -> None: 61 | """Update the progress bars if ``is_valid`` is ``True``. 62 | 63 | Args: 64 | elapsed_seconds: 65 | The time past since `Study.optimize` started. 66 | """ 67 | if self._is_valid: 68 | self._progress_bar.update(1) 69 | if self._timeout is not None and elapsed_seconds is not None: 70 | self._progress_bar.set_postfix_str( 71 | "{:.02f}/{} seconds".format(elapsed_seconds, self._timeout) 72 | ) 73 | 74 | def close(self) -> None: 75 | """Close progress bars.""" 76 | if self._is_valid: 77 | self._progress_bar.close() 78 | assert _tqdm_handler is not None 79 | optuna_logging._get_library_root_logger().removeHandler(_tqdm_handler) 80 | optuna_logging.enable_default_handler() 81 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/trial/_base.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import datetime 3 | 4 | # from optuna import distributions 5 | # from optuna import logging 6 | from .. import distributions 7 | from .. import logging 8 | 9 | _logger = logging.get_logger(__name__) 10 | 11 | 12 | class BaseTrial(object, metaclass=abc.ABCMeta): 13 | """Base class for trials. 14 | 15 | Note that this class is not supposed to be directly accessed by library users. 
16 | """ 17 | 18 | @abc.abstractmethod 19 | def suggest_float(self, name, low, high, *, log=False, step=None): 20 | # type: (str, float, float, bool, Optional[float])-> float 21 | 22 | # TODO(nzw0301) swap log's position for step's one to match suggest_int for consistency. 23 | 24 | raise NotImplementedError 25 | 26 | @abc.abstractmethod 27 | def suggest_uniform(self, name, low, high): 28 | # type: (str, float, float) -> float 29 | 30 | raise NotImplementedError 31 | 32 | @abc.abstractmethod 33 | def suggest_loguniform(self, name, low, high): 34 | # type: (str, float, float) -> float 35 | 36 | raise NotImplementedError 37 | 38 | @abc.abstractmethod 39 | def suggest_discrete_uniform(self, name, low, high, q): 40 | # type: (str, float, float, float) -> float 41 | 42 | raise NotImplementedError 43 | 44 | @abc.abstractmethod 45 | def suggest_int(self, name, low, high, step=1, log=False): 46 | # type: (str, int, int, int, bool) -> int 47 | 48 | raise NotImplementedError 49 | 50 | @abc.abstractmethod 51 | def suggest_categorical(self, name, choices): 52 | # type: (str, Sequence[CategoricalChoiceType]) -> CategoricalChoiceType 53 | 54 | raise NotImplementedError 55 | 56 | @abc.abstractmethod 57 | def report(self, value, step): 58 | # type: (float, int) -> None 59 | 60 | raise NotImplementedError 61 | 62 | @abc.abstractmethod 63 | def should_prune(self, step=None): 64 | # type: (Optional[int]) -> bool 65 | 66 | raise NotImplementedError 67 | 68 | @abc.abstractmethod 69 | def set_user_attr(self, key, value): 70 | # type: (str, Any) -> None 71 | 72 | raise NotImplementedError 73 | 74 | @abc.abstractmethod 75 | def set_system_attr(self, key, value): 76 | # type: (str, Any) -> None 77 | 78 | raise NotImplementedError 79 | 80 | @property 81 | @abc.abstractmethod 82 | def params(self): 83 | # type: () -> Dict[str, Any] 84 | 85 | raise NotImplementedError 86 | 87 | @property 88 | @abc.abstractmethod 89 | def distributions(self): 90 | # type: () -> Dict[str, BaseDistribution] 91 | 
92 | raise NotImplementedError 93 | 94 | @property 95 | @abc.abstractmethod 96 | def user_attrs(self): 97 | # type: () -> Dict[str, Any] 98 | 99 | raise NotImplementedError 100 | 101 | @property 102 | @abc.abstractmethod 103 | def system_attrs(self): 104 | # type: () -> Dict[str, Any] 105 | 106 | raise NotImplementedError 107 | 108 | @property 109 | @abc.abstractmethod 110 | def datetime_start(self): 111 | # type: () -> Optional[datetime.datetime] 112 | 113 | raise NotImplementedError 114 | 115 | @property 116 | def number(self) -> int: 117 | 118 | raise NotImplementedError 119 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/samplers/random.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | # from optuna import distributions 4 | # from optuna.samplers.base import BaseSampler 5 | from .. import distributions 6 | from ..samplers.base import BaseSampler 7 | 8 | class RandomSampler(BaseSampler): 9 | """Sampler using random sampling. 10 | 11 | This sampler is based on *independent sampling*. 12 | See also :class:`~optuna.samplers.BaseSampler` for more details of 'independent sampling'. 13 | 14 | Example: 15 | 16 | .. testcode:: 17 | 18 | import optuna 19 | from optuna.samplers import RandomSampler 20 | 21 | def objective(trial): 22 | x = trial.suggest_uniform('x', -5, 5) 23 | return x**2 24 | 25 | study = optuna.create_study(sampler=RandomSampler()) 26 | study.optimize(objective, n_trials=10) 27 | 28 | Args: 29 | seed: Seed for random number generator. 
30 | """ 31 | 32 | def __init__(self, seed=None): 33 | # type: (Optional[int]) -> None 34 | 35 | self._rng = numpy.random.RandomState(seed) 36 | 37 | def reseed_rng(self) -> None: 38 | 39 | self._rng = numpy.random.RandomState() 40 | 41 | def infer_relative_search_space(self, study, trial): 42 | # type: (Study, FrozenTrial) -> Dict[str, BaseDistribution] 43 | 44 | return {} 45 | 46 | def sample_relative(self, study, trial, search_space): 47 | # type: (Study, FrozenTrial, Dict[str, BaseDistribution]) -> Dict[str, Any] 48 | 49 | return {} 50 | 51 | def sample_independent(self, study, trial, param_name, param_distribution): 52 | # type: (Study, FrozenTrial, str, distributions.BaseDistribution) -> Any 53 | 54 | if isinstance(param_distribution, distributions.UniformDistribution): 55 | return self._rng.uniform(param_distribution.low, param_distribution.high) 56 | elif isinstance(param_distribution, distributions.LogUniformDistribution): 57 | log_low = numpy.log(param_distribution.low) 58 | log_high = numpy.log(param_distribution.high) 59 | return float(numpy.exp(self._rng.uniform(log_low, log_high))) 60 | elif isinstance(param_distribution, distributions.DiscreteUniformDistribution): 61 | q = param_distribution.q 62 | r = param_distribution.high - param_distribution.low 63 | # [low, high] is shifted to [0, r] to align sampled values at regular intervals. 64 | low = 0 - 0.5 * q 65 | high = r + 0.5 * q 66 | s = self._rng.uniform(low, high) 67 | v = numpy.round(s / q) * q + param_distribution.low 68 | # v may slightly exceed range due to round-off errors. 69 | return float(min(max(v, param_distribution.low), param_distribution.high)) 70 | elif isinstance(param_distribution, distributions.IntUniformDistribution): 71 | # [low, high] is shifted to [0, r] to align sampled values at regular intervals. 72 | r = (param_distribution.high - param_distribution.low) / param_distribution.step 73 | # numpy.random.randint includes low but excludes high. 
74 | s = self._rng.randint(0, r + 1) 75 | v = s * param_distribution.step + param_distribution.low 76 | return int(v) 77 | elif isinstance(param_distribution, distributions.IntLogUniformDistribution): 78 | log_low = numpy.log(param_distribution.low - 0.5) 79 | log_high = numpy.log(param_distribution.high + 0.5) 80 | s = numpy.exp(self._rng.uniform(log_low, log_high)) 81 | v = ( 82 | numpy.round((s - param_distribution.low) / param_distribution.step) 83 | * param_distribution.step 84 | + param_distribution.low 85 | ) 86 | return int(min(max(v, param_distribution.low), param_distribution.high)) 87 | elif isinstance(param_distribution, distributions.CategoricalDistribution): 88 | choices = param_distribution.choices 89 | index = self._rng.randint(0, len(choices)) 90 | return choices[index] 91 | else: 92 | raise NotImplementedError 93 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BOExplain, Explaining Inference Queries with Bayesian Optimization 2 | 3 | BOExplain is a library for explaining inference queries with Bayesian optimization. The corresponding paper can be found at https://arxiv.org/abs/2102.05308. 4 | 5 | ## Installation 6 | 7 | ``` 8 | pip install boexplain 9 | ``` 10 | 11 | ## Documentation 12 | 13 | The documentation is available at [https://sfu-db.github.io/BOExplain/](https://sfu-db.github.io/BOExplain/). (shortcut to [fmin](https://sfu-db.github.io/BOExplain/api_reference/boexplain.files.search.html#boexplain.files.search.fmin), [fmax](https://sfu-db.github.io/BOExplain/api_reference/boexplain.files.search.html#boexplain.files.search.fmax)) 14 | 15 | ## Getting Started 16 | 17 | Derive an explanation for why the predicted rate of having an income over $50K is higher for men compared to women in the UCI ML [Adult dataset](https://archive.ics.uci.edu/ml/datasets/adult). 18 | 19 | 1. Load the data and prepare it for ML. 
20 | ``` python 21 | import pandas as pd 22 | from sklearn.ensemble import RandomForestClassifier 23 | from sklearn.model_selection import train_test_split 24 | 25 | df = pd.read_csv("adult.data", 26 | names=[ 27 | "Age", "Workclass", "fnlwgt", "Education", 28 | "Education-Num", "Marital Status", "Occupation", 29 | "Relationship", "Race", "Gender", "Capital Gain", 30 | "Capital Loss", "Hours per week", "Country", "Income" 31 | ], 32 | na_values=" ?") 33 | 34 | df['Income'].replace({" <=50K": 0, ' >50K': 1}, inplace=True) 35 | df['Gender'].replace({" Male": 0, ' Female': 1}, inplace=True) 36 | df = pd.get_dummies(df) 37 | 38 | train, test = train_test_split(df, test_size=0.2) 39 | test = test.drop(columns='Income') 40 | ``` 41 | 42 | 2. Define the objective function that trains a random forest classifier and queries the ratio of predicted rates of having an income over $50K between men and women. 43 | ``` python 44 | def obj(train_filtered): 45 | rf = RandomForestClassifier(n_estimators=13, random_state=0) 46 | rf.fit(train_filtered.drop(columns='Income'), train_filtered['Income']) 47 | test["prediction"] = rf.predict(test) 48 | rates = test.groupby("Gender")["prediction"].sum() / test.groupby("Gender")["prediction"].size() 49 | test.drop(columns='prediction', inplace=True) 50 | return rates[0] / rates[1] 51 | ``` 52 | 53 | 54 | 3. Use the function `fmin` to minimize the objective function. 55 | ``` python 56 | from boexplain import fmin 57 | 58 | train_filtered = fmin( 59 | data=train, 60 | f=obj, 61 | columns=["Age", "Education-Num"], 62 | runtime=30, 63 | ) 64 | ``` 65 | 66 | 67 | ## Reproduce the Experiments 68 | 69 | To reproduce the experiments, you can clone the repo and create a poetry environment (install [Poetry](https://python-poetry.org/docs/#installation)). 
Run 70 | 71 | ```bash 72 | poetry install 73 | ``` 74 | 75 | To set up the poetry environment for a jupyter notebook, run 76 | 77 | ```bash 78 | poetry run ipython kernel install --name=boexplain 79 | ``` 80 | 81 | An ipython kernel has been created for this environment. 82 | 83 | ### Adult Experiment 84 | 85 | To reproduce the results of the Adult experiment and recreate Figure 6, follow the instructions in [adult.ipynb](https://github.com/sfu-db/BOExplain/blob/main/adult.ipynb). 86 | 87 | ### Credit Experiment 88 | 89 | To reproduce the results of the Credit experiment and recreate Figure 8, follow the instructions in [credit.ipynb](https://github.com/sfu-db/BOExplain/blob/main/credit.ipynb). 90 | 91 | ### House Experiment 92 | 93 | To reproduce the results of the House experiment and recreate Figure 7, follow the instructions in [house.ipynb](https://github.com/sfu-db/BOExplain/blob/main/house.ipynb). 94 | 95 | ### Scorpion Synthetic Data Experiment 96 | 97 | To reproduce the results of the experiment with Scorpion's synthetic data and corresponding query, and recreate Figure 4, follow the instructions in [scorpion.ipynb](https://github.com/sfu-db/BOExplain/blob/main/scorpion.ipynb). 98 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/samplers/_search_space.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import copy 3 | from typing import Dict 4 | from typing import Optional 5 | 6 | # import optuna 7 | # from optuna.distributions import BaseDistribution 8 | # from optuna.study import BaseStudy 9 | from ... import optuna 10 | from ..distributions import BaseDistribution 11 | from ..study import BaseStudy 12 | 13 | 14 | class IntersectionSearchSpace(object): 15 | """A class to calculate the intersection search space of a :class:`~optuna.study.BaseStudy`.
16 | 17 | Intersection search space contains the intersection of parameter distributions that have been 18 | suggested in the completed trials of the study so far. 19 | If there are multiple parameters that have the same name but different distributions, 20 | neither is included in the resulting search space 21 | (i.e., the parameters with dynamic value ranges are excluded). 22 | 23 | Note that an instance of this class is supposed to be used for only one study. 24 | If different studies are passed to :func:`~optuna.samplers.IntersectionSearchSpace.calculate`, 25 | a :obj:`ValueError` is raised. 26 | """ 27 | 28 | def __init__(self) -> None: 29 | self._cursor = -1 # type: int 30 | self._search_space = None # type: Optional[Dict[str, BaseDistribution]] 31 | self._study_id = None # type: Optional[int] 32 | 33 | def calculate( 34 | self, study: BaseStudy, ordered_dict: bool = False 35 | ) -> Dict[str, BaseDistribution]: 36 | """Returns the intersection search space of the :class:`~optuna.study.BaseStudy`. 37 | 38 | Args: 39 | study: 40 | A study with completed trials. 41 | ordered_dict: 42 | A boolean flag determining the return type. 43 | If :obj:`False`, the returned object will be a :obj:`dict`. 44 | If :obj:`True`, the returned object will be an :obj:`collections.OrderedDict` 45 | sorted by keys, i.e. parameter names. 46 | 47 | Returns: 48 | A dictionary containing the parameter names and parameter's distributions. 49 | """ 50 | 51 | if self._study_id is None: 52 | self._study_id = study._study_id 53 | else: 54 | # Note that the check below is meaningless when `InMemoryStorage` is used 55 | # because `InMemoryStorage.create_new_study` always returns the same study ID.
56 | if self._study_id != study._study_id: 57 | raise ValueError("`IntersectionSearchSpace` cannot handle multiple studies.") 58 | 59 | next_cursor = self._cursor 60 | for trial in reversed(study.get_trials(deepcopy=False)): 61 | if self._cursor > trial.number: 62 | break 63 | 64 | if not trial.state.is_finished(): 65 | next_cursor = trial.number 66 | 67 | if trial.state != optuna.trial.TrialState.COMPLETE: 68 | continue 69 | 70 | if self._search_space is None: 71 | self._search_space = copy.copy(trial.distributions) 72 | continue 73 | 74 | delete_list = [] 75 | for param_name, param_distribution in self._search_space.items(): 76 | if param_name not in trial.distributions: 77 | delete_list.append(param_name) 78 | elif trial.distributions[param_name] != param_distribution: 79 | delete_list.append(param_name) 80 | 81 | for param_name in delete_list: 82 | del self._search_space[param_name] 83 | 84 | self._cursor = next_cursor 85 | search_space = self._search_space or {} 86 | 87 | if ordered_dict: 88 | search_space = OrderedDict(sorted(search_space.items(), key=lambda x: x[0])) 89 | 90 | return copy.deepcopy(search_space) 91 | 92 | 93 | def intersection_search_space( 94 | study: BaseStudy, ordered_dict: bool = False 95 | ) -> Dict[str, BaseDistribution]: 96 | """Return the intersection search space of the :class:`~optuna.study.BaseStudy`. 97 | 98 | Intersection search space contains the intersection of parameter distributions that have been 99 | suggested in the completed trials of the study so far. 100 | If there are multiple parameters that have the same name but different distributions, 101 | neither is included in the resulting search space 102 | (i.e., the parameters with dynamic value ranges are excluded). 103 | 104 | .. note:: 105 | :class:`~optuna.samplers.IntersectionSearchSpace` provides the same functionality with 106 | a much faster way. Please consider using it if you want to reduce execution time 107 | as much as possible. 
108 | 109 | Args: 110 | study: 111 | A study with completed trials. 112 | ordered_dict: 113 | A boolean flag determining the return type. 114 | If :obj:`False`, the returned object will be a :obj:`dict`. 115 | If :obj:`True`, the returned object will be an :obj:`collections.OrderedDict` sorted by 116 | keys, i.e. parameter names. 117 | 118 | Returns: 119 | A dictionary containing the parameter names and parameter's distributions. 120 | """ 121 | 122 | return IntersectionSearchSpace().calculate(study, ordered_dict=ordered_dict) 123 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/samplers/base.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class BaseSampler(object, metaclass=abc.ABCMeta): 5 | """Base class for samplers. 6 | 7 | Optuna combines two types of sampling strategies, which are called *relative sampling* and 8 | *independent sampling*. 9 | 10 | *The relative sampling* determines values of multiple parameters simultaneously so that 11 | sampling algorithms can use relationship between parameters (e.g., correlation). 12 | Target parameters of the relative sampling are described in a relative search space, which 13 | is determined by :func:`~optuna.samplers.BaseSampler.infer_relative_search_space`. 14 | 15 | *The independent sampling* determines a value of a single parameter without considering any 16 | relationship between parameters. Target parameters of the independent sampling are the 17 | parameters not described in the relative search space. 18 | 19 | More specifically, parameters are sampled by the following procedure. 20 | At the beginning of a trial, :meth:`~optuna.samplers.BaseSampler.infer_relative_search_space` 21 | is called to determine the relative search space for the trial. Then, 22 | :meth:`~optuna.samplers.BaseSampler.sample_relative` is invoked to sample parameters 23 | from the relative search space. 
During the execution of the objective function, 24 | :meth:`~optuna.samplers.BaseSampler.sample_independent` is used to sample 25 | parameters that don't belong to the relative search space. 26 | 27 | The following figure depicts the lifetime of a trial and how the above three methods are 28 | called in the trial. 29 | 30 | .. image:: ../../image/sampling-sequence.png 31 | 32 | | 33 | 34 | """ 35 | 36 | @abc.abstractmethod 37 | def infer_relative_search_space(self, study, trial): 38 | # type: (Study, FrozenTrial) -> Dict[str, BaseDistribution] 39 | """Infer the search space that will be used by relative sampling in the target trial. 40 | 41 | This method is called right before :func:`~optuna.samplers.BaseSampler.sample_relative` 42 | method, and the search space returned by this method is pass to it. The parameters not 43 | contained in the search space will be sampled by using 44 | :func:`~optuna.samplers.BaseSampler.sample_independent` method. 45 | 46 | Args: 47 | study: 48 | Target study object. 49 | trial: 50 | Target trial object. 51 | 52 | Returns: 53 | A dictionary containing the parameter names and parameter's distributions. 54 | 55 | .. seealso:: 56 | Please refer to :func:`~optuna.samplers.intersection_search_space` as an 57 | implementation of :func:`~optuna.samplers.BaseSampler.infer_relative_search_space`. 58 | """ 59 | 60 | raise NotImplementedError 61 | 62 | @abc.abstractmethod 63 | def sample_relative(self, study, trial, search_space): 64 | # type: (Study, FrozenTrial, Dict[str, BaseDistribution]) -> Dict[str, Any] 65 | """Sample parameters in a given search space. 66 | 67 | This method is called once at the beginning of each trial, i.e., right before the 68 | evaluation of the objective function. This method is suitable for sampling algorithms 69 | that use relationship between parameters such as Gaussian Process and CMA-ES. 70 | 71 | .. note:: 72 | The failed trials are ignored by any build-in samplers when they sample new 73 | parameters. 
Thus, failed trials are regarded as deleted in the samplers' 74 | perspective. 75 | 76 | Args: 77 | study: 78 | Target study object. 79 | trial: 80 | Target trial object. 81 | search_space: 82 | The search space returned by 83 | :func:`~optuna.samplers.BaseSampler.infer_relative_search_space`. 84 | 85 | Returns: 86 | A dictionary containing the parameter names and the values. 87 | 88 | """ 89 | 90 | raise NotImplementedError 91 | 92 | @abc.abstractmethod 93 | def sample_independent(self, study, trial, param_name, param_distribution): 94 | # type: (Study, FrozenTrial, str, BaseDistribution) -> Any 95 | """Sample a parameter for a given distribution. 96 | 97 | This method is called only for the parameters not contained in the search space returned 98 | by :func:`~optuna.samplers.BaseSampler.sample_relative` method. This method is suitable 99 | for sampling algorithms that do not use relationship between parameters such as random 100 | sampling and TPE. 101 | 102 | .. note:: 103 | The failed trials are ignored by any build-in samplers when they sample new 104 | parameters. Thus, failed trials are regarded as deleted in the samplers' 105 | perspective. 106 | 107 | Args: 108 | study: 109 | Target study object. 110 | trial: 111 | Target trial object. 112 | param_name: 113 | Name of the sampled parameter. 114 | param_distribution: 115 | Distribution object that specifies a prior and/or scale of the sampling algorithm. 116 | 117 | Returns: 118 | A parameter value. 119 | 120 | """ 121 | 122 | raise NotImplementedError 123 | 124 | def reseed_rng(self) -> None: 125 | """Reseed sampler's random number generator. 126 | 127 | This method is called by the :class:`~optuna.study.Study` instance if trials are executed 128 | in parallel with the option ``n_jobs>1``. In that case, the sampler instance will be 129 | replicated including the state of the random number generator, and they may suggest the 130 | same values. 
To prevent this issue, this method assigns a different seed to each random 131 | number generator. 132 | """ 133 | 134 | pass 135 | -------------------------------------------------------------------------------- /boexplain/optuna/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import pkg_resources 5 | from setuptools import find_packages 6 | from setuptools import setup 7 | 8 | from typing import Dict 9 | from typing import List 10 | from typing import Optional 11 | 12 | 13 | def get_version() -> str: 14 | 15 | version_filepath = os.path.join(os.path.dirname(__file__), "optuna", "version.py") 16 | with open(version_filepath) as f: 17 | for line in f: 18 | if line.startswith("__version__"): 19 | return line.strip().split()[-1][1:-1] 20 | assert False 21 | 22 | 23 | def get_long_description() -> str: 24 | 25 | readme_filepath = os.path.join(os.path.dirname(__file__), "README.md") 26 | with open(readme_filepath) as f: 27 | return f.read() 28 | 29 | 30 | def get_install_requires() -> List[str]: 31 | 32 | return [ 33 | "alembic", 34 | "cliff", 35 | "cmaes>=0.5.0", 36 | "colorlog", 37 | "joblib", 38 | "numpy", 39 | "scipy!=1.4.0", 40 | "sqlalchemy>=1.1.0", 41 | "tqdm", 42 | ] 43 | 44 | 45 | def get_tests_require() -> List[str]: 46 | 47 | return get_extras_require()["testing"] 48 | 49 | 50 | def get_extras_require() -> Dict[str, List[str]]: 51 | 52 | requirements = { 53 | "checking": ["black", "hacking", "mypy"], 54 | "codecov": ["codecov", "pytest-cov"], 55 | "doctest": [ 56 | "cma", 57 | "pandas", 58 | "plotly>=4.0.0", 59 | "scikit-learn>=0.19.0,<0.23.0", 60 | "scikit-optimize", 61 | "mlflow", 62 | ], 63 | "document": ["sphinx", "sphinx_rtd_theme"], 64 | "example": [ 65 | "catboost", 66 | "chainer", 67 | "lightgbm", 68 | "mlflow", 69 | "mpi4py", 70 | "mxnet", 71 | "nbval", 72 | "pytorch-ignite", 73 | "scikit-image", 74 | "scikit-learn", 75 | "thop", 76 | "torch==1.4.0" if sys.platform == 
"darwin" else "torch==1.4.0+cpu", 77 | "torchvision==0.5.0" if sys.platform == "darwin" else "torchvision==0.5.0+cpu", 78 | "xgboost", 79 | ] 80 | + ( 81 | ["allennlp<1", "fastai<2", "pytorch-lightning>=0.7.1"] 82 | if (3, 5) < sys.version_info[:2] < (3, 8) 83 | else [] 84 | ) 85 | + ( 86 | ["llvmlite<=0.31.0"] if (3, 5) == sys.version_info[:2] else [] 87 | ) # Newer `llvmlite` is not distributed with wheels for Python 3.5. 88 | + ( 89 | ["dask[dataframe]", "dask-ml", "keras", "tensorflow>=2.0.0", "tensorflow-datasets"] 90 | if sys.version_info[:2] < (3, 8) 91 | else [] 92 | ), 93 | "experimental": ["redis"], 94 | "testing": [ 95 | # TODO(toshihikoyanase): Remove the version constraint after resolving the issue 96 | # https://github.com/optuna/optuna/issues/1000. 97 | "bokeh<2.0.0", 98 | "chainer>=5.0.0", 99 | "cma", 100 | "fakeredis", 101 | "fanova", 102 | "lightgbm", 103 | "mlflow", 104 | "mpi4py", 105 | "mxnet", 106 | "pandas", 107 | "plotly>=4.0.0", 108 | "pytest", 109 | "pytorch-ignite", 110 | "scikit-learn>=0.19.0,<0.23.0", 111 | "scikit-optimize", 112 | "torch==1.4.0" if sys.platform == "darwin" else "torch==1.4.0+cpu", 113 | "torchvision==0.5.0" if sys.platform == "darwin" else "torchvision==0.5.0+cpu", 114 | "xgboost", 115 | ] 116 | + ( 117 | ["allennlp<1", "fastai<2", "pytorch-lightning>=0.7.1"] 118 | if (3, 5) < sys.version_info[:2] < (3, 8) 119 | else [] 120 | ) 121 | + ( 122 | ["keras", "tensorflow", "tensorflow-datasets"] if sys.version_info[:2] < (3, 8) else [] 123 | ), 124 | } 125 | 126 | return requirements 127 | 128 | 129 | def find_any_distribution(pkgs: List[str]) -> Optional[pkg_resources.Distribution]: 130 | 131 | for pkg in pkgs: 132 | try: 133 | return pkg_resources.get_distribution(pkg) 134 | except pkg_resources.DistributionNotFound: 135 | pass 136 | return None 137 | 138 | 139 | pfnopt_pkg = find_any_distribution(["pfnopt"]) 140 | if pfnopt_pkg is not None: 141 | msg = ( 142 | "We detected that PFNOpt is installed in your environment.\n" 
143 | "PFNOpt has been renamed Optuna. Please uninstall the old\n" 144 | "PFNOpt in advance (e.g. by executing `$ pip uninstall pfnopt`)." 145 | ) 146 | print(msg) 147 | exit(1) 148 | 149 | setup( 150 | name="optuna", 151 | version=get_version(), 152 | description="A hyperparameter optimization framework", 153 | long_description=get_long_description(), 154 | long_description_content_type="text/markdown", 155 | author="Takuya Akiba", 156 | author_email="akiba@preferred.jp", 157 | url="https://optuna.org/", 158 | packages=find_packages(), 159 | package_data={ 160 | "optuna": [ 161 | "storages/rdb/alembic.ini", 162 | "storages/rdb/alembic/*.*", 163 | "storages/rdb/alembic/versions/*.*", 164 | ] 165 | }, 166 | install_requires=get_install_requires(), 167 | tests_require=get_tests_require(), 168 | extras_require=get_extras_require(), 169 | entry_points={ 170 | "console_scripts": ["optuna = optuna.cli:main"], 171 | "optuna.command": [ 172 | "create-study = optuna.cli:_CreateStudy", 173 | "delete-study = optuna.cli:_DeleteStudy", 174 | "study set-user-attr = optuna.cli:_StudySetUserAttribute", 175 | "studies = optuna.cli:_Studies", 176 | "dashboard = optuna.cli:_Dashboard", 177 | "study optimize = optuna.cli:_StudyOptimize", 178 | "storage upgrade = optuna.cli:_StorageUpgrade", 179 | ], 180 | }, 181 | ) 182 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/samplers/tpe/parzen_estimator.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | from typing import NamedTuple 3 | from typing import Optional 4 | 5 | import numpy 6 | from numpy import ndarray 7 | 8 | EPS = 1e-12 9 | 10 | 11 | class _ParzenEstimatorParameters( 12 | NamedTuple( 13 | "_ParzenEstimatorParameters", 14 | [ 15 | ("consider_prior", bool), 16 | ("prior_weight", Optional[float]), 17 | ("consider_magic_clip", bool), 18 | ("consider_endpoints", bool), 19 | ("weights", Callable[[int], 
ndarray]), 20 | ], 21 | ) 22 | ): 23 | pass 24 | 25 | 26 | class _ParzenEstimator(object): 27 | def __init__( 28 | self, 29 | mus, # type: ndarray 30 | low, # type: float 31 | high, # type: float 32 | parameters, # type: _ParzenEstimatorParameters 33 | ): 34 | # type: (...) -> None 35 | 36 | self.weights, self.mus, self.sigmas = _ParzenEstimator._calculate( 37 | mus, 38 | low, 39 | high, 40 | parameters.consider_prior, 41 | parameters.prior_weight, 42 | parameters.consider_magic_clip, 43 | parameters.consider_endpoints, 44 | parameters.weights, 45 | ) 46 | 47 | @classmethod 48 | def _calculate( 49 | cls, 50 | mus, # type: ndarray 51 | low, # type: float 52 | high, # type: float 53 | consider_prior, # type: bool 54 | prior_weight, # type: Optional[float] 55 | consider_magic_clip, # type: bool 56 | consider_endpoints, # type: bool 57 | weights_func, # type: Callable[[int], ndarray] 58 | ): 59 | # type: (...) -> Tuple[ndarray, ndarray, ndarray] 60 | """Calculates the weights, mus and sigma for the Parzen estimator. 61 | 62 | Note: When the number of observations is zero, the Parzen estimator ignores the 63 | `consider_prior` flag and utilizes a prior. Validation of this approach is future work. 64 | """ 65 | 66 | # initialize mus and sigmas for the KDE 67 | mus = numpy.asarray(mus) 68 | sigma = numpy.asarray([], dtype=float) 69 | prior_pos = 0 70 | 71 | # Parzen estimator construction requires at least one observation or a prior. 72 | if mus.size == 0: 73 | consider_prior = True 74 | 75 | # consider_prior = True. 
We have a prior over the space of ints 76 | if consider_prior: 77 | # prior mean is the midpoint 78 | prior_mu = 0.5 * (low + high) 79 | # prior std is the range of values 80 | prior_sigma = 1.0 * (high - low) 81 | if mus.size == 0: 82 | low_sorted_mus_high = numpy.zeros(3) 83 | sorted_mus = low_sorted_mus_high[1:-1] 84 | sorted_mus[0] = prior_mu 85 | sigma = numpy.asarray([prior_sigma]) 86 | prior_pos = 0 87 | order = [] # type: List[int] 88 | # THIS CODE ORDERS THE MEANS with the prior, confusing 89 | else: # When mus.size is greater than 0. <- OPTUNA COMMENT 90 | # We decide the place of the prior. <- OPTUNA COMMENT 91 | # order = indices that would sort the mus 92 | order = numpy.argsort(mus).astype(int) 93 | # mus in increasing order 94 | ordered_mus = mus[order] 95 | # find the index where prior_mu should be inserted to maintain order 96 | prior_pos = numpy.searchsorted(ordered_mus, prior_mu) 97 | # We decide the mus. <- OPTUNA COMMENT 98 | # low_sorted_mus_high gets updated with sorted_mus and is used below 99 | low_sorted_mus_high = numpy.zeros(len(mus) + 3) 100 | sorted_mus = low_sorted_mus_high[1:-1] 101 | # insert the prior appropriately in the ordered list of mus 102 | sorted_mus[:prior_pos] = ordered_mus[:prior_pos] 103 | sorted_mus[prior_pos] = prior_mu 104 | sorted_mus[prior_pos + 1 :] = ordered_mus[prior_pos:] 105 | else: 106 | order = numpy.argsort(mus) 107 | # We decide the mus. 108 | low_sorted_mus_high = numpy.zeros(len(mus) + 2) 109 | sorted_mus = low_sorted_mus_high[1:-1] 110 | sorted_mus[:] = mus[order] 111 | 112 | # We decide the sigma. 
113 | if mus.size > 0: 114 | low_sorted_mus_high[-1] = high 115 | low_sorted_mus_high[0] = low 116 | # the standard deviation of each Gaussian was set to the greater of the distances to the left and right neighbour 117 | sigma = numpy.maximum( 118 | low_sorted_mus_high[1:-1] - low_sorted_mus_high[0:-2], 119 | low_sorted_mus_high[2:] - low_sorted_mus_high[1:-1], 120 | ) 121 | # If not considering endpoints, set the std of the min and max mus to be the 122 | # distance from its only neighbours, DEFAULT consider_endpoints=False 123 | if not consider_endpoints and low_sorted_mus_high.size > 2: 124 | sigma[0] = low_sorted_mus_high[2] - low_sorted_mus_high[1] 125 | sigma[-1] = low_sorted_mus_high[-2] - low_sorted_mus_high[-3] 126 | 127 | # We decide the weights. <- OPTUNA 128 | # Ramp of weights 129 | unsorted_weights = weights_func(mus.size) 130 | if consider_prior: 131 | # array of zeros in the shape of sorted_mus 132 | sorted_weights = numpy.zeros_like(sorted_mus) 133 | # sort the weights based on the increasing order of the mus 134 | sorted_weights[:prior_pos] = unsorted_weights[order[:prior_pos]] 135 | sorted_weights[prior_pos] = prior_weight 136 | sorted_weights[prior_pos + 1 :] = unsorted_weights[order[prior_pos:]] 137 | else: 138 | sorted_weights = unsorted_weights[order] 139 | # normalize the weights 140 | sorted_weights /= sorted_weights.sum() 141 | 142 | # We adjust the range of the 'sigma' according to the 'consider_magic_clip' flag. 
<-OTPUNA 143 | # Original TPE paper clips stds to remain in feasible range 144 | # largest std in sigma array 145 | maxsigma = 1.0 * (high - low) 146 | # limit the smallest stds in a gaussian distribution 147 | if consider_magic_clip: 148 | minsigma = 1.0 * (high - low) / min(100.0, (1.0 + len(sorted_mus))) 149 | else: 150 | minsigma = EPS 151 | # set all sigmas to be between minsigma and maxsigma 152 | sigma = numpy.clip(sigma, minsigma, maxsigma) 153 | if consider_prior: 154 | # don't modify the prior std 155 | sigma[prior_pos] = prior_sigma 156 | 157 | return sorted_weights, sorted_mus, sigma 158 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/trial/_fixed.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | # from optuna import distributions 4 | # from optuna.trial._base import BaseTrial 5 | # from optuna.trial._util import _adjust_discrete_uniform_high 6 | from .. import distributions 7 | from ._base import BaseTrial 8 | from ._util import _adjust_discrete_uniform_high 9 | 10 | 11 | class FixedTrial(BaseTrial): 12 | """A trial class which suggests a fixed value for each parameter. 13 | 14 | This object has the same methods as :class:`~optuna.trial.Trial`, and it suggests pre-defined 15 | parameter values. The parameter values can be determined at the construction of the 16 | :class:`~optuna.trial.FixedTrial` object. In contrast to :class:`~optuna.trial.Trial`, 17 | :class:`~optuna.trial.FixedTrial` does not depend on :class:`~optuna.study.Study`, and it is 18 | useful for deploying optimization results. 19 | 20 | Example: 21 | 22 | Evaluate an objective function with parameter values given by a user. 23 | 24 | .. 
testcode:: 25 | 26 | import optuna 27 | 28 | def objective(trial): 29 | x = trial.suggest_uniform('x', -100, 100) 30 | y = trial.suggest_categorical('y', [-1, 0, 1]) 31 | return x ** 2 + y 32 | 33 | assert objective(optuna.trial.FixedTrial({'x': 1, 'y': 0})) == 1 34 | 35 | 36 | .. note:: 37 | Please refer to :class:`~optuna.trial.Trial` for details of methods and properties. 38 | 39 | Args: 40 | params: 41 | A dictionary containing all parameters. 42 | number: 43 | A trial number. Defaults to ``0``. 44 | 45 | """ 46 | 47 | def __init__(self, params, number=0): 48 | # type: (Dict[str, Any], int) -> None 49 | 50 | self._params = params 51 | self._suggested_params = {} # type: Dict[str, Any] 52 | self._distributions = {} # type: Dict[str, BaseDistribution] 53 | self._user_attrs = {} # type: Dict[str, Any] 54 | self._system_attrs = {} # type: Dict[str, Any] 55 | self._datetime_start = datetime.datetime.now() 56 | self._number = number 57 | 58 | def suggest_float(self, name, low, high, *, log=False, step=None): 59 | # type: (str, float, float, bool, Optional[float]) -> float 60 | 61 | if step is not None: 62 | if log: 63 | raise NotImplementedError( 64 | "The parameter `step` is not supported when `log` is True." 
65 | ) 66 | else: 67 | return self._suggest( 68 | name, distributions.DiscreteUniformDistribution(low=low, high=high, q=step) 69 | ) 70 | else: 71 | if log: 72 | return self._suggest( 73 | name, distributions.LogUniformDistribution(low=low, high=high) 74 | ) 75 | else: 76 | return self._suggest(name, distributions.UniformDistribution(low=low, high=high)) 77 | 78 | def suggest_uniform(self, name, low, high): 79 | # type: (str, float, float) -> float 80 | 81 | return self._suggest(name, distributions.UniformDistribution(low=low, high=high)) 82 | 83 | def suggest_loguniform(self, name, low, high): 84 | # type: (str, float, float) -> float 85 | 86 | return self._suggest(name, distributions.LogUniformDistribution(low=low, high=high)) 87 | 88 | def suggest_discrete_uniform(self, name, low, high, q): 89 | # type: (str, float, float, float) -> float 90 | 91 | high = _adjust_discrete_uniform_high(name, low, high, q) 92 | discrete = distributions.DiscreteUniformDistribution(low=low, high=high, q=q) 93 | return self._suggest(name, discrete) 94 | 95 | def suggest_int(self, name, low, high, step=1, log=False): 96 | # type: (str, int, int, int, bool) -> int 97 | if log: 98 | sample = self._suggest( 99 | name, distributions.IntLogUniformDistribution(low=low, high=high, step=step) 100 | ) 101 | else: 102 | sample = self._suggest( 103 | name, distributions.IntUniformDistribution(low=low, high=high, step=step) 104 | ) 105 | return int(sample) 106 | 107 | def suggest_categorical(self, name, choices): 108 | # type: (str, Sequence[CategoricalChoiceType]) -> CategoricalChoiceType 109 | 110 | choices = tuple(choices) 111 | return self._suggest(name, distributions.CategoricalDistribution(choices=choices)) 112 | 113 | def _suggest(self, name, distribution): 114 | # type: (str, BaseDistribution) -> Any 115 | 116 | if name not in self._params: 117 | raise ValueError( 118 | "The value of the parameter '{}' is not found. 
Please set it at " 119 | "the construction of the FixedTrial object.".format(name) 120 | ) 121 | 122 | value = self._params[name] 123 | param_value_in_internal_repr = distribution.to_internal_repr(value) 124 | if not distribution._contains(param_value_in_internal_repr): 125 | raise ValueError( 126 | "The value {} of the parameter '{}' is out of " 127 | "the range of the distribution {}.".format(value, name, distribution) 128 | ) 129 | 130 | if name in self._distributions: 131 | distributions.check_distribution_compatibility(self._distributions[name], distribution) 132 | 133 | self._suggested_params[name] = value 134 | self._distributions[name] = distribution 135 | 136 | return value 137 | 138 | def report(self, value, step): 139 | # type: (float, int) -> None 140 | 141 | pass 142 | 143 | def should_prune(self, step=None): 144 | # type: (Optional[int]) -> bool 145 | 146 | return False 147 | 148 | def set_user_attr(self, key, value): 149 | # type: (str, Any) -> None 150 | 151 | self._user_attrs[key] = value 152 | 153 | def set_system_attr(self, key, value): 154 | # type: (str, Any) -> None 155 | 156 | self._system_attrs[key] = value 157 | 158 | @property 159 | def params(self): 160 | # type: () -> Dict[str, Any] 161 | 162 | return self._suggested_params 163 | 164 | @property 165 | def distributions(self): 166 | # type: () -> Dict[str, BaseDistribution] 167 | 168 | return self._distributions 169 | 170 | @property 171 | def user_attrs(self): 172 | # type: () -> Dict[str, Any] 173 | 174 | return self._user_attrs 175 | 176 | @property 177 | def system_attrs(self): 178 | # type: () -> Dict[str, Any] 179 | 180 | return self._system_attrs 181 | 182 | @property 183 | def datetime_start(self): 184 | # type: () -> Optional[datetime.datetime] 185 | 186 | return self._datetime_start 187 | 188 | @property 189 | def number(self) -> int: 190 | 191 | return self._number 192 | -------------------------------------------------------------------------------- 
/boexplain/optuna/optuna/logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from logging import CRITICAL # NOQA 3 | from logging import DEBUG # NOQA 4 | from logging import ERROR # NOQA 5 | from logging import FATAL # NOQA 6 | from logging import INFO # NOQA 7 | from logging import WARN # NOQA 8 | from logging import WARNING # NOQA 9 | import threading 10 | 11 | import colorlog 12 | 13 | _lock = threading.Lock() 14 | _default_handler = None # type: Optional[logging.Handler] 15 | 16 | 17 | def create_default_formatter() -> colorlog.ColoredFormatter: 18 | """Create a default formatter of log messages. 19 | 20 | This function is not supposed to be directly accessed by library users. 21 | """ 22 | 23 | return colorlog.ColoredFormatter( 24 | "%(log_color)s[%(levelname)1.1s %(asctime)s]%(reset)s %(message)s" 25 | ) 26 | 27 | 28 | def _get_library_name() -> str: 29 | 30 | return __name__.split(".")[0] 31 | 32 | 33 | def _get_library_root_logger() -> logging.Logger: 34 | 35 | return logging.getLogger(_get_library_name()) 36 | 37 | 38 | def _configure_library_root_logger() -> None: 39 | 40 | global _default_handler 41 | 42 | with _lock: 43 | if _default_handler: 44 | # This library has already configured the library root logger. 45 | return 46 | _default_handler = logging.StreamHandler() # Set sys.stderr as stream. 47 | _default_handler.setFormatter(create_default_formatter()) 48 | 49 | # Apply our default configuration to the library root logger. 
50 | library_root_logger = _get_library_root_logger() 51 | library_root_logger.addHandler(_default_handler) 52 | library_root_logger.setLevel(logging.INFO) 53 | library_root_logger.propagate = False 54 | 55 | 56 | def _reset_library_root_logger() -> None: 57 | 58 | global _default_handler 59 | 60 | with _lock: 61 | if not _default_handler: 62 | return 63 | 64 | library_root_logger = _get_library_root_logger() 65 | library_root_logger.removeHandler(_default_handler) 66 | library_root_logger.setLevel(logging.NOTSET) 67 | _default_handler = None 68 | 69 | 70 | def get_logger(name: str) -> logging.Logger: 71 | """Return a logger with the specified name. 72 | 73 | This function is not supposed to be directly accessed by library users. 74 | """ 75 | 76 | _configure_library_root_logger() 77 | return logging.getLogger(name) 78 | 79 | 80 | def get_verbosity() -> int: 81 | """Return the current level for the Optuna's root logger. 82 | 83 | Returns: 84 | Logging level, e.g., ``optuna.logging.DEBUG`` and ``optuna.logging.INFO``. 85 | 86 | .. note:: 87 | Optuna has following logging levels: 88 | 89 | - ``optuna.logging.CRITICAL``, ``optuna.logging.FATAL`` 90 | - ``optuna.logging.ERROR`` 91 | - ``optuna.logging.WARNING``, ``optuna.logging.WARN`` 92 | - ``optuna.logging.INFO`` 93 | - ``optuna.logging.DEBUG`` 94 | """ 95 | 96 | _configure_library_root_logger() 97 | return _get_library_root_logger().getEffectiveLevel() 98 | 99 | 100 | def set_verbosity(verbosity: int) -> None: 101 | """Set the level for the Optuna's root logger. 102 | 103 | Args: 104 | verbosity: 105 | Logging level, e.g., ``optuna.logging.DEBUG`` and ``optuna.logging.INFO``. 106 | """ 107 | 108 | _configure_library_root_logger() 109 | _get_library_root_logger().setLevel(verbosity) 110 | 111 | 112 | def disable_default_handler() -> None: 113 | """Disable the default handler of the Optuna's root logger. 114 | 115 | Example: 116 | 117 | Stop and then resume logging to :obj:`sys.stderr`. 118 | 119 | .. 
testsetup:: 120 | 121 | def objective(trial): 122 | x = trial.suggest_uniform('x', -100, 100) 123 | y = trial.suggest_categorical('y', [-1, 0, 1]) 124 | return x ** 2 + y 125 | 126 | .. testcode:: 127 | 128 | import optuna 129 | 130 | study = optuna.create_study() 131 | 132 | # There are no logs in sys.stderr. 133 | optuna.logging.disable_default_handler() 134 | study.optimize(objective, n_trials=10) 135 | 136 | # There are logs in sys.stderr. 137 | optuna.logging.enable_default_handler() 138 | study.optimize(objective, n_trials=10) 139 | # [I 2020-02-23 17:00:54,314] Finished trial#10 with value: ... 140 | # [I 2020-02-23 17:00:54,356] Finished trial#11 with value: ... 141 | # ... 142 | 143 | """ 144 | 145 | _configure_library_root_logger() 146 | 147 | assert _default_handler is not None 148 | _get_library_root_logger().removeHandler(_default_handler) 149 | 150 | 151 | def enable_default_handler() -> None: 152 | """Enable the default handler of the Optuna's root logger. 153 | 154 | Please refer to the example shown in :func:`~optuna.logging.disable_default_handler()`. 155 | """ 156 | 157 | _configure_library_root_logger() 158 | 159 | assert _default_handler is not None 160 | _get_library_root_logger().addHandler(_default_handler) 161 | 162 | 163 | def disable_propagation() -> None: 164 | """Disable propagation of the library log outputs. 165 | 166 | Note that log propagation is disabled by default. 167 | """ 168 | 169 | _configure_library_root_logger() 170 | _get_library_root_logger().propagate = False 171 | 172 | 173 | def enable_propagation() -> None: 174 | """Enable propagation of the library log outputs. 175 | 176 | Please disable the Optuna's default handler to prevent double logging if the root logger has 177 | been configured. 178 | 179 | Example: 180 | 181 | Propagate all log output to the root logger in order to save them to the file. 182 | 183 | .. 
testsetup:: 184 | 185 | def objective(trial): 186 | x = trial.suggest_uniform('x', -100, 100) 187 | y = trial.suggest_categorical('y', [-1, 0, 1]) 188 | return x ** 2 + y 189 | 190 | .. testcode:: 191 | 192 | import optuna 193 | import logging 194 | 195 | logger = logging.getLogger() 196 | 197 | logger.setLevel(logging.INFO) # Setup the root logger. 198 | logger.addHandler(logging.FileHandler("foo.log", mode="w")) 199 | 200 | optuna.logging.enable_propagation() # Propagate logs to the root logger. 201 | optuna.logging.disable_default_handler() # Stop showing logs in sys.stderr. 202 | 203 | study = optuna.create_study() 204 | 205 | logger.info("Start optimization.") 206 | study.optimize(objective, n_trials=10) 207 | 208 | with open('foo.log') as f: 209 | assert f.readline() == "Start optimization.\\n" 210 | assert f.readline().startswith("Finished trial#0 with value:") 211 | 212 | """ 213 | 214 | _configure_library_root_logger() 215 | _get_library_root_logger().propagate = True 216 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/trial/_frozen.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import warnings 3 | 4 | # from optuna import distributions 5 | # from optuna import logging 6 | # from optuna.trial._state import TrialState 7 | from .. import distributions 8 | from .. import logging 9 | from ._state import TrialState 10 | 11 | _logger = logging.get_logger(__name__) 12 | 13 | 14 | class FrozenTrial(object): 15 | """Status and results of a :class:`~optuna.trial.Trial`. 16 | 17 | Attributes: 18 | number: 19 | Unique and consecutive number of :class:`~optuna.trial.Trial` for each 20 | :class:`~optuna.study.Study`. Note that this field uses zero-based numbering. 21 | state: 22 | :class:`TrialState` of the :class:`~optuna.trial.Trial`. 23 | value: 24 | Objective value of the :class:`~optuna.trial.Trial`. 
25 | datetime_start: 26 | Datetime where the :class:`~optuna.trial.Trial` started. 27 | datetime_complete: 28 | Datetime where the :class:`~optuna.trial.Trial` finished. 29 | params: 30 | Dictionary that contains suggested parameters. 31 | user_attrs: 32 | Dictionary that contains the attributes of the :class:`~optuna.trial.Trial` set with 33 | :func:`optuna.trial.Trial.set_user_attr`. 34 | intermediate_values: 35 | Intermediate objective values set with :func:`optuna.trial.Trial.report`. 36 | """ 37 | 38 | def __init__( 39 | self, 40 | number, # type: int 41 | state, # type: TrialState 42 | value, # type: Optional[float] 43 | datetime_start, # type: Optional[datetime.datetime] 44 | datetime_complete, # type: Optional[datetime.datetime] 45 | params, # type: Dict[str, Any] 46 | distributions, # type: Dict[str, BaseDistribution] 47 | user_attrs, # type: Dict[str, Any] 48 | system_attrs, # type: Dict[str, Any] 49 | intermediate_values, # type: Dict[int, float] 50 | trial_id, # type: int 51 | ): 52 | # type: (...) -> None 53 | 54 | self.number = number 55 | self.state = state 56 | self.value = value 57 | self.datetime_start = datetime_start 58 | self.datetime_complete = datetime_complete 59 | self.params = params 60 | self.user_attrs = user_attrs 61 | self.system_attrs = system_attrs 62 | self.intermediate_values = intermediate_values 63 | self._distributions = distributions 64 | self._trial_id = trial_id 65 | 66 | # Ordered list of fields required for `__repr__`, `__hash__` and dataframe creation. 67 | # TODO(hvy): Remove this list in Python 3.6 as the order of `self.__dict__` is preserved. 
68 | _ordered_fields = [ 69 | "number", 70 | "value", 71 | "datetime_start", 72 | "datetime_complete", 73 | "params", 74 | "_distributions", 75 | "user_attrs", 76 | "system_attrs", 77 | "intermediate_values", 78 | "_trial_id", 79 | "state", 80 | ] 81 | 82 | def __eq__(self, other): 83 | # type: (Any) -> bool 84 | 85 | if not isinstance(other, FrozenTrial): 86 | return NotImplemented 87 | return other.__dict__ == self.__dict__ 88 | 89 | def __lt__(self, other): 90 | # type: (Any) -> bool 91 | 92 | if not isinstance(other, FrozenTrial): 93 | return NotImplemented 94 | 95 | return self.number < other.number 96 | 97 | def __le__(self, other): 98 | # type: (Any) -> bool 99 | 100 | if not isinstance(other, FrozenTrial): 101 | return NotImplemented 102 | 103 | return self.number <= other.number 104 | 105 | def __hash__(self): 106 | # type: () -> int 107 | 108 | return hash(tuple(getattr(self, field) for field in self._ordered_fields)) 109 | 110 | def __repr__(self): 111 | # type: () -> str 112 | 113 | return "{cls}({kwargs})".format( 114 | cls=self.__class__.__name__, 115 | kwargs=", ".join( 116 | "{field}={value}".format( 117 | field=field if not field.startswith("_") else field[1:], 118 | value=repr(getattr(self, field)), 119 | ) 120 | for field in self._ordered_fields 121 | ), 122 | ) 123 | 124 | def _validate(self): 125 | # type: () -> None 126 | 127 | if self.datetime_start is None: 128 | raise ValueError("`datetime_start` is supposed to be set.") 129 | 130 | if self.state.is_finished(): 131 | if self.datetime_complete is None: 132 | raise ValueError("`datetime_complete` is supposed to be set for a finished trial.") 133 | else: 134 | if self.datetime_complete is not None: 135 | raise ValueError( 136 | "`datetime_complete` is supposed to be None for an unfinished trial." 
137 | ) 138 | 139 | if self.state == TrialState.COMPLETE and self.value is None: 140 | raise ValueError("`value` is supposed to be set for a complete trial.") 141 | 142 | if set(self.params.keys()) != set(self.distributions.keys()): 143 | raise ValueError( 144 | "Inconsistent parameters {} and distributions {}.".format( 145 | set(self.params.keys()), set(self.distributions.keys()) 146 | ) 147 | ) 148 | 149 | for param_name, param_value in self.params.items(): 150 | distribution = self.distributions[param_name] 151 | 152 | param_value_in_internal_repr = distribution.to_internal_repr(param_value) 153 | if not distribution._contains(param_value_in_internal_repr): 154 | raise ValueError( 155 | "The value {} of parameter '{}' isn't contained in the distribution " 156 | "{}.".format(param_value, param_name, distribution) 157 | ) 158 | 159 | @property 160 | def distributions(self): 161 | # type: () -> Dict[str, BaseDistribution] 162 | """Dictionary that contains the distributions of :attr:`params`.""" 163 | 164 | return self._distributions 165 | 166 | @distributions.setter 167 | def distributions(self, value): 168 | # type: (Dict[str, BaseDistribution]) -> None 169 | self._distributions = value 170 | 171 | @property 172 | def trial_id(self): 173 | # type: () -> int 174 | """Return the trial ID. 175 | 176 | .. deprecated:: 0.19.0 177 | The direct use of this attribute is deprecated and it is recommended that you use 178 | :attr:`~optuna.trial.FrozenTrial.number` instead. 179 | 180 | Returns: 181 | The trial ID. 182 | """ 183 | 184 | warnings.warn( 185 | "The use of `FrozenTrial.trial_id` is deprecated. " 186 | "Please use `FrozenTrial.number` instead.", 187 | DeprecationWarning, 188 | ) 189 | 190 | _logger.warning( 191 | "The use of `FrozenTrial.trial_id` is deprecated. " 192 | "Please use `FrozenTrial.number` instead." 
193 | ) 194 | 195 | return self._trial_id 196 | 197 | @property 198 | def last_step(self): 199 | # type: () -> Optional[int] 200 | 201 | if len(self.intermediate_values) == 0: 202 | return None 203 | else: 204 | return max(self.intermediate_values.keys()) 205 | 206 | @property 207 | def duration(self): 208 | # type: () -> Optional[datetime.timedelta] 209 | """Return the elapsed time taken to complete the trial. 210 | 211 | Returns: 212 | The duration. 213 | """ 214 | 215 | if self.datetime_start and self.datetime_complete: 216 | return self.datetime_complete - self.datetime_start 217 | else: 218 | return None 219 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/structs.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from optuna import _study_direction 4 | from optuna import exceptions 5 | from optuna import logging 6 | from optuna import trial 7 | 8 | 9 | _logger = logging.get_logger(__name__) 10 | 11 | _message = ( 12 | "`structs` is deprecated. Classes have moved to the following modules. " 13 | "`structs.StudyDirection`->`study.StudyDirection`, " 14 | "`structs.StudySummary`->`study.StudySummary`, " 15 | "`structs.FrozenTrial`->`trial.FrozenTrial`, " 16 | "`structs.TrialState`->`trial.TrialState`, " 17 | "`structs.TrialPruned`->`exceptions.TrialPruned`." 18 | ) 19 | warnings.warn(_message, DeprecationWarning) 20 | _logger.warning(_message) 21 | 22 | # The use of the structs.StudyDirection is deprecated and it is recommended that you use 23 | # study.StudyDirection instead. See the API reference for more details. 24 | StudyDirection = _study_direction.StudyDirection 25 | 26 | # The use of the structs.TrialState is deprecated and it is recommended that you use 27 | # trial.TrialState instead. See the API reference for more details. 
28 | TrialState = trial.TrialState 29 | 30 | 31 | class FrozenTrial(object): 32 | """Status and results of a :class:`~optuna.trial.Trial`. 33 | 34 | .. deprecated:: 1.4.0 35 | 36 | This class was moved to :mod:`~optuna.trial`. Please use 37 | :class:`~optuna.trial.FrozenTrial` instead. 38 | 39 | Attributes: 40 | number: 41 | Unique and consecutive number of :class:`~optuna.trial.Trial` for each 42 | :class:`~optuna.study.Study`. Note that this field uses zero-based numbering. 43 | state: 44 | :class:`TrialState` of the :class:`~optuna.trial.Trial`. 45 | value: 46 | Objective value of the :class:`~optuna.trial.Trial`. 47 | datetime_start: 48 | Datetime where the :class:`~optuna.trial.Trial` started. 49 | datetime_complete: 50 | Datetime where the :class:`~optuna.trial.Trial` finished. 51 | params: 52 | Dictionary that contains suggested parameters. 53 | user_attrs: 54 | Dictionary that contains the attributes of the :class:`~optuna.trial.Trial` set with 55 | :func:`optuna.trial.Trial.set_user_attr`. 56 | intermediate_values: 57 | Intermediate objective values set with :func:`optuna.trial.Trial.report`. 58 | """ 59 | 60 | def __init__( 61 | self, 62 | number, # type: int 63 | state, # type: TrialState 64 | value, # type: Optional[float] 65 | datetime_start, # type: Optional[datetime] 66 | datetime_complete, # type: Optional[datetime] 67 | params, # type: Dict[str, Any] 68 | distributions, # type: Dict[str, BaseDistribution] 69 | user_attrs, # type: Dict[str, Any] 70 | system_attrs, # type: Dict[str, Any] 71 | intermediate_values, # type: Dict[int, float] 72 | trial_id, # type: int 73 | ): 74 | # type: (...) -> None 75 | 76 | message = ( 77 | "The use of `structs.FrozenTrial` is deprecated. " 78 | "Please use `trial.FrozenTrial` instead." 
79 | ) 80 | warnings.warn(message, DeprecationWarning) 81 | _logger.warning(message) 82 | 83 | self.number = number 84 | self.state = state 85 | self.value = value 86 | self.datetime_start = datetime_start 87 | self.datetime_complete = datetime_complete 88 | self.params = params 89 | self.user_attrs = user_attrs 90 | self.system_attrs = system_attrs 91 | self.intermediate_values = intermediate_values 92 | self._distributions = distributions 93 | self._trial_id = trial_id 94 | 95 | # Ordered list of fields required for `__repr__`, `__hash__` and dataframe creation. 96 | # TODO(hvy): Remove this list in Python 3.6 as the order of `self.__dict__` is preserved. 97 | _ordered_fields = [ 98 | "number", 99 | "value", 100 | "datetime_start", 101 | "datetime_complete", 102 | "params", 103 | "_distributions", 104 | "user_attrs", 105 | "system_attrs", 106 | "intermediate_values", 107 | "_trial_id", 108 | "state", 109 | ] 110 | 111 | def __eq__(self, other): 112 | # type: (Any) -> bool 113 | 114 | if not isinstance(other, FrozenTrial): 115 | return NotImplemented 116 | return other.__dict__ == self.__dict__ 117 | 118 | def __lt__(self, other): 119 | # type: (Any) -> bool 120 | 121 | if not isinstance(other, FrozenTrial): 122 | return NotImplemented 123 | 124 | return self.number < other.number 125 | 126 | def __le__(self, other): 127 | # type: (Any) -> bool 128 | 129 | if not isinstance(other, FrozenTrial): 130 | return NotImplemented 131 | 132 | return self.number <= other.number 133 | 134 | def __hash__(self): 135 | # type: () -> int 136 | 137 | return hash(tuple(getattr(self, field) for field in self._ordered_fields)) 138 | 139 | def __repr__(self): 140 | # type: () -> str 141 | 142 | return "{cls}({kwargs})".format( 143 | cls=self.__class__.__name__, 144 | kwargs=", ".join( 145 | "{field}={value}".format( 146 | field=field if not field.startswith("_") else field[1:], 147 | value=repr(getattr(self, field)), 148 | ) 149 | for field in self._ordered_fields 150 | ), 151 | ) 
152 | 153 | def _validate(self): 154 | # type: () -> None 155 | 156 | if self.datetime_start is None: 157 | raise ValueError("`datetime_start` is supposed to be set.") 158 | 159 | if self.state.is_finished(): 160 | if self.datetime_complete is None: 161 | raise ValueError("`datetime_complete` is supposed to be set for a finished trial.") 162 | else: 163 | if self.datetime_complete is not None: 164 | raise ValueError( 165 | "`datetime_complete` is supposed to be None for an unfinished trial." 166 | ) 167 | 168 | if self.state == TrialState.COMPLETE and self.value is None: 169 | raise ValueError("`value` is supposed to be set for a complete trial.") 170 | 171 | if set(self.params.keys()) != set(self.distributions.keys()): 172 | raise ValueError( 173 | "Inconsistent parameters {} and distributions {}.".format( 174 | set(self.params.keys()), set(self.distributions.keys()) 175 | ) 176 | ) 177 | 178 | for param_name, param_value in self.params.items(): 179 | distribution = self.distributions[param_name] 180 | 181 | param_value_in_internal_repr = distribution.to_internal_repr(param_value) 182 | if not distribution._contains(param_value_in_internal_repr): 183 | raise ValueError( 184 | "The value {} of parameter '{}' isn't contained in the distribution " 185 | "{}.".format(param_value, param_name, distribution) 186 | ) 187 | 188 | @property 189 | def distributions(self): 190 | # type: () -> Dict[str, BaseDistribution] 191 | """Dictionary that contains the distributions of :attr:`params`.""" 192 | 193 | return self._distributions 194 | 195 | @distributions.setter 196 | def distributions(self, value): 197 | # type: (Dict[str, BaseDistribution]) -> None 198 | self._distributions = value 199 | 200 | @property 201 | def last_step(self): 202 | # type: () -> Optional[int] 203 | 204 | if len(self.intermediate_values) == 0: 205 | return None 206 | else: 207 | return max(self.intermediate_values.keys()) 208 | 209 | @property 210 | def duration(self): 211 | # type: () -> 
Optional[timedelta] 212 | """Return the elapsed time taken to complete the trial. 213 | 214 | Returns: 215 | The duration. 216 | """ 217 | 218 | if self.datetime_start and self.datetime_complete: 219 | return self.datetime_complete - self.datetime_start 220 | else: 221 | return None 222 | 223 | 224 | class TrialPruned(exceptions.TrialPruned): 225 | """Exception for pruned trials. 226 | 227 | .. deprecated:: 0.19.0 228 | 229 | This class was moved to :mod:`~optuna.exceptions`. Please use 230 | :class:`~optuna.exceptions.TrialPruned` instead. 231 | """ 232 | 233 | def __init__(self, *args, **kwargs): 234 | # type: (Any, Any) -> None 235 | 236 | message = ( 237 | "The use of `optuna.structs.TrialPruned` is deprecated. " 238 | "Please use `optuna.exceptions.TrialPruned` instead." 239 | ) 240 | warnings.warn(message, DeprecationWarning) 241 | _logger.warning(message) 242 | -------------------------------------------------------------------------------- /boexplain/files/search.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from pandas.api.types import is_numeric_dtype 4 | 5 | from .cat_xform import individual_contribution 6 | from .tpe_wrapper import TpeBo 7 | from .stats import Experiment, Stats 8 | 9 | CAT_ALG_MAP = { 10 | "individual_contribution": "individual_contribution_warm_start_top1", 11 | "categorical": "categorical", 12 | "categorical_warm_start": "categorical_warm_start", 13 | } 14 | 15 | 16 | def fmin( 17 | data, 18 | f, 19 | num_cols=[], 20 | cat_cols=[], 21 | columns=[], 22 | cat_alg=["individual_contribution"], 23 | n_trials=2000, 24 | runtime=10000, 25 | runs=1, 26 | k=5, 27 | random=False, 28 | correct_pred=None, 29 | increment=5, 30 | name="experiment_name", 31 | file=None, 32 | return_viz=False, 33 | use_seeds_from_paper=False, 34 | **kwargs, 35 | ): 36 | """ 37 | Use BOExplain to minimize the objective function. 
38 | 39 | Parameters 40 | ---------- 41 | 42 | data 43 | pandas DataFrame of source, training, or inference data 44 | from which to derive an explanation. 45 | f 46 | Objective function to be minimized. 47 | num_cols 48 | Numerical columns over which to derive an explanation. 49 | cat_cols 50 | Categorical columns over which to derive an explanation. 51 | columns 52 | Columns over which to derive an explanation. 53 | cat_alg 54 | Algorithms to handle categorical parameters. Can be 55 | * 'individual_contribution' 56 | * 'categorical' 57 | * 'categorical_warm_start' 58 | See the paper for details. 59 | n_trials 60 | Maximum number of trials to perform during a run. 61 | runtime 62 | Maximum allowed time for a run in seconds. 63 | runs 64 | Number of runs to perform. 65 | k 66 | Number of TPE candidates to consider. (deprecated) 67 | random 68 | If True, perform a run using random search to 69 | find the constraint parameters. 70 | correct_pred 71 | If provided, will compute f-score, precision, recall, 72 | and jaccard similarity of the found predicates and 73 | the correct predicate 74 | increment 75 | How frequently (in seconds) to log results when finding the best 76 | result in each increment. 77 | name 78 | The name of an experiment. 79 | file 80 | File name to output statistics from the run. 81 | return_viz 82 | If True, return an Altair visualization of the objective function 83 | with iteration on the x-axis. 84 | use_seeds_from_paper 85 | If True, use the seeds that were used in the paper. For reproducibility. 
86 | 87 | Returns 88 | ------- 89 | 90 | The input DataFrame filtered to contain all tuples that do not 91 | satisfy the explanation 92 | """ 93 | 94 | return _drop_tuples_satisfying_optimal_predicate( 95 | data, 96 | f, 97 | num_cols, 98 | cat_cols, 99 | columns, 100 | cat_alg, 101 | n_trials, 102 | runtime, 103 | runs, 104 | k, 105 | random, 106 | correct_pred, 107 | increment, 108 | name, 109 | file, 110 | return_viz, 111 | use_seeds_from_paper, 112 | direction="minimize", 113 | **kwargs, 114 | ) 115 | 116 | 117 | def fmax( 118 | data, 119 | f, 120 | num_cols=[], 121 | cat_cols=[], 122 | columns=[], 123 | cat_alg=["individual_contribution"], 124 | n_trials=2000, 125 | runtime=10000, 126 | runs=1, 127 | k=5, 128 | random=False, 129 | correct_pred=None, 130 | increment=5, 131 | name="experiment_name", 132 | file=None, 133 | return_viz=False, 134 | use_seeds_from_paper=False, 135 | **kwargs, 136 | ): 137 | """ 138 | Use BOExplain to maximize the objective function. 139 | 140 | Parameters 141 | ---------- 142 | 143 | data 144 | pandas DataFrame of source, training, or inference data 145 | from which to derive an explanation. 146 | f 147 | Objective function to be minimized. 148 | num_cols 149 | Numerical columns over which to derive an explanation. 150 | cat_cols 151 | Categorical columns over which to derive an explanation. 152 | columns 153 | Columns over which to derive an explanation. 154 | cat_alg 155 | Algorithms to handle categorical parameters. Can be 156 | * 'individual_contribution' 157 | * 'categorical' 158 | * 'categorical_warm_start' 159 | See the paper for details. 160 | n_trials 161 | Maximum number of trials to perform during a run. 162 | runtime 163 | Maximum allowed time for a run in seconds. 164 | runs 165 | Number of runs to perform. 166 | k 167 | Number of TPE candidates to consider. (deprecated) 168 | random 169 | If True, perform a run using random search to 170 | find the constraint parameters. 
171 | correct_pred 172 | If provided, will compute f-score, precision, recall, 173 | and jaccard similarity of the found predicates and 174 | the correct predicate 175 | increment 176 | How frequently (in seconds) to log results when finding the best 177 | result in each increment. 178 | name 179 | The name of an experiment. 180 | file 181 | File name to output statistics from the run. 182 | return_viz 183 | If True, return an Altair visualization of the objective function 184 | with iteration on the x-axis. 185 | use_seeds_from_paper 186 | If True, use the seeds that were used in the paper. For reproducibility. 187 | 188 | Returns 189 | ------- 190 | 191 | The input DataFrame filtered to contain all tuples that do not 192 | satisfy the explanation 193 | """ 194 | return _drop_tuples_satisfying_optimal_predicate( 195 | data, 196 | f, 197 | num_cols, 198 | cat_cols, 199 | columns, 200 | cat_alg, 201 | n_trials, 202 | runtime, 203 | runs, 204 | k, 205 | random, 206 | correct_pred, 207 | increment, 208 | name, 209 | file, 210 | return_viz, 211 | use_seeds_from_paper, 212 | direction="maximize", 213 | **kwargs, 214 | ) 215 | 216 | 217 | def _drop_tuples_satisfying_optimal_predicate( 218 | data, 219 | f, 220 | num_cols=[], 221 | cat_cols=[], 222 | columns=[], 223 | cat_alg=["individual_contribution"], 224 | n_trials=2000, 225 | runtime=10000, 226 | runs=1, 227 | k=5, 228 | random=False, 229 | correct_pred=None, 230 | increment=5, 231 | name="experiment_name", 232 | file=None, 233 | return_viz=False, 234 | use_seeds_from_paper=False, 235 | direction="minimize", 236 | **kwargs, 237 | ): 238 | assert direction == "minimize" or direction == "maximize" 239 | 240 | for col in columns: 241 | if is_numeric_dtype(data[col]): 242 | num_cols.append(col) 243 | else: 244 | cat_cols.append(col) 245 | 246 | # cast categorical columns as string type 247 | if cat_cols: 248 | data[cat_cols] = data[cat_cols].astype(str) 249 | 250 | # get the nuber of unique values in each column 251 | 
num_cols_range = [(data[col].min(), data[col].max()) for col in num_cols] 252 | cat_cols_n_uniq = [data[col].nunique() for col in cat_cols] 253 | 254 | # dataset length 255 | dataset_length = len(data) 256 | 257 | experiment = Experiment( 258 | num_cols, 259 | cat_cols, 260 | direction, 261 | n_trials, 262 | runs, 263 | correct_pred, 264 | name, 265 | file, 266 | num_cols_range, 267 | cat_cols_n_uniq, 268 | dataset_length, 269 | runtime, 270 | increment, 271 | use_seeds_from_paper, 272 | ) 273 | 274 | cat_alg = [CAT_ALG_MAP[alg] for alg in cat_alg] 275 | 276 | for alg in cat_alg: 277 | stats = Stats(experiment, alg) 278 | cat_val_to_indiv_cont = {} 279 | if cat_cols and alg in { 280 | "individual_contribution_warm_start_topk", 281 | "categorical_warm_start", 282 | "individual_contribution_warm_start_top1", 283 | }: 284 | start = time.time() 285 | # encode categorical columns as numerical and record their encoding maps 286 | cat_val_to_indiv_cont = individual_contribution( 287 | data, 288 | objective=f, 289 | cat_cols=cat_cols, 290 | **kwargs, 291 | ) 292 | run_encoding_time = time.time() - start 293 | # print(alg, run_encoding_time) 294 | stats.set_run_encoding_time(run_encoding_time) 295 | 296 | # initialize a TpeBo object 297 | tpebo = TpeBo( 298 | df=data, 299 | objective=f, 300 | num_cols=num_cols, 301 | cat_cols=cat_cols, 302 | direction=direction, 303 | k=k, 304 | cat_alg=alg, 305 | cat_val_to_indiv_cont=cat_val_to_indiv_cont, 306 | correct_pred=correct_pred, 307 | ) 308 | # run the bayesian optimization 309 | df_rem = tpebo.run(stats, **kwargs) 310 | experiment.set_experiment(stats) 311 | 312 | if random: 313 | stats = Stats(experiment, None) 314 | tpebo = TpeBo( 315 | df=data, 316 | objective=f, 317 | num_cols=num_cols, 318 | cat_cols=cat_cols, 319 | direction=direction, 320 | k=k, 321 | cat_alg="random", 322 | cat_val_to_indiv_cont={}, 323 | correct_pred=correct_pred, 324 | ) 325 | df_rem = tpebo.random(stats, **kwargs) 326 | 
experiment.set_experiment(stats) 327 | 328 | viz = experiment.visualize_results() 329 | 330 | if file is not None: 331 | experiment.output_file() 332 | 333 | if return_viz: 334 | viz = experiment.visualize_results() 335 | return df_rem, viz 336 | 337 | return df_rem 338 | -------------------------------------------------------------------------------- /boexplain/files/stats.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | import re 3 | import random 4 | import numpy as np 5 | import pandas as pd 6 | import altair as alt 7 | 8 | alt.data_transformers.disable_max_rows() 9 | from json import dumps 10 | from numpyencoder import NumpyEncoder 11 | 12 | 13 | class Experiment: 14 | 15 | experiments = dict() 16 | n_exp = 0 17 | 18 | def __init__( 19 | self, 20 | num_cols, 21 | cat_cols, 22 | direction, 23 | n_trials, 24 | runs, 25 | correct_pred, 26 | name, 27 | file, 28 | num_cols_range, 29 | cat_cols_n_uniq, 30 | dataset_length, 31 | runtime, 32 | increment, 33 | use_seeds_from_paper, 34 | ): 35 | 36 | self.num_cols = num_cols 37 | self.cat_cols = cat_cols 38 | self.direction = direction 39 | self.dir_enc = 1 if direction == "minimize" else -1 40 | self.n_trials = n_trials 41 | self.runs = runs 42 | self.correct_pred = correct_pred 43 | self.name = name 44 | self.file = file 45 | self.num_cols_range = num_cols_range 46 | self.cat_cols_n_uniq = cat_cols_n_uniq 47 | self.dataset_length = dataset_length 48 | self.runtime = runtime 49 | self.increment = increment 50 | 51 | if use_seeds_from_paper: 52 | self.seeds = [ 53 | 529840, 54 | 664234, 55 | 978546, 56 | 283991, 57 | 819362, 58 | 348229, 59 | 536289, 60 | 480291, 61 | 500927, 62 | 386602, 63 | ] 64 | else: 65 | self.seeds = random.sample(range(1000000), runs) 66 | 67 | def set_experiment(self, results) -> None: 68 | 69 | self.experiments[self.n_exp] = results.__dict__.copy() 70 | self.n_exp += 1 71 | 72 | def output_file(self): 73 | 74 | fo = 
open(self.file, "w") 75 | 76 | for v in self.experiments.values(): 77 | fo.write(f"{dumps(v, cls=NumpyEncoder)}\n") 78 | 79 | fo.close() 80 | 81 | def visualize_results(self): 82 | 83 | df = pd.DataFrame({}, columns=["Algorithm", "Iteration", "Value"]) 84 | for i in range(len(self.experiments)): 85 | df_new = pd.DataFrame.from_dict( 86 | { 87 | "Algorithm": self.experiments[i]["cat_enc"], 88 | "Iteration": list(range(self.experiments[i]["n_trials"])), 89 | "Value": self.experiments[i]["opt_res"], 90 | }, 91 | orient="index", 92 | ).T 93 | df = df.append(df_new) 94 | df = df.explode("Value") 95 | df = df.set_index(["Algorithm"]).apply(pd.Series.explode).reset_index() 96 | 97 | num_cols = f"{len(self.experiments[0]['num_cols'])} numerical columns: " 98 | for i, col in enumerate(self.experiments[0]["num_cols"]): 99 | num_cols += f"{col} (range {self.experiments[0]['num_cols_range'][i][0]} to {self.experiments[0]['num_cols_range'][i][1]}), " 100 | cat_cols = f"{len(self.experiments[0]['cat_cols'])} categorical columns: " 101 | for i, col in enumerate(self.experiments[0]["cat_cols"]): 102 | cat_cols += f"{col} ({self.experiments[0]['cat_cols_n_uniq'][i]} unique values), " 103 | 104 | out_str = f"Experiment: {self.experiments[0]['name']}. Completed {self.experiments[0]['n_trials']} iterations for {self.experiments[0]['runs']} runs. Search space includes " 105 | 106 | if len(self.experiments[0]["num_cols"]) > 0: 107 | out_str += num_cols 108 | if len(self.experiments[0]["cat_cols"]) > 0: 109 | out_str += "and " 110 | 111 | if len(self.experiments[0]["cat_cols"]) > 0: 112 | out_str += cat_cols 113 | 114 | out_str = f"{out_str[:-2]}." 
115 | 116 | out_lst = [line.strip() for line in re.findall(r".{1,80}(?:\s+|$)", out_str)] 117 | 118 | line = ( 119 | alt.Chart(df) 120 | .mark_line() 121 | .encode( 122 | x="Iteration", 123 | y=alt.Y("mean(Value)", scale=alt.Scale(zero=False)), 124 | color="Algorithm", 125 | ) 126 | .properties(title=out_lst) # {"text": out_lst, "subtitle": ""} 127 | ) 128 | band = ( 129 | alt.Chart(df) 130 | .mark_errorband(extent="stdev") 131 | .encode( 132 | x="Iteration", 133 | y=alt.Y("Value", title="Mean Objective Function Value"), 134 | color="Algorithm", 135 | ) 136 | ) 137 | chart = band + line 138 | chart = chart.configure_title( 139 | anchor="start", 140 | ) 141 | return chart 142 | 143 | 144 | class Stats(Experiment): 145 | def __init__(self, experiment, cat_enc) -> None: 146 | self.__dict__ = experiment.__dict__ 147 | self.cat_enc = cat_enc 148 | 149 | self.run_times = np.zeros(self.runs) 150 | self.n_duplicates = np.zeros(self.runs) 151 | self.n_zero_tup_preds = np.zeros(self.runs) 152 | self.preds = dict() 153 | self.opt_res = np.full((self.runs, self.n_trials), self.dir_enc * 1e9) 154 | self.run_time_of_opt_res = np.zeros((self.runs, self.n_trials)) 155 | self.iter_completed = np.zeros(self.runs) 156 | self.min_iter_completed = self.n_trials 157 | self.n_tuples_removed_from_data = np.zeros(self.runs) 158 | self.best_obj_values = np.full(self.runs, self.dir_enc * 1e9) 159 | self.add_on = np.zeros(self.runs) 160 | 161 | if self.correct_pred: 162 | self.precision = np.zeros((self.runs, self.n_trials)) 163 | self.recall = np.zeros((self.runs, self.n_trials)) 164 | self.f_score = np.zeros((self.runs, self.n_trials)) 165 | self.jaccard = np.zeros((self.runs, self.n_trials)) 166 | 167 | self.final_precision = np.zeros(self.runs) 168 | self.final_recall = np.zeros(self.runs) 169 | self.final_f_score = np.zeros(self.runs) 170 | self.final_jaccard = np.zeros(self.runs) 171 | 172 | self.encoding_time = 0 173 | self.example_best_predicate = None 174 | 175 | self.time_array = 
np.zeros((self.runs, self.runtime // self.increment)) 176 | self.precision_time_array = np.zeros((self.runs, self.runtime // self.increment)) 177 | self.recall_time_array = np.zeros((self.runs, self.runtime // self.increment)) 178 | self.f_score_time_array = np.zeros((self.runs, self.runtime // self.increment)) 179 | self.jaccard_time_array = np.zeros((self.runs, self.runtime // self.increment)) 180 | 181 | def get_run_opt_res_array(self) -> np.ndarray: 182 | return np.full(self.n_trials, self.dir_enc * 1e9) 183 | 184 | def get_run_time_array(self) -> np.ndarray: 185 | return np.zeros(self.runtime // self.increment) 186 | 187 | def get_run_time_of_opt_res_array(self) -> np.ndarray: 188 | return np.zeros(self.n_trials) 189 | 190 | def set_run_encoding_time(self, run_encoding_time): 191 | 192 | self.encoding_time = run_encoding_time 193 | 194 | def set_run_opt_res(self, run_opt_res: np.ndarray, run: int) -> None: 195 | 196 | self.opt_res[run] = run_opt_res 197 | 198 | def set_run_time_array(self, run_time_array: np.ndarray, run: int) -> None: 199 | 200 | self.time_array[run] = run_time_array 201 | 202 | def set_precision_time_array(self, precision_time_array: np.ndarray, run: int) -> None: 203 | 204 | self.precision_time_array[run] = precision_time_array 205 | 206 | def set_recall_time_array(self, recall_time_array: np.ndarray, run: int) -> None: 207 | 208 | self.recall_time_array[run] = recall_time_array 209 | 210 | def set_f_score_time_array(self, f_score_time_array: np.ndarray, run: int) -> None: 211 | 212 | self.f_score_time_array[run] = f_score_time_array 213 | 214 | def set_jaccard_time_array(self, jaccard_time_array: np.ndarray, run: int) -> None: 215 | 216 | self.jaccard_time_array[run] = jaccard_time_array 217 | 218 | def set_run_time_of_opt_res(self, run_time_opt_res: np.ndarray, run: int) -> None: 219 | 220 | self.run_time_of_opt_res[run] = run_time_opt_res 221 | 222 | def set_run_time(self, run_time: float, run: int) -> None: 223 | 224 | 
self.run_times[run] = run_time 225 | 226 | def set_add_on(self, add_on: float, run: int) -> None: 227 | 228 | self.add_on[run] = add_on 229 | 230 | def set_run_n_duplicates(self, run_n_dups: float, run: int) -> None: 231 | 232 | self.n_duplicates[run] = run_n_dups 233 | 234 | def set_run_n_zero_tup_preds(self, run_n_zero_tup_preds: float, run: int) -> None: 235 | 236 | self.n_zero_tup_preds[run] = run_n_zero_tup_preds 237 | 238 | def set_run_preds(self, best_pred: dict[Any], run: int) -> None: 239 | 240 | self.preds[run] = best_pred 241 | 242 | def set_run_iter_completed(self, n_iter: int, run) -> None: 243 | 244 | self.iter_completed[run] = n_iter 245 | 246 | def set_run_best_objective_value(self, obj_value: int, run) -> None: 247 | 248 | self.best_obj_values[run] = obj_value 249 | 250 | def set_example_best_predicate(self, best_pred: dict[Any], run) -> None: 251 | 252 | if self.direction == "minimize": 253 | if self.best_obj_values[run] == self.best_obj_values.min(): 254 | self.example_best_predicate = best_pred 255 | else: 256 | if self.best_obj_values[run] == self.best_obj_values.max(): 257 | self.example_best_predicate = best_pred 258 | 259 | def set_min_iter_completed(self, n_iter: int) -> None: 260 | 261 | if n_iter < self.min_iter_completed: 262 | self.min_iter_completed = n_iter 263 | 264 | def set_run_n_tuples_removed_from_data(self, num_removed: int, run: int): 265 | 266 | self.n_tuples_removed_from_data[run] = num_removed 267 | 268 | def set_final_precision(self, precision: float, run: int) -> None: 269 | 270 | self.final_precision[run] = precision 271 | 272 | def set_final_recall(self, recall: float, run: int) -> None: 273 | 274 | self.final_recall[run] = recall 275 | 276 | def set_final_f_score(self, f_score: float, run: int) -> None: 277 | 278 | self.final_f_score[run] = f_score 279 | 280 | def set_final_jaccard(self, jaccard: float, run: int) -> None: 281 | 282 | self.final_jaccard[run] = jaccard 283 | 284 | def set_precision(self, precision: 
np.ndarray, run: int) -> None: 285 | 286 | self.precision[run] = precision 287 | 288 | def set_recall(self, recall: np.ndarray, run: int) -> None: 289 | 290 | self.recall[run] = recall 291 | 292 | def set_f_score(self, f_score: np.ndarray, run: int) -> None: 293 | 294 | self.f_score[run] = f_score 295 | 296 | def set_jaccard(self, jaccard: np.ndarray, run: int) -> None: 297 | 298 | self.jaccard[run] = jaccard 299 | 300 | def output_temp_file(self) -> None: 301 | 302 | fo = open("temp.json", "w") 303 | 304 | fo.write(f"{dumps(self.__dict__, cls=NumpyEncoder)}\n") 305 | 306 | fo.close() 307 | 308 | def standard_output(self) -> None: 309 | 310 | print("BEST SCORE", self.best_obj_values) 311 | print("AVERAGE NUMBER OF TUPLES REMOVED", self.n_tuples_removed_from_data.mean()) 312 | print("AVERAGE TIME", self.run_times.mean()) 313 | print("AVERAGE DUPLICATE COUNT", self.n_duplicates.mean()) 314 | print("AVERAGE ZERO TUPLE", self.n_zero_tup_preds.mean(), "\n") 315 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/storages/in_memory.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from datetime import datetime 3 | import threading 4 | import uuid 5 | 6 | # from optuna import distributions # NOQA 7 | # from optuna.exceptions import DuplicatedStudyError 8 | # from optuna.storages import base 9 | # from optuna.storages.base import DEFAULT_STUDY_NAME_PREFIX 10 | # from optuna.study import StudyDirection 11 | # from optuna.trial import FrozenTrial 12 | # from optuna.trial import TrialState 13 | from .. import distributions # NOQA 14 | from ..exceptions import DuplicatedStudyError 15 | from . 
import base
from .base import DEFAULT_STUDY_NAME_PREFIX
from ..study import StudyDirection
from ..trial import FrozenTrial
from ..trial import TrialState


class InMemoryStorage(base.BaseStorage):
    """Storage class that stores data in memory of the Python process.

    This class is not supposed to be directly accessed by library users.
    """

    # All public methods serialize access through self._lock (an RLock), so
    # the storage is safe to share between threads within one process.

    def __init__(self):
        # type: () -> None
        self._trial_id_to_study_id_and_number = {}  # type: Dict[int, Tuple[int, int]]
        self._study_name_to_id = {}  # type: Dict[str, int]
        self._studies = {}  # type: Dict[int, _StudyInfo]

        self._max_study_id = -1
        self._max_trial_id = -1

        self._lock = threading.RLock()

    def __getstate__(self):
        # type: () -> Dict[Any, Any]
        # Locks are not picklable; drop the lock when serializing.
        state = self.__dict__.copy()
        del state["_lock"]
        return state

    def __setstate__(self, state):
        # type: (Dict[Any, Any]) -> None
        # Recreate the lock dropped in __getstate__.
        self.__dict__.update(state)
        self._lock = threading.RLock()

    def create_new_study(self, study_name=None):
        # type: (Optional[str]) -> int
        """Register a new study (auto-naming it if no name is given) and
        return its id.  Raises DuplicatedStudyError on a name collision."""

        with self._lock:
            study_id = self._max_study_id + 1
            self._max_study_id += 1

            if study_name is not None:
                if study_name in self._study_name_to_id:
                    raise DuplicatedStudyError
            else:
                study_uuid = str(uuid.uuid4())
                study_name = DEFAULT_STUDY_NAME_PREFIX + study_uuid
            self._studies[study_id] = _StudyInfo(study_name)
            self._study_name_to_id[study_name] = study_id

            return study_id

    def delete_study(self, study_id):
        # type: (int) -> None
        """Remove a study and all bookkeeping for its trials."""

        with self._lock:
            self._check_study_id(study_id)

            for trial in self._studies[study_id].trials:
                del self._trial_id_to_study_id_and_number[trial._trial_id]
            study_name = self._studies[study_id].name
            del self._study_name_to_id[study_name]
            del self._studies[study_id]

    def set_study_direction(self, study_id, direction):
        # type: (int, StudyDirection) -> None
        """Set the optimization direction; a direction may only be set once
        (re-setting to the same value is a no-op)."""

        with self._lock:
            self._check_study_id(study_id)

            study = self._studies[study_id]
            if study.direction != StudyDirection.NOT_SET and study.direction != direction:
                raise ValueError(
                    "Cannot overwrite study direction from {} to {}.".format(
                        study.direction, direction
                    )
                )
            study.direction = direction

    def set_study_user_attr(self, study_id, key, value):
        # type: (int, str, Any) -> None

        with self._lock:
            self._check_study_id(study_id)

            self._studies[study_id].user_attrs[key] = value

    def set_study_system_attr(self, study_id, key, value):
        # type: (int, str, Any) -> None

        with self._lock:
            self._check_study_id(study_id)

            self._studies[study_id].system_attrs[key] = value

    def get_study_id_from_name(self, study_name):
        # type: (str) -> int
        with self._lock:
            if study_name not in self._study_name_to_id:
                raise KeyError("No such study {}.".format(study_name))

            return self._study_name_to_id[study_name]

    def get_study_id_from_trial_id(self, trial_id):
        # type: (int) -> int

        with self._lock:
            self._check_trial_id(trial_id)

            return self._trial_id_to_study_id_and_number[trial_id][0]

    def get_study_name_from_id(self, study_id):
        # type: (int) -> str

        with self._lock:
            self._check_study_id(study_id)
            return self._studies[study_id].name

    def get_study_direction(self, study_id):
        # type: (int) -> StudyDirection

        with self._lock:
            self._check_study_id(study_id)
            return self._studies[study_id].direction

    def get_study_user_attrs(self, study_id):
        # type: (int) -> Dict[str, Any]

        with self._lock:
            self._check_study_id(study_id)
            # Deep copy so callers cannot mutate internal state.
            return copy.deepcopy(self._studies[study_id].user_attrs)

    def get_study_system_attrs(self, study_id):
        # type: (int) -> Dict[str, Any]

        with self._lock:
            self._check_study_id(study_id)
            return copy.deepcopy(self._studies[study_id].system_attrs)

    def create_new_trial(self, study_id, template_trial=None):
        # type: (int, Optional[FrozenTrial]) -> int
        """Append a new trial (fresh RUNNING trial, or a copy of
        ``template_trial``) to the study and return its trial id."""

        with self._lock:
            self._check_study_id(study_id)

            if template_trial is None:
                trial = self._create_running_trial()
            else:
                trial = copy.deepcopy(template_trial)

            trial_id = self._max_trial_id + 1
            self._max_trial_id += 1
            # Trial number is the study-local index; trial id is global.
            trial.number = len(self._studies[study_id].trials)
            trial._trial_id = trial_id
            self._trial_id_to_study_id_and_number[trial_id] = (study_id, trial.number)
            self._studies[study_id].trials.append(trial)
            self._update_cache(trial_id, study_id)
            return trial_id

    @staticmethod
    def _create_running_trial():
        # type: () -> FrozenTrial

        return FrozenTrial(
            trial_id=-1,  # dummy value.
            number=-1,  # dummy value.
            state=TrialState.RUNNING,
            params={},
            distributions={},
            user_attrs={},
            system_attrs={},
            value=None,
            intermediate_values={},
            datetime_start=datetime.now(),
            datetime_complete=None,
        )

    def set_trial_state(self, trial_id, state):
        # type: (int, TrialState) -> bool
        """Transition a trial's state.  Returns False for an invalid
        WAITING->RUNNING transition; refreshes the best-trial cache when the
        trial reaches a finished state."""

        with self._lock:
            trial = self._get_trial(trial_id)
            self.check_trial_is_updatable(trial_id, trial.state)

            # Shallow copy: stored trials are treated as immutable snapshots.
            trial = copy.copy(trial)
            self.check_trial_is_updatable(trial_id, trial.state)

            if state == TrialState.RUNNING and trial.state != TrialState.WAITING:
                return False

            trial.state = state
            if state.is_finished():
                trial.datetime_complete = datetime.now()
                self._set_trial(trial_id, trial)
                study_id = self._trial_id_to_study_id_and_number[trial_id][0]
                self._update_cache(trial_id, study_id)
            else:
                self._set_trial(trial_id, trial)

            return True

    def clear_params_and_dists(self, trial_id):
        # NOTE(review): not part of upstream optuna's storage API -- presumably
        # a BOExplain-specific addition that wipes every sampled parameter and
        # its distribution from a trial so it can be re-suggested; verify
        # against the sampler/study call sites.
        trial = self._get_trial(trial_id)
        trial = copy.copy(trial)
        for name in list(trial.params.keys()):
            # if "_min" not in name and "_len" not in name:
            del trial.params[name]
            del trial.distributions[name]
        self._set_trial(trial_id, trial)

        return

    def set_trial_param(self, trial_id, param_name, param_value_internal, distribution):
        # type: (int, str, float, distributions.BaseDistribution) -> bool
        """Record one sampled parameter.  Returns False if the parameter was
        already set for this trial."""

        with self._lock:
            trial = self._get_trial(trial_id)

            self.check_trial_is_updatable(trial_id, trial.state)

            study_id = self._trial_id_to_study_id_and_number[trial_id][0]
            # Check param distribution compatibility with previous trial(s).
            if param_name in self._studies[study_id].param_distribution:
                distributions.check_distribution_compatibility(
                    self._studies[study_id].param_distribution[param_name], distribution
                )

            # Check param has not been set; otherwise, return False.
            if param_name in trial.params:
                return False

            # Set param distribution.
            self._studies[study_id].param_distribution[param_name] = distribution

            # Set param.
            trial = copy.copy(trial)
            trial.params = copy.copy(trial.params)
            trial.params[param_name] = distribution.to_external_repr(param_value_internal)
            trial.distributions = copy.copy(trial.distributions)
            trial.distributions[param_name] = distribution
            self._set_trial(trial_id, trial)

            return True

    def get_trial_number_from_id(self, trial_id):
        # type: (int) -> int

        with self._lock:
            self._check_trial_id(trial_id)

            return self._trial_id_to_study_id_and_number[trial_id][1]

    def get_best_trial(self, study_id):
        # type: (int) -> FrozenTrial

        with self._lock:
            self._check_study_id(study_id)

            best_trial_id = self._studies[study_id].best_trial_id
            if best_trial_id is None:
                raise ValueError("No trials are completed yet.")
            return self.get_trial(best_trial_id)

    def get_trial_param(self, trial_id, param_name):
        # type: (int, str) -> float

        with self._lock:
            trial = self._get_trial(trial_id)

            distribution = trial.distributions[param_name]
            return distribution.to_internal_repr(trial.params[param_name])

    def set_trial_value(self, trial_id, value):
        # type: (int, float) -> None

        with self._lock:
            trial = self._get_trial(trial_id)
            self.check_trial_is_updatable(trial_id, trial.state)

            trial = copy.copy(trial)
            self.check_trial_is_updatable(trial_id, trial.state)

            trial.value = value
            self._set_trial(trial_id, trial)

    def _update_cache(self, trial_id: int, study_id: int) -> None:
        # Keep the study's best_trial_id cache in sync after a trial
        # completes; non-COMPLETE trials never become the best trial.

        trial = self._get_trial(trial_id)

        if trial.state != TrialState.COMPLETE:
            return

        best_trial_id = self._studies[study_id].best_trial_id
        if best_trial_id is None:
            self._studies[study_id].best_trial_id = trial_id
            return
        best_trial = self._get_trial(best_trial_id)
        assert best_trial is not None
        best_value = best_trial.value
        new_value = trial.value
        if best_value is None:
            self._studies[study_id].best_trial_id = trial_id
            return
        # Complete trials do not have `None` values.
        assert new_value is not None

        if self.get_study_direction(study_id) == StudyDirection.MAXIMIZE:
            if best_value < new_value:
                self._studies[study_id].best_trial_id = trial_id
        else:
            if best_value > new_value:
                self._studies[study_id].best_trial_id = trial_id

    def set_trial_intermediate_value(self, trial_id, step, intermediate_value):
        # type: (int, int, float) -> bool
        """Record an intermediate value for ``step``.  Returns False if that
        step already has a value."""

        with self._lock:
            trial = self._get_trial(trial_id)
            self.check_trial_is_updatable(trial_id, trial.state)

            self.check_trial_is_updatable(trial_id, trial.state)

            trial = copy.copy(trial)
            values = copy.copy(trial.intermediate_values)
            if step in values:
                return False

            values[step] = intermediate_value
            trial.intermediate_values = values
            self._set_trial(trial_id, trial)

            return True

    def set_trial_user_attr(self, trial_id, key, value):
        # type: (int, str, Any) -> None

        with self._lock:
            self._check_trial_id(trial_id)
            trial = self._get_trial(trial_id)
            self.check_trial_is_updatable(trial_id, trial.state)

            self.check_trial_is_updatable(trial_id, trial.state)

            trial = copy.copy(trial)
            trial.user_attrs = copy.copy(trial.user_attrs)
            trial.user_attrs[key] = value
            self._set_trial(trial_id, trial)

    def set_trial_system_attr(self, trial_id, key, value):
        # type: (int, str, Any) -> None

        with self._lock:
            trial = self._get_trial(trial_id)
            self.check_trial_is_updatable(trial_id, trial.state)

            self.check_trial_is_updatable(trial_id, trial.state)

            trial = copy.copy(trial)
            trial.system_attrs = copy.copy(trial.system_attrs)
            trial.system_attrs[key] = value
            self._set_trial(trial_id, trial)

    def get_trial(self, trial_id):
        # type: (int) -> FrozenTrial

        with self._lock:
            # Deep copy: callers get an isolated snapshot.
            return copy.deepcopy(self._get_trial(trial_id))

    def _get_trial(self, trial_id: int) -> FrozenTrial:
        # Internal accessor; returns the stored object WITHOUT copying.

        self._check_trial_id(trial_id)
        # study_id=0, trial_number=0,1,2,...
        study_id, trial_number = self._trial_id_to_study_id_and_number[trial_id]
        return self._studies[study_id].trials[trial_number]

    def _set_trial(self, trial_id: int, trial: FrozenTrial) -> None:
        study_id, trial_number = self._trial_id_to_study_id_and_number[trial_id]
        self._studies[study_id].trials[trial_number] = trial

    def get_all_trials(self, study_id, deepcopy=True):
        # type: (int, bool) -> List[FrozenTrial]

        with self._lock:
            self._check_study_id(study_id)
            if deepcopy:
                return copy.deepcopy(self._studies[study_id].trials)
            else:
                # Shallow list copy: the FrozenTrial objects are shared.
                return self._studies[study_id].trials[:]

    def get_n_trials(self, study_id, state=None):
        # type: (int, Optional[TrialState]) -> int

        with self._lock:
            self._check_study_id(study_id)
            if state is None:
                return len(self._studies[study_id].trials)

            return sum(
                trial.state == state for trial in self.get_all_trials(study_id, deepcopy=False)
            )

    def _check_study_id(self, study_id):
        # type: (int) -> None

        if study_id not in self._studies:
            raise KeyError("No study with study_id {} exists.".format(study_id))

    def _check_trial_id(self, trial_id: int) -> None:

        if trial_id not in self._trial_id_to_study_id_and_number:
            raise KeyError("No trial with trial_id {} exists.".format(trial_id))


class _StudyInfo:
    # Per-study record: trials plus cached metadata and the best-trial cache.
    def __init__(self, name: str) -> None:
        self.trials = []  # type: List[FrozenTrial]
        self.param_distribution = {}  # type: Dict[str, distributions.BaseDistribution]
        self.user_attrs = {}  # type: Dict[str, Any]
        self.system_attrs = {}  # type: Dict[str, Any]
        self.name = name  # type: str
        self.direction = StudyDirection.NOT_SET
        self.best_trial_id = None  # type: Optional[int]


# --- boexplain/optuna/optuna/distributions.py ---
import abc
import decimal
import json
import warnings

# from optuna import logging
from . import logging


class BaseDistribution(object, metaclass=abc.ABCMeta):
    """Base class for distributions.

    Note that distribution classes are not supposed to be called by library users.
    They are used by :class:`~optuna.trial.Trial` and :class:`~optuna.samplers` internally.
    """

    def to_external_repr(self, param_value_in_internal_repr):
        # type: (float) -> Any
        """Convert internal representation of a parameter value into external representation.

        Args:
            param_value_in_internal_repr:
                Optuna's internal representation of a parameter value.

        Returns:
            Optuna's external representation of a parameter value.
        """

        return param_value_in_internal_repr

    def to_internal_repr(self, param_value_in_external_repr):
        # type: (Any) -> float
        """Convert external representation of a parameter value into internal representation.

        Args:
            param_value_in_external_repr:
                Optuna's external representation of a parameter value.

        Returns:
            Optuna's internal representation of a parameter value.
        """

        return param_value_in_external_repr

    @abc.abstractmethod
    def single(self):
        # type: () -> bool
        """Test whether the range of this distribution contains just a single value.

        When this method returns :obj:`True`, :mod:`~optuna.samplers` always sample
        the same value from the distribution.

        Returns:
            :obj:`True` if the range of this distribution contains just a single value,
            otherwise :obj:`False`.
        """

        raise NotImplementedError

    @abc.abstractmethod
    def _contains(self, param_value_in_internal_repr):
        # type: (float) -> bool
        """Test if a parameter value is contained in the range of this distribution.

        Args:
            param_value_in_internal_repr:
                Optuna's internal representation of a parameter value.

        Returns:
            :obj:`True` if the parameter value is contained in the range of this distribution,
            otherwise :obj:`False`.
        """

        raise NotImplementedError

    def _asdict(self):
        # type: () -> Dict

        return self.__dict__

    def __eq__(self, other):
        # type: (Any) -> bool

        if not isinstance(other, BaseDistribution):
            return NotImplemented
        # Exact same class is required (not a subclass), matching __hash__.
        if not type(self) is type(other):
            return False
        return self.__dict__ == other.__dict__

    def __hash__(self):
        # type: () -> int

        return hash((self.__class__,) + tuple(sorted(self.__dict__.items())))

    def __repr__(self):
        # type: () -> str

        kwargs = ", ".join("{}={}".format(k, v) for k, v in sorted(self.__dict__.items()))
        return "{}({})".format(self.__class__.__name__, kwargs)


class UniformDistribution(BaseDistribution):
    """A uniform distribution in the linear domain.

    This object is instantiated by :func:`~optuna.trial.Trial.suggest_uniform`, and passed to
    :mod:`~optuna.samplers` in general.

    Attributes:
        low:
            Lower endpoint of the range of the distribution. ``low`` is included in the range.
        high:
            Upper endpoint of the range of the distribution. ``high`` is excluded from the range.
    """

    def __init__(self, low, high):
        # type: (float, float) -> None

        if low > high:
            raise ValueError(
                "The `low` value must be smaller than or equal to the `high` value "
                "(low={}, high={}).".format(low, high)
            )

        self.low = low
        self.high = high

    def single(self):
        # type: () -> bool

        return self.low == self.high

    def _contains(self, param_value_in_internal_repr):
        # type: (float) -> bool

        value = param_value_in_internal_repr
        # Degenerate range: only `low` itself is contained; otherwise the
        # upper bound is exclusive (see class docstring).
        if self.low == self.high:
            return value == self.low
        else:
            return self.low <= value < self.high


class LogUniformDistribution(BaseDistribution):
    """A uniform distribution in the log domain.

    This object is instantiated by :func:`~optuna.trial.Trial.suggest_loguniform`, and passed to
    :mod:`~optuna.samplers` in general.

    Attributes:
        low:
            Lower endpoint of the range of the distribution. ``low`` is included in the range.
        high:
            Upper endpoint of the range of the distribution. ``high`` is excluded from the range.
    """

    def __init__(self, low, high):
        # type: (float, float) -> None

        if low > high:
            raise ValueError(
                "The `low` value must be smaller than or equal to the `high` value "
                "(low={}, high={}).".format(low, high)
            )
        if low <= 0.0:
            raise ValueError(
                "The `low` value must be larger than 0 for a log distribution "
                "(low={}, high={}).".format(low, high)
            )

        self.low = low
        self.high = high

    def single(self):
        # type: () -> bool

        return self.low == self.high

    def _contains(self, param_value_in_internal_repr):
        # type: (float) -> bool

        value = param_value_in_internal_repr
        if self.low == self.high:
            return value == self.low
        else:
            return self.low <= value < self.high


class DiscreteUniformDistribution(BaseDistribution):
    """A discretized uniform distribution in the linear domain.

    This object is instantiated by :func:`~optuna.trial.Trial.suggest_discrete_uniform`, and passed
    to :mod:`~optuna.samplers` in general.

    Attributes:
        low:
            Lower endpoint of the range of the distribution. ``low`` is included in the range.
        high:
            Upper endpoint of the range of the distribution. ``high`` is included in the range.
        q:
            A discretization step.
    """

    def __init__(self, low, high, q):
        # type: (float, float, float) -> None

        if low > high:
            raise ValueError(
                "The `low` value must be smaller than or equal to the `high` value "
                "(low={}, high={}, q={}).".format(low, high, q)
            )

        self.low = low
        self.high = high
        self.q = q

    def single(self):
        # type: () -> bool

        if self.low == self.high:
            return True
        # Decimal avoids float rounding errors when comparing the span to q.
        high = decimal.Decimal(str(self.high))
        low = decimal.Decimal(str(self.low))
        q = decimal.Decimal(str(self.q))
        if (high - low) < q:
            return True
        return False

    def _contains(self, param_value_in_internal_repr):
        # type: (float) -> bool

        value = param_value_in_internal_repr
        return self.low <= value <= self.high


class IntUniformDistribution(BaseDistribution):
    """A uniform distribution on integers.

    This object is instantiated by :func:`~optuna.trial.Trial.suggest_int`, and passed to
    :mod:`~optuna.samplers` in general.

    Attributes:
        low:
            Lower endpoint of the range of the distribution. ``low`` is included in the range.
        high:
            Upper endpoint of the range of the distribution. ``high`` is included in the range.
        step:
            A step for spacing between values.
    """

    def __init__(self, low, high, step=1):
        # type: (int, int, int) -> None

        if low > high:
            raise ValueError(
                "The `low` value must be smaller than or equal to the `high` value "
                "(low={}, high={}).".format(low, high)
            )
        if step <= 0:
            raise ValueError(
                "The `step` value must be non-zero positive value, but step={}.".format(step)
            )

        self.low = low
        self.high = high
        self.step = step

    def to_external_repr(self, param_value_in_internal_repr):
        # type: (float) -> int

        return int(param_value_in_internal_repr)

    def to_internal_repr(self, param_value_in_external_repr):
        # type: (int) -> float

        return float(param_value_in_external_repr)

    def single(self):
        # type: () -> bool

        if self.low == self.high:
            return True
        return (self.high - self.low) < self.step

    def _contains(self, param_value_in_internal_repr):
        # type: (float) -> bool

        value = param_value_in_internal_repr
        return self.low <= value <= self.high


class IntLogUniformDistribution(BaseDistribution):
    """A uniform distribution on integers in the log domain.

    This object is instantiated by :func:`~optuna.trial.Trial.suggest_int`, and passed to
    :mod:`~optuna.samplers` in general.

    Attributes:
        low:
            Lower endpoint of the range of the distribution. ``low`` is included in the range.
        high:
            Upper endpoint of the range of the distribution. ``high`` is included in the range.
        step:
            A step for spacing between values.
    """

    def __init__(self, low, high, step=1):
        # type: (int, int, int) -> None

        if low > high:
            raise ValueError(
                "The `low` value must be smaller than or equal to the `high` value "
                "(low={}, high={}).".format(low, high)
            )
        if step <= 0:
            raise ValueError(
                "The `step` value must be non-zero positive value, but step={}.".format(step)
            )

        if low <= 0.0:
            raise ValueError(
                "The `low` value must be larger than 0 for a log distribution "
                "(low={}, high={}).".format(low, high)
            )

        self.low = low
        self.high = high
        self.step = step

    def to_external_repr(self, param_value_in_internal_repr):
        # type: (float) -> int

        return int(param_value_in_internal_repr)

    def to_internal_repr(self, param_value_in_external_repr):
        # type: (int) -> float

        return float(param_value_in_external_repr)

    def single(self):
        # type: () -> bool

        if self.low == self.high:
            return True
        return (self.high - self.low) < self.step

    def _contains(self, param_value_in_internal_repr):
        # type: (float) -> bool

        value = param_value_in_internal_repr
        return self.low <= value <= self.high


class CategoricalDistribution(BaseDistribution):
    """A categorical distribution.

    This object is instantiated by :func:`~optuna.trial.Trial.suggest_categorical`, and
    passed to :mod:`~optuna.samplers` in general.

    Args:
        choices:
            Parameter value candidates.

    .. note::

        Not all types are guaranteed to be compatible with all storages. It is recommended to
        restrict the types of the choices to :obj:`None`, :class:`bool`, :class:`int`,
        :class:`float` and :class:`str`.

    Attributes:
        choices:
            Parameter value candidates.
    """

    def __init__(self, choices):
        # type: (Sequence[CategoricalChoiceType]) -> None

        if len(choices) == 0:
            raise ValueError("The `choices` must contains one or more elements.")
        for choice in choices:
            if choice is not None and not isinstance(choice, (bool, int, float, str)):
                # Non-persistable choice types are warned about, not rejected.
                message = (
                    "Choices for a categorical distribution should be a tuple of None, bool, "
                    "int, float and str for persistent storage but contains {} which is of type "
                    "{}.".format(choice, type(choice).__name__)
                )
                warnings.warn(message)

                logger = logging._get_library_root_logger()
                logger.warning(message)

        self.choices = choices

    def to_external_repr(self, param_value_in_internal_repr):
        # type: (float) -> CategoricalChoiceType

        # Internal repr is the (float-valued) index into `choices`.
        return self.choices[int(param_value_in_internal_repr)]

    def to_internal_repr(self, param_value_in_external_repr):
        # type: (CategoricalChoiceType) -> float

        try:
            return self.choices.index(param_value_in_external_repr)
        except ValueError as e:
            raise ValueError(
                "'{}' not in {}.".format(param_value_in_external_repr, self.choices)
            ) from e

    def single(self):
        # type: () -> bool

        return len(self.choices) == 1

    def _contains(self, param_value_in_internal_repr):
        # type: (float) -> bool

        index = int(param_value_in_internal_repr)
        return 0 <= index < len(self.choices)


DISTRIBUTION_CLASSES = (
    UniformDistribution,
    LogUniformDistribution,
    DiscreteUniformDistribution,
    IntUniformDistribution,
    IntLogUniformDistribution,
    CategoricalDistribution,
)


def json_to_distribution(json_str):
    # type: (str) -> BaseDistribution
    """Deserialize a distribution in JSON format.

    Args:
        json_str: A JSON-serialized distribution.
435 | 436 | Returns: 437 | A deserialized distribution. 438 | """ 439 | 440 | json_dict = json.loads(json_str) 441 | 442 | if json_dict["name"] == CategoricalDistribution.__name__: 443 | json_dict["attributes"]["choices"] = tuple(json_dict["attributes"]["choices"]) 444 | 445 | for cls in DISTRIBUTION_CLASSES: 446 | if json_dict["name"] == cls.__name__: 447 | return cls(**json_dict["attributes"]) 448 | 449 | raise ValueError("Unknown distribution class: {}".format(json_dict["name"])) 450 | 451 | 452 | def distribution_to_json(dist): 453 | # type: (BaseDistribution) -> str 454 | """Serialize a distribution to JSON format. 455 | 456 | Args: 457 | dist: A distribution to be serialized. 458 | 459 | Returns: 460 | A JSON string of a given distribution. 461 | 462 | """ 463 | 464 | return json.dumps({"name": dist.__class__.__name__, "attributes": dist._asdict()}) 465 | 466 | 467 | def check_distribution_compatibility(dist_old, dist_new): 468 | # type: (BaseDistribution, BaseDistribution) -> None 469 | """A function to check compatibility of two distributions. 470 | 471 | Note that this method is not supposed to be called by library users. 472 | 473 | Args: 474 | dist_old: A distribution previously recorded in storage. 475 | dist_new: A distribution newly added to storage. 476 | 477 | Returns: 478 | True denotes given distributions are compatible. Otherwise, they are not. 479 | """ 480 | 481 | if dist_old.__class__ != dist_new.__class__: 482 | raise ValueError("Cannot set different distribution kind to the same parameter name.") 483 | 484 | if not isinstance(dist_old, CategoricalDistribution): 485 | return 486 | if not isinstance(dist_new, CategoricalDistribution): 487 | return 488 | if dist_old.choices != dist_new.choices: 489 | raise ValueError( 490 | CategoricalDistribution.__name__ + " does not support dynamic value space." 
491 | ) 492 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/storages/base.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import copy 3 | from typing import Any 4 | from typing import Dict 5 | from typing import List 6 | from typing import Optional 7 | 8 | # from optuna import study 9 | # from optuna.trial import TrialState 10 | from .. import study 11 | from ..trial import TrialState 12 | 13 | DEFAULT_STUDY_NAME_PREFIX = "no-name-" 14 | 15 | 16 | class BaseStorage(object, metaclass=abc.ABCMeta): 17 | """Base class for storages. 18 | 19 | This class is not supposed to be directly accessed by library users. 20 | 21 | A storage class abstracts a backend database and provides library internal interfaces to 22 | read/write histories of studies and trials. 23 | 24 | **Thread safety** 25 | 26 | A storage class can be shared among multiple threads, and must therefore be thread-safe. 27 | It must guarantee that return values such as `FrozenTrial`s are never modified. 28 | A storage class can assume that return values are never modified by its user. 29 | When a user modifies a return value from a storage class, the internal state of the storage 30 | may become inconsistent. Consequences are undefined. 31 | 32 | **Ownership of RUNNING trials** 33 | 34 | Trials in finished states are not allowed to be modified. 35 | Trials in the WAITING state are not allowed to be modified except for the `state` field. 36 | A storage class can assume that each RUNNING trial is only modified from a single process. 37 | When a user modifies a RUNNING trial from multiple processes, the internal state of the storage 38 | may become inconsistent. Consequences are undefined. 39 | A storage class is not intended for inter-process communication. 
40 | Consequently, users using optuna with MPI or other multi-process programs must make sure that 41 | only one process is used to access the optuna interface. 42 | 43 | **Consistency models** 44 | 45 | A storage class must support the monotonic-reads consistency model, that is, if a 46 | process reads data `X`, any successive reads on data `X` cannot return older values. 47 | It must support read-your-writes, that is, if a process writes to data `X`, 48 | any successive reads on data `X` from the same process must read the written 49 | value or one of the more recent values. 50 | 51 | **Stronger consistency requirements for special data** 52 | 53 | TODO(ytsmiling) Add load method to storage class implementations. 54 | 55 | Under a multi-worker setting, a storage class must return the latest values of any attributes 56 | of a study, not necessarily for the attributes of a `Trial`. 57 | However, if the `load(study_id)` method is called, any successive reads on the `state` 58 | attribute of a `Trial` are guaranteed to return the same or more recent values than the value 59 | at the time of call to the `load` method. 60 | Let `T` be a `Trial`. 61 | Let `P` be the process that last updated the `state` attribute of `T`. 62 | Then, any reads on any attributes of `T` are guaranteed to return the same or 63 | more recent values than any writes by `P` on the attribute before `P` updated 64 | the `state` attribute of `T`. 65 | The same applies for `user_attrs', 'system_attrs' and 'intermediate_values` attributes. 66 | 67 | .. note:: 68 | 69 | These attribute behaviors may become user customizable in the future. 70 | 71 | **Data persistence** 72 | 73 | A storage class does not guarantee that write operations are logged into a persistent 74 | storage, even when write methods succeed. 75 | Thus, when process failure occurs, some writes might be lost. 
76 | As exceptions, when a persistent storage is available, any writes on any attributes 77 | of `Study` and writes on `state` of `Trial` are guaranteed to be persistent. 78 | Additionally, any preceding writes on any attributes of `Trial` are guaranteed to 79 | be written into a persistent storage before writes on `state` of `Trial` succeed. 80 | The same applies for `user_attrs', 'system_attrs' and 'intermediate_values` attributes. 81 | 82 | .. note:: 83 | 84 | These attribute behaviors may become user customizable in the future. 85 | """ 86 | 87 | # Basic study manipulation 88 | 89 | @abc.abstractmethod 90 | def create_new_study(self, study_name: Optional[str] = None) -> int: 91 | """Create a new study from a name. 92 | 93 | If no name is specified, the storage class generates a name. 94 | The returned study ID is unique among all current and deleted studies. 95 | 96 | Args: 97 | study_name: 98 | Name of the new study to create. 99 | 100 | Returns: 101 | ID of the created study. 102 | 103 | Raises: 104 | :exc:`optuna.exceptions.DuplicatedStudyError`: 105 | If a study with the same ``study_name`` already exists. 106 | """ 107 | # TODO(ytsmiling) Fix RDB storage implementation to ensure unique `study_id`. 108 | raise NotImplementedError 109 | 110 | @abc.abstractmethod 111 | def delete_study(self, study_id: int) -> None: 112 | """Delete a study. 113 | 114 | Args: 115 | study_id: 116 | ID of the study. 117 | 118 | Raises: 119 | :exc:`KeyError`: 120 | If no study with the matching ``study_id`` exists. 121 | """ 122 | raise NotImplementedError 123 | 124 | @abc.abstractmethod 125 | def set_study_user_attr(self, study_id: int, key: str, value: Any) -> None: 126 | """Register a user-defined attribute to a study. 127 | 128 | This method overwrites any existing attribute. 129 | 130 | Args: 131 | study_id: 132 | ID of the study. 133 | key: 134 | Attribute key. 135 | value: 136 | Attribute value. It should be JSON serializable. 
137 | 138 | Raises: 139 | :exc:`KeyError`: 140 | If no study with the matching ``study_id`` exists. 141 | """ 142 | raise NotImplementedError 143 | 144 | @abc.abstractmethod 145 | def set_study_system_attr(self, study_id: int, key: str, value: Any) -> None: 146 | """Register an optuna-internal attribute to a study. 147 | 148 | This method overwrites any existing attribute. 149 | 150 | Args: 151 | study_id: 152 | ID of the study. 153 | key: 154 | Attribute key. 155 | value: 156 | Attribute value. It should be JSON serializable. 157 | 158 | Raises: 159 | :exc:`KeyError`: 160 | If no study with the matching ``study_id`` exists. 161 | """ 162 | raise NotImplementedError 163 | 164 | @abc.abstractmethod 165 | def set_study_direction(self, study_id: int, direction: study.StudyDirection) -> None: 166 | """Register an optimization problem direction to a study. 167 | 168 | Args: 169 | study_id: 170 | ID of the study. 171 | direction: 172 | Either :obj:`~optuna.study.StudyDirection.MAXIMIZE` or 173 | :obj:`~optuna.study.StudyDirection.MINIMIZE`. 174 | 175 | Raises: 176 | :exc:`KeyError`: 177 | If no study with the matching ``study_id`` exists. 178 | :exc:`ValueError`: 179 | If the direction is already set and the passed ``direction`` is the opposite 180 | direction or :obj:`~optuna.study.StudyDirection.NOT_SET`. 181 | """ 182 | raise NotImplementedError 183 | 184 | # Basic study access 185 | 186 | @abc.abstractmethod 187 | def get_study_id_from_name(self, study_name: str) -> int: 188 | """Read the ID of a study. 189 | 190 | Args: 191 | study_name: 192 | Name of the study. 193 | 194 | Returns: 195 | ID of the study. 196 | 197 | Raises: 198 | :exc:`KeyError`: 199 | If no study with the matching ``study_name`` exists. 200 | """ 201 | raise NotImplementedError 202 | 203 | @abc.abstractmethod 204 | def get_study_id_from_trial_id(self, trial_id: int) -> int: 205 | """Read the ID of a study to which a trial belongs. 206 | 207 | Args: 208 | trial_id: 209 | ID of the trial. 
210 | 211 | Returns: 212 | ID of the study. 213 | 214 | Raises: 215 | :exc:`KeyError`: 216 | If no trial with the matching ``trial_id`` exists. 217 | """ 218 | raise NotImplementedError 219 | 220 | @abc.abstractmethod 221 | def get_study_name_from_id(self, study_id: int) -> str: 222 | """Read the study name of a study. 223 | 224 | Args: 225 | study_id: 226 | ID of the study. 227 | 228 | Returns: 229 | Name of the study. 230 | 231 | Raises: 232 | :exc:`KeyError`: 233 | If no study with the matching ``study_id`` exists. 234 | """ 235 | raise NotImplementedError 236 | 237 | @abc.abstractmethod 238 | def get_study_direction(self, study_id: int) -> study.StudyDirection: 239 | """Read whether a study maximizes or minimizes an objective. 240 | 241 | Args: 242 | study_id: 243 | ID of a study. 244 | 245 | Returns: 246 | Optimization direction of the study. 247 | 248 | Raises: 249 | :exc:`KeyError`: 250 | If no study with the matching ``study_id`` exists. 251 | """ 252 | raise NotImplementedError 253 | 254 | @abc.abstractmethod 255 | def get_study_user_attrs(self, study_id: int) -> Dict[str, Any]: 256 | """Read the user-defined attributes of a study. 257 | 258 | Args: 259 | study_id: 260 | ID of the study. 261 | 262 | Returns: 263 | Dictionary with the user attributes of the study. 264 | 265 | Raises: 266 | :exc:`KeyError`: 267 | If no study with the matching ``study_id`` exists. 268 | """ 269 | raise NotImplementedError 270 | 271 | @abc.abstractmethod 272 | def get_study_system_attrs(self, study_id: int) -> Dict[str, Any]: 273 | """Read the optuna-internal attributes of a study. 274 | 275 | Args: 276 | study_id: 277 | ID of the study. 278 | 279 | Returns: 280 | Dictionary with the optuna-internal attributes of the study. 281 | 282 | Raises: 283 | :exc:`KeyError`: 284 | If no study with the matching ``study_id`` exists. 
285 | """ 286 | raise NotImplementedError 287 | 288 | # Basic trial manipulation 289 | 290 | @abc.abstractmethod 291 | def create_new_trial( 292 | self, study_id: int, template_trial: Optional["FrozenTrial"] = None 293 | ) -> int: 294 | """Create and add a new trial to a study. 295 | 296 | The returned trial ID is unique among all current and deleted trials. 297 | 298 | Args: 299 | study_id: 300 | ID of the study. 301 | template_trial: 302 | Template :class:`~optuna.trial.FronzenTrial` with default user-attributes, 303 | system-attributes, intermediate-values, and a state. 304 | 305 | Returns: 306 | ID of the created trial. 307 | 308 | Raises: 309 | :exc:`KeyError`: 310 | If no study with the matching ``study_id`` exists. 311 | """ 312 | raise NotImplementedError 313 | 314 | @abc.abstractmethod 315 | def set_trial_state(self, trial_id: int, state: TrialState) -> bool: 316 | """Update the state of a trial. 317 | 318 | Args: 319 | trial_id: 320 | ID of the trial. 321 | state: 322 | New state of the trial. 323 | 324 | Returns: 325 | :obj:`True` if the state is successfully updated. 326 | :obj:`False` if the state is kept the same. 327 | The latter happens when this method tries to update the state of 328 | :obj:`~optuna.trial.TrialState.RUNNING` trial to 329 | :obj:`~optuna.trial.TrialState.RUNNING`. 330 | 331 | Raises: 332 | :exc:`KeyError`: 333 | If no trial with the matching ``trial_id`` exists. 334 | :exc:`RuntimeError`: 335 | If the trial is already finished. 336 | """ 337 | raise NotImplementedError 338 | 339 | @abc.abstractmethod 340 | def set_trial_param( 341 | self, 342 | trial_id: int, 343 | param_name: str, 344 | param_value_internal: float, 345 | distribution: "distributions.BaseDistribution", 346 | ) -> bool: 347 | """Add a parameter to a trial. 348 | 349 | Args: 350 | trial_id: 351 | ID of the trial. 352 | param_name: 353 | Name of the parameter. 354 | param_value_internal: 355 | Internal representation of the parameter value. 
356 | distribution: 357 | Sampled distribution of the parameter. 358 | 359 | Returns: 360 | :obj:`False` when the parameter is already set to the trial, :obj:`True` otherwise. 361 | 362 | Raises: 363 | :exc:`KeyError`: 364 | If no trial with the matching ``trial_id`` exists. 365 | :exc:`RuntimeError`: 366 | If the trial is already finished. 367 | """ 368 | raise NotImplementedError 369 | 370 | @abc.abstractmethod 371 | def get_trial_number_from_id(self, trial_id: int) -> int: 372 | """Read the trial number of a trial. 373 | 374 | .. note:: 375 | 376 | The trial number is only unique within a study, and is sequential. 377 | 378 | Args: 379 | trial_id: 380 | ID of the trial. 381 | 382 | Returns: 383 | Number of the trial. 384 | 385 | Raises: 386 | :exc:`KeyError`: 387 | If no trial with the matching ``trial_id`` exists. 388 | """ 389 | raise NotImplementedError 390 | 391 | @abc.abstractmethod 392 | def get_trial_param(self, trial_id: int, param_name: str) -> float: 393 | """Read the parameter of a trial. 394 | 395 | Args: 396 | trial_id: 397 | ID of the trial. 398 | param_name: 399 | Name of the parameter. 400 | 401 | Returns: 402 | Internal representation of the parameter. 403 | 404 | Raises: 405 | :exc:`KeyError`: 406 | If no trial with the matching ``trial_id`` exists. 407 | If no such parameter exists. 408 | """ 409 | raise NotImplementedError 410 | 411 | @abc.abstractmethod 412 | def set_trial_value(self, trial_id: int, value: float) -> None: 413 | """Set a return value of an objective function. 414 | 415 | This method overwrites any existing trial value. 416 | 417 | Args: 418 | trial_id: 419 | ID of the trial. 420 | value: 421 | Value of the objective function. 422 | 423 | Raises: 424 | :exc:`KeyError`: 425 | If no trial with the matching ``trial_id`` exists. 426 | :exc:`RuntimeError`: 427 | If the trial is already finished. 
428 | """ 429 | raise NotImplementedError 430 | 431 | @abc.abstractmethod 432 | def set_trial_intermediate_value( 433 | self, trial_id: int, step: int, intermediate_value: float 434 | ) -> bool: 435 | """Report an intermediate value of an objective function. 436 | 437 | Args: 438 | trial_id: 439 | ID of the trial. 440 | step: 441 | Step of the trial (e.g., the epoch when training a neural network). 442 | intermediate_value: 443 | Intermediate value corresponding to the step. 444 | 445 | Returns: 446 | :obj:`False` when the step is already set, :obj:`True` otherwise. 447 | 448 | Raises: 449 | :exc:`KeyError`: 450 | If no trial with the matching ``trial_id`` exists. 451 | :exc:`RuntimeError`: 452 | If the trial is already finished. 453 | """ 454 | raise NotImplementedError 455 | 456 | @abc.abstractmethod 457 | def set_trial_user_attr(self, trial_id: int, key: str, value: Any) -> None: 458 | """Set a user-defined attribute to a trial. 459 | 460 | This method overwrites any existing attribute. 461 | 462 | Args: 463 | trial_id: 464 | ID of the trial. 465 | key: 466 | Attribute key. 467 | value: 468 | Attribute value. It should be JSON serializable. 469 | 470 | Raises: 471 | :exc:`KeyError`: 472 | If no trial with the matching ``trial_id`` exists. 473 | :exc:`RuntimeError`: 474 | If the trial is already finished. 475 | """ 476 | raise NotImplementedError 477 | 478 | @abc.abstractmethod 479 | def set_trial_system_attr(self, trial_id: int, key: str, value: Any) -> None: 480 | """Set an optuna-internal attribute to a trial. 481 | 482 | This method overwrites any existing attribute. 483 | 484 | Args: 485 | trial_id: 486 | ID of the trial. 487 | key: 488 | Attribute key. 489 | value: 490 | Attribute value. It should be JSON serializable. 491 | 492 | Raises: 493 | :exc:`KeyError`: 494 | If no trial with the matching ``trial_id`` exists. 495 | :exc:`RuntimeError`: 496 | If the trial is already finished. 
497 | """ 498 | raise NotImplementedError 499 | 500 | # Basic trial access 501 | 502 | @abc.abstractmethod 503 | def get_trial(self, trial_id: int) -> "FrozenTrial": 504 | """Read a trial. 505 | 506 | Args: 507 | trial_id: 508 | ID of the trial. 509 | 510 | Returns: 511 | Trial with a matching trial ID. 512 | 513 | Raises: 514 | :exc:`KeyError`: 515 | If no trial with the matching ``trial_id`` exists. 516 | """ 517 | raise NotImplementedError 518 | 519 | @abc.abstractmethod 520 | def get_all_trials(self, study_id: int, deepcopy: bool = True) -> List["FrozenTrial"]: 521 | """Read all trials in a study. 522 | 523 | Args: 524 | study_id: 525 | ID of the study. 526 | deepcopy: 527 | Whether to copy the list of trials before returning. 528 | Set to :obj:`True` if you intend to update the list or elements of the list. 529 | 530 | Returns: 531 | List of trials in the study. 532 | 533 | Raises: 534 | :exc:`KeyError`: 535 | If no study with the matching ``study_id`` exists. 536 | """ 537 | raise NotImplementedError 538 | 539 | @abc.abstractmethod 540 | def get_n_trials(self, study_id: int, state: Optional[TrialState] = None) -> int: 541 | """Count the number of trials in a study. 542 | 543 | Args: 544 | study_id: 545 | ID of the study. 546 | state: 547 | :class:`~optuna.trial.TrialState` to filter trials. 548 | 549 | Returns: 550 | Number of trials in the study. 551 | 552 | Raises: 553 | :exc:`KeyError`: 554 | If no study with the matching ``study_id`` exists. 555 | """ 556 | raise NotImplementedError 557 | 558 | def get_best_trial(self, study_id: int) -> "FrozenTrial": 559 | """Return the trial with the best value in a study. 560 | 561 | Args: 562 | study_id: 563 | ID of the study. 564 | 565 | Returns: 566 | The trial with the best objective value among all finished trials in the study. 567 | 568 | Raises: 569 | :exc:`KeyError`: 570 | If no study with the matching ``study_id`` exists. 571 | :exc:`RuntimeError`: 572 | If no trials have been completed. 
573 | """ 574 | all_trials = self.get_all_trials(study_id, deepcopy=False) 575 | all_trials = [t for t in all_trials if t.state is TrialState.COMPLETE] 576 | 577 | if len(all_trials) == 0: 578 | raise ValueError("No trials are completed yet.") 579 | 580 | if self.get_study_direction(study_id) == study.StudyDirection.MAXIMIZE: 581 | best_trial = max(all_trials, key=lambda t: t.value) 582 | else: 583 | best_trial = min(all_trials, key=lambda t: t.value) 584 | 585 | return copy.deepcopy(best_trial) 586 | 587 | def get_trial_params(self, trial_id: int) -> Dict[str, Any]: 588 | """Read the parameter dictionary of a trial. 589 | 590 | Args: 591 | trial_id: 592 | ID of the trial. 593 | 594 | Returns: 595 | Dictionary of a parameters. Keys are parameter names and values are internal 596 | representations of the parameter values. 597 | 598 | Raises: 599 | :exc:`KeyError`: 600 | If no trial with the matching ``trial_id`` exists. 601 | """ 602 | return self.get_trial(trial_id).params 603 | 604 | def get_trial_user_attrs(self, trial_id: int) -> Dict[str, Any]: 605 | """Read the user-defined attributes of a trial. 606 | 607 | Args: 608 | trial_id: 609 | ID of the trial. 610 | 611 | Returns: 612 | Dictionary with the user-defined attributes of the trial. 613 | 614 | Raises: 615 | :exc:`KeyError`: 616 | If no trial with the matching ``trial_id`` exists. 617 | """ 618 | return self.get_trial(trial_id).user_attrs 619 | 620 | def get_trial_system_attrs(self, trial_id: int) -> Dict[str, Any]: 621 | """Read the optuna-internal attributes of a trial. 622 | 623 | Args: 624 | trial_id: 625 | ID of the trial. 626 | 627 | Returns: 628 | Dictionary with the optuna-internal attributes of the trial. 629 | 630 | Raises: 631 | :exc:`KeyError`: 632 | If no trial with the matching ``trial_id`` exists. 
633 | """ 634 | return self.get_trial(trial_id).system_attrs 635 | 636 | def remove_session(self) -> None: 637 | """Clean up all connections to a database.""" 638 | pass 639 | 640 | def check_trial_is_updatable(self, trial_id: int, trial_state: TrialState) -> None: 641 | """Check whether a trial state is updatable. 642 | 643 | Args: 644 | trial_id: 645 | ID of the trial. 646 | Only used for an error message. 647 | trial_state: 648 | Trial state to check. 649 | 650 | Raises: 651 | :exc:`RuntimeError`: 652 | If the trial is already finished. 653 | """ 654 | if trial_state.is_finished(): 655 | trial = self.get_trial(trial_id) 656 | raise RuntimeError( 657 | "Trial#{} has already finished and can not be updated.".format(trial.number) 658 | ) 659 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/study.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import datetime 3 | import gc 4 | import math 5 | import threading 6 | import warnings 7 | 8 | 9 | import numpy as np 10 | import pandas as pd # NOQA 11 | 12 | 13 | # from optuna._study_direction import StudyDirection 14 | # from optuna import exceptions 15 | # from optuna import logging 16 | # from optuna import progress_bar as pbar_module 17 | # from optuna import pruners 18 | # from optuna import samplers 19 | # from optuna import storages 20 | # from optuna import trial as trial_module 21 | # from optuna.trial import FrozenTrial 22 | # from optuna.trial import TrialState 23 | 24 | from ._study_direction import StudyDirection 25 | from . import exceptions 26 | from . import logging 27 | from . import progress_bar as pbar_module 28 | from . import pruners 29 | from . import samplers 30 | from . import storages 31 | from . 
class BaseStudy(object):
    """Read-only view of one study held in a storage backend.

    Exposes convenience accessors for the best trial and the trial
    history; subclasses (e.g. :class:`Study`) add mutation and
    optimization on top.
    """

    def __init__(self, study_id, storage):
        # type: (int, storages.BaseStorage) -> None

        self._study_id = study_id
        self._storage = storage

    @property
    def best_params(self):
        # type: () -> Dict[str, Any]
        """Parameters of the best trial in the study.

        Returns:
            A dictionary containing parameters of the best trial.
        """
        return self.best_trial.params

    @property
    def best_value(self):
        # type: () -> float
        """Best objective value reached in the study.

        Returns:
            A float representing the best objective value.
        """
        value = self.best_trial.value
        assert value is not None
        return value

    @property
    def best_trial(self):
        # type: () -> FrozenTrial
        """Best trial in the study, as chosen by the storage backend.

        Returns:
            A :class:`~optuna.FrozenTrial` object of the best trial.
        """
        return self._storage.get_best_trial(self._study_id)

    @property
    def direction(self):
        # type: () -> StudyDirection
        """Optimization direction of the study (minimize or maximize).

        Returns:
            A :class:`~optuna.study.StudyDirection` object.
        """
        return self._storage.get_study_direction(self._study_id)

    @property
    def trials(self):
        # type: () -> List[FrozenTrial]
        """All trials in the study, ordered by trial number.

        Short form of ``self.get_trials(deepcopy=True)``.

        Returns:
            A list of :class:`~optuna.FrozenTrial` objects.
        """
        return self.get_trials()

    def get_trials(self, deepcopy=True):
        # type: (bool) -> List[FrozenTrial]
        """Return all trials in the study, ordered by trial number.

        Args:
            deepcopy:
                Flag to control whether to apply ``copy.deepcopy()`` to the
                trials. With :obj:`False` the storage's internal objects are
                handed out directly, so callers must treat them as
                read-only or the study state may become corrupted.

        Returns:
            A list of :class:`~optuna.FrozenTrial` objects.
        """
        return self._storage.get_all_trials(self._study_id, deepcopy=deepcopy)

    @property
    def storage(self):
        # type: () -> storages.BaseStorage
        """The underlying storage object.

        .. deprecated:: 0.15.0
            The direct use of storage is deprecated; access storage only
            through the study's public methods
            (e.g., :meth:`~optuna.study.Study.set_user_attr`).

        Returns:
            A storage object.
        """
        # Emit the same deprecation text through both channels, exactly as
        # before: a DeprecationWarning and a logger warning.
        message = (
            "The direct use of storage is deprecated. "
            "Please access to storage via study's public methods "
            "(e.g., `Study.set_user_attr`)"
        )
        warnings.warn(message, DeprecationWarning)
        _logger.warning(message)
        return self._storage
# Methods of `Study` (the class header, subclassing BaseStudy, is declared above).

def __init__(
    self,
    study_name,  # type: str
    storage,  # type: Union[str, storages.BaseStorage]
    sampler=None,  # type: samplers.BaseSampler
    pruner=None,  # type: pruners.BasePruner
    seed=None,
    cat_preds=None,
):
    # type: (...) -> None
    """Bind this study to ``study_name`` inside ``storage`` and set up sampling state."""
    # Seconds subtracted from the elapsed time in _optimize_sequential's
    # timeout check (effectively extends the time budget; set elsewhere).
    self.add_on = 0

    self.study_name = study_name
    storage = storages.get_storage(storage)
    study_id = storage.get_study_id_from_name(study_name)
    super(Study, self).__init__(study_id, storage)

    # TPE sampling by default.
    self.sampler = sampler or samplers.TPESampler()
    # No pruning by default.
    self.pruner = pruner or pruners.NopPruner()

    self._optimize_lock = threading.Lock()
    self._stop_flag = False

    # NOTE(review): populated outside this file; appears to track already
    # evaluated configurations.
    self.evaled = set()
    self.rnd = np.random.RandomState(seed=seed)

    self.cat_preds = cat_preds
    # FIX: was a bare `except:`, which swallowed every exception including
    # KeyboardInterrupt/SystemExit. Only AttributeError can legitimately
    # occur here (cat_preds is None or lacks `.values()`); anything else
    # should surface. As before, `cat_preds_set` is intentionally left
    # unset when cat_preds is not a mapping.
    try:
        self.cat_preds_set = set(cat_preds.values())
    except AttributeError:
        pass

    self.info = {}
    self.info["names"] = []


def __getstate__(self):
    # type: () -> Dict[Any, Any]
    """Drop the (unpicklable) lock when pickling."""
    state = self.__dict__.copy()
    del state["_optimize_lock"]
    return state


def __setstate__(self, state):
    # type: (Dict[Any, Any]) -> None
    """Recreate the lock removed by :meth:`__getstate__`."""
    self.__dict__.update(state)
    self._optimize_lock = threading.Lock()


@property
def user_attrs(self):
    # type: () -> Dict[str, Any]
    """Return user attributes.

    Returns:
        A dictionary containing all user attributes.
    """
    return self._storage.get_study_user_attrs(self._study_id)


@property
def system_attrs(self):
    # type: () -> Dict[str, Any]
    """Return system attributes.

    Returns:
        A dictionary containing all system attributes.
    """
    return self._storage.get_study_system_attrs(self._study_id)


def optimize(
    self,
    func,  # type: ObjectiveFuncType
    n_trials=None,  # type: Optional[int]
    timeout=None,  # type: Optional[float]
    n_jobs=1,  # type: int
    catch=(),  # type: Union[Tuple[()], Tuple[Type[Exception]]]
    callbacks=None,  # type: Optional[List[Callable[[Study, FrozenTrial], None]]]
    gc_after_trial=True,  # type: bool
    show_progress_bar=False,  # type: bool
    **kwargs,
):
    # type: (...) -> None
    """Optimize an objective function.

    Trials run strictly sequentially in this fork; ``n_jobs`` and
    ``show_progress_bar`` are accepted for API compatibility but unused.

    Args:
        func:
            A callable that implements objective function.
        n_trials:
            The number of trials. :obj:`None` means no limit on the number
            of trials; if ``timeout`` is also :obj:`None`, trials are
            created until a termination signal (Ctrl+C / SIGTERM) arrives.
        timeout:
            Stop study after the given number of second(s). :obj:`None`
            means no time limit.
        n_jobs:
            Accepted for compatibility; parallel execution is not
            implemented here.
        catch:
            A study continues to run even when a trial raises one of the
            exceptions specified in this argument. Default is an empty
            tuple, i.e. the study will stop for any exception.
        callbacks:
            Accepted for compatibility; not invoked in this fork.
        gc_after_trial:
            Flag to execute garbage collection at the end of each trial.
        show_progress_bar:
            Accepted for compatibility; progress bars are disabled here.
    """
    self._stop_flag = False

    # Optimize one trial at a time.
    self._optimize_sequential(
        func, n_trials, timeout, catch, callbacks, gc_after_trial, None, **kwargs
    )


def set_user_attr(self, key, value):
    # type: (str, Any) -> None
    """Set a user attribute to the study.

    Args:
        key: A key string of the attribute.
        value: A value of the attribute. The value should be JSON serializable.
    """
    self._storage.set_study_user_attr(self._study_id, key, value)


def set_system_attr(self, key, value):
    # type: (str, Any) -> None
    """Set a system attribute to the study.

    Note that Optuna internally uses this method to save system messages.
    Please use :func:`~optuna.study.Study.set_user_attr` to set users'
    attributes.

    Args:
        key: A key string of the attribute.
        value: A value of the attribute. The value should be JSON serializable.
    """
    self._storage.set_study_system_attr(self._study_id, key, value)


def trials_dataframe(
    self,
    attrs=(
        "number",
        "value",
        "datetime_start",
        "datetime_complete",
        "duration",
        "params",
        "user_attrs",
        "system_attrs",
        "state",
    ),  # type: Tuple[str, ...]
    multi_index=False,  # type: bool
):
    # type: (...) -> pd.DataFrame
    """Export trials as a pandas DataFrame.

    If there are no trials, an empty DataFrame is returned.

    Args:
        attrs:
            Field names of :class:`~optuna.FrozenTrial` to include as
            DataFrame columns.
        multi_index:
            Whether the returned DataFrame employs a MultiIndex. Columns
            that are hierarchical by nature such as ``(params, x)`` are
            flattened to ``params_x`` when set to :obj:`False`.

    Returns:
        A pandas DataFrame of trials in the :class:`~optuna.study.Study`.
    """
    trials = self.get_trials(deepcopy=False)

    # If no trials, return an empty dataframe.
    if not len(trials):
        return pd.DataFrame()

    assert all(isinstance(trial, FrozenTrial) for trial in trials)
    attrs_to_df_columns = collections.OrderedDict()  # type: Dict[str, str]
    for attr in attrs:
        if attr.startswith("_"):
            # Python conventional underscores are omitted in the dataframe.
            df_column = attr[1:]
        else:
            df_column = attr
        attrs_to_df_columns[attr] = df_column

    # column_agg aggregates column names: keys are FrozenTrial attributes
    # such as 'params'; values are dataframe columns such as
    # ('params', 'n_layers').
    column_agg = collections.defaultdict(set)  # type: Dict[str, Set]
    non_nested_attr = ""

    def _create_record_and_aggregate_column(trial):
        # type: (FrozenTrial) -> Dict[Tuple[str, str], Any]
        # Build one flat record per trial; dict-valued attributes (params,
        # user/system attrs) expand into one (column, key) cell each.
        record = {}
        for attr, df_column in attrs_to_df_columns.items():
            value = getattr(trial, attr)
            if isinstance(value, TrialState):
                # Convert TrialState to str and remove the common prefix.
                value = str(value).split(".")[-1]
            if isinstance(value, dict):
                for nested_attr, nested_value in value.items():
                    record[(df_column, nested_attr)] = nested_value
                    column_agg[attr].add((df_column, nested_attr))
            else:
                record[(df_column, non_nested_attr)] = value
                column_agg[attr].add((df_column, non_nested_attr))
        return record

    records = list([_create_record_and_aggregate_column(trial) for trial in trials])

    columns = sum(
        (sorted(column_agg[k]) for k in attrs if k in column_agg), []
    )  # type: List[Tuple[str, str]]

    df = pd.DataFrame(records, columns=pd.MultiIndex.from_tuples(columns))

    if not multi_index:
        # Flatten the `MultiIndex` columns where names are concatenated with
        # underscores. Filtering omits non-nested columns, avoiding unwanted
        # trailing underscores.
        df.columns = [
            "_".join(filter(lambda c: c, map(lambda c: str(c), col))) for col in columns
        ]

    return df


def _optimize_sequential(
    self,
    func,  # type: ObjectiveFuncType
    n_trials,  # type: Optional[int]
    timeout,  # type: Optional[float]
    catch,  # type: Union[Tuple[()], Tuple[Type[Exception]]]
    callbacks,  # type: Optional[List[Callable[[Study, FrozenTrial], None]]]
    gc_after_trial,  # type: bool
    time_start,  # type: Optional[datetime.datetime]
    **kwargs,
):
    # type: (...) -> None
    """Run trials one after another until ``n_trials``, ``timeout``, or stop.

    NOTE(review): ``callbacks`` is accepted but never invoked in this fork.
    """
    # Trial counter.
    i_trial = 0

    # Timer.
    if time_start is None:
        time_start = datetime.datetime.now()

    while True:
        if self._stop_flag:
            break

        # Check number of trials.
        if n_trials is not None:
            if i_trial >= n_trials:
                break
            i_trial += 1

        # Check if the allotted time has expired; `self.add_on` is
        # subtracted from the elapsed wall-clock time.
        if timeout is not None:
            elapsed_seconds = (datetime.datetime.now() - time_start).total_seconds()
            if elapsed_seconds - self.add_on >= timeout:
                break

        # Reset per-trial bookkeeping before each run.
        self.info["names"] = []
        self._run_trial(func, catch, gc_after_trial, **kwargs)

    self._storage.remove_session()


def _run_trial(
    self,
    func,  # type: ObjectiveFuncType
    catch,  # type: Union[Tuple[()], Tuple[Type[Exception]]]
    gc_after_trial,  # type: bool
    **kwargs,
):
    # type: (...) -> trial_module.Trial
    """Create one trial, evaluate ``func`` on it, and record the result."""
    # trial_id enumerates the trials 0, 1, 2, ...
    trial_id = self._storage.create_new_trial(self._study_id)
    # Create a new trial object bound to this study.
    trial = trial_module.Trial(self, trial_id)
    # Trial number is 0, 1, 2, ...
    trial_number = trial.number

    # FIX: `catch` was accepted but ignored, although Study.optimize's
    # docstring promises that exceptions listed in `catch` keep the study
    # running. The default `catch=()` catches nothing, so behavior is
    # unchanged for existing callers.
    try:
        result = func(trial, **kwargs)
    except catch as e:
        message = (
            "Setting status of trial#{} as {} because of the following "
            "error: {}".format(trial_number, TrialState.FAIL, repr(e))
        )
        _logger.warning(message)
        self._storage.set_trial_system_attr(trial_id, "fail_reason", message)
        self._storage.set_trial_state(trial_id, TrialState.FAIL)
        return trial

    # The following mitigates memory problems that can occur in some
    # environments (e.g., services that use computing containers such as
    # CircleCI).
    if gc_after_trial:
        gc.collect()

    # Record a float result, or mark the trial failed.
    try:
        result = float(result)
    except (
        ValueError,
        TypeError,
    ):
        message = (
            "Setting status of trial#{} as {} because the returned value from the "
            "objective function cannot be casted to float. Returned value is: "
            "{}".format(trial_number, TrialState.FAIL, repr(result))
        )
        _logger.warning(message)
        self._storage.set_trial_system_attr(trial_id, "fail_reason", message)
        self._storage.set_trial_state(trial_id, TrialState.FAIL)
        return trial

    if math.isnan(result):
        message = (
            "Setting status of trial#{} as {} because the objective function "
            "returned {}.".format(trial_number, TrialState.FAIL, result)
        )
        _logger.warning(message)
        self._storage.set_trial_system_attr(trial_id, "fail_reason", message)
        self._storage.set_trial_state(trial_id, TrialState.FAIL)
        return trial

    # Log results.
    self._storage.set_trial_value(trial_id, result)
    self._storage.set_trial_state(trial_id, TrialState.COMPLETE)

    return trial


def _log_completed_trial(self, trial, result):
    # type: (trial_module.Trial, float) -> None
    """Log one finished trial with the current best (call site is disabled)."""
    _logger.info(
        "Finished trial#{} with value: {} with parameters: {}. "
        "Best is trial#{} with value: {}.".format(
            trial.number, result, trial.params, self.best_trial.number, self.best_value
        )
    )
def create_study(
    storage=None,  # type: Union[None, str, storages.BaseStorage]
    sampler=None,  # type: samplers.BaseSampler
    pruner=None,  # type: pruners.BasePruner
    direction="minimize",  # type: str
    load_if_exists=False,  # type: bool
    seed=None,
    cat_preds=None,
):
    # type: (...) -> Study
    """Create a new :class:`~optuna.study.Study`.

    Args:
        storage:
            Database URL. :obj:`None` selects the in-memory storage, in
            which case the study is not persistent.
        sampler:
            Sampler implementing the value-suggestion algorithm;
            :obj:`None` selects :class:`~optuna.samplers.TPESampler`.
        pruner:
            Pruner deciding early stopping of unpromising trials.
        direction:
            Direction of optimization: ``minimize`` or ``maximize``.
        load_if_exists:
            NOTE(review): accepted for API compatibility but currently
            unused by this simplified fork.
        seed:
            Seed forwarded to the study's random state.
        cat_preds:
            Forwarded to :class:`Study` (categorical predicate mapping).

    Returns:
        A :class:`~optuna.study.Study` object.
    """
    # Resolve the storage argument (None -> in-memory backend).
    storage = storages.get_storage(storage)

    # With the in-memory backend the new study id is always 0.
    study_id = storage.create_new_study(None)

    # Auto-generated name (a random string starting with "no-name").
    study_name = storage.get_study_name_from_id(study_id)

    study = Study(
        study_name=study_name,
        storage=storage,
        sampler=sampler,
        pruner=pruner,
        seed=seed,
        cat_preds=cat_preds,
    )

    # Map the textual direction onto the enum; kept after study creation to
    # preserve the original behavior on invalid input.
    directions = {
        "minimize": StudyDirection.MINIMIZE,
        "maximize": StudyDirection.MAXIMIZE,
    }
    if direction not in directions:
        raise ValueError("Please set either 'minimize' or 'maximize' to direction.")

    study._storage.set_study_direction(study_id, directions[direction])

    return study
# Numerical floor used to keep divisions/logs away from exact zero.
EPS = 1e-12


def default_gamma(x):
    # type: (int) -> int
    """Return the number of "good" observations: 10% of ``x``, capped at 25."""
    n_below = int(np.ceil(0.1 * x))
    return 25 if n_below > 25 else n_below


def hyperopt_default_gamma(x):
    # type: (int) -> int
    """Return hyperopt's split size: ``0.25 * sqrt(x)``, capped at 25."""
    n_below = int(np.ceil(0.25 * np.sqrt(x)))
    return 25 if n_below > 25 else n_below


def default_weights(x):
    # type: (int) -> np.ndarray
    """Return per-trial weights for ``x`` finished trials.

    Up to 25 trials every trial weighs 1.0; beyond that, the oldest
    ``x - 25`` trials get a linear ramp from ``1/x`` up to 1.0 and the 25
    newest keep weight 1.0 (older observations count less).
    """
    if x == 0:
        return np.asarray([])
    if x < 25:
        return np.ones(x)
    older_ramp = np.linspace(1.0 / x, 1.0, num=x - 25)
    newest_flat = np.ones(25)
    return np.concatenate([older_ramp, newest_flat], axis=0)
def __init__(
    self,
    consider_prior=True,  # type: bool
    prior_weight=1.0,  # type: float
    consider_magic_clip=True,  # type: bool
    consider_endpoints=False,  # type: bool
    n_startup_trials=10,  # type: int
    n_ei_candidates=24,  # type: int  # NOTE(review): original comment reads "USE 28 FOR ML EXPERIMENTS?"
    gamma=default_gamma,  # type: Callable[[int], int]
    weights=default_weights,  # type: Callable[[int], np.ndarray]
    seed=None,  # type: Optional[int]
    k=5,  # BOExplain addition: passed to TPESampler._compare as `k` -- exact semantics not visible here, TODO confirm
):
    # type: (...) -> None

    # Bundle the Parzen-estimator configuration into one parameter object
    # shared by all per-parameter estimators.
    self._parzen_estimator_parameters = _ParzenEstimatorParameters(
        consider_prior, prior_weight, consider_magic_clip, consider_endpoints, weights
    )
    self._prior_weight = prior_weight
    self._n_startup_trials = n_startup_trials
    self._n_ei_candidates = n_ei_candidates
    self._gamma = gamma
    self._weights = weights
    self._k = k

    self._rng = np.random.RandomState(seed)
    # Fallback sampler used during the start-up phase (fewer than
    # `n_startup_trials` observations).
    self._random_sampler = random.RandomSampler(seed=seed)

def reseed_rng(self) -> None:
    # Re-seed both the internal RNG and the fallback random sampler with
    # fresh (non-deterministic) seeds.
    self._rng = np.random.RandomState()
    self._random_sampler.reseed_rng()

def infer_relative_search_space(self, study, trial):
    # type: (Study, FrozenTrial) -> Dict[str, BaseDistribution]

    # TPE samples each parameter independently; no relative search space.
    return {}

def sample_relative(self, study, trial, search_space):
    # type: (Study, FrozenTrial, Dict[str, BaseDistribution]) -> Dict[str, Any]

    # See infer_relative_search_space: relative sampling is unused.
    return {}

def sample_independent(self, study, trial, param_name, param_distribution):
    # type: (Study, FrozenTrial, str, BaseDistribution) -> Any
    """Suggest a value for one parameter using TPE.

    Unlike upstream optuna, this fork returns a 3-tuple
    ``(value, samples, scores)``; ``samples`` and ``scores`` are
    :obj:`None` during the random start-up phase.
    """
    # Parameter values and scores of previous iterations, of the form
    # (param_value, (-step, value)).
    values, scores = _get_observation_pairs(study, param_name, trial)

    n = len(values)

    # Randomly sample at the start, until enough observations exist.
    if n < self._n_startup_trials:
        return (
            self._random_sampler.sample_independent(
                study, trial, param_name, param_distribution
            ),
            None,
            None,
        )
    # Split the hyperparameters into good=below and bad=above. The best 10%
    # of values or the 25 best values go in "below" (see self._gamma).
    below_param_values, above_param_values = self._split_observation_pairs(values, scores)

    # Dispatch on the distribution type; every branch returns the
    # (value, samples, scores) triple.
    if isinstance(param_distribution, distributions.UniformDistribution):
        return self._sample_uniform(param_distribution, below_param_values, above_param_values)
    elif isinstance(param_distribution, distributions.LogUniformDistribution):
        return self._sample_loguniform(
            param_distribution, below_param_values, above_param_values
        )
    elif isinstance(param_distribution, distributions.DiscreteUniformDistribution):
        return self._sample_discrete_uniform(
            param_distribution, below_param_values, above_param_values
        )
    elif isinstance(param_distribution, distributions.IntUniformDistribution):
        return self._sample_int(param_distribution, below_param_values, above_param_values)
    elif isinstance(param_distribution, distributions.IntLogUniformDistribution):
        return self._sample_int_loguniform(
            param_distribution, below_param_values, above_param_values
        )
    elif isinstance(param_distribution, distributions.CategoricalDistribution):
        # Categorical sampling works on choice indices; translate back to
        # the actual choice object before returning.
        index, samples, scores = self._sample_categorical_index(
            param_distribution, below_param_values, above_param_values
        )
        return param_distribution.choices[index], samples, scores
    else:
        distribution_list = [
            distributions.UniformDistribution.__name__,
            distributions.LogUniformDistribution.__name__,
            distributions.DiscreteUniformDistribution.__name__,
            distributions.IntUniformDistribution.__name__,
            distributions.IntLogUniformDistribution.__name__,
            distributions.CategoricalDistribution.__name__,
        ]
        raise NotImplementedError(
            "The distribution {} is not implemented. "
            "The parameter distribution should be one of the {}".format(
                param_distribution, distribution_list
            )
        )

def _split_observation_pairs(
    self,
    config_vals,  # type: List[Optional[float]]
    loss_vals,  # type: List[Tuple[float, float]]
):
    # type: (...) -> Tuple[np.ndarray, np.ndarray]
    """Split observations into good ("below") and bad ("above") groups."""
    # Parameters and objective function values to np arrays; the structured
    # dtype makes argsort order lexicographically by (step, score).
    config_vals = np.asarray(config_vals)
    loss_vals = np.asarray(loss_vals, dtype=[("step", float), ("score", float)])

    # Number of good observations.
    n_below = self._gamma(len(config_vals))
    # Indices of values that would sort the losses in ascending order.
    loss_ascending = np.argsort(loss_vals)
    # Best parameter values; None entries (missing params) are dropped.
    below = config_vals[np.sort(loss_ascending[:n_below])]
    below = np.asarray([v for v in below if v is not None], dtype=float)
    # Worst parameter values.
    above = config_vals[np.sort(loss_ascending[n_below:])]
    above = np.asarray([v for v in above if v is not None], dtype=float)
    return below, above

def _sample_uniform(self, distribution, below, above):
    # type: (distributions.UniformDistribution, np.ndarray, np.ndarray) -> float

    # Delegates to the shared numerical sampler; returns its
    # (value, samples, scores) triple unchanged.
    low = distribution.low
    high = distribution.high
    return self._sample_numerical(low, high, below, above)

def _sample_loguniform(self, distribution, below, above):
    # type: (distributions.LogUniformDistribution, np.ndarray, np.ndarray) -> float

    # Same as _sample_uniform but operating in log space.
    low = distribution.low
    high = distribution.high
    return self._sample_numerical(low, high, below, above, is_log=True)

def _sample_discrete_uniform(self, distribution, below, above):
    # type:(distributions.DiscreteUniformDistribution, np.ndarray, np.ndarray) -> float

    # Step size (1 for integers).
    q = distribution.q
    # Value range.
    r = distribution.high - distribution.low
    # [low, high] is shifted to [0, r] to align sampled values at regular
    # intervals; +-0.5*q widens the ends so rounding is even at the endpoints.
    low = 0 - 0.5 * q
    high = r + 0.5 * q

    # Shift below and above to [0, r], i.e. hyperparameter values become
    # 0, 1, 2, ... when q=1.
    # NOTE(review): this mutates the caller's arrays in place.
    above -= distribution.low
    below -= distribution.low

    # Best sample, shifted back into the original range and clipped.
    best_sample, samples, scores = self._sample_numerical(low, high, below, above, q=q)
    best_sample += distribution.low
    best_sample = min(max(best_sample, distribution.low), distribution.high)
    samples = samples + distribution.low
    return best_sample, samples, scores
def _sample_int(self, distribution, below, above):
    # type: (distributions.IntUniformDistribution, np.ndarray, np.ndarray) -> int
    """Sample an integer by reusing the discrete-uniform path with q=step.

    Returns ``(best_int, int_samples, scores)``.
    """
    # IntUniformDistribution is the same as DiscreteUniformDistribution
    # with q = step.
    d = distributions.DiscreteUniformDistribution(
        low=distribution.low, high=distribution.high, q=distribution.step
    )
    best_sample, samples, scores = self._sample_discrete_uniform(d, below, above)
    samples = [int(sample) for sample in samples]
    return int(best_sample), samples, scores


def _sample_int_loguniform(self, distribution, below, above):
    # type: (distributions.IntLogUniformDistribution, np.ndarray, np.ndarray) -> int
    """Sample an integer from a log-uniform range.

    FIX: `_sample_numerical` returns a ``(best, samples, scores)`` tuple in
    this fork (see `_sample_discrete_uniform` / `_sample_int`), but this
    method previously treated that tuple as a scalar and crashed with a
    TypeError on the arithmetic below. Unpack the tuple and snap each value
    onto the integer step grid, mirroring `_sample_int`.

    NOTE(review): `_sample_numerical(is_log=True)` exponentiates only the
    best value, not the candidate samples; the samples are converted with
    the same grid formula as the best value here — confirm against the
    callers of the (samples, scores) channel.
    """
    low = distribution.low - 0.5
    high = distribution.high + 0.5

    best_sample, samples, scores = self._sample_numerical(
        low, high, below, above, is_log=True
    )

    def _to_int(value):
        # Snap onto the step grid and clip into [low, high].
        snapped = (
            np.round((value - distribution.low) / distribution.step) * distribution.step
            + distribution.low
        )
        return int(min(max(snapped, distribution.low), distribution.high))

    samples = [_to_int(sample) for sample in samples]
    return _to_int(best_sample), samples, scores


def _sample_numerical(
    self,
    low,  # type: float
    high,  # type: float
    below,  # type: np.ndarray
    above,  # type: np.ndarray
    q=None,  # type: Optional[float]
    is_log=False,  # type: bool
):
    # type: (...) -> float
    """Draw a numerical value maximizing ``l(x)/g(x)`` (TPE acquisition).

    Fits one Parzen estimator to the good ("below") observations and one to
    the bad ("above") observations, samples candidates from the former, and
    lets ``TPESampler._compare`` pick the best by likelihood ratio.
    Returns ``(best_value, candidate_samples, scores)``.
    """
    # Work in log space for log-scaled distributions.
    if is_log:
        low = np.log(low)
        high = np.log(high)
        below = np.log(below)
        above = np.log(above)

    # Number of EI candidates (24 by default).
    size = (self._n_ei_candidates,)

    # KDE over the good points: sigmas and sampling weights in sorted order.
    parzen_estimator_below = _ParzenEstimator(
        mus=below, low=low, high=high, parameters=self._parzen_estimator_parameters
    )
    # Draw the candidate samples from the "good" mixture.
    samples_below = self._sample_from_gmm(
        parzen_estimator=parzen_estimator_below, low=low, high=high, q=q, size=size,
    )
    # Log-likelihoods of the candidates under the good mixture.
    log_likelihoods_below = self._gmm_log_pdf(
        samples=samples_below,
        parzen_estimator=parzen_estimator_below,
        low=low,
        high=high,
        q=q,
    )

    # KDE over the bad points, evaluated at the same candidates.
    parzen_estimator_above = _ParzenEstimator(
        mus=above, low=low, high=high, parameters=self._parzen_estimator_parameters
    )
    log_likelihoods_above = self._gmm_log_pdf(
        samples=samples_below,
        parzen_estimator=parzen_estimator_above,
        low=low,
        high=high,
        q=q,
    )

    ret, samples, scores = TPESampler._compare(
        samples=samples_below, log_l=log_likelihoods_below, log_g=log_likelihoods_above, k=self._k
    )
    ret = float(ret[0])
    # Only the best value is mapped back out of log space here.
    ret = math.exp(ret) if is_log else ret
    return ret, samples, scores
(distributions.CategoricalDistribution, np.ndarray, np.ndarray) -> int 377 | 378 | # parameter values 379 | choices = distribution.choices 380 | # convert the good=below and bad=above values to ints 381 | below = list(map(int, below)) 382 | above = list(map(int, above)) 383 | upper = len(choices) 384 | # number of ei candidates = 24 385 | size = (self._n_ei_candidates,) 386 | 387 | # Ramp of weights, weights are smaller for trials done earlier on 388 | weights_below = self._weights(len(below)) 389 | # Weighted count of the number of occurrences of each good hyperparameter (using int IDs) 390 | counts_below = np.bincount(below, minlength=upper, weights=weights_below) 391 | # Add a prior = 1 to avoid zero probability of choosing a hyperparameter 392 | weighted_below = counts_below + self._prior_weight 393 | # normalize 394 | weighted_below /= weighted_below.sum() 395 | # sample the good categorical values 396 | samples_below = self._sample_from_categorical_dist(weighted_below, size) 397 | # log probability of each categorical value in the sample 398 | log_likelihoods_below = TPESampler._categorical_log_pdf(samples_below, weighted_below) 399 | 400 | ## Now same for bad points 401 | # Ramp of weights 402 | weights_above = self._weights(len(above)) 403 | # Weighted count of the number of occurrences of each bad hyperparameter 404 | counts_above = np.bincount(above, minlength=upper, weights=weights_above) 405 | # Add a prior = 1 to avoid zero probability of choosing a hyperparameter 406 | weighted_above = counts_above + self._prior_weight 407 | # normalize 408 | weighted_above /= weighted_above.sum() 409 | # log likelihood of the GOOD sample points with their probabilities of being in the bad group 410 | log_likelihoods_above = TPESampler._categorical_log_pdf(samples_below, weighted_above) 411 | 412 | ret, samples, scores = TPESampler._compare( 413 | samples=samples_below, log_l=log_likelihoods_below, log_g=log_likelihoods_above, k=self._k 414 | ) 415 | ret = int(ret[0]) 
416 | samples = [distribution.choices[samples[index]] for index in range(len(samples))] 417 | return ret, samples, scores 418 | 419 | # return int( 420 | # TPESampler._compare( 421 | # samples=samples_below, log_l=log_likelihoods_below, log_g=log_likelihoods_above 422 | # )[0] 423 | # ) 424 | 425 | def _sample_from_gmm( 426 | self, 427 | parzen_estimator, # type: _ParzenEstimator 428 | low, # type: float 429 | high, # type: float 430 | q=None, # type: Optional[float] 431 | size=(), # type: Tuple 432 | ): 433 | # type: (...) -> np.ndarray 434 | 435 | # weights, mus, and stds sorted by increasing mus of the good points 436 | weights = parzen_estimator.weights 437 | mus = parzen_estimator.mus 438 | sigmas = parzen_estimator.sigmas 439 | weights, mus, sigmas = map(np.asarray, (weights, mus, sigmas)) 440 | 441 | if low >= high: 442 | raise ValueError( 443 | "The 'low' should be lower than the 'high'. " 444 | "But (low, high) = ({}, {}).".format(low, high) 445 | ) 446 | # weighted multinomial sample of 24 good points based on the WEIGHTS, not mean/std 447 | active = np.argmax(self._rng.multinomial(1, weights, size=size), axis=-1) 448 | # normalize the active points 449 | trunc_low = (low - mus[active]) / sigmas[active] 450 | trunc_high = (high - mus[active]) / sigmas[active] 451 | while True: 452 | # sample from a truncated normal dist with means and stds of the active points 453 | samples = truncnorm.rvs( 454 | trunc_low, 455 | trunc_high, 456 | size=size, 457 | loc=mus[active], 458 | scale=sigmas[active], 459 | random_state=self._rng, 460 | ) 461 | if (samples < high).all(): # why not 462 | break 463 | 464 | if q is None: 465 | return samples 466 | else: 467 | # round the samples to ints 468 | return np.round(samples / q) * q 469 | 470 | def _gmm_log_pdf( 471 | self, 472 | samples, # type: np.ndarray 473 | parzen_estimator, # type: _ParzenEstimator 474 | low, # type: float 475 | high, # type: float 476 | q=None, # type: Optional[float] 477 | ): 478 | # type: (...) 
-> np.ndarray 479 | 480 | # weights, mus, and stds sorted by increasing mus of the good points 481 | weights = parzen_estimator.weights 482 | mus = parzen_estimator.mus 483 | sigmas = parzen_estimator.sigmas 484 | samples, weights, mus, sigmas = map(np.asarray, (samples, weights, mus, sigmas)) 485 | 486 | if samples.size == 0: 487 | return np.asarray([], dtype=float) 488 | if weights.ndim != 1: 489 | raise ValueError( 490 | "The 'weights' should be 2-dimension. " 491 | "But weights.shape = {}".format(weights.shape) 492 | ) 493 | if mus.ndim != 1: 494 | raise ValueError( 495 | "The 'mus' should be 2-dimension. " "But mus.shape = {}".format(mus.shape) 496 | ) 497 | if sigmas.ndim != 1: 498 | raise ValueError( 499 | "The 'sigmas' should be 2-dimension. " "But sigmas.shape = {}".format(sigmas.shape) 500 | ) 501 | # probability of each point times the probability it is in the 502 | # accepted range [low, high], ie normalization constant. weights are normalized 503 | p_accept = np.sum( 504 | weights 505 | * ( 506 | TPESampler._normal_cdf(high, mus, sigmas) 507 | - TPESampler._normal_cdf(low, mus, sigmas) 508 | ) 509 | ) 510 | 511 | if q is None: 512 | distance = samples[..., None] - mus 513 | mahalanobis = (distance / np.maximum(sigmas, EPS)) ** 2 514 | Z = np.sqrt(2 * np.pi) * sigmas 515 | coefficient = weights / Z / p_accept 516 | return TPESampler._logsum_rows(-0.5 * mahalanobis + np.log(coefficient)) 517 | else: 518 | cdf_func = TPESampler._normal_cdf 519 | # bounds on the normal distribution of each sample point. 
This is the probability space 520 | # that would have allowed for each sample value to be chosen 521 | upper_bound = np.minimum(samples + q / 2.0, high) 522 | lower_bound = np.maximum(samples - q / 2.0, low) 523 | # probability for each sampled point 524 | # weights[..., None] is weights reshaped from (len(weights),) to (len(weights),1), same for mus and sigmas 525 | # upper_bound[None] are the upper_bounds reshaped from (len(upper_bound),) to (1, len(upper_bounds)) 526 | # For each sample point, we compute the probability of it occuring in each Gaussian mixture (one for each point) 527 | # and then sum the mass. Finally we multiply by the weights of each point occuring 528 | probabilities = np.sum( 529 | weights[..., None] 530 | * ( 531 | cdf_func(upper_bound[None], mus[..., None], sigmas[..., None]) 532 | - cdf_func(lower_bound[None], mus[..., None], sigmas[..., None]) 533 | ), 534 | axis=0, 535 | ) 536 | # normalize by the probability of accepting 537 | return np.log(probabilities + EPS) - np.log(p_accept + EPS) 538 | 539 | def _sample_from_categorical_dist(self, probabilities, size): # weights=probabilities 540 | # type: (np.ndarray, Tuple[int]) -> np.ndarray 541 | 542 | # probabilities as np array 543 | if probabilities.size == 1 and isinstance(probabilities[0], np.ndarray): 544 | probabilities = probabilities[0] 545 | probabilities = np.asarray(probabilities) 546 | 547 | if size == (0,): 548 | return np.asarray([], dtype=float) 549 | assert len(size) 550 | assert probabilities.ndim == 1 551 | 552 | # n_draws = 24 553 | n_draws = int(np.prod(size)) 554 | # draw samples from the multinomial distribution 555 | sample = self._rng.multinomial(n=1, pvals=probabilities, size=n_draws) 556 | assert sample.shape == size + (probabilities.size,) 557 | # 24 categorical values selected from the multinomial sample 558 | return_val = np.dot(sample, np.arange(probabilities.size)) 559 | return_val.shape = size 560 | return return_val 561 | 562 | @classmethod 563 | def 
_categorical_log_pdf( 564 | cls, 565 | sample, # type: np.ndarray 566 | p, # type: np.ndarray 567 | ): 568 | # type: (...) -> np.ndarray 569 | 570 | if sample.size: 571 | # log probability of each sample 572 | return np.log(np.asarray(p)[sample]) 573 | else: 574 | return np.asarray([]) 575 | 576 | @classmethod 577 | def _compare(cls, samples, log_l, log_g, k): 578 | # type: (np.ndarray, np.ndarray, np.ndarray) -> np.ndarray 579 | 580 | # good samples, good log likelihoods, bad log likelihoods 581 | samples, log_l, log_g = map(np.asarray, (samples, log_l, log_g)) 582 | if samples.size: 583 | # ratio of likelihoods = difference of log likelihoods 584 | score = log_l - log_g 585 | if samples.size != score.size: 586 | raise ValueError( 587 | "The size of the 'samples' and that of the 'score' " 588 | "should be same. " 589 | "But (samples.size, score.size) = ({}, {})".format(samples.size, score.size) 590 | ) 591 | # this is the hyperparameter with the best expected score, can find top-k 592 | best = np.argmax(score) 593 | uniq_smpls, indices = np.unique(samples, return_index=True) 594 | uniq_scores = np.exp(score[indices]) 595 | # topk=min(len(uniq_scores), 32) 596 | k = min(k, len(uniq_smpls)) 597 | indicies = np.argpartition(uniq_scores, -k)[-k:] 598 | uniq_smpls = uniq_smpls[indicies] 599 | uniq_scores = uniq_scores[indicies] 600 | # sorted_indices = np.argsort(uniq_scores)[::-1] 601 | # uniq_smpls = uniq_smpls[sorted_indices] 602 | # uniq_scores = uniq_scores[sorted_indices] 603 | return np.asarray([samples[best]] * samples.size), uniq_smpls, uniq_scores #/ np.sum(uniq_scores) 604 | else: 605 | return np.asarray([]) 606 | 607 | @classmethod 608 | def _logsum_rows(cls, x): 609 | # type: (np.ndarray) -> np.ndarray 610 | 611 | x = np.asarray(x) 612 | m = x.max(axis=1) 613 | return np.log(np.exp(x - m[:, None]).sum(axis=1)) + m 614 | 615 | @classmethod 616 | def _normal_cdf(cls, x, mu, sigma): 617 | # type: (float, np.ndarray, np.ndarray) -> np.ndarray 618 | 619 | mu, 
sigma = map(np.asarray, (mu, sigma)) 620 | denominator = x - mu 621 | numerator = np.maximum(np.sqrt(2) * sigma, EPS) 622 | z = denominator / numerator 623 | return 0.5 * (1 + scipy.special.erf(z)) 624 | 625 | @classmethod 626 | def _log_normal_cdf(cls, x, mu, sigma): 627 | # type: (float, np.ndarray, np.ndarray) -> np.ndarray 628 | 629 | mu, sigma = map(np.asarray, (mu, sigma)) 630 | if x < 0: 631 | raise ValueError("Negative argument is given to _lognormal_cdf. x: {}".format(x)) 632 | denominator = np.log(np.maximum(x, EPS)) - mu 633 | numerator = np.maximum(np.sqrt(2) * sigma, EPS) 634 | z = denominator / numerator 635 | return 0.5 + 0.5 * scipy.special.erf(z) 636 | 637 | @staticmethod 638 | def hyperopt_parameters(): 639 | # type: () -> Dict[str, Any] 640 | """Return the the default parameters of hyperopt (v0.1.2). 641 | 642 | :class:`~optuna.samplers.TPESampler` can be instantiated with the parameters returned 643 | by this method. 644 | 645 | Example: 646 | 647 | Create a :class:`~optuna.samplers.TPESampler` instance with the default 648 | parameters of `hyperopt `_. 649 | 650 | .. testcode:: 651 | 652 | import optuna 653 | from optuna.samplers import TPESampler 654 | 655 | def objective(trial): 656 | x = trial.suggest_uniform('x', -10, 10) 657 | return x**2 658 | 659 | sampler = TPESampler(**TPESampler.hyperopt_parameters()) 660 | study = optuna.create_study(sampler=sampler) 661 | study.optimize(objective, n_trials=10) 662 | 663 | Returns: 664 | A dictionary containing the default parameters of hyperopt. 
665 | 666 | """ 667 | 668 | return { 669 | "consider_prior": True, 670 | "prior_weight": 1.0, 671 | "consider_magic_clip": True, 672 | "consider_endpoints": False, 673 | "n_startup_trials": 20, 674 | "n_ei_candidates": 24, 675 | "gamma": hyperopt_default_gamma, 676 | "weights": default_weights, 677 | } 678 | 679 | 680 | def _get_observation_pairs(study, param_name, trial): 681 | # type: (Study, str, FrozenTrial) -> Tuple[List[Optional[float]], List[Tuple[float, float]]] 682 | """Get observation pairs from the study. 683 | 684 | This function collects observation pairs from the complete or pruned trials of the study. 685 | The values for trials that don't contain the parameter named ``param_name`` are set to None. 686 | 687 | An observation pair fundamentally consists of a parameter value and an objective value. 688 | However, due to the pruning mechanism of Optuna, final objective values are not always 689 | available. Therefore, this function uses intermediate values in addition to the final 690 | ones, and reports the value with its step count as ``(-step, value)``. 691 | Consequently, the structure of the observation pair is as follows: 692 | ``(param_value, (-step, value))``. 693 | 694 | The second element of an observation pair is used to rank observations in 695 | ``_split_observation_pairs`` method (i.e., observations are sorted lexicographically by 696 | ``(-step, value)``). 
697 | """ 698 | 699 | sign = 1 700 | if study.direction == StudyDirection.MAXIMIZE: 701 | sign = -1 702 | 703 | values = [] 704 | scores = [] 705 | for trial in study.get_trials(deepcopy=False): 706 | if trial.state is TrialState.COMPLETE and trial.value is not None: 707 | score = (-float("inf"), sign * trial.value) 708 | elif trial.state is TrialState.PRUNED: 709 | if len(trial.intermediate_values) > 0: 710 | step, intermediate_value = max(trial.intermediate_values.items()) 711 | if math.isnan(intermediate_value): 712 | score = (-step, float("inf")) 713 | else: 714 | score = (-step, sign * intermediate_value) 715 | else: 716 | score = (float("inf"), 0.0) 717 | else: 718 | continue 719 | 720 | param_value = None # type: Optional[float] 721 | if param_name in trial.params: 722 | distribution = trial.distributions[param_name] 723 | param_value = distribution.to_internal_repr(trial.params[param_name]) 724 | 725 | values.append(param_value) 726 | scores.append(score) 727 | 728 | return values, scores 729 | --------------------------------------------------------------------------------