├── boexplain ├── optuna │ ├── __init__.py │ ├── optuna │ │ ├── .DS_Store │ │ ├── pruners │ │ │ ├── .DS_Store │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── nop.py │ │ ├── samplers │ │ │ ├── .DS_Store │ │ │ ├── tpe │ │ │ │ ├── __init__.py │ │ │ │ ├── parzen_estimator.py │ │ │ │ └── sampler.py │ │ │ ├── __init__.py │ │ │ ├── random.py │ │ │ ├── _search_space.py │ │ │ └── base.py │ │ ├── storages │ │ │ ├── __init__.py │ │ │ ├── in_memory.py │ │ │ └── base.py │ │ ├── trial │ │ │ ├── __init__.py │ │ │ ├── _util.py │ │ │ ├── _state.py │ │ │ ├── _base.py │ │ │ ├── _fixed.py │ │ │ └── _frozen.py │ │ ├── _study_direction.py │ │ ├── __init__.py │ │ ├── exceptions.py │ │ ├── progress_bar.py │ │ ├── logging.py │ │ ├── structs.py │ │ ├── distributions.py │ │ └── study.py │ └── setup.py ├── __init__.py └── files │ ├── __init__.py │ ├── cat_xform.py │ ├── search.py │ └── stats.py ├── data ├── credit_test.csv.zip ├── credit_labels.csv.zip ├── credit_record_train.csv.zip └── application_record_train.csv.zip ├── docs ├── source │ ├── api_reference │ │ ├── boexplain.files.search.rst │ │ └── boexplain.rst │ ├── index.rst │ └── conf.py ├── Makefile └── make.bat ├── pyproject.toml ├── LICENSE └── README.md /boexplain/optuna/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boexplain/__init__.py: -------------------------------------------------------------------------------- 1 | from .files import fmin, fmax 2 | 3 | __all__ = ["fmin", "fmax"] -------------------------------------------------------------------------------- /boexplain/files/__init__.py: -------------------------------------------------------------------------------- 1 | from .search import fmin, fmax 2 | 3 | __all__ = ["fmin", "fmax"] -------------------------------------------------------------------------------- /data/credit_test.csv.zip: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-db/BOExplain/HEAD/data/credit_test.csv.zip -------------------------------------------------------------------------------- /data/credit_labels.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-db/BOExplain/HEAD/data/credit_labels.csv.zip -------------------------------------------------------------------------------- /boexplain/optuna/optuna/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-db/BOExplain/HEAD/boexplain/optuna/optuna/.DS_Store -------------------------------------------------------------------------------- /data/credit_record_train.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-db/BOExplain/HEAD/data/credit_record_train.csv.zip -------------------------------------------------------------------------------- /data/application_record_train.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-db/BOExplain/HEAD/data/application_record_train.csv.zip -------------------------------------------------------------------------------- /boexplain/optuna/optuna/pruners/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-db/BOExplain/HEAD/boexplain/optuna/optuna/pruners/.DS_Store -------------------------------------------------------------------------------- /boexplain/optuna/optuna/samplers/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-db/BOExplain/HEAD/boexplain/optuna/optuna/samplers/.DS_Store -------------------------------------------------------------------------------- 
/boexplain/optuna/optuna/samplers/tpe/__init__.py: -------------------------------------------------------------------------------- 1 | # from optuna.samplers.tpe.sampler import TPESampler # NOQA 2 | from .sampler import TPESampler # NOQA -------------------------------------------------------------------------------- /docs/source/api_reference/boexplain.files.search.rst: -------------------------------------------------------------------------------- 1 | .. _boexplain.files.search: 2 | 3 | boexplain.files.search 4 | ========================================= 5 | 6 | .. _boexplain_doc: 7 | 8 | BOExplain API 9 | ------------- 10 | 11 | .. automodule:: boexplain.files.search 12 | :members: 13 | :undoc-members: 14 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/api_reference/boexplain.rst: -------------------------------------------------------------------------------- 1 | .. _api_reference: 2 | 3 | .. _reference: 4 | 5 | ============= 6 | API Reference 7 | ============= 8 | 9 | This section contains the public API reference for DataPrep. It is 10 | auto-generated from the docstrings in the project source code. 11 | 12 | 13 | .. toctree:: 14 | :maxdepth: 2 15 | 16 | boexplain.files.search -------------------------------------------------------------------------------- /boexplain/optuna/optuna/pruners/__init__.py: -------------------------------------------------------------------------------- 1 | # import optuna 2 | # from optuna.pruners.base import BasePruner 3 | # from optuna.pruners.nop import NopPruner 4 | from ... 
import optuna 5 | from .base import BasePruner 6 | from .nop import NopPruner 7 | 8 | 9 | def _filter_study( 10 | study: "optuna.study.Study", trial: "optuna.trial.FrozenTrial" 11 | ) -> "optuna.study.Study": 12 | return study 13 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/storages/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Union # NOQA 2 | 3 | # from optuna.storages.base import BaseStorage 4 | # from optuna.storages.in_memory import InMemoryStorage 5 | from .base import BaseStorage 6 | from .in_memory import InMemoryStorage 7 | 8 | def get_storage(storage): 9 | # type: (Union[None, str, BaseStorage]) -> BaseStorage 10 | if storage is None: 11 | return InMemoryStorage() 12 | else: 13 | return storage 14 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/trial/__init__.py: -------------------------------------------------------------------------------- 1 | # from optuna.trial._base import BaseTrial # NOQA 2 | # from optuna.trial._fixed import FixedTrial # NOQA 3 | # from optuna.trial._frozen import FrozenTrial # NOQA 4 | # from optuna.trial._state import TrialState # NOQA 5 | # from optuna.trial._trial import Trial # NOQA 6 | from ._base import BaseTrial # NOQA 7 | from ._fixed import FixedTrial # NOQA 8 | from ._frozen import FrozenTrial # NOQA 9 | from ._state import TrialState # NOQA 10 | from ._trial import Trial # NOQA -------------------------------------------------------------------------------- /boexplain/optuna/optuna/_study_direction.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | 4 | class StudyDirection(enum.Enum): 5 | """Direction of a :class:`~optuna.study.Study`. 6 | 7 | Attributes: 8 | NOT_SET: 9 | Direction has not been set. 
10 | MINIMIZE: 11 | :class:`~optuna.study.Study` minimizes the objective function. 12 | MAXIMIZE: 13 | :class:`~optuna.study.Study` maximizes the objective function. 14 | """ 15 | 16 | NOT_SET = 0 17 | MINIMIZE = 1 18 | MAXIMIZE = 2 19 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. BOExplain documentation master file, created by 2 | sphinx-quickstart on Mon Feb 8 14:21:13 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to BOExplain's documentation! 7 | ===================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | api_reference/boexplain 14 | 15 | 16 | 17 | Indices and tables 18 | ================== 19 | 20 | * :ref:`genindex` 21 | * :ref:`modindex` 22 | * :ref:`search` 23 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # from optuna.samplers._search_space import intersection_search_space # NOQA 2 | # from optuna.samplers._search_space import IntersectionSearchSpace # NOQA 3 | # from optuna.samplers.base import BaseSampler # NOQA 4 | # from optuna.samplers.random import RandomSampler # NOQA 5 | # from optuna.samplers.tpe import TPESampler # NOQA 6 | from ._search_space import intersection_search_space # NOQA 7 | from ._search_space import IntersectionSearchSpace # NOQA 8 | from .base import BaseSampler # NOQA 9 | from .random import RandomSampler # NOQA 10 | from .tpe import TPESampler # NOQA -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 
| # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /boexplain/files/cat_xform.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | 5 | def individual_contribution(df, objective, cat_cols, **kwargs): 6 | # dictionary of dictionaries, one dictionary for each column 7 | # dictinary keys are the categorical values and the values are the individual contribution 8 | # for each value in the column, compute the individual contribution of that column 9 | # ie, remove tuples satisfying the single-clause predicate 'col=val', 10 | # and compute the objective function with this data 11 | 12 | cat_val_to_indiv_cont = { 13 | col: {val: objective(df[df[col] != val], **kwargs) for val in df[col].unique()} 14 | for col in cat_cols 15 | } 16 | 17 | return cat_val_to_indiv_cont 18 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/trial/_util.py: -------------------------------------------------------------------------------- 1 | import decimal 2 | 3 | # from optuna import logging 4 | from .. 
import logging 5 | 6 | _logger = logging.get_logger(__name__) 7 | 8 | 9 | def _adjust_discrete_uniform_high(name, low, high, q): 10 | # type: (str, float, float, float) -> float 11 | 12 | d_high = decimal.Decimal(str(high)) 13 | d_low = decimal.Decimal(str(low)) 14 | d_q = decimal.Decimal(str(q)) 15 | 16 | d_r = d_high - d_low 17 | 18 | if d_r % d_q != decimal.Decimal("0"): 19 | high = float((d_r // d_q) * d_q + d_low) 20 | _logger.warning( 21 | "The range of parameter `{}` is not divisible by `q`, and is " 22 | "replaced by [{}, {}].".format(name, low, high) 23 | ) 24 | 25 | return high 26 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "boexplain" 3 | version = "0.1.1" 4 | description = "BOExplain" 5 | authors = ["Brandon Lockhart "] 6 | license = "MIT" 7 | readme = "README.md" 8 | repository = "https://github.com/sfu-db/BOExplain" 9 | homepage = "https://github.com/sfu-db/BOExplain" 10 | 11 | [tool.poetry.dependencies] 12 | python = "^3.9" 13 | pandas = "1.2.1" 14 | numpy = "1.20.0" 15 | scipy = "1.6.0" 16 | scikit-learn = "0.24.1" 17 | altair = "4.1.0" 18 | imblearn = "0.0" 19 | tqdm = "4.51.0" 20 | colorlog = "4.4.0" 21 | numpyencoder = "0.3.0" 22 | 23 | [tool.poetry.dev-dependencies] 24 | black = "^19.10b0" 25 | jupyter = "^1" 26 | ipykernel = "^5" 27 | 28 | [tool.black] 29 | line-length = 99 30 | target-version = ['py38'] 31 | 32 | [build-system] 33 | requires = ["poetry>=1"] 34 | build-backend = "poetry.masonry.api" 35 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import types 3 | from typing import Any 4 | 5 | # from optuna import distributions # NOQA 6 | # from optuna import exceptions # NOQA 7 | # from optuna import 
logging # NOQA 8 | # from optuna import pruners # NOQA 9 | # from optuna import samplers # NOQA 10 | # from optuna import storages # NOQA 11 | # from optuna import study # NOQA 12 | # from optuna import trial # NOQA 13 | from . import distributions # NOQA 14 | from . import exceptions # NOQA 15 | from . import logging # NOQA 16 | from . import pruners # NOQA 17 | from . import samplers # NOQA 18 | from . import storages # NOQA 19 | from . import study # NOQA 20 | from . import trial # NOQA 21 | 22 | from .study import create_study # NOQA 23 | from .study import Study # NOQA 24 | from .trial import Trial # NOQA 25 | # from study import create_study # NOQA 26 | # from study import Study # NOQA 27 | # from trial import Trial # NOQA -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/trial/_state.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | 4 | class TrialState(enum.Enum): 5 | """State of a :class:`~optuna.trial.Trial`. 6 | 7 | Attributes: 8 | RUNNING: 9 | The :class:`~optuna.trial.Trial` is running. 10 | COMPLETE: 11 | The :class:`~optuna.trial.Trial` has been finished without any error. 12 | PRUNED: 13 | The :class:`~optuna.trial.Trial` has been pruned with 14 | :class:`~optuna.exceptions.TrialPruned`. 15 | FAIL: 16 | The :class:`~optuna.trial.Trial` has failed due to an uncaught error. 17 | """ 18 | 19 | RUNNING = 0 20 | COMPLETE = 1 21 | PRUNED = 2 22 | FAIL = 3 23 | WAITING = 4 24 | 25 | def __repr__(self): 26 | # type: () -> str 27 | 28 | return str(self) 29 | 30 | def is_finished(self): 31 | # type: () -> bool 32 | 33 | return self != TrialState.RUNNING and self != TrialState.WAITING 34 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/pruners/base.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class BasePruner(object, metaclass=abc.ABCMeta): 5 | """Base class for pruners.""" 6 | 7 | @abc.abstractmethod 8 | def prune(self, study, trial): 9 | # type: (Study, FrozenTrial) -> bool 10 | """Judge whether the trial should be pruned based on the reported values. 11 | 12 | Note that this method is not supposed to be called by library users. 
Instead, 13 | :func:`optuna.trial.Trial.report` and :func:`optuna.trial.Trial.should_prune` provide 14 | user interfaces to implement pruning mechanism in an objective function. 15 | 16 | Args: 17 | study: 18 | Study object of the target study. 19 | trial: 20 | FrozenTrial object of the target trial. 21 | 22 | Returns: 23 | A boolean value representing whether the trial should be pruned. 24 | """ 25 | 26 | raise NotImplementedError 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021, Brandon Lockhart 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /boexplain/optuna/optuna/pruners/nop.py: -------------------------------------------------------------------------------- 1 | # from optuna.pruners import BasePruner 2 | from . 
import BasePruner 3 | 4 | 5 | class NopPruner(BasePruner): 6 | """Pruner which never prunes trials. 7 | 8 | Example: 9 | 10 | .. testsetup:: 11 | 12 | import numpy as np 13 | from sklearn.model_selection import train_test_split 14 | 15 | np.random.seed(seed=0) 16 | X = np.random.randn(200).reshape(-1, 1) 17 | y = np.where(X[:, 0] < 0.5, 0, 1) 18 | X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=0) 19 | classes = np.unique(y) 20 | 21 | .. testcode:: 22 | 23 | import optuna 24 | from sklearn.linear_model import SGDClassifier 25 | 26 | def objective(trial): 27 | alpha = trial.suggest_uniform('alpha', 0.0, 1.0) 28 | clf = SGDClassifier(alpha=alpha) 29 | n_train_iter = 100 30 | 31 | for step in range(n_train_iter): 32 | clf.partial_fit(X_train, y_train, classes=classes) 33 | 34 | intermediate_value = clf.score(X_valid, y_valid) 35 | trial.report(intermediate_value, step) 36 | 37 | if trial.should_prune(): 38 | assert False, "should_prune() should always return False with this pruner." 39 | raise optuna.exceptions.TrialPruned() 40 | 41 | return clf.score(X_valid, y_valid) 42 | 43 | study = optuna.create_study(direction='maximize', 44 | pruner=optuna.pruners.NopPruner()) 45 | study.optimize(objective, n_trials=20) 46 | """ 47 | 48 | def prune(self, study, trial): 49 | # type: (Study, FrozenTrial) -> bool 50 | 51 | return False 52 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. 
For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | sys.path.insert(0, os.path.abspath("../../")) 17 | 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = 'BOExplain' 22 | copyright = '2021, Brandon Lockhart' 23 | author = 'Brandon Lockhart' 24 | 25 | # The full version, including alpha/beta/rc tags 26 | release = '0.1.0' 27 | 28 | 29 | # -- General configuration --------------------------------------------------- 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = [ 'sphinx.ext.autodoc' 35 | ] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ['_templates'] 39 | 40 | # List of patterns, relative to source directory, that match files and 41 | # directories to ignore when looking for source files. 42 | # This pattern also affects html_static_path and html_extra_path. 43 | exclude_patterns = [] 44 | 45 | 46 | # -- Options for HTML output ------------------------------------------------- 47 | 48 | # The theme to use for HTML and HTML Help pages. See the documentation for 49 | # a list of builtin themes. 
50 | # 51 | 52 | html_context = { 53 | 'AUTHOR': author, 54 | 'DESCRIPTION': 'BOExplain, documentation site.', 55 | 'SITEMAP_BASE_URL': 'https://sfu-db.github.io/BOExplain/', # Trailing slash is needed 56 | 'VERSION': release, 57 | } 58 | 59 | html_theme = 'alabaster' 60 | 61 | # Add any paths that contain custom static files (such as style sheets) here, 62 | # relative to this directory. They are copied after the builtin static files, 63 | # so a file named "default.css" will overwrite the builtin "default.css". 64 | html_static_path = ['_static'] -------------------------------------------------------------------------------- /boexplain/optuna/optuna/exceptions.py: -------------------------------------------------------------------------------- 1 | class OptunaError(Exception): 2 | """Base class for Optuna specific errors.""" 3 | 4 | pass 5 | 6 | 7 | class TrialPruned(OptunaError): 8 | """Exception for pruned trials. 9 | 10 | This error tells a trainer that the current :class:`~optuna.trial.Trial` was pruned. It is 11 | supposed to be raised after :func:`optuna.trial.Trial.should_prune` as shown in the following 12 | example. 13 | 14 | Example: 15 | 16 | .. testsetup:: 17 | 18 | import numpy as np 19 | from sklearn.model_selection import train_test_split 20 | 21 | np.random.seed(seed=0) 22 | X = np.random.randn(200).reshape(-1, 1) 23 | y = np.where(X[:, 0] < 0.5, 0, 1) 24 | X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=0) 25 | classes = np.unique(y) 26 | 27 | .. 
testcode:: 28 | 29 | import optuna 30 | from sklearn.linear_model import SGDClassifier 31 | 32 | def objective(trial): 33 | alpha = trial.suggest_uniform('alpha', 0.0, 1.0) 34 | clf = SGDClassifier(alpha=alpha) 35 | n_train_iter = 100 36 | 37 | for step in range(n_train_iter): 38 | clf.partial_fit(X_train, y_train, classes=classes) 39 | 40 | intermediate_value = clf.score(X_valid, y_valid) 41 | trial.report(intermediate_value, step) 42 | 43 | if trial.should_prune(): 44 | raise optuna.exceptions.TrialPruned() 45 | 46 | return clf.score(X_valid, y_valid) 47 | 48 | study = optuna.create_study(direction='maximize') 49 | study.optimize(objective, n_trials=20) 50 | """ 51 | 52 | pass 53 | 54 | 55 | class CLIUsageError(OptunaError): 56 | """Exception for CLI. 57 | 58 | CLI raises this exception when it receives invalid configuration. 59 | """ 60 | 61 | pass 62 | 63 | 64 | class StorageInternalError(OptunaError): 65 | """Exception for storage operation. 66 | 67 | This error is raised when an operation failed in backend DB of storage. 68 | """ 69 | 70 | pass 71 | 72 | 73 | class DuplicatedStudyError(OptunaError): 74 | """Exception for a duplicated study name. 75 | 76 | This error is raised when a specified study name already exists in the storage. 77 | """ 78 | 79 | pass 80 | 81 | 82 | class ExperimentalWarning(Warning): 83 | """Experimental Warning class. 84 | 85 | This implementation exists here because the policy of `FutureWarning` has been changed 86 | since Python 3.7 was released. See the details in 87 | https://docs.python.org/3/library/warnings.html#warning-categories. 
88 | """ 89 | 90 | pass 91 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/progress_bar.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any 3 | from typing import Optional 4 | 5 | from tqdm.auto import tqdm 6 | 7 | # from optuna import logging as optuna_logging 8 | from . import logging as optuna_logging 9 | 10 | _tqdm_handler = None # type: Optional[_TqdmLoggingHandler] 11 | 12 | 13 | # Reference: https://gist.github.com/hvy/8b80c2cedf02b15c24f85d1fa17ebe02 14 | class _TqdmLoggingHandler(logging.StreamHandler): 15 | def emit(self, record: Any) -> None: 16 | try: 17 | msg = self.format(record) 18 | tqdm.write(msg) 19 | self.flush() 20 | except (KeyboardInterrupt, SystemExit): 21 | raise 22 | except Exception: 23 | self.handleError(record) 24 | 25 | 26 | class _ProgressBar(object): 27 | """Progress Bar implementation for `Study.optimize` on the top of `tqdm`. 28 | 29 | Args: 30 | is_valid: 31 | Whether to show progress bars in `Study.optimize`. 32 | n_trials: 33 | The number of trials. 34 | timeout: 35 | Stop study after the given number of second(s). 36 | """ 37 | 38 | def __init__( 39 | self, is_valid: bool, n_trials: Optional[int] = None, timeout: Optional[float] = None, 40 | ) -> None: 41 | self._is_valid = is_valid 42 | self._n_trials = n_trials 43 | self._timeout = timeout 44 | 45 | if self._is_valid: 46 | self._init_valid() 47 | 48 | # TODO(hvy): Remove initialization indirection via this method when the progress bar is no 49 | # longer experimental. 
50 | def _init_valid(self) -> None: 51 | self._progress_bar = tqdm(range(self._n_trials) if self._n_trials is not None else None) 52 | global _tqdm_handler 53 | 54 | _tqdm_handler = _TqdmLoggingHandler() 55 | _tqdm_handler.setLevel(logging.INFO) 56 | _tqdm_handler.setFormatter(optuna_logging.create_default_formatter()) 57 | optuna_logging.disable_default_handler() 58 | optuna_logging._get_library_root_logger().addHandler(_tqdm_handler) 59 | 60 | def update(self, elapsed_seconds: Optional[float]) -> None: 61 | """Update the progress bars if ``is_valid`` is ``True``. 62 | 63 | Args: 64 | elapsed_seconds: 65 | The time past since `Study.optimize` started. 66 | """ 67 | if self._is_valid: 68 | self._progress_bar.update(1) 69 | if self._timeout is not None and elapsed_seconds is not None: 70 | self._progress_bar.set_postfix_str( 71 | "{:.02f}/{} seconds".format(elapsed_seconds, self._timeout) 72 | ) 73 | 74 | def close(self) -> None: 75 | """Close progress bars.""" 76 | if self._is_valid: 77 | self._progress_bar.close() 78 | assert _tqdm_handler is not None 79 | optuna_logging._get_library_root_logger().removeHandler(_tqdm_handler) 80 | optuna_logging.enable_default_handler() 81 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/trial/_base.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import datetime 3 | 4 | # from optuna import distributions 5 | # from optuna import logging 6 | from .. import distributions 7 | from .. import logging 8 | 9 | _logger = logging.get_logger(__name__) 10 | 11 | 12 | class BaseTrial(object, metaclass=abc.ABCMeta): 13 | """Base class for trials. 14 | 15 | Note that this class is not supposed to be directly accessed by library users. 
16 | """ 17 | 18 | @abc.abstractmethod 19 | def suggest_float(self, name, low, high, *, log=False, step=None): 20 | # type: (str, float, float, bool, Optional[float])-> float 21 | 22 | # TODO(nzw0301) swap log's position for step's one to match suggest_int for consistency. 23 | 24 | raise NotImplementedError 25 | 26 | @abc.abstractmethod 27 | def suggest_uniform(self, name, low, high): 28 | # type: (str, float, float) -> float 29 | 30 | raise NotImplementedError 31 | 32 | @abc.abstractmethod 33 | def suggest_loguniform(self, name, low, high): 34 | # type: (str, float, float) -> float 35 | 36 | raise NotImplementedError 37 | 38 | @abc.abstractmethod 39 | def suggest_discrete_uniform(self, name, low, high, q): 40 | # type: (str, float, float, float) -> float 41 | 42 | raise NotImplementedError 43 | 44 | @abc.abstractmethod 45 | def suggest_int(self, name, low, high, step=1, log=False): 46 | # type: (str, int, int, int, bool) -> int 47 | 48 | raise NotImplementedError 49 | 50 | @abc.abstractmethod 51 | def suggest_categorical(self, name, choices): 52 | # type: (str, Sequence[CategoricalChoiceType]) -> CategoricalChoiceType 53 | 54 | raise NotImplementedError 55 | 56 | @abc.abstractmethod 57 | def report(self, value, step): 58 | # type: (float, int) -> None 59 | 60 | raise NotImplementedError 61 | 62 | @abc.abstractmethod 63 | def should_prune(self, step=None): 64 | # type: (Optional[int]) -> bool 65 | 66 | raise NotImplementedError 67 | 68 | @abc.abstractmethod 69 | def set_user_attr(self, key, value): 70 | # type: (str, Any) -> None 71 | 72 | raise NotImplementedError 73 | 74 | @abc.abstractmethod 75 | def set_system_attr(self, key, value): 76 | # type: (str, Any) -> None 77 | 78 | raise NotImplementedError 79 | 80 | @property 81 | @abc.abstractmethod 82 | def params(self): 83 | # type: () -> Dict[str, Any] 84 | 85 | raise NotImplementedError 86 | 87 | @property 88 | @abc.abstractmethod 89 | def distributions(self): 90 | # type: () -> Dict[str, BaseDistribution] 91 | 
92 | raise NotImplementedError 93 | 94 | @property 95 | @abc.abstractmethod 96 | def user_attrs(self): 97 | # type: () -> Dict[str, Any] 98 | 99 | raise NotImplementedError 100 | 101 | @property 102 | @abc.abstractmethod 103 | def system_attrs(self): 104 | # type: () -> Dict[str, Any] 105 | 106 | raise NotImplementedError 107 | 108 | @property 109 | @abc.abstractmethod 110 | def datetime_start(self): 111 | # type: () -> Optional[datetime.datetime] 112 | 113 | raise NotImplementedError 114 | 115 | @property 116 | def number(self) -> int: 117 | 118 | raise NotImplementedError 119 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/samplers/random.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | # from optuna import distributions 4 | # from optuna.samplers.base import BaseSampler 5 | from .. import distributions 6 | from ..samplers.base import BaseSampler 7 | 8 | class RandomSampler(BaseSampler): 9 | """Sampler using random sampling. 10 | 11 | This sampler is based on *independent sampling*. 12 | See also :class:`~optuna.samplers.BaseSampler` for more details of 'independent sampling'. 13 | 14 | Example: 15 | 16 | .. testcode:: 17 | 18 | import optuna 19 | from optuna.samplers import RandomSampler 20 | 21 | def objective(trial): 22 | x = trial.suggest_uniform('x', -5, 5) 23 | return x**2 24 | 25 | study = optuna.create_study(sampler=RandomSampler()) 26 | study.optimize(objective, n_trials=10) 27 | 28 | Args: 29 | seed: Seed for random number generator. 
30 | """ 31 | 32 | def __init__(self, seed=None): 33 | # type: (Optional[int]) -> None 34 | 35 | self._rng = numpy.random.RandomState(seed) 36 | 37 | def reseed_rng(self) -> None: 38 | 39 | self._rng = numpy.random.RandomState() 40 | 41 | def infer_relative_search_space(self, study, trial): 42 | # type: (Study, FrozenTrial) -> Dict[str, BaseDistribution] 43 | 44 | return {} 45 | 46 | def sample_relative(self, study, trial, search_space): 47 | # type: (Study, FrozenTrial, Dict[str, BaseDistribution]) -> Dict[str, Any] 48 | 49 | return {} 50 | 51 | def sample_independent(self, study, trial, param_name, param_distribution): 52 | # type: (Study, FrozenTrial, str, distributions.BaseDistribution) -> Any 53 | 54 | if isinstance(param_distribution, distributions.UniformDistribution): 55 | return self._rng.uniform(param_distribution.low, param_distribution.high) 56 | elif isinstance(param_distribution, distributions.LogUniformDistribution): 57 | log_low = numpy.log(param_distribution.low) 58 | log_high = numpy.log(param_distribution.high) 59 | return float(numpy.exp(self._rng.uniform(log_low, log_high))) 60 | elif isinstance(param_distribution, distributions.DiscreteUniformDistribution): 61 | q = param_distribution.q 62 | r = param_distribution.high - param_distribution.low 63 | # [low, high] is shifted to [0, r] to align sampled values at regular intervals. 64 | low = 0 - 0.5 * q 65 | high = r + 0.5 * q 66 | s = self._rng.uniform(low, high) 67 | v = numpy.round(s / q) * q + param_distribution.low 68 | # v may slightly exceed range due to round-off errors. 69 | return float(min(max(v, param_distribution.low), param_distribution.high)) 70 | elif isinstance(param_distribution, distributions.IntUniformDistribution): 71 | # [low, high] is shifted to [0, r] to align sampled values at regular intervals. 72 | r = (param_distribution.high - param_distribution.low) / param_distribution.step 73 | # numpy.random.randint includes low but excludes high. 
74 | s = self._rng.randint(0, r + 1) 75 | v = s * param_distribution.step + param_distribution.low 76 | return int(v) 77 | elif isinstance(param_distribution, distributions.IntLogUniformDistribution): 78 | log_low = numpy.log(param_distribution.low - 0.5) 79 | log_high = numpy.log(param_distribution.high + 0.5) 80 | s = numpy.exp(self._rng.uniform(log_low, log_high)) 81 | v = ( 82 | numpy.round((s - param_distribution.low) / param_distribution.step) 83 | * param_distribution.step 84 | + param_distribution.low 85 | ) 86 | return int(min(max(v, param_distribution.low), param_distribution.high)) 87 | elif isinstance(param_distribution, distributions.CategoricalDistribution): 88 | choices = param_distribution.choices 89 | index = self._rng.randint(0, len(choices)) 90 | return choices[index] 91 | else: 92 | raise NotImplementedError 93 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BOExplain, Explaining Inference Queries with Bayesian Optimization 2 | 3 | BOExplain is a library for explaining inference queries with Bayesian optimization. The corresponding paper can be found at https://arxiv.org/abs/2102.05308. 4 | 5 | ## Installation 6 | 7 | ``` 8 | pip install boexplain 9 | ``` 10 | 11 | ## Documentation 12 | 13 | The documentation is available at [https://sfu-db.github.io/BOExplain/](https://sfu-db.github.io/BOExplain/). (shortcut to [fmin](https://sfu-db.github.io/BOExplain/api_reference/boexplain.files.search.html#boexplain.files.search.fmin), [fmax](https://sfu-db.github.io/BOExplain/api_reference/boexplain.files.search.html#boexplain.files.search.fmax)) 14 | 15 | ## Getting Started 16 | 17 | Derive an explanation for why the predicted rate of having an income over $50K is higher for men compared to women in the UCI ML [Adult dataset](https://archive.ics.uci.edu/ml/datasets/adult). 18 | 19 | 1. Load the data and prepare it for ML. 
20 | ``` python 21 | import pandas as pd 22 | from sklearn.ensemble import RandomForestClassifier 23 | from sklearn.model_selection import train_test_split 24 | 25 | df = pd.read_csv("adult.data", 26 | names=[ 27 | "Age", "Workclass", "fnlwgt", "Education", 28 | "Education-Num", "Marital Status", "Occupation", 29 | "Relationship", "Race", "Gender", "Capital Gain", 30 | "Capital Loss", "Hours per week", "Country", "Income" 31 | ], 32 | na_values=" ?") 33 | 34 | df['Income'].replace({" <=50K": 0, ' >50K': 1}, inplace=True) 35 | df['Gender'].replace({" Male": 0, ' Female': 1}, inplace=True) 36 | df = pd.get_dummies(df) 37 | 38 | train, test = train_test_split(df, test_size=0.2) 39 | test = test.drop(columns='Income') 40 | ``` 41 | 42 | 2. Define the objective function that trains a random forest classifier and queries the ratio of predicted rates of having an income over $50K between men and women. 43 | ``` python 44 | def obj(train_filtered): 45 | rf = RandomForestClassifier(n_estimators=13, random_state=0) 46 | rf.fit(train_filtered.drop(columns='Income'), train_filtered['Income']) 47 | test["prediction"] = rf.predict(test) 48 | rates = test.groupby("Gender")["prediction"].sum() / test.groupby("Gender")["prediction"].size() 49 | test.drop(columns='prediction', inplace=True) 50 | return rates[0] / rates[1] 51 | ``` 52 | 53 | 54 | 3. Use the function `fmin` to minimize the objective function. 55 | ``` python 56 | from boexplain import fmin 57 | 58 | train_filtered = fmin( 59 | data=train, 60 | f=obj, 61 | columns=["Age", "Education-Num"], 62 | runtime=30, 63 | ) 64 | ``` 65 | 66 | 67 | ## Reproduce the Experiments 68 | 69 | To reproduce the experiments, you can clone the repo and create a poetry environment (install [Poetry](https://python-poetry.org/docs/#installation)). 
Run 70 | 71 | ```bash 72 | poetry install 73 | ``` 74 | 75 | To set up the poetry environment for a jupyter notebook, run 76 | 77 | ```bash 78 | poetry run ipython kernel install --name=boexplain 79 | ``` 80 | 81 | An ipython kernel has been created for this environment. 82 | 83 | ### Adult Experiment 84 | 85 | To reproduce the results of the Adult experiment and recreate Figure 6, follow the instructions in [adult.ipynb](https://github.com/sfu-db/BOExplain/blob/main/adult.ipynb). 86 | 87 | ### Credit Experiment 88 | 89 | To reproduce the results of the Credit experiment and recreate Figure 8, follow the instructions in [credit.ipynb](https://github.com/sfu-db/BOExplain/blob/main/credit.ipynb). 90 | 91 | ### House Experiment 92 | 93 | To reproduce the results of the House experiment and recreate Figure 7, follow the instructions in [house.ipynb](https://github.com/sfu-db/BOExplain/blob/main/house.ipynb). 94 | 95 | ### Scorpion Synthetic Data Experiment 96 | 97 | To reproduce the results of the experiment with Scorpion's synthetic data and corresponding query, and recreate Figure 4, follow the instructions in [scorpion.ipynb](https://github.com/sfu-db/BOExplain/blob/main/scorpion.ipynb). 98 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/samplers/_search_space.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import copy 3 | from typing import Dict 4 | from typing import Optional 5 | 6 | # import optuna 7 | # from optuna.distributions import BaseDistribution 8 | # from optuna.study import BaseStudy 9 | from ... import optuna 10 | from ..distributions import BaseDistribution 11 | from ..study import BaseStudy 12 | 13 | 14 | class IntersectionSearchSpace(object): 15 | """A class to calculate the intersection search space of a :class:`~optuna.study.BaseStudy`.
16 | 17 | Intersection search space contains the intersection of parameter distributions that have been 18 | suggested in the completed trials of the study so far. 19 | If there are multiple parameters that have the same name but different distributions, 20 | neither is included in the resulting search space 21 | (i.e., the parameters with dynamic value ranges are excluded). 22 | 23 | Note that an instance of this class is supposed to be used for only one study. 24 | If different studies are passed to :func:`~optuna.samplers.IntersectionSearchSpace.calculate`, 25 | a :obj:`ValueError` is raised. 26 | """ 27 | 28 | def __init__(self) -> None: 29 | self._cursor = -1 # type: int 30 | self._search_space = None # type: Optional[Dict[str, BaseDistribution]] 31 | self._study_id = None # type: Optional[int] 32 | 33 | def calculate( 34 | self, study: BaseStudy, ordered_dict: bool = False 35 | ) -> Dict[str, BaseDistribution]: 36 | """Returns the intersection search space of the :class:`~optuna.study.BaseStudy`. 37 | 38 | Args: 39 | study: 40 | A study with completed trials. 41 | ordered_dict: 42 | A boolean flag determining the return type. 43 | If :obj:`False`, the returned object will be a :obj:`dict`. 44 | If :obj:`True`, the returned object will be an :obj:`collections.OrderedDict` 45 | sorted by keys, i.e. parameter names. 46 | 47 | Returns: 48 | A dictionary containing the parameter names and parameter's distributions. 49 | """ 50 | 51 | if self._study_id is None: 52 | self._study_id = study._study_id 53 | else: 54 | # Note that the check below is meaningless when `InMemoryStorage` is used 55 | # because `InMemoryStorage.create_new_study` always returns the same study ID.
56 | if self._study_id != study._study_id: 57 | raise ValueError("`IntersectionSearchSpace` cannot handle multiple studies.") 58 | 59 | next_cursor = self._cursor 60 | for trial in reversed(study.get_trials(deepcopy=False)): 61 | if self._cursor > trial.number: 62 | break 63 | 64 | if not trial.state.is_finished(): 65 | next_cursor = trial.number 66 | 67 | if trial.state != optuna.trial.TrialState.COMPLETE: 68 | continue 69 | 70 | if self._search_space is None: 71 | self._search_space = copy.copy(trial.distributions) 72 | continue 73 | 74 | delete_list = [] 75 | for param_name, param_distribution in self._search_space.items(): 76 | if param_name not in trial.distributions: 77 | delete_list.append(param_name) 78 | elif trial.distributions[param_name] != param_distribution: 79 | delete_list.append(param_name) 80 | 81 | for param_name in delete_list: 82 | del self._search_space[param_name] 83 | 84 | self._cursor = next_cursor 85 | search_space = self._search_space or {} 86 | 87 | if ordered_dict: 88 | search_space = OrderedDict(sorted(search_space.items(), key=lambda x: x[0])) 89 | 90 | return copy.deepcopy(search_space) 91 | 92 | 93 | def intersection_search_space( 94 | study: BaseStudy, ordered_dict: bool = False 95 | ) -> Dict[str, BaseDistribution]: 96 | """Return the intersection search space of the :class:`~optuna.study.BaseStudy`. 97 | 98 | Intersection search space contains the intersection of parameter distributions that have been 99 | suggested in the completed trials of the study so far. 100 | If there are multiple parameters that have the same name but different distributions, 101 | neither is included in the resulting search space 102 | (i.e., the parameters with dynamic value ranges are excluded). 103 | 104 | .. note:: 105 | :class:`~optuna.samplers.IntersectionSearchSpace` provides the same functionality with 106 | a much faster way. Please consider using it if you want to reduce execution time 107 | as much as possible. 
108 | 109 | Args: 110 | study: 111 | A study with completed trials. 112 | ordered_dict: 113 | A boolean flag determining the return type. 114 | If :obj:`False`, the returned object will be a :obj:`dict`. 115 | If :obj:`True`, the returned object will be an :obj:`collections.OrderedDict` sorted by 116 | keys, i.e. parameter names. 117 | 118 | Returns: 119 | A dictionary containing the parameter names and parameter's distributions. 120 | """ 121 | 122 | return IntersectionSearchSpace().calculate(study, ordered_dict=ordered_dict) 123 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/samplers/base.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class BaseSampler(object, metaclass=abc.ABCMeta): 5 | """Base class for samplers. 6 | 7 | Optuna combines two types of sampling strategies, which are called *relative sampling* and 8 | *independent sampling*. 9 | 10 | *The relative sampling* determines values of multiple parameters simultaneously so that 11 | sampling algorithms can use relationship between parameters (e.g., correlation). 12 | Target parameters of the relative sampling are described in a relative search space, which 13 | is determined by :func:`~optuna.samplers.BaseSampler.infer_relative_search_space`. 14 | 15 | *The independent sampling* determines a value of a single parameter without considering any 16 | relationship between parameters. Target parameters of the independent sampling are the 17 | parameters not described in the relative search space. 18 | 19 | More specifically, parameters are sampled by the following procedure. 20 | At the beginning of a trial, :meth:`~optuna.samplers.BaseSampler.infer_relative_search_space` 21 | is called to determine the relative search space for the trial. Then, 22 | :meth:`~optuna.samplers.BaseSampler.sample_relative` is invoked to sample parameters 23 | from the relative search space. 
During the execution of the objective function, 24 | :meth:`~optuna.samplers.BaseSampler.sample_independent` is used to sample 25 | parameters that don't belong to the relative search space. 26 | 27 | The following figure depicts the lifetime of a trial and how the above three methods are 28 | called in the trial. 29 | 30 | .. image:: ../../image/sampling-sequence.png 31 | 32 | | 33 | 34 | """ 35 | 36 | @abc.abstractmethod 37 | def infer_relative_search_space(self, study, trial): 38 | # type: (Study, FrozenTrial) -> Dict[str, BaseDistribution] 39 | """Infer the search space that will be used by relative sampling in the target trial. 40 | 41 | This method is called right before :func:`~optuna.samplers.BaseSampler.sample_relative` 42 | method, and the search space returned by this method is pass to it. The parameters not 43 | contained in the search space will be sampled by using 44 | :func:`~optuna.samplers.BaseSampler.sample_independent` method. 45 | 46 | Args: 47 | study: 48 | Target study object. 49 | trial: 50 | Target trial object. 51 | 52 | Returns: 53 | A dictionary containing the parameter names and parameter's distributions. 54 | 55 | .. seealso:: 56 | Please refer to :func:`~optuna.samplers.intersection_search_space` as an 57 | implementation of :func:`~optuna.samplers.BaseSampler.infer_relative_search_space`. 58 | """ 59 | 60 | raise NotImplementedError 61 | 62 | @abc.abstractmethod 63 | def sample_relative(self, study, trial, search_space): 64 | # type: (Study, FrozenTrial, Dict[str, BaseDistribution]) -> Dict[str, Any] 65 | """Sample parameters in a given search space. 66 | 67 | This method is called once at the beginning of each trial, i.e., right before the 68 | evaluation of the objective function. This method is suitable for sampling algorithms 69 | that use relationship between parameters such as Gaussian Process and CMA-ES. 70 | 71 | .. note:: 72 | The failed trials are ignored by any build-in samplers when they sample new 73 | parameters. 
Thus, failed trials are regarded as deleted in the samplers' 74 | perspective. 75 | 76 | Args: 77 | study: 78 | Target study object. 79 | trial: 80 | Target trial object. 81 | search_space: 82 | The search space returned by 83 | :func:`~optuna.samplers.BaseSampler.infer_relative_search_space`. 84 | 85 | Returns: 86 | A dictionary containing the parameter names and the values. 87 | 88 | """ 89 | 90 | raise NotImplementedError 91 | 92 | @abc.abstractmethod 93 | def sample_independent(self, study, trial, param_name, param_distribution): 94 | # type: (Study, FrozenTrial, str, BaseDistribution) -> Any 95 | """Sample a parameter for a given distribution. 96 | 97 | This method is called only for the parameters not contained in the search space returned 98 | by :func:`~optuna.samplers.BaseSampler.sample_relative` method. This method is suitable 99 | for sampling algorithms that do not use relationship between parameters such as random 100 | sampling and TPE. 101 | 102 | .. note:: 103 | The failed trials are ignored by any build-in samplers when they sample new 104 | parameters. Thus, failed trials are regarded as deleted in the samplers' 105 | perspective. 106 | 107 | Args: 108 | study: 109 | Target study object. 110 | trial: 111 | Target trial object. 112 | param_name: 113 | Name of the sampled parameter. 114 | param_distribution: 115 | Distribution object that specifies a prior and/or scale of the sampling algorithm. 116 | 117 | Returns: 118 | A parameter value. 119 | 120 | """ 121 | 122 | raise NotImplementedError 123 | 124 | def reseed_rng(self) -> None: 125 | """Reseed sampler's random number generator. 126 | 127 | This method is called by the :class:`~optuna.study.Study` instance if trials are executed 128 | in parallel with the option ``n_jobs>1``. In that case, the sampler instance will be 129 | replicated including the state of the random number generator, and they may suggest the 130 | same values. 
To prevent this issue, this method assigns a different seed to each random 131 | number generator. 132 | """ 133 | 134 | pass 135 | -------------------------------------------------------------------------------- /boexplain/optuna/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import pkg_resources 5 | from setuptools import find_packages 6 | from setuptools import setup 7 | 8 | from typing import Dict 9 | from typing import List 10 | from typing import Optional 11 | 12 | 13 | def get_version() -> str: 14 | 15 | version_filepath = os.path.join(os.path.dirname(__file__), "optuna", "version.py") 16 | with open(version_filepath) as f: 17 | for line in f: 18 | if line.startswith("__version__"): 19 | return line.strip().split()[-1][1:-1] 20 | assert False 21 | 22 | 23 | def get_long_description() -> str: 24 | 25 | readme_filepath = os.path.join(os.path.dirname(__file__), "README.md") 26 | with open(readme_filepath) as f: 27 | return f.read() 28 | 29 | 30 | def get_install_requires() -> List[str]: 31 | 32 | return [ 33 | "alembic", 34 | "cliff", 35 | "cmaes>=0.5.0", 36 | "colorlog", 37 | "joblib", 38 | "numpy", 39 | "scipy!=1.4.0", 40 | "sqlalchemy>=1.1.0", 41 | "tqdm", 42 | ] 43 | 44 | 45 | def get_tests_require() -> List[str]: 46 | 47 | return get_extras_require()["testing"] 48 | 49 | 50 | def get_extras_require() -> Dict[str, List[str]]: 51 | 52 | requirements = { 53 | "checking": ["black", "hacking", "mypy"], 54 | "codecov": ["codecov", "pytest-cov"], 55 | "doctest": [ 56 | "cma", 57 | "pandas", 58 | "plotly>=4.0.0", 59 | "scikit-learn>=0.19.0,<0.23.0", 60 | "scikit-optimize", 61 | "mlflow", 62 | ], 63 | "document": ["sphinx", "sphinx_rtd_theme"], 64 | "example": [ 65 | "catboost", 66 | "chainer", 67 | "lightgbm", 68 | "mlflow", 69 | "mpi4py", 70 | "mxnet", 71 | "nbval", 72 | "pytorch-ignite", 73 | "scikit-image", 74 | "scikit-learn", 75 | "thop", 76 | "torch==1.4.0" if sys.platform == 
"darwin" else "torch==1.4.0+cpu", 77 | "torchvision==0.5.0" if sys.platform == "darwin" else "torchvision==0.5.0+cpu", 78 | "xgboost", 79 | ] 80 | + ( 81 | ["allennlp<1", "fastai<2", "pytorch-lightning>=0.7.1"] 82 | if (3, 5) < sys.version_info[:2] < (3, 8) 83 | else [] 84 | ) 85 | + ( 86 | ["llvmlite<=0.31.0"] if (3, 5) == sys.version_info[:2] else [] 87 | ) # Newer `llvmlite` is not distributed with wheels for Python 3.5. 88 | + ( 89 | ["dask[dataframe]", "dask-ml", "keras", "tensorflow>=2.0.0", "tensorflow-datasets"] 90 | if sys.version_info[:2] < (3, 8) 91 | else [] 92 | ), 93 | "experimental": ["redis"], 94 | "testing": [ 95 | # TODO(toshihikoyanase): Remove the version constraint after resolving the issue 96 | # https://github.com/optuna/optuna/issues/1000. 97 | "bokeh<2.0.0", 98 | "chainer>=5.0.0", 99 | "cma", 100 | "fakeredis", 101 | "fanova", 102 | "lightgbm", 103 | "mlflow", 104 | "mpi4py", 105 | "mxnet", 106 | "pandas", 107 | "plotly>=4.0.0", 108 | "pytest", 109 | "pytorch-ignite", 110 | "scikit-learn>=0.19.0,<0.23.0", 111 | "scikit-optimize", 112 | "torch==1.4.0" if sys.platform == "darwin" else "torch==1.4.0+cpu", 113 | "torchvision==0.5.0" if sys.platform == "darwin" else "torchvision==0.5.0+cpu", 114 | "xgboost", 115 | ] 116 | + ( 117 | ["allennlp<1", "fastai<2", "pytorch-lightning>=0.7.1"] 118 | if (3, 5) < sys.version_info[:2] < (3, 8) 119 | else [] 120 | ) 121 | + ( 122 | ["keras", "tensorflow", "tensorflow-datasets"] if sys.version_info[:2] < (3, 8) else [] 123 | ), 124 | } 125 | 126 | return requirements 127 | 128 | 129 | def find_any_distribution(pkgs: List[str]) -> Optional[pkg_resources.Distribution]: 130 | 131 | for pkg in pkgs: 132 | try: 133 | return pkg_resources.get_distribution(pkg) 134 | except pkg_resources.DistributionNotFound: 135 | pass 136 | return None 137 | 138 | 139 | pfnopt_pkg = find_any_distribution(["pfnopt"]) 140 | if pfnopt_pkg is not None: 141 | msg = ( 142 | "We detected that PFNOpt is installed in your environment.\n" 
143 | "PFNOpt has been renamed Optuna. Please uninstall the old\n" 144 | "PFNOpt in advance (e.g. by executing `$ pip uninstall pfnopt`)." 145 | ) 146 | print(msg) 147 | exit(1) 148 | 149 | setup( 150 | name="optuna", 151 | version=get_version(), 152 | description="A hyperparameter optimization framework", 153 | long_description=get_long_description(), 154 | long_description_content_type="text/markdown", 155 | author="Takuya Akiba", 156 | author_email="akiba@preferred.jp", 157 | url="https://optuna.org/", 158 | packages=find_packages(), 159 | package_data={ 160 | "optuna": [ 161 | "storages/rdb/alembic.ini", 162 | "storages/rdb/alembic/*.*", 163 | "storages/rdb/alembic/versions/*.*", 164 | ] 165 | }, 166 | install_requires=get_install_requires(), 167 | tests_require=get_tests_require(), 168 | extras_require=get_extras_require(), 169 | entry_points={ 170 | "console_scripts": ["optuna = optuna.cli:main"], 171 | "optuna.command": [ 172 | "create-study = optuna.cli:_CreateStudy", 173 | "delete-study = optuna.cli:_DeleteStudy", 174 | "study set-user-attr = optuna.cli:_StudySetUserAttribute", 175 | "studies = optuna.cli:_Studies", 176 | "dashboard = optuna.cli:_Dashboard", 177 | "study optimize = optuna.cli:_StudyOptimize", 178 | "storage upgrade = optuna.cli:_StorageUpgrade", 179 | ], 180 | }, 181 | ) 182 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/samplers/tpe/parzen_estimator.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | from typing import NamedTuple 3 | from typing import Optional 4 | 5 | import numpy 6 | from numpy import ndarray 7 | 8 | EPS = 1e-12 9 | 10 | 11 | class _ParzenEstimatorParameters( 12 | NamedTuple( 13 | "_ParzenEstimatorParameters", 14 | [ 15 | ("consider_prior", bool), 16 | ("prior_weight", Optional[float]), 17 | ("consider_magic_clip", bool), 18 | ("consider_endpoints", bool), 19 | ("weights", Callable[[int], 
ndarray]), 20 | ], 21 | ) 22 | ): 23 | pass 24 | 25 | 26 | class _ParzenEstimator(object): 27 | def __init__( 28 | self, 29 | mus, # type: ndarray 30 | low, # type: float 31 | high, # type: float 32 | parameters, # type: _ParzenEstimatorParameters 33 | ): 34 | # type: (...) -> None 35 | 36 | self.weights, self.mus, self.sigmas = _ParzenEstimator._calculate( 37 | mus, 38 | low, 39 | high, 40 | parameters.consider_prior, 41 | parameters.prior_weight, 42 | parameters.consider_magic_clip, 43 | parameters.consider_endpoints, 44 | parameters.weights, 45 | ) 46 | 47 | @classmethod 48 | def _calculate( 49 | cls, 50 | mus, # type: ndarray 51 | low, # type: float 52 | high, # type: float 53 | consider_prior, # type: bool 54 | prior_weight, # type: Optional[float] 55 | consider_magic_clip, # type: bool 56 | consider_endpoints, # type: bool 57 | weights_func, # type: Callable[[int], ndarray] 58 | ): 59 | # type: (...) -> Tuple[ndarray, ndarray, ndarray] 60 | """Calculates the weights, mus and sigma for the Parzen estimator. 61 | 62 | Note: When the number of observations is zero, the Parzen estimator ignores the 63 | `consider_prior` flag and utilizes a prior. Validation of this approach is future work. 64 | """ 65 | 66 | # initialize mus and sigmas for the KDE 67 | mus = numpy.asarray(mus) 68 | sigma = numpy.asarray([], dtype=float) 69 | prior_pos = 0 70 | 71 | # Parzen estimator construction requires at least one observation or a prior. 72 | if mus.size == 0: 73 | consider_prior = True 74 | 75 | # consider_prior = True. 
We have a prior over the space of ints 76 | if consider_prior: 77 | # prior mean is the midpoint 78 | prior_mu = 0.5 * (low + high) 79 | # prior std is the range of values 80 | prior_sigma = 1.0 * (high - low) 81 | if mus.size == 0: 82 | low_sorted_mus_high = numpy.zeros(3) 83 | sorted_mus = low_sorted_mus_high[1:-1] 84 | sorted_mus[0] = prior_mu 85 | sigma = numpy.asarray([prior_sigma]) 86 | prior_pos = 0 87 | order = [] # type: List[int] 88 | # THIS CODE ORDERS THE MEANS with the prior, confusing 89 | else: # When mus.size is greater than 0. <- OPTUNA COMMENT 90 | # We decide the place of the prior. <- OPTUNA COMMENT 91 | # order = indices that would sort the mus 92 | order = numpy.argsort(mus).astype(int) 93 | # mus in increasing order 94 | ordered_mus = mus[order] 95 | # find the index where prior_mu should be inserted to maintain order 96 | prior_pos = numpy.searchsorted(ordered_mus, prior_mu) 97 | # We decide the mus. <- OPTUNA COMMENT 98 | # low_sorted_mus_high gets updated with sorted_mus and is used below 99 | low_sorted_mus_high = numpy.zeros(len(mus) + 3) 100 | sorted_mus = low_sorted_mus_high[1:-1] 101 | # insert the prior appropriately in the ordered list of mus 102 | sorted_mus[:prior_pos] = ordered_mus[:prior_pos] 103 | sorted_mus[prior_pos] = prior_mu 104 | sorted_mus[prior_pos + 1 :] = ordered_mus[prior_pos:] 105 | else: 106 | order = numpy.argsort(mus) 107 | # We decide the mus. 108 | low_sorted_mus_high = numpy.zeros(len(mus) + 2) 109 | sorted_mus = low_sorted_mus_high[1:-1] 110 | sorted_mus[:] = mus[order] 111 | 112 | # We decide the sigma. 
113 | if mus.size > 0: 114 | low_sorted_mus_high[-1] = high 115 | low_sorted_mus_high[0] = low 116 | # the standard deviation of each Gaussian was set to the greater of the distances to the left and right neighbour 117 | sigma = numpy.maximum( 118 | low_sorted_mus_high[1:-1] - low_sorted_mus_high[0:-2], 119 | low_sorted_mus_high[2:] - low_sorted_mus_high[1:-1], 120 | ) 121 | # If not considering endpoints, set the std of the min and max mus to be the 122 | # distance from its only neighbours, DEFAULT consider_endpoints=False 123 | if not consider_endpoints and low_sorted_mus_high.size > 2: 124 | sigma[0] = low_sorted_mus_high[2] - low_sorted_mus_high[1] 125 | sigma[-1] = low_sorted_mus_high[-2] - low_sorted_mus_high[-3] 126 | 127 | # We decide the weights. <- OPTUNA 128 | # Ramp of weights 129 | unsorted_weights = weights_func(mus.size) 130 | if consider_prior: 131 | # array of zeros in the shape of sorted_mus 132 | sorted_weights = numpy.zeros_like(sorted_mus) 133 | # sort the weights based on the increasing order of the mus 134 | sorted_weights[:prior_pos] = unsorted_weights[order[:prior_pos]] 135 | sorted_weights[prior_pos] = prior_weight 136 | sorted_weights[prior_pos + 1 :] = unsorted_weights[order[prior_pos:]] 137 | else: 138 | sorted_weights = unsorted_weights[order] 139 | # normalize the weights 140 | sorted_weights /= sorted_weights.sum() 141 | 142 | # We adjust the range of the 'sigma' according to the 'consider_magic_clip' flag. 
<-OTPUNA 143 | # Original TPE paper clips stds to remain in feasible range 144 | # largest std in sigma array 145 | maxsigma = 1.0 * (high - low) 146 | # limit the smallest stds in a gaussian distribution 147 | if consider_magic_clip: 148 | minsigma = 1.0 * (high - low) / min(100.0, (1.0 + len(sorted_mus))) 149 | else: 150 | minsigma = EPS 151 | # set all sigmas to be between minsigma and maxsigma 152 | sigma = numpy.clip(sigma, minsigma, maxsigma) 153 | if consider_prior: 154 | # don't modify the prior std 155 | sigma[prior_pos] = prior_sigma 156 | 157 | return sorted_weights, sorted_mus, sigma 158 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/trial/_fixed.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | # from optuna import distributions 4 | # from optuna.trial._base import BaseTrial 5 | # from optuna.trial._util import _adjust_discrete_uniform_high 6 | from .. import distributions 7 | from ._base import BaseTrial 8 | from ._util import _adjust_discrete_uniform_high 9 | 10 | 11 | class FixedTrial(BaseTrial): 12 | """A trial class which suggests a fixed value for each parameter. 13 | 14 | This object has the same methods as :class:`~optuna.trial.Trial`, and it suggests pre-defined 15 | parameter values. The parameter values can be determined at the construction of the 16 | :class:`~optuna.trial.FixedTrial` object. In contrast to :class:`~optuna.trial.Trial`, 17 | :class:`~optuna.trial.FixedTrial` does not depend on :class:`~optuna.study.Study`, and it is 18 | useful for deploying optimization results. 19 | 20 | Example: 21 | 22 | Evaluate an objective function with parameter values given by a user. 23 | 24 | .. 
testcode:: 25 | 26 | import optuna 27 | 28 | def objective(trial): 29 | x = trial.suggest_uniform('x', -100, 100) 30 | y = trial.suggest_categorical('y', [-1, 0, 1]) 31 | return x ** 2 + y 32 | 33 | assert objective(optuna.trial.FixedTrial({'x': 1, 'y': 0})) == 1 34 | 35 | 36 | .. note:: 37 | Please refer to :class:`~optuna.trial.Trial` for details of methods and properties. 38 | 39 | Args: 40 | params: 41 | A dictionary containing all parameters. 42 | number: 43 | A trial number. Defaults to ``0``. 44 | 45 | """ 46 | 47 | def __init__(self, params, number=0): 48 | # type: (Dict[str, Any], int) -> None 49 | 50 | self._params = params 51 | self._suggested_params = {} # type: Dict[str, Any] 52 | self._distributions = {} # type: Dict[str, BaseDistribution] 53 | self._user_attrs = {} # type: Dict[str, Any] 54 | self._system_attrs = {} # type: Dict[str, Any] 55 | self._datetime_start = datetime.datetime.now() 56 | self._number = number 57 | 58 | def suggest_float(self, name, low, high, *, log=False, step=None): 59 | # type: (str, float, float, bool, Optional[float]) -> float 60 | 61 | if step is not None: 62 | if log: 63 | raise NotImplementedError( 64 | "The parameter `step` is not supported when `log` is True." 
65 | ) 66 | else: 67 | return self._suggest( 68 | name, distributions.DiscreteUniformDistribution(low=low, high=high, q=step) 69 | ) 70 | else: 71 | if log: 72 | return self._suggest( 73 | name, distributions.LogUniformDistribution(low=low, high=high) 74 | ) 75 | else: 76 | return self._suggest(name, distributions.UniformDistribution(low=low, high=high)) 77 | 78 | def suggest_uniform(self, name, low, high): 79 | # type: (str, float, float) -> float 80 | 81 | return self._suggest(name, distributions.UniformDistribution(low=low, high=high)) 82 | 83 | def suggest_loguniform(self, name, low, high): 84 | # type: (str, float, float) -> float 85 | 86 | return self._suggest(name, distributions.LogUniformDistribution(low=low, high=high)) 87 | 88 | def suggest_discrete_uniform(self, name, low, high, q): 89 | # type: (str, float, float, float) -> float 90 | 91 | high = _adjust_discrete_uniform_high(name, low, high, q) 92 | discrete = distributions.DiscreteUniformDistribution(low=low, high=high, q=q) 93 | return self._suggest(name, discrete) 94 | 95 | def suggest_int(self, name, low, high, step=1, log=False): 96 | # type: (str, int, int, int, bool) -> int 97 | if log: 98 | sample = self._suggest( 99 | name, distributions.IntLogUniformDistribution(low=low, high=high, step=step) 100 | ) 101 | else: 102 | sample = self._suggest( 103 | name, distributions.IntUniformDistribution(low=low, high=high, step=step) 104 | ) 105 | return int(sample) 106 | 107 | def suggest_categorical(self, name, choices): 108 | # type: (str, Sequence[CategoricalChoiceType]) -> CategoricalChoiceType 109 | 110 | choices = tuple(choices) 111 | return self._suggest(name, distributions.CategoricalDistribution(choices=choices)) 112 | 113 | def _suggest(self, name, distribution): 114 | # type: (str, BaseDistribution) -> Any 115 | 116 | if name not in self._params: 117 | raise ValueError( 118 | "The value of the parameter '{}' is not found. 
Please set it at " 119 | "the construction of the FixedTrial object.".format(name) 120 | ) 121 | 122 | value = self._params[name] 123 | param_value_in_internal_repr = distribution.to_internal_repr(value) 124 | if not distribution._contains(param_value_in_internal_repr): 125 | raise ValueError( 126 | "The value {} of the parameter '{}' is out of " 127 | "the range of the distribution {}.".format(value, name, distribution) 128 | ) 129 | 130 | if name in self._distributions: 131 | distributions.check_distribution_compatibility(self._distributions[name], distribution) 132 | 133 | self._suggested_params[name] = value 134 | self._distributions[name] = distribution 135 | 136 | return value 137 | 138 | def report(self, value, step): 139 | # type: (float, int) -> None 140 | 141 | pass 142 | 143 | def should_prune(self, step=None): 144 | # type: (Optional[int]) -> bool 145 | 146 | return False 147 | 148 | def set_user_attr(self, key, value): 149 | # type: (str, Any) -> None 150 | 151 | self._user_attrs[key] = value 152 | 153 | def set_system_attr(self, key, value): 154 | # type: (str, Any) -> None 155 | 156 | self._system_attrs[key] = value 157 | 158 | @property 159 | def params(self): 160 | # type: () -> Dict[str, Any] 161 | 162 | return self._suggested_params 163 | 164 | @property 165 | def distributions(self): 166 | # type: () -> Dict[str, BaseDistribution] 167 | 168 | return self._distributions 169 | 170 | @property 171 | def user_attrs(self): 172 | # type: () -> Dict[str, Any] 173 | 174 | return self._user_attrs 175 | 176 | @property 177 | def system_attrs(self): 178 | # type: () -> Dict[str, Any] 179 | 180 | return self._system_attrs 181 | 182 | @property 183 | def datetime_start(self): 184 | # type: () -> Optional[datetime.datetime] 185 | 186 | return self._datetime_start 187 | 188 | @property 189 | def number(self) -> int: 190 | 191 | return self._number 192 | -------------------------------------------------------------------------------- 
/boexplain/optuna/optuna/logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from logging import CRITICAL # NOQA 3 | from logging import DEBUG # NOQA 4 | from logging import ERROR # NOQA 5 | from logging import FATAL # NOQA 6 | from logging import INFO # NOQA 7 | from logging import WARN # NOQA 8 | from logging import WARNING # NOQA 9 | import threading 10 | 11 | import colorlog 12 | 13 | _lock = threading.Lock() 14 | _default_handler = None # type: Optional[logging.Handler] 15 | 16 | 17 | def create_default_formatter() -> colorlog.ColoredFormatter: 18 | """Create a default formatter of log messages. 19 | 20 | This function is not supposed to be directly accessed by library users. 21 | """ 22 | 23 | return colorlog.ColoredFormatter( 24 | "%(log_color)s[%(levelname)1.1s %(asctime)s]%(reset)s %(message)s" 25 | ) 26 | 27 | 28 | def _get_library_name() -> str: 29 | 30 | return __name__.split(".")[0] 31 | 32 | 33 | def _get_library_root_logger() -> logging.Logger: 34 | 35 | return logging.getLogger(_get_library_name()) 36 | 37 | 38 | def _configure_library_root_logger() -> None: 39 | 40 | global _default_handler 41 | 42 | with _lock: 43 | if _default_handler: 44 | # This library has already configured the library root logger. 45 | return 46 | _default_handler = logging.StreamHandler() # Set sys.stderr as stream. 47 | _default_handler.setFormatter(create_default_formatter()) 48 | 49 | # Apply our default configuration to the library root logger. 
50 | library_root_logger = _get_library_root_logger() 51 | library_root_logger.addHandler(_default_handler) 52 | library_root_logger.setLevel(logging.INFO) 53 | library_root_logger.propagate = False 54 | 55 | 56 | def _reset_library_root_logger() -> None: 57 | 58 | global _default_handler 59 | 60 | with _lock: 61 | if not _default_handler: 62 | return 63 | 64 | library_root_logger = _get_library_root_logger() 65 | library_root_logger.removeHandler(_default_handler) 66 | library_root_logger.setLevel(logging.NOTSET) 67 | _default_handler = None 68 | 69 | 70 | def get_logger(name: str) -> logging.Logger: 71 | """Return a logger with the specified name. 72 | 73 | This function is not supposed to be directly accessed by library users. 74 | """ 75 | 76 | _configure_library_root_logger() 77 | return logging.getLogger(name) 78 | 79 | 80 | def get_verbosity() -> int: 81 | """Return the current level for the Optuna's root logger. 82 | 83 | Returns: 84 | Logging level, e.g., ``optuna.logging.DEBUG`` and ``optuna.logging.INFO``. 85 | 86 | .. note:: 87 | Optuna has following logging levels: 88 | 89 | - ``optuna.logging.CRITICAL``, ``optuna.logging.FATAL`` 90 | - ``optuna.logging.ERROR`` 91 | - ``optuna.logging.WARNING``, ``optuna.logging.WARN`` 92 | - ``optuna.logging.INFO`` 93 | - ``optuna.logging.DEBUG`` 94 | """ 95 | 96 | _configure_library_root_logger() 97 | return _get_library_root_logger().getEffectiveLevel() 98 | 99 | 100 | def set_verbosity(verbosity: int) -> None: 101 | """Set the level for the Optuna's root logger. 102 | 103 | Args: 104 | verbosity: 105 | Logging level, e.g., ``optuna.logging.DEBUG`` and ``optuna.logging.INFO``. 106 | """ 107 | 108 | _configure_library_root_logger() 109 | _get_library_root_logger().setLevel(verbosity) 110 | 111 | 112 | def disable_default_handler() -> None: 113 | """Disable the default handler of the Optuna's root logger. 114 | 115 | Example: 116 | 117 | Stop and then resume logging to :obj:`sys.stderr`. 118 | 119 | .. 
testsetup:: 120 | 121 | def objective(trial): 122 | x = trial.suggest_uniform('x', -100, 100) 123 | y = trial.suggest_categorical('y', [-1, 0, 1]) 124 | return x ** 2 + y 125 | 126 | .. testcode:: 127 | 128 | import optuna 129 | 130 | study = optuna.create_study() 131 | 132 | # There are no logs in sys.stderr. 133 | optuna.logging.disable_default_handler() 134 | study.optimize(objective, n_trials=10) 135 | 136 | # There are logs in sys.stderr. 137 | optuna.logging.enable_default_handler() 138 | study.optimize(objective, n_trials=10) 139 | # [I 2020-02-23 17:00:54,314] Finished trial#10 with value: ... 140 | # [I 2020-02-23 17:00:54,356] Finished trial#11 with value: ... 141 | # ... 142 | 143 | """ 144 | 145 | _configure_library_root_logger() 146 | 147 | assert _default_handler is not None 148 | _get_library_root_logger().removeHandler(_default_handler) 149 | 150 | 151 | def enable_default_handler() -> None: 152 | """Enable the default handler of the Optuna's root logger. 153 | 154 | Please refer to the example shown in :func:`~optuna.logging.disable_default_handler()`. 155 | """ 156 | 157 | _configure_library_root_logger() 158 | 159 | assert _default_handler is not None 160 | _get_library_root_logger().addHandler(_default_handler) 161 | 162 | 163 | def disable_propagation() -> None: 164 | """Disable propagation of the library log outputs. 165 | 166 | Note that log propagation is disabled by default. 167 | """ 168 | 169 | _configure_library_root_logger() 170 | _get_library_root_logger().propagate = False 171 | 172 | 173 | def enable_propagation() -> None: 174 | """Enable propagation of the library log outputs. 175 | 176 | Please disable the Optuna's default handler to prevent double logging if the root logger has 177 | been configured. 178 | 179 | Example: 180 | 181 | Propagate all log output to the root logger in order to save them to the file. 182 | 183 | .. 
testsetup:: 184 | 185 | def objective(trial): 186 | x = trial.suggest_uniform('x', -100, 100) 187 | y = trial.suggest_categorical('y', [-1, 0, 1]) 188 | return x ** 2 + y 189 | 190 | .. testcode:: 191 | 192 | import optuna 193 | import logging 194 | 195 | logger = logging.getLogger() 196 | 197 | logger.setLevel(logging.INFO) # Setup the root logger. 198 | logger.addHandler(logging.FileHandler("foo.log", mode="w")) 199 | 200 | optuna.logging.enable_propagation() # Propagate logs to the root logger. 201 | optuna.logging.disable_default_handler() # Stop showing logs in sys.stderr. 202 | 203 | study = optuna.create_study() 204 | 205 | logger.info("Start optimization.") 206 | study.optimize(objective, n_trials=10) 207 | 208 | with open('foo.log') as f: 209 | assert f.readline() == "Start optimization.\\n" 210 | assert f.readline().startswith("Finished trial#0 with value:") 211 | 212 | """ 213 | 214 | _configure_library_root_logger() 215 | _get_library_root_logger().propagate = True 216 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/trial/_frozen.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import warnings 3 | 4 | # from optuna import distributions 5 | # from optuna import logging 6 | # from optuna.trial._state import TrialState 7 | from .. import distributions 8 | from .. import logging 9 | from ._state import TrialState 10 | 11 | _logger = logging.get_logger(__name__) 12 | 13 | 14 | class FrozenTrial(object): 15 | """Status and results of a :class:`~optuna.trial.Trial`. 16 | 17 | Attributes: 18 | number: 19 | Unique and consecutive number of :class:`~optuna.trial.Trial` for each 20 | :class:`~optuna.study.Study`. Note that this field uses zero-based numbering. 21 | state: 22 | :class:`TrialState` of the :class:`~optuna.trial.Trial`. 23 | value: 24 | Objective value of the :class:`~optuna.trial.Trial`. 
25 | datetime_start: 26 | Datetime where the :class:`~optuna.trial.Trial` started. 27 | datetime_complete: 28 | Datetime where the :class:`~optuna.trial.Trial` finished. 29 | params: 30 | Dictionary that contains suggested parameters. 31 | user_attrs: 32 | Dictionary that contains the attributes of the :class:`~optuna.trial.Trial` set with 33 | :func:`optuna.trial.Trial.set_user_attr`. 34 | intermediate_values: 35 | Intermediate objective values set with :func:`optuna.trial.Trial.report`. 36 | """ 37 | 38 | def __init__( 39 | self, 40 | number, # type: int 41 | state, # type: TrialState 42 | value, # type: Optional[float] 43 | datetime_start, # type: Optional[datetime.datetime] 44 | datetime_complete, # type: Optional[datetime.datetime] 45 | params, # type: Dict[str, Any] 46 | distributions, # type: Dict[str, BaseDistribution] 47 | user_attrs, # type: Dict[str, Any] 48 | system_attrs, # type: Dict[str, Any] 49 | intermediate_values, # type: Dict[int, float] 50 | trial_id, # type: int 51 | ): 52 | # type: (...) -> None 53 | 54 | self.number = number 55 | self.state = state 56 | self.value = value 57 | self.datetime_start = datetime_start 58 | self.datetime_complete = datetime_complete 59 | self.params = params 60 | self.user_attrs = user_attrs 61 | self.system_attrs = system_attrs 62 | self.intermediate_values = intermediate_values 63 | self._distributions = distributions 64 | self._trial_id = trial_id 65 | 66 | # Ordered list of fields required for `__repr__`, `__hash__` and dataframe creation. 67 | # TODO(hvy): Remove this list in Python 3.6 as the order of `self.__dict__` is preserved. 
68 | _ordered_fields = [ 69 | "number", 70 | "value", 71 | "datetime_start", 72 | "datetime_complete", 73 | "params", 74 | "_distributions", 75 | "user_attrs", 76 | "system_attrs", 77 | "intermediate_values", 78 | "_trial_id", 79 | "state", 80 | ] 81 | 82 | def __eq__(self, other): 83 | # type: (Any) -> bool 84 | 85 | if not isinstance(other, FrozenTrial): 86 | return NotImplemented 87 | return other.__dict__ == self.__dict__ 88 | 89 | def __lt__(self, other): 90 | # type: (Any) -> bool 91 | 92 | if not isinstance(other, FrozenTrial): 93 | return NotImplemented 94 | 95 | return self.number < other.number 96 | 97 | def __le__(self, other): 98 | # type: (Any) -> bool 99 | 100 | if not isinstance(other, FrozenTrial): 101 | return NotImplemented 102 | 103 | return self.number <= other.number 104 | 105 | def __hash__(self): 106 | # type: () -> int 107 | 108 | return hash(tuple(getattr(self, field) for field in self._ordered_fields)) 109 | 110 | def __repr__(self): 111 | # type: () -> str 112 | 113 | return "{cls}({kwargs})".format( 114 | cls=self.__class__.__name__, 115 | kwargs=", ".join( 116 | "{field}={value}".format( 117 | field=field if not field.startswith("_") else field[1:], 118 | value=repr(getattr(self, field)), 119 | ) 120 | for field in self._ordered_fields 121 | ), 122 | ) 123 | 124 | def _validate(self): 125 | # type: () -> None 126 | 127 | if self.datetime_start is None: 128 | raise ValueError("`datetime_start` is supposed to be set.") 129 | 130 | if self.state.is_finished(): 131 | if self.datetime_complete is None: 132 | raise ValueError("`datetime_complete` is supposed to be set for a finished trial.") 133 | else: 134 | if self.datetime_complete is not None: 135 | raise ValueError( 136 | "`datetime_complete` is supposed to be None for an unfinished trial." 
137 | ) 138 | 139 | if self.state == TrialState.COMPLETE and self.value is None: 140 | raise ValueError("`value` is supposed to be set for a complete trial.") 141 | 142 | if set(self.params.keys()) != set(self.distributions.keys()): 143 | raise ValueError( 144 | "Inconsistent parameters {} and distributions {}.".format( 145 | set(self.params.keys()), set(self.distributions.keys()) 146 | ) 147 | ) 148 | 149 | for param_name, param_value in self.params.items(): 150 | distribution = self.distributions[param_name] 151 | 152 | param_value_in_internal_repr = distribution.to_internal_repr(param_value) 153 | if not distribution._contains(param_value_in_internal_repr): 154 | raise ValueError( 155 | "The value {} of parameter '{}' isn't contained in the distribution " 156 | "{}.".format(param_value, param_name, distribution) 157 | ) 158 | 159 | @property 160 | def distributions(self): 161 | # type: () -> Dict[str, BaseDistribution] 162 | """Dictionary that contains the distributions of :attr:`params`.""" 163 | 164 | return self._distributions 165 | 166 | @distributions.setter 167 | def distributions(self, value): 168 | # type: (Dict[str, BaseDistribution]) -> None 169 | self._distributions = value 170 | 171 | @property 172 | def trial_id(self): 173 | # type: () -> int 174 | """Return the trial ID. 175 | 176 | .. deprecated:: 0.19.0 177 | The direct use of this attribute is deprecated and it is recommended that you use 178 | :attr:`~optuna.trial.FrozenTrial.number` instead. 179 | 180 | Returns: 181 | The trial ID. 182 | """ 183 | 184 | warnings.warn( 185 | "The use of `FrozenTrial.trial_id` is deprecated. " 186 | "Please use `FrozenTrial.number` instead.", 187 | DeprecationWarning, 188 | ) 189 | 190 | _logger.warning( 191 | "The use of `FrozenTrial.trial_id` is deprecated. " 192 | "Please use `FrozenTrial.number` instead." 
193 | ) 194 | 195 | return self._trial_id 196 | 197 | @property 198 | def last_step(self): 199 | # type: () -> Optional[int] 200 | 201 | if len(self.intermediate_values) == 0: 202 | return None 203 | else: 204 | return max(self.intermediate_values.keys()) 205 | 206 | @property 207 | def duration(self): 208 | # type: () -> Optional[datetime.timedelta] 209 | """Return the elapsed time taken to complete the trial. 210 | 211 | Returns: 212 | The duration. 213 | """ 214 | 215 | if self.datetime_start and self.datetime_complete: 216 | return self.datetime_complete - self.datetime_start 217 | else: 218 | return None 219 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/structs.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from optuna import _study_direction 4 | from optuna import exceptions 5 | from optuna import logging 6 | from optuna import trial 7 | 8 | 9 | _logger = logging.get_logger(__name__) 10 | 11 | _message = ( 12 | "`structs` is deprecated. Classes have moved to the following modules. " 13 | "`structs.StudyDirection`->`study.StudyDirection`, " 14 | "`structs.StudySummary`->`study.StudySummary`, " 15 | "`structs.FrozenTrial`->`trial.FrozenTrial`, " 16 | "`structs.TrialState`->`trial.TrialState`, " 17 | "`structs.TrialPruned`->`exceptions.TrialPruned`." 18 | ) 19 | warnings.warn(_message, DeprecationWarning) 20 | _logger.warning(_message) 21 | 22 | # The use of the structs.StudyDirection is deprecated and it is recommended that you use 23 | # study.StudyDirection instead. See the API reference for more details. 24 | StudyDirection = _study_direction.StudyDirection 25 | 26 | # The use of the structs.TrialState is deprecated and it is recommended that you use 27 | # trial.TrialState instead. See the API reference for more details. 
28 | TrialState = trial.TrialState 29 | 30 | 31 | class FrozenTrial(object): 32 | """Status and results of a :class:`~optuna.trial.Trial`. 33 | 34 | .. deprecated:: 1.4.0 35 | 36 | This class was moved to :mod:`~optuna.trial`. Please use 37 | :class:`~optuna.trial.FrozenTrial` instead. 38 | 39 | Attributes: 40 | number: 41 | Unique and consecutive number of :class:`~optuna.trial.Trial` for each 42 | :class:`~optuna.study.Study`. Note that this field uses zero-based numbering. 43 | state: 44 | :class:`TrialState` of the :class:`~optuna.trial.Trial`. 45 | value: 46 | Objective value of the :class:`~optuna.trial.Trial`. 47 | datetime_start: 48 | Datetime where the :class:`~optuna.trial.Trial` started. 49 | datetime_complete: 50 | Datetime where the :class:`~optuna.trial.Trial` finished. 51 | params: 52 | Dictionary that contains suggested parameters. 53 | user_attrs: 54 | Dictionary that contains the attributes of the :class:`~optuna.trial.Trial` set with 55 | :func:`optuna.trial.Trial.set_user_attr`. 56 | intermediate_values: 57 | Intermediate objective values set with :func:`optuna.trial.Trial.report`. 58 | """ 59 | 60 | def __init__( 61 | self, 62 | number, # type: int 63 | state, # type: TrialState 64 | value, # type: Optional[float] 65 | datetime_start, # type: Optional[datetime] 66 | datetime_complete, # type: Optional[datetime] 67 | params, # type: Dict[str, Any] 68 | distributions, # type: Dict[str, BaseDistribution] 69 | user_attrs, # type: Dict[str, Any] 70 | system_attrs, # type: Dict[str, Any] 71 | intermediate_values, # type: Dict[int, float] 72 | trial_id, # type: int 73 | ): 74 | # type: (...) -> None 75 | 76 | message = ( 77 | "The use of `structs.FrozenTrial` is deprecated. " 78 | "Please use `trial.FrozenTrial` instead." 
79 | ) 80 | warnings.warn(message, DeprecationWarning) 81 | _logger.warning(message) 82 | 83 | self.number = number 84 | self.state = state 85 | self.value = value 86 | self.datetime_start = datetime_start 87 | self.datetime_complete = datetime_complete 88 | self.params = params 89 | self.user_attrs = user_attrs 90 | self.system_attrs = system_attrs 91 | self.intermediate_values = intermediate_values 92 | self._distributions = distributions 93 | self._trial_id = trial_id 94 | 95 | # Ordered list of fields required for `__repr__`, `__hash__` and dataframe creation. 96 | # TODO(hvy): Remove this list in Python 3.6 as the order of `self.__dict__` is preserved. 97 | _ordered_fields = [ 98 | "number", 99 | "value", 100 | "datetime_start", 101 | "datetime_complete", 102 | "params", 103 | "_distributions", 104 | "user_attrs", 105 | "system_attrs", 106 | "intermediate_values", 107 | "_trial_id", 108 | "state", 109 | ] 110 | 111 | def __eq__(self, other): 112 | # type: (Any) -> bool 113 | 114 | if not isinstance(other, FrozenTrial): 115 | return NotImplemented 116 | return other.__dict__ == self.__dict__ 117 | 118 | def __lt__(self, other): 119 | # type: (Any) -> bool 120 | 121 | if not isinstance(other, FrozenTrial): 122 | return NotImplemented 123 | 124 | return self.number < other.number 125 | 126 | def __le__(self, other): 127 | # type: (Any) -> bool 128 | 129 | if not isinstance(other, FrozenTrial): 130 | return NotImplemented 131 | 132 | return self.number <= other.number 133 | 134 | def __hash__(self): 135 | # type: () -> int 136 | 137 | return hash(tuple(getattr(self, field) for field in self._ordered_fields)) 138 | 139 | def __repr__(self): 140 | # type: () -> str 141 | 142 | return "{cls}({kwargs})".format( 143 | cls=self.__class__.__name__, 144 | kwargs=", ".join( 145 | "{field}={value}".format( 146 | field=field if not field.startswith("_") else field[1:], 147 | value=repr(getattr(self, field)), 148 | ) 149 | for field in self._ordered_fields 150 | ), 151 | ) 
152 | 153 | def _validate(self): 154 | # type: () -> None 155 | 156 | if self.datetime_start is None: 157 | raise ValueError("`datetime_start` is supposed to be set.") 158 | 159 | if self.state.is_finished(): 160 | if self.datetime_complete is None: 161 | raise ValueError("`datetime_complete` is supposed to be set for a finished trial.") 162 | else: 163 | if self.datetime_complete is not None: 164 | raise ValueError( 165 | "`datetime_complete` is supposed to be None for an unfinished trial." 166 | ) 167 | 168 | if self.state == TrialState.COMPLETE and self.value is None: 169 | raise ValueError("`value` is supposed to be set for a complete trial.") 170 | 171 | if set(self.params.keys()) != set(self.distributions.keys()): 172 | raise ValueError( 173 | "Inconsistent parameters {} and distributions {}.".format( 174 | set(self.params.keys()), set(self.distributions.keys()) 175 | ) 176 | ) 177 | 178 | for param_name, param_value in self.params.items(): 179 | distribution = self.distributions[param_name] 180 | 181 | param_value_in_internal_repr = distribution.to_internal_repr(param_value) 182 | if not distribution._contains(param_value_in_internal_repr): 183 | raise ValueError( 184 | "The value {} of parameter '{}' isn't contained in the distribution " 185 | "{}.".format(param_value, param_name, distribution) 186 | ) 187 | 188 | @property 189 | def distributions(self): 190 | # type: () -> Dict[str, BaseDistribution] 191 | """Dictionary that contains the distributions of :attr:`params`.""" 192 | 193 | return self._distributions 194 | 195 | @distributions.setter 196 | def distributions(self, value): 197 | # type: (Dict[str, BaseDistribution]) -> None 198 | self._distributions = value 199 | 200 | @property 201 | def last_step(self): 202 | # type: () -> Optional[int] 203 | 204 | if len(self.intermediate_values) == 0: 205 | return None 206 | else: 207 | return max(self.intermediate_values.keys()) 208 | 209 | @property 210 | def duration(self): 211 | # type: () -> 
Optional[timedelta] 212 | """Return the elapsed time taken to complete the trial. 213 | 214 | Returns: 215 | The duration. 216 | """ 217 | 218 | if self.datetime_start and self.datetime_complete: 219 | return self.datetime_complete - self.datetime_start 220 | else: 221 | return None 222 | 223 | 224 | class TrialPruned(exceptions.TrialPruned): 225 | """Exception for pruned trials. 226 | 227 | .. deprecated:: 0.19.0 228 | 229 | This class was moved to :mod:`~optuna.exceptions`. Please use 230 | :class:`~optuna.exceptions.TrialPruned` instead. 231 | """ 232 | 233 | def __init__(self, *args, **kwargs): 234 | # type: (Any, Any) -> None 235 | 236 | message = ( 237 | "The use of `optuna.structs.TrialPruned` is deprecated. " 238 | "Please use `optuna.exceptions.TrialPruned` instead." 239 | ) 240 | warnings.warn(message, DeprecationWarning) 241 | _logger.warning(message) 242 | -------------------------------------------------------------------------------- /boexplain/files/search.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from pandas.api.types import is_numeric_dtype 4 | 5 | from .cat_xform import individual_contribution 6 | from .tpe_wrapper import TpeBo 7 | from .stats import Experiment, Stats 8 | 9 | CAT_ALG_MAP = { 10 | "individual_contribution": "individual_contribution_warm_start_top1", 11 | "categorical": "categorical", 12 | "categorical_warm_start": "categorical_warm_start", 13 | } 14 | 15 | 16 | def fmin( 17 | data, 18 | f, 19 | num_cols=[], 20 | cat_cols=[], 21 | columns=[], 22 | cat_alg=["individual_contribution"], 23 | n_trials=2000, 24 | runtime=10000, 25 | runs=1, 26 | k=5, 27 | random=False, 28 | correct_pred=None, 29 | increment=5, 30 | name="experiment_name", 31 | file=None, 32 | return_viz=False, 33 | use_seeds_from_paper=False, 34 | **kwargs, 35 | ): 36 | """ 37 | Use BOExplain to minimize the objective function. 
38 | 39 | Parameters 40 | ---------- 41 | 42 | data 43 | pandas DataFrame of source, training, or inference data 44 | from which to derive an explanation. 45 | f 46 | Objective function to be minimized. 47 | num_cols 48 | Numerical columns over which to derive an explanation. 49 | cat_cols 50 | Categorical columns over which to derive an explanation. 51 | columns 52 | Columns over which to derive an explanation. 53 | cat_alg 54 | Algorithms to handle categorical parameters. Can be 55 | * 'individual_contribution' 56 | * 'categorical' 57 | * 'categorical_warm_start' 58 | See the paper for details. 59 | n_trials 60 | Maximum number of trials to perform during a run. 61 | runtime 62 | Maximum allowed time for a run in seconds. 63 | runs 64 | Number of runs to perform. 65 | k 66 | Number of TPE candidates to consider. (deprecated) 67 | random 68 | If True, perform a run using random search to 69 | find the constraint parameters. 70 | correct_pred 71 | If provided, will compute f-score, precision, recall, 72 | and jaccard similarity of the found predicates and 73 | the correct predicate 74 | increment 75 | How frequently (in seconds) to log results when finding the best 76 | result in each increment. 77 | name 78 | The name of an experiment. 79 | file 80 | File name to output statistics from the run. 81 | return_viz 82 | If True, return an Altair visualization of the objective function 83 | with iteration on the x-axis. 84 | use_seeds_from_paper 85 | If True, use the seeds that were used in the paper. For reproducibility. 
86 | 87 | Returns 88 | ------- 89 | 90 | The input DataFrame filtered to contain all tuples that do not 91 | satisfy the explanation 92 | """ 93 | 94 | return _drop_tuples_satisfying_optimal_predicate( 95 | data, 96 | f, 97 | num_cols, 98 | cat_cols, 99 | columns, 100 | cat_alg, 101 | n_trials, 102 | runtime, 103 | runs, 104 | k, 105 | random, 106 | correct_pred, 107 | increment, 108 | name, 109 | file, 110 | return_viz, 111 | use_seeds_from_paper, 112 | direction="minimize", 113 | **kwargs, 114 | ) 115 | 116 | 117 | def fmax( 118 | data, 119 | f, 120 | num_cols=[], 121 | cat_cols=[], 122 | columns=[], 123 | cat_alg=["individual_contribution"], 124 | n_trials=2000, 125 | runtime=10000, 126 | runs=1, 127 | k=5, 128 | random=False, 129 | correct_pred=None, 130 | increment=5, 131 | name="experiment_name", 132 | file=None, 133 | return_viz=False, 134 | use_seeds_from_paper=False, 135 | **kwargs, 136 | ): 137 | """ 138 | Use BOExplain to maximize the objective function. 139 | 140 | Parameters 141 | ---------- 142 | 143 | data 144 | pandas DataFrame of source, training, or inference data 145 | from which to derive an explanation. 146 | f 147 | Objective function to be minimized. 148 | num_cols 149 | Numerical columns over which to derive an explanation. 150 | cat_cols 151 | Categorical columns over which to derive an explanation. 152 | columns 153 | Columns over which to derive an explanation. 154 | cat_alg 155 | Algorithms to handle categorical parameters. Can be 156 | * 'individual_contribution' 157 | * 'categorical' 158 | * 'categorical_warm_start' 159 | See the paper for details. 160 | n_trials 161 | Maximum number of trials to perform during a run. 162 | runtime 163 | Maximum allowed time for a run in seconds. 164 | runs 165 | Number of runs to perform. 166 | k 167 | Number of TPE candidates to consider. (deprecated) 168 | random 169 | If True, perform a run using random search to 170 | find the constraint parameters. 
171 | correct_pred 172 | If provided, will compute f-score, precision, recall, 173 | and jaccard similarity of the found predicates and 174 | the correct predicate 175 | increment 176 | How frequently (in seconds) to log results when finding the best 177 | result in each increment. 178 | name 179 | The name of an experiment. 180 | file 181 | File name to output statistics from the run. 182 | return_viz 183 | If True, return an Altair visualization of the objective function 184 | with iteration on the x-axis. 185 | use_seeds_from_paper 186 | If True, use the seeds that were used in the paper. For reproducibility. 187 | 188 | Returns 189 | ------- 190 | 191 | The input DataFrame filtered to contain all tuples that do not 192 | satisfy the explanation 193 | """ 194 | return _drop_tuples_satisfying_optimal_predicate( 195 | data, 196 | f, 197 | num_cols, 198 | cat_cols, 199 | columns, 200 | cat_alg, 201 | n_trials, 202 | runtime, 203 | runs, 204 | k, 205 | random, 206 | correct_pred, 207 | increment, 208 | name, 209 | file, 210 | return_viz, 211 | use_seeds_from_paper, 212 | direction="maximize", 213 | **kwargs, 214 | ) 215 | 216 | 217 | def _drop_tuples_satisfying_optimal_predicate( 218 | data, 219 | f, 220 | num_cols=[], 221 | cat_cols=[], 222 | columns=[], 223 | cat_alg=["individual_contribution"], 224 | n_trials=2000, 225 | runtime=10000, 226 | runs=1, 227 | k=5, 228 | random=False, 229 | correct_pred=None, 230 | increment=5, 231 | name="experiment_name", 232 | file=None, 233 | return_viz=False, 234 | use_seeds_from_paper=False, 235 | direction="minimize", 236 | **kwargs, 237 | ): 238 | assert direction == "minimize" or direction == "maximize" 239 | 240 | for col in columns: 241 | if is_numeric_dtype(data[col]): 242 | num_cols.append(col) 243 | else: 244 | cat_cols.append(col) 245 | 246 | # cast categorical columns as string type 247 | if cat_cols: 248 | data[cat_cols] = data[cat_cols].astype(str) 249 | 250 | # get the nuber of unique values in each column 251 | 
num_cols_range = [(data[col].min(), data[col].max()) for col in num_cols] 252 | cat_cols_n_uniq = [data[col].nunique() for col in cat_cols] 253 | 254 | # dataset length 255 | dataset_length = len(data) 256 | 257 | experiment = Experiment( 258 | num_cols, 259 | cat_cols, 260 | direction, 261 | n_trials, 262 | runs, 263 | correct_pred, 264 | name, 265 | file, 266 | num_cols_range, 267 | cat_cols_n_uniq, 268 | dataset_length, 269 | runtime, 270 | increment, 271 | use_seeds_from_paper, 272 | ) 273 | 274 | cat_alg = [CAT_ALG_MAP[alg] for alg in cat_alg] 275 | 276 | for alg in cat_alg: 277 | stats = Stats(experiment, alg) 278 | cat_val_to_indiv_cont = {} 279 | if cat_cols and alg in { 280 | "individual_contribution_warm_start_topk", 281 | "categorical_warm_start", 282 | "individual_contribution_warm_start_top1", 283 | }: 284 | start = time.time() 285 | # encode categorical columns as numerical and record their encoding maps 286 | cat_val_to_indiv_cont = individual_contribution( 287 | data, 288 | objective=f, 289 | cat_cols=cat_cols, 290 | **kwargs, 291 | ) 292 | run_encoding_time = time.time() - start 293 | # print(alg, run_encoding_time) 294 | stats.set_run_encoding_time(run_encoding_time) 295 | 296 | # initialize a TpeBo object 297 | tpebo = TpeBo( 298 | df=data, 299 | objective=f, 300 | num_cols=num_cols, 301 | cat_cols=cat_cols, 302 | direction=direction, 303 | k=k, 304 | cat_alg=alg, 305 | cat_val_to_indiv_cont=cat_val_to_indiv_cont, 306 | correct_pred=correct_pred, 307 | ) 308 | # run the bayesian optimization 309 | df_rem = tpebo.run(stats, **kwargs) 310 | experiment.set_experiment(stats) 311 | 312 | if random: 313 | stats = Stats(experiment, None) 314 | tpebo = TpeBo( 315 | df=data, 316 | objective=f, 317 | num_cols=num_cols, 318 | cat_cols=cat_cols, 319 | direction=direction, 320 | k=k, 321 | cat_alg="random", 322 | cat_val_to_indiv_cont={}, 323 | correct_pred=correct_pred, 324 | ) 325 | df_rem = tpebo.random(stats, **kwargs) 326 | 
experiment.set_experiment(stats) 327 | 328 | viz = experiment.visualize_results() 329 | 330 | if file is not None: 331 | experiment.output_file() 332 | 333 | if return_viz: 334 | viz = experiment.visualize_results() 335 | return df_rem, viz 336 | 337 | return df_rem 338 | -------------------------------------------------------------------------------- /boexplain/files/stats.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | import re 3 | import random 4 | import numpy as np 5 | import pandas as pd 6 | import altair as alt 7 | 8 | alt.data_transformers.disable_max_rows() 9 | from json import dumps 10 | from numpyencoder import NumpyEncoder 11 | 12 | 13 | class Experiment: 14 | 15 | experiments = dict() 16 | n_exp = 0 17 | 18 | def __init__( 19 | self, 20 | num_cols, 21 | cat_cols, 22 | direction, 23 | n_trials, 24 | runs, 25 | correct_pred, 26 | name, 27 | file, 28 | num_cols_range, 29 | cat_cols_n_uniq, 30 | dataset_length, 31 | runtime, 32 | increment, 33 | use_seeds_from_paper, 34 | ): 35 | 36 | self.num_cols = num_cols 37 | self.cat_cols = cat_cols 38 | self.direction = direction 39 | self.dir_enc = 1 if direction == "minimize" else -1 40 | self.n_trials = n_trials 41 | self.runs = runs 42 | self.correct_pred = correct_pred 43 | self.name = name 44 | self.file = file 45 | self.num_cols_range = num_cols_range 46 | self.cat_cols_n_uniq = cat_cols_n_uniq 47 | self.dataset_length = dataset_length 48 | self.runtime = runtime 49 | self.increment = increment 50 | 51 | if use_seeds_from_paper: 52 | self.seeds = [ 53 | 529840, 54 | 664234, 55 | 978546, 56 | 283991, 57 | 819362, 58 | 348229, 59 | 536289, 60 | 480291, 61 | 500927, 62 | 386602, 63 | ] 64 | else: 65 | self.seeds = random.sample(range(1000000), runs) 66 | 67 | def set_experiment(self, results) -> None: 68 | 69 | self.experiments[self.n_exp] = results.__dict__.copy() 70 | self.n_exp += 1 71 | 72 | def output_file(self): 73 | 74 | fo = 
open(self.file, "w") 75 | 76 | for v in self.experiments.values(): 77 | fo.write(f"{dumps(v, cls=NumpyEncoder)}\n") 78 | 79 | fo.close() 80 | 81 | def visualize_results(self): 82 | 83 | df = pd.DataFrame({}, columns=["Algorithm", "Iteration", "Value"]) 84 | for i in range(len(self.experiments)): 85 | df_new = pd.DataFrame.from_dict( 86 | { 87 | "Algorithm": self.experiments[i]["cat_enc"], 88 | "Iteration": list(range(self.experiments[i]["n_trials"])), 89 | "Value": self.experiments[i]["opt_res"], 90 | }, 91 | orient="index", 92 | ).T 93 | df = df.append(df_new) 94 | df = df.explode("Value") 95 | df = df.set_index(["Algorithm"]).apply(pd.Series.explode).reset_index() 96 | 97 | num_cols = f"{len(self.experiments[0]['num_cols'])} numerical columns: " 98 | for i, col in enumerate(self.experiments[0]["num_cols"]): 99 | num_cols += f"{col} (range {self.experiments[0]['num_cols_range'][i][0]} to {self.experiments[0]['num_cols_range'][i][1]}), " 100 | cat_cols = f"{len(self.experiments[0]['cat_cols'])} categorical columns: " 101 | for i, col in enumerate(self.experiments[0]["cat_cols"]): 102 | cat_cols += f"{col} ({self.experiments[0]['cat_cols_n_uniq'][i]} unique values), " 103 | 104 | out_str = f"Experiment: {self.experiments[0]['name']}. Completed {self.experiments[0]['n_trials']} iterations for {self.experiments[0]['runs']} runs. Search space includes " 105 | 106 | if len(self.experiments[0]["num_cols"]) > 0: 107 | out_str += num_cols 108 | if len(self.experiments[0]["cat_cols"]) > 0: 109 | out_str += "and " 110 | 111 | if len(self.experiments[0]["cat_cols"]) > 0: 112 | out_str += cat_cols 113 | 114 | out_str = f"{out_str[:-2]}." 
115 | 116 | out_lst = [line.strip() for line in re.findall(r".{1,80}(?:\s+|$)", out_str)] 117 | 118 | line = ( 119 | alt.Chart(df) 120 | .mark_line() 121 | .encode( 122 | x="Iteration", 123 | y=alt.Y("mean(Value)", scale=alt.Scale(zero=False)), 124 | color="Algorithm", 125 | ) 126 | .properties(title=out_lst) # {"text": out_lst, "subtitle": ""} 127 | ) 128 | band = ( 129 | alt.Chart(df) 130 | .mark_errorband(extent="stdev") 131 | .encode( 132 | x="Iteration", 133 | y=alt.Y("Value", title="Mean Objective Function Value"), 134 | color="Algorithm", 135 | ) 136 | ) 137 | chart = band + line 138 | chart = chart.configure_title( 139 | anchor="start", 140 | ) 141 | return chart 142 | 143 | 144 | class Stats(Experiment): 145 | def __init__(self, experiment, cat_enc) -> None: 146 | self.__dict__ = experiment.__dict__ 147 | self.cat_enc = cat_enc 148 | 149 | self.run_times = np.zeros(self.runs) 150 | self.n_duplicates = np.zeros(self.runs) 151 | self.n_zero_tup_preds = np.zeros(self.runs) 152 | self.preds = dict() 153 | self.opt_res = np.full((self.runs, self.n_trials), self.dir_enc * 1e9) 154 | self.run_time_of_opt_res = np.zeros((self.runs, self.n_trials)) 155 | self.iter_completed = np.zeros(self.runs) 156 | self.min_iter_completed = self.n_trials 157 | self.n_tuples_removed_from_data = np.zeros(self.runs) 158 | self.best_obj_values = np.full(self.runs, self.dir_enc * 1e9) 159 | self.add_on = np.zeros(self.runs) 160 | 161 | if self.correct_pred: 162 | self.precision = np.zeros((self.runs, self.n_trials)) 163 | self.recall = np.zeros((self.runs, self.n_trials)) 164 | self.f_score = np.zeros((self.runs, self.n_trials)) 165 | self.jaccard = np.zeros((self.runs, self.n_trials)) 166 | 167 | self.final_precision = np.zeros(self.runs) 168 | self.final_recall = np.zeros(self.runs) 169 | self.final_f_score = np.zeros(self.runs) 170 | self.final_jaccard = np.zeros(self.runs) 171 | 172 | self.encoding_time = 0 173 | self.example_best_predicate = None 174 | 175 | self.time_array = 
np.zeros((self.runs, self.runtime // self.increment)) 176 | self.precision_time_array = np.zeros((self.runs, self.runtime // self.increment)) 177 | self.recall_time_array = np.zeros((self.runs, self.runtime // self.increment)) 178 | self.f_score_time_array = np.zeros((self.runs, self.runtime // self.increment)) 179 | self.jaccard_time_array = np.zeros((self.runs, self.runtime // self.increment)) 180 | 181 | def get_run_opt_res_array(self) -> np.ndarray: 182 | return np.full(self.n_trials, self.dir_enc * 1e9) 183 | 184 | def get_run_time_array(self) -> np.ndarray: 185 | return np.zeros(self.runtime // self.increment) 186 | 187 | def get_run_time_of_opt_res_array(self) -> np.ndarray: 188 | return np.zeros(self.n_trials) 189 | 190 | def set_run_encoding_time(self, run_encoding_time): 191 | 192 | self.encoding_time = run_encoding_time 193 | 194 | def set_run_opt_res(self, run_opt_res: np.ndarray, run: int) -> None: 195 | 196 | self.opt_res[run] = run_opt_res 197 | 198 | def set_run_time_array(self, run_time_array: np.ndarray, run: int) -> None: 199 | 200 | self.time_array[run] = run_time_array 201 | 202 | def set_precision_time_array(self, precision_time_array: np.ndarray, run: int) -> None: 203 | 204 | self.precision_time_array[run] = precision_time_array 205 | 206 | def set_recall_time_array(self, recall_time_array: np.ndarray, run: int) -> None: 207 | 208 | self.recall_time_array[run] = recall_time_array 209 | 210 | def set_f_score_time_array(self, f_score_time_array: np.ndarray, run: int) -> None: 211 | 212 | self.f_score_time_array[run] = f_score_time_array 213 | 214 | def set_jaccard_time_array(self, jaccard_time_array: np.ndarray, run: int) -> None: 215 | 216 | self.jaccard_time_array[run] = jaccard_time_array 217 | 218 | def set_run_time_of_opt_res(self, run_time_opt_res: np.ndarray, run: int) -> None: 219 | 220 | self.run_time_of_opt_res[run] = run_time_opt_res 221 | 222 | def set_run_time(self, run_time: float, run: int) -> None: 223 | 224 | 
self.run_times[run] = run_time 225 | 226 | def set_add_on(self, add_on: float, run: int) -> None: 227 | 228 | self.add_on[run] = add_on 229 | 230 | def set_run_n_duplicates(self, run_n_dups: float, run: int) -> None: 231 | 232 | self.n_duplicates[run] = run_n_dups 233 | 234 | def set_run_n_zero_tup_preds(self, run_n_zero_tup_preds: float, run: int) -> None: 235 | 236 | self.n_zero_tup_preds[run] = run_n_zero_tup_preds 237 | 238 | def set_run_preds(self, best_pred: dict[Any], run: int) -> None: 239 | 240 | self.preds[run] = best_pred 241 | 242 | def set_run_iter_completed(self, n_iter: int, run) -> None: 243 | 244 | self.iter_completed[run] = n_iter 245 | 246 | def set_run_best_objective_value(self, obj_value: int, run) -> None: 247 | 248 | self.best_obj_values[run] = obj_value 249 | 250 | def set_example_best_predicate(self, best_pred: dict[Any], run) -> None: 251 | 252 | if self.direction == "minimize": 253 | if self.best_obj_values[run] == self.best_obj_values.min(): 254 | self.example_best_predicate = best_pred 255 | else: 256 | if self.best_obj_values[run] == self.best_obj_values.max(): 257 | self.example_best_predicate = best_pred 258 | 259 | def set_min_iter_completed(self, n_iter: int) -> None: 260 | 261 | if n_iter < self.min_iter_completed: 262 | self.min_iter_completed = n_iter 263 | 264 | def set_run_n_tuples_removed_from_data(self, num_removed: int, run: int): 265 | 266 | self.n_tuples_removed_from_data[run] = num_removed 267 | 268 | def set_final_precision(self, precision: float, run: int) -> None: 269 | 270 | self.final_precision[run] = precision 271 | 272 | def set_final_recall(self, recall: float, run: int) -> None: 273 | 274 | self.final_recall[run] = recall 275 | 276 | def set_final_f_score(self, f_score: float, run: int) -> None: 277 | 278 | self.final_f_score[run] = f_score 279 | 280 | def set_final_jaccard(self, jaccard: float, run: int) -> None: 281 | 282 | self.final_jaccard[run] = jaccard 283 | 284 | def set_precision(self, precision: 
np.ndarray, run: int) -> None: 285 | 286 | self.precision[run] = precision 287 | 288 | def set_recall(self, recall: np.ndarray, run: int) -> None: 289 | 290 | self.recall[run] = recall 291 | 292 | def set_f_score(self, f_score: np.ndarray, run: int) -> None: 293 | 294 | self.f_score[run] = f_score 295 | 296 | def set_jaccard(self, jaccard: np.ndarray, run: int) -> None: 297 | 298 | self.jaccard[run] = jaccard 299 | 300 | def output_temp_file(self) -> None: 301 | 302 | fo = open("temp.json", "w") 303 | 304 | fo.write(f"{dumps(self.__dict__, cls=NumpyEncoder)}\n") 305 | 306 | fo.close() 307 | 308 | def standard_output(self) -> None: 309 | 310 | print("BEST SCORE", self.best_obj_values) 311 | print("AVERAGE NUMBER OF TUPLES REMOVED", self.n_tuples_removed_from_data.mean()) 312 | print("AVERAGE TIME", self.run_times.mean()) 313 | print("AVERAGE DUPLICATE COUNT", self.n_duplicates.mean()) 314 | print("AVERAGE ZERO TUPLE", self.n_zero_tup_preds.mean(), "\n") 315 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/storages/in_memory.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from datetime import datetime 3 | import threading 4 | import uuid 5 | 6 | # from optuna import distributions # NOQA 7 | # from optuna.exceptions import DuplicatedStudyError 8 | # from optuna.storages import base 9 | # from optuna.storages.base import DEFAULT_STUDY_NAME_PREFIX 10 | # from optuna.study import StudyDirection 11 | # from optuna.trial import FrozenTrial 12 | # from optuna.trial import TrialState 13 | from .. import distributions # NOQA 14 | from ..exceptions import DuplicatedStudyError 15 | from . 
import base
from .base import DEFAULT_STUDY_NAME_PREFIX
from ..study import StudyDirection
from ..trial import FrozenTrial
from ..trial import TrialState


class InMemoryStorage(base.BaseStorage):
    """Storage class that stores data in memory of the Python process.

    This class is not supposed to be directly accessed by library users.
    """

    # All public methods serialize access through self._lock (an RLock), so
    # the storage is safe to share between threads within one process.

    def __init__(self):
        # type: () -> None
        self._trial_id_to_study_id_and_number = {}  # type: Dict[int, Tuple[int, int]]
        self._study_name_to_id = {}  # type: Dict[str, int]
        self._studies = {}  # type: Dict[int, _StudyInfo]

        self._max_study_id = -1
        self._max_trial_id = -1

        self._lock = threading.RLock()

    def __getstate__(self):
        # type: () -> Dict[Any, Any]
        # Locks are not picklable; drop the lock when serializing.
        state = self.__dict__.copy()
        del state["_lock"]
        return state

    def __setstate__(self, state):
        # type: (Dict[Any, Any]) -> None
        # Recreate the lock dropped in __getstate__.
        self.__dict__.update(state)
        self._lock = threading.RLock()

    def create_new_study(self, study_name=None):
        # type: (Optional[str]) -> int
        """Register a new study (auto-naming it if no name is given) and
        return its id.  Raises DuplicatedStudyError on a name collision."""

        with self._lock:
            study_id = self._max_study_id + 1
            self._max_study_id += 1

            if study_name is not None:
                if study_name in self._study_name_to_id:
                    raise DuplicatedStudyError
            else:
                study_uuid = str(uuid.uuid4())
                study_name = DEFAULT_STUDY_NAME_PREFIX + study_uuid
            self._studies[study_id] = _StudyInfo(study_name)
            self._study_name_to_id[study_name] = study_id

            return study_id

    def delete_study(self, study_id):
        # type: (int) -> None
        """Remove a study and all bookkeeping for its trials."""

        with self._lock:
            self._check_study_id(study_id)

            for trial in self._studies[study_id].trials:
                del self._trial_id_to_study_id_and_number[trial._trial_id]
            study_name = self._studies[study_id].name
            del self._study_name_to_id[study_name]
            del self._studies[study_id]

    def set_study_direction(self, study_id, direction):
        # type: (int, StudyDirection) -> None
        """Set the optimization direction; a direction may only be set once
        (re-setting to the same value is a no-op)."""

        with self._lock:
            self._check_study_id(study_id)

            study = self._studies[study_id]
            if study.direction != StudyDirection.NOT_SET and study.direction != direction:
                raise ValueError(
                    "Cannot overwrite study direction from {} to {}.".format(
                        study.direction, direction
                    )
                )
            study.direction = direction

    def set_study_user_attr(self, study_id, key, value):
        # type: (int, str, Any) -> None

        with self._lock:
            self._check_study_id(study_id)

            self._studies[study_id].user_attrs[key] = value

    def set_study_system_attr(self, study_id, key, value):
        # type: (int, str, Any) -> None

        with self._lock:
            self._check_study_id(study_id)

            self._studies[study_id].system_attrs[key] = value

    def get_study_id_from_name(self, study_name):
        # type: (str) -> int
        with self._lock:
            if study_name not in self._study_name_to_id:
                raise KeyError("No such study {}.".format(study_name))

            return self._study_name_to_id[study_name]

    def get_study_id_from_trial_id(self, trial_id):
        # type: (int) -> int

        with self._lock:
            self._check_trial_id(trial_id)

            return self._trial_id_to_study_id_and_number[trial_id][0]

    def get_study_name_from_id(self, study_id):
        # type: (int) -> str

        with self._lock:
            self._check_study_id(study_id)
            return self._studies[study_id].name

    def get_study_direction(self, study_id):
        # type: (int) -> StudyDirection

        with self._lock:
            self._check_study_id(study_id)
            return self._studies[study_id].direction

    def get_study_user_attrs(self, study_id):
        # type: (int) -> Dict[str, Any]

        with self._lock:
            self._check_study_id(study_id)
            # Deep copy so callers cannot mutate internal state.
            return copy.deepcopy(self._studies[study_id].user_attrs)

    def get_study_system_attrs(self, study_id):
        # type: (int) -> Dict[str, Any]

        with self._lock:
            self._check_study_id(study_id)
            return copy.deepcopy(self._studies[study_id].system_attrs)

    def create_new_trial(self, study_id, template_trial=None):
        # type: (int, Optional[FrozenTrial]) -> int
        """Append a new trial (fresh RUNNING trial, or a copy of
        ``template_trial``) to the study and return its trial id."""

        with self._lock:
            self._check_study_id(study_id)

            if template_trial is None:
                trial = self._create_running_trial()
            else:
                trial = copy.deepcopy(template_trial)

            trial_id = self._max_trial_id + 1
            self._max_trial_id += 1
            # Trial number is the study-local index; trial id is global.
            trial.number = len(self._studies[study_id].trials)
            trial._trial_id = trial_id
            self._trial_id_to_study_id_and_number[trial_id] = (study_id, trial.number)
            self._studies[study_id].trials.append(trial)
            self._update_cache(trial_id, study_id)
            return trial_id

    @staticmethod
    def _create_running_trial():
        # type: () -> FrozenTrial

        return FrozenTrial(
            trial_id=-1,  # dummy value.
            number=-1,  # dummy value.
            state=TrialState.RUNNING,
            params={},
            distributions={},
            user_attrs={},
            system_attrs={},
            value=None,
            intermediate_values={},
            datetime_start=datetime.now(),
            datetime_complete=None,
        )

    def set_trial_state(self, trial_id, state):
        # type: (int, TrialState) -> bool
        """Transition a trial's state.  Returns False for an invalid
        WAITING->RUNNING transition; refreshes the best-trial cache when the
        trial reaches a finished state."""

        with self._lock:
            trial = self._get_trial(trial_id)
            self.check_trial_is_updatable(trial_id, trial.state)

            # Shallow copy: stored trials are treated as immutable snapshots.
            trial = copy.copy(trial)
            self.check_trial_is_updatable(trial_id, trial.state)

            if state == TrialState.RUNNING and trial.state != TrialState.WAITING:
                return False

            trial.state = state
            if state.is_finished():
                trial.datetime_complete = datetime.now()
                self._set_trial(trial_id, trial)
                study_id = self._trial_id_to_study_id_and_number[trial_id][0]
                self._update_cache(trial_id, study_id)
            else:
                self._set_trial(trial_id, trial)

            return True

    def clear_params_and_dists(self, trial_id):
        # NOTE(review): not part of upstream optuna's storage API -- presumably
        # a BOExplain-specific addition that wipes every sampled parameter and
        # its distribution from a trial so it can be re-suggested; verify
        # against the sampler/study call sites.
        trial = self._get_trial(trial_id)
        trial = copy.copy(trial)
        for name in list(trial.params.keys()):
            # if "_min" not in name and "_len" not in name:
            del trial.params[name]
            del trial.distributions[name]
        self._set_trial(trial_id, trial)

        return

    def set_trial_param(self, trial_id, param_name, param_value_internal, distribution):
        # type: (int, str, float, distributions.BaseDistribution) -> bool
        """Record one sampled parameter.  Returns False if the parameter was
        already set for this trial."""

        with self._lock:
            trial = self._get_trial(trial_id)

            self.check_trial_is_updatable(trial_id, trial.state)

            study_id = self._trial_id_to_study_id_and_number[trial_id][0]
            # Check param distribution compatibility with previous trial(s).
            if param_name in self._studies[study_id].param_distribution:
                distributions.check_distribution_compatibility(
                    self._studies[study_id].param_distribution[param_name], distribution
                )

            # Check param has not been set; otherwise, return False.
            if param_name in trial.params:
                return False

            # Set param distribution.
            self._studies[study_id].param_distribution[param_name] = distribution

            # Set param.
            trial = copy.copy(trial)
            trial.params = copy.copy(trial.params)
            trial.params[param_name] = distribution.to_external_repr(param_value_internal)
            trial.distributions = copy.copy(trial.distributions)
            trial.distributions[param_name] = distribution
            self._set_trial(trial_id, trial)

            return True

    def get_trial_number_from_id(self, trial_id):
        # type: (int) -> int

        with self._lock:
            self._check_trial_id(trial_id)

            return self._trial_id_to_study_id_and_number[trial_id][1]

    def get_best_trial(self, study_id):
        # type: (int) -> FrozenTrial

        with self._lock:
            self._check_study_id(study_id)

            best_trial_id = self._studies[study_id].best_trial_id
            if best_trial_id is None:
                raise ValueError("No trials are completed yet.")
            return self.get_trial(best_trial_id)

    def get_trial_param(self, trial_id, param_name):
        # type: (int, str) -> float

        with self._lock:
            trial = self._get_trial(trial_id)

            distribution = trial.distributions[param_name]
            return distribution.to_internal_repr(trial.params[param_name])

    def set_trial_value(self, trial_id, value):
        # type: (int, float) -> None

        with self._lock:
            trial = self._get_trial(trial_id)
            self.check_trial_is_updatable(trial_id, trial.state)

            trial = copy.copy(trial)
            self.check_trial_is_updatable(trial_id, trial.state)

            trial.value = value
            self._set_trial(trial_id, trial)

    def _update_cache(self, trial_id: int, study_id: int) -> None:
        # Keep the study's best_trial_id cache in sync after a trial
        # completes; non-COMPLETE trials never become the best trial.

        trial = self._get_trial(trial_id)

        if trial.state != TrialState.COMPLETE:
            return

        best_trial_id = self._studies[study_id].best_trial_id
        if best_trial_id is None:
            self._studies[study_id].best_trial_id = trial_id
            return
        best_trial = self._get_trial(best_trial_id)
        assert best_trial is not None
        best_value = best_trial.value
        new_value = trial.value
        if best_value is None:
            self._studies[study_id].best_trial_id = trial_id
            return
        # Complete trials do not have `None` values.
        assert new_value is not None

        if self.get_study_direction(study_id) == StudyDirection.MAXIMIZE:
            if best_value < new_value:
                self._studies[study_id].best_trial_id = trial_id
        else:
            if best_value > new_value:
                self._studies[study_id].best_trial_id = trial_id

    def set_trial_intermediate_value(self, trial_id, step, intermediate_value):
        # type: (int, int, float) -> bool
        """Record an intermediate value for ``step``.  Returns False if that
        step already has a value."""

        with self._lock:
            trial = self._get_trial(trial_id)
            self.check_trial_is_updatable(trial_id, trial.state)

            self.check_trial_is_updatable(trial_id, trial.state)

            trial = copy.copy(trial)
            values = copy.copy(trial.intermediate_values)
            if step in values:
                return False

            values[step] = intermediate_value
            trial.intermediate_values = values
            self._set_trial(trial_id, trial)

            return True

    def set_trial_user_attr(self, trial_id, key, value):
        # type: (int, str, Any) -> None

        with self._lock:
            self._check_trial_id(trial_id)
            trial = self._get_trial(trial_id)
            self.check_trial_is_updatable(trial_id, trial.state)

            self.check_trial_is_updatable(trial_id, trial.state)

            trial = copy.copy(trial)
            trial.user_attrs = copy.copy(trial.user_attrs)
            trial.user_attrs[key] = value
            self._set_trial(trial_id, trial)

    def set_trial_system_attr(self, trial_id, key, value):
        # type: (int, str, Any) -> None

        with self._lock:
            trial = self._get_trial(trial_id)
            self.check_trial_is_updatable(trial_id, trial.state)

            self.check_trial_is_updatable(trial_id, trial.state)

            trial = copy.copy(trial)
            trial.system_attrs = copy.copy(trial.system_attrs)
            trial.system_attrs[key] = value
            self._set_trial(trial_id, trial)

    def get_trial(self, trial_id):
        # type: (int) -> FrozenTrial

        with self._lock:
            # Deep copy: callers get an isolated snapshot.
            return copy.deepcopy(self._get_trial(trial_id))

    def _get_trial(self, trial_id: int) -> FrozenTrial:
        # Internal accessor; returns the stored object WITHOUT copying.

        self._check_trial_id(trial_id)
        # study_id=0, trial_number=0,1,2,...
        study_id, trial_number = self._trial_id_to_study_id_and_number[trial_id]
        return self._studies[study_id].trials[trial_number]

    def _set_trial(self, trial_id: int, trial: FrozenTrial) -> None:
        study_id, trial_number = self._trial_id_to_study_id_and_number[trial_id]
        self._studies[study_id].trials[trial_number] = trial

    def get_all_trials(self, study_id, deepcopy=True):
        # type: (int, bool) -> List[FrozenTrial]

        with self._lock:
            self._check_study_id(study_id)
            if deepcopy:
                return copy.deepcopy(self._studies[study_id].trials)
            else:
                # Shallow list copy: the FrozenTrial objects are shared.
                return self._studies[study_id].trials[:]

    def get_n_trials(self, study_id, state=None):
        # type: (int, Optional[TrialState]) -> int

        with self._lock:
            self._check_study_id(study_id)
            if state is None:
                return len(self._studies[study_id].trials)

            return sum(
                trial.state == state for trial in self.get_all_trials(study_id, deepcopy=False)
            )

    def _check_study_id(self, study_id):
        # type: (int) -> None

        if study_id not in self._studies:
            raise KeyError("No study with study_id {} exists.".format(study_id))

    def _check_trial_id(self, trial_id: int) -> None:

        if trial_id not in self._trial_id_to_study_id_and_number:
            raise KeyError("No trial with trial_id {} exists.".format(trial_id))


class _StudyInfo:
    # Per-study record: trials plus cached metadata and the best-trial cache.
    def __init__(self, name: str) -> None:
        self.trials = []  # type: List[FrozenTrial]
        self.param_distribution = {}  # type: Dict[str, distributions.BaseDistribution]
        self.user_attrs = {}  # type: Dict[str, Any]
        self.system_attrs = {}  # type: Dict[str, Any]
        self.name = name  # type: str
        self.direction = StudyDirection.NOT_SET
        self.best_trial_id = None  # type: Optional[int]


# --- boexplain/optuna/optuna/distributions.py ---
import abc
import decimal
import json
import warnings

# from optuna import logging
from . import logging


class BaseDistribution(object, metaclass=abc.ABCMeta):
    """Base class for distributions.

    Note that distribution classes are not supposed to be called by library users.
    They are used by :class:`~optuna.trial.Trial` and :class:`~optuna.samplers` internally.
    """

    def to_external_repr(self, param_value_in_internal_repr):
        # type: (float) -> Any
        """Convert internal representation of a parameter value into external representation.

        Args:
            param_value_in_internal_repr:
                Optuna's internal representation of a parameter value.

        Returns:
            Optuna's external representation of a parameter value.
        """

        return param_value_in_internal_repr

    def to_internal_repr(self, param_value_in_external_repr):
        # type: (Any) -> float
        """Convert external representation of a parameter value into internal representation.

        Args:
            param_value_in_external_repr:
                Optuna's external representation of a parameter value.

        Returns:
            Optuna's internal representation of a parameter value.
        """

        return param_value_in_external_repr

    @abc.abstractmethod
    def single(self):
        # type: () -> bool
        """Test whether the range of this distribution contains just a single value.

        When this method returns :obj:`True`, :mod:`~optuna.samplers` always sample
        the same value from the distribution.

        Returns:
            :obj:`True` if the range of this distribution contains just a single value,
            otherwise :obj:`False`.
        """

        raise NotImplementedError

    @abc.abstractmethod
    def _contains(self, param_value_in_internal_repr):
        # type: (float) -> bool
        """Test if a parameter value is contained in the range of this distribution.

        Args:
            param_value_in_internal_repr:
                Optuna's internal representation of a parameter value.

        Returns:
            :obj:`True` if the parameter value is contained in the range of this distribution,
            otherwise :obj:`False`.
        """

        raise NotImplementedError

    def _asdict(self):
        # type: () -> Dict

        return self.__dict__

    def __eq__(self, other):
        # type: (Any) -> bool

        if not isinstance(other, BaseDistribution):
            return NotImplemented
        # Exact same class is required (not a subclass), matching __hash__.
        if not type(self) is type(other):
            return False
        return self.__dict__ == other.__dict__

    def __hash__(self):
        # type: () -> int

        return hash((self.__class__,) + tuple(sorted(self.__dict__.items())))

    def __repr__(self):
        # type: () -> str

        kwargs = ", ".join("{}={}".format(k, v) for k, v in sorted(self.__dict__.items()))
        return "{}({})".format(self.__class__.__name__, kwargs)


class UniformDistribution(BaseDistribution):
    """A uniform distribution in the linear domain.

    This object is instantiated by :func:`~optuna.trial.Trial.suggest_uniform`, and passed to
    :mod:`~optuna.samplers` in general.

    Attributes:
        low:
            Lower endpoint of the range of the distribution. ``low`` is included in the range.
        high:
            Upper endpoint of the range of the distribution. ``high`` is excluded from the range.
    """

    def __init__(self, low, high):
        # type: (float, float) -> None

        if low > high:
            raise ValueError(
                "The `low` value must be smaller than or equal to the `high` value "
                "(low={}, high={}).".format(low, high)
            )

        self.low = low
        self.high = high

    def single(self):
        # type: () -> bool

        return self.low == self.high

    def _contains(self, param_value_in_internal_repr):
        # type: (float) -> bool

        value = param_value_in_internal_repr
        # Degenerate range: only `low` itself is contained; otherwise the
        # upper bound is exclusive (see class docstring).
        if self.low == self.high:
            return value == self.low
        else:
            return self.low <= value < self.high


class LogUniformDistribution(BaseDistribution):
    """A uniform distribution in the log domain.

    This object is instantiated by :func:`~optuna.trial.Trial.suggest_loguniform`, and passed to
    :mod:`~optuna.samplers` in general.

    Attributes:
        low:
            Lower endpoint of the range of the distribution. ``low`` is included in the range.
        high:
            Upper endpoint of the range of the distribution. ``high`` is excluded from the range.
    """

    def __init__(self, low, high):
        # type: (float, float) -> None

        if low > high:
            raise ValueError(
                "The `low` value must be smaller than or equal to the `high` value "
                "(low={}, high={}).".format(low, high)
            )
        if low <= 0.0:
            raise ValueError(
                "The `low` value must be larger than 0 for a log distribution "
                "(low={}, high={}).".format(low, high)
            )

        self.low = low
        self.high = high

    def single(self):
        # type: () -> bool

        return self.low == self.high

    def _contains(self, param_value_in_internal_repr):
        # type: (float) -> bool

        value = param_value_in_internal_repr
        if self.low == self.high:
            return value == self.low
        else:
            return self.low <= value < self.high


class DiscreteUniformDistribution(BaseDistribution):
    """A discretized uniform distribution in the linear domain.

    This object is instantiated by :func:`~optuna.trial.Trial.suggest_discrete_uniform`, and passed
    to :mod:`~optuna.samplers` in general.

    Attributes:
        low:
            Lower endpoint of the range of the distribution. ``low`` is included in the range.
        high:
            Upper endpoint of the range of the distribution. ``high`` is included in the range.
        q:
            A discretization step.
    """

    def __init__(self, low, high, q):
        # type: (float, float, float) -> None

        if low > high:
            raise ValueError(
                "The `low` value must be smaller than or equal to the `high` value "
                "(low={}, high={}, q={}).".format(low, high, q)
            )

        self.low = low
        self.high = high
        self.q = q

    def single(self):
        # type: () -> bool

        if self.low == self.high:
            return True
        # Decimal avoids float rounding errors when comparing the span to q.
        high = decimal.Decimal(str(self.high))
        low = decimal.Decimal(str(self.low))
        q = decimal.Decimal(str(self.q))
        if (high - low) < q:
            return True
        return False

    def _contains(self, param_value_in_internal_repr):
        # type: (float) -> bool

        value = param_value_in_internal_repr
        return self.low <= value <= self.high


class IntUniformDistribution(BaseDistribution):
    """A uniform distribution on integers.

    This object is instantiated by :func:`~optuna.trial.Trial.suggest_int`, and passed to
    :mod:`~optuna.samplers` in general.

    Attributes:
        low:
            Lower endpoint of the range of the distribution. ``low`` is included in the range.
        high:
            Upper endpoint of the range of the distribution. ``high`` is included in the range.
        step:
            A step for spacing between values.
    """

    def __init__(self, low, high, step=1):
        # type: (int, int, int) -> None

        if low > high:
            raise ValueError(
                "The `low` value must be smaller than or equal to the `high` value "
                "(low={}, high={}).".format(low, high)
            )
        if step <= 0:
            raise ValueError(
                "The `step` value must be non-zero positive value, but step={}.".format(step)
            )

        self.low = low
        self.high = high
        self.step = step

    def to_external_repr(self, param_value_in_internal_repr):
        # type: (float) -> int

        return int(param_value_in_internal_repr)

    def to_internal_repr(self, param_value_in_external_repr):
        # type: (int) -> float

        return float(param_value_in_external_repr)

    def single(self):
        # type: () -> bool

        if self.low == self.high:
            return True
        return (self.high - self.low) < self.step

    def _contains(self, param_value_in_internal_repr):
        # type: (float) -> bool

        value = param_value_in_internal_repr
        return self.low <= value <= self.high


class IntLogUniformDistribution(BaseDistribution):
    """A uniform distribution on integers in the log domain.

    This object is instantiated by :func:`~optuna.trial.Trial.suggest_int`, and passed to
    :mod:`~optuna.samplers` in general.

    Attributes:
        low:
            Lower endpoint of the range of the distribution. ``low`` is included in the range.
        high:
            Upper endpoint of the range of the distribution. ``high`` is included in the range.
        step:
            A step for spacing between values.
    """

    def __init__(self, low, high, step=1):
        # type: (int, int, int) -> None

        if low > high:
            raise ValueError(
                "The `low` value must be smaller than or equal to the `high` value "
                "(low={}, high={}).".format(low, high)
            )
        if step <= 0:
            raise ValueError(
                "The `step` value must be non-zero positive value, but step={}.".format(step)
            )

        if low <= 0.0:
            raise ValueError(
                "The `low` value must be larger than 0 for a log distribution "
                "(low={}, high={}).".format(low, high)
            )

        self.low = low
        self.high = high
        self.step = step

    def to_external_repr(self, param_value_in_internal_repr):
        # type: (float) -> int

        return int(param_value_in_internal_repr)

    def to_internal_repr(self, param_value_in_external_repr):
        # type: (int) -> float

        return float(param_value_in_external_repr)

    def single(self):
        # type: () -> bool

        if self.low == self.high:
            return True
        return (self.high - self.low) < self.step

    def _contains(self, param_value_in_internal_repr):
        # type: (float) -> bool

        value = param_value_in_internal_repr
        return self.low <= value <= self.high


class CategoricalDistribution(BaseDistribution):
    """A categorical distribution.

    This object is instantiated by :func:`~optuna.trial.Trial.suggest_categorical`, and
    passed to :mod:`~optuna.samplers` in general.

    Args:
        choices:
            Parameter value candidates.

    .. note::

        Not all types are guaranteed to be compatible with all storages. It is recommended to
        restrict the types of the choices to :obj:`None`, :class:`bool`, :class:`int`,
        :class:`float` and :class:`str`.

    Attributes:
        choices:
            Parameter value candidates.
    """

    def __init__(self, choices):
        # type: (Sequence[CategoricalChoiceType]) -> None

        if len(choices) == 0:
            raise ValueError("The `choices` must contains one or more elements.")
        for choice in choices:
            if choice is not None and not isinstance(choice, (bool, int, float, str)):
                # Non-persistable choice types are warned about, not rejected.
                message = (
                    "Choices for a categorical distribution should be a tuple of None, bool, "
                    "int, float and str for persistent storage but contains {} which is of type "
                    "{}.".format(choice, type(choice).__name__)
                )
                warnings.warn(message)

                logger = logging._get_library_root_logger()
                logger.warning(message)

        self.choices = choices

    def to_external_repr(self, param_value_in_internal_repr):
        # type: (float) -> CategoricalChoiceType

        # Internal repr is the (float-valued) index into `choices`.
        return self.choices[int(param_value_in_internal_repr)]

    def to_internal_repr(self, param_value_in_external_repr):
        # type: (CategoricalChoiceType) -> float

        try:
            return self.choices.index(param_value_in_external_repr)
        except ValueError as e:
            raise ValueError(
                "'{}' not in {}.".format(param_value_in_external_repr, self.choices)
            ) from e

    def single(self):
        # type: () -> bool

        return len(self.choices) == 1

    def _contains(self, param_value_in_internal_repr):
        # type: (float) -> bool

        index = int(param_value_in_internal_repr)
        return 0 <= index < len(self.choices)


DISTRIBUTION_CLASSES = (
    UniformDistribution,
    LogUniformDistribution,
    DiscreteUniformDistribution,
    IntUniformDistribution,
    IntLogUniformDistribution,
    CategoricalDistribution,
)


def json_to_distribution(json_str):
    # type: (str) -> BaseDistribution
    """Deserialize a distribution in JSON format.

    Args:
        json_str: A JSON-serialized distribution.
435 | 436 | Returns: 437 | A deserialized distribution. 438 | """ 439 | 440 | json_dict = json.loads(json_str) 441 | 442 | if json_dict["name"] == CategoricalDistribution.__name__: 443 | json_dict["attributes"]["choices"] = tuple(json_dict["attributes"]["choices"]) 444 | 445 | for cls in DISTRIBUTION_CLASSES: 446 | if json_dict["name"] == cls.__name__: 447 | return cls(**json_dict["attributes"]) 448 | 449 | raise ValueError("Unknown distribution class: {}".format(json_dict["name"])) 450 | 451 | 452 | def distribution_to_json(dist): 453 | # type: (BaseDistribution) -> str 454 | """Serialize a distribution to JSON format. 455 | 456 | Args: 457 | dist: A distribution to be serialized. 458 | 459 | Returns: 460 | A JSON string of a given distribution. 461 | 462 | """ 463 | 464 | return json.dumps({"name": dist.__class__.__name__, "attributes": dist._asdict()}) 465 | 466 | 467 | def check_distribution_compatibility(dist_old, dist_new): 468 | # type: (BaseDistribution, BaseDistribution) -> None 469 | """A function to check compatibility of two distributions. 470 | 471 | Note that this method is not supposed to be called by library users. 472 | 473 | Args: 474 | dist_old: A distribution previously recorded in storage. 475 | dist_new: A distribution newly added to storage. 476 | 477 | Returns: 478 | True denotes given distributions are compatible. Otherwise, they are not. 479 | """ 480 | 481 | if dist_old.__class__ != dist_new.__class__: 482 | raise ValueError("Cannot set different distribution kind to the same parameter name.") 483 | 484 | if not isinstance(dist_old, CategoricalDistribution): 485 | return 486 | if not isinstance(dist_new, CategoricalDistribution): 487 | return 488 | if dist_old.choices != dist_new.choices: 489 | raise ValueError( 490 | CategoricalDistribution.__name__ + " does not support dynamic value space." 
491 | ) 492 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/storages/base.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import copy 3 | from typing import Any 4 | from typing import Dict 5 | from typing import List 6 | from typing import Optional 7 | 8 | # from optuna import study 9 | # from optuna.trial import TrialState 10 | from .. import study 11 | from ..trial import TrialState 12 | 13 | DEFAULT_STUDY_NAME_PREFIX = "no-name-" 14 | 15 | 16 | class BaseStorage(object, metaclass=abc.ABCMeta): 17 | """Base class for storages. 18 | 19 | This class is not supposed to be directly accessed by library users. 20 | 21 | A storage class abstracts a backend database and provides library internal interfaces to 22 | read/write histories of studies and trials. 23 | 24 | **Thread safety** 25 | 26 | A storage class can be shared among multiple threads, and must therefore be thread-safe. 27 | It must guarantee that return values such as `FrozenTrial`s are never modified. 28 | A storage class can assume that return values are never modified by its user. 29 | When a user modifies a return value from a storage class, the internal state of the storage 30 | may become inconsistent. Consequences are undefined. 31 | 32 | **Ownership of RUNNING trials** 33 | 34 | Trials in finished states are not allowed to be modified. 35 | Trials in the WAITING state are not allowed to be modified except for the `state` field. 36 | A storage class can assume that each RUNNING trial is only modified from a single process. 37 | When a user modifies a RUNNING trial from multiple processes, the internal state of the storage 38 | may become inconsistent. Consequences are undefined. 39 | A storage class is not intended for inter-process communication. 
40 | Consequently, users using optuna with MPI or other multi-process programs must make sure that 41 | only one process is used to access the optuna interface. 42 | 43 | **Consistency models** 44 | 45 | A storage class must support the monotonic-reads consistency model, that is, if a 46 | process reads data `X`, any successive reads on data `X` cannot return older values. 47 | It must support read-your-writes, that is, if a process writes to data `X`, 48 | any successive reads on data `X` from the same process must read the written 49 | value or one of the more recent values. 50 | 51 | **Stronger consistency requirements for special data** 52 | 53 | TODO(ytsmiling) Add load method to storage class implementations. 54 | 55 | Under a multi-worker setting, a storage class must return the latest values of any attributes 56 | of a study, not necessarily for the attributes of a `Trial`. 57 | However, if the `load(study_id)` method is called, any successive reads on the `state` 58 | attribute of a `Trial` are guaranteed to return the same or more recent values than the value 59 | at the time of call to the `load` method. 60 | Let `T` be a `Trial`. 61 | Let `P` be the process that last updated the `state` attribute of `T`. 62 | Then, any reads on any attributes of `T` are guaranteed to return the same or 63 | more recent values than any writes by `P` on the attribute before `P` updated 64 | the `state` attribute of `T`. 65 | The same applies for `user_attrs', 'system_attrs' and 'intermediate_values` attributes. 66 | 67 | .. note:: 68 | 69 | These attribute behaviors may become user customizable in the future. 70 | 71 | **Data persistence** 72 | 73 | A storage class does not guarantee that write operations are logged into a persistent 74 | storage, even when write methods succeed. 75 | Thus, when process failure occurs, some writes might be lost. 
76 | As exceptions, when a persistent storage is available, any writes on any attributes 77 | of `Study` and writes on `state` of `Trial` are guaranteed to be persistent. 78 | Additionally, any preceding writes on any attributes of `Trial` are guaranteed to 79 | be written into a persistent storage before writes on `state` of `Trial` succeed. 80 | The same applies for `user_attrs', 'system_attrs' and 'intermediate_values` attributes. 81 | 82 | .. note:: 83 | 84 | These attribute behaviors may become user customizable in the future. 85 | """ 86 | 87 | # Basic study manipulation 88 | 89 | @abc.abstractmethod 90 | def create_new_study(self, study_name: Optional[str] = None) -> int: 91 | """Create a new study from a name. 92 | 93 | If no name is specified, the storage class generates a name. 94 | The returned study ID is unique among all current and deleted studies. 95 | 96 | Args: 97 | study_name: 98 | Name of the new study to create. 99 | 100 | Returns: 101 | ID of the created study. 102 | 103 | Raises: 104 | :exc:`optuna.exceptions.DuplicatedStudyError`: 105 | If a study with the same ``study_name`` already exists. 106 | """ 107 | # TODO(ytsmiling) Fix RDB storage implementation to ensure unique `study_id`. 108 | raise NotImplementedError 109 | 110 | @abc.abstractmethod 111 | def delete_study(self, study_id: int) -> None: 112 | """Delete a study. 113 | 114 | Args: 115 | study_id: 116 | ID of the study. 117 | 118 | Raises: 119 | :exc:`KeyError`: 120 | If no study with the matching ``study_id`` exists. 121 | """ 122 | raise NotImplementedError 123 | 124 | @abc.abstractmethod 125 | def set_study_user_attr(self, study_id: int, key: str, value: Any) -> None: 126 | """Register a user-defined attribute to a study. 127 | 128 | This method overwrites any existing attribute. 129 | 130 | Args: 131 | study_id: 132 | ID of the study. 133 | key: 134 | Attribute key. 135 | value: 136 | Attribute value. It should be JSON serializable. 
137 | 138 | Raises: 139 | :exc:`KeyError`: 140 | If no study with the matching ``study_id`` exists. 141 | """ 142 | raise NotImplementedError 143 | 144 | @abc.abstractmethod 145 | def set_study_system_attr(self, study_id: int, key: str, value: Any) -> None: 146 | """Register an optuna-internal attribute to a study. 147 | 148 | This method overwrites any existing attribute. 149 | 150 | Args: 151 | study_id: 152 | ID of the study. 153 | key: 154 | Attribute key. 155 | value: 156 | Attribute value. It should be JSON serializable. 157 | 158 | Raises: 159 | :exc:`KeyError`: 160 | If no study with the matching ``study_id`` exists. 161 | """ 162 | raise NotImplementedError 163 | 164 | @abc.abstractmethod 165 | def set_study_direction(self, study_id: int, direction: study.StudyDirection) -> None: 166 | """Register an optimization problem direction to a study. 167 | 168 | Args: 169 | study_id: 170 | ID of the study. 171 | direction: 172 | Either :obj:`~optuna.study.StudyDirection.MAXIMIZE` or 173 | :obj:`~optuna.study.StudyDirection.MINIMIZE`. 174 | 175 | Raises: 176 | :exc:`KeyError`: 177 | If no study with the matching ``study_id`` exists. 178 | :exc:`ValueError`: 179 | If the direction is already set and the passed ``direction`` is the opposite 180 | direction or :obj:`~optuna.study.StudyDirection.NOT_SET`. 181 | """ 182 | raise NotImplementedError 183 | 184 | # Basic study access 185 | 186 | @abc.abstractmethod 187 | def get_study_id_from_name(self, study_name: str) -> int: 188 | """Read the ID of a study. 189 | 190 | Args: 191 | study_name: 192 | Name of the study. 193 | 194 | Returns: 195 | ID of the study. 196 | 197 | Raises: 198 | :exc:`KeyError`: 199 | If no study with the matching ``study_name`` exists. 200 | """ 201 | raise NotImplementedError 202 | 203 | @abc.abstractmethod 204 | def get_study_id_from_trial_id(self, trial_id: int) -> int: 205 | """Read the ID of a study to which a trial belongs. 206 | 207 | Args: 208 | trial_id: 209 | ID of the trial. 
210 | 211 | Returns: 212 | ID of the study. 213 | 214 | Raises: 215 | :exc:`KeyError`: 216 | If no trial with the matching ``trial_id`` exists. 217 | """ 218 | raise NotImplementedError 219 | 220 | @abc.abstractmethod 221 | def get_study_name_from_id(self, study_id: int) -> str: 222 | """Read the study name of a study. 223 | 224 | Args: 225 | study_id: 226 | ID of the study. 227 | 228 | Returns: 229 | Name of the study. 230 | 231 | Raises: 232 | :exc:`KeyError`: 233 | If no study with the matching ``study_id`` exists. 234 | """ 235 | raise NotImplementedError 236 | 237 | @abc.abstractmethod 238 | def get_study_direction(self, study_id: int) -> study.StudyDirection: 239 | """Read whether a study maximizes or minimizes an objective. 240 | 241 | Args: 242 | study_id: 243 | ID of a study. 244 | 245 | Returns: 246 | Optimization direction of the study. 247 | 248 | Raises: 249 | :exc:`KeyError`: 250 | If no study with the matching ``study_id`` exists. 251 | """ 252 | raise NotImplementedError 253 | 254 | @abc.abstractmethod 255 | def get_study_user_attrs(self, study_id: int) -> Dict[str, Any]: 256 | """Read the user-defined attributes of a study. 257 | 258 | Args: 259 | study_id: 260 | ID of the study. 261 | 262 | Returns: 263 | Dictionary with the user attributes of the study. 264 | 265 | Raises: 266 | :exc:`KeyError`: 267 | If no study with the matching ``study_id`` exists. 268 | """ 269 | raise NotImplementedError 270 | 271 | @abc.abstractmethod 272 | def get_study_system_attrs(self, study_id: int) -> Dict[str, Any]: 273 | """Read the optuna-internal attributes of a study. 274 | 275 | Args: 276 | study_id: 277 | ID of the study. 278 | 279 | Returns: 280 | Dictionary with the optuna-internal attributes of the study. 281 | 282 | Raises: 283 | :exc:`KeyError`: 284 | If no study with the matching ``study_id`` exists. 
285 | """ 286 | raise NotImplementedError 287 | 288 | # Basic trial manipulation 289 | 290 | @abc.abstractmethod 291 | def create_new_trial( 292 | self, study_id: int, template_trial: Optional["FrozenTrial"] = None 293 | ) -> int: 294 | """Create and add a new trial to a study. 295 | 296 | The returned trial ID is unique among all current and deleted trials. 297 | 298 | Args: 299 | study_id: 300 | ID of the study. 301 | template_trial: 302 | Template :class:`~optuna.trial.FronzenTrial` with default user-attributes, 303 | system-attributes, intermediate-values, and a state. 304 | 305 | Returns: 306 | ID of the created trial. 307 | 308 | Raises: 309 | :exc:`KeyError`: 310 | If no study with the matching ``study_id`` exists. 311 | """ 312 | raise NotImplementedError 313 | 314 | @abc.abstractmethod 315 | def set_trial_state(self, trial_id: int, state: TrialState) -> bool: 316 | """Update the state of a trial. 317 | 318 | Args: 319 | trial_id: 320 | ID of the trial. 321 | state: 322 | New state of the trial. 323 | 324 | Returns: 325 | :obj:`True` if the state is successfully updated. 326 | :obj:`False` if the state is kept the same. 327 | The latter happens when this method tries to update the state of 328 | :obj:`~optuna.trial.TrialState.RUNNING` trial to 329 | :obj:`~optuna.trial.TrialState.RUNNING`. 330 | 331 | Raises: 332 | :exc:`KeyError`: 333 | If no trial with the matching ``trial_id`` exists. 334 | :exc:`RuntimeError`: 335 | If the trial is already finished. 336 | """ 337 | raise NotImplementedError 338 | 339 | @abc.abstractmethod 340 | def set_trial_param( 341 | self, 342 | trial_id: int, 343 | param_name: str, 344 | param_value_internal: float, 345 | distribution: "distributions.BaseDistribution", 346 | ) -> bool: 347 | """Add a parameter to a trial. 348 | 349 | Args: 350 | trial_id: 351 | ID of the trial. 352 | param_name: 353 | Name of the parameter. 354 | param_value_internal: 355 | Internal representation of the parameter value. 
356 | distribution: 357 | Sampled distribution of the parameter. 358 | 359 | Returns: 360 | :obj:`False` when the parameter is already set to the trial, :obj:`True` otherwise. 361 | 362 | Raises: 363 | :exc:`KeyError`: 364 | If no trial with the matching ``trial_id`` exists. 365 | :exc:`RuntimeError`: 366 | If the trial is already finished. 367 | """ 368 | raise NotImplementedError 369 | 370 | @abc.abstractmethod 371 | def get_trial_number_from_id(self, trial_id: int) -> int: 372 | """Read the trial number of a trial. 373 | 374 | .. note:: 375 | 376 | The trial number is only unique within a study, and is sequential. 377 | 378 | Args: 379 | trial_id: 380 | ID of the trial. 381 | 382 | Returns: 383 | Number of the trial. 384 | 385 | Raises: 386 | :exc:`KeyError`: 387 | If no trial with the matching ``trial_id`` exists. 388 | """ 389 | raise NotImplementedError 390 | 391 | @abc.abstractmethod 392 | def get_trial_param(self, trial_id: int, param_name: str) -> float: 393 | """Read the parameter of a trial. 394 | 395 | Args: 396 | trial_id: 397 | ID of the trial. 398 | param_name: 399 | Name of the parameter. 400 | 401 | Returns: 402 | Internal representation of the parameter. 403 | 404 | Raises: 405 | :exc:`KeyError`: 406 | If no trial with the matching ``trial_id`` exists. 407 | If no such parameter exists. 408 | """ 409 | raise NotImplementedError 410 | 411 | @abc.abstractmethod 412 | def set_trial_value(self, trial_id: int, value: float) -> None: 413 | """Set a return value of an objective function. 414 | 415 | This method overwrites any existing trial value. 416 | 417 | Args: 418 | trial_id: 419 | ID of the trial. 420 | value: 421 | Value of the objective function. 422 | 423 | Raises: 424 | :exc:`KeyError`: 425 | If no trial with the matching ``trial_id`` exists. 426 | :exc:`RuntimeError`: 427 | If the trial is already finished. 
428 | """ 429 | raise NotImplementedError 430 | 431 | @abc.abstractmethod 432 | def set_trial_intermediate_value( 433 | self, trial_id: int, step: int, intermediate_value: float 434 | ) -> bool: 435 | """Report an intermediate value of an objective function. 436 | 437 | Args: 438 | trial_id: 439 | ID of the trial. 440 | step: 441 | Step of the trial (e.g., the epoch when training a neural network). 442 | intermediate_value: 443 | Intermediate value corresponding to the step. 444 | 445 | Returns: 446 | :obj:`False` when the step is already set, :obj:`True` otherwise. 447 | 448 | Raises: 449 | :exc:`KeyError`: 450 | If no trial with the matching ``trial_id`` exists. 451 | :exc:`RuntimeError`: 452 | If the trial is already finished. 453 | """ 454 | raise NotImplementedError 455 | 456 | @abc.abstractmethod 457 | def set_trial_user_attr(self, trial_id: int, key: str, value: Any) -> None: 458 | """Set a user-defined attribute to a trial. 459 | 460 | This method overwrites any existing attribute. 461 | 462 | Args: 463 | trial_id: 464 | ID of the trial. 465 | key: 466 | Attribute key. 467 | value: 468 | Attribute value. It should be JSON serializable. 469 | 470 | Raises: 471 | :exc:`KeyError`: 472 | If no trial with the matching ``trial_id`` exists. 473 | :exc:`RuntimeError`: 474 | If the trial is already finished. 475 | """ 476 | raise NotImplementedError 477 | 478 | @abc.abstractmethod 479 | def set_trial_system_attr(self, trial_id: int, key: str, value: Any) -> None: 480 | """Set an optuna-internal attribute to a trial. 481 | 482 | This method overwrites any existing attribute. 483 | 484 | Args: 485 | trial_id: 486 | ID of the trial. 487 | key: 488 | Attribute key. 489 | value: 490 | Attribute value. It should be JSON serializable. 491 | 492 | Raises: 493 | :exc:`KeyError`: 494 | If no trial with the matching ``trial_id`` exists. 495 | :exc:`RuntimeError`: 496 | If the trial is already finished. 
497 | """ 498 | raise NotImplementedError 499 | 500 | # Basic trial access 501 | 502 | @abc.abstractmethod 503 | def get_trial(self, trial_id: int) -> "FrozenTrial": 504 | """Read a trial. 505 | 506 | Args: 507 | trial_id: 508 | ID of the trial. 509 | 510 | Returns: 511 | Trial with a matching trial ID. 512 | 513 | Raises: 514 | :exc:`KeyError`: 515 | If no trial with the matching ``trial_id`` exists. 516 | """ 517 | raise NotImplementedError 518 | 519 | @abc.abstractmethod 520 | def get_all_trials(self, study_id: int, deepcopy: bool = True) -> List["FrozenTrial"]: 521 | """Read all trials in a study. 522 | 523 | Args: 524 | study_id: 525 | ID of the study. 526 | deepcopy: 527 | Whether to copy the list of trials before returning. 528 | Set to :obj:`True` if you intend to update the list or elements of the list. 529 | 530 | Returns: 531 | List of trials in the study. 532 | 533 | Raises: 534 | :exc:`KeyError`: 535 | If no study with the matching ``study_id`` exists. 536 | """ 537 | raise NotImplementedError 538 | 539 | @abc.abstractmethod 540 | def get_n_trials(self, study_id: int, state: Optional[TrialState] = None) -> int: 541 | """Count the number of trials in a study. 542 | 543 | Args: 544 | study_id: 545 | ID of the study. 546 | state: 547 | :class:`~optuna.trial.TrialState` to filter trials. 548 | 549 | Returns: 550 | Number of trials in the study. 551 | 552 | Raises: 553 | :exc:`KeyError`: 554 | If no study with the matching ``study_id`` exists. 555 | """ 556 | raise NotImplementedError 557 | 558 | def get_best_trial(self, study_id: int) -> "FrozenTrial": 559 | """Return the trial with the best value in a study. 560 | 561 | Args: 562 | study_id: 563 | ID of the study. 564 | 565 | Returns: 566 | The trial with the best objective value among all finished trials in the study. 567 | 568 | Raises: 569 | :exc:`KeyError`: 570 | If no study with the matching ``study_id`` exists. 571 | :exc:`RuntimeError`: 572 | If no trials have been completed. 
573 | """ 574 | all_trials = self.get_all_trials(study_id, deepcopy=False) 575 | all_trials = [t for t in all_trials if t.state is TrialState.COMPLETE] 576 | 577 | if len(all_trials) == 0: 578 | raise ValueError("No trials are completed yet.") 579 | 580 | if self.get_study_direction(study_id) == study.StudyDirection.MAXIMIZE: 581 | best_trial = max(all_trials, key=lambda t: t.value) 582 | else: 583 | best_trial = min(all_trials, key=lambda t: t.value) 584 | 585 | return copy.deepcopy(best_trial) 586 | 587 | def get_trial_params(self, trial_id: int) -> Dict[str, Any]: 588 | """Read the parameter dictionary of a trial. 589 | 590 | Args: 591 | trial_id: 592 | ID of the trial. 593 | 594 | Returns: 595 | Dictionary of a parameters. Keys are parameter names and values are internal 596 | representations of the parameter values. 597 | 598 | Raises: 599 | :exc:`KeyError`: 600 | If no trial with the matching ``trial_id`` exists. 601 | """ 602 | return self.get_trial(trial_id).params 603 | 604 | def get_trial_user_attrs(self, trial_id: int) -> Dict[str, Any]: 605 | """Read the user-defined attributes of a trial. 606 | 607 | Args: 608 | trial_id: 609 | ID of the trial. 610 | 611 | Returns: 612 | Dictionary with the user-defined attributes of the trial. 613 | 614 | Raises: 615 | :exc:`KeyError`: 616 | If no trial with the matching ``trial_id`` exists. 617 | """ 618 | return self.get_trial(trial_id).user_attrs 619 | 620 | def get_trial_system_attrs(self, trial_id: int) -> Dict[str, Any]: 621 | """Read the optuna-internal attributes of a trial. 622 | 623 | Args: 624 | trial_id: 625 | ID of the trial. 626 | 627 | Returns: 628 | Dictionary with the optuna-internal attributes of the trial. 629 | 630 | Raises: 631 | :exc:`KeyError`: 632 | If no trial with the matching ``trial_id`` exists. 
633 | """ 634 | return self.get_trial(trial_id).system_attrs 635 | 636 | def remove_session(self) -> None: 637 | """Clean up all connections to a database.""" 638 | pass 639 | 640 | def check_trial_is_updatable(self, trial_id: int, trial_state: TrialState) -> None: 641 | """Check whether a trial state is updatable. 642 | 643 | Args: 644 | trial_id: 645 | ID of the trial. 646 | Only used for an error message. 647 | trial_state: 648 | Trial state to check. 649 | 650 | Raises: 651 | :exc:`RuntimeError`: 652 | If the trial is already finished. 653 | """ 654 | if trial_state.is_finished(): 655 | trial = self.get_trial(trial_id) 656 | raise RuntimeError( 657 | "Trial#{} has already finished and can not be updated.".format(trial.number) 658 | ) 659 | -------------------------------------------------------------------------------- /boexplain/optuna/optuna/study.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import datetime 3 | import gc 4 | import math 5 | import threading 6 | import warnings 7 | 8 | 9 | import numpy as np 10 | import pandas as pd # NOQA 11 | 12 | 13 | # from optuna._study_direction import StudyDirection 14 | # from optuna import exceptions 15 | # from optuna import logging 16 | # from optuna import progress_bar as pbar_module 17 | # from optuna import pruners 18 | # from optuna import samplers 19 | # from optuna import storages 20 | # from optuna import trial as trial_module 21 | # from optuna.trial import FrozenTrial 22 | # from optuna.trial import TrialState 23 | 24 | from ._study_direction import StudyDirection 25 | from . import exceptions 26 | from . import logging 27 | from . import progress_bar as pbar_module 28 | from . import pruners 29 | from . import samplers 30 | from . import storages 31 | from . 
class BaseStudy(object):
    """Read-only view of one study held in a storage backend.

    Exposes convenience accessors for the best trial and the trial
    history; subclasses (e.g. :class:`Study`) add mutation and
    optimization on top.
    """

    def __init__(self, study_id, storage):
        # type: (int, storages.BaseStorage) -> None

        self._study_id = study_id
        self._storage = storage

    @property
    def best_params(self):
        # type: () -> Dict[str, Any]
        """Parameters of the best trial in the study.

        Returns:
            A dictionary containing parameters of the best trial.
        """
        return self.best_trial.params

    @property
    def best_value(self):
        # type: () -> float
        """Best objective value reached in the study.

        Returns:
            A float representing the best objective value.
        """
        value = self.best_trial.value
        assert value is not None
        return value

    @property
    def best_trial(self):
        # type: () -> FrozenTrial
        """Best trial in the study, as chosen by the storage backend.

        Returns:
            A :class:`~optuna.FrozenTrial` object of the best trial.
        """
        return self._storage.get_best_trial(self._study_id)

    @property
    def direction(self):
        # type: () -> StudyDirection
        """Optimization direction of the study (minimize or maximize).

        Returns:
            A :class:`~optuna.study.StudyDirection` object.
        """
        return self._storage.get_study_direction(self._study_id)

    @property
    def trials(self):
        # type: () -> List[FrozenTrial]
        """All trials in the study, ordered by trial number.

        Short form of ``self.get_trials(deepcopy=True)``.

        Returns:
            A list of :class:`~optuna.FrozenTrial` objects.
        """
        return self.get_trials()

    def get_trials(self, deepcopy=True):
        # type: (bool) -> List[FrozenTrial]
        """Return all trials in the study, ordered by trial number.

        Args:
            deepcopy:
                Flag to control whether to apply ``copy.deepcopy()`` to the
                trials. With :obj:`False` the storage's internal objects are
                handed out directly, so callers must treat them as
                read-only or the study state may become corrupted.

        Returns:
            A list of :class:`~optuna.FrozenTrial` objects.
        """
        return self._storage.get_all_trials(self._study_id, deepcopy=deepcopy)

    @property
    def storage(self):
        # type: () -> storages.BaseStorage
        """The underlying storage object.

        .. deprecated:: 0.15.0
            The direct use of storage is deprecated; access storage only
            through the study's public methods
            (e.g., :meth:`~optuna.study.Study.set_user_attr`).

        Returns:
            A storage object.
        """
        # Emit the same deprecation text through both channels, exactly as
        # before: a DeprecationWarning and a logger warning.
        message = (
            "The direct use of storage is deprecated. "
            "Please access to storage via study's public methods "
            "(e.g., `Study.set_user_attr`)"
        )
        warnings.warn(message, DeprecationWarning)
        _logger.warning(message)
        return self._storage
# Methods of `Study` (the class header, subclassing BaseStudy, is declared above).

def __init__(
    self,
    study_name,  # type: str
    storage,  # type: Union[str, storages.BaseStorage]
    sampler=None,  # type: samplers.BaseSampler
    pruner=None,  # type: pruners.BasePruner
    seed=None,
    cat_preds=None,
):
    # type: (...) -> None
    """Bind this study to ``study_name`` inside ``storage`` and set up sampling state."""
    # Seconds subtracted from the elapsed time in _optimize_sequential's
    # timeout check (effectively extends the time budget; set elsewhere).
    self.add_on = 0

    self.study_name = study_name
    storage = storages.get_storage(storage)
    study_id = storage.get_study_id_from_name(study_name)
    super(Study, self).__init__(study_id, storage)

    # TPE sampling by default.
    self.sampler = sampler or samplers.TPESampler()
    # No pruning by default.
    self.pruner = pruner or pruners.NopPruner()

    self._optimize_lock = threading.Lock()
    self._stop_flag = False

    # NOTE(review): populated outside this file; appears to track already
    # evaluated configurations.
    self.evaled = set()
    self.rnd = np.random.RandomState(seed=seed)

    self.cat_preds = cat_preds
    # FIX: was a bare `except:`, which swallowed every exception including
    # KeyboardInterrupt/SystemExit. Only AttributeError can legitimately
    # occur here (cat_preds is None or lacks `.values()`); anything else
    # should surface. As before, `cat_preds_set` is intentionally left
    # unset when cat_preds is not a mapping.
    try:
        self.cat_preds_set = set(cat_preds.values())
    except AttributeError:
        pass

    self.info = {}
    self.info["names"] = []


def __getstate__(self):
    # type: () -> Dict[Any, Any]
    """Drop the (unpicklable) lock when pickling."""
    state = self.__dict__.copy()
    del state["_optimize_lock"]
    return state


def __setstate__(self, state):
    # type: (Dict[Any, Any]) -> None
    """Recreate the lock removed by :meth:`__getstate__`."""
    self.__dict__.update(state)
    self._optimize_lock = threading.Lock()


@property
def user_attrs(self):
    # type: () -> Dict[str, Any]
    """Return user attributes.

    Returns:
        A dictionary containing all user attributes.
    """
    return self._storage.get_study_user_attrs(self._study_id)


@property
def system_attrs(self):
    # type: () -> Dict[str, Any]
    """Return system attributes.

    Returns:
        A dictionary containing all system attributes.
    """
    return self._storage.get_study_system_attrs(self._study_id)


def optimize(
    self,
    func,  # type: ObjectiveFuncType
    n_trials=None,  # type: Optional[int]
    timeout=None,  # type: Optional[float]
    n_jobs=1,  # type: int
    catch=(),  # type: Union[Tuple[()], Tuple[Type[Exception]]]
    callbacks=None,  # type: Optional[List[Callable[[Study, FrozenTrial], None]]]
    gc_after_trial=True,  # type: bool
    show_progress_bar=False,  # type: bool
    **kwargs,
):
    # type: (...) -> None
    """Optimize an objective function.

    Trials run strictly sequentially in this fork; ``n_jobs`` and
    ``show_progress_bar`` are accepted for API compatibility but unused.

    Args:
        func:
            A callable that implements objective function.
        n_trials:
            The number of trials. :obj:`None` means no limit on the number
            of trials; if ``timeout`` is also :obj:`None`, trials are
            created until a termination signal (Ctrl+C / SIGTERM) arrives.
        timeout:
            Stop study after the given number of second(s). :obj:`None`
            means no time limit.
        n_jobs:
            Accepted for compatibility; parallel execution is not
            implemented here.
        catch:
            A study continues to run even when a trial raises one of the
            exceptions specified in this argument. Default is an empty
            tuple, i.e. the study will stop for any exception.
        callbacks:
            Accepted for compatibility; not invoked in this fork.
        gc_after_trial:
            Flag to execute garbage collection at the end of each trial.
        show_progress_bar:
            Accepted for compatibility; progress bars are disabled here.
    """
    self._stop_flag = False

    # Optimize one trial at a time.
    self._optimize_sequential(
        func, n_trials, timeout, catch, callbacks, gc_after_trial, None, **kwargs
    )


def set_user_attr(self, key, value):
    # type: (str, Any) -> None
    """Set a user attribute to the study.

    Args:
        key: A key string of the attribute.
        value: A value of the attribute. The value should be JSON serializable.
    """
    self._storage.set_study_user_attr(self._study_id, key, value)


def set_system_attr(self, key, value):
    # type: (str, Any) -> None
    """Set a system attribute to the study.

    Note that Optuna internally uses this method to save system messages.
    Please use :func:`~optuna.study.Study.set_user_attr` to set users'
    attributes.

    Args:
        key: A key string of the attribute.
        value: A value of the attribute. The value should be JSON serializable.
    """
    self._storage.set_study_system_attr(self._study_id, key, value)


def trials_dataframe(
    self,
    attrs=(
        "number",
        "value",
        "datetime_start",
        "datetime_complete",
        "duration",
        "params",
        "user_attrs",
        "system_attrs",
        "state",
    ),  # type: Tuple[str, ...]
    multi_index=False,  # type: bool
):
    # type: (...) -> pd.DataFrame
    """Export trials as a pandas DataFrame.

    If there are no trials, an empty DataFrame is returned.

    Args:
        attrs:
            Field names of :class:`~optuna.FrozenTrial` to include as
            DataFrame columns.
        multi_index:
            Whether the returned DataFrame employs a MultiIndex. Columns
            that are hierarchical by nature such as ``(params, x)`` are
            flattened to ``params_x`` when set to :obj:`False`.

    Returns:
        A pandas DataFrame of trials in the :class:`~optuna.study.Study`.
    """
    trials = self.get_trials(deepcopy=False)

    # If no trials, return an empty dataframe.
    if not len(trials):
        return pd.DataFrame()

    assert all(isinstance(trial, FrozenTrial) for trial in trials)
    attrs_to_df_columns = collections.OrderedDict()  # type: Dict[str, str]
    for attr in attrs:
        if attr.startswith("_"):
            # Python conventional underscores are omitted in the dataframe.
            df_column = attr[1:]
        else:
            df_column = attr
        attrs_to_df_columns[attr] = df_column

    # column_agg aggregates column names: keys are FrozenTrial attributes
    # such as 'params'; values are dataframe columns such as
    # ('params', 'n_layers').
    column_agg = collections.defaultdict(set)  # type: Dict[str, Set]
    non_nested_attr = ""

    def _create_record_and_aggregate_column(trial):
        # type: (FrozenTrial) -> Dict[Tuple[str, str], Any]
        # Build one flat record per trial; dict-valued attributes (params,
        # user/system attrs) expand into one (column, key) cell each.
        record = {}
        for attr, df_column in attrs_to_df_columns.items():
            value = getattr(trial, attr)
            if isinstance(value, TrialState):
                # Convert TrialState to str and remove the common prefix.
                value = str(value).split(".")[-1]
            if isinstance(value, dict):
                for nested_attr, nested_value in value.items():
                    record[(df_column, nested_attr)] = nested_value
                    column_agg[attr].add((df_column, nested_attr))
            else:
                record[(df_column, non_nested_attr)] = value
                column_agg[attr].add((df_column, non_nested_attr))
        return record

    records = list([_create_record_and_aggregate_column(trial) for trial in trials])

    columns = sum(
        (sorted(column_agg[k]) for k in attrs if k in column_agg), []
    )  # type: List[Tuple[str, str]]

    df = pd.DataFrame(records, columns=pd.MultiIndex.from_tuples(columns))

    if not multi_index:
        # Flatten the `MultiIndex` columns where names are concatenated with
        # underscores. Filtering omits non-nested columns, avoiding unwanted
        # trailing underscores.
        df.columns = [
            "_".join(filter(lambda c: c, map(lambda c: str(c), col))) for col in columns
        ]

    return df


def _optimize_sequential(
    self,
    func,  # type: ObjectiveFuncType
    n_trials,  # type: Optional[int]
    timeout,  # type: Optional[float]
    catch,  # type: Union[Tuple[()], Tuple[Type[Exception]]]
    callbacks,  # type: Optional[List[Callable[[Study, FrozenTrial], None]]]
    gc_after_trial,  # type: bool
    time_start,  # type: Optional[datetime.datetime]
    **kwargs,
):
    # type: (...) -> None
    """Run trials one after another until ``n_trials``, ``timeout``, or stop.

    NOTE(review): ``callbacks`` is accepted but never invoked in this fork.
    """
    # Trial counter.
    i_trial = 0

    # Timer.
    if time_start is None:
        time_start = datetime.datetime.now()

    while True:
        if self._stop_flag:
            break

        # Check number of trials.
        if n_trials is not None:
            if i_trial >= n_trials:
                break
            i_trial += 1

        # Check if the allotted time has expired; `self.add_on` is
        # subtracted from the elapsed wall-clock time.
        if timeout is not None:
            elapsed_seconds = (datetime.datetime.now() - time_start).total_seconds()
            if elapsed_seconds - self.add_on >= timeout:
                break

        # Reset per-trial bookkeeping before each run.
        self.info["names"] = []
        self._run_trial(func, catch, gc_after_trial, **kwargs)

    self._storage.remove_session()


def _run_trial(
    self,
    func,  # type: ObjectiveFuncType
    catch,  # type: Union[Tuple[()], Tuple[Type[Exception]]]
    gc_after_trial,  # type: bool
    **kwargs,
):
    # type: (...) -> trial_module.Trial
    """Create one trial, evaluate ``func`` on it, and record the result."""
    # trial_id enumerates the trials 0, 1, 2, ...
    trial_id = self._storage.create_new_trial(self._study_id)
    # Create a new trial object bound to this study.
    trial = trial_module.Trial(self, trial_id)
    # Trial number is 0, 1, 2, ...
    trial_number = trial.number

    # FIX: `catch` was accepted but ignored, although Study.optimize's
    # docstring promises that exceptions listed in `catch` keep the study
    # running. The default `catch=()` catches nothing, so behavior is
    # unchanged for existing callers.
    try:
        result = func(trial, **kwargs)
    except catch as e:
        message = (
            "Setting status of trial#{} as {} because of the following "
            "error: {}".format(trial_number, TrialState.FAIL, repr(e))
        )
        _logger.warning(message)
        self._storage.set_trial_system_attr(trial_id, "fail_reason", message)
        self._storage.set_trial_state(trial_id, TrialState.FAIL)
        return trial

    # The following mitigates memory problems that can occur in some
    # environments (e.g., services that use computing containers such as
    # CircleCI).
    if gc_after_trial:
        gc.collect()

    # Record a float result, or mark the trial failed.
    try:
        result = float(result)
    except (
        ValueError,
        TypeError,
    ):
        message = (
            "Setting status of trial#{} as {} because the returned value from the "
            "objective function cannot be casted to float. Returned value is: "
            "{}".format(trial_number, TrialState.FAIL, repr(result))
        )
        _logger.warning(message)
        self._storage.set_trial_system_attr(trial_id, "fail_reason", message)
        self._storage.set_trial_state(trial_id, TrialState.FAIL)
        return trial

    if math.isnan(result):
        message = (
            "Setting status of trial#{} as {} because the objective function "
            "returned {}.".format(trial_number, TrialState.FAIL, result)
        )
        _logger.warning(message)
        self._storage.set_trial_system_attr(trial_id, "fail_reason", message)
        self._storage.set_trial_state(trial_id, TrialState.FAIL)
        return trial

    # Log results.
    self._storage.set_trial_value(trial_id, result)
    self._storage.set_trial_state(trial_id, TrialState.COMPLETE)

    return trial


def _log_completed_trial(self, trial, result):
    # type: (trial_module.Trial, float) -> None
    """Log one finished trial with the current best (call site is disabled)."""
    _logger.info(
        "Finished trial#{} with value: {} with parameters: {}. "
        "Best is trial#{} with value: {}.".format(
            trial.number, result, trial.params, self.best_trial.number, self.best_value
        )
    )
def create_study(
    storage=None,  # type: Union[None, str, storages.BaseStorage]
    sampler=None,  # type: samplers.BaseSampler
    pruner=None,  # type: pruners.BasePruner
    direction="minimize",  # type: str
    load_if_exists=False,  # type: bool
    seed=None,
    cat_preds=None,
):
    # type: (...) -> Study
    """Create a new :class:`~optuna.study.Study`.

    Args:
        storage:
            Database URL. :obj:`None` selects the in-memory storage, in
            which case the study is not persistent.
        sampler:
            Sampler implementing the value-suggestion algorithm;
            :obj:`None` selects :class:`~optuna.samplers.TPESampler`.
        pruner:
            Pruner deciding early stopping of unpromising trials.
        direction:
            Direction of optimization: ``minimize`` or ``maximize``.
        load_if_exists:
            NOTE(review): accepted for API compatibility but currently
            unused by this simplified fork.
        seed:
            Seed forwarded to the study's random state.
        cat_preds:
            Forwarded to :class:`Study` (categorical predicate mapping).

    Returns:
        A :class:`~optuna.study.Study` object.
    """
    # Resolve the storage argument (None -> in-memory backend).
    storage = storages.get_storage(storage)

    # With the in-memory backend the new study id is always 0.
    study_id = storage.create_new_study(None)

    # Auto-generated name (a random string starting with "no-name").
    study_name = storage.get_study_name_from_id(study_id)

    study = Study(
        study_name=study_name,
        storage=storage,
        sampler=sampler,
        pruner=pruner,
        seed=seed,
        cat_preds=cat_preds,
    )

    # Map the textual direction onto the enum; kept after study creation to
    # preserve the original behavior on invalid input.
    directions = {
        "minimize": StudyDirection.MINIMIZE,
        "maximize": StudyDirection.MAXIMIZE,
    }
    if direction not in directions:
        raise ValueError("Please set either 'minimize' or 'maximize' to direction.")

    study._storage.set_study_direction(study_id, directions[direction])

    return study
# Numerical floor used to keep divisions/logs away from exact zero.
EPS = 1e-12


def default_gamma(x):
    # type: (int) -> int
    """Return the number of "good" observations: 10% of ``x``, capped at 25."""
    n_below = int(np.ceil(0.1 * x))
    return 25 if n_below > 25 else n_below


def hyperopt_default_gamma(x):
    # type: (int) -> int
    """Return hyperopt's split size: ``0.25 * sqrt(x)``, capped at 25."""
    n_below = int(np.ceil(0.25 * np.sqrt(x)))
    return 25 if n_below > 25 else n_below


def default_weights(x):
    # type: (int) -> np.ndarray
    """Return per-trial weights for ``x`` finished trials.

    Up to 25 trials every trial weighs 1.0; beyond that, the oldest
    ``x - 25`` trials get a linear ramp from ``1/x`` up to 1.0 and the 25
    newest keep weight 1.0 (older observations count less).
    """
    if x == 0:
        return np.asarray([])
    if x < 25:
        return np.ones(x)
    older_ramp = np.linspace(1.0 / x, 1.0, num=x - 25)
    newest_flat = np.ones(25)
    return np.concatenate([older_ramp, newest_flat], axis=0)
def __init__(
    self,
    consider_prior=True,  # type: bool
    prior_weight=1.0,  # type: float
    consider_magic_clip=True,  # type: bool
    consider_endpoints=False,  # type: bool
    n_startup_trials=10,  # type: int
    n_ei_candidates=24,  # type: int  # NOTE(review): original comment reads "USE 28 FOR ML EXPERIMENTS?"
    gamma=default_gamma,  # type: Callable[[int], int]
    weights=default_weights,  # type: Callable[[int], np.ndarray]
    seed=None,  # type: Optional[int]
    k=5,  # BOExplain addition: passed to TPESampler._compare as `k` -- exact semantics not visible here, TODO confirm
):
    # type: (...) -> None

    # Bundle the Parzen-estimator configuration into one parameter object
    # shared by all per-parameter estimators.
    self._parzen_estimator_parameters = _ParzenEstimatorParameters(
        consider_prior, prior_weight, consider_magic_clip, consider_endpoints, weights
    )
    self._prior_weight = prior_weight
    self._n_startup_trials = n_startup_trials
    self._n_ei_candidates = n_ei_candidates
    self._gamma = gamma
    self._weights = weights
    self._k = k

    self._rng = np.random.RandomState(seed)
    # Fallback sampler used during the start-up phase (fewer than
    # `n_startup_trials` observations).
    self._random_sampler = random.RandomSampler(seed=seed)

def reseed_rng(self) -> None:
    # Re-seed both the internal RNG and the fallback random sampler with
    # fresh (non-deterministic) seeds.
    self._rng = np.random.RandomState()
    self._random_sampler.reseed_rng()

def infer_relative_search_space(self, study, trial):
    # type: (Study, FrozenTrial) -> Dict[str, BaseDistribution]

    # TPE samples each parameter independently; no relative search space.
    return {}

def sample_relative(self, study, trial, search_space):
    # type: (Study, FrozenTrial, Dict[str, BaseDistribution]) -> Dict[str, Any]

    # See infer_relative_search_space: relative sampling is unused.
    return {}

def sample_independent(self, study, trial, param_name, param_distribution):
    # type: (Study, FrozenTrial, str, BaseDistribution) -> Any
    """Suggest a value for one parameter using TPE.

    Unlike upstream optuna, this fork returns a 3-tuple
    ``(value, samples, scores)``; ``samples`` and ``scores`` are
    :obj:`None` during the random start-up phase.
    """
    # Parameter values and scores of previous iterations, of the form
    # (param_value, (-step, value)).
    values, scores = _get_observation_pairs(study, param_name, trial)

    n = len(values)

    # Randomly sample at the start, until enough observations exist.
    if n < self._n_startup_trials:
        return (
            self._random_sampler.sample_independent(
                study, trial, param_name, param_distribution
            ),
            None,
            None,
        )
    # Split the hyperparameters into good=below and bad=above. The best 10%
    # of values or the 25 best values go in "below" (see self._gamma).
    below_param_values, above_param_values = self._split_observation_pairs(values, scores)

    # Dispatch on the distribution type; every branch returns the
    # (value, samples, scores) triple.
    if isinstance(param_distribution, distributions.UniformDistribution):
        return self._sample_uniform(param_distribution, below_param_values, above_param_values)
    elif isinstance(param_distribution, distributions.LogUniformDistribution):
        return self._sample_loguniform(
            param_distribution, below_param_values, above_param_values
        )
    elif isinstance(param_distribution, distributions.DiscreteUniformDistribution):
        return self._sample_discrete_uniform(
            param_distribution, below_param_values, above_param_values
        )
    elif isinstance(param_distribution, distributions.IntUniformDistribution):
        return self._sample_int(param_distribution, below_param_values, above_param_values)
    elif isinstance(param_distribution, distributions.IntLogUniformDistribution):
        return self._sample_int_loguniform(
            param_distribution, below_param_values, above_param_values
        )
    elif isinstance(param_distribution, distributions.CategoricalDistribution):
        # Categorical sampling works on choice indices; translate back to
        # the actual choice object before returning.
        index, samples, scores = self._sample_categorical_index(
            param_distribution, below_param_values, above_param_values
        )
        return param_distribution.choices[index], samples, scores
    else:
        distribution_list = [
            distributions.UniformDistribution.__name__,
            distributions.LogUniformDistribution.__name__,
            distributions.DiscreteUniformDistribution.__name__,
            distributions.IntUniformDistribution.__name__,
            distributions.IntLogUniformDistribution.__name__,
            distributions.CategoricalDistribution.__name__,
        ]
        raise NotImplementedError(
            "The distribution {} is not implemented. "
            "The parameter distribution should be one of the {}".format(
                param_distribution, distribution_list
            )
        )

def _split_observation_pairs(
    self,
    config_vals,  # type: List[Optional[float]]
    loss_vals,  # type: List[Tuple[float, float]]
):
    # type: (...) -> Tuple[np.ndarray, np.ndarray]
    """Split observations into good ("below") and bad ("above") groups."""
    # Parameters and objective function values to np arrays; the structured
    # dtype makes argsort order lexicographically by (step, score).
    config_vals = np.asarray(config_vals)
    loss_vals = np.asarray(loss_vals, dtype=[("step", float), ("score", float)])

    # Number of good observations.
    n_below = self._gamma(len(config_vals))
    # Indices of values that would sort the losses in ascending order.
    loss_ascending = np.argsort(loss_vals)
    # Best parameter values; None entries (missing params) are dropped.
    below = config_vals[np.sort(loss_ascending[:n_below])]
    below = np.asarray([v for v in below if v is not None], dtype=float)
    # Worst parameter values.
    above = config_vals[np.sort(loss_ascending[n_below:])]
    above = np.asarray([v for v in above if v is not None], dtype=float)
    return below, above

def _sample_uniform(self, distribution, below, above):
    # type: (distributions.UniformDistribution, np.ndarray, np.ndarray) -> float

    # Delegates to the shared numerical sampler; returns its
    # (value, samples, scores) triple unchanged.
    low = distribution.low
    high = distribution.high
    return self._sample_numerical(low, high, below, above)

def _sample_loguniform(self, distribution, below, above):
    # type: (distributions.LogUniformDistribution, np.ndarray, np.ndarray) -> float

    # Same as _sample_uniform but operating in log space.
    low = distribution.low
    high = distribution.high
    return self._sample_numerical(low, high, below, above, is_log=True)

def _sample_discrete_uniform(self, distribution, below, above):
    # type:(distributions.DiscreteUniformDistribution, np.ndarray, np.ndarray) -> float

    # Step size (1 for integers).
    q = distribution.q
    # Value range.
    r = distribution.high - distribution.low
    # [low, high] is shifted to [0, r] to align sampled values at regular
    # intervals; +-0.5*q widens the ends so rounding is even at the endpoints.
    low = 0 - 0.5 * q
    high = r + 0.5 * q

    # Shift below and above to [0, r], i.e. hyperparameter values become
    # 0, 1, 2, ... when q=1.
    # NOTE(review): this mutates the caller's arrays in place.
    above -= distribution.low
    below -= distribution.low

    # Best sample, shifted back into the original range and clipped.
    best_sample, samples, scores = self._sample_numerical(low, high, below, above, q=q)
    best_sample += distribution.low
    best_sample = min(max(best_sample, distribution.low), distribution.high)
    samples = samples + distribution.low
    return best_sample, samples, scores
def _sample_int(self, distribution, below, above):
    # type: (distributions.IntUniformDistribution, np.ndarray, np.ndarray) -> int
    """Sample an integer by reusing the discrete-uniform path with q=step.

    Returns ``(best_int, int_samples, scores)``.
    """
    # IntUniformDistribution is the same as DiscreteUniformDistribution
    # with q = step.
    d = distributions.DiscreteUniformDistribution(
        low=distribution.low, high=distribution.high, q=distribution.step
    )
    best_sample, samples, scores = self._sample_discrete_uniform(d, below, above)
    samples = [int(sample) for sample in samples]
    return int(best_sample), samples, scores


def _sample_int_loguniform(self, distribution, below, above):
    # type: (distributions.IntLogUniformDistribution, np.ndarray, np.ndarray) -> int
    """Sample an integer from a log-uniform range.

    FIX: `_sample_numerical` returns a ``(best, samples, scores)`` tuple in
    this fork (see `_sample_discrete_uniform` / `_sample_int`), but this
    method previously treated that tuple as a scalar and crashed with a
    TypeError on the arithmetic below. Unpack the tuple and snap each value
    onto the integer step grid, mirroring `_sample_int`.

    NOTE(review): `_sample_numerical(is_log=True)` exponentiates only the
    best value, not the candidate samples; the samples are converted with
    the same grid formula as the best value here — confirm against the
    callers of the (samples, scores) channel.
    """
    low = distribution.low - 0.5
    high = distribution.high + 0.5

    best_sample, samples, scores = self._sample_numerical(
        low, high, below, above, is_log=True
    )

    def _to_int(value):
        # Snap onto the step grid and clip into [low, high].
        snapped = (
            np.round((value - distribution.low) / distribution.step) * distribution.step
            + distribution.low
        )
        return int(min(max(snapped, distribution.low), distribution.high))

    samples = [_to_int(sample) for sample in samples]
    return _to_int(best_sample), samples, scores


def _sample_numerical(
    self,
    low,  # type: float
    high,  # type: float
    below,  # type: np.ndarray
    above,  # type: np.ndarray
    q=None,  # type: Optional[float]
    is_log=False,  # type: bool
):
    # type: (...) -> float
    """Draw a numerical value maximizing ``l(x)/g(x)`` (TPE acquisition).

    Fits one Parzen estimator to the good ("below") observations and one to
    the bad ("above") observations, samples candidates from the former, and
    lets ``TPESampler._compare`` pick the best by likelihood ratio.
    Returns ``(best_value, candidate_samples, scores)``.
    """
    # Work in log space for log-scaled distributions.
    if is_log:
        low = np.log(low)
        high = np.log(high)
        below = np.log(below)
        above = np.log(above)

    # Number of EI candidates (24 by default).
    size = (self._n_ei_candidates,)

    # KDE over the good points: sigmas and sampling weights in sorted order.
    parzen_estimator_below = _ParzenEstimator(
        mus=below, low=low, high=high, parameters=self._parzen_estimator_parameters
    )
    # Draw the candidate samples from the "good" mixture.
    samples_below = self._sample_from_gmm(
        parzen_estimator=parzen_estimator_below, low=low, high=high, q=q, size=size,
    )
    # Log-likelihoods of the candidates under the good mixture.
    log_likelihoods_below = self._gmm_log_pdf(
        samples=samples_below,
        parzen_estimator=parzen_estimator_below,
        low=low,
        high=high,
        q=q,
    )

    # KDE over the bad points, evaluated at the same candidates.
    parzen_estimator_above = _ParzenEstimator(
        mus=above, low=low, high=high, parameters=self._parzen_estimator_parameters
    )
    log_likelihoods_above = self._gmm_log_pdf(
        samples=samples_below,
        parzen_estimator=parzen_estimator_above,
        low=low,
        high=high,
        q=q,
    )

    ret, samples, scores = TPESampler._compare(
        samples=samples_below, log_l=log_likelihoods_below, log_g=log_likelihoods_above, k=self._k
    )
    ret = float(ret[0])
    # Only the best value is mapped back out of log space here.
    ret = math.exp(ret) if is_log else ret
    return ret, samples, scores
(distributions.CategoricalDistribution, np.ndarray, np.ndarray) -> int 377 | 378 | # parameter values 379 | choices = distribution.choices 380 | # convert the good=below and bad=above values to ints 381 | below = list(map(int, below)) 382 | above = list(map(int, above)) 383 | upper = len(choices) 384 | # number of ei candidates = 24 385 | size = (self._n_ei_candidates,) 386 | 387 | # Ramp of weights, weights are smaller for trials done earlier on 388 | weights_below = self._weights(len(below)) 389 | # Weighted count of the number of occurrences of each good hyperparameter (using int IDs) 390 | counts_below = np.bincount(below, minlength=upper, weights=weights_below) 391 | # Add a prior = 1 to avoid zero probability of choosing a hyperparameter 392 | weighted_below = counts_below + self._prior_weight 393 | # normalize 394 | weighted_below /= weighted_below.sum() 395 | # sample the good categorical values 396 | samples_below = self._sample_from_categorical_dist(weighted_below, size) 397 | # log probability of each categorical value in the sample 398 | log_likelihoods_below = TPESampler._categorical_log_pdf(samples_below, weighted_below) 399 | 400 | ## Now same for bad points 401 | # Ramp of weights 402 | weights_above = self._weights(len(above)) 403 | # Weighted count of the number of occurrences of each bad hyperparameter 404 | counts_above = np.bincount(above, minlength=upper, weights=weights_above) 405 | # Add a prior = 1 to avoid zero probability of choosing a hyperparameter 406 | weighted_above = counts_above + self._prior_weight 407 | # normalize 408 | weighted_above /= weighted_above.sum() 409 | # log likelihood of the GOOD sample points with their probabilities of being in the bad group 410 | log_likelihoods_above = TPESampler._categorical_log_pdf(samples_below, weighted_above) 411 | 412 | ret, samples, scores = TPESampler._compare( 413 | samples=samples_below, log_l=log_likelihoods_below, log_g=log_likelihoods_above, k=self._k 414 | ) 415 | ret = int(ret[0]) 
416 | samples = [distribution.choices[samples[index]] for index in range(len(samples))] 417 | return ret, samples, scores 418 | 419 | # return int( 420 | # TPESampler._compare( 421 | # samples=samples_below, log_l=log_likelihoods_below, log_g=log_likelihoods_above 422 | # )[0] 423 | # ) 424 | 425 | def _sample_from_gmm( 426 | self, 427 | parzen_estimator, # type: _ParzenEstimator 428 | low, # type: float 429 | high, # type: float 430 | q=None, # type: Optional[float] 431 | size=(), # type: Tuple 432 | ): 433 | # type: (...) -> np.ndarray 434 | 435 | # weights, mus, and stds sorted by increasing mus of the good points 436 | weights = parzen_estimator.weights 437 | mus = parzen_estimator.mus 438 | sigmas = parzen_estimator.sigmas 439 | weights, mus, sigmas = map(np.asarray, (weights, mus, sigmas)) 440 | 441 | if low >= high: 442 | raise ValueError( 443 | "The 'low' should be lower than the 'high'. " 444 | "But (low, high) = ({}, {}).".format(low, high) 445 | ) 446 | # weighted multinomial sample of 24 good points based on the WEIGHTS, not mean/std 447 | active = np.argmax(self._rng.multinomial(1, weights, size=size), axis=-1) 448 | # normalize the active points 449 | trunc_low = (low - mus[active]) / sigmas[active] 450 | trunc_high = (high - mus[active]) / sigmas[active] 451 | while True: 452 | # sample from a truncated normal dist with means and stds of the active points 453 | samples = truncnorm.rvs( 454 | trunc_low, 455 | trunc_high, 456 | size=size, 457 | loc=mus[active], 458 | scale=sigmas[active], 459 | random_state=self._rng, 460 | ) 461 | if (samples < high).all(): # why not 462 | break 463 | 464 | if q is None: 465 | return samples 466 | else: 467 | # round the samples to ints 468 | return np.round(samples / q) * q 469 | 470 | def _gmm_log_pdf( 471 | self, 472 | samples, # type: np.ndarray 473 | parzen_estimator, # type: _ParzenEstimator 474 | low, # type: float 475 | high, # type: float 476 | q=None, # type: Optional[float] 477 | ): 478 | # type: (...) 
-> np.ndarray 479 | 480 | # weights, mus, and stds sorted by increasing mus of the good points 481 | weights = parzen_estimator.weights 482 | mus = parzen_estimator.mus 483 | sigmas = parzen_estimator.sigmas 484 | samples, weights, mus, sigmas = map(np.asarray, (samples, weights, mus, sigmas)) 485 | 486 | if samples.size == 0: 487 | return np.asarray([], dtype=float) 488 | if weights.ndim != 1: 489 | raise ValueError( 490 | "The 'weights' should be 2-dimension. " 491 | "But weights.shape = {}".format(weights.shape) 492 | ) 493 | if mus.ndim != 1: 494 | raise ValueError( 495 | "The 'mus' should be 2-dimension. " "But mus.shape = {}".format(mus.shape) 496 | ) 497 | if sigmas.ndim != 1: 498 | raise ValueError( 499 | "The 'sigmas' should be 2-dimension. " "But sigmas.shape = {}".format(sigmas.shape) 500 | ) 501 | # probability of each point times the probability it is in the 502 | # accepted range [low, high], ie normalization constant. weights are normalized 503 | p_accept = np.sum( 504 | weights 505 | * ( 506 | TPESampler._normal_cdf(high, mus, sigmas) 507 | - TPESampler._normal_cdf(low, mus, sigmas) 508 | ) 509 | ) 510 | 511 | if q is None: 512 | distance = samples[..., None] - mus 513 | mahalanobis = (distance / np.maximum(sigmas, EPS)) ** 2 514 | Z = np.sqrt(2 * np.pi) * sigmas 515 | coefficient = weights / Z / p_accept 516 | return TPESampler._logsum_rows(-0.5 * mahalanobis + np.log(coefficient)) 517 | else: 518 | cdf_func = TPESampler._normal_cdf 519 | # bounds on the normal distribution of each sample point. 
This is the probability space 520 | # that would have allowed for each sample value to be chosen 521 | upper_bound = np.minimum(samples + q / 2.0, high) 522 | lower_bound = np.maximum(samples - q / 2.0, low) 523 | # probability for each sampled point 524 | # weights[..., None] is weights reshaped from (len(weights),) to (len(weights),1), same for mus and sigmas 525 | # upper_bound[None] are the upper_bounds reshaped from (len(upper_bound),) to (1, len(upper_bounds)) 526 | # For each sample point, we compute the probability of it occuring in each Gaussian mixture (one for each point) 527 | # and then sum the mass. Finally we multiply by the weights of each point occuring 528 | probabilities = np.sum( 529 | weights[..., None] 530 | * ( 531 | cdf_func(upper_bound[None], mus[..., None], sigmas[..., None]) 532 | - cdf_func(lower_bound[None], mus[..., None], sigmas[..., None]) 533 | ), 534 | axis=0, 535 | ) 536 | # normalize by the probability of accepting 537 | return np.log(probabilities + EPS) - np.log(p_accept + EPS) 538 | 539 | def _sample_from_categorical_dist(self, probabilities, size): # weights=probabilities 540 | # type: (np.ndarray, Tuple[int]) -> np.ndarray 541 | 542 | # probabilities as np array 543 | if probabilities.size == 1 and isinstance(probabilities[0], np.ndarray): 544 | probabilities = probabilities[0] 545 | probabilities = np.asarray(probabilities) 546 | 547 | if size == (0,): 548 | return np.asarray([], dtype=float) 549 | assert len(size) 550 | assert probabilities.ndim == 1 551 | 552 | # n_draws = 24 553 | n_draws = int(np.prod(size)) 554 | # draw samples from the multinomial distribution 555 | sample = self._rng.multinomial(n=1, pvals=probabilities, size=n_draws) 556 | assert sample.shape == size + (probabilities.size,) 557 | # 24 categorical values selected from the multinomial sample 558 | return_val = np.dot(sample, np.arange(probabilities.size)) 559 | return_val.shape = size 560 | return return_val 561 | 562 | @classmethod 563 | def 
_categorical_log_pdf( 564 | cls, 565 | sample, # type: np.ndarray 566 | p, # type: np.ndarray 567 | ): 568 | # type: (...) -> np.ndarray 569 | 570 | if sample.size: 571 | # log probability of each sample 572 | return np.log(np.asarray(p)[sample]) 573 | else: 574 | return np.asarray([]) 575 | 576 | @classmethod 577 | def _compare(cls, samples, log_l, log_g, k): 578 | # type: (np.ndarray, np.ndarray, np.ndarray) -> np.ndarray 579 | 580 | # good samples, good log likelihoods, bad log likelihoods 581 | samples, log_l, log_g = map(np.asarray, (samples, log_l, log_g)) 582 | if samples.size: 583 | # ratio of likelihoods = difference of log likelihoods 584 | score = log_l - log_g 585 | if samples.size != score.size: 586 | raise ValueError( 587 | "The size of the 'samples' and that of the 'score' " 588 | "should be same. " 589 | "But (samples.size, score.size) = ({}, {})".format(samples.size, score.size) 590 | ) 591 | # this is the hyperparameter with the best expected score, can find top-k 592 | best = np.argmax(score) 593 | uniq_smpls, indices = np.unique(samples, return_index=True) 594 | uniq_scores = np.exp(score[indices]) 595 | # topk=min(len(uniq_scores), 32) 596 | k = min(k, len(uniq_smpls)) 597 | indicies = np.argpartition(uniq_scores, -k)[-k:] 598 | uniq_smpls = uniq_smpls[indicies] 599 | uniq_scores = uniq_scores[indicies] 600 | # sorted_indices = np.argsort(uniq_scores)[::-1] 601 | # uniq_smpls = uniq_smpls[sorted_indices] 602 | # uniq_scores = uniq_scores[sorted_indices] 603 | return np.asarray([samples[best]] * samples.size), uniq_smpls, uniq_scores #/ np.sum(uniq_scores) 604 | else: 605 | return np.asarray([]) 606 | 607 | @classmethod 608 | def _logsum_rows(cls, x): 609 | # type: (np.ndarray) -> np.ndarray 610 | 611 | x = np.asarray(x) 612 | m = x.max(axis=1) 613 | return np.log(np.exp(x - m[:, None]).sum(axis=1)) + m 614 | 615 | @classmethod 616 | def _normal_cdf(cls, x, mu, sigma): 617 | # type: (float, np.ndarray, np.ndarray) -> np.ndarray 618 | 619 | mu, 
sigma = map(np.asarray, (mu, sigma)) 620 | denominator = x - mu 621 | numerator = np.maximum(np.sqrt(2) * sigma, EPS) 622 | z = denominator / numerator 623 | return 0.5 * (1 + scipy.special.erf(z)) 624 | 625 | @classmethod 626 | def _log_normal_cdf(cls, x, mu, sigma): 627 | # type: (float, np.ndarray, np.ndarray) -> np.ndarray 628 | 629 | mu, sigma = map(np.asarray, (mu, sigma)) 630 | if x < 0: 631 | raise ValueError("Negative argument is given to _lognormal_cdf. x: {}".format(x)) 632 | denominator = np.log(np.maximum(x, EPS)) - mu 633 | numerator = np.maximum(np.sqrt(2) * sigma, EPS) 634 | z = denominator / numerator 635 | return 0.5 + 0.5 * scipy.special.erf(z) 636 | 637 | @staticmethod 638 | def hyperopt_parameters(): 639 | # type: () -> Dict[str, Any] 640 | """Return the the default parameters of hyperopt (v0.1.2). 641 | 642 | :class:`~optuna.samplers.TPESampler` can be instantiated with the parameters returned 643 | by this method. 644 | 645 | Example: 646 | 647 | Create a :class:`~optuna.samplers.TPESampler` instance with the default 648 | parameters of `hyperopt `_. 649 | 650 | .. testcode:: 651 | 652 | import optuna 653 | from optuna.samplers import TPESampler 654 | 655 | def objective(trial): 656 | x = trial.suggest_uniform('x', -10, 10) 657 | return x**2 658 | 659 | sampler = TPESampler(**TPESampler.hyperopt_parameters()) 660 | study = optuna.create_study(sampler=sampler) 661 | study.optimize(objective, n_trials=10) 662 | 663 | Returns: 664 | A dictionary containing the default parameters of hyperopt. 
665 | 666 | """ 667 | 668 | return { 669 | "consider_prior": True, 670 | "prior_weight": 1.0, 671 | "consider_magic_clip": True, 672 | "consider_endpoints": False, 673 | "n_startup_trials": 20, 674 | "n_ei_candidates": 24, 675 | "gamma": hyperopt_default_gamma, 676 | "weights": default_weights, 677 | } 678 | 679 | 680 | def _get_observation_pairs(study, param_name, trial): 681 | # type: (Study, str, FrozenTrial) -> Tuple[List[Optional[float]], List[Tuple[float, float]]] 682 | """Get observation pairs from the study. 683 | 684 | This function collects observation pairs from the complete or pruned trials of the study. 685 | The values for trials that don't contain the parameter named ``param_name`` are set to None. 686 | 687 | An observation pair fundamentally consists of a parameter value and an objective value. 688 | However, due to the pruning mechanism of Optuna, final objective values are not always 689 | available. Therefore, this function uses intermediate values in addition to the final 690 | ones, and reports the value with its step count as ``(-step, value)``. 691 | Consequently, the structure of the observation pair is as follows: 692 | ``(param_value, (-step, value))``. 693 | 694 | The second element of an observation pair is used to rank observations in 695 | ``_split_observation_pairs`` method (i.e., observations are sorted lexicographically by 696 | ``(-step, value)``). 
697 | """ 698 | 699 | sign = 1 700 | if study.direction == StudyDirection.MAXIMIZE: 701 | sign = -1 702 | 703 | values = [] 704 | scores = [] 705 | for trial in study.get_trials(deepcopy=False): 706 | if trial.state is TrialState.COMPLETE and trial.value is not None: 707 | score = (-float("inf"), sign * trial.value) 708 | elif trial.state is TrialState.PRUNED: 709 | if len(trial.intermediate_values) > 0: 710 | step, intermediate_value = max(trial.intermediate_values.items()) 711 | if math.isnan(intermediate_value): 712 | score = (-step, float("inf")) 713 | else: 714 | score = (-step, sign * intermediate_value) 715 | else: 716 | score = (float("inf"), 0.0) 717 | else: 718 | continue 719 | 720 | param_value = None # type: Optional[float] 721 | if param_name in trial.params: 722 | distribution = trial.distributions[param_name] 723 | param_value = distribution.to_internal_repr(trial.params[param_name]) 724 | 725 | values.append(param_value) 726 | scores.append(score) 727 | 728 | return values, scores 729 | --------------------------------------------------------------------------------