├── .coveragerc
├── .gitignore
├── .isort.cfg
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── .secrets.baseline
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.rst
├── bayesmark
    ├── __init__.py
    ├── abstract_optimizer.py
    ├── builtin_opt
    │   ├── __init__.py
    │   ├── config.py
    │   ├── hyperopt_optimizer.py
    │   ├── nevergrad_optimizer.py
    │   ├── opentuner_optimizer.py
    │   ├── pysot_optimizer.py
    │   ├── random_optimizer.py
    │   └── scikit_optimizer.py
    ├── cmd_parse.py
    ├── constants.py
    ├── data.py
    ├── expected_max.py
    ├── experiment.py
    ├── experiment_aggregate.py
    ├── experiment_analysis.py
    ├── experiment_baseline.py
    ├── experiment_db_init.py
    ├── experiment_launcher.py
    ├── np_util.py
    ├── path_util.py
    ├── quantiles.py
    ├── random_search.py
    ├── serialize.py
    ├── signatures.py
    ├── sklearn_funcs.py
    ├── space.py
    ├── stats.py
    ├── util.py
    └── xr_util.py
├── build_wheel.sh
├── docs
    ├── .gitignore
    ├── Makefile
    ├── authors.rst
    ├── code.rst
    ├── conf.py
    ├── dummy.py
    ├── index.rst
    ├── readme.rst
    └── scoring.rst
├── example_opt_root
    ├── config.json
    ├── flaky_optimizer.py
    ├── hyperopt_optimizer.py
    ├── nevergrad_optimizer.py
    ├── opentuner_optimizer.py
    ├── pysot_optimizer.py
    ├── random_optimizer.py
    └── scikit_optimizer.py
├── integration_test.sh
├── integration_test_with_setup.sh
├── notebooks
    ├── dummy.py
    ├── plot_mean_score.ipynb
    └── plot_test_case.ipynb
├── requirements
    ├── base.in
    ├── base.txt
    ├── docs.in
    ├── docs.txt
    ├── ipynb.in
    ├── ipynb.txt
    ├── optimizers.in
    ├── optimizers.txt
    ├── pipreqs_edits.sed
    ├── self.txt
    ├── test.in
    ├── test.txt
    ├── tools.in
    └── tools.txt
├── setup.py
├── test.sh
├── test
    ├── data_test.py
    ├── dummy.py
    ├── expected_max_test.py
    ├── experiment_aggregate_test.py
    ├── experiment_analysis_test.py
    ├── experiment_baseline_test.py
    ├── experiment_db_init_test.py
    ├── experiment_launcher_test.py
    ├── experiment_test.py
    ├── hypothesis_util.py
    ├── np_util_test.py
    ├── quantiles_test.py
    ├── random_search_test.py
    ├── serialize_test.py
    ├── signatures_test.py
    ├── sklearn_funcs_test.py
    ├── space_test.py
    ├── stats_test.py
    ├── util.py
    ├── util_test.py
    └── xr_util_test.py
└── tools
    ├── archive_branch.sh
    └── deploy.sh


/.coveragerc:
--------------------------------------------------------------------------------
 1 | [report]
 2 | exclude_lines =
 3 |     pragma: no cover
 4 |     @abstract
 5 |     ValueError
 6 |     NotImplementedError
 7 |     assert
 8 |     _error
 9 |     def main()
10 |     pragma: io
11 |     pragma: main
12 |     pragma: validator
13 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .*
 2 | !.gitignore
 3 | !.gitmodules
 4 | !.flake8
 5 | !.coveragerc
 6 | !.pre-commit-config.yaml
 7 | !.secrets.baseline
 8 | !.travis.yml
 9 | !.readthedocs.yml
10 | 
11 | # For wheels
12 | bayesmark/version.py
13 | dist/
14 | 
15 | # Java
16 | *.class
17 | 
18 | # Intellij
19 | *.iml
20 | *.iws
21 | 
22 | # Gradle
23 | build/
24 | classes/
25 | 
26 | log/
27 | tmp/
28 | /out/
29 | ins.xml
30 | *.log
31 | 
32 | # Python
33 | *.py[co]
34 | *.egg*
35 | .cache
36 | .DS_Store
37 | 
38 | # env
39 | env/
40 | 
41 | # Emacs
42 | *~
43 | .\#*
44 | \#*\#
45 | 
46 | # *ipynb
47 | .ipynb_checkpoints
48 | *.png
49 | *.aux
50 | 
51 | # Hypothesis
52 | tests/src
53 | src/
54 | 
55 | # Coverage
56 | htmlcov/
57 | 
58 | # for the test.sh pip compile check
59 | requirements/*.chk
60 | requirement_chk.in
61 | 


--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | known_third_party = bayesmark,git,hyperopt,hypothesis,hypothesis_gufunc,importlib_metadata,ipykernel,jupyter,jupyter_core,nbconvert,nevergrad,numpy,opentuner,pandas,pathvalidate,poap,pySOT,pytest,pytest_cov,scipy,setuptools,sklearn,skopt,sphinx,xarray
3 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | -   repo: https://github.com/pre-commit/pre-commit-hooks
 2 |     rev: v1.2.3
 3 |     hooks:
 4 |     -   id: flake8
 5 |         exclude: ^(docs/*)
 6 |         args: [--max-line-length=120, --ignore=E203]
 7 |     -   id: check-byte-order-marker
 8 |     -   id: check-case-conflict
 9 |     -   id: check-merge-conflict
10 |     -   id: end-of-file-fixer
11 |     -   id: forbid-new-submodules
12 |     -   id: mixed-line-ending
13 |         args: [--fix=lf]
14 |     -   id: trailing-whitespace
15 |     -   id: debug-statements
16 |     -   id: check-json
17 |     -   id: pretty-format-json
18 |         args: [--autofix, --indent=4]
19 |     -   id: check-yaml
20 |     -   id: sort-simple-yaml
21 | -   repo: https://github.com/ambv/black
22 |     rev: 19.3b0
23 |     hooks:
24 |     -   id: black
25 |         args: [-l 120, --target-version=py36]
26 | -   repo: https://github.com/asottile/seed-isort-config
27 |     rev: v1.2.0
28 |     hooks:
29 |     -   id: seed-isort-config
30 |         args: [--application-directories=test]
31 | -   repo: https://github.com/pre-commit/mirrors-isort
32 |     rev: v4.3.4
33 |     hooks:
34 |     -   id: isort
35 |         language_version: python3
36 |         args: [-w 120, -m 3, -tc, --project=bayesmark]
37 | - repo: https://github.com/jumanjihouse/pre-commit-hooks
38 |   rev: 1.11.0
39 |   hooks:
40 |     - id: require-ascii
41 |     - id: script-must-have-extension
42 |     - id: forbid-binary
43 | -   repo: https://github.com/Lucas-C/pre-commit-hooks
44 |     rev: v1.1.6
45 |     hooks:
46 |     -   id: forbid-crlf
47 |     -   id: forbid-tabs
48 | - repo: https://github.com/kynan/nbstripout
49 |   rev: fe155a55548c61e4eb53522e57921077acf82c00  # pragma: allowlist secret
50 |   hooks:
51 |     - id: nbstripout
52 |       exclude: ^notebooks/.*\.out\.ipynb$
53 | - repo: https://github.com/Yelp/detect-secrets
54 |   rev: v0.12.5
55 |   hooks:
56 |     - id: detect-secrets
57 |       args: ['--baseline', '.secrets.baseline']
58 | - repo: https://github.com/pre-commit/pygrep-hooks
59 |   rev: v1.4.1  # Use the ref you want to point at
60 |   hooks:
61 |     - id: python-no-eval
62 |     - id: python-check-blanket-noqa
63 | - repo: https://github.com/asottile/yesqa
64 |   rev: v0.0.11
65 |   hooks:
66 |     - id: yesqa
67 | - repo: https://github.com/myint/eradicate
68 |   rev: 522ed7ce2da82d33b3e2331bf50d4671c5a5af9a  # pragma: allowlist secret
69 |   hooks:
70 |     - id: eradicate
71 |       exclude: docs/conf.py
72 | 


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
 1 | # .readthedocs.yml
 2 | # Read the Docs configuration file
 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 4 | 
 5 | # Required
 6 | version: 2
 7 | 
 8 | # Build documentation in the docs/ directory with Sphinx
 9 | sphinx:
10 |   configuration: docs/conf.py
11 | 
12 | # Build documentation with MkDocs
13 | #mkdocs:
14 | #  configuration: mkdocs.yml
15 | 
16 | # Optionally build your docs in additional formats such as PDF and ePub
17 | formats: all
18 | 
19 | # Optionally set the version of Python and requirements required to build your docs
20 | python:
21 |   version: 3.6
22 |   install:
23 |     - requirements: requirements/docs.txt
24 | 


--------------------------------------------------------------------------------
/.secrets.baseline:
--------------------------------------------------------------------------------
 1 | {
 2 |   "exclude": {
 3 |     "files": null,
 4 |     "lines": null
 5 |   },
 6 |   "generated_at": "2019-09-18T01:04:54Z",
 7 |   "plugins_used": [
 8 |     {
 9 |       "name": "AWSKeyDetector"
10 |     },
11 |     {
12 |       "name": "ArtifactoryDetector"
13 |     },
14 |     {
15 |       "base64_limit": 4.5,
16 |       "name": "Base64HighEntropyString"
17 |     },
18 |     {
19 |       "name": "BasicAuthDetector"
20 |     },
21 |     {
22 |       "hex_limit": 3,
23 |       "name": "HexHighEntropyString"
24 |     },
25 |     {
26 |       "name": "KeywordDetector"
27 |     },
28 |     {
29 |       "name": "PrivateKeyDetector"
30 |     },
31 |     {
32 |       "name": "SlackDetector"
33 |     },
34 |     {
35 |       "name": "StripeDetector"
36 |     }
37 |   ],
38 |   "results": {},
39 |   "version": "0.12.5"
40 | }
41 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |   - "3.6"
 4 | 
 5 | before_script:
 6 |     - "curl -H 'Cache-Control: no-cache' https://raw.githubusercontent.com/fossas/fossa-cli/master/install.sh | sudo bash"
 7 | 
 8 | script:
 9 |     - ./integration_test_with_setup.sh
10 |     - ./test.sh
11 |     - cat requirements/*.txt >requirements.txt
12 |     - '[ ! -z "$FOSSA_API_KEY" ] && (fossa init && fossa analyze) || true'
13 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include requirements/base.in
2 | include requirements/optimizers.in
3 | include requirements/ipynb.in
4 | include LICENSE
5 | include README.rst
6 | 


--------------------------------------------------------------------------------
/bayesmark/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.0.8"
2 | __author__ = "Ryan Turner"
3 | __license__ = "Apache v2"
4 | 


--------------------------------------------------------------------------------
/bayesmark/abstract_optimizer.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Abstract base class for the optimizers in the benchmark. This creates a common API across all packages.
15 | """
16 | from abc import ABC, abstractmethod
17 | 
18 | from importlib_metadata import version
19 | 
20 | 
21 | class AbstractOptimizer(ABC):
22 |     """Abstract base class for the optimizers in the benchmark. This creates a common API across all packages.
23 |     """
24 | 
25 |     # Every implementation package needs to specify this static variable, e.g., "primary_import=opentuner"
26 |     primary_import = None
27 | 
28 |     def __init__(self, api_config, **kwargs):
29 |         """Build wrapper class to use an optimizer in benchmark.
30 | 
31 |         Parameters
32 |         ----------
33 |         api_config : dict-like of dict-like
34 |             Configuration of the optimization variables. See API description.
35 |         """
36 |         self.api_config = api_config
37 | 
38 |     @classmethod
39 |     def get_version(cls):
40 |         """Get the version for this optimizer.
41 | 
42 |         Returns
43 |         -------
44 |         version_str : str
45 |             Version number of the optimizer. Usually, this is equivalent to ``package.__version__``.
46 |         """
47 |         assert (cls.primary_import is None) or isinstance(cls.primary_import, str)
48 |         # Should use x.x.x as version if sub-class did not specify its primary import
49 |         version_str = "x.x.x" if cls.primary_import is None else version(cls.primary_import)
50 |         return version_str
51 | 
52 |     @abstractmethod
53 |     def suggest(self, n_suggestions):
54 |         """Get a suggestion from the optimizer.
55 | 
56 |         Parameters
57 |         ----------
58 |         n_suggestions : int
59 |             Desired number of parallel suggestions in the output
60 | 
61 |         Returns
62 |         -------
63 |         next_guess : list of dict
64 |             List of `n_suggestions` suggestions to evaluate the objective
65 |             function. Each suggestion is a dictionary where each key
66 |             corresponds to a parameter being optimized.
67 |         """
68 |         pass
69 | 
70 |     @abstractmethod
71 |     def observe(self, X, y):
72 |         """Send an observation of a suggestion back to the optimizer.
73 | 
74 |         Parameters
75 |         ----------
76 |         X : list of dict-like
77 |             Places where the objective function has already been evaluated.
78 |             Each suggestion is a dictionary where each key corresponds to a
79 |             parameter being optimized.
80 |         y : array-like, shape (n,)
81 |             Corresponding values where objective has been evaluated
82 |         """
83 |         pass
84 | 


--------------------------------------------------------------------------------
/bayesmark/builtin_opt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uber/bayesmark/8c420e935718f0d6867153b781e58943ecaf2338/bayesmark/builtin_opt/__init__.py


--------------------------------------------------------------------------------
/bayesmark/builtin_opt/config.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from bayesmark.constants import RANDOM_SEARCH
15 | 
16 | CONFIG = {
17 |     "HyperOpt": ["hyperopt_optimizer.py", {}],
18 |     "Nevergrad-OnePlusOne": ["nevergrad_optimizer.py", {"budget": 300, "tool": "OnePlusOne"}],
19 |     "OpenTuner-BanditA": ["opentuner_optimizer.py", {"techniques": ["AUCBanditMetaTechniqueA"]}],
20 |     "OpenTuner-GA": ["opentuner_optimizer.py", {"techniques": ["PSO_GA_Bandit"]}],
21 |     "OpenTuner-GA-DE": ["opentuner_optimizer.py", {"techniques": ["PSO_GA_DE"]}],
22 |     "PySOT": ["pysot_optimizer.py", {}],
23 |     "RandomSearch": ["random_optimizer.py", {}],
24 |     "Scikit-GBRT-Hedge": [
25 |         "scikit_optimizer.py",
26 |         {"acq_func": "gp_hedge", "base_estimator": "GBRT", "n_initial_points": 5},
27 |     ],
28 |     "Scikit-GP-Hedge": ["scikit_optimizer.py", {"acq_func": "gp_hedge", "base_estimator": "GP", "n_initial_points": 5}],
29 |     "Scikit-GP-LCB": ["scikit_optimizer.py", {"acq_func": "LCB", "base_estimator": "GP", "n_initial_points": 5}],
30 | }
31 | 
32 | assert RANDOM_SEARCH in CONFIG, "%s required in settings file." % RANDOM_SEARCH
33 | 


--------------------------------------------------------------------------------
/bayesmark/builtin_opt/nevergrad_optimizer.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 Uber Technologies, Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | import nevergrad.optimization as optimization
 15 | import numpy as np
 16 | from nevergrad import instrumentation as inst
 17 | from scipy.stats import norm
 18 | 
 19 | from bayesmark.abstract_optimizer import AbstractOptimizer
 20 | from bayesmark.np_util import linear_rescale
 21 | from bayesmark.space import Real
 22 | 
 23 | 
 24 | class NevergradOptimizer(AbstractOptimizer):
 25 |     primary_import = "nevergrad"
 26 | 
 27 |     def __init__(self, api_config, tool, budget):
 28 |         """Build wrapper class to use a nevergrad optimizer (registry entry `tool`) in benchmark.
 29 | 
 30 |         Parameters
 31 |         ----------
 32 |         api_config : dict-like of dict-like
 33 |             Configuration of the optimization variables. See API description.
 34 |         budget : int
 35 |             Expected number of max function evals
 36 |         """
 37 |         AbstractOptimizer.__init__(self, api_config)
 38 | 
 39 |         self.instrum, self.space = NevergradOptimizer.get_nvg_dimensions(api_config)
 40 | 
 41 |         dimension = self.instrum.dimension
 42 |         opt_class = optimization.registry[tool]
 43 |         self.optim = opt_class(dimension=dimension, budget=budget)
 44 | 
 45 |     @staticmethod
 46 |     def get_nvg_dimensions(api_config):
 47 |         """Helper routine to set up the nevergrad search space in the constructor.
 48 | 
 49 |         Take api_config as argument so this can be static.
 50 |         """
 51 |         # The ordering of iteration probably makes no difference, but just to be
 52 |         # safe and consistent with space.py, iterate in sorted order.
 53 |         param_list = sorted(api_config.keys())
 54 | 
 55 |         all_args = {}
 56 |         all_prewarp = {}
 57 |         for param_name in param_list:
 58 |             param_config = api_config[param_name]
 59 | 
 60 |             param_type = param_config["type"]
 61 | 
 62 |             param_space = param_config.get("space", None)
 63 |             param_range = param_config.get("range", None)
 64 |             param_values = param_config.get("values", None)
 65 | 
 66 |             prewarp = None
 67 |             if param_type == "cat":
 68 |                 assert param_space is None
 69 |                 assert param_range is None
 70 |                 arg = inst.var.SoftmaxCategorical(param_values)
 71 |             elif param_type == "bool":
 72 |                 assert param_space is None
 73 |                 assert param_range is None
 74 |                 assert param_values is None
 75 |                 arg = inst.var.OrderedDiscrete([False, True])
 76 |             elif param_values is not None:
 77 |                 assert param_type in ("int", "ordinal", "real")
 78 |                 arg = inst.var.OrderedDiscrete(param_values)
 79 |                 # We are throwing away information here, but OrderedDiscrete
 80 |                 # appears to be invariant to monotonic transformation anyway.
 81 |             elif param_type == "int":
 82 |                 assert param_values is None
 83 |                 # Need +1 since the API range is inclusive
 84 |                 choices = range(int(param_range[0]), int(param_range[-1]) + 1)
 85 |                 arg = inst.var.OrderedDiscrete(choices)
 86 |                 # We are throwing away information here, but OrderedDiscrete
 87 |                 # appears to be invariant to monotonic transformation anyway.
 88 |             elif param_type == "real":
 89 |                 assert param_values is None
 90 |                 assert param_range is not None
 91 |                 # Will need to warp to this space separately (see prewarp/postwarp).
 92 |                 arg = inst.var.Gaussian(mean=0, std=1)
 93 |                 prewarp = Real(warp=param_space, range_=param_range)
 94 |             else:
 95 |                 assert False, "type %s not handled in API" % param_type
 96 | 
 97 |             all_args[param_name] = arg
 98 |             all_prewarp[param_name] = prewarp
 99 |         instrum = inst.Instrumentation(**all_args)
100 |         return instrum, all_prewarp
101 | 
102 |     def prewarp(self, xx):
103 |         """Extra work needed to get variables into the Gaussian space
104 |         representation."""
105 |         xxw = {}
106 |         for arg_name, vv in xx.items():
107 |             assert np.isscalar(vv)
108 |             space = self.space[arg_name]
109 | 
110 |             if space is not None:
111 |                 # Warp so we think it is apriori uniform in [a, b]
112 |                 vv = space.warp(vv)
113 |                 assert vv.size == 1
114 | 
115 |                 # Now make uniform on [0, 1], also unpack warped to scalar
116 |                 (lb, ub), = space.get_bounds()
117 |                 vv = linear_rescale(vv.item(), lb, ub, 0, 1)
118 | 
119 |                 # Now make std Gaussian apriori
120 |                 vv = norm.ppf(vv)
121 |             assert np.isscalar(vv)
122 |             xxw[arg_name] = vv
123 |         return xxw
124 | 
125 |     def postwarp(self, xxw):
126 |         """Extra work needed to undo the Gaussian space representation."""
127 |         xx = {}
128 |         for arg_name, vv in xxw.items():
129 |             assert np.isscalar(vv)
130 |             space = self.space[arg_name]
131 | 
132 |             if space is not None:
133 |                 # Undo the std Gaussian representation: map to uniform on [0, 1]
134 |                 vv = norm.cdf(vv)
135 | 
136 |                 # Now rescale from [0, 1] to the warped-space bounds [lb, ub]
137 |                 (lb, ub), = space.get_bounds()
138 |                 vv = linear_rescale(vv, 0, 1, lb, ub)
139 | 
140 |                 # Unwarp from the warped space back to the original parameter space
141 |                 vv = space.unwarp([vv])
142 |             assert np.isscalar(vv)
143 |             xx[arg_name] = vv
144 |         return xx
145 | 
146 |     def suggest(self, n_suggestions=1):
147 |         """Get suggestion from nevergrad.
148 | 
149 |         Parameters
150 |         ----------
151 |         n_suggestions : int
152 |             Desired number of parallel suggestions in the output
153 | 
154 |         Returns
155 |         -------
156 |         next_guess : list of dict
157 |             List of `n_suggestions` suggestions to evaluate the objective
158 |             function. Each suggestion is a dictionary where each key
159 |             corresponds to a parameter being optimized.
160 |         """
161 |         x_guess_data = [self.optim.ask() for _ in range(n_suggestions)]
162 | 
163 |         x_guess = [None] * n_suggestions
164 |         for ii, xx in enumerate(x_guess_data):
165 |             x_pos, x_kwarg = self.instrum.data_to_arguments(xx)
166 |             assert x_pos == ()
167 |             x_guess[ii] = self.postwarp(x_kwarg)
168 | 
169 |         return x_guess
170 | 
171 |     def observe(self, X, y):
172 |         """Feed an observation back to nevergrad.
173 | 
174 |         Parameters
175 |         ----------
176 |         X : list of dict-like
177 |             Places where the objective function has already been evaluated.
178 |             Each suggestion is a dictionary where each key corresponds to a
179 |             parameter being optimized.
180 |         y : array-like, shape (n,)
181 |             Corresponding values where objective has been evaluated
182 |         """
183 |         for xx, yy in zip(X, y):
184 |             xx = self.prewarp(xx)
185 |             xx = self.instrum.arguments_to_data(**xx)
186 |             self.optim.tell(xx, yy)
187 | 
188 | 
189 | opt_wrapper = NevergradOptimizer
190 | 


--------------------------------------------------------------------------------
/bayesmark/builtin_opt/pysot_optimizer.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 Uber Technologies, Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | import warnings
 15 | from copy import copy
 16 | 
 17 | import numpy as np
 18 | from poap.strategy import EvalRecord
 19 | from pySOT.experimental_design import SymmetricLatinHypercube
 20 | from pySOT.optimization_problems import OptimizationProblem
 21 | from pySOT.strategy import SRBFStrategy
 22 | from pySOT.surrogate import CubicKernel, LinearTail, RBFInterpolant
 23 | 
 24 | from bayesmark.abstract_optimizer import AbstractOptimizer
 25 | from bayesmark.space import JointSpace
 26 | 
 27 | 
 28 | class PySOTOptimizer(AbstractOptimizer):
 29 |     primary_import = "pysot"
 30 | 
 31 |     def __init__(self, api_config):
 32 |         """Build wrapper class to use an optimizer in benchmark.
 33 | 
 34 |         Parameters
 35 |         ----------
 36 |         api_config : dict-like of dict-like
 37 |             Configuration of the optimization variables. See API description.
 38 |         """
 39 |         AbstractOptimizer.__init__(self, api_config)
 40 | 
 41 |         self.space_x = JointSpace(api_config)
 42 |         self.bounds = self.space_x.get_bounds()
 43 |         self.create_opt_prob()  # Sets up the optimization problem (needs self.bounds)
 44 |         self.max_evals = np.iinfo(np.int32).max  # NOTE: Largest possible int32
 45 |         self.batch_size = None
 46 |         self.history = []
 47 |         self.proposals = []
 48 | 
 49 |     def create_opt_prob(self):
 50 |         """Create an optimization problem object."""
 51 |         opt = OptimizationProblem()
 52 |         opt.lb = self.bounds[:, 0]  # In warped space
 53 |         opt.ub = self.bounds[:, 1]  # In warped space
 54 |         opt.dim = len(self.bounds)
 55 |         opt.cont_var = np.arange(len(self.bounds))
 56 |         opt.int_var = []
 57 |         assert len(opt.cont_var) + len(opt.int_var) == opt.dim
 58 |         opt.objfun = None
 59 |         self.opt = opt
 60 | 
 61 |     def start(self, max_evals):
 62 |         """Starts a new pySOT run."""
 63 |         self.history = []
 64 |         self.proposals = []
 65 | 
 66 |         # Symmetric Latin hypercube design
 67 |         des_pts = max([self.batch_size, 2 * (self.opt.dim + 1)])
 68 |         slhd = SymmetricLatinHypercube(dim=self.opt.dim, num_pts=des_pts)
 69 | 
 70 |         # Warped RBF interpolant
 71 |         rbf = RBFInterpolant(
 72 |             dim=self.opt.dim,
 73 |             lb=self.opt.lb,
 74 |             ub=self.opt.ub,
 75 |             kernel=CubicKernel(),
 76 |             tail=LinearTail(self.opt.dim),
 77 |             eta=1e-4,
 78 |         )
 79 | 
 80 |         # Optimization strategy
 81 |         self.strategy = SRBFStrategy(
 82 |             max_evals=self.max_evals,
 83 |             opt_prob=self.opt,
 84 |             exp_design=slhd,
 85 |             surrogate=rbf,
 86 |             asynchronous=True,
 87 |             batch_size=1,
 88 |             use_restarts=True,
 89 |         )
 90 | 
 91 |     def suggest(self, n_suggestions=1):
 92 |         """Get a suggestion from the optimizer.
 93 | 
 94 |         Parameters
 95 |         ----------
 96 |         n_suggestions : int
 97 |             Desired number of parallel suggestions in the output
 98 | 
 99 |         Returns
100 |         -------
101 |         next_guess : list of dict
102 |             List of `n_suggestions` suggestions to evaluate the objective
103 |             function. Each suggestion is a dictionary where each key
104 |             corresponds to a parameter being optimized.
105 |         """
106 | 
107 |         if self.batch_size is None:  # First call to suggest
108 |             self.batch_size = n_suggestions
109 |             self.start(self.max_evals)
110 | 
111 |         # Set the tolerances pretending like we are running batch
112 |         d, p = float(self.opt.dim), float(n_suggestions)
113 |         self.strategy.failtol = p * int(max(np.ceil(d / p), np.ceil(4 / p)))
114 | 
115 |         # Now we can make suggestions
116 |         x_w = []
117 |         self.proposals = []
118 |         for _ in range(n_suggestions):
119 |             proposal = self.strategy.propose_action()
120 |             record = EvalRecord(proposal.args, status="pending")
121 |             proposal.record = record
122 |             proposal.accept()  # This triggers all the callbacks
123 | 
124 |             # It is possible that pySOT proposes a previously evaluated point
125 |             # when all variables are integers, so we just restart in this case
126 |             # since we have likely converged anyway. See PySOT issue #30.
127 |             x = list(proposal.record.params)  # From tuple to list
128 |             x_unwarped, = self.space_x.unwarp(x)
129 |             if x_unwarped in self.history:
130 |                 warnings.warn("pySOT proposed the same point twice")
131 |                 self.start(self.max_evals)
132 |                 return self.suggest(n_suggestions=n_suggestions)
133 | 
134 |             # NOTE: Append unwarped to avoid rounding issues
135 |             self.history.append(copy(x_unwarped))
136 |             self.proposals.append(proposal)
137 |             x_w.append(copy(x_unwarped))
138 | 
139 |         return x_w
140 | 
141 |     def _observe(self, x, y):
142 |         # Find the matching proposal and execute its callbacks
143 |         idx = [x == xx for xx in self.history]
144 |         i = np.argwhere(idx)[0].item()  # Pick the first index if there are ties
145 |         proposal = self.proposals[i]
146 |         proposal.record.complete(y)
147 |         self.proposals.pop(i)
148 |         self.history.pop(i)
149 | 
150 |     def observe(self, X, y):
151 |         """Send an observation of a suggestion back to the optimizer.
152 | 
153 |         Parameters
154 |         ----------
155 |         X : list of dict-like
156 |             Places where the objective function has already been evaluated.
157 |             Each suggestion is a dictionary where each key corresponds to a
158 |             parameter being optimized.
159 |         y : array-like, shape (n,)
160 |             Corresponding values where objective has been evaluated
161 |         """
162 |         assert len(X) == len(y)
163 | 
164 |         for x_, y_ in zip(X, y):
165 |             # Just ignore any non-finite observations we got; unclear if this is the right thing
166 |             if np.isfinite(y_):
167 |                 self._observe(x_, y_)
168 | 
169 | 
170 | opt_wrapper = PySOTOptimizer
171 | 


--------------------------------------------------------------------------------
/bayesmark/builtin_opt/random_optimizer.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import bayesmark.random_search as rs
15 | from bayesmark import np_util
16 | from bayesmark.abstract_optimizer import AbstractOptimizer
17 | 
18 | 
19 | class RandomOptimizer(AbstractOptimizer):
20 |     # Unclear what is best package to list for primary_import here.
21 |     primary_import = "bayesmark"
22 | 
23 |     def __init__(self, api_config, random=np_util.random):
24 |         """Build wrapper class to use random search function in benchmark.
25 | 
26 |         Parameters
27 |         ----------
28 |         api_config : dict-like of dict-like
29 |             Configuration of the optimization variables. See API description.
30 |         random : random stream
31 |             Forwarded as `random` to `rs.suggest_dict` on every `suggest` call. Defaults to `np_util.random`.
32 |         """
33 |         AbstractOptimizer.__init__(self, api_config)
34 |         self.random = random
35 | 
36 |     def suggest(self, n_suggestions=1):
37 |         """Get suggestion.
38 | 
39 |         Parameters
40 |         ----------
41 |         n_suggestions : int
42 |             Desired number of parallel suggestions in the output
43 | 
44 |         Returns
45 |         -------
46 |         next_guess : list of dict
47 |             List of `n_suggestions` suggestions to evaluate the objective
48 |             function. Each suggestion is a dictionary where each key
49 |             corresponds to a parameter being optimized.
50 |         """
51 |         x_guess = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)
52 |         return x_guess
53 | 
54 |     def observe(self, X, y):
55 |         """Feed an observation back.
56 | 
57 |         Parameters
58 |         ----------
59 |         X : list of dict-like
60 |             Places where the objective function has already been evaluated.
61 |             Each suggestion is a dictionary where each key corresponds to a
62 |             parameter being optimized.
63 |         y : array-like, shape (n,)
64 |             Corresponding values where objective has been evaluated
65 |         """
66 |         # Random search ignores past observations, so there is nothing to do
67 |         pass
68 | 
69 | 
70 | # Every optimizer wrapper module must bind its wrapper class to the name opt_wrapper, because experiment always
71 | # tries to import opt_wrapper regardless of the optimizer it is importing.
72 | opt_wrapper = RandomOptimizer
73 | 


--------------------------------------------------------------------------------
/bayesmark/builtin_opt/scikit_optimizer.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 Uber Technologies, Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | import numpy as np
 15 | from scipy.interpolate import interp1d
 16 | from skopt import Optimizer as SkOpt
 17 | from skopt.space import Categorical, Integer, Real
 18 | 
 19 | from bayesmark.abstract_optimizer import AbstractOptimizer
 20 | 
 21 | 
 22 | class ScikitOptimizer(AbstractOptimizer):
 23 |     primary_import = "scikit-optimize"
 24 | 
 25 |     def __init__(self, api_config, base_estimator="GP", acq_func="gp_hedge", n_initial_points=5, **kwargs):
 26 |         """Build wrapper class to use an optimizer in benchmark.
 27 | 
 28 |         Parameters
 29 |         ----------
 30 |         api_config : dict-like of dict-like
 31 |             Configuration of the optimization variables. See API description.
 32 |         base_estimator : {'GP', 'RF', 'ET', 'GBRT'}
 33 |             How to estimate the objective function.
 34 |         acq_func : {'LCB', 'EI', 'PI', 'gp_hedge', 'EIps', 'PIps'}
 35 |             Acquisition objective to decide next suggestion.
 36 |         n_initial_points : int
 37 |             Number of points to sample randomly before actual Bayes opt.
 38 |         """
 39 |         AbstractOptimizer.__init__(self, api_config)
 40 | 
 41 |         dimensions, self.round_to_values = ScikitOptimizer.get_sk_dimensions(api_config)
 42 | 
 43 |         # Older versions of skopt don't copy over the dimensions names during
 44 |         # normalization and hence the names are missing in
 45 |         # self.skopt.space.dimensions. Therefore, we save our own copy of the
 46 |         # dimension names to be safe. If we can commit to using the newer
 47 |         # versions of skopt we can delete self.dimensions_list.
 48 |         self.dimensions_list = tuple(dd.name for dd in dimensions)
 49 | 
 50 |         # Undecided where we want to pass the kwargs, so for now just make sure
 51 |         # they are blank
 52 |         assert len(kwargs) == 0
 53 | 
 54 |         self.skopt = SkOpt(
 55 |             dimensions,
 56 |             n_initial_points=n_initial_points,
 57 |             base_estimator=base_estimator,
 58 |             acq_func=acq_func,
 59 |             acq_optimizer="auto",
 60 |             acq_func_kwargs={},
 61 |             acq_optimizer_kwargs={},
 62 |         )
 63 | 
 64 |     @staticmethod
 65 |     def get_sk_dimensions(api_config, transform="normalize"):
 66 |         """Help routine to setup skopt search space in constructor.
 67 | 
 68 |         Takes api_config as argument so this can be static. Returns ``(sk_dims, round_to_values)``.
 69 |         """
 70 |         # The ordering of iteration probably makes no difference, but just to
 71 |         # be safe and consistent with space.py, I will make sorted.
 72 |         param_list = sorted(api_config.keys())
 73 | 
 74 |         sk_dims = []
 75 |         round_to_values = {}
 76 |         for param_name in param_list:
 77 |             param_config = api_config[param_name]
 78 | 
 79 |             param_type = param_config["type"]
 80 | 
 81 |             param_space = param_config.get("space", None)
 82 |             param_range = param_config.get("range", None)
 83 |             param_values = param_config.get("values", None)
 84 | 
 85 |             # Some setup for case that whitelist of values is provided:
 86 |             values_only_type = param_type in ("cat", "ordinal")
 87 |             if (param_values is not None) and (not values_only_type):
 88 |                 assert param_range is None
 89 |                 param_values = np.unique(param_values)  # Sorted and de-duplicated
 90 |                 param_range = (param_values[0], param_values[-1])
 91 |                 round_to_values[param_name] = interp1d(
 92 |                     param_values, param_values, kind="nearest", fill_value="extrapolate"
 93 |                 )
 94 | 
 95 |             if param_type == "int":
 96 |                 # Integer space in sklearn does not support any warping => Need
 97 |                 # to leave the warping as linear in skopt.
 98 |                 sk_dims.append(Integer(param_range[0], param_range[-1], transform=transform, name=param_name))
 99 |             elif param_type == "bool":
100 |                 assert param_range is None
101 |                 assert param_values is None
102 |                 sk_dims.append(Integer(0, 1, transform=transform, name=param_name))
103 |             elif param_type in ("cat", "ordinal"):
104 |                 assert param_range is None
105 |                 # Leave x-form to one-hot as per skopt default
106 |                 sk_dims.append(Categorical(param_values, name=param_name))
107 |             elif param_type == "real":
108 |                 # Skopt doesn't support all our warpings, so need to pick
109 |                 # closest substitute it does support.
110 |                 prior = "log-uniform" if param_space in ("log", "logit") else "uniform"
111 |                 sk_dims.append(Real(param_range[0], param_range[-1], prior=prior, transform=transform, name=param_name))
112 |             else:
113 |                 assert False, "type %s not handled in API" % param_type
114 |         return sk_dims, round_to_values
115 | 
116 |     def suggest(self, n_suggestions=1):
117 |         """Get a suggestion from the optimizer.
118 | 
119 |         Parameters
120 |         ----------
121 |         n_suggestions : int
122 |             Desired number of parallel suggestions in the output
123 | 
124 |         Returns
125 |         -------
126 |         next_guess : list of dict
127 |             List of `n_suggestions` suggestions to evaluate the objective
128 |             function. Each suggestion is a dictionary where each key
129 |             corresponds to a parameter being optimized.
130 |         """
131 |         # First get list of lists from skopt.ask()
132 |         next_guess = self.skopt.ask(n_points=n_suggestions)
133 |         # Then convert to list of dicts
134 |         next_guess = [dict(zip(self.dimensions_list, x)) for x in next_guess]
135 | 
136 |         # Now do the rounding, custom rounding is not supported in skopt. Note
137 |         # that there is not necessarily a round function for each dimension here.
138 |         for param_name, round_f in self.round_to_values.items():
139 |             for xx in next_guess:
140 |                 xx[param_name] = round_f(xx[param_name])  # NOTE(review): interp1d returns a numpy array - confirm OK
141 |         return next_guess
142 | 
143 |     def observe(self, X, y):
144 |         """Send an observation of a suggestion back to the optimizer.
145 | 
146 |         Parameters
147 |         ----------
148 |         X : list of dict-like
149 |             Places where the objective function has already been evaluated.
150 |             Each suggestion is a dictionary where each key corresponds to a
151 |             parameter being optimized.
152 |         y : array-like, shape (n,)
153 |             Corresponding values where objective has been evaluated
154 |         """
155 |         # Supposedly skopt can handle blocks, but not sure about interface for
156 |         # that. Just do loop to be safe for now.
157 |         for xx, yy in zip(X, y):
158 |             # skopt needs lists instead of dicts
159 |             xx = [xx[dim_name] for dim_name in self.dimensions_list]
160 |             # Skip non-finite observations (inf or nan); unclear if right thing
161 |             if np.isfinite(yy):
162 |                 self.skopt.tell(xx, yy)
163 | 
164 | 
165 | opt_wrapper = ScikitOptimizer  # Exported under the common name opt_wrapper, like the other builtin optimizers
166 | 


--------------------------------------------------------------------------------
/bayesmark/constants.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """General constants that are used in multiple files in the code base.
15 | """
16 | 
17 | # Special constant for random search since it gets used as our reference point in the baselines
18 | RANDOM_SEARCH = "RandomSearch"
19 | OPTIMIZERS_FILE = "config.json"
20 | ARG_DELIM = "_"  # Delimiter used when concatenating cmd arguments for any reason
21 | PY_INTERPRETER = "python"  # What command to call for sub process, we could specify version number here also.
22 | 
23 | # Variables to save in SAL
24 | EVAL = "eval"
25 | TIME = "time"
26 | SUGGEST_LOG = "suggest_log"
27 | EXP_VARS = (EVAL, TIME, SUGGEST_LOG)
28 | 
29 | # Derived variables to save in SAL
30 | TIME_RESULTS = "time"  # Same string value as TIME
31 | EVAL_RESULTS = "eval"  # Same string value as EVAL
32 | BASELINE = "baseline"
33 | PERF_RESULTS = "perf"
34 | MEAN_SCORE = "summary"
35 | 
36 | # Coordinate dim names needed in saved xr Datasets
37 | ITER = "iter"
38 | TEST_CASE = "function"
39 | METHOD = "optimizer"
40 | TRIAL = "study_id"
41 | SUGGEST = "suggestion"
42 | OBJECTIVE = "objective"
43 | 
44 | # Dataset variables for eval results
45 | VISIBLE_TO_OPT = "_visible_to_opt"
46 | 
47 | # Dataset variables for time results
48 | SUGGEST_PHASE = "suggest"
49 | OBS_PHASE = "observe"
50 | EVAL_PHASE = "eval"
51 | EVAL_PHASE_SUM = "eval_sum"
52 | EVAL_PHASE_MAX = "eval_max"
53 | 
54 | # Dataset variables for aggregate results
55 | PERF_MED = "median"
56 | LB_MED = "median LB"
57 | UB_MED = "median UB"
58 | NORMED_MED = "median normed"
59 | PERF_MEAN = "mean"
60 | LB_MEAN = "mean LB"
61 | UB_MEAN = "mean UB"
62 | NORMED_MEAN = "mean normed"
63 | LB_NORMED_MEAN = "mean normed LB"
64 | UB_NORMED_MEAN = "mean normed UB"
65 | PERF_BEST = "best"
66 | PERF_CLIP = "clip"
67 | 
68 | # Choices used for test problems, there is some redundant specification with sklearn funcs file here
69 | MODEL_NAMES = ("DT", "MLP-adam", "MLP-sgd", "RF", "SVM", "ada", "kNN", "lasso", "linear")
70 | DATA_LOADER_NAMES = ("breast", "digits", "iris", "wine", "boston", "diabetes")
71 | 
72 | SCORERS_CLF = ("nll", "acc")
73 | SCORERS_REG = ("mae", "mse")
74 | METRICS = tuple(sorted(SCORERS_CLF + SCORERS_REG))  # ("acc", "mae", "mse", "nll")
75 | 


--------------------------------------------------------------------------------
/bayesmark/data.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 Uber Technologies, Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | """Module to deal with all matters relating to loading example data sets, which we tune ML models to.
 15 | """
 16 | from enum import IntEnum, auto
 17 | 
 18 | import numpy as np
 19 | import pandas as pd  # only needed for csv reader, maybe try something else
 20 | from sklearn import datasets
 21 | 
 22 | from bayesmark.constants import DATA_LOADER_NAMES, SCORERS_CLF, SCORERS_REG
 23 | from bayesmark.path_util import join_safe_r
 24 | from bayesmark.stats import robust_standardize
 25 | 
 26 | 
 27 | class ProblemType(IntEnum):
 28 |     """The different problem types we consider. Currently, just regression (`reg`) and classification (`clf`).
 29 |     """
 30 | 
 31 |     clf = auto()  # Classification
 32 |     reg = auto()  # Regression
 33 | 
 34 | 
 35 | DATA_LOADERS = {
 36 |     "digits": (datasets.load_digits, ProblemType.clf),
 37 |     "iris": (datasets.load_iris, ProblemType.clf),
 38 |     "wine": (datasets.load_wine, ProblemType.clf),
 39 |     "breast": (datasets.load_breast_cancer, ProblemType.clf),
 40 |     "boston": (datasets.load_boston, ProblemType.reg),  # NOTE(review): load_boston was removed in scikit-learn 1.2
 41 |     "diabetes": (datasets.load_diabetes, ProblemType.reg),
 42 | }
 43 | 
 44 | assert sorted(DATA_LOADERS.keys()) == sorted(DATA_LOADER_NAMES)  # Keep in sync with the names in constants
 45 | 
 46 | # Arguably, this could go in constants, but doesn't cause extra imports being here.
 47 | METRICS_LOOKUP = {ProblemType.clf: SCORERS_CLF, ProblemType.reg: SCORERS_REG}
 48 | 
 49 | 
 50 | def get_problem_type(dataset_name):
 51 |     """Determine if this dataset is a regression or classification problem.
 52 | 
 53 |     Parameters
 54 |     ----------
 55 |     dataset_name : str
 56 |         Which data set to use, must be key in `DATA_LOADERS` dict, or name of custom csv file.
 57 | 
 58 |     Returns
 59 |     -------
 60 |     problem_type : ProblemType
 61 |         `Enum` to indicate if regression or classification data set.
 62 |     """
 63 |     if dataset_name in DATA_LOADERS:
 64 |         _, problem_type = DATA_LOADERS[dataset_name]
 65 |         return problem_type
 66 | 
 67 |     # Maybe we can come up with a better system, but for now let's use a convention based on the naming of the csv file.
 68 |     if dataset_name.startswith("reg-"):
 69 |         return ProblemType.reg
 70 |     if dataset_name.startswith("clf-"):
 71 |         return ProblemType.clf
 72 |     assert False, "Can't determine problem type from dataset name."
 73 | 
 74 | 
 75 | def _csv_loader(dataset_name, return_X_y, data_root, clip_x=100):  # pragma: io
 76 |     """Load custom csv files for use in the benchmark.
 77 | 
 78 |     This function assumes ``dataset_name + ".csv"`` is a csv file found in the `data_root` path.  It also assumes the
 79 |     last column of the csv file is the target and the other columns are features.
 80 | 
 81 |     The target column should be `int` for classification and `float` for regression. Column names ending in ``"_cat"``
 82 |     are assumed to be categorical and will be one-hot encoded.
 83 | 
 84 |     The features (and target for regression) are robust standardized. The features are also clipped to be in
 85 |     ``[-clip_x, clip_x]`` *after* standardization.
 86 |     """
 87 |     assert return_X_y, "Only returning (X,y) tuple supported right now."
 88 |     assert clip_x >= 0
 89 | 
 90 |     # Quantile range for robust standardization. The 86% range is the most efficient for Gaussians. See:
 91 |     # https://github.com/scikit-learn/scikit-learn/issues/10139#issuecomment-344705040
 92 |     q_level = 0.86
 93 | 
 94 |     path = join_safe_r(data_root, dataset_name + ".csv")
 95 | 
 96 |     # For now, use convention that can get problem type based on data set name
 97 |     problem_type = get_problem_type(dataset_name)
 98 | 
 99 |     # Assuming no missing data in source csv files at the moment, these will
100 |     # result in error.
101 |     df = pd.read_csv(
102 |         path, header=0, index_col=False, engine="c", na_filter=False, true_values=["true"], false_values=["false"]
103 |     )
104 | 
105 |     label = df.columns[-1]  # Assume last col is target
106 | 
107 |     target = df.pop(label).values
108 |     if problem_type == ProblemType.clf:
109 |         assert target.dtype in (np.bool_, np.int_)
110 |         target = target.astype(np.int_)  # convert to int for skl
111 |     if problem_type == ProblemType.reg:
112 |         assert target.dtype == np.float_
113 |         # 86% range is the most efficient (at least for Gaussians)
114 |         target = robust_standardize(target, q_level=q_level)
115 | 
116 |     # Fill in an categorical variables (object dtype of cols names ..._cat)
117 |     cat_cols = sorted(cc for cc in df.columns if cc.endswith("_cat") or df[cc].dtype.kind == "O")
118 |     df = pd.get_dummies(df, columns=cat_cols, drop_first=True, dtype=np.float_)
119 |     # Could also sort all columns to be sure it will be reprod
120 | 
121 |     # Everything should now be in float
122 |     assert (df.dtypes == np.float_).all()
123 | 
124 |     data = df.values
125 |     data = robust_standardize(data, q_level=q_level)
126 |     # Debatable if we should include this, but there are a lot of outliers
127 |     data = np.clip(data, -clip_x, clip_x)
128 | 
129 |     # We should probably do some logging or something to wrap up
130 |     return data, target, problem_type
131 | 
132 | 
133 | def load_data(dataset_name, data_root=None):  # pragma: io
134 |     """Load a data set and return it as numpy arrays (custom csv data is pre-processed by `_csv_loader`).
135 | 
136 |     Parameters
137 |     ----------
138 |     dataset_name : str
139 |         Which data set to use, must be key in `DATA_LOADERS` dict, or name of custom csv file.
140 |     data_root : str
141 |         Root directory to look for all custom csv files. May be ``None`` for sklearn data sets.
142 | 
143 |     Returns
144 |     -------
145 |     data : :class:`numpy:numpy.ndarray` of shape (n, d)
146 |         The feature matrix of the data set. It will be `float` array.
147 |     target : :class:`numpy:numpy.ndarray` of shape (n,)
148 |         The target vector for the problem, which is `int` for classification and `float` for regression.
149 |     problem_type : :class:`bayesmark.data.ProblemType`
150 |         `Enum` to indicate if regression or classification data set.
151 |     """
152 |     if dataset_name in DATA_LOADERS:
153 |         loader_f, problem_type = DATA_LOADERS[dataset_name]
154 |         data, target = loader_f(return_X_y=True)
155 |     else:  # try to load as custom csv
156 |         assert data_root is not None, "data root cannot be None when custom csv requested."
157 |         data, target, problem_type = _csv_loader(dataset_name, return_X_y=True, data_root=data_root)
158 |     return data, target, problem_type
159 | 


--------------------------------------------------------------------------------
/bayesmark/expected_max.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 Uber Technologies, Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | """Compute expected maximum or minimum from iid samples.
 15 | """
 16 | import numpy as np
 17 | from scipy.special import gammaln, logsumexp
 18 | 
 19 | 
 20 | def get_expected_max_weights(n, m):
 21 |     """Get the L-estimator weights for computing unbiased estimator of expected ``max(x[1:m])`` on a data set.
 22 | 
 23 |     Parameters
 24 |     ----------
 25 |     n : int
 26 |         Number of data points in data set ``len(x)``. Must be ``>= 1``.
 27 |     m : `int` or :class:`numpy:numpy.ndarray` with dtype `int`
 28 |         This function is for estimating the expected maximum over `m` iid draws. Require ``m >= 1``. This can be
 29 |         broadcasted. If ``m > n``, the weights will be nan, because there is no way to get unbiased estimate in that
 30 |         case.
 31 | 
 32 |     Returns
 33 |     -------
 34 |     pdf : :class:`numpy:numpy.ndarray`, shape (n,) for scalar `m`, otherwise ``m.shape + (n,)``
 35 |         The weights for L-estimator. Will be non-negative and sum to one along the last axis.
 36 |     """
 37 |     assert np.ndim(n) == 0
 38 |     assert n >= 1  # otherwise makes no sense
 39 | 
 40 |     m = np.asarray(m)  # Must be np type for broadcasting
 41 |     # We could also check dtype is int, but not bothering here
 42 |     assert np.all(m >= 1)  # otherwise makes no sense
 43 |     m = m[..., None]  # Trailing axis so m broadcasts against the n ranks in kk
 44 | 
 45 |     kk = 1 + np.arange(n)  # Ranks 1..n
 46 |     lpdf = gammaln(kk) - gammaln(kk - (m - 1))  # log[(k-1)! / (k-m)!]; -inf (zero weight) for ranks k < m
 47 |     pdf = np.exp(lpdf - logsumexp(lpdf, axis=-1, keepdims=True))
 48 |     # expect nan for m > n
 49 |     assert np.all((m > n) | np.isclose(np.sum(pdf, axis=-1, keepdims=True), 1.0))
 50 |     return pdf
 51 | 
 52 | 
 53 | def expected_max(x, m):
 54 |     """Compute unbiased estimator of expected ``max(x[1:m])`` on a data set.
 55 | 
 56 |     Parameters
 57 |     ----------
 58 |     x : :class:`numpy:numpy.ndarray` of shape (n,)
 59 |         Data set we would like expected ``max(x[1:m])`` on.
 60 |     m : `int` or :class:`numpy:numpy.ndarray` with dtype `int`
 61 |         This function is for estimating the expected maximum over `m` iid draws. Require ``m >= 1``. This can be
 62 |         broadcasted. If ``m > n``, the weights will be nan, because there is no way to get unbiased estimate in that
 63 |         case.
 64 | 
 65 |     Returns
 66 |     -------
 67 |     E_max_x : float or :class:`numpy:numpy.ndarray` with the shape of `m`
 68 |         Unbiased estimate of mean max of `m` draws from distribution on `x`. All nan if `x` is empty.
 69 |     """
 70 |     assert np.ndim(x) == 1
 71 |     # m is validated by get_expected_max_weights
 72 | 
 73 |     # Get order stats for L-estimator
 74 |     x = np.array(x, copy=True)  # we will modify in place
 75 |     x.sort()  # in place!!
 76 | 
 77 |     # Now get estimator weights
 78 |     n, = x.shape
 79 |     if n == 0:  # No data: return nan in the shape of m without computing weights
 80 |         return np.full(np.shape(m), np.nan)
 81 |     pdf = get_expected_max_weights(n, m)
 82 | 
 83 |     # Compute L-estimator: weighted sum of the sorted values over the last axis
 84 |     E_max_x = np.sum(x * pdf, axis=-1)
 85 |     return E_max_x
 86 | 
 87 | 
 88 | def expected_min(x, m):
 89 |     """Compute unbiased estimator of expected ``min(x[1:m])`` on a data set.
 90 | 
 91 |     Parameters
 92 |     ----------
 93 |     x : :class:`numpy:numpy.ndarray` of shape (n,)
 94 |         Data set we would like expected ``min(x[1:m])`` on. Require ``len(x) >= 1``.
 95 |     m : `int` or :class:`numpy:numpy.ndarray` with dtype `int`
 96 |         This function is for estimating the expected minimum over `m` iid draws. Require ``m >= 1``. This can be
 97 |         broadcasted. If ``m > n``, the weights will be nan, because there is no way to get unbiased estimate in that
 98 |         case.
 99 | 
100 |     Returns
101 |     -------
102 |     E_min_x : float
103 |         Unbiased estimate of mean min of `m` draws from distribution on `x`.
104 |     """
105 |     x = np.asarray(x)  # Needed so the unary minus below works on list input too
106 |     E_min_x = -expected_max(-x, m)  # min(x) == -max(-x), so reuse the max estimator on the negated data
107 |     return E_min_x
108 | 


--------------------------------------------------------------------------------
/bayesmark/experiment_db_init.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Tool to create a new database for results. This is just a wrapper on the serializer init call.
15 | """
16 | import logging
17 | 
18 | import bayesmark.cmd_parse as cmd
19 | from bayesmark.cmd_parse import CmdArgs
20 | from bayesmark.constants import EXP_VARS
21 | from bayesmark.serialize import XRSerializer
22 | 
23 | EXIST_OK = True  # Forwarded to XRSerializer.init_db as exist_ok in main()
24 | 
25 | logger = logging.getLogger(__name__)
26 | 
27 | 
28 | def main():
29 |     """Parse the command line and create the results db via `XRSerializer.init_db`. See README for calling db_init.
30 |     """
31 |     description = "Initialize the directories for running the experiments"
32 |     args = cmd.parse_args(cmd.general_parser(description))
33 | 
34 |     assert not args[CmdArgs.dry_run], "Dry run doesn't make any sense when building dirs"
35 | 
36 |     logger.setLevel(logging.INFO)  # Note this is the module-wide logger
37 |     if args[CmdArgs.verbose]:
38 |         logger.addHandler(logging.StreamHandler())  # Echo log records to the console (stderr by default)
39 | 
40 |     XRSerializer.init_db(args[CmdArgs.db_root], db=args[CmdArgs.db], keys=EXP_VARS, exist_ok=EXIST_OK)
41 | 
42 |     logger.info("done")
43 | 
44 | 
45 | if __name__ == "__main__":  # Only run when executed as a script, not on import
46 |     main()  # pragma: main
47 | 


--------------------------------------------------------------------------------
/bayesmark/path_util.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 Uber Technologies, Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | """Utilities handy for manipulating paths that have extra checks not included in `os.path`.
 15 | """
 16 | import os.path
 17 | import warnings
 18 | 
 19 | 
 20 | def abspath(path, verify=True):  # pragma: io
 21 |     """Combo of :func:`os.path.abspath` and :func:`os.path.expanduser` that will also check existence of directory.
 22 | 
 23 |     Parameters
 24 |     ----------
 25 |     path : str
 26 |         Relative path string that can also contain home directories, e.g., ``"~/git/"``.
 27 |     verify : bool
 28 |         If true, verifies that the directory exists. Raises an assertion failure if it does not exist.
 29 | 
 30 |     Returns
 31 |     -------
 32 |     path : str
 33 |         Absolute version of input path.
 34 |     """
 35 |     path = os.path.abspath(os.path.expanduser(path))  # Expand "~" first, then make absolute
 36 |     if verify:
 37 |         assert os.path.isdir(path), "directory does not exist: %s" % path
 38 |     return path
 39 | 
 40 | 
 41 | def absopen(path, mode):  # pragma: io
 42 |     """Safe version of the built in :func:`open` that only opens absolute paths.
 43 | 
 44 |     Parameters
 45 |     ----------
 46 |     path : str
 47 |         Absolute path. An assertion failure is raised if it is not absolute.
 48 |     mode : str
 49 |         Open mode, any mode understood by the built in :func:`open`, e.g., ``"r"`` or ``"w"``.
 50 | 
 51 |     Returns
 52 |     -------
 53 |     f : file handle
 54 |         File handle open to use. The caller is responsible for closing it.
 55 |     """
 56 |     assert os.path.isabs(path), "Only allowing opening of absolute paths for safety."
 57 |     f = open(path, mode)
 58 |     return f
 59 | 
 60 | 
 61 | def _join_safe(*args):  # pragma: io
 62 |     """Helper routine with commonalities between `join_safe_r` and `join_safe_w`: existing abs dir + bare file name.
 63 |     """
 64 |     assert len(args) >= 2
 65 |     path, fname = args[:-1], args[-1]  # All but the last arg are directory parts, the last is the file name
 66 | 
 67 |     path = os.path.join(*path)  # Put together the dir
 68 |     path = abspath(path, verify=True)  # Make sure dir is abs, and exists
 69 | 
 70 |     assert os.path.basename(fname) == fname, "Expected basename got %s" % fname
 71 |     fname = os.path.join(path, fname)  # Put on the filename, must be abs
 72 |     # Could check abs again if really wanted to be safe
 73 |     return fname
 74 | 
 75 | 
 76 | def join_safe_r(*args):  # pragma: io
 77 |     """Safe version of :func:`os.path.join` that checks resulting path is absolute and the file exists for reading.
 78 | 
 79 |     Parameters
 80 |     ----------
 81 |     *args : str
 82 |         varargs for parts of path to combine. The last argument must be a file name.
 83 | 
 84 |     Returns
 85 |     -------
 86 |     fname : str
 87 |         Absolute path to filename.
 88 |     """
 89 |     fname = _join_safe(*args)
 90 |     assert os.path.isfile(fname)  # Check it exists
 91 |     return fname
 92 | 
 93 | 
 94 | def join_safe_w(*args):  # pragma: io
 95 |     """Safe version of :func:`os.path.join` that checks resulting path is absolute.
 96 | 
 97 |     Because this routine is for writing, if the file already exists, a warning is raised.
 98 | 
 99 |     Parameters
100 |     ----------
101 |     *args : str
102 |         varargs for parts of path to combine. The last argument must be a file name.
103 | 
104 |     Returns
105 |     -------
106 |     fname : str
107 |         Absolute path to filename.
108 |     """
109 |     fname = _join_safe(*args)
110 |     # Give a warning if it exists
111 |     if os.path.isfile(fname):
112 |         warnings.warn("file already exists: %s" % fname, RuntimeWarning)
113 |     return fname
114 | 


--------------------------------------------------------------------------------
/bayesmark/random_search.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """A baseline random search in our standardized optimizer interface. Useful for baselines.
15 | """
16 | import numpy as np
17 | 
18 | from bayesmark import np_util
19 | from bayesmark.space import JointSpace
20 | 
21 | 
def suggest_dict(X, y, meta, n_suggestions=1, random=np_util.random):
    """Stateless random search: propose the next query points by sampling uniformly in the warped space.

    This implements the API for general structures of different data types.

    Parameters
    ----------
    X : list(dict)
        Places where the objective function has already been evaluated. Only validated, otherwise not used in random
        search.
    y : :class:`numpy:numpy.ndarray`, shape (n,)
        Corresponding values where objective has been evaluated. Only validated, otherwise not used in random search.
    meta : dict(str, dict)
        Configuration of the optimization variables. See API description.
    n_suggestions : int
        Desired number of parallel suggestions in the output
    random : :class:`numpy:numpy.random.RandomState`
        Optionally pass in random stream for reproducibility.

    Returns
    -------
    next_guess : list(dict)
        List of `n_suggestions` suggestions to evaluate the objective function.
        Each suggestion is a dictionary where each key corresponds to a parameter being optimized.
    """
    # Move the history into the warped space and fetch the box we sample from
    joint_space = JointSpace(meta)
    warped_history = joint_space.warp(X)
    box = joint_space.get_bounds()

    # Validate inputs; the number of real observations is irrelevant for random search
    _, n_params = _check_x_y(warped_history, y, allow_impute=True)
    lower, upper = _check_bounds(box, n_params)

    # One uniform draw per (suggestion, parameter) in the warped space
    warped_samples = random.uniform(lower, upper, size=(n_suggestions, n_params))

    # Map the samples back to the original variable types
    return joint_space.unwarp(warped_samples)
59 | 
60 | 
61 | def _check_x_y(X, y, allow_impute=False):  # pragma: validator
62 |     """Input validation for `suggest` routine."""
63 |     if not (np.ndim(X) == 2):
64 |         raise ValueError("X must be 2-dimensional got %s." % str(np.shape(X)))
65 |     n_obs, n_params = np.shape(X)
66 | 
67 |     assert n_params >= 1, "We do not support suggest on empty space."
68 | 
69 |     if not (np.shape(y) == (n_obs,)):
70 |         raise ValueError("y must be %s not %s." % (str((n_obs,)), str(np.shape(y))))
71 | 
72 |     if not np.all(np.isfinite(X)):
73 |         raise ValueError("X must be finite.")
74 | 
75 |     n_real_obs = n_obs
76 |     if allow_impute:
77 |         if not np.all(np.isfinite(y) | np.isnan(y)):
78 |             raise ValueError("y can't contain infs even with data imputation.")
79 |         n_real_obs = np.sum(np.isfinite(y))
80 |     else:
81 |         if not np.all(np.isfinite(y)):
82 |             raise ValueError("y must be finite when data imputation not used.")
83 | 
84 |     return n_real_obs, n_params
85 | 
86 | 
87 | def _check_bounds(bounds, n_params):  # pragma: validator
88 |     """Input validation for `suggest` routine."""
89 |     if not (np.shape(bounds) == (n_params, 2)):
90 |         raise ValueError("bounds must have shape %s not %s." % (str((n_params, 2)), str(np.shape(bounds))))
91 | 
92 |     lb, ub = np.asarray(bounds).T
93 |     if not (np.all(np.isfinite(lb)) and np.all(np.isfinite(ub))):
94 |         raise ValueError("bounds must be finite.")
95 |     if not (np.all(lb <= ub)):
96 |         raise ValueError("lower bound must be less than upper bound.")
97 |     return lb, ub
98 | 


--------------------------------------------------------------------------------
/bayesmark/signatures.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 Uber Technologies, Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | """Routines to compute and compare the "signatures" of objective functions. These are useful to make sure two different
 15 | studies were actually optimizing the same objective function (even if they say the same test case in the meta-data).
 16 | """
 17 | import warnings
 18 | 
 19 | import numpy as np
 20 | import pandas as pd
 21 | 
 22 | import bayesmark.random_search as rs
 23 | 
# How many points to probe the function to get the signature. This is passed as `n_suggestions` to the random search
# in `get_func_signature`, so it is the number of probe points requested per signature.
N_SUGGESTIONS = 5
 26 | 
 27 | 
 28 | def get_func_signature(f, api_config):
 29 |     """Get the function signature for an objective function in an experiment.
 30 | 
 31 |     Parameters
 32 |     ----------
 33 |     f : typing.Callable
 34 |         The objective function we want to compute the signature of. This function must take inputs in the form of
 35 |         ``dict(str, object)`` with one dictionary key per variable, and provide `float` as the output.
 36 |     api_config : dict(str, dict)
 37 |         Configuration of the optimization variables. See API description.
 38 | 
 39 |     Returns
 40 |     -------
 41 |     signature_x : list(dict(str, object)) of shape (n_suggest,)
 42 |         The input locations probed on signature call.
 43 |     signature_y : list(float) of shape (n_suggest,)
 44 |         The objective function values at the inputs points. This is the real signature.
 45 |     """
 46 |     # Make sure get same sequence on every call to be a signature
 47 |     random = np.random.RandomState(0)
 48 | 
 49 |     signature_x = rs.suggest_dict([], [], api_config, n_suggestions=N_SUGGESTIONS, random=random)
 50 | 
 51 |     # For now, we only take the first output as the signature. We can generalize this later.
 52 |     signature_y = [f(xx)[0] for xx in signature_x]
 53 |     assert np.all(np.isfinite(signature_y)), "non-finite values found in signature for function"
 54 |     return signature_x, signature_y
 55 | 
 56 | 
 57 | def analyze_signatures(signatures):
 58 |     """Analyze function signatures from the experiment.
 59 | 
 60 |     Parameters
 61 |     ----------
 62 |     signatures : dict(str, list(list(float)))
 63 |         The signatures should all be the same length, so it should be 2D array
 64 |         like.
 65 | 
 66 |     Returns
 67 |     -------
 68 |     sig_errs : :class:`pandas:pandas.DataFrame`
 69 |         rows are test cases, columns are test points.
 70 |     signatures_median : dict(str, list(float))
 71 |         Median signature across all repetition per test case.
 72 |     """
 73 |     sig_errs = {}
 74 |     signatures_median = {}
 75 |     for test_case, signature_y in signatures.items():
 76 |         assert len(signature_y) > 0, "signature with no cases found"
 77 |         assert np.all(np.isfinite(signature_y)), "non-finite values found in signature for function"
 78 | 
 79 |         minval = np.min(signature_y, axis=0)
 80 |         maxval = np.max(signature_y, axis=0)
 81 | 
 82 |         if not np.allclose(minval, maxval):
 83 |             # Arguably, the util should not raise the warning, and these should
 84 |             # be raised on the outside, but let's do this for simplicity.
 85 |             warnings.warn(
 86 |                 "Signature diverged on %s betwen %s and %s" % (test_case, str(minval), str(maxval)), RuntimeWarning
 87 |             )
 88 |         sig_errs[test_case] = maxval - minval
 89 |         # ensure serializable using tolist
 90 |         signatures_median[test_case] = np.median(signature_y, axis=0).tolist()
 91 | 
 92 |     # Convert to pandas so easy to append margins with max, better for disp.
 93 |     # If we let the user convert to pandas then we don't need dep on pandas.
 94 |     sig_errs = pd.DataFrame(sig_errs).T
 95 |     sig_errs.loc["max", :] = sig_errs.max(axis=0)
 96 |     sig_errs.loc[:, "max"] = sig_errs.max(axis=1)
 97 | 
 98 |     return sig_errs, signatures_median
 99 | 
100 | 
def analyze_signature_pair(signatures, signatures_ref):
    """Compare two sets of signatures (often from two sets of experiments) and return the error between them.

    Each test case in `signatures` is paired with the entry of the same name in `signatures_ref`, and the pairs are
    passed to `analyze_signatures` as if they were two repetitions of the same test case.

    Parameters
    ----------
    signatures : dict(str, list(float))
        Signatures from set of experiments, one signature per test case.
    signatures_ref : dict(str, list(float))
        The signatures from a reference set of experiments. The keys in `signatures` must be a subset of the signatures
        in `signatures_ref`.

    Returns
    -------
    sig_errs : :class:`pandas:pandas.DataFrame`
        rows are test cases, columns are test points.
    signatures_median : dict(str, list(float))
        Median of the pair of signatures per test case.
    """
    paired = {}
    for test_case in signatures:
        # Lookup in the reference raises KeyError if the test case is missing there
        paired[test_case] = [signatures[test_case], signatures_ref[test_case]]

    sig_errs, pair_median = analyze_signatures(paired)
    return sig_errs, pair_median
122 | 


--------------------------------------------------------------------------------
/bayesmark/stats.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """General statistic tools useful in the benchmark.
15 | """
16 | import numpy as np
17 | import scipy.stats as sst
18 | 
19 | 
def robust_standardize(X, q_level=0.5):
    """Robustly standardize data matrix `X` over axis 0 using the median and an inter-quantile range.

    Similar to :func:`sklearn:sklearn.preprocessing.robust_scale` except also does a Gaussian
    adjustment rescaling so that if Gaussian data is passed in the transformed
    data will, in large `n`, be distributed as N(0,1). See sklearn feature
    request #10139 on github.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n,) or (n, d)
        Array containing elements to standardize. Must be finite, and we require ``n >= 2``.
    q_level : scalar
        Must be in (0, 1]. Probability mass of the central inter-quantile range used for scale estimation, e.g.,
        ``0.5`` gives the inter-quartile range.

    Returns
    -------
    X : :class:`numpy:numpy.ndarray` of same shape as the input
        Standardized version of the input `X`.
    """
    X = np.asarray(X)
    assert X.ndim in (1, 2)
    assert np.all(np.isfinite(X))
    assert 0.0 < q_level and q_level <= 1.0
    assert X.shape[0] >= 2

    center = np.median(X, axis=0)

    # Lower and upper quantile levels of the central range
    q_lo, q_hi = 0.5 * (1.0 - q_level), 0.5 * (1.0 + q_level)
    upper = np.percentile(X, 100 * q_hi, axis=0)
    lower = np.percentile(X, 100 * q_lo, axis=0)
    spread = np.asarray(upper - lower)
    # A zero spread would divide by zero, fall back to unit scale there
    scale = np.where(spread == 0.0, 1.0, spread)

    # Width of the same quantile range under N(0,1), used to rescale to match the scale of N(0,1)
    gauss_width = sst.norm.ppf(q_hi) - sst.norm.ppf(q_lo)
    X_ss = ((X - center) / scale) * gauss_width
    assert X.shape == X_ss.shape
    return X_ss
58 | 
59 | 
def t_EB(x, alpha=0.05, axis=-1):
    """Get t-statistic based error bars on mean of `x`.

    Parameters
    ----------
    x : :class:`numpy:numpy.ndarray` of shape (n_samples,)
        Data points to estimate mean. Must be at least 1D and must not contain ``NaN``.
    alpha : float
        The alpha level (``1-confidence``) probability (in (0, 1)) to construct confidence interval from t-statistic.
    axis : int
        The axis on `x` where we compute the t-statistics. The function is vectorized over all other dimensions.

    Returns
    -------
    EB : float
        Size of error bar on mean (``>= 0``). The confidence interval is ``[mean(x) - EB, mean(x) + EB]``. `EB` is
        ``inf`` when there is at most one sample along `axis`. Will be ``NaN`` if there are any infinite values in
        `x`. For inputs with more than one dimension this is an array with `axis` reduced away.
    """
    assert np.ndim(x) >= 1
    assert not np.any(np.isnan(x))
    assert np.ndim(alpha) == 0
    assert 0.0 < alpha and alpha < 1.0

    n_samples = np.shape(x)[axis]
    if n_samples <= 1:
        # No variance estimate is possible. Summing is only used to get the shape with `axis` reduced away.
        reduced_shape = np.sum(x, axis=axis).shape
        return np.full(reduced_shape, fill_value=np.inf)

    # Interval of a standard t distribution: loc cancels out when we just want EB anyway
    lower, upper = sst.t.interval(1 - alpha, n_samples - 1, loc=0.0, scale=1.0)
    assert not (lower > upper)
    # Just multiplying by the standard error afterwards is better for when scale=0
    return 0.5 * sst.sem(x, axis=axis) * (upper - lower)
93 | 


--------------------------------------------------------------------------------
/bayesmark/util.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 Uber Technologies, Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | """General utilities that should arguably be included in Python.
 15 | """
 16 | import shlex
 17 | 
 18 | 
 19 | def in_or_none(x, L):
 20 |     """Check if item is in list of list is None."""
 21 |     return (L is None) or (x in L)
 22 | 
 23 | 
 24 | def all_unique(L):
 25 |     """Check if all elements in a list are unique.
 26 | 
 27 |     Parameters
 28 |     ----------
 29 |     L : list
 30 |         List we would like to check for uniqueness.
 31 | 
 32 |     Returns
 33 |     -------
 34 |     uniq : bool
 35 |         True if all elements in `L` are unique.
 36 |     """
 37 |     uniq = len(L) == len(set(L))
 38 |     return uniq
 39 | 
 40 | 
 41 | def strict_sorted(L):
 42 |     """Return a strictly sorted version of `L`. Therefore, this raises an error if `L` contains duplicates.
 43 | 
 44 |     Parameters
 45 |     ----------
 46 |     L : list
 47 |         List we would like to sort.
 48 | 
 49 |     Returns
 50 |     -------
 51 |     S : list
 52 |         Strictly sorted version of `L`.
 53 |     """
 54 |     assert all_unique(L), "Cannot strict sort because list contains duplicates."
 55 |     S = sorted(L)
 56 |     return S
 57 | 
 58 | 
 59 | def range_str(stop):
 60 |     """Version of ``range(stop)`` that instead returns strings that are zero padded so the entire iteration is of the
 61 |     same length.
 62 | 
 63 |     Parameters
 64 |     ----------
 65 |     stop : int
 66 |         Stop value equivalent to ``range(stop)``.
 67 | 
 68 |     Yields
 69 |     ------
 70 |     x : str
 71 |         String representation of integer zero padded so all items from this generator have the same ``len(x)``.
 72 |     """
 73 |     str_len = len(str(stop - 1))  # moot if stop=0
 74 | 
 75 |     def map_(x):
 76 |         ss = str(x).zfill(str_len)
 77 |         return x, ss
 78 | 
 79 |     G = map(map_, range(stop))
 80 |     return G
 81 | 
 82 | 
 83 | def str_join_safe(delim, str_vec, append=False):
 84 |     """Version of `str.join` that is guaranteed to be invertible.
 85 | 
 86 |     Parameters
 87 |     ----------
 88 |     delim : str
 89 |         Delimiter to join the strings.
 90 |     str_vec : list(str)
 91 |         List of strings to join. A `ValueError` is raised if `delim` is present in any of these strings.
 92 |     append : bool
 93 |         If true, assume the first element is already joined and we are appending to it. So, `str_vec[0]` can contain
 94 |         `delim`.
 95 | 
 96 |     Returns
 97 |     -------
 98 |     joined_str : str
 99 |         Joined version of `str_vec`, which is always recoverable with ``joined_str.split(delim)``.
100 | 
101 |     Examples
102 |     --------
103 |     Append is required because,
104 | 
105 |     .. code-block:: pycon
106 | 
107 |         ss = str_join_safe('_', ('foo', 'bar'))
108 |         str_join_safe('_', (ss, 'baz', 'qux'))
109 | 
110 |     would fail because we are appending ``'baz'`` and ``'qux'`` to the already joined string ``ss = 'foo_bar'``.
111 | 
112 |     In this case, we use
113 | 
114 |     .. code-block:: pycon
115 | 
116 |         ss = str_join_safe('_', ('foo', 'bar'))
117 |         str_join_safe('_', (ss, 'baz', 'qux'), append=True)
118 |     """
119 |     chk_vec = str_vec[1:] if append else str_vec
120 | 
121 |     for ss in chk_vec:
122 |         if delim in ss:
123 |             raise ValueError("%s cannot contain delimeter %s" % (ss, delim))
124 | 
125 |     joined_str = delim.join(str_vec)
126 |     return joined_str
127 | 
128 | 
def shell_join(argv, delim=" "):
    """Join strings together in a way that is an inverse of `shlex` shell parsing into `argv`.

    Basically, if the resulting string is passed as a command line argument then `sys.argv` will equal `argv`.

    Parameters
    ----------
    argv : list(str)
        List of arguments to collect into command line string. It will be escaped accordingly.
    delim : str
        Whitespace delimiter to join the strings.

    Returns
    -------
    cmd : str
        Properly escaped and joined command line string. An assertion failure is raised if parsing it back with
        :func:`shlex.split` does not recover `argv`.
    """
    escaped_args = map(shlex.quote, argv)
    cmd = delim.join(escaped_args)
    # Round trip check that the escaping really is invertible
    recovered = shlex.split(cmd)
    assert recovered == list(argv)
    return cmd
150 | 
151 | 
def chomp(str_val, ext="\n"):
    """Remove a required suffix from the end of a string.

    Parameters
    ----------
    str_val : str
        String we want to chomp off a suffix, e.g., ``"foo.log"``, and we want to chomp the file extension.
    ext : str
        The suffix we want to chomp. Must be non-empty. An assertion failure is raised if `str_val` doesn't end in
        `ext`.

    Returns
    -------
    chomped : str
        Version of `str_val` with `ext` removed from the end.
    """
    n = len(ext)
    assert n > 0

    # The last n characters must be exactly the suffix
    tail = str_val[-n:]
    assert ext == tail, "%s must end with %s" % (repr(str_val), repr(ext))
    return str_val[:-n]
173 | 
174 | 
def preimage_func(f, x):
    """Pre-image a function at a set of input points.

    Parameters
    ----------
    f : typing.Callable
        The function we would like to pre-image. The output type must be hashable.
    x : typing.Iterable
        Input points we would like to evaluate `f`. `x` must be of a type acceptable by `f`.

    Returns
    -------
    D : dict(object, list(object))
        This dictionary maps the output of `f` to the list of `x` values that produce it. Within each list, the
        points appear in the order they were encountered in `x`.
    """
    preimage = {}
    for point in x:
        image = f(point)
        if image in preimage:
            preimage[image].append(point)
        else:
            preimage[image] = [point]
    return preimage
194 | 


--------------------------------------------------------------------------------
/build_wheel.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -ex
 4 | set -o pipefail
 5 | 
 6 | # Display what version is being used for logging
 7 | python --version
 8 | 
 9 | # Fail if untracked files so we don't delete them in next step
10 | test -z "$(git status --porcelain)"
11 | 
12 | # Build from clean repo, delete all ignored files
13 | git clean -x -f -d
14 | 
15 | # Get everything in place to put inside the wheel
16 | SHA_LONG=$(git rev-parse HEAD)
17 | echo VERSION=\"$SHA_LONG\" >bayesmark/version.py
18 | 
19 | # Now the actual build
20 | python3 setup.py sdist
21 | 


--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | _build
2 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = _build
  9 | 
 10 | # User-friendly check for sphinx-build
 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
 13 | endif
 14 | 
 15 | # Internal variables.
 16 | PAPEROPT_a4     = -D latex_paper_size=a4
 17 | PAPEROPT_letter = -D latex_paper_size=letter
 18 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 19 | # the i18n builder cannot share the environment and doctrees with the others
 20 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 21 | 
 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp latex latexpdf text man texinfo info gettext changes xml pseudoxml linkcheck all
 23 | 
 24 | help:
 25 | 	@echo "Please use \`make <target>' where <target> is one of"
 26 | 	@echo "  html       to make standalone HTML files"
 27 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 28 | 	@echo "  singlehtml to make a single large HTML file"
 29 | 	@echo "  pickle     to make pickle files"
 30 | 	@echo "  json       to make JSON files"
 31 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 32 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 33 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 34 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 35 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 36 | 	@echo "  text       to make text files"
 37 | 	@echo "  man        to make manual pages"
 38 | 	@echo "  texinfo    to make Texinfo files"
 39 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 40 | 	@echo "  gettext    to make PO message catalogs"
 41 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 42 | 	@echo "  xml        to make Docutils-native XML files"
 43 | 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 44 | 	@echo "  linkcheck  to check all external links for integrity"
 45 | 
 46 | clean:
 47 | 	rm -rf $(BUILDDIR)/*
 48 | 
 49 | html:
 50 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 51 | 	@echo
 52 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 53 | 
 54 | dirhtml:
 55 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 56 | 	@echo
 57 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 58 | 
 59 | singlehtml:
 60 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 61 | 	@echo
 62 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 63 | 
 64 | pickle:
 65 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 66 | 	@echo
 67 | 	@echo "Build finished; now you can process the pickle files."
 68 | 
 69 | json:
 70 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 71 | 	@echo
 72 | 	@echo "Build finished; now you can process the JSON files."
 73 | 
 74 | htmlhelp:
 75 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 76 | 	@echo
 77 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 78 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 79 | 
 80 | qthelp:
 81 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 82 | 	@echo
 83 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 84 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 85 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/bayesmark.qhcp"
 86 | 	@echo "To view the help file:"
 87 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/bayesmark.qhc"
 88 | 
 89 | devhelp:
 90 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
 91 | 	@echo
 92 | 	@echo "Build finished."
 93 | 	@echo "To view the help file:"
 94 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/bayesmark"
 95 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/bayesmark"
 96 | 	@echo "# devhelp"
 97 | 
 98 | latex:
 99 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
100 | 	@echo
101 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
102 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
103 | 	      "(use \`make latexpdf' here to do that automatically)."
104 | 
105 | latexpdf:
106 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
107 | 	@echo "Running LaTeX files through pdflatex..."
108 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
109 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
110 | 
111 | text:
112 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
113 | 	@echo
114 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
115 | 
116 | man:
117 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
118 | 	@echo
119 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
120 | 
121 | texinfo:
122 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
123 | 	@echo
124 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
125 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
126 | 	      "(use \`make info' here to do that automatically)."
127 | 
128 | info:
129 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
130 | 	@echo "Running Texinfo files through makeinfo..."
131 | 	make -C $(BUILDDIR)/texinfo info
132 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
133 | 
134 | gettext:
135 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
136 | 	@echo
137 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
138 | 
139 | changes:
140 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
141 | 	@echo
142 | 	@echo "The overview file is in $(BUILDDIR)/changes."
143 | 
144 | linkcheck:
145 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
146 | 	@echo
147 | 	@echo "Link check complete; look for any errors in the above output " \
148 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
149 | 
150 | xml:
151 | 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
152 | 	@echo
153 | 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
154 | 
155 | pseudoxml:
156 | 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
157 | 	@echo
158 | 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
159 | 
160 | all: html dirhtml singlehtml pickle json htmlhelp qthelp devhelp latex latexpdf text man texinfo info gettext changes xml pseudoxml linkcheck
161 | 


--------------------------------------------------------------------------------
/docs/authors.rst:
--------------------------------------------------------------------------------
 1 | -------
 2 | Credits
 3 | -------
 4 | 
 5 | ~~~~~~~~~~~~~~~~
 6 | Development lead
 7 | ~~~~~~~~~~~~~~~~
 8 | 
 9 | Ryan Turner (rdturnermtl)
10 | 
11 | ~~~~~~~~~~~~
12 | Contributors
13 | ~~~~~~~~~~~~
14 | 
15 | * David Eriksson (dme65)
16 | 


--------------------------------------------------------------------------------
/docs/code.rst:
--------------------------------------------------------------------------------
  1 | -------------
  2 | Code Overview
  3 | -------------
  4 | 
  5 | .. _bayesmark:
  6 | 
  7 | ~~~~
  8 | Data
  9 | ~~~~
 10 | 
 11 | .. automodule:: bayesmark.data
 12 |    :members:
 13 |    :exclude-members:
 14 | 
 15 | ~~~~~~~~~~~~~~~~~~~~~~~
 16 | Expected Max Estimation
 17 | ~~~~~~~~~~~~~~~~~~~~~~~
 18 | 
 19 | .. automodule:: bayesmark.expected_max
 20 |    :members:
 21 |    :exclude-members:
 22 | 
 23 | ~~~~~~~~~~~~~~~~~~~~~~
 24 | Experiment Aggregation
 25 | ~~~~~~~~~~~~~~~~~~~~~~
 26 | 
 27 | .. automodule:: bayesmark.experiment_aggregate
 28 |    :members:
 29 |    :exclude-members: main
 30 | 
 31 | ~~~~~~~~~~~~~~~~~~~
 32 | Experiment Analysis
 33 | ~~~~~~~~~~~~~~~~~~~
 34 | 
 35 | .. automodule:: bayesmark.experiment_analysis
 36 |    :members:
 37 |    :exclude-members: main
 38 | 
 39 | ~~~~~~~~~~~~~~~~~~~
 40 | Experiment Baseline
 41 | ~~~~~~~~~~~~~~~~~~~
 42 | 
 43 | .. automodule:: bayesmark.experiment_baseline
 44 |    :members:
 45 |    :exclude-members: main, do_baseline
 46 | 
 47 | ~~~~~~~~~~~~~~~~~~~
 48 | Experiment Launcher
 49 | ~~~~~~~~~~~~~~~~~~~
 50 | 
 51 | .. automodule:: bayesmark.experiment_launcher
 52 |    :members:
 53 |    :exclude-members: main
 54 | 
 55 | ~~~~~~~~~~
 56 | Experiment
 57 | ~~~~~~~~~~
 58 | 
 59 | .. automodule:: bayesmark.experiment
 60 |    :members:
 61 |    :exclude-members: experiment_main
 62 | 
 63 | ~~~~~~~~~~~~~~~~~~~
 64 | Function Signatures
 65 | ~~~~~~~~~~~~~~~~~~~
 66 | 
 67 | .. automodule:: bayesmark.signatures
 68 |    :members:
 69 |    :exclude-members:
 70 | 
 71 | ~~~~~~~~~~
 72 | Numpy Util
 73 | ~~~~~~~~~~
 74 | 
 75 | .. automodule:: bayesmark.np_util
 76 |    :members:
 77 |    :exclude-members:
 78 | 
 79 | ~~~~~~~~~
 80 | Path Util
 81 | ~~~~~~~~~
 82 | 
 83 | .. automodule:: bayesmark.path_util
 84 |    :members:
 85 |    :exclude-members:
 86 | 
 87 | ~~~~~~~~~~~~~~~~~~~
 88 | Quantile Estimation
 89 | ~~~~~~~~~~~~~~~~~~~
 90 | 
 91 | .. automodule:: bayesmark.quantiles
 92 |    :members:
 93 |    :exclude-members: ensure_shape
 94 | 
 95 | ~~~~~~~~~~~~~
 96 | Random Search
 97 | ~~~~~~~~~~~~~
 98 | 
 99 | .. automodule:: bayesmark.random_search
100 |    :members:
101 |    :exclude-members:
102 | 
103 | ~~~~~~~~~~~~~
104 | Serialization
105 | ~~~~~~~~~~~~~
106 | 
107 | .. automodule:: bayesmark.serialize
108 |    :members:
109 |    :exclude-members: Serializer
110 | 
111 | ~~~~~~~~~~~~~~
112 | Sklearn Tuning
113 | ~~~~~~~~~~~~~~
114 | 
115 | .. automodule:: bayesmark.sklearn_funcs
116 |    :members:
117 |    :exclude-members:
118 | 
119 | ~~~~~
120 | Space
121 | ~~~~~
122 | 
123 | .. automodule:: bayesmark.space
124 |    :members:
125 |    :exclude-members: check_array, unravel_index
126 | 
127 | ~~~~~
128 | Stats
129 | ~~~~~
130 | 
131 | .. automodule:: bayesmark.stats
132 |    :members:
133 |    :exclude-members:
134 | 
135 | ~~~~~~~~~~~~~~
136 | Util (General)
137 | ~~~~~~~~~~~~~~
138 | 
139 | .. automodule:: bayesmark.util
140 |    :members:
141 |    :exclude-members:
142 | 
143 | ~~~~~~~~~~~
144 | Xarray Util
145 | ~~~~~~~~~~~
146 | 
147 | .. automodule:: bayesmark.xr_util
148 |    :members:
149 |    :exclude-members:
150 | 


--------------------------------------------------------------------------------
/docs/dummy.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import sphinx
15 | 
16 | # import extra deps and use it to keep pipreqs and flake8 happy
17 | for pkg in (sphinx,):
18 |     print("%s %s" % (pkg.__name__, pkg.__version__))
19 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. bayesmark documentation master file, created by
 2 |    sphinx-quickstart on Tue Jul  9 22:26:36 2013.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to the Bayes Opt Benchmark Documentation
 7 | ================================================
 8 | 
 9 | Contents:
10 | 
11 | .. toctree::
12 |    :maxdepth: 2
13 | 
14 |    readme
15 |    scoring
16 |    code
17 |    authors
18 | 


--------------------------------------------------------------------------------
/docs/readme.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../README.rst
2 | 


--------------------------------------------------------------------------------
/docs/scoring.rst:
--------------------------------------------------------------------------------
  1 | .. _how-scoring-works:
  2 | 
  3 | How scoring works
  4 | =================
  5 | 
  6 | The scoring system is about aggregating the function evaluations of the optimizers. We represent :math:`F_{pmtn}` as the function evaluation of objective function :math:`p` (``TEST_CASE``) from the suggestion of method :math:`m` (``METHOD``) at batch :math:`t` (``ITER``) under repeated trial :math:`n` (``TRIAL``). In the case of batch sizes greater than 1, :math:`F_{pmtn}` is the minimum function evaluation across the suggestions in batch :math:`t`. The first transformation is that we consider the *cumulative minimum* over batches :math:`t` as the performance of the optimizer on a particular trial:
  7 | 
  8 | .. math::
  9 | 
 10 |    S_{pmtn} = \textrm{cum-min}_t F_{pmtn}\,.
 11 | 
 12 | All of the aggregate quantities described here are computed by :func:`.experiment_analysis.compute_aggregates` (which is called by `bayesmark-anal <#analyze-and-summarize-results>`_) in either the ``agg_result`` or ``summary`` xarray datasets. Additionally, the baseline performances are in the xarray dataset ``baseline_ds`` from :func:`.experiment_baseline.compute_baseline`. The baseline dataset can be generated via the ``bayesmark-baseline`` command, but it is called automatically by ``bayesmark-anal`` if needed.
 13 | 
 14 | Median scores
 15 | -------------
 16 | 
 17 | The more robust, but less decision-theoretically appealing method for aggregation is to look at median scores. On a per problem basis we simply consider the median (``agg_result[PERF_MED]``):
 18 | 
 19 | .. math::
 20 | 
 21 |    \textrm{med-perf}_{pmt} = \textrm{median}_n \, S_{pmtn} \,.
 22 | 
 23 | However, this score is not very comparable across different problems as the objectives are all on different scales with possibly different units. Therefore, we define the *normalized score* (``agg_result[NORMED_MED]``) in a way that is *invariant* to linear transformation of the objective function:
 24 | 
 25 | .. math::
 26 | 
 27 |    \textrm{norm-med-perf}_{pmt} = \frac{\textrm{med-perf}_{pmt}  - \textrm{opt}_p}{\textrm{rand-med-perf}_{pt} - \textrm{opt}_p} \,,
 28 | 
 29 | where :math:`\textrm{opt}_p` (``baseline_ds[PERF_BEST]``) is an estimate of the global minimum of objective function :math:`p`; and :math:`\textrm{rand-med-perf}_{pt}` is the median performance of random search at batch :math:`t` on objective function :math:`p`. This means that, on any objective, an optimizer has score 0 after converging to the global minimum; and random search performs as a straight line at 1 for all :math:`t`. Conceptually, the median random search performance (``baseline_ds[PERF_MED]``) is computed as:
 30 | 
 31 | .. math::
 32 | 
 33 |    \textrm{rand-med-perf}_{pt} = \textrm{median}_n \, S_{pmtn} \,,
 34 | 
 35 | with :math:`m=` random search. However, every observation of :math:`F_{pmtn}` is iid in the case of random search. There is no reason to break the samples apart into trials :math:`n`. Instead, we use the function :func:`.quantiles.min_quantile_CI` to compute a more statistically efficient pooled estimator using the pooled random search samples over :math:`t` and :math:`n`. This pooled method is a nonparametric estimator of the quantiles of the minimum over a batch of samples, which is distribution free.
 36 | 
 37 | To further aggregate the performance over all objectives for a single optimizer we can consider the median-of-medians (``summary[PERF_MED]``):
 38 | 
 39 | .. math::
 40 | 
 41 |    \textrm{med-perf}_{mt} = \textrm{median}_p \, \textrm{norm-med-perf}_{pmt} \,.
 42 | 
 43 | Combining scores across different problems is sensible here because we have transformed them all onto the same scale.
 44 | 
 45 | Mean scores
 46 | -----------
 47 | 
 48 | From a decision-theoretic perspective it is more sensible to consider the mean (possibly warped) score. The median score can hide a high percentage of runs that completely fail. However, when we look at the mean score we first take the clipped score with a baseline value:
 49 | 
 50 | .. math::
 51 | 
 52 |    S'_{pmtn} = \min(S_{pmtn}, \textrm{clip}_p) \,.
 53 | 
 54 | This is largely because there may be a non-zero probability of :math:`F = \infty` (as in when the objective function crashes), which means that the mean random search performance would be an infinite loss. We set :math:`\textrm{clip}_p` (``baseline_ds[PERF_CLIP]``) to the median score after a single function evaluation, which is :math:`\textrm{rand-med-perf}_{p0}` for a batch size of 1. The mean performance on a single problem (``agg_result[PERF_MEAN]``) then becomes:
 55 | 
 56 | .. math::
 57 | 
 58 |    \textrm{mean-perf}_{pmt} = \textrm{mean}_n \, S'_{pmtn} \,.
 59 | 
 60 | This then becomes a normalized performance (``agg_result[NORMED_MEAN]``) of:
 61 | 
 62 | .. math::
 63 | 
 64 |    \textrm{norm-mean-perf}_{pmt} = \frac{\textrm{mean-perf}_{pmt}  - \textrm{opt}_p}{\textrm{clip}_p  - \textrm{opt}_p} \,.
 65 | 
 66 | Note here that the random search performance is only 1 at the first batch, unlike for :math:`\textrm{norm-med-perf}_{pmt}`.
 67 | 
 68 | Again we can aggregate this into all objective function performance with (``summary[PERF_MEAN]``):
 69 | 
 70 | .. math::
 71 | 
 72 |    \textrm{mean-perf}_{mt} = \textrm{mean}_p \, \textrm{norm-mean-perf}_{pmt} \,,
 73 | 
 74 | which is a mean-of-means (or *grand mean*), which is much more sensible in general than a median-of-medians. We can again obtain the property of random search having a constant performance of 1 for all :math:`t` using (``summary[NORMED_MEAN]``):
 75 | 
 76 | .. math::
 77 | 
 78 |    \textrm{norm-mean-perf}_{mt} = \frac{\textrm{mean-perf}_{mt}}{\textrm{rand-mean-perf}_{t}} \,,
 79 | 
 80 | where the random search baseline has been determined with the same sequence of equations as the other methods. These all collapse down to:
 81 | 
 82 | .. math::
 83 | 
 84 |    \textrm{rand-mean-perf}_{t} = \textrm{mean}_p \, \frac{\textrm{rand-mean-perf}_{pt} - \textrm{opt}_p}{\textrm{clip}_p  - \textrm{opt}_p} \,.
 85 | 
 86 | Conceptually, we compute this random search baseline (``baseline_ds[PERF_MEAN]``) as:
 87 | 
 88 | .. math::
 89 | 
 90 |    \textrm{rand-mean-perf}_{pt} = \textrm{mean}_n \, S'_{pmtn} \,,
 91 | 
 92 | with :math:`m=` random search. However, because all function evaluations for random search are iid across :math:`t`, we can use a more statistically efficient pooled estimator :func:`.expected_max.expected_min`, which is an unbiased distribution free estimator on the expected minimum of :math:`m` samples from a distribution.
 93 | 
 94 | Note that :math:`\textrm{norm-mean-perf}_{mt}` is, in aggregate, a linear transformation on the expected loss :math:`S'`. This makes it more justified in a decision theory framework than the median score. However, to view it as a linear transformation we are considering the values in ``baseline_ds`` to be fixed reference loss values and not the output from the experiment.
 95 | 
 96 | Error bars
 97 | ----------
 98 | 
 99 | The datasets ``agg_result`` and ``summary`` also compute error bars in the form of ``LB_`` and ``UB_`` variables. These error bars do not consider the random variation in the baseline quantities from ``baseline_ds`` like ``opt`` and ``clip``. They are instead treated as fixed constant reference points. Therefore, the baselines are computed by a separate command, ``bayesmark-baseline``. The user can generate the baselines when they want, but since they are not considered a random quantity in the statistics they are not automatically generated from the experimental data (unless the baseline file ``derived/baseline.json`` is missing).
100 | 
101 | Additionally, the error bars on the grand mean (``summary[PERF_MEAN]``) are computed by simply using t-statistic based error bars on the individual means. Under a "random effects" model, this does not actually lose any statistical power. However, this is computing the mean on the loss over sampling from new problems under the "same distribution" of benchmark problems. These error bars will be wider than if we computed the error bars on the grand mean over this particular set of benchmark problems.
102 | 


--------------------------------------------------------------------------------
/example_opt_root/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "Flaky": [
 3 |         "flaky_optimizer.py",
 4 |         {}
 5 |     ],
 6 |     "HyperOpt-New": [
 7 |         "hyperopt_optimizer.py",
 8 |         {}
 9 |     ],
10 |     "Nevergrad-OnePlusOne-New": [
11 |         "nevergrad_optimizer.py",
12 |         {
13 |             "budget": 300,
14 |             "tool": "OnePlusOne"
15 |         }
16 |     ],
17 |     "OpenTuner-BanditA-New": [
18 |         "opentuner_optimizer.py",
19 |         {
20 |             "techniques": [
21 |                 "AUCBanditMetaTechniqueA"
22 |             ]
23 |         }
24 |     ],
25 |     "OpenTuner-GA-DE-New": [
26 |         "opentuner_optimizer.py",
27 |         {
28 |             "techniques": [
29 |                 "PSO_GA_DE"
30 |             ]
31 |         }
32 |     ],
33 |     "OpenTuner-GA-New": [
34 |         "opentuner_optimizer.py",
35 |         {
36 |             "techniques": [
37 |                 "PSO_GA_Bandit"
38 |             ]
39 |         }
40 |     ],
41 |     "PySOT-New": [
42 |         "pysot_optimizer.py",
43 |         {}
44 |     ],
45 |     "RandomSearch-New": [
46 |         "random_optimizer.py",
47 |         {}
48 |     ],
49 |     "Scikit-GBRT-Hedge-New": [
50 |         "scikit_optimizer.py",
51 |         {
52 |             "acq_func": "gp_hedge",
53 |             "base_estimator": "GBRT",
54 |             "n_initial_points": 5
55 |         }
56 |     ],
57 |     "Scikit-GP-Hedge-New": [
58 |         "scikit_optimizer.py",
59 |         {
60 |             "acq_func": "gp_hedge",
61 |             "base_estimator": "GP",
62 |             "n_initial_points": 5
63 |         }
64 |     ],
65 |     "Scikit-GP-LCB-New": [
66 |         "scikit_optimizer.py",
67 |         {
68 |             "acq_func": "LCB",
69 |             "base_estimator": "GP",
70 |             "n_initial_points": 5
71 |         }
72 |     ]
73 | }
74 | 


--------------------------------------------------------------------------------
/example_opt_root/flaky_optimizer.py:
--------------------------------------------------------------------------------
 1 | from time import sleep
 2 | 
 3 | import bayesmark.random_search as rs
 4 | from bayesmark import np_util
 5 | from bayesmark.abstract_optimizer import AbstractOptimizer
 6 | from bayesmark.experiment import experiment_main
 7 | 
 8 | 
 9 | class FlakyOptimizer(AbstractOptimizer):
10 |     def __init__(self, api_config, random=np_util.random):
11 |         """Build wrapper class to use random search function in benchmark.
12 | 
13 |         Settings for `suggest_dict` can be passed using kwargs.
14 | 
15 |         Parameters
16 |         ----------
17 |         api_config : dict-like of dict-like
18 |             Configuration of the optimization variables. See API description.
19 |         """
20 |         AbstractOptimizer.__init__(self, api_config)
21 |         self.random = random
22 |         self.mode = self.random.choice(["normal", "crash", "delay"])
23 | 
24 |     def suggest(self, n_suggestions=1):
25 |         """Get suggestion.
26 | 
27 |         Parameters
28 |         ----------
29 |         n_suggestions : int
30 |             Desired number of parallel suggestions in the output
31 | 
32 |         Returns
33 |         -------
34 |         next_guess : list of dict
35 |             List of `n_suggestions` suggestions to evaluate the objective
36 |             function. Each suggestion is a dictionary where each key
37 |             corresponds to a parameter being optimized.
38 |         """
39 |         if self.random.rand() <= 0.5 or self.mode == "normal":
40 |             x_guess = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)
41 |         elif self.mode == "delay":
42 |             sleep(15 * 60)  # 15 minutes
43 |             x_guess = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)
44 |         elif self.mode == "crash":
45 |             assert False, "Crashing for testing purposes"
46 |         else:
47 |             assert False, "Crashing, not for testing purposes"
48 | 
49 |         return x_guess
50 | 
51 |     def observe(self, X, y):
52 |         """Feed an observation back.
53 | 
54 |         Parameters
55 |         ----------
56 |         X : list of dict-like
57 |             Places where the objective function has already been evaluated.
58 |             Each suggestion is a dictionary where each key corresponds to a
59 |             parameter being optimized.
60 |         y : array-like, shape (n,)
61 |             Corresponding values where objective has been evaluated
62 |         """
63 |         # Random search so don't do anything
64 |         pass
65 | 
66 | 
67 | if __name__ == "__main__":
68 |     experiment_main(FlakyOptimizer)
69 | 


--------------------------------------------------------------------------------
/example_opt_root/nevergrad_optimizer.py:
--------------------------------------------------------------------------------
  1 | import nevergrad.optimization as optimization
  2 | import numpy as np
  3 | from nevergrad import instrumentation as inst
  4 | from scipy.stats import norm
  5 | 
  6 | from bayesmark.abstract_optimizer import AbstractOptimizer
  7 | from bayesmark.experiment import experiment_main
  8 | from bayesmark.np_util import linear_rescale
  9 | from bayesmark.space import Real
 10 | 
 11 | 
 12 | class NevergradOptimizer(AbstractOptimizer):
 13 |     primary_import = "nevergrad"
 14 | 
 15 |     def __init__(self, api_config, tool="OnePlusOne", budget=300):
 16 |         """Build wrapper class to use nevergrad optimizer in benchmark.
 17 | 
 18 |         Parameters
 19 |         ----------
 20 |         api_config : dict-like of dict-like
 21 |             Configuration of the optimization variables. See API description.
 22 |         budget : int
 23 |             Expected number of max function evals
 24 |         """
 25 |         AbstractOptimizer.__init__(self, api_config)
 26 | 
 27 |         self.instrum, self.space = NevergradOptimizer.get_nvg_dimensions(api_config)
 28 | 
 29 |         dimension = self.instrum.dimension
 30 |         opt_class = optimization.registry[tool]
 31 |         self.optim = opt_class(dimension=dimension, budget=budget)
 32 | 
 33 |     @staticmethod
 34 |     def get_nvg_dimensions(api_config):
 35 |         """Help routine to setup nevergrad search space in constructor.
 36 | 
 37 |         Take api_config as argument so this can be static.
 38 |         """
 39 |         # The ordering of iteration probably makes no difference, but just to be
 40 |         # safe and consistent with space.py, I will make it sorted.
 41 |         param_list = sorted(api_config.keys())
 42 | 
 43 |         all_args = {}
 44 |         all_prewarp = {}
 45 |         for param_name in param_list:
 46 |             param_config = api_config[param_name]
 47 | 
 48 |             param_type = param_config["type"]
 49 | 
 50 |             param_space = param_config.get("space", None)
 51 |             param_range = param_config.get("range", None)
 52 |             param_values = param_config.get("values", None)
 53 | 
 54 |             prewarp = None
 55 |             if param_type == "cat":
 56 |                 assert param_space is None
 57 |                 assert param_range is None
 58 |                 arg = inst.var.SoftmaxCategorical(param_values)
 59 |             elif param_type == "bool":
 60 |                 assert param_space is None
 61 |                 assert param_range is None
 62 |                 assert param_values is None
 63 |                 arg = inst.var.OrderedDiscrete([False, True])
 64 |             elif param_values is not None:
 65 |                 assert param_type in ("int", "ordinal", "real")
 66 |                 arg = inst.var.OrderedDiscrete(param_values)
 67 |                 # We are throwing away information here, but OrderedDiscrete
 68 |                 # appears to be invariant to monotonic transformation anyway.
 69 |             elif param_type == "int":
 70 |                 assert param_values is None
 71 |                 # Need +1 since API is inclusive
 72 |                 choices = range(int(param_range[0]), int(param_range[-1]) + 1)
 73 |                 arg = inst.var.OrderedDiscrete(choices)
 74 |                 # We are throwing away information here, but OrderedDiscrete
 75 |                 # appears to be invariant to monotonic transformation anyway.
 76 |             elif param_type == "real":
 77 |                 assert param_values is None
 78 |                 assert param_range is not None
 79 |                 # Will need to warp to this space separately.
 80 |                 arg = inst.var.Gaussian(mean=0, std=1)
 81 |                 prewarp = Real(warp=param_space, range_=param_range)
 82 |             else:
 83 |                 assert False, "type %s not handled in API" % param_type
 84 | 
 85 |             all_args[param_name] = arg
 86 |             all_prewarp[param_name] = prewarp
 87 |         instrum = inst.Instrumentation(**all_args)
 88 |         return instrum, all_prewarp
 89 | 
 90 |     def prewarp(self, xx):
 91 |         """Extra work needed to get variables into the Gaussian space
 92 |         representation."""
 93 |         xxw = {}
 94 |         for arg_name, vv in xx.items():
 95 |             assert np.isscalar(vv)
 96 |             space = self.space[arg_name]
 97 | 
 98 |             if space is not None:
 99 |                 # Warp so we think it is apriori uniform in [a, b]
100 |                 vv = space.warp(vv)
101 |                 assert vv.size == 1
102 | 
103 |                 # Now make uniform on [0, 1], also unpack warped to scalar
104 |                 (lb, ub), = space.get_bounds()
105 |                 vv = linear_rescale(vv.item(), lb, ub, 0, 1)
106 | 
107 |                 # Now make std Gaussian apriori
108 |                 vv = norm.ppf(vv)
109 |             assert np.isscalar(vv)
110 |             xxw[arg_name] = vv
111 |         return xxw
112 | 
113 |     def postwarp(self, xxw):
114 |         """Extra work needed to undo the Gaussian space representation."""
115 |         xx = {}
116 |         for arg_name, vv in xxw.items():
117 |             assert np.isscalar(vv)
118 |             space = self.space[arg_name]
119 | 
120 |             if space is not None:
121 |                 # Undo the std Gaussian prior: the CDF maps to uniform on [0, 1]
122 |                 vv = norm.cdf(vv)
123 | 
124 |                 # Now rescale from [0, 1] to the bounds [lb, ub]
125 |                 (lb, ub), = space.get_bounds()
126 |                 vv = linear_rescale(vv, 0, 1, lb, ub)
127 | 
128 |                 # Unwarp from the warped space back to the original parameter space
129 |                 vv = space.unwarp([vv])
130 |             assert np.isscalar(vv)
131 |             xx[arg_name] = vv
132 |         return xx
133 | 
134 |     def suggest(self, n_suggestions=1):
135 |         """Get suggestion from nevergrad.
136 | 
137 |         Parameters
138 |         ----------
139 |         n_suggestions : int
140 |             Desired number of parallel suggestions in the output
141 | 
142 |         Returns
143 |         -------
144 |         next_guess : list of dict
145 |             List of `n_suggestions` suggestions to evaluate the objective
146 |             function. Each suggestion is a dictionary where each key
147 |             corresponds to a parameter being optimized.
148 |         """
149 |         x_guess_data = [self.optim.ask() for _ in range(n_suggestions)]
150 | 
151 |         x_guess = [None] * n_suggestions
152 |         for ii, xx in enumerate(x_guess_data):
153 |             x_pos, x_kwarg = self.instrum.data_to_arguments(xx)
154 |             assert x_pos == ()
155 |             x_guess[ii] = self.postwarp(x_kwarg)
156 | 
157 |         return x_guess
158 | 
159 |     def observe(self, X, y):
160 |         """Feed an observation back to nevergrad.
161 | 
162 |         Parameters
163 |         ----------
164 |         X : list of dict-like
165 |             Places where the objective function has already been evaluated.
166 |             Each suggestion is a dictionary where each key corresponds to a
167 |             parameter being optimized.
168 |         y : array-like, shape (n,)
169 |             Corresponding values where objective has been evaluated
170 |         """
171 |         for xx, yy in zip(X, y):
172 |             xx = self.prewarp(xx)
173 |             xx = self.instrum.arguments_to_data(**xx)
174 |             self.optim.tell(xx, yy)
175 | 
176 | 
177 | if __name__ == "__main__":
178 |     experiment_main(NevergradOptimizer)
179 | 


--------------------------------------------------------------------------------
/example_opt_root/pysot_optimizer.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | from copy import copy
  3 | 
  4 | import numpy as np
  5 | from poap.strategy import EvalRecord
  6 | from pySOT.experimental_design import SymmetricLatinHypercube
  7 | from pySOT.optimization_problems import OptimizationProblem
  8 | from pySOT.strategy import SRBFStrategy
  9 | from pySOT.surrogate import CubicKernel, LinearTail, RBFInterpolant
 10 | 
 11 | from bayesmark.abstract_optimizer import AbstractOptimizer
 12 | from bayesmark.experiment import experiment_main
 13 | from bayesmark.space import JointSpace
 14 | 
 15 | 
 16 | class PySOTOptimizer(AbstractOptimizer):
 17 |     primary_import = "pysot"
 18 | 
 19 |     def __init__(self, api_config):
 20 |         """Build wrapper class to use an optimizer in benchmark.
 21 | 
 22 |         Parameters
 23 |         ----------
 24 |         api_config : dict-like of dict-like
 25 |             Configuration of the optimization variables. See API description.
 26 |         """
 27 |         AbstractOptimizer.__init__(self, api_config)
 28 | 
 29 |         self.space_x = JointSpace(api_config)
 30 |         self.bounds = self.space_x.get_bounds()
 31 |         self.create_opt_prob()  # Sets up the optimization problem (needs self.bounds)
 32 |         self.max_evals = np.iinfo(np.int32).max  # NOTE: Largest possible int32
 33 |         self.batch_size = None
 34 |         self.history = []
 35 |         self.proposals = []
 36 | 
 37 |     def create_opt_prob(self):
 38 |         """Create an optimization problem object."""
 39 |         opt = OptimizationProblem()
 40 |         opt.lb = self.bounds[:, 0]  # In warped space
 41 |         opt.ub = self.bounds[:, 1]  # In warped space
 42 |         opt.dim = len(self.bounds)
 43 |         opt.cont_var = np.arange(len(self.bounds))
 44 |         opt.int_var = []
 45 |         assert len(opt.cont_var) + len(opt.int_var) == opt.dim
 46 |         opt.objfun = None
 47 |         self.opt = opt
 48 | 
 49 |     def start(self, max_evals):
 50 |         """Starts a new pySOT run."""
 51 |         self.history = []
 52 |         self.proposals = []
 53 | 
 54 |         # Symmetric Latin hypercube design
 55 |         des_pts = max([self.batch_size, 2 * (self.opt.dim + 1)])
 56 |         slhd = SymmetricLatinHypercube(dim=self.opt.dim, num_pts=des_pts)
 57 | 
 58 |         # Warped RBF interpolant
 59 |         rbf = RBFInterpolant(
 60 |             dim=self.opt.dim,
 61 |             lb=self.opt.lb,
 62 |             ub=self.opt.ub,
 63 |             kernel=CubicKernel(),
 64 |             tail=LinearTail(self.opt.dim),
 65 |             eta=1e-4,
 66 |         )
 67 | 
 68 |         # Optimization strategy
 69 |         self.strategy = SRBFStrategy(
 70 |             max_evals=self.max_evals,
 71 |             opt_prob=self.opt,
 72 |             exp_design=slhd,
 73 |             surrogate=rbf,
 74 |             asynchronous=True,
 75 |             batch_size=1,
 76 |             use_restarts=True,
 77 |         )
 78 | 
 79 |     def suggest(self, n_suggestions=1):
 80 |         """Get a suggestion from the optimizer.
 81 | 
 82 |         Parameters
 83 |         ----------
 84 |         n_suggestions : int
 85 |             Desired number of parallel suggestions in the output
 86 | 
 87 |         Returns
 88 |         -------
 89 |         next_guess : list of dict
 90 |             List of `n_suggestions` suggestions to evaluate the objective
 91 |             function. Each suggestion is a dictionary where each key
 92 |             corresponds to a parameter being optimized.
 93 |         """
 94 | 
 95 |         if self.batch_size is None:  # First call to suggest
 96 |             self.batch_size = n_suggestions
 97 |             self.start(self.max_evals)
 98 | 
 99 |         # Set the tolerances pretending like we are running batch
100 |         d, p = float(self.opt.dim), float(n_suggestions)
101 |         self.strategy.failtol = p * int(max(np.ceil(d / p), np.ceil(4 / p)))
102 | 
103 |         # Now we can make suggestions
104 |         x_w = []
105 |         self.proposals = []
106 |         for _ in range(n_suggestions):
107 |             proposal = self.strategy.propose_action()
108 |             record = EvalRecord(proposal.args, status="pending")
109 |             proposal.record = record
110 |             proposal.accept()  # This triggers all the callbacks
111 | 
112 |             # It is possible that pySOT proposes a previously evaluated point
113 |             # when all variables are integers, so we just restart in this case
114 |             # since we have likely converged anyway. See PySOT issue #30.
115 |             x = list(proposal.record.params)  # From tuple to list
116 |             x_unwarped, = self.space_x.unwarp(x)
117 |             if x_unwarped in self.history:
118 |                 warnings.warn("pySOT proposed the same point twice")
119 |                 self.start(self.max_evals)
120 |                 return self.suggest(n_suggestions=n_suggestions)
121 | 
122 |             # NOTE: Append unwarped to avoid rounding issues
123 |             self.history.append(copy(x_unwarped))
124 |             self.proposals.append(proposal)
125 |             x_w.append(copy(x_unwarped))
126 | 
127 |         return x_w
128 | 
129 |     def _observe(self, x, y):
130 |         # Find the matching proposal and execute its callbacks
131 |         idx = [x == xx for xx in self.history]
132 |         i = np.argwhere(idx)[0].item()  # Pick the first index if there are ties
133 |         proposal = self.proposals[i]
134 |         proposal.record.complete(y)
135 |         self.proposals.pop(i)
136 |         self.history.pop(i)
137 | 
138 |     def observe(self, X, y):
139 |         """Send an observation of a suggestion back to the optimizer.
140 | 
141 |         Parameters
142 |         ----------
143 |         X : list of dict-like
144 |             Places where the objective function has already been evaluated.
145 |             Each suggestion is a dictionary where each key corresponds to a
146 |             parameter being optimized.
147 |         y : array-like, shape (n,)
148 |             Corresponding values where objective has been evaluated
149 |         """
150 |         assert len(X) == len(y)
151 | 
152 |         for x_, y_ in zip(X, y):
153 |             # Just ignore any non-finite observations we got, unclear if right thing
154 |             if np.isfinite(y_):
155 |                 self._observe(x_, y_)
156 | 
157 | 
158 | if __name__ == "__main__":
159 |     experiment_main(PySOTOptimizer)
160 | 


--------------------------------------------------------------------------------
/example_opt_root/random_optimizer.py:
--------------------------------------------------------------------------------
 1 | import bayesmark.random_search as rs
 2 | from bayesmark import np_util
 3 | from bayesmark.abstract_optimizer import AbstractOptimizer
 4 | from bayesmark.experiment import experiment_main
 5 | 
 6 | 
 7 | class RandomOptimizer(AbstractOptimizer):
 8 |     # Unclear what is best package to list for primary_import here.
 9 |     primary_import = "bayesmark"
10 | 
11 |     def __init__(self, api_config, random=np_util.random):
12 |         """Build wrapper class to use random search function in benchmark.
13 | 
14 |         Settings for `suggest_dict` can be passed using kwargs.
15 | 
16 |         Parameters
17 |         ----------
18 |         api_config : dict-like of dict-like
19 |             Configuration of the optimization variables. See API description.
20 |         """
21 |         AbstractOptimizer.__init__(self, api_config)
22 |         self.random = random
23 | 
24 |     def suggest(self, n_suggestions=1):
25 |         """Get suggestion.
26 | 
27 |         Parameters
28 |         ----------
29 |         n_suggestions : int
30 |             Desired number of parallel suggestions in the output
31 | 
32 |         Returns
33 |         -------
34 |         next_guess : list of dict
35 |             List of `n_suggestions` suggestions to evaluate the objective
36 |             function. Each suggestion is a dictionary where each key
37 |             corresponds to a parameter being optimized.
38 |         """
39 |         x_guess = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)
40 |         return x_guess
41 | 
42 |     def observe(self, X, y):
43 |         """Feed an observation back.
44 | 
45 |         Parameters
46 |         ----------
47 |         X : list of dict-like
48 |             Places where the objective function has already been evaluated.
49 |             Each suggestion is a dictionary where each key corresponds to a
50 |             parameter being optimized.
51 |         y : array-like, shape (n,)
52 |             Corresponding values where objective has been evaluated
53 |         """
54 |         # Random search so don't do anything
55 |         pass
56 | 
57 | 
# Script entry point: when this file is executed directly, hand the optimizer
# class to bayesmark's `experiment_main`.
if __name__ == "__main__":
    experiment_main(RandomOptimizer)
60 | 


--------------------------------------------------------------------------------
/example_opt_root/scikit_optimizer.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from scipy.interpolate import interp1d
  3 | from skopt import Optimizer as SkOpt
  4 | from skopt.space import Categorical, Integer, Real
  5 | 
  6 | from bayesmark.abstract_optimizer import AbstractOptimizer
  7 | from bayesmark.experiment import experiment_main
  8 | 
  9 | 
 10 | class ScikitOptimizer(AbstractOptimizer):
 11 |     primary_import = "scikit-optimize"
 12 | 
 13 |     def __init__(self, api_config, base_estimator="GP", acq_func="gp_hedge", n_initial_points=5):
 14 |         """Build wrapper class to use an optimizer in benchmark.
 15 | 
 16 |         Parameters
 17 |         ----------
 18 |         api_config : dict-like of dict-like
 19 |             Configuration of the optimization variables. See API description.
 20 |         base_estimator : {'GP', 'RF', 'ET', 'GBRT'}
 21 |             How to estimate the objective function.
 22 |         acq_func : {'LCB', 'EI', 'PI', 'gp_hedge', 'EIps', 'PIps'}
 23 |             Acquisition objective to decide next suggestion.
 24 |         n_initial_points : int
 25 |             Number of points to sample randomly before actual Bayes opt.
 26 |         """
 27 |         AbstractOptimizer.__init__(self, api_config)
 28 | 
 29 |         dimensions, self.round_to_values = ScikitOptimizer.get_sk_dimensions(api_config)
 30 | 
 31 |         # Older versions of skopt don't copy over the dimensions names during
 32 |         # normalization and hence the names are missing in
 33 |         # self.skopt.space.dimensions. Therefore, we save our own copy of
 34 |         # dimensions list to be safe. If we can commit to using the newer
 35 |         # versions of skopt we can delete self.dimensions.
 36 |         self.dimensions_list = tuple(dd.name for dd in dimensions)
 37 | 
 38 |         self.skopt = SkOpt(
 39 |             dimensions,
 40 |             n_initial_points=n_initial_points,
 41 |             base_estimator=base_estimator,
 42 |             acq_func=acq_func,
 43 |             acq_optimizer="auto",
 44 |             acq_func_kwargs={},
 45 |             acq_optimizer_kwargs={},
 46 |         )
 47 | 
 48 |     @staticmethod
 49 |     def get_sk_dimensions(api_config, transform="normalize"):
 50 |         """Help routine to setup skopt search space in constructor.
 51 | 
 52 |         Take api_config as argument so this can be static.
 53 |         """
 54 |         # The ordering of iteration prob makes no difference, but just to be
 55 |         # safe and consistnent with space.py, I will make sorted.
 56 |         param_list = sorted(api_config.keys())
 57 | 
 58 |         sk_dims = []
 59 |         round_to_values = {}
 60 |         for param_name in param_list:
 61 |             param_config = api_config[param_name]
 62 | 
 63 |             param_type = param_config["type"]
 64 | 
 65 |             param_space = param_config.get("space", None)
 66 |             param_range = param_config.get("range", None)
 67 |             param_values = param_config.get("values", None)
 68 | 
 69 |             # Some setup for case that whitelist of values is provided:
 70 |             values_only_type = param_type in ("cat", "ordinal")
 71 |             if (param_values is not None) and (not values_only_type):
 72 |                 assert param_range is None
 73 |                 param_values = np.unique(param_values)
 74 |                 param_range = (param_values[0], param_values[-1])
 75 |                 round_to_values[param_name] = interp1d(
 76 |                     param_values, param_values, kind="nearest", fill_value="extrapolate"
 77 |                 )
 78 | 
 79 |             if param_type == "int":
 80 |                 # Integer space in sklearn does not support any warping => Need
 81 |                 # to leave the warping as linear in skopt.
 82 |                 sk_dims.append(Integer(param_range[0], param_range[-1], transform=transform, name=param_name))
 83 |             elif param_type == "bool":
 84 |                 assert param_range is None
 85 |                 assert param_values is None
 86 |                 sk_dims.append(Integer(0, 1, transform=transform, name=param_name))
 87 |             elif param_type in ("cat", "ordinal"):
 88 |                 assert param_range is None
 89 |                 # Leave x-form to one-hot as per skopt default
 90 |                 sk_dims.append(Categorical(param_values, name=param_name))
 91 |             elif param_type == "real":
 92 |                 # Skopt doesn't support all our warpings, so need to pick
 93 |                 # closest substitute it does support.
 94 |                 prior = "log-uniform" if param_space in ("log", "logit") else "uniform"
 95 |                 sk_dims.append(Real(param_range[0], param_range[-1], prior=prior, transform=transform, name=param_name))
 96 |             else:
 97 |                 assert False, "type %s not handled in API" % param_type
 98 |         return sk_dims, round_to_values
 99 | 
100 |     def suggest(self, n_suggestions=1):
101 |         """Get a suggestion from the optimizer.
102 | 
103 |         Parameters
104 |         ----------
105 |         n_suggestions : int
106 |             Desired number of parallel suggestions in the output
107 | 
108 |         Returns
109 |         -------
110 |         next_guess : list of dict
111 |             List of `n_suggestions` suggestions to evaluate the objective
112 |             function. Each suggestion is a dictionary where each key
113 |             corresponds to a parameter being optimized.
114 |         """
115 |         # First get list of lists from skopt.ask()
116 |         next_guess = self.skopt.ask(n_points=n_suggestions)
117 |         # Then convert to list of dicts
118 |         next_guess = [dict(zip(self.dimensions_list, x)) for x in next_guess]
119 | 
120 |         # Now do the rounding, custom rounding is not supported in skopt. Note
121 |         # that there is not nec a round function for each dimension here.
122 |         for param_name, round_f in self.round_to_values.items():
123 |             for xx in next_guess:
124 |                 xx[param_name] = round_f(xx[param_name])
125 |         return next_guess
126 | 
127 |     def observe(self, X, y):
128 |         """Send an observation of a suggestion back to the optimizer.
129 | 
130 |         Parameters
131 |         ----------
132 |         X : list of dict-like
133 |             Places where the objective function has already been evaluated.
134 |             Each suggestion is a dictionary where each key corresponds to a
135 |             parameter being optimized.
136 |         y : array-like, shape (n,)
137 |             Corresponding values where objective has been evaluated
138 |         """
139 |         # Supposedly skopt can handle blocks, but not sure about interface for
140 |         # that. Just do loop to be safe for now.
141 |         for xx, yy in zip(X, y):
142 |             # skopt needs lists instead of dicts
143 |             xx = [xx[dim_name] for dim_name in self.dimensions_list]
144 |             # Just ignore, any inf observations we got, unclear if right thing
145 |             if np.isfinite(yy):
146 |                 self.skopt.tell(xx, yy)
147 | 
148 | 
# Script entry point: when this file is executed directly, hand the optimizer
# class to bayesmark's `experiment_main`.
if __name__ == "__main__":
    experiment_main(ScikitOptimizer)
151 | 


--------------------------------------------------------------------------------
/integration_test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -ex
 4 | set -o pipefail
 5 | 
 6 | # Be able to check if using version out of tar ball
 7 | which bayesmark-launch
 8 | which bayesmark-exp
 9 | which bayesmark-agg
10 | which bayesmark-anal
11 | 
12 | DB_ROOT=./notebooks
13 | DBID=bo_example_folder
14 | 
15 | bayesmark-launch -n 15 -r 2 -dir $DB_ROOT -b $DBID -o RandomSearch PySOT OpenTuner-BanditA -c SVM DT -d boston breast -v
16 | bayesmark-agg -dir $DB_ROOT -b $DBID
17 | bayesmark-anal -dir $DB_ROOT -b $DBID -v
18 | 
19 | # Try ipynb export
20 | python -m ipykernel install --name=bobm_ipynb --user
21 | jupyter nbconvert --to html --execute notebooks/plot_mean_score.ipynb --ExecutePreprocessor.timeout=-1
22 | jupyter nbconvert --to html --execute notebooks/plot_test_case.ipynb --ExecutePreprocessor.timeout=-1
23 | 
24 | # Try dry run
25 | bayesmark-launch -n 15 -r 3 -dir $DB_ROOT -b $DBID -o RandomSearch PySOT OpenTuner-BanditA -c SVM DT -nj 50 -v
26 | 
27 | # Try again but use the custom optimizers
28 | mv $DB_ROOT/$DBID old
29 | bayesmark-launch -n 15 -r 1 -dir $DB_ROOT -b $DBID -o RandomSearch PySOT-New OpenTuner-BanditA-New -c SVM DT --opt-root ./example_opt_root -d boston breast -v
30 | bayesmark-agg -dir $DB_ROOT -b $DBID
31 | bayesmark-anal -dir $DB_ROOT -b $DBID -v
32 | 
33 | # Export again
34 | jupyter nbconvert --to html --execute notebooks/plot_mean_score.ipynb --ExecutePreprocessor.timeout=-1
35 | jupyter nbconvert --to html --execute notebooks/plot_test_case.ipynb --ExecutePreprocessor.timeout=-1
36 | 
37 | # Try dry run
38 | bayesmark-launch -n 15 -r 2 -dir $DB_ROOT -b $DBID -o RandomSearch PySOT-New OpenTuner-BanditA-New -c SVM DT --opt-root ./example_opt_root -nj 50 -v
39 | 
40 | echo "success"
41 | 


--------------------------------------------------------------------------------
/integration_test_with_setup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -ex
 4 | set -o pipefail
 5 | 
 6 | export PIP_REQUIRE_VIRTUALENV=false
 7 | 
 8 | # Handy to know what we are working with
 9 | git --version
10 | python --version
11 | pip freeze | sort
12 | 
13 | # Cleanup workspace, src for any old -e installs
14 | git clean -x -f -d
15 | rm -rf src/
16 | 
17 | # See if opentuner will work in env (but this command does not work on Mac)
18 | # dpkg -l | grep libsqlite
19 | 
20 | # Simulate deployment with wheel
21 | ./build_wheel.sh
22 | mv -v dist/bayesmark-* dist/bayesmark.tar.gz
23 | 
24 | # Install and run local optimizers
25 | mkdir install_test
26 | cp -r ./notebooks install_test
27 | cp -r ./example_opt_root install_test
28 | 
29 | cd install_test
30 | virtualenv bobm_ipynb --python=python3
31 | source ./bobm_ipynb/bin/activate
32 | python --version
33 | pip freeze | sort
34 | 
35 | # Remove this if we want to make sure everything is compatible with latest
36 | # pip install -r ../requirements/optimizers.txt
37 | 
38 | pip install ../dist/bayesmark.tar.gz[optimizers,notebooks]
39 | ../integration_test.sh
40 | 
41 | # wrap up
42 | deactivate
43 | cd ..
44 | 
45 | echo "success with setup wrapper too"
46 | 


--------------------------------------------------------------------------------
/notebooks/dummy.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import ipykernel
15 | import jupyter
16 | import jupyter_core
17 | import nbconvert
18 | 
# import extra deps and use it to keep pipreqs and flake8 happy
# Reading `__name__` and `__version__` from each module counts as a use of
# every import above; the loop prints one "<name> <version>" line per package.
for pkg in (ipykernel, jupyter, jupyter_core, nbconvert):
    print("%s %s" % (pkg.__name__, pkg.__version__))
22 | 


--------------------------------------------------------------------------------
/notebooks/plot_test_case.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import matplotlib.pyplot as plt\n",
 10 |     "from matplotlib import cm, colors, rcParams\n",
 11 |     "\n",
 12 |     "import numpy as np\n",
 13 |     "\n",
 14 |     "import bayesmark.constants as cc\n",
 15 |     "from bayesmark.path_util import abspath\n",
 16 |     "from bayesmark.serialize import XRSerializer\n",
 17 |     "from bayesmark.constants import ITER, METHOD, TEST_CASE, OBJECTIVE, VISIBLE_TO_OPT"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": null,
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "# User settings, must specify location of the data to make plots here for this to run\n",
 27 |     "DB_ROOT = abspath(\".\")\n",
 28 |     "DBID = \"bo_example_folder\"\n",
 29 |     "metric_for_scoring = VISIBLE_TO_OPT"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": null,
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "# Matplotlib setup\n",
 39 |     "# Note this will put type-3 font BS in the pdfs, if it matters\n",
 40 |     "rcParams[\"mathtext.fontset\"] = \"stix\"\n",
 41 |     "rcParams[\"font.family\"] = \"STIXGeneral\""
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": null,
 47 |    "metadata": {},
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "def build_color_dict(names):\n",
 51 |     "    \"\"\"Make a color dictionary to give each name a mpl color.\n",
 52 |     "    \"\"\"\n",
 53 |     "    norm = colors.Normalize(vmin=0, vmax=1)\n",
 54 |     "    m = cm.ScalarMappable(norm, cm.tab20)\n",
 55 |     "    color_dict = m.to_rgba(np.linspace(0, 1, len(names)))\n",
 56 |     "    color_dict = dict(zip(names, color_dict))\n",
 57 |     "    return color_dict"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": null,
 63 |    "metadata": {},
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "# Load the data\n",
 67 |     "agg_results_ds, meta = XRSerializer.load_derived(DB_ROOT, db=DBID, key=cc.PERF_RESULTS)"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": null,
 73 |    "metadata": {},
 74 |    "outputs": [],
 75 |    "source": [
 76 |     "# Setup for plotting\n",
 77 |     "method_list = agg_results_ds.coords[METHOD].values\n",
 78 |     "method_to_rgba = build_color_dict(method_list.tolist())"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": null,
 84 |    "metadata": {},
 85 |    "outputs": [],
 86 |    "source": [
  87 |     "# Make the plots for individual test functions\n",
 88 |     "for func_name in agg_results_ds.coords[TEST_CASE].values:\n",
 89 |     "    plt.figure(figsize=(5, 5), dpi=300)\n",
 90 |     "    for method_name in method_list:\n",
 91 |     "        curr_ds = agg_results_ds.sel({TEST_CASE: func_name, METHOD: method_name, OBJECTIVE: metric_for_scoring})\n",
 92 |     "\n",
 93 |     "        plt.fill_between(\n",
 94 |     "            curr_ds.coords[ITER].values,\n",
 95 |     "            curr_ds[cc.LB_MED].values,\n",
 96 |     "            curr_ds[cc.UB_MED].values,\n",
 97 |     "            color=method_to_rgba[method_name],\n",
 98 |     "            alpha=0.5,\n",
 99 |     "        )\n",
100 |     "        plt.plot(\n",
101 |     "            curr_ds.coords[ITER].values,\n",
102 |     "            curr_ds[cc.PERF_MED].values,\n",
103 |     "            color=method_to_rgba[method_name],\n",
104 |     "            label=method_name,\n",
105 |     "            marker=\".\",\n",
106 |     "        )\n",
107 |     "    plt.xlabel(\"evaluation\", fontsize=10)\n",
108 |     "    plt.ylabel(\"median score\", fontsize=10)\n",
109 |     "    plt.title(func_name)\n",
110 |     "    plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\"upper left\", borderaxespad=0.0)\n",
111 |     "    plt.grid()\n",
112 |     "\n",
113 |     "    plt.figure(figsize=(5, 5), dpi=300)\n",
114 |     "    for method_name in method_list:\n",
115 |     "        curr_ds = agg_results_ds.sel({TEST_CASE: func_name, METHOD: method_name, OBJECTIVE: metric_for_scoring})\n",
116 |     "\n",
117 |     "        plt.fill_between(\n",
118 |     "            curr_ds.coords[ITER].values,\n",
119 |     "            curr_ds[cc.LB_MEAN].values,\n",
120 |     "            curr_ds[cc.UB_MEAN].values,\n",
121 |     "            color=method_to_rgba[method_name],\n",
122 |     "            alpha=0.5,\n",
123 |     "        )\n",
124 |     "        plt.plot(\n",
125 |     "            curr_ds.coords[ITER].values,\n",
126 |     "            curr_ds[cc.PERF_MEAN].values,\n",
127 |     "            color=method_to_rgba[method_name],\n",
128 |     "            label=method_name,\n",
129 |     "            marker=\".\",\n",
130 |     "        )\n",
131 |     "    plt.xlabel(\"evaluation\", fontsize=10)\n",
132 |     "    plt.ylabel(\"mean score\", fontsize=10)\n",
133 |     "    plt.title(func_name)\n",
134 |     "    plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\"upper left\", borderaxespad=0.0)\n",
135 |     "    plt.grid()"
136 |    ]
137 |   }
138 |  ],
139 |  "metadata": {
140 |   "kernelspec": {
141 |    "display_name": "bobm_ipynb",
142 |    "language": "python",
143 |    "name": "bobm_ipynb"
144 |   },
145 |   "language_info": {
146 |    "codemirror_mode": {
147 |     "name": "ipython",
148 |     "version": 3
149 |    },
150 |    "file_extension": ".py",
151 |    "mimetype": "text/x-python",
152 |    "name": "python",
153 |    "nbconvert_exporter": "python",
154 |    "pygments_lexer": "ipython3",
155 |    "version": "3.6.5"
156 |   }
157 |  },
158 |  "nbformat": 4,
159 |  "nbformat_minor": 2
160 | }
161 | 


--------------------------------------------------------------------------------
/requirements/base.in:
--------------------------------------------------------------------------------
1 | scipy==1.2.0
2 | pandas==0.24.0
3 | pathvalidate==0.29.0
4 | numpy==1.16.1
5 | GitPython==2.1.11
6 | importlib-metadata==0.18
7 | scikit-learn==0.20.2
8 | xarray==0.12.2
9 | 


--------------------------------------------------------------------------------
/requirements/base.txt:
--------------------------------------------------------------------------------
 1 | # SHA1:7ebe4df9e60f001b676e74ae561d5dc3202c3dd0
 2 | #
 3 | # This file is autogenerated by pip-compile-multi
 4 | # To update, run:
 5 | #
 6 | #    pip-compile-multi
 7 | #
 8 | gitdb2==2.0.5             # via gitpython
 9 | gitpython==2.1.11         # via -r requirements/base.in
10 | importlib-metadata==0.18  # via -r requirements/base.in
11 | numpy==1.16.1             # via -r requirements/base.in, pandas, scikit-learn, scipy, xarray
12 | pandas==0.24.0            # via -r requirements/base.in, xarray
13 | pathvalidate==0.29.0      # via -r requirements/base.in
14 | python-dateutil==2.8.0    # via pandas
15 | pytz==2019.1              # via pandas
16 | scikit-learn==0.20.2      # via -r requirements/base.in
17 | scipy==1.2.0              # via -r requirements/base.in, scikit-learn
18 | six==1.12.0               # via python-dateutil
19 | smmap2==2.0.5             # via gitdb2
20 | xarray==0.12.2            # via -r requirements/base.in
21 | zipp==0.5.2               # via importlib-metadata
22 | 
23 | # The following packages are considered to be unsafe in a requirements file:
24 | # setuptools
25 | 


--------------------------------------------------------------------------------
/requirements/docs.in:
--------------------------------------------------------------------------------
1 | -r base.in
2 | Sphinx==2.1.2
3 | 


--------------------------------------------------------------------------------
/requirements/docs.txt:
--------------------------------------------------------------------------------
 1 | # SHA1:cde26afc07f6c9c1c6cb169e125fc5142a0c59ae
 2 | #
 3 | # This file is autogenerated by pip-compile-multi
 4 | # To update, run:
 5 | #
 6 | #    pip-compile-multi
 7 | #
 8 | -r base.txt
 9 | alabaster==0.7.12         # via sphinx
10 | attrs==19.1.0             # via packaging
11 | babel==2.7.0              # via sphinx
12 | certifi==2019.6.16        # via requests
13 | chardet==3.0.4            # via requests
14 | docutils==0.15            # via sphinx
15 | idna==2.8                 # via requests
16 | imagesize==1.1.0          # via sphinx
17 | jinja2==2.10.1            # via sphinx
18 | markupsafe==1.1.1         # via jinja2
19 | packaging==19.1           # via sphinx
20 | pygments==2.4.2           # via sphinx
21 | pyparsing==2.4.2          # via packaging
22 | requests==2.22.0          # via sphinx
23 | snowballstemmer==1.9.0    # via sphinx
24 | sphinx==2.1.2             # via -r requirements/docs.in
25 | sphinxcontrib-applehelp==1.0.1  # via sphinx
26 | sphinxcontrib-devhelp==1.0.1  # via sphinx
27 | sphinxcontrib-htmlhelp==1.0.2  # via sphinx
28 | sphinxcontrib-jsmath==1.0.1  # via sphinx
29 | sphinxcontrib-qthelp==1.0.2  # via sphinx
30 | sphinxcontrib-serializinghtml==1.1.3  # via sphinx
31 | urllib3==1.25.3           # via requests
32 | 
33 | # The following packages are considered to be unsafe in a requirements file:
34 | # setuptools
35 | 


--------------------------------------------------------------------------------
/requirements/ipynb.in:
--------------------------------------------------------------------------------
1 | -r base.in
2 | ipykernel==5.1.1
3 | nbconvert==5.6.0
4 | jupyter==1.0.0
5 | jupyter-core==4.6.0
6 | matplotlib==3.1.1
7 | numpy==1.16.1
8 | 


--------------------------------------------------------------------------------
/requirements/ipynb.txt:
--------------------------------------------------------------------------------
 1 | # SHA1:6c16d140e48d7e7fa0e157c053953db7d76f0caf
 2 | #
 3 | # This file is autogenerated by pip-compile-multi
 4 | # To update, run:
 5 | #
 6 | #    pip-compile-multi
 7 | #
 8 | -r base.txt
 9 | appnope==0.1.0            # via ipython
10 | attrs==19.1.0             # via jsonschema
11 | backcall==0.1.0           # via ipython
12 | bleach==3.1.0             # via nbconvert
13 | cycler==0.10.0            # via matplotlib
14 | decorator==4.4.0          # via ipython, traitlets
15 | defusedxml==0.6.0         # via nbconvert
16 | entrypoints==0.3          # via nbconvert
17 | ipykernel==5.1.1          # via -r requirements/ipynb.in, ipywidgets, jupyter, jupyter-console, notebook, qtconsole
18 | ipython-genutils==0.2.0   # via nbformat, notebook, qtconsole, traitlets
19 | ipython==7.6.1            # via ipykernel, ipywidgets, jupyter-console
20 | ipywidgets==7.5.1         # via jupyter
21 | jedi==0.14.1              # via ipython
22 | jinja2==2.10.1            # via nbconvert, notebook
23 | jsonschema==3.0.2         # via nbformat
24 | jupyter-client==5.3.1     # via ipykernel, jupyter-console, notebook, qtconsole
25 | jupyter-console==6.0.0    # via jupyter
26 | jupyter-core==4.6.0       # via -r requirements/ipynb.in, jupyter-client, nbconvert, nbformat, notebook, qtconsole
27 | jupyter==1.0.0            # via -r requirements/ipynb.in
28 | kiwisolver==1.1.0         # via matplotlib
29 | markupsafe==1.1.1         # via jinja2
30 | matplotlib==3.1.1         # via -r requirements/ipynb.in
31 | mistune==0.8.4            # via nbconvert
32 | nbconvert==5.6.0          # via -r requirements/ipynb.in, jupyter, notebook
33 | nbformat==4.4.0           # via ipywidgets, nbconvert, notebook
34 | notebook==6.0.1           # via jupyter, widgetsnbextension
35 | pandocfilters==1.4.2      # via nbconvert
36 | parso==0.5.1              # via jedi
37 | pexpect==4.7.0            # via ipython
38 | pickleshare==0.7.5        # via ipython
39 | prometheus-client==0.7.1  # via notebook
40 | prompt-toolkit==2.0.9     # via ipython, jupyter-console
41 | ptyprocess==0.6.0         # via pexpect, terminado
42 | pygments==2.4.2           # via ipython, jupyter-console, nbconvert, qtconsole
43 | pyparsing==2.4.2          # via matplotlib
44 | pyrsistent==0.15.4        # via jsonschema
45 | pyzmq==18.0.2             # via jupyter-client, notebook
46 | qtconsole==4.5.5          # via jupyter
47 | send2trash==1.5.0         # via notebook
48 | terminado==0.8.2          # via notebook
49 | testpath==0.4.2           # via nbconvert
50 | tornado==6.0.3            # via ipykernel, jupyter-client, notebook, terminado
51 | traitlets==4.3.2          # via ipykernel, ipython, ipywidgets, jupyter-client, jupyter-core, nbconvert, nbformat, notebook, qtconsole
52 | wcwidth==0.1.7            # via prompt-toolkit
53 | webencodings==0.5.1       # via bleach
54 | widgetsnbextension==3.5.1  # via ipywidgets
55 | 
56 | # The following packages are considered to be unsafe in a requirements file:
57 | # setuptools
58 | 


--------------------------------------------------------------------------------
/requirements/optimizers.in:
--------------------------------------------------------------------------------
 1 | -r base.in
 2 | opentuner==0.8.2
 3 | numpy==1.16.1
 4 | scipy==1.2.0
 5 | nevergrad==0.1.4
 6 | hyperopt==0.1.1
 7 | POAP==0.1.26
 8 | scikit-optimize==0.5.2
 9 | pySOT==0.3.3
10 | 


--------------------------------------------------------------------------------
/requirements/optimizers.txt:
--------------------------------------------------------------------------------
 1 | # SHA1:08174a35f9973427450f549131b4438e2f116a88
 2 | #
 3 | # This file is autogenerated by pip-compile-multi
 4 | # To update, run:
 5 | #
 6 | #    pip-compile-multi
 7 | #
 8 | -r base.txt
 9 | argparse==1.4.0           # via opentuner
10 | atomicwrites==1.3.0       # via pytest
11 | attrs==19.1.0             # via packaging, pytest
12 | bayesian-optimization==0.6.0  # via nevergrad
13 | certifi==2019.6.16        # via requests
14 | chardet==3.0.4            # via requests
15 | cma==2.7.0                # via nevergrad
16 | coverage==4.5.4           # via nevergrad
17 | cycler==0.10.0            # via matplotlib
18 | decorator==4.4.0          # via networkx
19 | dill==0.3.0               # via pysot
20 | fn==0.4.3                 # via opentuner
21 | future==0.17.1            # via hyperopt, opentuner
22 | genty==1.3.2              # via nevergrad
23 | hyperopt==0.1.1           # via -r requirements/optimizers.in
24 | idna==2.8                 # via requests
25 | joblib==0.13.2            # via nevergrad
26 | kiwisolver==1.1.0         # via matplotlib
27 | matplotlib==3.1.1         # via nevergrad
28 | more-itertools==7.2.0     # via pytest
29 | mypy-extensions==0.4.1    # via mypy
30 | mypy==0.720               # via nevergrad
31 | networkx==2.3             # via hyperopt
32 | nevergrad==0.1.4          # via -r requirements/optimizers.in
33 | nose-timer==0.7.5         # via nevergrad
34 | nose==1.3.7               # via nevergrad, nose-timer
35 | opentuner==0.8.2          # via -r requirements/optimizers.in
36 | packaging==19.1           # via pytest
37 | pluggy==0.12.0            # via pytest
38 | poap==0.1.26              # via -r requirements/optimizers.in, pysot
39 | py==1.8.0                 # via pytest
40 | pydoe2==1.2.0             # via pysot
41 | pymongo==3.8.0            # via hyperopt
42 | pyparsing==2.4.2          # via matplotlib, packaging
43 | pysot==0.3.3              # via -r requirements/optimizers.in
44 | pytest==5.0.1             # via pysot
45 | requests==2.22.0          # via nevergrad
46 | scikit-optimize==0.5.2    # via -r requirements/optimizers.in
47 | sqlalchemy==1.3.8         # via opentuner
48 | typed-ast==1.4.0          # via mypy
49 | typing-extensions==3.7.4  # via mypy, nevergrad
50 | urllib3==1.25.3           # via requests
51 | wcwidth==0.1.7            # via pytest
52 | xlrd==1.2.0               # via nevergrad
53 | xlwt==1.3.0               # via nevergrad
54 | 
55 | # The following packages are considered to be unsafe in a requirements file:
56 | # setuptools
57 | 


--------------------------------------------------------------------------------
/requirements/pipreqs_edits.sed:
--------------------------------------------------------------------------------
1 | /argparse/d
2 | /appnope/d
3 | /certifi/d
4 | /bayesmark/d
5 | 


--------------------------------------------------------------------------------
/requirements/self.txt:
--------------------------------------------------------------------------------
1 | bayesmark==0.0.8
2 | 


--------------------------------------------------------------------------------
/requirements/test.in:
--------------------------------------------------------------------------------
 1 | -r base.in
 2 | -r optimizers.in
 3 | hypothesis==4.32.3
 4 | hypothesis-gufunc==0.0.5rc2
 5 | numpy==1.16.1
 6 | pathvalidate==0.29.0
 7 | scipy==1.2.0
 8 | scikit-learn==0.20.2
 9 | xarray==0.12.2
10 | pytest==5.0.1
11 | pytest-cov==2.7.1
12 | 


--------------------------------------------------------------------------------
/requirements/test.txt:
--------------------------------------------------------------------------------
 1 | # SHA1:0dd8b5c26e6671e320706ddd399f6f62e19f3189
 2 | #
 3 | # This file is autogenerated by pip-compile-multi
 4 | # To update, run:
 5 | #
 6 | #    pip-compile-multi
 7 | #
 8 | -r base.txt
 9 | -r optimizers.txt
10 | hypothesis-gufunc==0.0.5rc2  # via -r requirements/test.in
11 | hypothesis==4.32.3        # via -r requirements/test.in, hypothesis-gufunc
12 | pytest-cov==2.7.1         # via -r requirements/test.in
13 | 
14 | # The following packages are considered to be unsafe in a requirements file:
15 | # setuptools
16 | 


--------------------------------------------------------------------------------
/requirements/tools.in:
--------------------------------------------------------------------------------
1 | detect-secrets==0.12.5
2 | ipykernel==5.1.1
3 | nbconvert==5.6.0
4 | pip-compile-multi==1.4.0
5 | pipreqs==0.4.9
6 | pre-commit==1.15.2
7 | pytest==5.0.1
8 | 


--------------------------------------------------------------------------------
/requirements/tools.txt:
--------------------------------------------------------------------------------
 1 | # SHA1:08f4ed4790290aab315dd20169793be4f0a974af
 2 | #
 3 | # This file is autogenerated by pip-compile-multi
 4 | # To update, run:
 5 | #
 6 | #    pip-compile-multi
 7 | #
 8 | appnope==0.1.0            # via ipython
 9 | aspy.yaml==1.3.0          # via pre-commit
10 | atomicwrites==1.3.0       # via pytest
11 | attrs==19.1.0             # via jsonschema, packaging, pytest
12 | backcall==0.1.0           # via ipython
13 | bleach==3.1.0             # via nbconvert
14 | certifi==2019.6.16        # via requests
15 | cfgv==2.0.1               # via pre-commit
16 | chardet==3.0.4            # via requests
17 | click==7.0                # via pip-compile-multi, pip-tools
18 | decorator==4.4.0          # via ipython, traitlets
19 | defusedxml==0.6.0         # via nbconvert
20 | detect-secrets==0.12.5    # via -r requirements/tools.in
21 | docopt==0.6.2             # via pipreqs
22 | entrypoints==0.3          # via nbconvert
23 | identify==1.4.5           # via pre-commit
24 | idna==2.8                 # via requests
25 | importlib-metadata==0.18  # via importlib-resources, pluggy, pre-commit, pytest
26 | importlib-resources==2.0.1  # via pre-commit
27 | ipykernel==5.1.1          # via -r requirements/tools.in
28 | ipython-genutils==0.2.0   # via nbformat, traitlets
29 | ipython==7.6.1            # via ipykernel
30 | jedi==0.14.1              # via ipython
31 | jinja2==2.10.1            # via nbconvert
32 | jsonschema==3.0.2         # via nbformat
33 | jupyter-client==5.3.1     # via ipykernel
34 | jupyter-core==4.6.0       # via jupyter-client, nbconvert, nbformat
35 | markupsafe==1.1.1         # via jinja2
36 | mistune==0.8.4            # via nbconvert
37 | more-itertools==7.2.0     # via pytest
38 | nbconvert==5.6.0          # via -r requirements/tools.in
39 | nbformat==4.4.0           # via nbconvert
40 | nodeenv==1.3.3            # via pre-commit
41 | packaging==19.1           # via pytest
42 | pandocfilters==1.4.2      # via nbconvert
43 | parso==0.5.1              # via jedi
44 | pexpect==4.7.0            # via ipython
45 | pickleshare==0.7.5        # via ipython
46 | pip-compile-multi==1.4.0  # via -r requirements/tools.in
47 | pip-tools==5.0.0          # via pip-compile-multi
48 | pipreqs==0.4.9            # via -r requirements/tools.in
49 | pluggy==0.12.0            # via pytest
50 | pre-commit==1.15.2        # via -r requirements/tools.in
51 | prompt-toolkit==2.0.9     # via ipython
52 | ptyprocess==0.6.0         # via pexpect
53 | py==1.8.0                 # via pytest
54 | pygments==2.4.2           # via ipython, nbconvert
55 | pyparsing==2.4.2          # via packaging
56 | pyrsistent==0.15.4        # via jsonschema
57 | pytest==5.0.1             # via -r requirements/tools.in
58 | python-dateutil==2.8.0    # via jupyter-client
59 | pyyaml==5.1.1             # via aspy.yaml, detect-secrets, pre-commit
60 | pyzmq==18.0.2             # via jupyter-client
61 | requests==2.22.0          # via detect-secrets, yarg
62 | six==1.12.0               # via bleach, cfgv, jsonschema, packaging, pip-tools, pre-commit, prompt-toolkit, python-dateutil, traitlets
63 | testpath==0.4.2           # via nbconvert
64 | toml==0.10.0              # via pre-commit
65 | toposort==1.5             # via pip-compile-multi
66 | tornado==6.0.3            # via ipykernel, jupyter-client
67 | traitlets==4.3.2          # via ipykernel, ipython, jupyter-client, jupyter-core, nbconvert, nbformat
68 | urllib3==1.25.3           # via requests
69 | virtualenv==16.7.2        # via pre-commit
70 | wcwidth==0.1.7            # via prompt-toolkit, pytest
71 | webencodings==0.5.1       # via bleach
72 | yarg==0.1.9               # via pipreqs
73 | zipp==0.5.2               # via importlib-metadata, importlib-resources
74 | 
75 | # The following packages are considered to be unsafe in a requirements file:
76 | # pip
77 | # setuptools
78 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from setuptools import find_packages, setup
15 | 
16 | CMD_NAME = "bayesmark"
17 | 
18 | # Strings to remove from README to make it PyPI friendly. See:
19 | # https://packaging.python.org/guides/making-a-pypi-friendly-readme/#validating-restructuredtext-markup
20 | REMOVE_FROM_RST = (":func:", ":ref:")
21 | 
22 | 
23 | def read_requirements(name):
24 |     with open("requirements/" + name + ".in") as f:
25 |         requirements = f.read().strip()
26 |     requirements = requirements.replace("==", ">=").splitlines()  # Loosen strict pins
27 |     return [pp for pp in requirements if pp[0].isalnum()]
28 | 
29 | 
30 | # Derive install requires from base.in first order requirements
31 | requirements = read_requirements("base")
32 | opt_requirements = read_requirements("optimizers")
33 | ipynb_requirements = read_requirements("ipynb")
34 | 
35 | with open("README.rst") as f:
36 |     long_description = f.read()
37 | # Probably more efficient way to do this with regex but good enough
38 | for remove_word in REMOVE_FROM_RST:
39 |     long_description = long_description.replace(remove_word, "")
40 | 
41 | setup(
42 |     name="bayesmark",
43 |     version="0.0.8",
44 |     packages=find_packages(),
45 |     url="https://github.com/uber/bayesmark/",
46 |     author="Ryan Turner",
47 |     author_email=("rdturnermtl@github.com"),
48 |     license="Apache v2",
49 |     description="Bayesian optimization benchmark system",
50 |     install_requires=requirements,
51 |     extras_require={"optimizers": opt_requirements, "notebooks": ipynb_requirements},
52 |     long_description=long_description,
53 |     long_description_content_type="text/x-rst",
54 |     platforms=["any"],
55 |     entry_points={
56 |         "console_scripts": [
57 |             CMD_NAME + "-init = bayesmark.experiment_db_init:main",
58 |             CMD_NAME + "-launch = bayesmark.experiment_launcher:main",
59 |             CMD_NAME + "-agg = bayesmark.experiment_aggregate:main",
60 |             CMD_NAME + "-baseline = bayesmark.experiment_baseline:main",
61 |             CMD_NAME + "-anal = bayesmark.experiment_analysis:main",
62 |             CMD_NAME + "-exp = bayesmark.experiment:main",
63 |         ]
64 |     },
65 | )
66 | 


--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | set -ex
  4 | set -o pipefail
  5 | 
  6 | # Set conda paths
  7 | export CONDA_PATH=./tmp/conda
  8 | export CONDA_ENVS=env
  9 | 
 10 | # Sometimes PIP_REQUIRE_VIRTUALENV has issues with conda
 11 | export PIP_REQUIRE_VIRTUALENV=false
 12 | 
 13 | PY_VERSIONS=( "3.6" "3.7" )
 14 | 
 15 | # Handy to know what we are working with
 16 | git --version
 17 | 
 18 | # Cleanup workspace, src for any old -e installs
 19 | git clean -x -f -d
 20 | rm -rf src/
 21 | 
 22 | # Install miniconda
 23 | if command -v conda 2>/dev/null; then
 24 |     echo "Conda already installed"
 25 | else
 26 |     # We need to use miniconda since we can't figure out how to install py3.6 in
 27 |     # this env image. We could also use Miniconda3-latest-Linux-x86_64.sh but
 28 |     # pinning version to make reproducible.
 29 |     echo "Installing miniconda"
 30 |     if [[ "$OSTYPE" == "darwin"* ]]; then
 31 |         # In future let's also try, for reproducibility:
 32 |         # curl -L -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-4.5.12-MacOSX-x86_64.sh;
 33 |         curl -L -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh;
 34 |     else
 35 |         # In future let's also try, for reproducibility:
 36 |         # curl -L -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-4.5.12-Linux-x86_64.sh;
 37 |         curl -L -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh;
 38 |     fi
 39 |     chmod +x ./miniconda.sh
 40 |     ./miniconda.sh -b -p $CONDA_PATH
 41 |     rm ./miniconda.sh
 42 | fi
 43 | export PATH=$CONDA_PATH/bin:$PATH
 44 | 
 45 | # Setup env just for installing pre-commit to run hooks on all files
 46 | rm -rf "$CONDA_ENVS"
 47 | ENV_PATH="${CONDA_ENVS}/bobm_commit_hooks"
 48 | conda create -y -q -p $ENV_PATH python=3.6
 49 | echo $ENV_PATH
 50 | source activate $ENV_PATH
 51 | python --version
 52 | pip freeze | sort
 53 | # not listing 2nd order deps here, but probably ok
 54 | pip install -r requirements/tools.txt
 55 | # Now run hooks on all files, don't need to install hooks since run directly
 56 | pre-commit run --all-files
 57 | # Now can leave env with  pre-commit
 58 | conda deactivate
 59 | # Also check no changes to files by hooks
 60 | test -z "$(git diff)"
 61 | # clean up for good measure, but need to keep miniconda tmp folder
 62 | git clean -x -f -d --exclude=tmp
 63 | 
 64 | # Tools to compare pip files: nameonly -> sorted package names, nameveronly -> sorted name==version entries,
 65 | # pipcheck -> diff the listed requirement files against pip freeze. On mac, sed -r needs to be sed -E
 66 | nameonly () { grep -i '^[a-z0-9]' | sed -E "s/([^=]*)==.*/\1/g" | tr _ - | sort -f; }
 67 | nameveronly () { grep -i '^[a-z0-9]' | awk '{print $1}' | tr _ - | sort -f; }
 68 | pipcheck () { cat "$@" | grep -i '^[a-z0-9]' | awk '{print $1}' | sed -f requirements/pipreqs_edits.sed | sort -f | uniq >ask.log && pip freeze | sed -f requirements/pipreqs_edits.sed | sort -f >got.log && diff -i ask.log got.log; }
 69 | 
 70 | # Now test the deps
 71 | ENV_PATH="${CONDA_ENVS}/deps_test"
 72 | conda create -y -q -p $ENV_PATH python=3.6
 73 | echo $ENV_PATH
 74 | source activate $ENV_PATH
 75 | python --version
 76 | pip freeze | sort
 77 | 
 78 | # Install all requirements, make sure they are mutually compatible
 79 | pip install -r requirements/base.txt
 80 | pipcheck requirements/base.txt
 81 | 
 82 | # Install package
 83 | python setup.py install
 84 | pipcheck requirements/base.txt requirements/self.txt
 85 | 
 86 | pip install -r requirements/optimizers.txt
 87 | pipcheck requirements/base.txt requirements/self.txt requirements/optimizers.txt
 88 | 
 89 | pip install -r requirements/test.txt
 90 | pipcheck requirements/base.txt requirements/self.txt requirements/optimizers.txt requirements/test.txt
 91 | 
 92 | pip install -r requirements/ipynb.txt
 93 | pipcheck requirements/base.txt requirements/self.txt requirements/test.txt requirements/optimizers.txt requirements/ipynb.txt
 94 | pip install -r requirements/docs.txt
 95 | pipcheck requirements/base.txt requirements/self.txt requirements/test.txt requirements/optimizers.txt requirements/ipynb.txt requirements/docs.txt
 96 | 
 97 | pip install -r requirements/tools.txt
 98 | 
 99 | # Make sure .in file corresponds to what is imported
100 | nameonly <requirements/base.in >ask.log
101 | pipreqs bayesmark/  --ignore bayesmark/builtin_opt/ --savepath requirement_chk.in
102 | sed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log
103 | diff ask.log got.log
104 | 
105 | nameonly <requirements/test.in >ask.log
106 | pipreqs test/ --savepath requirement_chk.in
107 | sed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log
108 | diff ask.log got.log
109 | 
110 | nameonly <requirements/optimizers.in >ask.log
111 | pipreqs bayesmark/builtin_opt/ --savepath requirement_chk.in
112 | sed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log
113 | diff ask.log got.log
114 | 
115 | nameonly <requirements/docs.in >ask.log
116 | pipreqs docs/ --savepath requirement_chk.in
117 | sed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log
118 | diff ask.log got.log
119 | 
120 | nameonly <requirements/ipynb.in >ask.log
121 | jupyter nbconvert --to script notebooks/*.ipynb
122 | pipreqs notebooks/ --savepath requirement_chk.in
123 | sed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log
124 | diff ask.log got.log
125 | 
126 | # Make sure txt file corresponds to pip compile
127 | # First copy the originals
128 | for f in requirements/*.txt; do cp -- "$f" "${f%.txt}.chk"; done
129 | # Now re-compile
130 | # no-upgrade means that by default it keeps the 2nd order dependency versions already in the requirements txt file
131 | # (otherwise it brings it to the very latest available version which often causes issues).
132 | pip-compile-multi -o txt --no-upgrade
133 | 
134 | nameveronly <requirements/base.chk >ask.log
135 | sed -f requirements/pipreqs_edits.sed requirements/base.txt | nameveronly >got.log
136 | diff ask.log got.log
137 | 
138 | nameveronly <requirements/test.chk >ask.log
139 | sed -f requirements/pipreqs_edits.sed requirements/test.txt | nameveronly >got.log
140 | diff ask.log got.log
141 | 
142 | nameveronly <requirements/optimizers.chk | sed -f requirements/pipreqs_edits.sed >ask.log
143 | sed -f requirements/pipreqs_edits.sed requirements/optimizers.txt | nameveronly >got.log
144 | diff ask.log got.log
145 | 
146 | nameveronly <requirements/ipynb.chk | sed -f requirements/pipreqs_edits.sed >ask.log
147 | sed -f requirements/pipreqs_edits.sed requirements/ipynb.txt | nameveronly >got.log
148 | diff ask.log got.log
149 | 
150 | nameveronly <requirements/docs.chk | sed -f requirements/pipreqs_edits.sed >ask.log
151 | sed -f requirements/pipreqs_edits.sed requirements/docs.txt | nameveronly >got.log
152 | diff ask.log got.log
153 | 
154 | nameveronly <requirements/tools.chk | sed -f requirements/pipreqs_edits.sed >ask.log
155 | sed -f requirements/pipreqs_edits.sed requirements/tools.txt | nameveronly >got.log
156 | diff ask.log got.log
157 | 
158 | # Deactivate virtual environment
159 | conda deactivate
160 | 
161 | # Set up environments for all Python versions and loop over them
162 | rm -rf "$CONDA_ENVS"
163 | for i in "${PY_VERSIONS[@]}"
164 | do
165 |     # Now test the deps
166 |     ENV_PATH="${CONDA_ENVS}/unit_test"
167 |     conda create -y -q -p $ENV_PATH python=$i
168 |     echo $ENV_PATH
169 |     source activate $ENV_PATH
170 |     python --version
171 |     pip freeze | sort
172 | 
173 |     # Install all requirements
174 |     pip install -r requirements/test.txt
175 | 
176 |     # Install package
177 |     python setup.py install
178 | 
179 |     # Run tests
180 |     pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings --cov=bayesmark --cov-report html
181 | 
182 |     conda deactivate
183 | done
184 | 


--------------------------------------------------------------------------------
/test/data_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from hypothesis import given
15 | from hypothesis.strategies import from_regex, sampled_from
16 | 
17 | from bayesmark import data
18 | 
19 | DATA_NAMES = sorted(data.DATA_LOADERS.keys())
20 | 
21 | 
22 | @given(sampled_from(DATA_NAMES) | from_regex("^reg-[A-Z]*") | from_regex("^clf-[A-Z]*"))
23 | def test_get_problem_type(dataset_name):
24 |     problem_type = data.get_problem_type(dataset_name)
25 |     assert problem_type is not None
26 | 


--------------------------------------------------------------------------------
/test/dummy.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import pytest
15 | import pytest_cov
16 | 
17 | # import extra deps and use it to keep pipreqs and flake8 happy
18 | for pkg in (pytest, pytest_cov):
19 |     print("%s %s" % (pkg.__name__, pkg.__version__))
20 | 


--------------------------------------------------------------------------------
/test/expected_max_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from hypothesis import given
15 | from hypothesis.strategies import floats, integers, lists
16 | 
17 | import bayesmark.expected_max as em
18 | 
19 | 
20 | @given(integers(1, 10), integers(1, 10))
21 | def test_get_expected_max_weights(n, m):
22 |     pdf = em.get_expected_max_weights(n, m)
23 |     assert pdf is not None
24 | 
25 | 
26 | @given(lists(floats()), integers(1, 10))
27 | def test_expected_max(x, m):
28 |     E_max_x = em.expected_max(x, m)
29 |     assert E_max_x is not None
30 | 
31 | 
32 | @given(lists(floats()), integers(1, 10))
33 | def test_expected_min(x, m):
34 |     E_min_x = em.expected_min(x, m)
35 |     assert E_min_x is not None
36 | 


--------------------------------------------------------------------------------
/test/experiment_aggregate_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 Uber Technologies, Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | from itertools import product
 15 | 
 16 | import numpy as np
 17 | from hypothesis import HealthCheck, given, settings
 18 | from hypothesis.strategies import floats
 19 | from hypothesis_gufunc.extra.xr import (
 20 |     fixed_datasets,
 21 |     simple_coords,
 22 |     simple_dataarrays,
 23 |     simple_datasets,
 24 |     xr_coords,
 25 |     xr_dims,
 26 | )
 27 | 
 28 | import bayesmark.experiment_aggregate as agg
 29 | from bayesmark.constants import EVAL_PHASE, ITER, METHOD, OBS_PHASE, SUGGEST, SUGGEST_PHASE, TEST_CASE, TRIAL
 30 | from bayesmark.experiment import OBJECTIVE_NAMES
 31 | from bayesmark.signatures import N_SUGGESTIONS
 32 | 
 33 | N_SIG = N_SUGGESTIONS
 34 | SIG_POINT = "sig_point"
 35 | 
 36 | 
 37 | def data_to_concat():
 38 |     def separate(ds):
 39 |         G = product(
 40 |             ds.coords[TEST_CASE].values.tolist(), ds.coords[METHOD].values.tolist(), ds.coords[TRIAL].values.tolist()
 41 |         )
 42 | 
 43 |         L = []
 44 |         for test_case, method, trial in G:
 45 |             # Could swap out trial for UUID here
 46 |             meta_data = (test_case, method, trial)
 47 | 
 48 |             ds_sub = ds.sel({TEST_CASE: test_case, METHOD: method, TRIAL: trial}, drop=True)
 49 | 
 50 |             perf_ds = ds_sub[list(OBJECTIVE_NAMES)]
 51 |             time_ds = ds_sub[[SUGGEST_PHASE, EVAL_PHASE, OBS_PHASE]]
 52 |             suggest_ds = ds_sub[["foo", "bar", "baz"]]
 53 |             sig = ds_sub["sig"].values.tolist()
 54 |             data = (perf_ds, time_ds, suggest_ds, sig)
 55 |             L.append((meta_data, data))
 56 |             assert not any(np.any(np.isnan(perf_ds[kk].values)) for kk in perf_ds)
 57 |             assert not any(np.any(np.isnan(time_ds[kk].values)) for kk in time_ds)
 58 |             assert not any(np.any(np.isnan(suggest_ds[kk].values)) for kk in suggest_ds)
 59 |             assert not np.any(np.isnan(sig))
 60 |         return L
 61 | 
 62 |     vars_to_dims = {
 63 |         "sig": (SIG_POINT, TEST_CASE, METHOD, TRIAL),
 64 |         SUGGEST_PHASE: (ITER, TEST_CASE, METHOD, TRIAL),
 65 |         EVAL_PHASE: (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL),
 66 |         OBS_PHASE: (ITER, TEST_CASE, METHOD, TRIAL),
 67 |     }
 68 |     dtype = {SUGGEST_PHASE: np.float_, EVAL_PHASE: np.float_, OBS_PHASE: np.float_, "sig": np.float_}
 69 | 
 70 |     for obj in OBJECTIVE_NAMES:
 71 |         vars_to_dims[obj] = (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
 72 |         dtype[obj] = np.float_
 73 | 
 74 |     # We should also generate this using the space strategy, but hard coding this test case is good enough for now.
 75 |     input_vars = {"foo": np.float_, "bar": np.float_, "baz": np.int_}
 76 |     for vv, dd in input_vars.items():
 77 |         vars_to_dims[vv] = (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
 78 |         dtype[vv] = dd
 79 | 
 80 |     float_no_nan = floats(allow_nan=False, min_value=-10, max_value=10)
 81 |     # Using only str following dim conventions for coords here
 82 |     coords_st = {
 83 |         ITER: simple_coords(min_side=1),
 84 |         SUGGEST: simple_coords(min_side=1),
 85 |         TEST_CASE: xr_coords(elements=xr_dims(), min_side=1),
 86 |         METHOD: xr_coords(elements=xr_dims(), min_side=1),
 87 |         TRIAL: simple_coords(min_side=1),
 88 |         SIG_POINT: simple_coords(min_side=N_SIG, max_side=N_SIG),
 89 |     }
 90 |     S = fixed_datasets(vars_to_dims, dtype=dtype, elements=float_no_nan, coords_st=coords_st, min_side=1).map(separate)
 91 |     return S
 92 | 
 93 | 
 94 | def time_datasets():
 95 |     vars_to_dims = {SUGGEST_PHASE: (ITER,), EVAL_PHASE: (ITER, SUGGEST), OBS_PHASE: (ITER,)}
 96 |     dtype = {SUGGEST_PHASE: np.float_, EVAL_PHASE: np.float_, OBS_PHASE: np.float_}
 97 |     elements = floats(min_value=0, allow_infinity=False, allow_nan=False)
 98 |     S = simple_datasets(vars_to_dims, dtype=dtype, elements=elements, min_side=1)
 99 |     return S
100 | 
101 | 
102 | def perf_dataarrays():
103 |     dims = (ITER, SUGGEST)
104 |     elements = floats(allow_nan=False)
105 |     S = simple_dataarrays(dims, dtype=np.float_, elements=elements)
106 |     return S
107 | 
108 | 
109 | @given(time_datasets())
110 | def test_summarize_time(all_time):
111 |     time_summary = agg.summarize_time(all_time)
112 |     assert time_summary is not None
113 | 
114 | 
115 | @given(data_to_concat())
116 | @settings(deadline=None, suppress_health_check=(HealthCheck.too_slow,))
117 | def test_concat_experiments(all_experiments):
118 |     all_experiments = list(all_experiments)
119 |     all_perf, all_time, all_suggest, all_sigs = agg.concat_experiments(all_experiments, ravel=False)
120 | 


--------------------------------------------------------------------------------
/test/experiment_analysis_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from hypothesis import given, settings
15 | 
16 | import bayesmark.experiment_baseline as base
17 | from bayesmark import experiment_analysis as anal
18 | from bayesmark.constants import TRIAL
19 | from bayesmark.np_util import argmin_2d
20 | from hypothesis_util import gufunc_floats
21 | from util import perf_dataarrays
22 | 
23 | 
24 | @given(gufunc_floats("(n,p,t),(n,p,t)->(n,t)", allow_nan=False, unique=True, min_side=1))
25 | def test_get_perf_array(args):
26 |     """Behavior for tie-breaking in `evals_visible` is complex, so only testing all unique case here."""
27 |     evals, evals_visible = args
28 | 
29 |     n_iter, _, n_trials = evals.shape
30 | 
31 |     perf_array = anal.get_perf_array(evals, evals_visible)
32 |     assert perf_array.shape == (n_iter, n_trials)
33 | 
34 |     for ii in range(n_iter):
35 |         for jj in range(n_trials):
36 |             idx0, idx1 = argmin_2d(evals_visible[: ii + 1, :, jj])
37 |             assert perf_array[ii, jj] == evals[idx0, idx1, jj]
38 | 
39 | 
40 | @given(perf_dataarrays(min_trial=2))
41 | @settings(deadline=None)
42 | def test_compute_aggregates(perf_da):
43 |     n_trial = perf_da.sizes[TRIAL]
44 | 
45 |     split = n_trial // 2
46 |     assert isinstance(split, int)
47 | 
48 |     perf_da1 = perf_da.isel({TRIAL: slice(None, split)})
49 |     assert perf_da1.sizes[TRIAL] >= 1
50 | 
51 |     perf_da2 = perf_da.isel({TRIAL: slice(split, None)})
52 |     assert perf_da2.sizes[TRIAL] >= 1
53 |     perf_da2.coords[TRIAL] = list(range(perf_da2.sizes[TRIAL]))
54 | 
55 |     baseline_ds = base.compute_baseline(perf_da1)
56 |     anal.compute_aggregates(perf_da2, baseline_ds)
57 | 
58 | 
59 | @given(perf_dataarrays(min_trial=4))
60 | @settings(deadline=None)
61 | def test_compute_aggregates_with_aux(perf_da):
62 |     # Split to get baseline
63 |     n_trial = perf_da.sizes[TRIAL]
64 |     split = n_trial // 2
65 |     assert isinstance(split, int)
66 |     perf_da1 = perf_da.isel({TRIAL: slice(None, split)})
67 |     assert perf_da1.sizes[TRIAL] >= 1
68 |     perf_da2 = perf_da.isel({TRIAL: slice(split, None)})
69 |     assert perf_da2.sizes[TRIAL] >= 1
70 |     perf_da2.coords[TRIAL] = list(range(perf_da2.sizes[TRIAL]))
71 |     baseline_ds = base.compute_baseline(perf_da1)
72 |     perf_da = perf_da2
73 | 
74 |     # Split to get visible
75 |     n_trial = perf_da.sizes[TRIAL]
76 |     split = n_trial // 2
77 |     assert isinstance(split, int)
78 |     perf_da1 = perf_da.isel({TRIAL: slice(None, split)})
79 |     assert perf_da1.sizes[TRIAL] >= 1
80 |     perf_da2 = perf_da.isel({TRIAL: slice(split, 2 * split)})
81 |     assert perf_da2.sizes[TRIAL] >= 1
82 |     perf_da2.coords[TRIAL] = list(range(perf_da2.sizes[TRIAL]))
83 | 
84 |     anal.compute_aggregates(perf_da2, baseline_ds, visible_perf_da=perf_da1)
85 | 


--------------------------------------------------------------------------------
/test/experiment_baseline_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import warnings
15 | 
16 | from hypothesis import given, settings
17 | 
18 | import bayesmark.experiment_baseline as base
19 | from util import perf_dataarrays
20 | 
21 | 
22 | @given(perf_dataarrays())
23 | @settings(deadline=None)
24 | def test_compute_baseline(perf_da):
25 |     with warnings.catch_warnings():
26 |         warnings.filterwarnings("ignore", category=RuntimeWarning)
27 |         baseline_ds = base.compute_baseline(perf_da)
28 |     assert baseline_ds is not None
29 | 


--------------------------------------------------------------------------------
/test/experiment_db_init_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import bayesmark.experiment_db_init as dbi
15 | 
16 | 
17 | def test_main():
18 |     # Really a nop test since there is nothing to test in this func
19 |     assert dbi.EXIST_OK
20 | 


--------------------------------------------------------------------------------
/test/experiment_launcher_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 Uber Technologies, Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | import os
 15 | import warnings
 16 | from io import StringIO
 17 | from string import ascii_letters, digits
 18 | 
 19 | import numpy as np
 20 | from hypothesis import HealthCheck, assume, given, settings
 21 | from hypothesis.strategies import (
 22 |     booleans,
 23 |     fixed_dictionaries,
 24 |     from_regex,
 25 |     integers,
 26 |     lists,
 27 |     sampled_from,
 28 |     text,
 29 |     tuples,
 30 |     uuids,
 31 | )
 32 | from pathvalidate.argparse import validate_filename, validate_filepath
 33 | 
 34 | from bayesmark import data
 35 | from bayesmark import experiment_launcher as launcher
 36 | from bayesmark.builtin_opt.config import CONFIG
 37 | from bayesmark.cmd_parse import CmdArgs
 38 | from bayesmark.constants import ARG_DELIM, METRICS, MODEL_NAMES
 39 | from hypothesis_util import seeds
 40 | 
 41 | DATA_NAMES = sorted(data.DATA_LOADERS.keys())
 42 | 
 43 | 
 44 | def filepaths():
 45 |     def valid(ss):
 46 |         try:
 47 |             validate_filepath(ss)
 48 |         except Exception:
 49 |             return False
 50 |         return True
 51 | 
 52 |     alphabet = ascii_letters + digits + "_.-~" + os.sep
 53 |     S = text(alphabet=alphabet, min_size=1).map(lambda ss: os.sep + ss).filter(valid)
 54 |     return S
 55 | 
 56 | 
 57 | def filenames(suffix=""):
 58 |     def valid(ss):
 59 |         try:
 60 |             validate_filename(ss)
 61 |         except Exception:
 62 |             return False
 63 |         return True
 64 | 
 65 |     alphabet = ascii_letters + digits + "_.-~"
 66 |     S = text(alphabet=alphabet, min_size=1).map(lambda ss: ss + suffix).filter(valid)
 67 |     return S
 68 | 
 69 | 
 70 | def joinables():
 71 |     S = filenames().filter(lambda ss: ARG_DELIM not in ss)
 72 |     return S
 73 | 
 74 | 
 75 | def datasets():
 76 |     return sampled_from(DATA_NAMES) | from_regex("^reg-[A-Z]*$") | from_regex("^clf-[A-Z]*$")
 77 | 
 78 | 
 79 | def launcher_args(opts, min_jobs=0):
 80 |     args_dict = {
 81 |         CmdArgs.db_root: filepaths(),
 82 |         CmdArgs.optimizer_root: filepaths(),
 83 |         CmdArgs.uuid: uuids(),
 84 |         CmdArgs.data_root: filepaths(),
 85 |         CmdArgs.db: filenames(),
 86 |         CmdArgs.optimizer: lists(sampled_from(opts), min_size=1, max_size=len(opts)),
 87 |         CmdArgs.data: lists(datasets(), min_size=1),
 88 |         CmdArgs.classifier: lists(sampled_from(MODEL_NAMES), min_size=1, max_size=len(MODEL_NAMES)),
 89 |         CmdArgs.metric: lists(sampled_from(METRICS), min_size=1, max_size=len(METRICS)),
 90 |         CmdArgs.n_calls: integers(1, 100),
 91 |         CmdArgs.n_suggest: integers(1, 100),
 92 |         CmdArgs.n_repeat: integers(1, 100),
 93 |         CmdArgs.n_jobs: integers(min_jobs, 1000),
 94 |         CmdArgs.jobs_file: filepaths(),
 95 |         CmdArgs.verbose: booleans(),
 96 |     }
 97 |     S = fixed_dictionaries(args_dict)
 98 |     return S
 99 | 
100 | 
def launcher_args_and_config(min_jobs=0):
    """Build a strategy producing ``(args, opt_file_lookup)`` pairs.

    A list of optimizer names is drawn first; the launcher arguments and a
    dict mapping each optimizer name to a generated ``.py`` file name are then
    built from that list.
    """

    def _pair_for(opt_names):
        opt_files = fixed_dictionaries({name: filenames(suffix=".py") for name in opt_names})
        return tuples(launcher_args(opt_names, min_jobs=min_jobs), opt_files)

    # Make opt names a mix of built in opts and arbitrary names
    builtin_names = sorted(CONFIG.keys())
    opt_name_lists = lists(joinables() | sampled_from(builtin_names), min_size=1)
    return opt_name_lists.flatmap(_pair_for)
112 | 
113 | 
def test_is_arg_safe_empty():
    """The empty string must be reported as unsafe, using a genuine ``bool``."""
    result = launcher._is_arg_safe("")
    assert isinstance(result, bool)
    assert result is False
118 | 
119 | 
@given(launcher_args_and_config(), uuids())
@settings(deadline=None, suppress_health_check=(HealthCheck.too_slow,))
def test_gen_commands(args, run_uuid):
    """Smoke test: ``gen_commands`` can be fully iterated for safe arguments."""
    args, opt_file_lookup = args

    # Discard examples whose plain string arguments are not safe
    string_values = (vv for vv in args.values() if isinstance(vv, str))
    assume(all(launcher._is_arg_safe(vv) for vv in string_values))

    # List-valued arguments: each entry must be safe, then duplicates are dropped
    for key in (CmdArgs.optimizer, CmdArgs.data, CmdArgs.classifier, CmdArgs.metric):
        assume(all(launcher._is_arg_safe(vv) for vv in args[key]))
        args[key] = list(set(args[key]))

    # Every data set needs at least one selected metric valid for its problem type
    chosen_metrics = set(args[CmdArgs.metric])
    metrics_by_type = {
        problem_type: sorted(chosen_metrics.intersection(valid))
        for problem_type, valid in data.METRICS_LOOKUP.items()
    }
    assume(all(len(metrics_by_type[data.get_problem_type(dd)]) > 0 for dd in args[CmdArgs.data]))

    commands = list(launcher.gen_commands(args, opt_file_lookup, run_uuid))
    assert commands is not None
140 | 
141 | 
@given(launcher_args_and_config(min_jobs=1), uuids(), seeds())
@settings(deadline=None, suppress_health_check=(HealthCheck.too_slow,))
def test_dry_run(args, run_uuid, seed):
    """Smoke test: ``dry_run`` writes its jobs to an in-memory buffer without raising."""
    args, opt_file_lookup = args

    # Discard examples whose plain string arguments are not safe
    string_values = (vv for vv in args.values() if isinstance(vv, str))
    assume(all(launcher._is_arg_safe(vv) for vv in string_values))

    # List-valued arguments: each entry must be safe, then duplicates are dropped
    for key in (CmdArgs.optimizer, CmdArgs.data, CmdArgs.classifier, CmdArgs.metric):
        assume(all(launcher._is_arg_safe(vv) for vv in args[key]))
        args[key] = list(set(args[key]))

    # Every data set needs at least one selected metric valid for its problem type
    chosen_metrics = set(args[CmdArgs.metric])
    metrics_by_type = {
        problem_type: sorted(chosen_metrics.intersection(valid))
        for problem_type, valid in data.METRICS_LOOKUP.items()
    }
    assume(all(len(metrics_by_type[data.get_problem_type(dd)]) > 0 for dd in args[CmdArgs.data]))

    out_buffer = StringIO()
    rng = np.random.RandomState(seed)

    # UserWarnings raised during the dry run are irrelevant to this test
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        launcher.dry_run(args, opt_file_lookup, run_uuid, out_buffer, random=rng)

    written = out_buffer.getvalue()
    assert written is not None
168 | 


--------------------------------------------------------------------------------
/test/random_search_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 Uber Technologies, Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | import numpy as np
 15 | from hypothesis import given, settings
 16 | from hypothesis.strategies import integers
 17 | 
 18 | import bayesmark.space as sp
 19 | from bayesmark.np_util import linear_rescale
 20 | from bayesmark.random_search import suggest_dict
 21 | from hypothesis_util import close_enough, gufunc_floats, seeds
 22 | from util import space_configs
 23 | 
 24 | 
 25 | @given(space_configs(allow_missing=True), integers(min_value=1, max_value=8), seeds())
 26 | @settings(deadline=None)
 27 | def test_random_search_suggest_sanity(api_args, n_suggest, seed):
 28 |     meta, X, y, _ = api_args
 29 | 
 30 |     # Get the unwarped X
 31 |     S = sp.JointSpace(meta)
 32 |     lower, upper = S.get_bounds().T
 33 |     S.validate(X)
 34 | 
 35 |     N = len(X)
 36 |     # Split history and call twice with diff histories but same seed
 37 |     M = N // 2
 38 |     X1, X2 = X[:M], X[M:]
 39 |     y1, y2 = y[:M], y[M:]
 40 | 
 41 |     x_guess = suggest_dict(X1, y1, meta, n_suggest, random=np.random.RandomState(seed))
 42 |     x_guess2 = suggest_dict(X2, y2, meta, n_suggest, random=np.random.RandomState(seed))
 43 | 
 44 |     # Check types too
 45 |     assert len(x_guess) == n_suggest
 46 |     assert all(all(close_enough(x_guess[nn][k], x_guess2[nn][k]) for k in x_guess[nn]) for nn in range(len(x_guess)))
 47 |     assert np.all(x_guess == x_guess2)
 48 |     # Make sure validated
 49 |     S.validate(x_guess)
 50 |     S.validate(x_guess2)
 51 | 
 52 |     # Test sanity of output
 53 |     D, = lower.shape
 54 |     x_guess_w = S.warp(x_guess)
 55 |     assert type(x_guess_w) == np.ndarray
 56 |     assert x_guess_w.dtype.kind == "f"
 57 |     assert x_guess_w.shape == (n_suggest, D)
 58 |     assert x_guess_w.shape == (n_suggest, D)
 59 |     assert np.all(x_guess_w <= upper)
 60 | 
 61 | 
 62 | @given(
 63 |     gufunc_floats("(n,D),(n)->()", min_value=0.0, max_value=1.0, min_side={"D": 1}),
 64 |     integers(min_value=1, max_value=10),
 65 |     seeds(),
 66 | )
 67 | @settings(deadline=None)
 68 | def test_random_search_suggest_diff(api_args, n_suggest, seed):
 69 |     # Hard to know how many iters needed for arbitrary space that we need to
 70 |     # run so that we don't get dupes by chance. So, for now, let's just stick
 71 |     # with this simple space.
 72 |     dim = {"space": "linear", "type": "real", "range": [1.0, 5.0]}
 73 | 
 74 |     # Use at least 10 n_suggest to make sure don't get same answer by chance
 75 |     X_w, y = api_args
 76 | 
 77 |     D = X_w.shape[1]
 78 |     param_names = ["x%d" % ii for ii in range(5)]
 79 |     meta = dict(zip(param_names, [dim] * D))
 80 | 
 81 |     # Get the unwarped X
 82 |     S = sp.JointSpace(meta)
 83 |     lower, upper = S.get_bounds().T
 84 |     X_w = linear_rescale(X_w, lb0=0.0, ub0=1.0, lb1=lower, ub1=upper)
 85 |     X = S.unwarp(X_w)
 86 |     S.validate(X)
 87 | 
 88 |     seed = seed // 2  # Keep in bounds even after add 7
 89 | 
 90 |     x_guess = suggest_dict(X, y, meta, n_suggest, random=np.random.RandomState(seed))
 91 |     # Use diff seed to intentionally get diff result
 92 |     x_guess2 = suggest_dict(X, y, meta, n_suggest, random=np.random.RandomState(seed + 7))
 93 | 
 94 |     # Check types too
 95 |     assert len(x_guess) == n_suggest
 96 |     assert len(x_guess2) == n_suggest
 97 |     assert not np.all(x_guess == x_guess2)
 98 |     # Make sure validated
 99 |     S.validate(x_guess)
100 |     S.validate(x_guess2)
101 | 
102 |     # Test sanity of output
103 |     D, = lower.shape
104 | 
105 |     x_guess_w = S.warp(x_guess)
106 |     assert type(x_guess_w) == np.ndarray
107 |     assert x_guess_w.dtype.kind == "f"
108 |     assert x_guess_w.shape == (n_suggest, D)
109 |     assert x_guess_w.shape == (n_suggest, D)
110 |     assert np.all(x_guess_w <= upper)
111 | 
112 |     x_guess_w = S.warp(x_guess2)
113 |     assert type(x_guess_w) == np.ndarray
114 |     assert x_guess_w.dtype.kind == "f"
115 |     assert x_guess_w.shape == (n_suggest, D)
116 |     assert x_guess_w.shape == (n_suggest, D)
117 |     assert np.all(x_guess_w <= upper)
118 | 


--------------------------------------------------------------------------------
/test/serialize_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import os
15 | from string import ascii_letters, digits
16 | 
17 | from hypothesis import given
18 | from hypothesis.strategies import lists, text, uuids
19 | from pathvalidate.argparse import validate_filename, validate_filepath
20 | 
21 | from bayesmark.serialize import XRSerializer
22 | 
23 | 
def filepaths():
    """Build a strategy for file paths that start with ``os.sep`` and pass
    ``validate_filepath``.
    """

    def _is_ok(path):
        # The validator signals rejection by raising
        try:
            validate_filepath(path)
        except Exception:
            ok = False
        else:
            ok = True
        return ok

    charset = ascii_letters + digits + "_.-~" + os.sep
    tails = text(alphabet=charset, min_size=1)
    return tails.map(lambda tail: os.sep + tail).filter(_is_ok)
35 | 
36 | 
def filenames(suffix=""):
    """Build a strategy for file names that pass ``validate_filename``.

    Parameters
    ----------
    suffix : str
        Text appended to every generated name before validation.
    """

    def _is_ok(name):
        # The validator signals rejection by raising
        try:
            validate_filename(name)
        except Exception:
            ok = False
        else:
            ok = True
        return ok

    charset = ascii_letters + digits + "_.-~"
    stems = text(alphabet=charset, min_size=1)
    return stems.map(lambda stem: stem + suffix).filter(_is_ok)
48 | 
49 | 
@given(filepaths(), lists(filenames()), filenames())
def test_init_db_manual(db_root, keys, db):
    """Smoke test: ``init_db_manual`` runs on generated paths and keys without raising."""
    # No assertion on the result; only exceptions would fail this test.
    XRSerializer.init_db_manual(db_root, keys, db)
53 | 
54 | 
@given(uuids())
def test_uuid_to_fname(uu):
    """UUID -> file name -> UUID round-trips, and re-encoding gives the same name."""
    fname = XRSerializer._uuid_to_fname(uu)
    recovered = XRSerializer._fname_to_uuid(fname)
    assert uu == recovered

    # Encoding the recovered UUID must reproduce the original file name
    fname_again = XRSerializer._uuid_to_fname(recovered)
    assert fname == fname_again
63 | 
64 | 
@given(filenames())
def test_key_to_fname(key):
    """Key -> file name -> key round-trips to the original key."""
    fname = XRSerializer._key_to_fname(key)
    recovered = XRSerializer._fname_to_key(fname)
    assert key == recovered
70 | 
71 | 
@given(filepaths(), lists(filenames()), filenames())
def test_validate(db_root, keys, db):
    """Smoke test: ``_validate`` accepts generated paths, keys and db name without raising."""
    # No assertion on the result; only exceptions would fail this test.
    XRSerializer._validate(db_root, keys, db)
75 | 


--------------------------------------------------------------------------------
/test/signatures_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import json
15 | import random as pyrandom
16 | import warnings
17 | 
18 | import numpy as np
19 | from hypothesis import given
20 | from hypothesis.strategies import dictionaries, floats, lists, text, tuples
21 | 
22 | import bayesmark.signatures as ss
23 | from bayesmark.experiment import OBJECTIVE_NAMES
24 | from util import space_configs
25 | 
# Length of each generated signature vector, taken from the module under test
N_SIG = ss.N_SUGGESTIONS
27 | 
28 | 
def bsigs():
    """Build a strategy for one signature: exactly ``N_SIG`` finite floats."""
    finite = floats(allow_infinity=False, allow_nan=False)
    return lists(finite, min_size=N_SIG, max_size=N_SIG)
32 | 
33 | 
def sigs():
    """Build a strategy for a non-empty list of signatures (see ``bsigs``)."""
    return lists(bsigs(), min_size=1)
37 | 
38 | 
def sig_pair():
    """Build a strategy producing ``(signatures, signatures_ref)`` dict pairs.

    Every key appears in ``signatures_ref``; only keys drawn with two
    signatures also appear in ``signatures``. Values are numpy arrays.
    """

    def _split(raw):
        observed, reference = {}, {}
        for key, entry in raw.items():
            n_entries = len(entry)
            if n_entries == 1:
                # Reference-only key
                (ref_only,) = entry
                reference[key] = np.asarray(ref_only)
            elif n_entries == 2:
                obs, ref = entry
                observed[key] = np.asarray(obs)
                reference[key] = np.asarray(ref)
            else:
                assert False
        return observed, reference

    one_or_two = tuples(bsigs()) | tuples(bsigs(), bsigs())
    return dictionaries(text(), one_or_two).map(_split)
57 | 
58 | 
def some_mock_f(x):
    """Some arbitrary deterministic test function.

    The JSON dump of `x` (with sorted keys) seeds a private random stream, and
    one standard normal draw is returned per entry of ``OBJECTIVE_NAMES``.
    """
    seed_text = json.dumps(x, sort_keys=True)
    rng = pyrandom.Random(seed_text)
    return [rng.gauss(0, 1) for _ in OBJECTIVE_NAMES]
65 | 
66 | 
@given(space_configs())
def test_get_func_signature(api_config):
    """Smoke test: ``get_func_signature`` returns a pair for a mock objective."""
    # Only the space description is needed from the generated tuple
    api_config, _, _, _ = api_config

    # Unpacking checks that exactly two values come back
    _, _ = ss.get_func_signature(some_mock_f, api_config)
72 | 
73 | 
@given(dictionaries(text(), sigs()))
def test_analyze_signatures(signatures):
    """Smoke test: ``analyze_signatures`` returns a pair for arbitrary signatures."""
    with warnings.catch_warnings():
        # RuntimeWarnings are silenced for the duration of the call
        warnings.simplefilter("ignore", category=RuntimeWarning)
        # Unpacking checks that exactly two values come back
        _, _ = ss.analyze_signatures(signatures)
79 | 
80 | 
@given(sig_pair())
def test_analyze_signature_pair(args):
    """Smoke test: ``analyze_signature_pair`` returns a pair for generated inputs."""
    observed, reference = args
    with warnings.catch_warnings():
        # RuntimeWarnings are silenced for the duration of the call
        warnings.simplefilter("ignore", category=RuntimeWarning)
        # Unpacking checks that exactly two values come back
        _, _ = ss.analyze_signature_pair(observed, reference)
87 | 


--------------------------------------------------------------------------------
/test/sklearn_funcs_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 Uber Technologies, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import pickle as pkl
15 | 
16 | import numpy as np
17 | from hypothesis import assume, given, settings
18 | from hypothesis.strategies import sampled_from, text
19 | from sklearn.linear_model import LinearRegression
20 | 
21 | from bayesmark import data
22 | from bayesmark import sklearn_funcs as skf
23 | from bayesmark.constants import ARG_DELIM, DATA_LOADER_NAMES, METRICS, MODEL_NAMES
24 | from bayesmark.random_search import suggest_dict
25 | from bayesmark.space import JointSpace
26 | from hypothesis_util import seeds
27 | 
28 | 
@given(sampled_from(MODEL_NAMES), sampled_from(DATA_LOADER_NAMES), sampled_from(METRICS), seeds(), seeds())
@settings(deadline=None)
def test_sklearn_model(model, dataset, metric, shuffle_seed, rs_seed):
    """Evaluating one random suggestion yields a tuple of floats, one per objective."""
    # Skip metrics that are not listed for this data set's problem type
    assume(metric in data.METRICS_LOOKUP[data.get_problem_type(dataset)])

    problem = skf.SklearnModel(model, dataset, metric, shuffle_seed=shuffle_seed)

    space_config = problem.get_api_config()
    rng = np.random.RandomState(rs_seed)
    (candidate,) = suggest_dict([], [], space_config, n_suggestions=1, random=rng)

    loss = problem.evaluate(candidate)

    assert isinstance(loss, tuple)
    assert all(isinstance(term, float) for term in loss)
    assert np.shape(loss) == np.shape(problem.objective_names)
45 | 
46 | 
@given(text(), text(), text())
def test_inverse_test_case_str(model, dataset, scorer):
    """``inverse_test_case_str`` undoes ``test_case_str`` when no part contains ``ARG_DELIM``."""
    combined = model + dataset + scorer
    assume(ARG_DELIM not in combined)

    encoded = skf.SklearnModel.test_case_str(model, dataset, scorer)
    decoded = skf.SklearnModel.inverse_test_case_str(encoded)

    assert decoded == (model, dataset, scorer)
55 | 
56 | 
@given(sampled_from(MODEL_NAMES), sampled_from(DATA_LOADER_NAMES), sampled_from(METRICS), seeds(), seeds())
@settings(deadline=None)
def test_sklearn_model_surr(model, dataset, metric, model_seed, rs_seed):
    """A pickled linear model used as surrogate reproduces its own predictions.

    A ``LinearRegression`` is fit on warped random suggestions with random
    targets, pickled, and handed to ``SklearnSurrogate``; evaluating the first
    suggestion must match the regression's prediction for that point.
    """
    # Skip metrics that are not listed for this data set's problem type
    assume(metric in data.METRICS_LOOKUP[data.get_problem_type(dataset)])

    problem = skf.SklearnModel(model, dataset, metric, shuffle_seed=0)
    space_config = problem.get_api_config()
    space = JointSpace(space_config)

    n_obj = len(problem.objective_names)
    n_suggestions = 20

    suggest_rng = np.random.RandomState(rs_seed)
    x_guess = suggest_dict([], [], space_config, n_suggestions=n_suggestions, random=suggest_rng)
    x_guess_w = space.warp(x_guess)

    # Random regression targets, one column per objective
    target_rng = np.random.RandomState(model_seed)
    targets = target_rng.randn(n_suggestions, n_obj)

    reg = LinearRegression()
    reg.fit(x_guess_w, targets)
    expected = reg.predict(x_guess_w)

    # Serialize the model and drop the live object before building the surrogate
    path = pkl.dumps(reg)
    del reg
    assert isinstance(path, bytes)

    surrogate = skf.SklearnSurrogate(model, dataset, metric, path)
    loss = surrogate.evaluate(x_guess[0])

    assert isinstance(loss, tuple)
    assert all(isinstance(term, float) for term in loss)
    assert np.shape(loss) == np.shape(problem.objective_names)

    assert np.allclose(expected[0], np.array(loss))
93 | 


--------------------------------------------------------------------------------
/test/stats_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 Uber Technologies, Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | import numpy as np
 15 | import scipy.stats as sst
 16 | from hypothesis import assume, given
 17 | from hypothesis.strategies import integers, lists, sampled_from
 18 | from hypothesis_gufunc.gufunc import gufunc_args
 19 | from sklearn.preprocessing import robust_scale
 20 | 
 21 | from bayesmark import stats
 22 | from hypothesis_util import close_enough, mfloats, probs, seeds
 23 | 
 24 | 
 25 | def t_test_(x):
 26 |     """Perform a standard t-test to test if the values in `x` are sampled from
 27 |     a distribution with a zero mean.
 28 | 
 29 |     Parameters
 30 |     ----------
 31 |     x : array-like, shape (n_samples,)
 32 |         array of data points to test.
 33 | 
 34 |     Returns
 35 |     -------
 36 |     pval : float
 37 |         p-value (in [0,1]) from t-test on `x`.
 38 |     """
 39 |     assert np.ndim(x) == 1 and (not np.any(np.isnan(x)))
 40 | 
 41 |     if (len(x) <= 1) or (not np.all(np.isfinite(x))):
 42 |         return 1.0  # Can't say anything about scale => p=1
 43 | 
 44 |     _, pval = sst.ttest_1samp(x, 0.0)
 45 |     if np.isnan(pval):
 46 |         # Should only be possible if scale underflowed to zero:
 47 |         assert np.var(x, ddof=1) <= 1e-100
 48 |         # It is debatable if the condition should be ``np.mean(x) == 0.0`` or
 49 |         # ``np.all(x == 0.0)``. Should not matter in practice.
 50 |         pval = np.float(np.mean(x) == 0.0)
 51 |     assert 0.0 <= pval and pval <= 1.0
 52 |     return pval
 53 | 
 54 | 
 55 | @given(gufunc_args("(n),()->(n)", dtype=np.float_, elements=[mfloats(), probs()], min_side=2))
 56 | def test_robust_standardize_to_sklearn(args):
 57 |     X, q_level = args
 58 | 
 59 |     q0, q1 = 0.5 * (1.0 - q_level), 0.5 * (1.0 + q_level)
 60 |     assert close_enough(q1 - q0, q_level)
 61 | 
 62 |     X_bo = stats.robust_standardize(X, q_level=q_level)
 63 | 
 64 |     X = X[:, None]
 65 |     X_skl = robust_scale(X, axis=0, with_centering=True, with_scaling=True, quantile_range=[100.0 * q0, 100.0 * q1])
 66 |     X_skl = X_skl[:, 0] * (sst.norm.ppf(q1) - sst.norm.ppf(q0))
 67 | 
 68 |     assert close_enough(X_bo, X_skl, equal_nan=True)
 69 | 
 70 | 
 71 | def test_robust_standardize_broadcast():
 72 |     """Need to do things different here since standardize broadcasts over the
 73 |     wrong dimension (0 instead of -1).
 74 |     """
 75 |     # Build vectorize version, this is just loop inside.
 76 |     f_vec = np.vectorize(stats.robust_standardize, signature="(n),()->(n)", otypes=["float64"])
 77 | 
 78 |     @given(gufunc_args("(n,m),()->(n,m)", dtype=np.float_, min_side={"n": 2}, elements=[mfloats(), probs()]))
 79 |     def test_f(args):
 80 |         X, q_level = args
 81 | 
 82 |         R1 = stats.robust_standardize(X, q_level)
 83 |         R2 = f_vec(X.T, q_level).T
 84 |         assert R1.dtype == "float64"
 85 |         assert R2.dtype == "float64"
 86 |         assert close_enough(R1, R2, equal_nan=True)
 87 | 
 88 |     # Call the test
 89 |     test_f()
 90 | 
 91 | 
 92 | @given(integers(0, 10), mfloats(), probs())
 93 | def test_t_EB_zero_var(N, val, alpha):
 94 |     x = val + np.zeros(N)
 95 |     EB = stats.t_EB(x, alpha=alpha)
 96 |     if N <= 1:
 97 |         assert EB == np.inf
 98 |     else:
 99 |         assert np.allclose(EB, 0.0)
100 | 
101 | 
@given(integers(1, 10), sampled_from([np.inf, -np.inf]), probs())
def test_t_EB_inf(N, val, alpha):
    """One infinite entry gives an infinite error bar for ``N <= 1`` and NaN otherwise."""
    samples = np.zeros(N)
    samples[0] = val

    error_bar = stats.t_EB(samples, alpha=alpha)
    if N > 1:
        assert np.isnan(error_bar)
    else:
        assert error_bar == np.inf
112 | 
113 | 
@given(seeds(), probs(), integers(2, 10))
def test_t_EB_coverage(seed, alpha, N):
    """The interval ``mean +/- t_EB`` misses the true mean (0) at a rate consistent with `alpha`.

    Over 100 trials of `N` standard normal samples, the number of intervals
    excluding zero is compared against `alpha` with a binomial test.
    """
    trials = 100

    random_st = np.random.RandomState(seed)

    fail = 0
    for tt in range(trials):
        x = random_st.randn(N)

        EB = stats.t_EB(x, alpha=alpha)
        mu = np.nanmean(x)
        LB, UB = mu - EB, mu + EB
        assert np.isfinite(LB) and np.isfinite(UB)
        fail += (0.0 < LB) or (UB < 0.0)

    # ``binom_test`` was removed in SciPy 1.12; ``binomtest`` (SciPy >= 1.7) is
    # its replacement. Keep the old call as a fallback for older SciPy.
    if hasattr(sst, "binomtest"):
        pval = sst.binomtest(int(fail), trials, alpha).pvalue
    else:
        pval = sst.binom_test(fail, trials, alpha)

    assert pval >= 0.05 / 100  # Assume we run 100 times
132 | 
133 | 
@given(lists(mfloats(), min_size=2))
def test_t_test_to_EB(x):
    """Using the t-test p-value as `alpha`, the error bar equals ``|mean(x)|``."""
    pval = t_test_(x)
    # Degenerate p-values (exactly 0 or 1) are excluded
    assume(0.0 < pval < 1.0)

    error_bar = stats.t_EB(x, alpha=pval)
    abs_mean = np.abs(np.mean(x))
    assert np.allclose(abs_mean, error_bar)
141 | 


--------------------------------------------------------------------------------
/test/util.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 Uber Technologies, Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | import numpy as np
 15 | from hypothesis import assume
 16 | from hypothesis.extra.numpy import arrays
 17 | from hypothesis.strategies import (
 18 |     binary,
 19 |     booleans,
 20 |     composite,
 21 |     dictionaries,
 22 |     floats,
 23 |     from_regex,
 24 |     frozensets,
 25 |     integers,
 26 |     lists,
 27 |     sampled_from,
 28 |     text,
 29 | )
 30 | from hypothesis_gufunc.extra.xr import fixed_dataarrays, simple_coords, xr_coords
 31 | 
 32 | import bayesmark.space as sp
 33 | from bayesmark.constants import ARG_DELIM, ITER, METHOD, RANDOM_SEARCH, SUGGEST, TEST_CASE, TRIAL
 34 | from bayesmark.np_util import linear_rescale
 35 | 
 36 | NULL_PLUG = "\x00"
 37 | 
 38 | 
 39 | def _easy_text():
 40 |     # The NULL_PLUG confuses numpy arrays, so assume that is not in
 41 |     S = text().filter(lambda ss: NULL_PLUG not in ss)
 42 |     return S
 43 | 
 44 | 
 45 | def _hashable():
 46 |     S = floats() | integers() | _easy_text()
 47 |     return S
 48 | 
 49 | 
 50 | CAT_STGY = _easy_text if sp.CAT_KIND == "U" else binary
 51 | 
 52 | F_MIN = np.nextafter(0, 1)
 53 | 
 54 | RANGES = {"linear": (-1000, 1000), "log": (F_MIN, 1000), "logit": (F_MIN, np.nextafter(1, 0)), "bilog": (-100, 100)}
 55 | 
 56 | SPACES = tuple(sorted(sp.SPACE_DICT.keys()))
 57 | 
 58 | 
 59 | @composite
 60 | def space_vars(draw, max_values=5):
 61 |     """Build composite strategy for random API calls."""
 62 |     type_ = draw(sampled_from(SPACES))
 63 |     use_values = draw(booleans())
 64 | 
 65 |     if type_ == "real":
 66 |         warp = draw(sampled_from(("linear", "log", "logit", "bilog")))
 67 |         min_val, max_val = RANGES[warp]
 68 |         if use_values:
 69 |             # Generating unique values to ensure that always have more than 2
 70 |             # unique values, but code is designed to accept non-unique values
 71 |             # arrays as long as more than 2 non-unique. Could generalize this.
 72 |             values = draw(lists(floats(min_val, max_val), min_size=2, max_size=max_values, unique=True))
 73 |             D = {"type": type_, "space": warp, "values": values}
 74 |         else:
 75 |             range_ = tuple(sorted(draw(lists(floats(min_val, max_val), min_size=2, max_size=2, unique=True))))
 76 |             D = {"type": type_, "space": warp, "range": range_}
 77 |     elif type_ == "int":
 78 |         warp = draw(sampled_from(("linear", "log", "bilog")))
 79 |         min_val, max_val = RANGES[warp]
 80 |         # Must shrink these to next integers in range to keep hypothesis happy
 81 |         min_val = int(np.ceil(min_val))
 82 |         max_val = int(np.floor(max_val))
 83 |         if use_values:
 84 |             values = draw(lists(integers(min_val, max_val), min_size=2, max_size=max_values, unique=True))
 85 |             D = {"type": type_, "space": warp, "values": values}
 86 |         else:
 87 |             range_ = tuple(sorted(draw(lists(integers(min_val, max_val), min_size=2, max_size=2, unique=True))))
 88 |             D = {"type": type_, "space": warp, "range": range_}
 89 |     elif type_ == "bool":
 90 |         D = {"type": type_}
 91 |     elif type_ == "cat" or type_ == "ordinal":
 92 |         values = draw(lists(CAT_STGY(), min_size=2, max_size=max_values, unique=True))
 93 |         # This assume is needed because np.unique has bug for null plug
 94 |         # .. >>> np.unique([u'', u'\x00'])
 95 |         # .. array([u''], dtype='<U1')
 96 |         assume(len(np.unique(values)) == len(values))
 97 |         D = {"type": type_, "values": values}
 98 |     else:
 99 |         assert False
100 | 
101 |     return D
102 | 
103 | 
@composite
def space_configs(draw, max_vars=5, max_len=5, allow_missing=False, unique_y=False):
    """Build composite strategy for a space description with matching data.

    Returns
    -------
    meta : dict
        Variable name to variable description, see ``space_vars``.
    X : list
        Between 0 and `max_len` unwarped points of the space.
    y : numpy.ndarray
        One float per point; may contain NaN only if `allow_missing`.
    X_fixed : dict
        Values for a randomly chosen subset of the variables.
    """
    meta = draw(dictionaries(text(), space_vars(), min_size=1, max_size=max_vars))

    space = sp.JointSpace(meta)
    lower, upper = space.get_bounds().T

    def _warped_width(var):
        # cat/ordinal variables take one warped column per value, others one
        return len(var["values"]) if var["type"] in ("cat", "ordinal") else 1

    D = sum(_warped_width(var) for var in meta.values())

    unit_floats = floats(min_value=0.0, max_value=1.0)

    # Let's draw warped variable because that will be a lot easier
    N = draw(integers(min_value=0, max_value=max_len))
    unit_points = draw(arrays(dtype=float, shape=(N, D), elements=unit_floats))
    X = space.unwarp(linear_rescale(unit_points, lb0=0.0, ub0=1.0, lb1=lower, ub1=upper))

    # Draw output too in case we want it
    y = draw(
        arrays(
            dtype=float,
            shape=(N,),
            elements=floats(allow_infinity=False, allow_nan=allow_missing),
            unique=unique_y,
        )
    )

    # Draw the fixed vars
    unit_fixed = draw(arrays(dtype=float, shape=(1, D), elements=unit_floats))
    (all_fixed,) = space.unwarp(linear_rescale(unit_fixed, lb0=0.0, ub0=1.0, lb1=lower, ub1=upper))

    # Make fixed_vars a subset of all vars.
    kept_names = draw(frozensets(sampled_from(tuple(all_fixed.keys()))))
    X_fixed = {name: all_fixed[name] for name in kept_names}

    return meta, X, y, X_fixed
133 | 
134 | 
# Alias of _easy_text; perf_dataarrays calls it to build its `coords_elements` strategy.
_test_cases = _easy_text
136 | 
137 | 
def perf_dataarrays(min_trial=1):
    """Build a strategy for float data arrays with dims (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL).

    Parameters
    ----------
    min_trial : int
        Minimum side length passed to `simple_coords` for the TRIAL dimension.

    Returns
    -------
    S : strategy
        Result of `fixed_dataarrays` with finite elements in [-1e300, 1e300]. The METHOD coordinates
        are filtered so at least one of them starts with ``RANDOM_SEARCH + ARG_DELIM``.
    """
    dims = (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
    # Don't get too close to infinity because that can also create issues and isn't supported
    elements = floats(allow_nan=False, min_value=-1e300, max_value=1e300)

    ref = RANDOM_SEARCH + ARG_DELIM
    # Mix names that start with the reference prefix with arbitrary text, then keep only coordinate
    # lists that contain at least one reference-prefixed name.
    method_names = from_regex("^%s[A-Z]*" % ref) | text()
    method_st = xr_coords(elements=method_names).filter(lambda L: any(ss.startswith(ref) for ss in L))

    coords_st = {
        ITER: simple_coords(min_side=1),
        SUGGEST: simple_coords(min_side=1),
        TRIAL: simple_coords(min_side=min_trial),
        METHOD: method_st,
    }
    # np.float64 instead of np.float_: the np.float_ alias was removed in NumPy 2.0, and np.float64
    # is the type it pointed to, so this works on both old and new NumPy.
    S = fixed_dataarrays(dims, dtype=np.float64, elements=elements, coords_elements=_test_cases(), coords_st=coords_st)
    return S
155 | 


--------------------------------------------------------------------------------
/test/util_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 Uber Technologies, Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | import random as pyrandom
 15 | import shlex
 16 | 
 17 | from hypothesis import assume, given
 18 | from hypothesis.strategies import floats, integers, iterables, lists, text
 19 | 
 20 | from bayesmark import util as bobm_util
 21 | from util import _hashable
 22 | 
 23 | 
 24 | def some_mock_f(x):
 25 |     """Some arbitrary deterministic test function.
 26 |     """
 27 |     random_stream = pyrandom.Random(hash(x))
 28 |     y = random_stream.gauss(0, 1)
 29 |     return y
 30 | 
 31 | 
 32 | @given(_hashable(), lists(_hashable()))
 33 | def test_in_or_none(x, L):
 34 |     val = bobm_util.in_or_none(x, L)
 35 |     assert isinstance(val, bool)
 36 |     assert val == (x in L)
 37 |     assert val == (x in set(L))
 38 | 
 39 | 
 40 | @given(_hashable())
 41 | def test_in_or_none_on_none(x):
 42 |     val = bobm_util.in_or_none(x, None)
 43 |     assert isinstance(val, bool)
 44 |     assert val
 45 | 
 46 | 
 47 | @given(lists(_hashable()))
 48 | def test_in_or_none_self(L):
 49 |     for xx in L:
 50 |         val = bobm_util.in_or_none(xx, L)
 51 |         assert isinstance(val, bool)
 52 |         assert val
 53 | 
 54 | 
 55 | @given(lists(_hashable()))
 56 | def test_all_unique(L):
 57 |     bobm_util.all_unique(L)
 58 | 
 59 | 
 60 | @given(lists(integers(), unique=True) | lists(text(), unique=True) | lists(floats(), unique=True))
 61 | def test_strict_sorted(L):
 62 |     bobm_util.strict_sorted(L)
 63 | 
 64 | 
 65 | @given(integers(-5, 1000))
 66 | def test_range_str(stop):
 67 |     list(bobm_util.range_str(stop))
 68 | 
 69 | 
 70 | @given(text(), lists(text()))
 71 | def test_str_join_safe(delim, str_vec):
 72 |     assume(not any(delim in ss for ss in str_vec))
 73 |     bobm_util.str_join_safe(delim, str_vec, append=False)
 74 | 
 75 | 
 76 | @given(text(), lists(text()), lists(text()))
 77 | def test_str_join_safe_append(delim, str_vec0, str_vec):
 78 |     assume(not any(delim in ss for ss in str_vec0))
 79 |     assume(not any(delim in ss for ss in str_vec))
 80 | 
 81 |     start = bobm_util.str_join_safe(delim, str_vec0, append=False)
 82 |     bobm_util.str_join_safe(delim, [start] + str_vec, append=True)
 83 | 
 84 | 
 85 | @given(lists(text()))
 86 | def test_shell_join(argv):
 87 |     cmd = bobm_util.shell_join(argv, delim=" ")
 88 | 
 89 |     assert shlex.split(cmd) == list(argv)
 90 | 
 91 | 
 92 | @given(text(), text(min_size=1))
 93 | def test_chomp(str_val, ext):
 94 |     bobm_util.chomp(str_val + ext, ext)
 95 | 
 96 | 
 97 | @given(iterables(_hashable()))
 98 | def test_preimage_func(x):
 99 |     bobm_util.preimage_func(some_mock_f, x)
100 | 


--------------------------------------------------------------------------------
/test/xr_util_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019 Uber Technologies, Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | from collections import OrderedDict
 15 | from itertools import product
 16 | 
 17 | import xarray as xr
 18 | from hypothesis import assume, given, settings
 19 | from hypothesis.strategies import dictionaries, floats, integers, just, sampled_from, tuples
 20 | from hypothesis_gufunc.extra.xr import (
 21 |     _hashable,
 22 |     dataarrays,
 23 |     datasets,
 24 |     fixed_datasets,
 25 |     simple_dataarrays,
 26 |     subset_lists,
 27 |     vars_to_dims_dicts,
 28 |     xr_vars,
 29 | )
 30 | 
 31 | import bayesmark.xr_util as xru
 32 | 
# Alias of _hashable; the ds_like tests below use it to draw the `fill` argument.
xr_fill = _hashable
 34 | 
 35 | 
 36 | def intersect_seq(L):
 37 |     if len(L) == 0:
 38 |         return set([])
 39 | 
 40 |     S = set(L[0])
 41 |     for xx in L[1:]:
 42 |         S = S & set(xx)
 43 |     return S
 44 | 
 45 | 
 46 | def ds_vars_dims():
 47 |     def build_it(vars_to_dims_):
 48 |         all_dims = list(set(sum((list(dd) for dd in vars_to_dims_.values()), [])))
 49 | 
 50 |         ds = fixed_datasets(vars_to_dims_)
 51 |         vars_ = subset_lists(list(vars_to_dims_.keys()))
 52 |         dims = subset_lists(all_dims)
 53 |         return tuples(ds, vars_, dims)
 54 | 
 55 |     vars_to_dims_st = vars_to_dims_dicts()
 56 | 
 57 |     S = vars_to_dims_st.flatmap(build_it)
 58 |     return S
 59 | 
 60 | 
 61 | def ds_vars_dims_mixed():
 62 |     def build_it(vars_to_dims_):
 63 |         all_dims = list(set(sum((list(dd) for dd in vars_to_dims_.values()), [])))
 64 | 
 65 |         ds = fixed_datasets(vars_to_dims_)
 66 | 
 67 |         dims = subset_lists(all_dims)
 68 | 
 69 |         vars_ = sampled_from(list(vars_to_dims_.keys()))
 70 |         vars_dict = dictionaries(vars_, dims, dict_class=OrderedDict)
 71 |         vars_dict = vars_dict.map(OrderedDict.items).map(list)
 72 | 
 73 |         return tuples(ds, vars_dict, just(all_dims))
 74 | 
 75 |     vars_to_dims_st = vars_to_dims_dicts(min_vars=0, min_dims=0)
 76 | 
 77 |     S = vars_to_dims_st.flatmap(build_it)
 78 |     return S
 79 | 
 80 | 
 81 | @given(simple_dataarrays(("foo", "bar", "baz")) | dataarrays() | dataarrays(coords_elements=floats()), integers(0, 3))
 82 | def test_is_simple_coords(da, min_side):
 83 |     xru.is_simple_coords(da.coords, min_side=min_side)
 84 | 
 85 | 
 86 | @given(simple_dataarrays(("foo", "bar", "baz")))
 87 | def test_is_simple_coords_pass(da):
 88 |     simple = xru.is_simple_coords(da.coords)
 89 |     assert simple
 90 | 
 91 | 
 92 | @given(ds_vars_dims(), xr_fill())
 93 | def test_ds_like(args, fill):
 94 |     ref, vars_, dims = args
 95 | 
 96 |     xru.ds_like(ref, vars_, dims, fill=fill)
 97 | 
 98 | 
 99 | @given(ds_vars_dims_mixed(), xr_fill())
100 | def test_ds_like_mixed(args, fill):
101 |     ref, vars_, dims = args
102 | 
103 |     xru.ds_like_mixed(ref, vars_, dims, fill=fill)
104 | 
105 | 
@given(xr_vars(), dataarrays())
def test_only_dataarray(var_, da):
    """Smoke test: `only_dataarray` runs on a dataset holding exactly one data array."""
    # The variable name must not collide with one of the array's dimension names.
    assume(var_ not in da.dims)

    single_var_ds = xr.Dataset({var_: da})
    xru.only_dataarray(single_var_ds)
113 | 
114 | 
@given(datasets())
def test_coord_compat(ds):
    """Data arrays taken from one dataset are coordinate-compatible on their shared dims."""
    dims_per_var = [ds[name].dims for name in ds]
    shared_dims = sorted(intersect_seq(dims_per_var))
    arrays_ = [ds[name] for name in ds]

    assert xru.coord_compat(arrays_, shared_dims)
123 | 
124 | 
@given(datasets())
def test_coord_compat_false(ds):
    """Smoke test: `coord_compat` runs after the first array's coords have been replaced.

    The coords on the first dimension of the first array are overwritten with ``range(size)``.
    The return value of `coord_compat` is not checked.
    """
    dims_per_var = [ds[name].dims for name in ds]
    shared_dims = sorted(intersect_seq(dims_per_var))
    arrays_ = [ds[name] for name in ds]

    # Need at least one array, and that array needs at least one dimension to overwrite.
    assume(len(arrays_) > 0)
    assume(len(arrays_[0].dims) > 0)

    first = arrays_[0]
    dim_name = first.dims[0]
    new_coords = {dim_name: range(first.sizes[dim_name])}
    arrays_[0] = first.assign_coords(**new_coords)

    xru.coord_compat(arrays_, shared_dims)
139 | 
140 | 
@given(dataarrays(min_dims=1, max_dims=1))
def test_da_to_string(da):
    """Smoke test: `da_to_string` runs on a one-dimensional data array."""
    _ = xru.da_to_string(da)
144 | 
145 | 
@given(dataarrays(min_side=0, min_dims=0), integers(1, 3))
@settings(deadline=None)
def test_da_concat(da, n):
    """Smoke test: pieces made by `da_split` can be passed to `da_concat`."""
    # Keep at least one dimension un-split.
    assume(n < len(da.dims))

    pieces, split_dims = da_split(da, n)
    # Splitting over a zero-length dimension yields no pieces; skip those cases.
    assume(len(pieces) > 0)
    assert len(split_dims) == n

    xru.da_concat(pieces, dims=split_dims)
156 | 
157 | 
def da_split(da, n):
    """Split a data array along its last `n` dimensions.

    Parameters
    ----------
    da
        Object exposing `dims`, `coords` and `sel` like an xarray data array.
    n : int
        Number of trailing dimensions to split on; must satisfy ``0 < n <= len(da.dims)``.

    Returns
    -------
    da_dict : dict
        Maps each tuple of coordinate values on the split dimensions to the matching selection of
        `da`, made with ``drop=True``.
    keys_to_slice
        The last `n` entries of ``da.dims``.
    """
    assert 0 < n
    assert n <= len(da.dims)

    keys_to_slice = da.dims[-n:]
    coord_values = [da.coords[dim].values.tolist() for dim in keys_to_slice]
    da_dict = {
        tuple(combo): da.sel(dict(zip(keys_to_slice, combo)), drop=True)
        for combo in product(*coord_values)
    }
    return da_dict, keys_to_slice
169 | 
170 | 
@given(datasets(min_side=1, min_dims=1), integers(1, 3))
@settings(deadline=None)
def test_ds_concat(ds, n):
    """Smoke test: a dataset split along some shared dims can be passed to `ds_concat`."""
    dims_per_var = [ds[name].dims for name in ds]
    shared_dims = sorted(intersect_seq(dims_per_var))

    # Split on at most all-but-one of the shared dims, and on at least one.
    n = min(n, len(shared_dims) - 1)
    assume(n > 0)

    keys_to_slice = shared_dims[:n]
    coord_values = [ds.coords[dim].values.tolist() for dim in keys_to_slice]
    ds_dict = {
        combo: ds.sel(dict(zip(keys_to_slice, combo)), drop=True)
        for combo in product(*coord_values)
    }

    xru.ds_concat(ds_dict, dims=keys_to_slice)
188 | 


--------------------------------------------------------------------------------
/tools/archive_branch.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -ex
 4 | set -o pipefail
 5 | 
 6 | DATE=$(date +"%Y%m%d")
 7 | TAGNAME=archive/$DATE-$1
 8 | 
 9 | # Fail if untracked files
10 | test -z "$(git status --porcelain)"
11 | 
12 | # Fail if origin and local differ
13 | git diff $1 origin/$1 --quiet
14 | 
15 | # Prune remotes for good measure
16 | git remote prune origin
17 | 
18 | git checkout $1
19 | git tag -a $TAGNAME -m "archived branch $1 on $DATE"
20 | git checkout master
21 | git push origin $TAGNAME
22 | 
23 | # Make sure we tagged correctly for good measure
24 | diff <(git rev-list $TAGNAME -n 1) <(git rev-parse $1)
25 | git ls-remote --tags origin | grep $(git rev-parse $1)
26 | 
27 | git branch -D $1
28 | git push origin --delete $1
29 | 
30 | echo "cleaned up"
31 | 


--------------------------------------------------------------------------------
/tools/deploy.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | #
  3 | # Note that
  4 | # UUID=$(uuidgen)
  5 | # works on Mac OS by default, but requires installation on linux.
  6 | 
  7 | set -ex
  8 | set -o pipefail
  9 | 
 10 | # Script arguments
 11 | REMOTE=$1
 12 | BRANCH=$2
 13 | PACKAGE=$3
 14 | VERSION=$4
 15 | 
 16 | # Check to make sure we have keys setup right before we start
 17 | git push --dry-run
 18 | 
 19 | # Check versions are there, this is a crude way to do it but it works
 20 | grep "^$PACKAGE==$VERSION\$" requirements/self.txt
 21 | grep '^__version__ = "'$VERSION'"$' bayesmark/__init__.py
 22 | grep 'version="'$VERSION'",$' setup.py
 23 | 
 24 | # Where envs go
 25 | ENVS=~/envs
 26 | # Which python version this uses
 27 | PY=python3.7
 28 | # Which env contains twine and py version we use
 29 | TWINE_ENV=twine_env
 30 | # Where to run tar ball tests from
 31 | TEST_DIR=~/tmp/deploy_tests
 32 | 
 33 | mkdir -p $TEST_DIR
 34 | 
 35 | # Get the dir
 36 | REPO_DIR=$(pwd)
 37 | git checkout $BRANCH
 38 | 
 39 | # Fail if untracked files and clean
 40 | test -z "$(git status --porcelain)"
 41 | git clean -x -ff -d
 42 | 
 43 | # Run tests locally and cleanup
 44 | ./integration_test_with_setup.sh
 45 | ./test.sh
 46 | git reset --hard HEAD
 47 | git clean -x -ff -d
 48 | test -z "$(git status --porcelain)"
 49 | 
 50 | # push to remote and check
 51 | git push -u $REMOTE $BRANCH
 52 | git diff $BRANCH $REMOTE/$BRANCH --quiet
 53 | 
 54 | # See if tests pass remote, TODO use travis CLI
 55 | read -t 1 -n 10000 discard || true
 56 | read -p "Travis tests pass [y/n]? " -r
 57 | if [[ ! $REPLY =~ ^[Yy]$ ]]
 58 | then
 59 |     exit 1
 60 | fi
 61 | 
 62 | # test tar ball
 63 | source $ENVS/$TWINE_ENV/bin/activate
 64 | ./build_wheel.sh
 65 | twine check dist/*
 66 | deactivate
 67 | cd $TEST_DIR
 68 | UUID=$(uuidgen)
 69 | mkdir $UUID
 70 | cd $UUID
 71 | virtualenv env --python=$PY
 72 | source ./env/bin/activate
 73 | pip install -r $REPO_DIR/requirements/test.txt
 74 | pip install $REPO_DIR/dist/*.tar.gz
 75 | cp -r $REPO_DIR/test .
 76 | pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings
 77 | deactivate
 78 | cd $REPO_DIR
 79 | # Cleanup since we will build again
 80 | git clean -x -ff -d
 81 | test -z "$(git status --porcelain)"
 82 | 
 83 | # merge master
 84 | # Fail if origin and local differ
 85 | git checkout $BRANCH
 86 | git diff master $REMOTE/master --quiet
 87 | git merge master --no-commit
 88 | # Fail if not clean
 89 | test -z "$(git status --porcelain)"
 90 | 
 91 | # merge to master
 92 | git checkout master
 93 | git merge $BRANCH --squash --no-commit
 94 | git status
 95 | read -t 1 -n 10000 discard || true
 96 | read -p "Commit message (CTRL-C to abort): "
 97 | git commit -m "$REPLY"
 98 | # Fail if not clean
 99 | test -z "$(git status --porcelain)"
100 | 
101 | # Run tests locally and cleanup
102 | ./integration_test_with_setup.sh
103 | ./test.sh
104 | git reset --hard HEAD
105 | git clean -x -ff -d
106 | test -z "$(git status --porcelain)"
107 | 
108 | # test tar ball
109 | source $ENVS/$TWINE_ENV/bin/activate
110 | ./build_wheel.sh
111 | twine check dist/*
112 | deactivate
113 | cd $TEST_DIR
114 | UUID=$(uuidgen)
115 | mkdir $UUID
116 | cd $UUID
117 | virtualenv env --python=$PY
118 | source ./env/bin/activate
119 | pip install -r $REPO_DIR/requirements/test.txt
120 | pip install $REPO_DIR/dist/*.tar.gz
121 | cp -r $REPO_DIR/test .
122 | pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings
123 | deactivate
124 | cd $REPO_DIR
125 | 
126 | # push to test pypi
127 | source $ENVS/$TWINE_ENV/bin/activate
128 | twine upload --repository-url https://test.pypi.org/legacy/ dist/*
129 | deactivate
130 | 
131 | echo "ready to run?"
132 | echo "pip install $PACKAGE==$VERSION --index-url https://test.pypi.org/simple/"
133 | read -p "Enter when pypi has updated: " -r
134 | 
135 | # install and test
136 | cd $TEST_DIR
137 | UUID=$(uuidgen)
138 | mkdir $UUID
139 | cd $UUID
140 | virtualenv env --python=$PY
141 | source ./env/bin/activate
142 | pip install -r $REPO_DIR/requirements/test.txt
143 | pip install -r $REPO_DIR/requirements/ipynb.txt
144 | pip install $PACKAGE==$VERSION --index-url https://test.pypi.org/simple/
145 | cp $REPO_DIR/integration_test.sh .
146 | cp -r $REPO_DIR/notebooks .
147 | cp -r $REPO_DIR/example_opt_root .
148 | ./integration_test.sh
149 | cp -r $REPO_DIR/test .
150 | pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings
151 | deactivate
152 | cd $REPO_DIR
153 | 
154 | # push to remote and check
155 | git push $REMOTE master
156 | git diff master $REMOTE/master --quiet
157 | 
158 | # Show sha256sum in case we want to check against PyPI test, use || for Mac OS version
159 | sha256sum dist/* || shasum -a 256 dist/*
160 | 
161 | # See if tests pass remote, TODO use travis CLI
162 | read -t 1 -n 10000 discard || true
163 | read -p "Travis tests pass, and push to PyPI? This cannot be undone. [push/no]" -r
164 | if [[ ! $REPLY == push ]]
165 | then
166 |     exit 1
167 | fi
168 | 
169 | # push to full pypi
170 | source $ENVS/$TWINE_ENV/bin/activate
171 | twine upload dist/*
172 | deactivate
173 | 
174 | echo "ready to run?"
175 | echo "pip install $PACKAGE==$VERSION"
176 | read -p "Enter when pypi has updated: " -r
177 | 
178 | # install and test
179 | cd $TEST_DIR
180 | UUID=$(uuidgen)
181 | mkdir $UUID
182 | cd $UUID
183 | virtualenv env --python=$PY
184 | source ./env/bin/activate
185 | pip install -r $REPO_DIR/requirements/test.txt
186 | pip install -r $REPO_DIR/requirements/ipynb.txt
187 | pip install $PACKAGE==$VERSION
188 | cp $REPO_DIR/integration_test.sh .
189 | cp -r $REPO_DIR/notebooks .
190 | cp -r $REPO_DIR/example_opt_root .
191 | ./integration_test.sh
192 | cp -r $REPO_DIR/test .
193 | pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings
194 | deactivate
195 | cd $REPO_DIR
196 | 
197 | # clean and tag
198 | git clean -x -ff -d
199 | test -z "$(git status --porcelain)"
200 | git tag -a v$VERSION -m "$PACKAGE version $VERSION"
201 | git push $REMOTE v$VERSION
202 | 
203 | # remind user to archive/delete branch
204 | echo "remember to delete branch $BRANCH, and update readthedocs.io"
205 | echo "done"
206 | 


--------------------------------------------------------------------------------