├── .gitignore ├── LICENSE ├── README.md ├── docs ├── Learning to Optimize.docx ├── Optimizing Optimization.docx ├── abh_cpu_allocation.png ├── loss_matrix.png ├── main_screen.png ├── response_matrix.png ├── single_parameter_scatter.png └── two_parameter_scatter.png ├── hypermax ├── __init__.py ├── algorithms │ ├── __init__.py │ ├── adaptive_bayesian_hyperband_optimizer.py │ ├── atpe_optimizer.py │ ├── human_guided_optimizer_wrapper.py │ ├── optimization_algorithm_base.py │ ├── random_search_optimizer.py │ └── tpe_optimizer.py ├── atpe_models │ ├── model-gamma-configuration.json │ ├── model-gamma.txt │ ├── model-nEICandidates-configuration.json │ ├── model-nEICandidates.txt │ ├── model-resultFilteringAgeMultiplier-configuration.json │ ├── model-resultFilteringAgeMultiplier.txt │ ├── model-resultFilteringLossRankMultiplier-configuration.json │ ├── model-resultFilteringLossRankMultiplier.txt │ ├── model-resultFilteringMode-configuration.json │ ├── model-resultFilteringMode.txt │ ├── model-resultFilteringRandomProbability-configuration.json │ ├── model-resultFilteringRandomProbability.txt │ ├── model-secondaryCorrelationExponent-configuration.json │ ├── model-secondaryCorrelationExponent.txt │ ├── model-secondaryCorrelationMultiplier-configuration.json │ ├── model-secondaryCorrelationMultiplier.txt │ ├── model-secondaryCutoff-configuration.json │ ├── model-secondaryCutoff.txt │ ├── model-secondaryFixedProbability-configuration.json │ ├── model-secondaryFixedProbability.txt │ ├── model-secondaryLockingMode-configuration.json │ ├── model-secondaryLockingMode.txt │ ├── model-secondaryProbabilityMode-configuration.json │ ├── model-secondaryProbabilityMode.txt │ ├── model-secondaryTopLockingPercentile-configuration.json │ ├── model-secondaryTopLockingPercentile.txt │ └── scaling_model.json ├── cli.py ├── configuration.py ├── cui.py ├── execution.py ├── file_utils.py ├── hyperparameter.py ├── optimizer.py ├── results_analyzer.py ├── test_model.py └── utils.py ├── research ├── atpe_research_1 │ └── simulation.py ├── atpe_research_2 │ ├── benchmark.py │ ├── process_results.py │ └── simulation.py └── searches │ └── cifar_resnet │ ├── cifar10.json │ └── cifar_test.py ├── setup.py ├── test └── general_test.json └── tutorials └── fashion-MNIST ├── fashion_mnist.py ├── keras.json └── readme.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | *.ipynb 9 | 10 | # Distribution / packaging 11 | data/ 12 | email_data/ 13 | model/ 14 | tmp/ 15 | envs/ 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # Environments 91 | .env 92 | .venv 93 | env/ 94 | venv/ 95 | ENV/ 96 | env.bak/ 97 | venv.bak/ 98 | 99 | # Spyder project settings 100 | .spyderproject 101 | .spyproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # mkdocs documentation 107 | /site 108 | 109 | # mypy 110 | .mypy_cache/ 111 | 112 | .idea 113 | venv 114 | *.csv 115 | results_* 116 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2023 Genixpro Technologies Corporation and Bradley Arsenault 2 | 3 | Redistribution and use in source and binary forms, with or without modification, 4 | are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, 7 | this list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | 3. Neither the name of the copyright holder nor the names of its contributors 14 | may be used to endorse or promote products derived from this software 15 | without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | POSSIBILITY OF SUCH DAMAGE. 28 | 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | Hypermax is a power tool for optimizing algorithms. It builds on the powerful TPE algorithm with additional features 4 | meant to help you get to your optimal hyper parameters faster and easier. 
We call our algorithm Adaptive-TPE, and it is a
5 | fast and accurate optimizer that trades off between explore-style and exploit-style strategies in an intelligent manner
6 | based on your results. It depends upon pretrained machine learning models that have been taught how to optimize
7 | your machine learning model as fast as possible. Read the research behind ATPE in [Optimizing Optimization](https://articulon.bradleyarsenault.me/article/optimizing-optimization) and [Learning to Optimize](https://articulon.bradleyarsenault.me/article/learning-to-optimize), and use it for yourself by downloading Hypermax.
8 | 
9 | In addition, Hypermax automatically gives you a variety of charts and graphs based on your hyperparameter results.
10 | Hypermax can be restarted easily in case of a crash. Hypermax can monitor the CPU and RAM usage of your algorithms -
11 | automatically killing your process if it takes too long to execute or uses too much RAM. Hypermax even has a UI.
12 | Hypermax makes it easier and faster to get to those high-performing hyper-parameters that you crave so much.
13 | 
14 | Start optimizing today!
15 | 
16 | ![Screenshot of Hypermax](https://raw.githubusercontent.com/electricbrainio/hypermax/master/docs/main_screen.png "Screenshot of Hypermax")
17 | 
18 | # Installation
19 | 
20 | Install using pip:
21 | 
22 | ```bash
23 | pip3 install hypermax
24 | ```
25 | 
26 | Python 3 is required.
27 | 
28 | # Getting Started (Using Python Library)
29 | 
30 | In Hypermax, you define your hyper-parameter search, including the variables, method of searching, and
31 | loss functions, using a JSON object as your configuration file.
32 | 
33 | 
34 | # Getting Started (Using CLI)
35 | 
36 | Here is an example. Let's say you have the following file, model.py:
37 | 
38 | ```python
39 | import sklearn.datasets
40 | import sklearn.ensemble
41 | import sklearn.metrics
42 | from datetime import datetime
43 | 
44 | def trainModel(params):
45 |     inputs, outputs = sklearn.datasets.make_hastie_10_2()
46 | 
47 |     startTime = datetime.now()
48 | 
49 |     model = sklearn.ensemble.RandomForestClassifier(n_estimators=int(params['n_estimators']))
50 |     model.fit(inputs, outputs)
51 |     predicted = model.predict(inputs)
52 | 
53 |     finishTime = datetime.now()
54 | 
55 |     auc = sklearn.metrics.roc_auc_score(outputs, predicted)
56 | 
57 |     return {"loss": 1.0 - auc, "time": (finishTime - startTime).total_seconds()}  # Hypermax minimizes loss, so invert the AUC
58 | ```
59 | 
60 | You configure your hyper-parameter search space by defining a JSON-schema object with the needed values:
61 | 
62 | ```json
63 | {
64 |     "hyperparameters": {
65 |         "type": "object",
66 |         "properties": {
67 |             "n_estimators": {
68 |                 "type": "number",
69 |                 "min": 1,
70 |                 "max": 1000,
71 |                 "scaling": "logarithmic"
72 |             }
73 |         }
74 |     }
75 | }
76 | ```
77 | 
78 | Next, define how you want to execute your optimization function:
79 | 
80 | ```json
81 | {
82 |     "function": {
83 |         "type": "python_function",
84 |         "module": "model",
85 |         "name": "trainModel",
86 |         "parallel": 1
87 |     }
88 | }
89 | ```
90 | 
91 | Next, you need to define your hyper-parameter search:
92 | 
93 | ```json
94 | {
95 |     "search": {
96 |         "method": "atpe",
97 |         "iterations": 1000
98 |     }
99 | }
100 | ```
101 | 
102 | Next, set up where you want results stored and whether you want
103 | graphs generated:
104 | 
105 | 
106 | ```json
107 | {
108 |     "results": {
109 |         "directory": "results",
110 |         "graphs": true
111 |     }
112 | }
113 | ```
114 | 
115 | 
116 | Lastly, indicate whether you want to use the UI:
117 | 
118 | ```json
119 | {
120 |     "ui": {
121 |         "enabled": true
122 |     }
123 | }
124 | ```
125 | 
126 | **NOTE:** At the moment the console UI is not supported in Windows environments, so you will need to specify `false` in
127 | the `enabled` property. We use the `urwid.raw_display` module which relies on `fcntl`. For more information, [see here](https://github.com/urwid/urwid/issues/152).
128 | 
129 | Pulling it all together, you create a file like this, `search.json`, defining your hyper-parameter search:
130 | 
131 | ```json
132 | {
133 |     "hyperparameters": {
134 |         "type": "object",
135 |         "properties": {
136 |             "n_estimators": {
137 |                 "type": "number",
138 |                 "min": 1,
139 |                 "max": 1000,
140 |                 "scaling": "logarithmic"
141 |             }
142 |         }
143 |     },
144 |     "function": {
145 |         "type": "python_function",
146 |         "module": "model",
147 |         "name": "trainModel",
148 |         "parallel": 1
149 |     },
150 |     "search": {
151 |         "method": "atpe",
152 |         "iterations": 1000
153 |     },
154 |     "results": {
155 |         "directory": "results",
156 |         "graphs": true
157 |     },
158 |     "ui": {
159 |         "enabled": true
160 |     }
161 | }
162 | ```
163 | 
164 | And now you can run your hyper-parameter search:
165 | 
166 | ```bash
167 | $ hypermax search.json
168 | ```
169 | 
170 | Hypermax will automatically begin searching your hyperparameter space. If your computer dies and you need to restart
171 | your hyperparameter search, it's as easy as providing the existing results directory as a second parameter. Hypermax
172 | will automatically pick up where it left off.
173 | 
174 | ```bash
175 | $ hypermax search.json results_0/
176 | ```
177 | 
178 | # Optimization Algorithms
179 | 
180 | Hypermax supports 4 different optimization algorithms:
181 | 
182 | - "random" - Does a fully random search
183 | - "tpe" - The classic TPE algorithm, with its default configuration
184 | - "atpe" - Our Adaptive-TPE algorithm, a good general purpose optimizer
185 | - "abh" - Adaptive Bayesian Hyperband - this is an optimizer that is able to learn from partially trained algorithms in order to optimize your fully trained algorithm
186 | 
187 | The first three optimizers - random, tpe, and atpe - can all be used with no configuration.
188 | 
189 | ## Adaptive Bayesian Hyperband
190 | 
191 | The only optimizer in our toolkit that requires additional configuration is the Adaptive Bayesian Hyperband algorithm.
192 | 
193 | ABH works by training your network with different amounts of resources. Your "resource" can be any parameter that significantly affects
194 | the execution time of your model. Typically, training time, size of dataset, or number of epochs are used as the resource. The amount of resource
195 | is referred to as the "budget" of a particular execution.
196 | 
197 | By using partially trained networks, ABH is able to explore more widely over more combinations of hyperparameters, and then carry
198 | the knowledge it gains up to the fully trained model. It does this by using a tournament of sorts, promoting the best-performing
199 | parameters on smaller budget runs to be trained at larger budgets. See the following chart as an example:
200 | 
201 | ![Hyperband CPU Allocation](https://raw.githubusercontent.com/electricbrainio/hypermax/master/docs/abh_cpu_allocation.png "Hyperband CPU Allocation")
202 | 
203 | There are many ways you could configure such a system. Hyperband is just a mathematically and theoretically sound way of choosing
204 | how many brackets to run and with what budgets. ABH is a method that combines Hyperband with ATPE, in the same way that BOHB combines
205 | Hyperband with conventional TPE.
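To make the bracket arithmetic concrete, here is a minimal, simplified sketch of how the `min_budget`, `max_budget`, and `eta` settings described below are turned into a schedule of brackets and budgets. It mirrors the logic of `createBudgetSchedule()` in `hypermax/algorithms/adaptive_bayesian_hyperband_optimizer.py`, but the `budget_schedule` helper and its trimmed-down output fields are purely illustrative:

```python
from math import log

def budget_schedule(min_budget, max_budget, eta=3):
    """Illustrative sketch of the Hyperband bracket/budget schedule,
    simplified from createBudgetSchedule() in the ABH optimizer."""
    s_max = int(log(max_budget) / log(eta))  # highest bracket index
    B = (s_max + 1) * max_budget             # same constant as self.B in the optimizer source
    runs = []
    for s in reversed(range(s_max + 1)):
        n = B / max_budget / (s + 1) * eta ** s  # initial number of configurations in the bracket
        r = max_budget * eta ** (-s)             # initial budget per configuration
        if round(r) >= min_budget:               # skip brackets that would start below min_budget
            for i in range(s + 1):
                runs.append({
                    "bracket": s,
                    "round": i,
                    "configs": int(round(n * eta ** (-i))),
                    "budget": int(round(r * eta ** i)),
                })
    return runs

# Example: min_budget=1, max_budget=30, eta=3 (the configuration shown below)
# yields four brackets whose starting budgets are roughly 1.11, 3.33, 10 and 30.
for run in budget_schedule(min_budget=1, max_budget=30, eta=3):
    print(run)
```

Each bracket starts many configurations on a small budget and repeatedly promotes the best of them to the next, `eta`-times-larger budget.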
206 | 
207 | ABH requires you to select three additional parameters:
208 | - min_budget - Sets the minimum amount of resource that must be allocated to a single run
209 | - max_budget - Sets the maximum amount of resource that can be allocated to a single run
210 | - eta - Defines how much the budget is reduced for each bracket. The theoretical optimum is Euler's number e (about 2.718), but values of 3 or 4 are more typical and work fine in practice.
211 | 
212 | You define these parameters like so:
213 | 
214 | ```json
215 | {
216 |     "search": {
217 |         "method": "abh",
218 |         "iterations": 1000,
219 |         "min_budget": 1,
220 |         "max_budget": 30,
221 |         "eta": 3
222 |     }
223 | }
224 | ```
225 | 
226 | This configuration will result in Hyperband testing 4 different brackets: 30 epochs, 10 epochs, 3.333 epochs (rounded down), and 1.1111 epochs (rounded down).
227 | 
228 | The budget for each run is provided as a hyperparameter to your function, alongside your other hyperparameters. The budget will be given as
229 | the "$budget" key in the Python dictionary that is passed to your model function.
230 | 
231 | Tips:
232 | - ABH only works well when the parameters that work well for a small-budget run correlate strongly with the parameters that work well for a high-budget run.
233 | - Try to eliminate any parameters whose behaviour and effectiveness might change depending on the budget. E.g. parameters for "% of budget in mode 1" and "% of budget in mode 2" will not
234 | work well with ABH.
235 | - Don't test too wide of a range of budgets. As a general rule of thumb, never set min_budget lower than max_budget/eta^4.
236 | - Never test a min_budget that's so low that your model doesn't train at all. The minimum is there for a reason.
237 | - If you find that ABH is getting stuck in a local minimum, choosing parameters that work well on few epochs but work poorly on many epochs, you're
238 | better off using vanilla ATPE and just training networks fully on each run.
239 | 
240 | 
241 | # Results
242 | 
243 | Hypermax automatically generates a wide variety of different types of results for you to analyze.
244 | 
245 | ## Hyperparameter Correlations
246 | 
247 | The hyperparameter correlations can be viewed from within the user-interface or in "correlations.csv" within
248 | your results directory. The correlations can help you tell which hyper-parameter combinations are moving the
249 | needle the most. Remember that a large value, either negative or positive, indicates a strong correlation
250 | between those two hyper-parameters. Values close to 0 indicate that there is little correlation between those
251 | hyper-parameters. The diagonal axis will give you the single-parameter correlations.
252 | 
253 | It should also be noted that these numbers get rescaled to fall roughly between -10 and +10 (preserving the original sign),
254 | and thus are not the mathematically defined covariances. This is done to make it easier to see the important relationships.
255 | 
256 | ## Single Parameter Loss Charts
257 | 
258 | ![Single Parameter Loss Chart](https://raw.githubusercontent.com/electricbrainio/hypermax/master/docs/single_parameter_scatter.png "Single Parameter Loss Chart")
259 | 
260 | The single parameter loss charts create a scatter diagram between the parameter and the loss. These are the most useful charts and are usually the go-to for attempting
261 | to interpret the results. Hypermax is going to generate several different versions of this chart. The original version will have every tested value.
The "bucketed"
262 | version will attempt to combine hyper-parameter values into "buckets" and give you the minimum value for each bucket - useful for continuous-valued hyper-parameters
263 | that you have a lot of results for. The "top_10_percent" version shows you the scatter for only the top 10% of results - useful when you want to home in on
264 | those top-performing values.
265 | 
266 | You will also get a version of this chart for the time that your model takes to execute. This can be useful if trading off between accuracy and time taken is important
267 | to you.
268 | 
269 | ## Two Parameter Loss Matrix
270 | 
271 | The two parameter loss matrices are color-coded diagrams that help you determine the optimal combination of values for two hyper-parameters.
272 | 
273 | ![Two Parameter Loss Matrix](https://raw.githubusercontent.com/electricbrainio/hypermax/master/docs/loss_matrix.png "Loss Matrix")
274 | 
275 | The graph is color coded in the following way:
276 | 
277 | Red: 90th percentile of squares
278 | Yellow: 40th percentile of squares
279 | Green: 20th percentile of squares
280 | Blue: 5th percentile of squares
281 | 
282 | We use quadratic interpolation, which gives nice smooth rounded corners without excessively blurring key areas. The chart is generated by
283 | dividing your hyperparameter values into 10 buckets each, resulting in a 10x10 grid of squares. We compute a value for each square in two ways:
284 | 
285 | One version of the graph computes the value for each square by taking the Min of all values in that grid square. This is usually the most useful
286 | chart. The other version computes the value by taking the Mean. This second version can be susceptible to outlier results, but can show interesting
287 | patterns sometimes.
288 | 
289 | You also get versions of this graph which only use the top 10% of your results, helping you to further focus in on the top-performing area of
290 | your hyper-parameter space. In addition, you get a version of this matrix for the execution time of your model - in case that is important.
291 | 
292 | ## Two Parameter Response Matrix
293 | 
294 | The response matrices are very similar to the loss matrices. In fact, they display all of the same data. They are just color-coded differently
295 | to highlight different things. The loss matrix defines its colors based on global statistics. The response matrix defines its colors
296 | based only on the values within each row. This often highlights important patterns - such as that the optimal value for one hyperparameter
297 | is always the same, regardless of the other hyper-parameter (like the chart below, where the optimal boosting_rounds appears to be around 40, no
298 | matter what max_depth is).
299 | 
300 | ![Two Parameter Response Matrix](https://raw.githubusercontent.com/electricbrainio/hypermax/master/docs/response_matrix.png "Response Matrix")
301 | 
302 | The graph is color coded in the following way:
303 | 
304 | Red: 90th percentile of row
305 | Yellow: 60th percentile of row
306 | Green: 30th percentile of row
307 | Blue: 10th percentile of row
308 | 
309 | ## Two Parameter Scatter
310 | 
311 | The two parameter scatters go alongside the two-parameter loss matrices. If you are concerned that the loss matrices may be extrapolating too
312 | much from very few data-points, you can check the scatter to see whether you actually have a decent sample of results that fall within that area.
313 | 314 | ![Two Parameter Scatter](https://raw.githubusercontent.com/electricbrainio/hypermax/master/docs/two_parameter_scatter.png "Two Parameter Scatter") 315 | 316 | The color coding is the same as it is for the Loss Matrix, but percentiles are calculated over all results and not over the 10x10 grid of squares. 317 | 318 | Red: 90th percentile of values 319 | Yellow: 40th percentile of values 320 | Green: 20th percentile of values 321 | Blue: 5th percentile of values 322 | 323 | The size of the markers will also vary - larger and bluer is more accurate. Smaller and redder is less accurate. 324 | 325 | ## CSV Files 326 | 327 | For all of the above mentioned charts, you will automatically get CSV files containing all of the raw data used to generate that chart. 328 | 329 | # Detailed Configuration 330 | 331 | ## Hyper Parameter Space 332 | 333 | You define your hyper-parameter space within the "hyperparameters" section of the configuration file. The format is reminiscent of JSON-schema, however, only 334 | a limited set of options are supported. 335 | 336 | ### Number hyper-parameters 337 | 338 | Most of the hyper-parameters that you are going to be tuning are expected to be numbers. The configuration of the number hyper-parameter looks like so: 339 | 340 | ```json 341 | { 342 | "parameter_name": { 343 | "type": "number", 344 | "mode": "uniform", 345 | "scaling": "logarithmic", 346 | "min": 1, 347 | "max": 1000, 348 | "rounding": 1 349 | } 350 | } 351 | ``` 352 | 353 | There are 3 required parameters - type, min and max. Type should be set to 'number', and the min and max should represent the minimum and maximum values of 354 | your range. 355 | 356 | There are also three optional parameters. `mode` can be either `uniform` or `normal` (defaults to `uniform`). The `scaling` parameter can be either `linear` 357 | or `logarithmic` (default to `linear`). And you can additionally set `rounding` if you want values to be rounded to some fixed interval. A rounding set to 1 358 | will make your parameter an integer. 359 | 360 | 361 | ### Enumerations 362 | 363 | When you have several different possible values that are categorically distinct, you can use an enumeration to specify the possible options: 364 | 365 | ```json 366 | { 367 | "activation_function": { 368 | "type": "string", 369 | "enum": ["relu", "elu", "tanh", "sigmoid", "swish", "prelu", "selu"] 370 | } 371 | } 372 | ``` 373 | 374 | 375 | ### Object hyper-parameters 376 | 377 | Your hyper-parameter space can contain JSON objects which contain other hyper parameters. In fact, the bottom layer must be made as an object. Simply 378 | set the type to `object` and provide it a `properties` field. 379 | 380 | ```json 381 | { 382 | "parameter_object": { 383 | "type": "object", 384 | "properties": { 385 | "parameter_name": { 386 | "type": "number", 387 | "mode": "uniform", 388 | "scaling": "logarithmic", 389 | "min": 1, 390 | "max": 1000, 391 | "rounding": 1 392 | } 393 | } 394 | } 395 | } 396 | ``` 397 | 398 | ### Choices & Decision Points 399 | 400 | The true power of the TPE algorithm comes from its ability to optimize categorical hyper-parameters, including ones which make other hyper-parameters 401 | available. To do this, you can provide either a `oneOf` or `anyOf` field. Note that "oneOf" and "anyOf" behave exactly the same - we allow 402 | both in order to match JSON-Schema specifications. 
403 | 404 | 405 | ```json 406 | { 407 | "choice_parameter": { 408 | "anyOf": [ 409 | { 410 | "type": "object", 411 | "properties": { 412 | "parameter_name": { 413 | "type": "number", 414 | "min": 1, 415 | "max": 1000 416 | } 417 | } 418 | }, 419 | { 420 | "type": "object", 421 | "properties": { 422 | "other_parameter_name": { 423 | "type": "number", 424 | "min": 1, 425 | "max": 1000 426 | } 427 | } 428 | } 429 | ] 430 | } 431 | } 432 | ``` 433 | 434 | Hypermax will add in an additional "parameter_name.$index" field into the parameters it sends to your algorithm, so that you 435 | can tell which side of the branch you are on. 436 | 437 | Important! When using oneOf or anyOf, each of the options MUST be "object" type hyperparameters, as shown above. 438 | 439 | 440 | ### Constants 441 | 442 | When using decision points, you may find it convenient to add in a constant value to tell you which side of the branch you are on. 443 | 444 | This is easy using a "constant" parameter. You can have the same parameter name on both sides of the branch. 445 | 446 | This allows you to, for example, test two different neural network optimizers, and the various learning rates 447 | attached to each, without having to worry that the algorithm is going to learn an "average" learning rate that 448 | works for both optimizers. Both sides of the branch will be kept separate during optimization, even though 449 | they share the same parameter names. 450 | 451 | Constant parameters are ignored for optimization purposes, but are still passed into your function, making them mostly 452 | useful when you want to lock in a parameter without changing your code, or when using decision points like so: 453 | 454 | ```json 455 | { 456 | "optimizer": { 457 | "oneOf": [ 458 | { 459 | "type": "object", 460 | "properties": { 461 | "optimizerName": { 462 | "type": "string", 463 | "constant": "adam" 464 | 465 | }, 466 | "learningRate": { 467 | "type": "number", 468 | "min": 1e-5, 469 | "max": 1e-3 470 | } 471 | } 472 | }, 473 | { 474 | "type": "object", 475 | "properties": { 476 | "optimizerName": { 477 | "type": "string", 478 | "constant": "sgd" 479 | 480 | }, 481 | "learningRate": { 482 | "type": "number", 483 | "min": 1e-5, 484 | "max": 1e-3 485 | } 486 | } 487 | } 488 | ] 489 | } 490 | } 491 | ``` 492 | 493 | 494 | ## Model Execution 495 | 496 | There are several different ways of executing your model. 497 | 498 | ### Python Functions 499 | 500 | The most straight forward way to execute your model is by defining a Python function. To do this, simply provide the 501 | name of the module and the name of the function in the "module" and "name" functions, like so: 502 | 503 | ```json 504 | { 505 | "function": { 506 | "type": "python_function", 507 | "module": "model", 508 | "name": "trainModel" 509 | } 510 | } 511 | ``` 512 | 513 | Remember that you do not include the extension of the name of your module, there is no ".py" on it. The module is 514 | referenced using Pythons standard system. This means that you can directly reference any files in the current working 515 | directory simply by their file-name. Alternatively, you can reference a system-package or a Python package that is 516 | setup elsewhere. As long as this works: 517 | 518 | ```bash 519 | $ python3 520 | 521 | Python 3.6.5 (default, Mar 29 2018, 18:20:46) 522 | [GCC 8.0.1 20180317 (Red Hat 8.0.1-0.19)] on linux 523 | Type "help", "copyright", "credits" or "license" for more information. 
524 | >>> import module_name
525 | >>> module_name.foobar()
526 | ```
527 | 
528 | Then this will too:
529 | 
530 | ```json
531 | {
532 |     "function": {
533 |         "type": "python_function",
534 |         "module": "module_name",
535 |         "name": "foobar"
536 |     }
537 | }
538 | ```
539 | 
540 | ### Format of the result
541 | 
542 | The results can be provided in one of two formats. The simplest is to just return the loss directly as a single floating point value
543 | from your cost function, or print it to standard output in your executable. For example:
544 | 
545 | ```python
546 | def trainModel(parameters):
547 |     # Do some fancy stuff
548 |     loss = 1.0
549 |     return loss
550 | ```
551 | 
552 | or as an executable:
553 | 
554 | ```python
555 | #!/usr/bin/python3
556 | 
557 | # Do some fancy stuff
558 | loss = 1.0
559 | print(loss)
560 | ```
561 | 
562 | If you are using multiple losses though, you will have to return each of them as part of a JSON object. For example:
563 | 
564 | ```python
565 | def trainModel(parameters):
566 |     # Do some fancy stuff
567 |     accuracy = 0.9
568 |     stddev = 0.1
569 |     return {"accuracy": accuracy, "stddev": stddev}
570 | ```
571 | 
572 | or as an executable:
573 | 
574 | ```python
575 | #!/usr/bin/python3
576 | 
577 | import json
578 | 
579 | # Do some fancy stuff
580 | accuracy = 0.9
581 | stddev = 0.1
582 | print(json.dumps({"accuracy": accuracy, "stddev": stddev}))
583 | ```
584 | 
585 | If you want to store additional metadata with your model, you can. Any fields that are unrecognized for any other purpose will be automatically considered as metadata.
586 | 
587 | ```python
588 | def trainModel(parameters):
589 |     # Do some fancy stuff
590 |     loss = 1.0
591 |     additional_statistic = 42.0
592 |     return {"loss": loss, "additional_statistic": additional_statistic}
593 | ```
594 | 
595 | The time your model takes is automatically measured by Hypermax (time can be used for punishing your model for taking too long, see Losses section).
596 | However, you may only care about the execution / run-time of your model, and not about the training time. In these cases, you can return `time` as
597 | an additional variable.
598 | 
599 | ```python
600 | def trainModel(parameters):
601 |     # Do some fancy stuff
602 |     model = Model()
603 |     model.train()
604 |     start = datetime.now()
605 |     loss = model.test()
606 |     end = datetime.now()
607 |     return {"loss": loss, "time": (end-start).total_seconds()}
608 | ```
609 | 
610 | It should be noted that this time is not the same time used for `auto_kill` purposes. This is the time that will be shown in the UI and considered for optimization
611 | purposes.
612 | 
613 | ### Automatically killing models due to running time or RAM usage
614 | 
615 | Sometimes, your models may behave very poorly in certain parts of your hyper-parameter space. It is thus possible,
616 | and indeed recommended, to add limits on how long your model can run for and how much RAM it can use. This
617 | prevents your optimization routine from getting hung due to a model that takes too long to train, or crashing entirely
618 | because it uses too much RAM.
619 | 
620 | To do this, simply add an `auto_kill_max_time`, `auto_kill_max_ram`, or `auto_kill_max_system_ram` option, and set
621 | an `auto_kill_loss` variable to indicate what the loss should be for models which are killed.
622 | 
623 | `auto_kill_max_time` is specified in seconds. `auto_kill_max_ram` and `auto_kill_max_system_ram` are both specified in
624 | megabytes, the kind based on `1024` (not `1000`).
625 | 
626 | `auto_kill_max_ram` only measures the RAM of the model process. However, if your cost-function has various other
627 | sub-processes which take up RAM, these will not be counted. Therefore, you can use `auto_kill_max_system_ram` in
628 | these cases to prevent total system RAM usage from creeping too high (the assumption being that your model is what is
629 | taking up the system's RAM). You are able to provide both at the same time (if you want to).
630 | 
631 | `auto_kill_loss` is just a floating point value indicating the total loss that should be given to the optimizer when the model
632 | is killed. This helps teach the optimizer to avoid hyper-parameters which lead to models being killed.
633 | 
634 | ```json
635 | {
636 |     "function": {
637 |         "type": "python_function",
638 |         "module": "model",
639 |         "name": "trainModel",
640 |         "auto_kill_max_time": 120.0,
641 |         "auto_kill_max_ram": 512,
642 |         "auto_kill_max_system_ram": 3800,
643 |         "auto_kill_loss": 1.0
644 |     }
645 | }
646 | ```
647 | 
648 | ## Loss / Cost Functions (UNDER CONSTRUCTION)
649 | 
650 | PLEASE NOTE THIS SECTION IS DESCRIBING FUTURE FUNCTIONALITY AND IS NOT YET SUPPORTED. PLEASE IGNORE THIS SECTION.
651 | 
652 | We support several different types of cost functions.
653 | 
654 | ## Timing Loss (UNDER CONSTRUCTION)
655 | 
656 | You can include the time your model takes to train as one of your loss functions as well. This makes it convenient
657 | to teach the algorithm to avoid bad hyper-parameters which lead to long training times. Many algorithms have poor
658 | combinations of parameters which can lead to long execution time with no improvement in performance.
659 | 
660 | If the algorithm takes less than the `target_time`, then no penalty is incurred. As the time taken goes between
661 | `target_time` and `max_time`, the penalty is introduced quadratically. At `max_time`, the penalty is exactly
662 | `penalty_at_max`.
663 | 
664 | This usually results in the algorithm choosing a value between `target_time` and `max_time`, but closer
665 | to `target_time`. For example, with the following:
666 | 
667 | ```json
668 | {
669 |     "metrics": {
670 |         "time": {
671 |             "type": "time",
672 |             "target_time": 5,
673 |             "max_time": 10,
674 |             "penalty_at_max": 0.1
675 |         }
676 |     }
677 | }
678 | ```
679 | 
680 | If the algorithm takes 5.0 seconds, no penalty is introduced. At 6.0 seconds, the penalty is:
681 | 
682 | = ((6 - 5)^2 / (10 - 5)^2) * 0.1
683 | = 0.004
684 | 
685 | At 9 seconds, the penalty is:
686 | 
687 | = ((9 - 5)^2 / (10 - 5)^2) * 0.1
688 | = 0.064
689 | 
690 | At 10 seconds, the penalty is:
691 | 
692 | = ((10 - 5)^2 / (10 - 5)^2) * 0.1
693 | = 0.1
694 | 
695 | Longer times will have even larger penalties.
696 | 
697 | # Details on Adaptive-TPE
698 | 
699 | See here:
700 | 
701 | - [Optimizing Optimization](https://articulon.bradleyarsenault.me/article/optimizing-optimization)
702 | - [Learning to Optimize](https://articulon.bradleyarsenault.me/article/learning-to-optimize)
703 | 
704 | # Todo & Wishlist
705 | 
706 | Feel free to contribute! Reach out to Brad at brad@electricbrain.io or here on GitHub. We welcome additional contributors.
707 | 
708 | This is the grand todo list and was created on August 4, 2018. Some items may have been completed.
709 | 
710 | - Results
711 | - Able to configure save-location.
712 | - Automatic uploading of results to Google Drive 713 | - Model Execution 714 | - Autokill models that take too much GPU RAM 715 | - Autokill models that take too much Disk / Network (not sure about this one) 716 | - Fix bug related to using too many file handlers. 717 | - Execute model as an executable 718 | - Execute model remotely, through ssh 719 | - Execute model by sending a message through message-buffer like RabbitMQ or Kafka (receive results same way) 720 | - Rsync a folder prior to remote execution 721 | - Can attach additional arbitrary metadata to your model results 722 | - Able to have "speed-tests" on models, where your hyper-parameters are tested on a reduced dataset in order to measure the speed. Useful to eliminate bad hyper-parameters without executing the full model. 723 | - Similarly, able to run "memory tests" on models, ensuring your hyper-parameters don't lead to excessive ram usage 724 | - Able to automatically run additional cross-folds on your best-models, to ensure they aren't statistical flukes 725 | - Hyper-parameters with rounding set to 1 should be automatically converted to integer 726 | - Configuration: 727 | - JSON-schema for the configuration files 728 | - validation of json-schemas 729 | - Ability to accept yaml as well as JSON 730 | - Able to have fixed values inside of the hyper-parameter schemas. 731 | - Able to have "unbounded" hyperparameters (only when using iterative optimization, since TPE doesn't inherently do this) 732 | - Ability to have hyper-parameters within arrays, such as a list of layers 733 | - Reliability: 734 | - Hypermax saves results / reloads from where it left off by default 735 | - Try to lock in package versions related to Hyperopt so people don't have problems on installation 736 | - Need a better way of handling exceptions that happen in UI code 737 | - Execution threads should only communicate through queues, eliminate shared variables (and put locks in the ones we can't eliminate) 738 | - Control random seed and ensure that runs are reproducible 739 | - General User Interface: 740 | - Change User-Interface code to use proper organized classes and not ad-hoc style like it is currently 741 | - View recently trained models 742 | - Able to view Training Loss VS. Testing Loss on UI 743 | - View models which had errors 744 | - Fix UI issues related to data-tables (such as in hyperparameter correlations) 745 | - Able to adjust min,max,smoothing, and domain of the loss chart 746 | - Predict model execution time based on hyper-parameters and prior data 747 | - Progress-bar on model training 748 | - Can change the file-name when exporting the hyper-parameter correlations 749 | - Can instantly open files which are exported, using xdg-open & equivalents 750 | - Widget which allows easily viewing/scrolling a large block of text. 
751 | - View the raw hyper-parameter search configuration, as JSON 752 | - Exporting the hyper-parameters should save them in the exact format they are fed into the model, not in a flattened structure 753 | - View the hyper-parameter space while model is running 754 | - Can view arbitrary metadata that was attached to models 755 | - Make the UI responsive to different console sizes (like a grid system) 756 | - Fix the bug where the UI doesn't automatically resize when terminal resizes 757 | - Access the UI through a web-browser 758 | - Password-protection on web-browser UI 759 | - Able to monitor the RAM (and GPU RAM) usage of currently executing models 760 | - Able to monitor the disk usage of currently executing models 761 | - Able to monitor the network usage of currently executing models 762 | - Able to monitor general system stats, such as CPU, network, disk, and ram 763 | - Losses: 764 | - Able to have multiple weighted loss functions 765 | - Automatically pass loss-function through math-function based on min,max,target to add in asymptotes at target values 766 | - Write documentation related to how your cost-function is manipulated to improve results 767 | - Convenient way to add Time as an additional loss on your model 768 | - Time computed automatically, but can be overridden if provided in results 769 | - Can view all the losses for a given model in the UI, not just final loss 770 | - Convenient way to add in peak or median RAM (and GPU RAM) as an additional loss on your model 771 | - Convenient way to add in disk / network usage as a additional loss on your model 772 | - Tools for Hyperparameter Tuning: 773 | - Improve feature-vector design for hyper-parameter correlations 774 | - Edit / change the hyper-parameter space while the model is running 775 | - Estimate the cardinality of the search-space 776 | - Estimate number of runs per parameter (or something like this) 777 | - Ability to fit a hyper-model and do a simulated extension of your hyper-parameter search, trying to predict if there are better values that you haven't searched 778 | - Ability to use the hyper-model to hold certain hyper-parameters constant, and determine the optimal values for remaining hyper-parameters 779 | - Staged tuning - able to have multiple tuning "stages", which tune only certain hyper-parameters at a time or with different configurations 780 | - Can have a 'default' value for each of the hyperparameters, e.g. your current best model. 781 | - Incremental tuning - basically only tunes a handful of hyper-parameters at a time. Can be random or specified 782 | - Ability to change the hyper parameters for TPE 783 | - Research some automatic way to guess good TPE hyper-parameters 784 | - Integrate Bayesian hyper-parameter optimization as an alternative to TPE 785 | - Integrate grid-search as an alternative to TPE 786 | - Integrate genetic-algo as an alternative to TPE 787 | - Command-line interface 788 | - Execute model without CUI 789 | - Sample next hyper-parameters to test 790 | - Export all of the existing types of result-analysis (correlations, hotspot-grids, param vs loss, etc..) 
791 | - Launch web browser UI (without CUI) 792 | - Write a template configuration file to the current directory 793 | - Library interface: 794 | - Able to activate hypermax by calling a library function 795 | - Able to provide our cost-function directly as a python function, rather then just as a JSON description 796 | - Testing 797 | - Write unit tests for hyper-parameter configuration 798 | - Write unit tests for model execution 799 | - Write unit tests for loss functions 800 | - Write unit tests for the optimizer 801 | - Write unit tests for the results generation 802 | - Write unit tests related to reliability (starting / stopping models) 803 | - Write unit tests for command line interface 804 | - Write unit tests for web-UI module (just ensure it loads) 805 | - Write UI tests for the CUI 806 | - Write end-to-end optimization tests for a few different real datasets / algos 807 | - Template hyperparameter spaces: 808 | - Create a template hyper-parameter search for lightgbm 809 | - Create a template hyper-parameter search for xgbboost 810 | - Create template hyper-parameter searches for various scikit-learn estimators 811 | - Ability to reference template hyper-parameter searches in your own JSON schemas ($ref) 812 | - Release / Launch: 813 | - Test hypermax in following environments, and fix any issues with installation configuration 814 | - Fresh virtual environment 815 | - Fresh fedora installation 816 | - Fresh ubuntu installation 817 | - macOS 818 | - Write all the documentation for the README file for Hypermax 819 | - Create Github wiki, duplicate README documentation in Github Wiki 820 | - Create Hypermax web-page as sub-domain of Electric Brain, duplicate information from README 821 | - Write a blog post discussing hypermax and hyper-optimization in general 822 | - Create optimization examples for: 823 | - Python 824 | - Java 825 | - NodeJS 826 | - Ruby 827 | - R 828 | - Create system packages for: 829 | - Fedora / Redhat / CentOS 830 | - Debian / Ubuntu 831 | - macOS 832 | - Windows 833 | -------------------------------------------------------------------------------- /docs/Learning to Optimize.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genixpro/hypermax/11b39caf2f7048d673898ff6dbe8050ff87dc3de/docs/Learning to Optimize.docx -------------------------------------------------------------------------------- /docs/Optimizing Optimization.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genixpro/hypermax/11b39caf2f7048d673898ff6dbe8050ff87dc3de/docs/Optimizing Optimization.docx -------------------------------------------------------------------------------- /docs/abh_cpu_allocation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genixpro/hypermax/11b39caf2f7048d673898ff6dbe8050ff87dc3de/docs/abh_cpu_allocation.png -------------------------------------------------------------------------------- /docs/loss_matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genixpro/hypermax/11b39caf2f7048d673898ff6dbe8050ff87dc3de/docs/loss_matrix.png -------------------------------------------------------------------------------- /docs/main_screen.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/genixpro/hypermax/11b39caf2f7048d673898ff6dbe8050ff87dc3de/docs/main_screen.png -------------------------------------------------------------------------------- /docs/response_matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genixpro/hypermax/11b39caf2f7048d673898ff6dbe8050ff87dc3de/docs/response_matrix.png -------------------------------------------------------------------------------- /docs/single_parameter_scatter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genixpro/hypermax/11b39caf2f7048d673898ff6dbe8050ff87dc3de/docs/single_parameter_scatter.png -------------------------------------------------------------------------------- /docs/two_parameter_scatter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genixpro/hypermax/11b39caf2f7048d673898ff6dbe8050ff87dc3de/docs/two_parameter_scatter.png -------------------------------------------------------------------------------- /hypermax/__init__.py: -------------------------------------------------------------------------------- 1 | name = 'hypermax' 2 | 3 | -------------------------------------------------------------------------------- /hypermax/algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genixpro/hypermax/11b39caf2f7048d673898ff6dbe8050ff87dc3de/hypermax/algorithms/__init__.py -------------------------------------------------------------------------------- /hypermax/algorithms/adaptive_bayesian_hyperband_optimizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | from math import log, ceil 4 | from time import time, ctime 5 | from .optimization_algorithm_base import OptimizationAlgorithmBase 6 | from .random_search_optimizer import RandomSearchOptimizer 7 | from pprint import pprint 8 | from ..hyperparameter import Hyperparameter 9 | import hyperopt 10 | import json 11 | import functools 12 | import copy 13 | 14 | class AdaptiveBayesianHyperband(OptimizationAlgorithmBase): 15 | """ This algorithm combines our ATPE optimizer with Hyperband""" 16 | 17 | def __init__(self, baseOptimizer, min_budget, max_budget, eta = 3): 18 | self.baseOptimizer = baseOptimizer 19 | self.randomOptimizer = RandomSearchOptimizer() 20 | 21 | self.min_budget = min_budget 22 | self.max_budget = max_budget # maximum iterations per configuration 23 | 24 | self.eta = eta # defines configuration downsampling rate (default = 3) 25 | 26 | self.logeta = lambda x: log(x) / log(self.eta) 27 | self.s_max = int(self.logeta(self.max_budget)) 28 | self.B = (self.s_max + 1) * self.max_budget 29 | 30 | self.results = [] # list of dicts 31 | 32 | @classmethod 33 | def configurationSchema(self): 34 | return {} 35 | 36 | 37 | def createBudgetSchedule(self): 38 | runs = [] 39 | 40 | for s in reversed( range( self.s_max + 1 )): 41 | 42 | # initial number of configurations 43 | n = self.B / self.max_budget / (s + 1) * self.eta ** s 44 | 45 | # initial amount of budget per config 46 | r = self.max_budget * self.eta ** (-s) 47 | 48 | runs_in_sequence = 0 49 | 50 | if round(r) >= self.min_budget: 51 | for i in range(( s + 1 )): 52 | 53 | # Run each of the n configs for 54 | # and keep best (n_configs / eta) configurations 55 | 56 | n_configs = n * self.eta ** ( 
-i ) 57 | n_budget = r * self.eta ** ( i ) 58 | 59 | runs.append({ 60 | "group": s, 61 | "round": runs_in_sequence, 62 | "configs_start": int(round(n_configs)), 63 | "configs_finish": int(round(n_configs / self.eta)), 64 | "input_configs": int(round(n_configs * self.eta)), 65 | "input_round": runs_in_sequence - 1, 66 | "input_budget": -1 if i == 0 else int(ceil(r * self.eta ** ( i - 1 ))), 67 | "budget": int(round(n_budget)) 68 | }) 69 | 70 | runs_in_sequence += 1 71 | 72 | return runs 73 | # return self.results 74 | 75 | def createCanonicalStringFromResult(self, result, hyperparameterSpace): 76 | params = Hyperparameter(hyperparameterSpace).convertToStructuredValues(result) 77 | 78 | keys = list(params.keys()) 79 | for key in keys: 80 | if key in OptimizationAlgorithmBase.resultInformationKeys or key.startswith('$'): 81 | del params[key] 82 | 83 | return json.dumps(params, sort_keys=True) 84 | 85 | def createCanonicalStringFromParameters(self, params, hyperparameterSpace): 86 | newResult = Hyperparameter(hyperparameterSpace).convertToFlatValues(params) 87 | return self.createCanonicalStringFromResult(newResult, hyperparameterSpace) 88 | 89 | def recommendNextParameters(self, hyperparameterSpace, results, currentTrials, lockedValues=None): 90 | runs = self.createBudgetSchedule() 91 | 92 | space = Hyperparameter(hyperparameterSpace) 93 | 94 | finishedAndRunningResults = [result for result in results if result['loss'] is not None] + [space.convertToFlatValues(trial['params']) for trial in currentTrials] 95 | 96 | runsNeeded = [] 97 | loopResults = [] 98 | loop = None 99 | 100 | # Find which is the largest $loop we find in the results 101 | if len(finishedAndRunningResults) == 0: 102 | loop = 0 103 | runsNeeded = sorted(runs, key=lambda run: run['budget']) 104 | else: 105 | maxLoop = max([result['$loop'] for result in finishedAndRunningResults]) 106 | 107 | for loopToTest in range(maxLoop+1): 108 | loopResults = [result for result in finishedAndRunningResults if result['$loop'] == loopToTest] 109 | 110 | # Define which secondary halving runs have enough data to operate 111 | runsNeeded = [] 112 | for run in runs: 113 | if run['input_round'] != -1: 114 | inputResultsForRun = [result for result in loopResults if (result['$group'] == run['group'] and result['$round'] == run['input_round'] and ('loss' in result))] 115 | 116 | if len(inputResultsForRun) < run['input_configs']: 117 | continue 118 | 119 | resultsForRun = [result for result in loopResults if (result['$group'] == run['group'] and result['$round'] == run['round'])] 120 | 121 | if len(resultsForRun) < run['configs_start']: 122 | runsNeeded.append(run) 123 | 124 | runsNeeded = sorted(runsNeeded, key=lambda run: (-run['group'], -run['budget'])) 125 | if len(runsNeeded) > 0: 126 | loop = loopToTest 127 | break 128 | if loop is None: 129 | loop = maxLoop 130 | 131 | if len(runsNeeded) == 0: 132 | runsNeeded = sorted(runs, key=lambda run: run['budget']) 133 | loop += 1 134 | 135 | run = runsNeeded[0] 136 | 137 | if run['input_round'] == -1: 138 | resultsForReccomendation = [result for result in results if result['$budget'] == run['budget']] 139 | 140 | if random.uniform(0, 1) < 0.3: 141 | params = self.randomOptimizer.recommendNextParameters(hyperparameterSpace, resultsForReccomendation, currentTrials) 142 | else: 143 | params = self.baseOptimizer.recommendNextParameters(hyperparameterSpace, resultsForReccomendation, currentTrials) 144 | 145 | params['$budget'] = run['budget'] 146 | params['$loop'] = loop 147 | params['$group'] = 
run['group'] 148 | params['$round'] = run['round'] 149 | return params 150 | else: 151 | inputResultsForRun = [result for result in loopResults if (result['$group'] == run['group'] and result['$round'] == run['input_round'])] 152 | inputResultsForRun = sorted(inputResultsForRun, key=lambda result: result['loss'])[0:run['configs_start']] 153 | 154 | existingResultsForRun = [result for result in loopResults if (result['$group'] == run['group'] and result['$round'] == run['round'])] 155 | 156 | inputCanonicalStrings = [self.createCanonicalStringFromResult(result, hyperparameterSpace) for result in inputResultsForRun] 157 | existingCanonicalStrings = [self.createCanonicalStringFromResult(result, hyperparameterSpace) for result in existingResultsForRun] 158 | 159 | neededCanonicalStrings = set(inputCanonicalStrings).difference(existingCanonicalStrings) 160 | neededResults = [inputResultsForRun[inputCanonicalStrings.index(resultString)] for resultString in neededCanonicalStrings] 161 | 162 | chosenResult = random.choice(neededResults) 163 | params = space.convertToStructuredValues(chosenResult) 164 | params['$budget'] = run['budget'] 165 | params['$loop'] = loop 166 | params['$group'] = run['group'] 167 | params['$round'] = run['round'] 168 | 169 | return params 170 | -------------------------------------------------------------------------------- /hypermax/algorithms/human_guided_optimizer_wrapper.py: -------------------------------------------------------------------------------- 1 | from .optimization_algorithm_base import OptimizationAlgorithmBase 2 | import hyperopt 3 | import functools 4 | import random 5 | import numpy 6 | import math 7 | import numpy.random 8 | from pprint import pprint 9 | from hypermax.hyperparameter import Hyperparameter 10 | 11 | class HumanGuidedOptimizerWrapper(OptimizationAlgorithmBase): 12 | """ This class wraps an optimizer to allow a human to provide additional guidance to it.""" 13 | 14 | def __init__(self, baseOptimizer): 15 | super() 16 | self.baseOptimizer = baseOptimizer 17 | self.guidanceOptions = { 18 | 'filteringMode': 'none', 19 | 'filteringPercentile': 0, 20 | 'lockedParameters': [], 21 | 'refitParameters': [], 22 | 'scrambleParameters': [] 23 | } 24 | 25 | @classmethod 26 | def configurationSchema(self): 27 | """ This method returns the configuration schema for the human guidance options.""" 28 | return { 29 | "type": "object", 30 | "properties": { 31 | "filteringMode": { 32 | "type": "string", 33 | "enum": ['none', 'age', 'lossrank'] 34 | }, 35 | "filteringPercentile": { 36 | "type": "number", 37 | "min": 0, 38 | "max": 100 39 | }, 40 | "lockedParameters": { 41 | "type": "array", 42 | "items": { 43 | "type": "object", 44 | "parameters": { 45 | "variable": { 46 | "type": "string" 47 | }, 48 | "value": { 49 | } 50 | } 51 | } 52 | }, 53 | "refitParameters": { 54 | "type": "array", 55 | "items": { 56 | "type": "object", 57 | "parameters": { 58 | "variable": { 59 | "type": "string" 60 | }, 61 | "refitStartTrial": { 62 | "type": "number" 63 | } 64 | } 65 | } 66 | }, 67 | "scrambleParameters": { 68 | "type": "array", 69 | "items": { 70 | "type": "object", 71 | "parameters": { 72 | "variable": { 73 | "type": "string" 74 | } 75 | } 76 | } 77 | }, 78 | } 79 | } 80 | 81 | 82 | def filterHyperparameterSpace(self, hyperparameterSpace, filterEntries, root=""): 83 | if 'anyOf' in hyperparameterSpace or 'oneOf' in hyperparameterSpace: 84 | if 'anyOf' in hyperparameterSpace: 85 | data = hyperparameterSpace['anyOf'] 86 | else: 87 | data = hyperparameterSpace['oneOf'] 
88 | 89 | newParams = [] 90 | for index, param in enumerate(data): 91 | newParam = self.filterHyperparameterSpace(param, filterEntries, (root + "." if root else "") + str(index)) 92 | newParams.append(newParam) 93 | if 'anyOf' in hyperparameterSpace: 94 | return {"anyOf": newParams} 95 | else: 96 | return {"oneOf": newParams} 97 | elif 'enum' in hyperparameterSpace: 98 | return hyperparameterSpace 99 | elif hyperparameterSpace['type'] == 'object': 100 | newProperties = {} 101 | for key in hyperparameterSpace['properties'].keys(): 102 | name = root + "." + key 103 | if name not in filterEntries: 104 | config = hyperparameterSpace['properties'][key] 105 | newProperty = self.filterHyperparameterSpace(config, filterEntries, (root + "." if root else "") + key) 106 | newProperties[key] = newProperty 107 | return {"type": "object", "properties": newProperties} 108 | else: 109 | return hyperparameterSpace 110 | 111 | def filterResults(self, results, filterEntries): 112 | newResults = [] 113 | for result in results: 114 | filteredResult = {} 115 | for key in result: 116 | if (key not in filterEntries) or (key in self.resultInformationKeys): 117 | filteredResult[key] = result[key] 118 | newResults.append(filteredResult) 119 | return newResults 120 | 121 | def createFlatParameterValues(self, parameters, hyperparameterSpace, root=""): 122 | if 'anyOf' in hyperparameterSpace or 'oneOf' in hyperparameterSpace: 123 | return {root: hyperparameterSpace} 124 | elif 'enum' in hyperparameterSpace: 125 | return {root: hyperparameterSpace} 126 | elif hyperparameterSpace['type'] == 'object': 127 | flatParams = {} 128 | for key in hyperparameterSpace['properties'].keys(): 129 | # print("key", key) 130 | config = hyperparameterSpace['properties'][key] 131 | subFlatParams = self.createFlatParameterValues(parameters[key], config, root + "." 
+ key) 132 | # print("subFlatParams", subFlatParams) 133 | for newKey in subFlatParams: 134 | flatParams[newKey] = subFlatParams[newKey] 135 | 136 | return flatParams 137 | else: 138 | return {root: parameters} 139 | 140 | def recommendNextParameters(self, hyperparameterSpace, results, currentTrials, lockedValues=None): 141 | if lockedValues is None: 142 | lockedValues = {} 143 | 144 | for lockedParam in self.guidanceOptions['lockedParameters']: 145 | lockedValues[lockedParam['variable']] = lockedParam['value'] 146 | 147 | for scrambledParam in self.guidanceOptions['scrambleParameters']: 148 | parameters = Hyperparameter(hyperparameterSpace).getFlatParameters() 149 | 150 | parameter = [param for param in parameters if param.name == scrambledParam['variable']][0] 151 | 152 | minVal = parameter.config['min'] 153 | maxVal = parameter.config['max'] 154 | 155 | if parameter.config.get('scaling', 'linear') == 'logarithmic': 156 | minVal = math.log(minVal) 157 | maxVal = math.log(maxVal) 158 | 159 | value = random.uniform(minVal, maxVal) 160 | 161 | if parameter.config.get('scaling', 'linear') == 'logarithmic': 162 | value = math.exp(value) 163 | 164 | if 'rounding' in parameter.config: 165 | value = round(value / parameter.config['rounding']) * parameter.config['rounding'] 166 | 167 | lockedValues[parameter.name] = value 168 | 169 | refitParameters = sorted(self.guidanceOptions['refitParameters'], key=lambda p: p['refitStartTrial']) 170 | 171 | refitNames = [refitParam['variable'] for refitParam in refitParameters] 172 | 173 | primarySpace = self.filterHyperparameterSpace(hyperparameterSpace, refitNames) 174 | 175 | # Filter results to remove the non primary variables 176 | primaryResults = self.filterResults(results, refitNames) 177 | recommendedParams = self.baseOptimizer.recommendNextParameters(primarySpace, primaryResults, currentTrials, lockedValues) 178 | 179 | # print(recommendedParams) 180 | 181 | for index, refitParam in enumerate(refitParameters): 182 | startTrial = refitParam['refitStartTrial'] 183 | 184 | # For each refit parameter, we predict after locking in the all previous parameters 185 | remainingRefits = refitParameters[index+1:] 186 | # pprint("filteredSpace", filteredSpace) 187 | 188 | newLockedValues = self.createFlatParameterValues(recommendedParams, self.filterHyperparameterSpace(hyperparameterSpace, refitNames[index:])) 189 | 190 | # pprint("newLockedValues", newLockedValues) 191 | filteredSpace = self.filterHyperparameterSpace(hyperparameterSpace, remainingRefits) 192 | filteredResults = self.filterResults(results[startTrial+1:], remainingRefits) 193 | 194 | refitReccomendation = self.baseOptimizer.recommendNextParameters(filteredSpace, filteredResults, currentTrials, newLockedValues) 195 | 196 | recommendedParams = refitReccomendation 197 | 198 | return recommendedParams 199 | 200 | -------------------------------------------------------------------------------- /hypermax/algorithms/optimization_algorithm_base.py: -------------------------------------------------------------------------------- 1 | import hyperopt 2 | import datetime 3 | from hypermax.hyperparameter import Hyperparameter 4 | import json 5 | import copy 6 | from pprint import pprint 7 | 8 | class OptimizationAlgorithmBase: 9 | """ This is the base class for all optimization algorithms. 
These are the core algorithms which produce 10 | recommendations on what to try next.""" 11 | 12 | resultInformationKeys = [ 13 | 'trial', 14 | 'status', 15 | 'loss', 16 | 'time', 17 | 'log', 18 | 'error' 19 | ] 20 | 21 | def recommendNextParameters(self, hyperparameterSpace, results, currentTrials, lockedValues=None): 22 | pass 23 | 24 | 25 | 26 | def convertResultsToTrials(self, hyperparameterSpace, results): 27 | trials = hyperopt.Trials() 28 | 29 | for resultIndex, result in enumerate(results): 30 | data = { 31 | 'book_time': datetime.datetime.now(), 32 | 'exp_key': None, 33 | 'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'), 34 | 'idxs': {}, 35 | 'tid': resultIndex, 36 | 'vals': {}, 37 | 'workdir': None}, 38 | 'owner': None, 39 | 'refresh_time': datetime.datetime.now(), 40 | 'result': {'loss': result['loss'], 'status': result['status']}, 41 | 'spec': None, 42 | 'state': 2, 43 | 'tid': resultIndex, 44 | 'version': 0 45 | } 46 | 47 | for param in Hyperparameter(hyperparameterSpace).getFlatParameters(): 48 | value = result[param.name] 49 | if value != "" and value is not None: 50 | if 'enum' in param.config: 51 | value = param.config['enum'].index(value) 52 | 53 | data['misc']['idxs'][param.hyperoptVariableName] = [resultIndex] 54 | data['misc']['vals'][param.hyperoptVariableName] = [value] 55 | else: 56 | data['misc']['idxs'][param.hyperoptVariableName] = [] 57 | data['misc']['vals'][param.hyperoptVariableName] = [] 58 | 59 | trials.insert_trial_doc(data) 60 | return trials 61 | 62 | def convertTrialsToResults(self, hyperparameterSpace, trials): 63 | results = [] 64 | for trialIndex, trial in enumerate(trials.trials): 65 | data = { 66 | "trial": trialIndex, 67 | "status": trial['result']['status'], 68 | "loss": trial['result']['loss'], 69 | "log": "", 70 | "time": abs((trial['book_time'] - trial['refresh_time']).total_seconds()) 71 | } 72 | 73 | params = trial['misc']['vals'] 74 | for param in Hyperparameter(hyperparameterSpace).getFlatParameters(): 75 | key = param.hyperoptVariableName 76 | 77 | if len(params[key]) == 1: 78 | value = params[key][0] 79 | if 'enum' in param.config: 80 | value = param.config['enum'][value] 81 | 82 | data[param.name] = value 83 | else: 84 | data[param.name] = '' 85 | 86 | 87 | results.append(data) 88 | return results 89 | -------------------------------------------------------------------------------- /hypermax/algorithms/random_search_optimizer.py: -------------------------------------------------------------------------------- 1 | from .optimization_algorithm_base import OptimizationAlgorithmBase 2 | import hyperopt 3 | import functools 4 | import random 5 | import numpy 6 | import numpy.random 7 | from hypermax.hyperparameter import Hyperparameter 8 | 9 | class RandomSearchOptimizer(OptimizationAlgorithmBase): 10 | def recommendNextParameters(self, hyperparameterSpace, results, currentTrials, lockedValues=None): 11 | if lockedValues is None: 12 | lockedValues = {} 13 | 14 | rstate = numpy.random.RandomState(seed=int(random.randint(1, 2 ** 32 - 1))) 15 | 16 | trials = self.convertResultsToTrials(hyperparameterSpace, results) 17 | 18 | space = Hyperparameter(hyperparameterSpace).createHyperoptSpace(lockedValues) 19 | 20 | params = {} 21 | def sample(parameters): 22 | nonlocal params 23 | params = parameters 24 | return {"loss": 0.5, 'status': 'ok'} 25 | 26 | hyperopt.fmin(fn=sample, 27 | space=space, 28 | algo=functools.partial(hyperopt.rand.suggest), 29 | max_evals=1, 30 | trials=trials, 31 | rstate=rstate, 32 | show_progressbar=False) 33 | 
return params 34 | 35 | -------------------------------------------------------------------------------- /hypermax/algorithms/tpe_optimizer.py: -------------------------------------------------------------------------------- 1 | from .optimization_algorithm_base import OptimizationAlgorithmBase 2 | import hyperopt 3 | import functools 4 | import random 5 | import numpy 6 | import numpy.random 7 | from hypermax.hyperparameter import Hyperparameter 8 | 9 | class TPEOptimizer(OptimizationAlgorithmBase): 10 | 11 | def recommendNextParameters(self, hyperparameterSpace, results, currentTrials, lockedValues=None): 12 | if lockedValues is None: 13 | lockedValues = {} 14 | 15 | rstate = numpy.random.RandomState(seed=int(random.randint(1, 2 ** 32 - 1))) 16 | 17 | trials = self.convertResultsToTrials(hyperparameterSpace, results) 18 | 19 | space = Hyperparameter(hyperparameterSpace).createHyperoptSpace(lockedValues) 20 | 21 | params = {} 22 | def sample(parameters): 23 | nonlocal params 24 | params = parameters 25 | return {"loss": 0.5, 'status': 'ok'} 26 | 27 | hyperopt.fmin(fn=sample, 28 | space=space, 29 | algo=functools.partial(hyperopt.tpe.suggest, n_EI_candidates=24, gamma=0.25), 30 | max_evals=1, 31 | trials=trials, 32 | rstate=rstate, 33 | show_progressbar=False) 34 | return params 35 | 36 | -------------------------------------------------------------------------------- /hypermax/atpe_models/model-gamma-configuration.json: -------------------------------------------------------------------------------- 1 | {"origStddev": 0.40833300088997165, "origMean": 1.5122888341339762, "predStddev": 0.19062742851389658, "predMean": 1.512289493861218} -------------------------------------------------------------------------------- /hypermax/atpe_models/model-nEICandidates-configuration.json: -------------------------------------------------------------------------------- 1 | {"origStddev": 10.2721906051763, "origMean": 15.681163916118624, "predStddev": 1.896402761373421, "predMean": 15.681152536464452} -------------------------------------------------------------------------------- /hypermax/atpe_models/model-resultFilteringAgeMultiplier-configuration.json: -------------------------------------------------------------------------------- 1 | {"origStddev": 1.0287598782669287, "origMean": 2.5207235334555453, "predStddev": 0.12267867585236811, "predMean": 2.522910592328283} -------------------------------------------------------------------------------- /hypermax/atpe_models/model-resultFilteringLossRankMultiplier-configuration.json: -------------------------------------------------------------------------------- 1 | {"origStddev": 1.0498869166676732, "origMean": 2.694885551751099, "predStddev": 0.17790475888854926, "predMean": 2.740541794835421} -------------------------------------------------------------------------------- /hypermax/atpe_models/model-resultFilteringMode-configuration.json: -------------------------------------------------------------------------------- 1 | {"origStddevs": {"age": 1.0169710833621914, "loss_rank": 0.8153492973283286, "none": 1.023112739996102, "random": 0.8958356241521069}, "origMeans": {"age": 0.30943222306915075, "loss_rank": 0.16437257369755198, "none": 0.31634995479765304, "random": 0.20984524843564425}, "predStddevs": {"age": 0.011422987080232332, "loss_rank": 0.01738393407162186, "none": 0.015779382903844556, "random": 0.007682919343794938}, "predMeans": {"age": 0.27106052994162017, "loss_rank": 0.21985560902998877, "none": 0.2736715532061568, "random": 
0.2354123078222343}} -------------------------------------------------------------------------------- /hypermax/atpe_models/model-resultFilteringRandomProbability-configuration.json: -------------------------------------------------------------------------------- 1 | {"origStddev": 0.07735245101820602, "origMean": 0.8080440108126374, "predStddev": 0.006603064486510652, "predMean": 0.8085470436044462} -------------------------------------------------------------------------------- /hypermax/atpe_models/model-secondaryCorrelationExponent-configuration.json: -------------------------------------------------------------------------------- 1 | {"origStddev": 0.6788704432925833, "origMean": 1.9577070001595378, "predStddev": 0.07578445125269115, "predMean": 1.9577070166261366} -------------------------------------------------------------------------------- /hypermax/atpe_models/model-secondaryCorrelationMultiplier-configuration.json: -------------------------------------------------------------------------------- 1 | {"origStddev": 0.48376227975872604, "origMean": 1.0809729221833093, "predStddev": 0.08944618720745286, "predMean": 1.0823238370241313} -------------------------------------------------------------------------------- /hypermax/atpe_models/model-secondaryCutoff-configuration.json: -------------------------------------------------------------------------------- 1 | {"origStddev": 0.5641081216461193, "origMean": 0.04246760498466665, "predStddev": 0.12379219258476692, "predMean": 0.04246755473153523} -------------------------------------------------------------------------------- /hypermax/atpe_models/model-secondaryFixedProbability-configuration.json: -------------------------------------------------------------------------------- 1 | {"origStddev": 0.1801639083218247, "origMean": 0.535642367950544, "predStddev": 0.043833249803254634, "predMean": 0.5310744026400491} -------------------------------------------------------------------------------- /hypermax/atpe_models/model-secondaryLockingMode-configuration.json: -------------------------------------------------------------------------------- 1 | {"origStddevs": {"random": 0.6710207329386981, "top": 0.6710207329386981}, "origMeans": {"random": 0.27666052151099924, "top": 0.7233394784890007}, "predStddevs": {"random": 0.004052769639310346, "top": 0.004052769639310366}, "predMeans": {"random": 0.4888355959206883, "top": 0.5111644040793116}} -------------------------------------------------------------------------------- /hypermax/atpe_models/model-secondaryLockingMode.txt: -------------------------------------------------------------------------------- 1 | tree 2 | version=v2 3 | num_class=2 4 | num_tree_per_iteration=2 5 | label_index=0 6 | max_feature_idx=118 7 | objective=multiclass num_class:2 8 | feature_names=all_correlation_best_percentile25_ratio all_correlation_best_percentile50_ratio all_correlation_best_percentile75_ratio all_correlation_kurtosis all_correlation_percentile5_percentile25_ratio all_correlation_skew all_correlation_stddev_best_ratio all_correlation_stddev_median_ratio all_loss_best_percentile25_ratio all_loss_best_percentile50_ratio all_loss_best_percentile75_ratio all_loss_kurtosis all_loss_percentile5_percentile25_ratio all_loss_skew all_loss_stddev_best_ratio all_loss_stddev_median_ratio log10_cardinality recent_10_correlation_best_percentile25_ratio recent_10_correlation_best_percentile50_ratio recent_10_correlation_best_percentile75_ratio recent_10_correlation_kurtosis 
recent_10_correlation_percentile5_percentile25_ratio recent_10_correlation_skew recent_10_correlation_stddev_best_ratio recent_10_correlation_stddev_median_ratio recent_10_loss_best_percentile25_ratio recent_10_loss_best_percentile50_ratio recent_10_loss_best_percentile75_ratio recent_10_loss_kurtosis recent_10_loss_percentile5_percentile25_ratio recent_10_loss_skew recent_10_loss_stddev_best_ratio recent_10_loss_stddev_median_ratio recent_15%_correlation_best_percentile25_ratio recent_15%_correlation_best_percentile50_ratio recent_15%_correlation_best_percentile75_ratio recent_15%_correlation_kurtosis recent_15%_correlation_percentile5_percentile25_ratio recent_15%_correlation_skew recent_15%_correlation_stddev_best_ratio recent_15%_correlation_stddev_median_ratio recent_15%_loss_best_percentile25_ratio recent_15%_loss_best_percentile50_ratio recent_15%_loss_best_percentile75_ratio recent_15%_loss_kurtosis recent_15%_loss_percentile5_percentile25_ratio recent_15%_loss_skew recent_15%_loss_stddev_best_ratio recent_15%_loss_stddev_median_ratio recent_25_correlation_best_percentile25_ratio recent_25_correlation_best_percentile50_ratio recent_25_correlation_best_percentile75_ratio recent_25_correlation_kurtosis recent_25_correlation_percentile5_percentile25_ratio recent_25_correlation_skew recent_25_correlation_stddev_best_ratio recent_25_correlation_stddev_median_ratio recent_25_loss_best_percentile25_ratio recent_25_loss_best_percentile50_ratio recent_25_loss_best_percentile75_ratio recent_25_loss_kurtosis recent_25_loss_percentile5_percentile25_ratio recent_25_loss_skew recent_25_loss_stddev_best_ratio recent_25_loss_stddev_median_ratio top_10%_correlation_best_percentile25_ratio top_10%_correlation_best_percentile50_ratio top_10%_correlation_best_percentile75_ratio top_10%_correlation_kurtosis top_10%_correlation_percentile5_percentile25_ratio top_10%_correlation_skew top_10%_correlation_stddev_best_ratio top_10%_correlation_stddev_median_ratio top_10%_loss_best_percentile25_ratio top_10%_loss_best_percentile50_ratio top_10%_loss_best_percentile75_ratio top_10%_loss_kurtosis top_10%_loss_percentile5_percentile25_ratio top_10%_loss_skew top_10%_loss_stddev_best_ratio top_10%_loss_stddev_median_ratio top_20%_correlation_best_percentile25_ratio top_20%_correlation_best_percentile50_ratio top_20%_correlation_best_percentile75_ratio top_20%_correlation_kurtosis top_20%_correlation_percentile5_percentile25_ratio top_20%_correlation_skew top_20%_correlation_stddev_best_ratio top_20%_correlation_stddev_median_ratio top_20%_loss_best_percentile25_ratio top_20%_loss_best_percentile50_ratio top_20%_loss_best_percentile75_ratio top_20%_loss_kurtosis top_20%_loss_percentile5_percentile25_ratio top_20%_loss_skew top_20%_loss_stddev_best_ratio top_20%_loss_stddev_median_ratio top_30%_correlation_best_percentile25_ratio top_30%_correlation_best_percentile50_ratio top_30%_correlation_best_percentile75_ratio top_30%_correlation_kurtosis top_30%_correlation_percentile5_percentile25_ratio top_30%_correlation_skew top_30%_correlation_stddev_best_ratio top_30%_correlation_stddev_median_ratio top_30%_loss_best_percentile25_ratio top_30%_loss_best_percentile50_ratio top_30%_loss_best_percentile75_ratio top_30%_loss_kurtosis top_30%_loss_percentile5_percentile25_ratio top_30%_loss_skew top_30%_loss_stddev_best_ratio top_30%_loss_stddev_median_ratio resultFilteringMode_age resultFilteringMode_loss_rank resultFilteringMode_none resultFilteringMode_random secondaryProbabilityMode_correlation 
secondaryProbabilityMode_fixed 9 | feature_infos=[-9.0793999999999997:106.40000000000001] [-6.1543999999999999:907.00999999999999] [-4.2420999999999998:4891.5] [-4.0632000000000001:42.643000000000001] [-11.725:138.13999999999999] [-12.686:13.845000000000001] [-15.976000000000001:11.5] [-5.9271000000000003:1428.2] [-92019:46103] [-76234:115450] [-15884:122060] [-2.3426:70.739999999999995] [-81702:47027] [-8.2988:19.844000000000001] [-537830:1444500] [-78181:1104700] [-4.9066000000000001:5.7077999999999998] [-4.2309000000000001:1690.3] [-2.4914999999999998:363.81999999999999] [-2.2536:422.11000000000001] [-2.5891000000000002:34.893999999999998] [-5.0574000000000003:1705.5] [-9.4838000000000005:11.569000000000001] [-15.170999999999999:8.0465] [-2.4289999999999998:353.56] [-15750:17733] [-24245:129280] [-29342:94593] [-1.7628999999999999:2.0127000000000002] [-12687:18631] [-6.274:1.8354999999999999] [-378090:1398600] [-339220:1724900] [-4.3403:8764.7999999999993] [-2.6692:4616.3000000000002] [-2.3054999999999999:8748.2999999999993] [-2.5735000000000001:33.960000000000001] [-5.2473000000000001:9928.8999999999996] [-11.114000000000001:11.558] [-14.411:7.4763999999999999] [-2.6036000000000001:4298.5] [-25805:15193] [-71151:398300] [-232340:102990] [-1.6065:49.856000000000002] [-23318:13028] [-7.5063000000000004:12.785] [-779600:1158500] [-3854900:1321600] [-4.9035000000000002:9369.6000000000004] [-3.0550999999999999:3670.5] [-2.6476999999999999:4790.8000000000002] [-2.8029999999999999:42.148000000000003] [-6.0103999999999997:9025.7999999999993] [-10.73:12.619999999999999] [-13.798999999999999:7.9203999999999999] [-2.8595999999999999:3237.1999999999998] [-110400:32742] [-54569:50533] [-25601:21684] [-1.9413:3.7551999999999999] [-121220:7508.6000000000004] [-9.4586000000000006:3.0712000000000002] [-777050:1005600] [-4907700:2416500] [-3.5528:6421.6000000000004] [-2.3841000000000001:3859.3000000000002] [-1.8844000000000001:2265.0999999999999] [-2.2686999999999999:28.308] [-4.1837:6406.6000000000004] [-13.297000000000001:10.804] [-13.407999999999999:6.319] [-2.3730000000000002:3342.0999999999999] [-22397:39663] [-37882:31527] [-54135:285190] [-3.0364:128.66999999999999] [-21922:40673] [-24.335000000000001:19.193000000000001] [-555170:366600] [-27388:44588] [-3.9630000000000001:22230] [-2.3159000000000001:13472] [-1.9112:3249.8000000000002] [-2.4296000000000002:30.291] [-4.7637999999999998:13939] [-14.045:11.15] [-14.471:7.2553999999999998] [-2.3260000000000001:9986.7999999999993] [-80788:9101.8999999999996] [-96031:28600] [-78856:27997] [-3.8111000000000002:275.38999999999999] [-38099:9126.2999999999993] [-36.381999999999998:30.282] [-440240:271940] [-31583:88057] [-4.4935999999999998:76437] [-2.3953000000000002:17238] [-2.0042:18072] [-2.6255999999999999:30.370000000000001] [-5.4583000000000004:76103] [-11.244999999999999:10.606999999999999] [-14.869999999999999:7.6917] [-2.3576999999999999:17510] [-63240:23966] [-27011:25286] [-308980:655530] [-4.2102000000000004:378.39999999999998] [-85724:27764] [-41.186:32.061] [-557080:1121200] [-23140:353280] [0:1] [0:1] [0:1] [0:1] [0:1] [0:1] 10 | tree_sizes=2651 2600 11 | 12 | Tree=0 13 | num_leaves=31 14 | num_cat=0 15 | split_feature=117 69 75 44 92 76 76 91 7 44 44 10 71 43 44 80 91 29 66 35 35 40 5 10 63 13 13 13 41 43 16 | split_gain=889.122 737.344 232.12 212.89 143.227 116.457 70.3686 63.8356 58.3009 58.1901 39.3703 38.9642 33.9838 29.2346 27.9575 23.6714 21.5796 19.5014 18.7167 18.0664 17.8571 17.5634 17.4667 17.1134 22.4171 21.3843 15.4059 13.7394 
13.4744 13.5818 17 | threshold=1.0000000180025095e-35 -1.8729499999999997 0.15360500000000002 -0.72895499999999991 -3.8109999999999995 -1.5781499999999997 -3.0362999999999993 0.47705000000000003 -0.3998549999999999 -0.36117999999999995 -0.36117999999999995 -0.86797999999999986 1.7775500000000004 -0.59635999999999989 0.038387500000000005 -1.03535 1.0080500000000001 3.5127500000000005 -1.5126999999999997 -1.4100499999999998 -1.6427499999999997 -1.5144499999999999 2.5897500000000004 1.3038500000000004 -1.2400499999999999 -0.10846999999999998 0.75078500000000015 -0.30890999999999996 4.3303000000000011 1.9100500000000002 18 | decision_type=2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 19 | left_child=1 3 9 7 -2 21 -6 15 23 12 16 14 18 -5 -10 -1 -7 -9 -3 27 26 -4 -13 24 -8 -26 -12 -11 29 -18 20 | right_child=4 2 5 13 6 10 8 17 11 19 20 22 -14 -15 -16 -17 28 -19 -20 -21 -22 -23 -24 -25 25 -27 -28 -29 -30 -31 21 | leaf_value=-0.012148962148962149 -0.007113064203783704 -0.028217244195543112 -0.013056478405315614 -0.030163977783655118 -0.010834840265220013 -0.026945198913564467 -0.024809599080327635 -0.0081763449213439511 -0.016723793201942304 -0.029542612113632308 -0.024472226381194791 -0.016074259491030456 -0.02389352148813342 -0.022087952026167548 -0.023184792770333439 -0.018983152827918171 -0.021841755319148937 -0.029039301310043671 -0.032725354235755198 -0.035837526959022287 -0.030739903981926012 -0.019976560835286598 -0.011776192902020718 -0.017777280858676209 -0.017796439706326061 -0.02230370237882311 -0.029400563007120385 -0.033701052016364696 -0.031530782029950084 -0.013386524822695037 22 | leaf_count=4095 6713 8571 3010 3781 6636 6259 6959 10298 7002 5597 6679 23970 4677 5503 6418 6648 6016 229 9951 13910 10623 4693 5889 8944 9943 11182 6039 13688 601 1128 23 | internal_value=0 -0.521455 -0.579464 -0.32886 -0.341292 -0.503774 -0.356659 -0.25087 -0.368224 -0.639926 -0.536431 -0.332979 -0.585586 -0.50754 -0.396274 -0.327562 -0.477149 -0.172604 -0.612785 -0.677903 -0.571998 -0.34545 -0.304531 -0.40942 -0.426577 -0.403645 -0.536248 -0.649883 -0.427243 -0.410134 24 | internal_count=225652 131996 101442 30554 93656 45048 86943 21270 80307 56394 37345 43279 23199 9284 13420 10743 14004 10527 18522 33195 23341 7703 29859 37028 28084 21125 12718 19285 7745 7144 25 | shrinkage=0.05 26 | 27 | 28 | Tree=1 29 | num_leaves=31 30 | num_cat=0 31 | split_feature=118 69 75 44 92 76 76 91 7 44 44 10 71 44 16 41 80 29 115 66 35 35 40 42 107 115 106 13 10 63 32 | split_gain=889.122 737.344 232.12 212.89 143.227 116.457 70.3686 63.8356 58.3009 58.1901 39.3703 38.9642 33.9838 27.9575 26.7561 25.2295 23.6714 19.5014 19.2975 18.7167 18.0664 17.8571 17.5634 16.738 16.576 15.6223 15.6191 15.4059 15.3757 18.7306 33 | threshold=1.0000000180025095e-35 -1.8729499999999997 0.15360500000000002 -0.72895499999999991 -3.8109999999999995 -1.5781499999999997 -3.0362999999999993 0.47705000000000003 -0.3998549999999999 -0.36117999999999995 -0.36117999999999995 -0.86797999999999986 1.7775500000000004 0.038387500000000005 -0.27687999999999996 0.78084500000000012 -1.03535 3.5127500000000005 1.0000000180025095e-35 -1.5126999999999997 -1.4100499999999998 -1.6427499999999997 -1.5144499999999999 1.8514000000000002 0.67773500000000009 1.0000000180025095e-35 2.7717000000000005 0.75078500000000015 1.2524500000000003 -1.1610499999999997 34 | decision_type=2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 35 | left_child=4 3 9 7 -1 22 -6 16 18 12 14 13 19 -10 24 -5 -2 -9 28 -3 -11 27 -4 -16 -7 -13 -25 -12 29 -8 36 | 
right_child=1 2 5 15 6 10 8 17 11 20 21 25 -14 -15 23 -17 -18 -19 -20 -21 -22 -23 -24 26 -26 -27 -28 -29 -30 -31 37 | leaf_value=0.007113064203783704 0.012148962148962149 0.028217244195543112 0.013056478405315614 0.028970588235294123 0.010834840265220013 0.025808314087759816 0.026391475274156841 0.0081763449213439511 0.016723793201942304 0.032494166450609284 0.024472226381194791 0.016378875088357066 0.02389352148813342 0.023184792770333439 0.027845263919016636 0.021595932802829357 0.018983152827918171 0.029039301310043671 0.01813627254509018 0.032725354235755198 0.035837526959022287 0.030739903981926012 0.019976560835286598 0.016482213438735179 0.01747988580327018 0.012956331443350245 0.036192468619246861 0.029400563007120385 0.018549032154855226 0.021284364261168386 38 | leaf_count=6713 4095 8571 3010 4760 6636 1732 4833 10298 7002 19285 6679 19806 4677 6418 6915 4524 6648 229 11976 9951 13910 10623 4693 1265 3853 10053 239 6039 6251 13968 39 | internal_value=0 0.521455 0.579464 0.32886 0.341292 0.503774 0.356659 0.25087 0.368224 0.639926 0.536431 0.332979 0.585586 0.396274 0.477149 0.50754 0.327562 0.172604 0.40942 0.612785 0.677903 0.571998 0.34545 0.527497 0.401253 0.304531 0.392287 0.536248 0.431742 0.451944 40 | internal_count=225652 131996 101442 30554 93656 45048 86943 21270 80307 56394 37345 43279 23199 13420 14004 9284 10743 10527 37028 18522 33195 23341 7703 8419 5585 29859 1504 12718 25052 18801 41 | shrinkage=0.05 42 | 43 | 44 | 45 | feature importances: 46 | recent_15%_loss_kurtosis=8 47 | all_loss_best_percentile75_ratio=4 48 | all_loss_skew=4 49 | recent_15%_correlation_best_percentile75_ratio=4 50 | top_10%_loss_kurtosis=4 51 | top_20%_loss_best_percentile75_ratio=3 52 | all_correlation_stddev_median_ratio=2 53 | resultFilteringMode_none=2 54 | top_20%_loss_kurtosis=2 55 | top_10%_loss_stddev_median_ratio=2 56 | top_10%_loss_best_percentile75_ratio=2 57 | top_10%_correlation_stddev_best_ratio=2 58 | top_10%_correlation_percentile5_percentile25_ratio=2 59 | top_10%_correlation_best_percentile50_ratio=2 60 | recent_25_loss_stddev_best_ratio=2 61 | recent_15%_loss_best_percentile75_ratio=2 62 | recent_15%_loss_best_percentile25_ratio=2 63 | recent_15%_correlation_stddev_median_ratio=2 64 | recent_10_loss_percentile5_percentile25_ratio=2 65 | recent_15%_loss_best_percentile50_ratio=1 66 | log10_cardinality=1 67 | top_30%_loss_best_percentile50_ratio=1 68 | top_30%_loss_best_percentile75_ratio=1 69 | all_correlation_skew=1 70 | secondaryProbabilityMode_correlation=1 71 | secondaryProbabilityMode_fixed=1 72 | 73 | pandas_categorical:null 74 | -------------------------------------------------------------------------------- /hypermax/atpe_models/model-secondaryProbabilityMode-configuration.json: -------------------------------------------------------------------------------- 1 | {"origStddevs": {"correlation": 0.9854599900728228, "fixed": 0.9854599900728228}, "origMeans": {"correlation": 0.4150461772995586, "fixed": 0.5849538227004414}, "predStddevs": {"correlation": 0.013611494491602764, "fixed": 0.013611494491602762}, "predMeans": {"correlation": 0.465920460856203, "fixed": 0.5340795391437969}} -------------------------------------------------------------------------------- /hypermax/atpe_models/model-secondaryTopLockingPercentile-configuration.json: -------------------------------------------------------------------------------- 1 | {"origStddev": 3.691694597208023, "origMean": 4.709783547661788, "predStddev": 0.3245556599082036, "predMean": 4.736867877490124} 
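The scalar model-*-configuration.json files above each pair the mean and standard deviation of the original ATPE parameter values (origMean, origStddev) with the same statistics for the corresponding model's raw predictions (predMean, predStddev); the categorical files (for example model-resultFilteringMode-configuration.json) store the same statistics per class. The snippet below is only an illustrative sketch of how statistics like these could be used to map a raw prediction back onto the original parameter scale; the rescale_prediction helper is hypothetical and not part of the codebase.

import json

def rescale_prediction(raw_prediction, configuration_path):
    # Hypothetical helper: load the origMean/origStddev/predMean/predStddev statistics for one ATPE parameter.
    with open(configuration_path) as f:
        stats = json.load(f)
    # Standardize the raw model output using the prediction statistics,
    # then map it onto the distribution of the original parameter values.
    standardized = (raw_prediction - stats['predMean']) / stats['predStddev']
    return standardized * stats['origStddev'] + stats['origMean']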
-------------------------------------------------------------------------------- /hypermax/atpe_models/scaling_model.json: -------------------------------------------------------------------------------- 1 | {"all_correlation_best_percentile25_ratio": {"scales": [0.16038456053199576], "means": [1.456195954458782], "variances": [0.025723207257041414]}, "all_correlation_best_percentile50_ratio": {"scales": [0.3606873383477958], "means": [2.219810404891774], "variances": [0.13009535604441733]}, "all_correlation_best_percentile75_ratio": {"scales": [0.9972836648830518], "means": [4.230555446187631], "variances": [0.9945747082425711]}, "all_correlation_kurtosis": {"scales": [0.27072420012857173], "means": [-0.899998076603646], "variances": [0.07329159253525497]}, "all_correlation_percentile5_percentile25_ratio": {"scales": [0.11323324047856805], "means": [1.327675584537161], "variances": [0.01282176674927722]}, "all_correlation_skew": {"scales": [0.2299342886746713], "means": [0.24294992926805067], "variances": [0.052869777108327076]}, "all_correlation_stddev_best_ratio": {"scales": [0.018045554036120164], "means": [0.28829748073469874], "variances": [0.0003256420204705328]}, "all_correlation_stddev_median_ratio": {"scales": [0.10805313934012126], "means": [0.6404459437925102], "variances": [0.01167548092125566]}, "all_loss_best_percentile25_ratio": {"scales": [0.045416363454625845], "means": [0.8419317515264435], "variances": [0.002062646069442674]}, "all_loss_best_percentile50_ratio": {"scales": [0.07209375749522764], "means": [0.7163671285168255], "variances": [0.005197509869780692]}, "all_loss_best_percentile75_ratio": {"scales": [0.10407170926103988], "means": [0.5284953339290357], "variances": [0.010830920668514413]}, "all_loss_kurtosis": {"scales": [4.021467570468929], "means": [7.422399458501732], "variances": [16.172201420333273]}, "all_loss_percentile5_percentile25_ratio": {"scales": [0.03181820723558104], "means": [0.8964821330560754], "variances": [0.0010123983116863819]}, "all_loss_skew": {"scales": [0.6452502778761038], "means": [2.8157823430708175], "variances": [0.4163479210991891]}, "all_loss_stddev_best_ratio": {"scales": [0.9357224209857341], "means": [1.9258802981706955], "variances": [0.8755764491354034]}, "all_loss_stddev_median_ratio": {"scales": [0.5574982850078507], "means": [1.3499947006597943], "variances": [0.31080433778669475]}, "log10_cardinality": {"scales": [2.0645024018822826], "means": [11.810894379204923], "variances": [4.262170167377714]}, "recent_10_correlation_best_percentile25_ratio": {"scales": [0.4738032915568156], "means": [2.0045939661649523], "variances": [0.22448955909007284]}, "recent_10_correlation_best_percentile50_ratio": {"scales": [1.5400713250651092], "means": [3.8371288323462505], "variances": [2.371819686287801]}, "recent_10_correlation_best_percentile75_ratio": {"scales": [2.9665987645663665], "means": [6.685482860376086], "variances": [8.800708229926693]}, "recent_10_correlation_kurtosis": {"scales": [0.6061072148492362], "means": [-0.4307200943092082], "variances": [0.3673659558922982]}, "recent_10_correlation_percentile5_percentile25_ratio": {"scales": [0.3436824978220852], "means": [1.7381334336213878], "variances": [0.11811765930922762]}, "recent_10_correlation_skew": {"scales": [0.33300148551178343], "means": [0.8602048409872681], "variances": [0.1108899893530545]}, "recent_10_correlation_stddev_best_ratio": {"scales": [0.02153545881776462], "means": [0.32671406573530865], "variances": [0.00046377598649163587]}, 
"recent_10_correlation_stddev_median_ratio": {"scales": [0.5270349562198672], "means": [1.2801600917709988], "variances": [0.2777658450776773]}, "recent_10_loss_best_percentile25_ratio": {"scales": [0.029429917430103317], "means": [0.9339465475112988], "variances": [0.000866120039942699]}, "recent_10_loss_best_percentile50_ratio": {"scales": [0.05100502200823097], "means": [0.870354674976161], "variances": [0.0026015122700601256]}, "recent_10_loss_best_percentile75_ratio": {"scales": [0.09355196796909773], "means": [0.7708246550083175], "variances": [0.008751970710891089]}, "recent_10_loss_kurtosis": {"scales": [1.8834231065768354], "means": [1.3202957485315912], "variances": [3.547282598387538]}, "recent_10_loss_percentile5_percentile25_ratio": {"scales": [0.022671230584603783], "means": [0.9488713827703725], "variances": [0.0005139846962202739]}, "recent_10_loss_skew": {"scales": [0.6565209754055988], "means": [1.4616412927363174], "variances": [0.43101979114751887]}, "recent_10_loss_stddev_best_ratio": {"scales": [0.7866787174163206], "means": [0.8636356956537466], "variances": [0.6188634044357871]}, "recent_10_loss_stddev_median_ratio": {"scales": [0.6258872170260898], "means": [0.6962434648361304], "variances": [0.3917348084366637]}, "recent_15%_correlation_best_percentile25_ratio": {"scales": [0.4465480946669543], "means": [1.9381398858921328], "variances": [0.19940520085068714]}, "recent_15%_correlation_best_percentile50_ratio": {"scales": [1.4125431623965377], "means": [3.770371284551781], "variances": [1.9952781856332116]}, "recent_15%_correlation_best_percentile75_ratio": {"scales": [2.9738229368831455], "means": [6.856161404326642], "variances": [8.843622859932298]}, "recent_15%_correlation_kurtosis": {"scales": [0.5760063667471138], "means": [-0.5176232728977542], "variances": [0.33178333453321057]}, "recent_15%_correlation_percentile5_percentile25_ratio": {"scales": [0.32158580177773277], "means": [1.6874629845644586], "variances": [0.10341742790502724]}, "recent_15%_correlation_skew": {"scales": [0.3271336534606477], "means": [0.8062784202796887], "variances": [0.10701642722651114]}, "recent_15%_correlation_stddev_best_ratio": {"scales": [0.022843935461112956], "means": [0.32920920493346284], "variances": [0.0005218453873514939]}, "recent_15%_correlation_stddev_median_ratio": {"scales": [0.4802849040250488], "means": [1.2504690065663135], "variances": [0.23067358903435034]}, "recent_15%_loss_best_percentile25_ratio": {"scales": [0.02751319003504129], "means": [0.9320372087875272], "variances": [0.0007569756259042953]}, "recent_15%_loss_best_percentile50_ratio": {"scales": [0.04636230520314481], "means": [0.8703625755259309], "variances": [0.0021494633437495487]}, "recent_15%_loss_best_percentile75_ratio": {"scales": [0.0859234974877634], "means": [0.7719641177302543], "variances": [0.0073828474205296845]}, "recent_15%_loss_kurtosis": {"scales": [2.5931508508106087], "means": [2.1658692643404502], "variances": [6.724431335059784]}, "recent_15%_loss_percentile5_percentile25_ratio": {"scales": [0.020562538918116696], "means": [0.9498792475124589], "variances": [0.0004228180067590637]}, "recent_15%_loss_skew": {"scales": [0.7522298217865837], "means": [1.7158214115512724], "variances": [0.5658497047850755]}, "recent_15%_loss_stddev_best_ratio": {"scales": [0.8505849786013118], "means": [1.0555625630148562], "variances": [0.723494805822194]}, "recent_15%_loss_stddev_median_ratio": {"scales": [0.6966983608923659], "means": [0.8736510448628352], "variances": [0.4853886060701094]}, 
"recent_25_correlation_best_percentile25_ratio": {"scales": [0.36676184246376914], "means": [1.7984003792519627], "variances": [0.1345142490874186]}, "recent_25_correlation_best_percentile50_ratio": {"scales": [1.0536005288050014], "means": [3.2189058245694855], "variances": [1.1100740742981785]}, "recent_25_correlation_best_percentile75_ratio": {"scales": [2.1671255133381195], "means": [5.737831444106486], "variances": [4.696432990561007]}, "recent_25_correlation_kurtosis": {"scales": [0.4967850974973633], "means": [-0.6075034649566534], "variances": [0.24679543309546478]}, "recent_25_correlation_percentile5_percentile25_ratio": {"scales": [0.26373575013691974], "means": [1.5851594578268167], "variances": [0.06955654590028376]}, "recent_25_correlation_skew": {"scales": [0.3182686344134371], "means": [0.6735434824058056], "variances": [0.1012949236513941]}, "recent_25_correlation_stddev_best_ratio": {"scales": [0.023021016610986732], "means": [0.3176636000639736], "variances": [0.0005299672058033271]}, "recent_25_correlation_stddev_median_ratio": {"scales": [0.36241019721211626], "means": [1.0363541901944826], "variances": [0.131341151043325]}, "recent_25_loss_best_percentile25_ratio": {"scales": [0.03576257521915464], "means": [0.9045170980759313], "variances": [0.0012789617863056934]}, "recent_25_loss_best_percentile50_ratio": {"scales": [0.06044958455037848], "means": [0.8268662351414674], "variances": [0.0036541522723133567]}, "recent_25_loss_best_percentile75_ratio": {"scales": [0.1019756939720761], "means": [0.7055387815772877], "variances": [0.01039904216108652]}, "recent_25_loss_kurtosis": {"scales": [3.8689849465297392], "means": [5.512701380663076], "variances": [14.969044516473728]}, "recent_25_loss_percentile5_percentile25_ratio": {"scales": [0.025789910170064073], "means": [0.9335910937125049], "variances": [0.0006651194665799742]}, "recent_25_loss_skew": {"scales": [0.7232465030961711], "means": [2.473629285306328], "variances": [0.5230855042408399]}, "recent_25_loss_stddev_best_ratio": {"scales": [0.8248186866843087], "means": [1.4569924247561816], "variances": [0.6803258659036278]}, "recent_25_loss_stddev_median_ratio": {"scales": [0.626753605651394], "means": [1.1512837324453424], "variances": [0.3928200821970232]}, "top_10%_correlation_best_percentile25_ratio": {"scales": [0.6013240565413908], "means": [2.136370213367019], "variances": [0.36159062097539374]}, "top_10%_correlation_best_percentile50_ratio": {"scales": [1.7657466984645882], "means": [4.209701931252082], "variances": [3.1178614031385936]}, "top_10%_correlation_best_percentile75_ratio": {"scales": [4.247932988928543], "means": [8.004903881801011], "variances": [18.044934678427385]}, "top_10%_correlation_kurtosis": {"scales": [0.7861810295007009], "means": [-0.21641733767409863], "variances": [0.6180806111467819]}, "top_10%_correlation_percentile5_percentile25_ratio": {"scales": [0.4409044157549606], "means": [1.8446275571998783], "variances": [0.19439670383222316]}, "top_10%_correlation_skew": {"scales": [0.3590376848047355], "means": [1.0240393463405775], "variances": [0.12890805910994463]}, "top_10%_correlation_stddev_best_ratio": {"scales": [0.02534629652404277], "means": [0.339837119263161], "variances": [0.0006424347474847025]}, "top_10%_correlation_stddev_median_ratio": {"scales": [0.5961070151179171], "means": [1.4145608156101113], "variances": [0.3553435734727926]}, "top_10%_loss_best_percentile25_ratio": {"scales": [0.013392727400873222], "means": [0.9721505821196115], "variances": 
[0.0001793651472341004]}, "top_10%_loss_best_percentile50_ratio": {"scales": [0.01918552095122843], "means": [0.9526283006385371], "variances": [0.000368084214170025]}, "top_10%_loss_best_percentile75_ratio": {"scales": [0.023120163624131482], "means": [0.937053870784058], "variances": [0.0005345419660066125]}, "top_10%_loss_kurtosis": {"scales": [0.3429073545293775], "means": [-0.9588076305297099], "variances": [0.11758545379033619]}, "top_10%_loss_percentile5_percentile25_ratio": {"scales": [0.009639942673483566], "means": [0.9795329067402444], "variances": [9.292849474804948e-05]}, "top_10%_loss_skew": {"scales": [0.2587214110731812], "means": [-0.39116551264085525], "variances": [0.066936768547698]}, "top_10%_loss_stddev_best_ratio": {"scales": [0.010137291375757987], "means": [0.027030647146104043], "variances": [0.00010276467643701727]}, "top_10%_loss_stddev_median_ratio": {"scales": [0.009158579004378284], "means": [0.025741891937304372], "variances": [8.387956937943871e-05]}, "top_20%_correlation_best_percentile25_ratio": {"scales": [0.5161974160793928], "means": [2.045677562753425], "variances": [0.26645977236704177]}, "top_20%_correlation_best_percentile50_ratio": {"scales": [1.7869799744726187], "means": [4.138468555834506], "variances": [3.193297429166161]}, "top_20%_correlation_best_percentile75_ratio": {"scales": [4.017216802398603], "means": [7.6778299530450145], "variances": [16.138030837473654]}, "top_20%_correlation_kurtosis": {"scales": [0.673634630318629], "means": [-0.36331951237235566], "variances": [0.45378361516451593]}, "top_20%_correlation_percentile5_percentile25_ratio": {"scales": [0.3692696975388307], "means": [1.7591336882118704], "variances": [0.13636010952041952]}, "top_20%_correlation_skew": {"scales": [0.3403592869577229], "means": [0.8997681943326791], "variances": [0.11584444421836955]}, "top_20%_correlation_stddev_best_ratio": {"scales": [0.02301375821479847], "means": [0.33302531064097285], "variances": [0.000529633067169204]}, "top_20%_correlation_stddev_median_ratio": {"scales": [0.5950188647826977], "means": [1.3839883245452167], "variances": [0.3540474494472903]}, "top_20%_loss_best_percentile25_ratio": {"scales": [0.019665996317326764], "means": [0.9512382978604983], "variances": [0.0003867514111531099]}, "top_20%_loss_best_percentile50_ratio": {"scales": [0.027240545654885956], "means": [0.9208763269245608], "variances": [0.000742047327575926]}, "top_20%_loss_best_percentile75_ratio": {"scales": [0.03310925636674644], "means": [0.8959705799151324], "variances": [0.0010962228571589397]}, "top_20%_loss_kurtosis": {"scales": [0.3083527270722493], "means": [-0.8248432990042504], "variances": [0.09508140429289307]}, "top_20%_loss_percentile5_percentile25_ratio": {"scales": [0.013439525513198667], "means": [0.9662192877821436], "variances": [0.00018062084601991787]}, "top_20%_loss_skew": {"scales": [0.23856621051436655], "means": [-0.3671570616903414], "variances": [0.05691383679918506]}, "top_20%_loss_stddev_best_ratio": {"scales": [0.015439431404495654], "means": [0.04342300024364654], "variances": [0.00023837604209412663]}, "top_20%_loss_stddev_median_ratio": {"scales": [0.013118735120231846], "means": [0.039996834137123335], "variances": [0.00017210121115480447]}, "top_30%_correlation_best_percentile25_ratio": {"scales": [0.4334681003146475], "means": [1.9478306560008507], "variances": [0.18789459399038932]}, "top_30%_correlation_best_percentile50_ratio": {"scales": [1.662852095343671], "means": [3.9830074044140766], "variances": 
[2.7650770909888367]}, "top_30%_correlation_best_percentile75_ratio": {"scales": [3.4606744098410696], "means": [6.935848231428596], "variances": [11.976267370928836]}, "top_30%_correlation_kurtosis": {"scales": [0.5639719646618984], "means": [-0.5192514809363189], "variances": [0.31806437692460154]}, "top_30%_correlation_percentile5_percentile25_ratio": {"scales": [0.3093872767897915], "means": [1.688720684177907], "variances": [0.09572048703940306]}, "top_30%_correlation_skew": {"scales": [0.32467260492731137], "means": [0.8047728932136113], "variances": [0.10541230039028603]}, "top_30%_correlation_stddev_best_ratio": {"scales": [0.022161957894448997], "means": [0.3295372338386342], "variances": [0.0004911523777153302]}, "top_30%_correlation_stddev_median_ratio": {"scales": [0.5519140975329716], "means": [1.3012333590062939], "variances": [0.3046091710556345]}, "top_30%_loss_best_percentile25_ratio": {"scales": [0.024283910147047137], "means": [0.9331706215972884], "variances": [0.0005897082920298589]}, "top_30%_loss_best_percentile50_ratio": {"scales": [0.03379012979421067], "means": [0.8931779564390308], "variances": [0.0011417728715096037]}, "top_30%_loss_best_percentile75_ratio": {"scales": [0.041445471166722744], "means": [0.8587005308195698], "variances": [0.0017177270802316465]}, "top_30%_loss_kurtosis": {"scales": [0.2828974192733158], "means": [-0.8070006412888842], "variances": [0.08003094983150222]}, "top_30%_loss_percentile5_percentile25_ratio": {"scales": [0.016261874556073794], "means": [0.9552261341660463], "variances": [0.00026444856407748023]}, "top_30%_loss_skew": {"scales": [0.22792961031616177], "means": [-0.2887356458864833], "variances": [0.051951907258877356]}, "top_30%_loss_stddev_best_ratio": {"scales": [0.02125503466025269], "means": [0.06021030532646235], "variances": [0.00045177649840854315]}, "top_30%_loss_stddev_median_ratio": {"scales": [0.01714050068827318], "means": [0.05381408416633305], "variances": [0.0002937967638446934]}} -------------------------------------------------------------------------------- /hypermax/cli.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import hypermax.cui 3 | import json 4 | import time 5 | import os.path 6 | import hyperopt 7 | from pprint import pprint 8 | from hypermax.optimizer import Optimizer 9 | import csv 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description='Provide configuration options for Hypermax') 13 | parser.add_argument('configuration_file', metavar='configuration_file', type=str, nargs=1, help='The JSON based configuration file which is used to configure the hyper-parameter search.') 14 | parser.add_argument('results_directory', metavar='results_directory', type=str, nargs='?', help='The directory of your existing results to reload and restart from.') 15 | 16 | args = parser.parse_args() 17 | return args 18 | 19 | 20 | def main(): 21 | args = parse_args() 22 | conf = args.configuration_file[0] 23 | dir_res = args.results_directory 24 | 25 | execute(conf, dir_res) 26 | 27 | def execute(conf, dir_res=None): 28 | with open(conf) as file: 29 | config_data = json.load(file) 30 | 31 | optimizer = Optimizer(config_data) 32 | 33 | if dir_res: 34 | results_path = os.path.join(dir_res, 'results.csv') 35 | if os.path.exists(results_path): 36 | optimizer.importResultsCSV(results_path) 37 | 38 | guidance_path = os.path.join(dir_res, 'guidance.json') 39 | if os.path.exists(guidance_path): 40 | optimizer.importGuidanceJSON(guidance_path) 
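# When resuming, point the results analyzer at the existing directory so new artifacts land alongside the imported results.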
41 | 42 | optimizer.resultsAnalyzer.directory = dir_res 43 | else: 44 | # See if we can find an existing results directory here. 45 | directories = os.listdir('.') 46 | resultsDirectories = sorted([directory for directory in directories if directory.startswith('results_')], key=lambda dir: int(dir[len('results_'):])) 47 | resultsDirectories.reverse() # Reversed - examine the latest results directories first 48 | for directory in resultsDirectories: 49 | if os.path.exists(os.path.join(directory, 'search.json')): 50 | # Check to see if the configuration file is the same 51 | config = json.load(open(os.path.join(directory, 'search.json'), 'rt')) 52 | 53 | # Compare the config json string (in canonical, sorted form) with the one we've received from the user. If the same, we recommend to the user that they continue with this search 54 | if json.dumps(config_data, sort_keys=True) == json.dumps(config, sort_keys=True): 55 | prompt = input('It appears there was already an in-progress search with this configuration. Would you like to continue the existing hyper parameter search (' + directory + ")? [yes/no/y/n]\n") 56 | if 'y' in prompt: 57 | optimizer.importResultsCSV(os.path.join(directory, 'results.csv')) 58 | if os.path.exists(os.path.join(directory, 'guidance.json')): 59 | optimizer.importGuidanceJSON(os.path.join(directory, 'guidance.json')) 60 | optimizer.resultsAnalyzer.directory = directory 61 | break 62 | 63 | optimizer.runOptimizationThread() 64 | 65 | if config_data.get("ui", {}).get("enabled", True): 66 | hypermax.cui.launchHypermaxUI(optimizer) 67 | 68 | -------------------------------------------------------------------------------- /hypermax/configuration.py: -------------------------------------------------------------------------------- 1 | from hypermax.hyperparameter import Hyperparameter 2 | 3 | 4 | class Configuration: 5 | def __init__(self, data): 6 | self.data = data 7 | 8 | 9 | 10 | def createHyperparameterSpace(self, lockedValues=None): 11 | param = Hyperparameter(self.data['hyperparameters']) 12 | 13 | space = param.createHyperoptSpace(lockedValues) 14 | 15 | return space 16 | 17 | -------------------------------------------------------------------------------- /hypermax/execution.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import json 3 | import atexit 4 | import jsonschema 5 | import psutil 6 | import sys 7 | import random 8 | import time 9 | import datetime 10 | import os 11 | 12 | 13 | class Execution: 14 | """ 15 | This class represents a single execution of a model with a given set of hyper-parameters. It takes care of 16 | managing the model process and standardizing the results. 17 | """ 18 | 19 | def __init__(self, config, parameters, worker_n=0): 20 | """ 21 | Initialize this execution with the given configuration and the given parameters. 22 | 23 | :param config: The Execution configuration. See configurationSchema() or the README file for more information. 24 | :param parameters: The hyper parameters for this model. 25 | :param worker_n: When executing models in parallel, this defines which worker this execution is. 26 | """ 27 | # jsonschema.validate(config, self.configurationSchema()) 28 | 29 | # Perform one additional validation 30 | if ('auto_kill_max_time' in config or 'auto_kill_max_ram' in config or 'auto_kill_max_system_ram' in config) and ('auto_kill_loss' not in config): 31 | raise ValueError("Configuration for model execution has an auto_kill parameter, but is missing the auto_kill_loss.
Please set an auto_kill_loss to use automatic kill.") 32 | 33 | self.config = config 34 | self.parameters = parameters 35 | 36 | self.process = None 37 | self.result = None 38 | self.startTime = None 39 | self.endTime = None 40 | self.killed = False 41 | self.worker_n = worker_n 42 | self.scriptToken = None 43 | 44 | @classmethod 45 | def configurationSchema(self): 46 | """ This method returns the configuration schema for the execution module. The schema 47 | is a standard JSON-schema object.""" 48 | return { 49 | "type": "object", 50 | "oneOf": [ 51 | { 52 | "properties": { 53 | "type": { 54 | "type": "string", 55 | "constant": "python_function" 56 | }, 57 | "module": {"type": "string"}, 58 | "name": {"type": "string"}, 59 | "func": {} 60 | }, 61 | "required": ['type', 'module', 'name'] 62 | }, 63 | { 64 | "properties": { 65 | "type": { 66 | "type": "string", 67 | "constant": "remote" 68 | }, 69 | "hosts": { 70 | "type": "array", 71 | "items": {"type": "string"} 72 | }, 73 | "command": {"type": "string"}, 74 | "rsync": { 75 | "type": "object", 76 | "properties": { 77 | "from": {"type": "string"}, 78 | "to": {"type": "string"} 79 | }, 80 | "required": ['from', 'to'] 81 | } 82 | }, 83 | "required": ['type', 'hosts', 'command'] 84 | } 85 | ], 86 | "properties": { 87 | "auto_kill_max_time": {"type": "number"}, 88 | "auto_kill_max_ram": {"type": "number"}, 89 | "auto_kill_max_system_ram": {"type": "number"}, 90 | "auto_kill_loss": {"type": "number"} 91 | } 92 | } 93 | 94 | def generateScriptToken(self): 95 | """ 96 | The script token is used to easily differentiate when the log-output from the model is finished, and its results JSON object 97 | is now being printed. 98 | 99 | The token is just a random string that is extremely unlikely to come up in log output, used to define the cutoff line. 100 | 101 | Its saved to self.scriptToken 102 | 103 | :return: The generated script token 104 | """ 105 | characters = 'abcdefghijklmnopqrstuvwxyz123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ' 106 | self.scriptToken = ''.join([random.choice(characters) for c in range(64)]) 107 | return self.scriptToken 108 | 109 | def createPythonFunctionScript(self): 110 | """ 111 | This creates a Python script that will be executed to call the given Python function. 112 | 113 | :return: A string representing the Python script 114 | """ 115 | self.generateScriptToken() 116 | script = "from " + self.config['module'] + " import " + self.config['name'] + "\n" 117 | script += "import json\n" 118 | script += "result = " + self.config['name'] + "(" + json.dumps(self.parameters) + ")\n" 119 | script += "print(\"" + self.scriptToken + "\")\n" 120 | script += "print(json.dumps(result))\n" 121 | return script 122 | 123 | def startSubprocess(self): 124 | """ 125 | This function starts a subprocess to execute a model. 126 | 127 | :return: The subprocess.Popen object representing the subprocess. Also stored in self.process 128 | """ 129 | if self.config['type'] == 'python_function': 130 | process = subprocess.Popen(['python3', '-c', self.createPythonFunctionScript()], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 131 | atexit.register(lambda: process.kill()) 132 | 133 | # Set process affinities - hypermax in one, the model in the rest. Prevents them from causing cache conflicts. 
134 | # if psutil.cpu_count() > 2: 135 | # processUtil = psutil.Process(process.pid) 136 | # processUtil.cpu_affinity([k for k in range(psutil.cpu_count())]) 137 | # processUtil = psutil.Process(os.getpid()) 138 | # processUtil.cpu_affinity([psutil.cpu_count() - 1]) 139 | 140 | self.process = process 141 | self.startTime = datetime.datetime.now() 142 | return process 143 | elif self.config['type'] == 'remote': 144 | host = self.config['hosts'][self.worker_n % len(self.config['hosts'])] 145 | 146 | # First synchronize files to the host. 147 | if 'rsync' in self.config: 148 | fromDirectory = self.config['rsync']['from'] 149 | if fromDirectory[-1] != '/': 150 | fromDirectory = fromDirectory + "/" # We ensure a trailing slash. Without it, rsync will behave differently. 151 | 152 | try: 153 | subprocess.run(['rsync', '-rac', fromDirectory, host + ":" + self.config['rsync']['to']]) 154 | except OSError as e: 155 | if isinstance(e, FileNotFoundError): # Rsync doesn't exist, use the slower scp command. 156 | subprocess.run(['scp', '-r', fromDirectory, host + ":" + self.config['rsync']['to']]) 157 | else: 158 | raise 159 | 160 | process = subprocess.Popen(['ssh', host, self.config['command']], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=subprocess.PIPE) 161 | atexit.register(lambda: process.kill()) 162 | 163 | newParams = dict(self.parameters) 164 | newParams['$scriptToken'] = self.generateScriptToken() 165 | 166 | process.stdin.write(bytes(json.dumps(newParams)+"\n\n", 'utf8')) 167 | process.stdin.flush() 168 | 169 | self.process = process 170 | self.startTime = datetime.datetime.now() 171 | return process 172 | 173 | def shouldKillProcess(self): 174 | """ 175 | This method checks all of the conditions on the process, such as time usage and RAM usage, to decide whether 176 | it should be killed prematurely. 177 | 178 | :return: True/False on whether the process should be killed prematurely. 179 | """ 180 | processStats = psutil.Process(self.process.pid) 181 | 182 | memUsageMB = float(processStats.memory_info().vms) / (1024 * 1024) 183 | if 'auto_kill_max_ram' in self.config and memUsageMB > self.config['auto_kill_max_ram']: 184 | return True 185 | 186 | systemStats = psutil.virtual_memory() 187 | memUsageMB = float(systemStats.total - systemStats.available) / (1024 * 1024) 188 | if 'auto_kill_max_system_ram' in self.config and memUsageMB > self.config['auto_kill_max_system_ram']: 189 | return True 190 | 191 | elapsedTime = (datetime.datetime.now() - self.startTime).total_seconds() 192 | if 'auto_kill_max_time' in self.config and elapsedTime > self.config['auto_kill_max_time']: 193 | return True 194 | 195 | return False 196 | 197 | def run(self): 198 | """ 199 | Run the model, return the results. 200 | 201 | :return: A standard 'results' object. 202 | """ 203 | # print("Running: ", parameters) 204 | if 'func' in self.config: 205 | return self.config['func'](self.parameters) 206 | 207 | if self.config['type'] == 'python_function' or self.config['type'] == 'remote': 208 | process = self.startSubprocess() 209 | output = '' 210 | while process.returncode is None and (not self.scriptToken or self.scriptToken not in output) and 'no process found' not in output: 211 | process.poll() 212 | nextChars = str(process.stdout.read(), 'utf8') 213 | for nextChar in nextChars: 214 | if nextChar == chr(127): 215 | output = output[:-1] # Erase the last character from the output.
216 | else: 217 | output += nextChar 218 | # print(output) 219 | try: 220 | if self.shouldKillProcess(): 221 | self.killed = True 222 | parent = psutil.Process(process.pid) 223 | children = parent.children(recursive=True) 224 | children.append(parent) 225 | for p in children: 226 | p.send_signal(9) 227 | except psutil.NoSuchProcess: 228 | pass 229 | time.sleep(0.002) 230 | 231 | if self.killed: 232 | output += str(process.stdout.read(), 'utf8') 233 | self.result = {"status": "fail", "loss": self.config['auto_kill_loss'], "log": output, "error": "Model was automatically killed.", 234 | "time": (datetime.datetime.now() - self.startTime).total_seconds()} 235 | self.process = None 236 | return self.result 237 | 238 | output += str(process.stdout.read(), 'utf8') 239 | # print(output) 240 | 241 | if self.config['type'] == 'python_function': 242 | cutoffIndex = output.find(self.scriptToken) 243 | else: 244 | # The cutoff is the last newline character for a non-empty line 245 | cutoffIndex = output.rfind('\n') 246 | while cutoffIndex != -1 and (not output[cutoffIndex:].strip()): 247 | cutoffIndex = output.rfind('\n', 0, cutoffIndex-1) 248 | if cutoffIndex == -1: 249 | cutoffIndex = 0 250 | 251 | self.scriptToken = '' 252 | 253 | if cutoffIndex == -1: 254 | self.result = {"status": "fail", "loss": None, "log": output, "error": "Did not find result object in the output from the model script."} 255 | self.process = None 256 | return self.result 257 | else: 258 | resultString = output[cutoffIndex + len(self.scriptToken):] 259 | resultString = resultString.replace("'", "\"") 260 | try: 261 | rawResult = json.loads(resultString) 262 | self.result = self.interpretResultObject(rawResult) 263 | self.result['status'] = 'ok' 264 | self.result['log'] = output[:cutoffIndex] 265 | self.result['error'] = None 266 | self.process = None 267 | return self.result 268 | except json.JSONDecodeError as e: 269 | self.result = {"status": "fail", "loss": None, "log": output, "error": "Unable to decode the JSON result object from the model."} 270 | self.process = None 271 | return self.result 272 | if self.config['type'] == 'remote': 273 | pass 274 | 275 | def interpretResultObject(self, rawResult): 276 | """ 277 | This method has the job of interpreting and standardizing the result object from the model. 278 | 279 | :param rawResult: The raw result object from the model. 280 | :return: The standardized results object. 281 | """ 282 | if isinstance(rawResult, int) or isinstance(rawResult, float) or isinstance(rawResult, bool): 283 | return {"loss": rawResult} 284 | elif isinstance(rawResult, dict): 285 | return rawResult 286 | else: 287 | raise ValueError("Unexpected value for result object from model: " + json.dumps( 288 | rawResult) + "\nReturn value must be either a Python dictionary/JSON object or a single floating point value.") 289 | -------------------------------------------------------------------------------- /hypermax/file_utils.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | import tempfile 3 | import os 4 | 5 | # Windows doesn't support opening a NamedTemporaryFile. 
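# (A NamedTemporaryFile on Windows cannot be opened a second time by name while it is still open, so we create it with delete=False, close it, and remove it ourselves in the finally block.)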
6 | # Solution inspired in https://stackoverflow.com/a/46501017/147507 7 | @contextmanager 8 | def ClosedNamedTempFile(contents): 9 | try: 10 | with tempfile.NamedTemporaryFile(delete=False) as f: 11 | file_name = f.name 12 | f.write(contents) 13 | yield file_name 14 | finally: 15 | os.unlink(file_name) -------------------------------------------------------------------------------- /hypermax/hyperparameter.py: -------------------------------------------------------------------------------- 1 | from hyperopt import hp 2 | import math 3 | from pprint import pprint 4 | import re 5 | 6 | class Hyperparameter: 7 | """ This class represents a hyperparameter.""" 8 | 9 | def __init__(self, config, parent=None, root='root'): 10 | self.config = config 11 | self.root = root 12 | self.name = root[5:] 13 | self.parent = parent 14 | self.resultVariableName = re.sub("\\.\\d+\\.", ".", self.name) 15 | 16 | self.hyperoptVariableName = self.root 17 | if 'name' in config: 18 | self.hyperoptVariableName = config['name'] 19 | 20 | def createHyperoptSpace(self, lockedValues=None): 21 | name = self.root 22 | 23 | if lockedValues is None: 24 | lockedValues = {} 25 | 26 | if 'anyOf' in self.config or 'oneOf' in self.config: 27 | data = [] 28 | if 'anyOf' in self.config: 29 | data = self.config['anyOf'] 30 | else: 31 | data = self.config['oneOf'] 32 | 33 | subSpaces = [Hyperparameter(param, self, name + "." + str(index)).createHyperoptSpace(lockedValues) for index, param in enumerate(data)] 34 | for index, space in enumerate(subSpaces): 35 | space["$index"] = index 36 | 37 | choices = hp.choice(self.hyperoptVariableName, subSpaces) 38 | 39 | return choices 40 | elif 'enum' in self.config: 41 | if self.name in lockedValues: 42 | return lockedValues[self.name] 43 | 44 | choices = hp.choice(self.hyperoptVariableName, self.config['enum']) 45 | return choices 46 | elif 'constant' in self.config: 47 | if self.name in lockedValues: 48 | return lockedValues[self.name] 49 | 50 | return self.config['constant'] 51 | elif self.config['type'] == 'object': 52 | space = {} 53 | for key in self.config['properties'].keys(): 54 | config = self.config['properties'][key] 55 | space[key] = Hyperparameter(config, self, name + "." 
+ key).createHyperoptSpace(lockedValues) 56 | return space 57 | elif self.config['type'] == 'number': 58 | if self.name in lockedValues: 59 | return lockedValues[self.name] 60 | 61 | mode = self.config.get('mode', 'uniform') 62 | scaling = self.config.get('scaling', 'linear') 63 | 64 | if mode == 'uniform': 65 | min = self.config.get('min', 0) 66 | max = self.config.get('max', 1) 67 | rounding = self.config.get('rounding', None) 68 | 69 | if scaling == 'linear': 70 | if rounding is not None: 71 | return hp.quniform(self.hyperoptVariableName, min, max, rounding) 72 | else: 73 | return hp.uniform(self.hyperoptVariableName, min, max) 74 | elif scaling == 'logarithmic': 75 | if rounding is not None: 76 | return hp.qloguniform(self.hyperoptVariableName, math.log(min), math.log(max), rounding) 77 | else: 78 | return hp.loguniform(self.hyperoptVariableName, math.log(min), math.log(max)) 79 | if mode == 'randint': 80 | max = self.config.get('max', 1) 81 | return hp.randint(self.hyperoptVariableName, max) 82 | 83 | if mode == 'normal': 84 | mean = self.config.get('mean', 0) 85 | stddev = self.config.get('stddev', 1) 86 | rounding = self.config.get('rounding', None) 87 | 88 | if scaling == 'linear': 89 | if rounding is not None: 90 | return hp.qnormal(self.hyperoptVariableName, mean, stddev, rounding) 91 | else: 92 | return hp.normal(self.hyperoptVariableName, mean, stddev) 93 | elif scaling == 'logarithmic': 94 | if rounding is not None: 95 | return hp.qlognormal(self.hyperoptVariableName, math.log(mean), math.log(stddev), rounding) 96 | else: 97 | return hp.lognormal(self.hyperoptVariableName, math.log(mean), math.log(stddev)) 98 | 99 | def getFlatParameterNames(self): 100 | name = self.root 101 | 102 | if 'anyOf' in self.config or 'oneOf' in self.config: 103 | keys = set() 104 | if 'anyOf' in self.config: 105 | data = self.config['anyOf'] 106 | else: 107 | data = self.config['oneOf'] 108 | 109 | for index, param in enumerate(data): 110 | subKeys = Hyperparameter(param, self, name + "." + str(index)).getFlatParameterNames() 111 | for key in subKeys: 112 | keys.add(key) 113 | 114 | return keys 115 | elif 'enum' in self.config or 'constant' in self.config: 116 | return [name] 117 | elif self.config['type'] == 'object': 118 | keys = set() 119 | for key in self.config['properties'].keys(): 120 | config = self.config['properties'][key] 121 | subKeys = Hyperparameter(config, self, name + "." + key).getFlatParameterNames() 122 | for key in subKeys: 123 | keys.add(key) 124 | 125 | return keys 126 | elif self.config['type'] == 'number': 127 | return [name] 128 | 129 | def getFlatParameters(self): 130 | name = self.root 131 | if 'anyOf' in self.config or 'oneOf' in self.config: 132 | parameters = [] 133 | if 'anyOf' in self.config: 134 | data = self.config['anyOf'] 135 | else: 136 | data = self.config['oneOf'] 137 | 138 | for index, param in enumerate(data): 139 | subParameters = Hyperparameter(param, self, name + "." + str(index)).getFlatParameters() 140 | parameters = parameters + subParameters 141 | return parameters 142 | elif 'enum' in self.config or 'constant' in self.config: 143 | return [self] 144 | elif self.config['type'] == 'object': 145 | parameters = [] 146 | for key in self.config['properties'].keys(): 147 | config = self.config['properties'][key] 148 | subParameters = Hyperparameter(config, self, name + "." 
+ key).getFlatParameters() 149 | parameters = parameters + subParameters 150 | return parameters 151 | elif self.config['type'] == 'number': 152 | return [self] 153 | 154 | def getLog10Cardinality(self): 155 | if 'anyOf' in self.config or 'oneOf' in self.config: 156 | if 'anyOf' in self.config: 157 | data = self.config['anyOf'] 158 | else: 159 | data = self.config['oneOf'] 160 | 161 | log10_cardinality = Hyperparameter(data[0], self, self.root + ".0").getLog10Cardinality() 162 | for index, subParam in enumerate(data[1:]): 163 | # We used logarithm identities to create this reduction formula 164 | other_log10_cardinality = Hyperparameter(subParam, self, self.root + "." + str(index)).getLog10Cardinality() 165 | 166 | # Revert to linear at high and low values, for numerical stability. Check here: https://www.desmos.com/calculator/efkbbftd18 to observe 167 | if (log10_cardinality - other_log10_cardinality) > 3: 168 | log10_cardinality = log10_cardinality + 1 169 | elif (other_log10_cardinality - log10_cardinality) > 3: 170 | log10_cardinality = other_log10_cardinality + 1 171 | else: 172 | log10_cardinality = other_log10_cardinality + math.log10(1 + math.pow(10, log10_cardinality - other_log10_cardinality)) 173 | return log10_cardinality + math.log10(len(data)) 174 | elif 'enum' in self.config: 175 | return math.log10(len(self.config['enum'])) 176 | elif 'constant' in self.config: 177 | return math.log10(1) 178 | elif self.config['type'] == 'object': 179 | log10_cardinality = 0 180 | for index, subParam in enumerate(self.config['properties'].values()): 181 | subParameter = Hyperparameter(subParam, self, self.root + "." + str(index)) 182 | log10_cardinality += subParameter.getLog10Cardinality() 183 | return log10_cardinality 184 | elif self.config['type'] == 'number': 185 | if 'rounding' in self.config: 186 | return math.log10(min(20, (self.config['max'] - self.config['min']) / self.config['rounding'] + 1)) 187 | else: 188 | return math.log10(20) # Default of 20 for fully uniform numbers. 189 | 190 | def convertToFlatValues(self, params): 191 | flatParams = {} 192 | 193 | def recurse(key, value, root): 194 | result_key = root + "." + key 195 | if isinstance(value, str): 196 | flatParams[result_key[1:]] = value 197 | elif isinstance(value, float) or isinstance(value, bool) or isinstance(value, int): 198 | flatParams[result_key[1:]] = value 199 | elif isinstance(value, dict): 200 | for subkey, subvalue in value.items(): 201 | recurse(subkey, subvalue, result_key) 202 | 203 | for key in params.keys(): 204 | value = params[key] 205 | recurse(key, value, '') 206 | 207 | flatValues = {} 208 | 209 | if 'anyOf' in self.config or 'oneOf' in self.config: 210 | if 'anyOf' in self.config: 211 | data = self.config['anyOf'] 212 | else: 213 | data = self.config['oneOf'] 214 | 215 | subParameterIndex = flatParams[self.resultVariableName + '.$index'] 216 | flatValues[self.name] = subParameterIndex 217 | 218 | for index, param in enumerate(data): 219 | subParameter = Hyperparameter(param, self, self.root + "." 
+ str(index)) 220 | 221 | if index == subParameterIndex: 222 | subFlatValues = subParameter.convertToFlatValues(flatParams) 223 | for key in subFlatValues: 224 | flatValues[key] = subFlatValues[key] 225 | else: 226 | for flatParam in subParameter.getFlatParameters(): 227 | flatValues[flatParam.name] = "" 228 | 229 | return flatValues 230 | elif 'constant' in self.config: 231 | flatValues[self.name] = flatParams[self.resultVariableName] 232 | return flatValues 233 | elif 'enum' in self.config: 234 | flatValues[self.name] = flatParams[self.resultVariableName] 235 | return flatValues 236 | elif self.config['type'] == 'object': 237 | for key in self.config['properties'].keys(): 238 | config = self.config['properties'][key] 239 | 240 | subFlatValues = Hyperparameter(config, self, self.root + "." + key).convertToFlatValues(flatParams) 241 | 242 | for key in subFlatValues: 243 | flatValues[key] = subFlatValues[key] 244 | 245 | if self.name == "": 246 | for key in params.keys(): 247 | if key.startswith("$"): 248 | flatValues[key] = params[key] 249 | 250 | return flatValues 251 | elif self.config['type'] == 'number': 252 | flatValues[self.name] = flatParams[self.resultVariableName] 253 | return flatValues 254 | 255 | def convertToStructuredValues(self, flatValues): 256 | if 'anyOf' in self.config or 'oneOf' in self.config: 257 | if 'anyOf' in self.config: 258 | data = self.config['anyOf'] 259 | else: 260 | data = self.config['oneOf'] 261 | 262 | subParameterIndex = flatValues[self.name] 263 | subParam = Hyperparameter(data[subParameterIndex], self, self.root + "." + str(subParameterIndex)) 264 | 265 | structured = subParam.convertToStructuredValues(flatValues) 266 | structured['$index'] = subParameterIndex 267 | 268 | return structured 269 | elif 'constant' in self.config: 270 | return flatValues[self.name] 271 | elif 'enum' in self.config: 272 | return flatValues[self.name] 273 | elif self.config['type'] == 'object': 274 | result = {} 275 | for key in self.config['properties'].keys(): 276 | config = self.config['properties'][key] 277 | 278 | subStructuredValue = Hyperparameter(config, self, self.root + "." 
+ key).convertToStructuredValues(flatValues) 279 | 280 | result[key] = subStructuredValue 281 | 282 | if self.name == "": 283 | for key in flatValues.keys(): 284 | if key.startswith("$"): 285 | result[key] = flatValues[key] 286 | return result 287 | elif self.config['type'] == 'number': 288 | return flatValues[self.name] 289 | 290 | 291 | @staticmethod 292 | def createHyperparameterConfigForHyperoptDomain(domain): 293 | if domain.name is None: 294 | data = { 295 | "type": "object", 296 | "properties": {} 297 | } 298 | 299 | for key in domain.params: 300 | data['properties'][key] = Hyperparameter.createHyperparameterConfigForHyperoptDomain(domain.params[key]) 301 | 302 | if 'name' not in data['properties'][key]: 303 | data['properties'][key]['name'] = key 304 | 305 | return data 306 | elif domain.name == 'dict': 307 | data = { 308 | "type": "object", 309 | "properties": {} 310 | } 311 | 312 | for item in domain.named_args: 313 | data['properties'][item[0]] = Hyperparameter.createHyperparameterConfigForHyperoptDomain(item[1]) 314 | 315 | return data 316 | elif domain.name == 'switch': 317 | data = { 318 | "oneOf": [ 319 | 320 | ] 321 | } 322 | 323 | data['name'] = domain.pos_args[0].pos_args 324 | 325 | for item in domain.pos_args[1:]: 326 | data['oneOf'].append(Hyperparameter.createHyperparameterConfigForHyperoptDomain(item)) 327 | return data 328 | elif domain.name == 'hyperopt_param': 329 | data = Hyperparameter.createHyperparameterConfigForHyperoptDomain(domain.pos_args[1]) 330 | data['name'] = domain.pos_args[0]._obj 331 | return data 332 | elif domain.name == 'uniform': 333 | data = {"type": "number"} 334 | data['scaling'] = 'linear' 335 | data['mode'] = 'uniform' 336 | data['min'] = domain.pos_args[0]._obj 337 | data['max'] = domain.pos_args[1]._obj 338 | return data 339 | elif domain.name == 'quniform': 340 | data = {"type": "number"} 341 | data['scaling'] = 'linear' 342 | data['mode'] = 'uniform' 343 | data['min'] = domain.pos_args[0]._obj 344 | data['max'] = domain.pos_args[1]._obj 345 | data['rounding'] = domain.pos_args[2]._obj 346 | return data 347 | elif domain.name == 'loguniform': 348 | data = {"type": "number"} 349 | data['scaling'] = 'logarithmic' 350 | data['mode'] = 'uniform' 351 | data['min'] = math.exp(domain.pos_args[0]._obj) 352 | data['max'] = math.exp(domain.pos_args[1]._obj) 353 | return data 354 | elif domain.name == 'qloguniform': 355 | data = {"type": "number"} 356 | data['scaling'] = 'logarithmic' 357 | data['mode'] = 'uniform' 358 | data['min'] = math.exp(domain.pos_args[0]._obj) 359 | data['max'] = math.exp(domain.pos_args[1]._obj) 360 | data['rounding'] = domain.pos_args[2]._obj 361 | return data 362 | elif domain.name == 'normal': 363 | data = {"type": "number"} 364 | data['scaling'] = 'linear' 365 | data['mode'] = 'normal' 366 | data['mean'] = domain.pos_args[0]._obj 367 | data['stddev'] = domain.pos_args[1]._obj 368 | return data 369 | elif domain.name == 'qnormal': 370 | data = {"type": "number"} 371 | data['scaling'] = 'linear' 372 | data['mode'] = 'normal' 373 | data['mean'] = domain.pos_args[0]._obj 374 | data['stddev'] = domain.pos_args[1]._obj 375 | data['rounding'] = domain.pos_args[2]._obj 376 | return data 377 | elif domain.name == 'lognormal': 378 | data = {"type": "number"} 379 | data['scaling'] = 'logarithmic' 380 | data['mode'] = 'normal' 381 | data['mean'] = domain.pos_args[0]._obj 382 | data['stddev'] = domain.pos_args[1]._obj 383 | return data 384 | elif domain.name == 'qlognormal': 385 | data = {"type": "number"} 386 | data['scaling'] = 
'logarithmic' 387 | data['mode'] = 'normal' 388 | data['mean'] = domain.pos_args[0]._obj 389 | data['stddev'] = domain.pos_args[1]._obj 390 | data['rounding'] = domain.pos_args[2]._obj 391 | return data 392 | elif domain.name == 'literal': 393 | data = { 394 | 'type': 'string', 395 | 'constant': domain._obj 396 | } 397 | return data 398 | elif domain.name == 'randint': 399 | data = {"type": "number"} 400 | max = domain.pos_args[0]._obj 401 | data['max'] = max 402 | data['mode'] = 'randint' 403 | return data 404 | else: 405 | raise ValueError(f"Unsupported hyperopt domain type {domain.name}") 406 | -------------------------------------------------------------------------------- /hypermax/optimizer.py: -------------------------------------------------------------------------------- 1 | import hyperopt 2 | import csv 3 | import json 4 | import traceback 5 | import os.path 6 | from pprint import pprint 7 | import datetime 8 | import time 9 | import numpy.random 10 | import threading 11 | import queue 12 | import copy 13 | import tempfile 14 | import random 15 | import subprocess 16 | import concurrent.futures 17 | import tempfile 18 | import functools 19 | import math 20 | import atexit 21 | import jsonschema 22 | import pkg_resources 23 | from hypermax.execution import Execution 24 | from hypermax.hyperparameter import Hyperparameter 25 | from hypermax.results_analyzer import ResultsAnalyzer 26 | from hypermax.algorithms.atpe_optimizer import ATPEOptimizer 27 | from hypermax.algorithms.human_guided_optimizer_wrapper import HumanGuidedOptimizerWrapper 28 | from hypermax.algorithms.tpe_optimizer import TPEOptimizer 29 | from hypermax.algorithms.random_search_optimizer import RandomSearchOptimizer 30 | from hypermax.algorithms.adaptive_bayesian_hyperband_optimizer import AdaptiveBayesianHyperband 31 | 32 | from hypermax.configuration import Configuration 33 | 34 | 35 | class Optimizer: 36 | resultInformationKeys = [ 37 | 'trial', 38 | 'status', 39 | 'loss', 40 | 'time', 41 | 'log', 42 | 'error' 43 | ] 44 | 45 | def __init__(self, configuration): 46 | self.config = Configuration(configuration) 47 | 48 | self.searchConfig = configuration.get('search', {}) 49 | # jsonschema.validate(self.searchConfig, self.configurationSchema()) 50 | 51 | self.space = self.config.createHyperparameterSpace() 52 | 53 | self.threadExecutor = concurrent.futures.ThreadPoolExecutor() 54 | 55 | self.resultsAnalyzer = ResultsAnalyzer(configuration) 56 | 57 | self.results = [] 58 | self.resultFutures = [] 59 | 60 | self.best = None 61 | self.bestLoss = None 62 | 63 | self.thread = threading.Thread(target=lambda: self.optimizationThread(), daemon=True if configuration.get("ui", {}).get("enabled", True) else False) 64 | 65 | self.totalTrials = self.searchConfig.get("iterations") 66 | self.trialsSinceResultsUpload = None 67 | self.resultsExportFuture = None 68 | 69 | self.currentTrials = [] 70 | self.allWorkers = set(range(self.config.data['function'].get('parallel', 1))) 71 | self.occupiedWorkers = set() 72 | self.trialNumber = 0 73 | 74 | self.lastATPEParameters = None 75 | self.lastLockedParameters = None 76 | self.atpeParamDetails = None 77 | 78 | self.tpeOptimizer = TPEOptimizer() 79 | self.atpeOptimizer = ATPEOptimizer() 80 | self.abhOptimizer = AdaptiveBayesianHyperband(self.atpeOptimizer, self.searchConfig.get("min_budget", 1), self.searchConfig.get("max_budget", 100), self.searchConfig.get("eta", 3)) 81 | self.humanGuidedATPEOptimizer = HumanGuidedOptimizerWrapper(self.atpeOptimizer) 82 | self.randomSearchOptimizer = 
RandomSearchOptimizer() 83 | 84 | def __del__(self): 85 | if self.threadExecutor: 86 | self.threadExecutor.shutdown(wait=True) 87 | 88 | 89 | @classmethod 90 | def configurationSchema(self): 91 | """ This method returns the configuration schema for the optimization module. The schema 92 | is a standard JSON-schema object.""" 93 | return { 94 | "type": "object", 95 | "properties": { 96 | "method": {"type": "string", "enum": ['atpe', 'tpe', 'random']}, 97 | "iterations": {"type": "number"}, 98 | "budget": {"type": "number"} 99 | }, 100 | "required": ['method', 'iterations'] 101 | } 102 | 103 | def completed(self): 104 | return len(self.results) 105 | 106 | def sampleNext(self): 107 | if self.searchConfig['method'] == 'tpe': 108 | return self.tpeOptimizer.recommendNextParameters(self.config.data['hyperparameters'], self.results, self.currentTrials) 109 | elif self.searchConfig['method'] == 'random': 110 | return self.randomSearchOptimizer.recommendNextParameters(self.config.data['hyperparameters'], self.results, self.currentTrials) 111 | elif self.searchConfig['method'] == 'atpe': 112 | params = self.humanGuidedATPEOptimizer.recommendNextParameters(self.config.data['hyperparameters'], self.results, self.currentTrials) 113 | self.lastATPEParameters = self.atpeOptimizer.lastATPEParameters 114 | self.lastLockedParameters = self.atpeOptimizer.lastLockedParameters 115 | self.atpeParamDetails = self.atpeOptimizer.atpeParamDetails 116 | return params 117 | elif self.searchConfig['method'] == 'abh': 118 | params = self.abhOptimizer.recommendNextParameters(self.config.data['hyperparameters'], self.results, self.currentTrials) 119 | self.lastATPEParameters = self.atpeOptimizer.lastATPEParameters 120 | self.lastLockedParameters = self.atpeOptimizer.lastLockedParameters 121 | self.atpeParamDetails = self.atpeOptimizer.atpeParamDetails 122 | return params 123 | 124 | def computeCurrentBest(self): 125 | best = None 126 | bestLoss = None 127 | for result in self.results: 128 | if (best is None and result['loss'] is not None ) or (result['loss'] is not None and result['loss'] < bestLoss): 129 | best = result 130 | bestLoss = result['loss'] 131 | self.best = best 132 | self.bestLoss = bestLoss 133 | 134 | 135 | def startOptmizationJob(self): 136 | availableWorkers = list(sorted(self.allWorkers.difference(self.occupiedWorkers))) 137 | 138 | sampleWorker = availableWorkers[0] 139 | sample = None 140 | while sample is None: 141 | # Hedge against any exceptions in the atpe optimizer. 
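# If sampleNext() raises for any reason, print the traceback and keep retrying in this
# loop until the optimizer returns a usable parameter sample, rather than letting the
# optimization thread crash.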
142 | try: 143 | sample = self.sampleNext() 144 | except Exception: 145 | traceback.print_exc() 146 | pass 147 | 148 | def testSample(params, trial, worker): 149 | currentTrial = { 150 | "start": datetime.datetime.now(), 151 | "trial": trial, 152 | "worker": worker, 153 | "params": copy.deepcopy(params) 154 | } 155 | self.currentTrials.append(currentTrial) 156 | start = datetime.datetime.now() 157 | execution = Execution(self.config.data['function'], parameters=params, worker_n=worker) 158 | modelResult = execution.run() 159 | end = datetime.datetime.now() 160 | 161 | result = Hyperparameter(self.config.data['hyperparameters']).convertToFlatValues(params) 162 | 163 | for key in params.keys(): 164 | if key.startswith("$"): 165 | result[key] = params[key] 166 | 167 | result['trial'] = trial 168 | self.resultsAnalyzer.makeDirs(os.path.join(self.resultsAnalyzer.directory, "logs")) 169 | 170 | if 'loss' in modelResult: 171 | result['loss'] = modelResult['loss'] 172 | elif 'accuracy' in modelResult: 173 | result['loss'] = modelResult['accuracy'] 174 | 175 | if 'status' in modelResult: 176 | result['status'] = modelResult['status'] 177 | else: 178 | result['status'] = 'ok' 179 | 180 | if 'log' in modelResult: 181 | fileName = os.path.join(self.resultsAnalyzer.directory, "logs", "trial_" + str(trial) + ".txt") 182 | with open(fileName, "wt") as file: 183 | file.write(modelResult['log']) 184 | result['log'] = fileName 185 | else: 186 | result['log'] = '' 187 | 188 | if 'error' in modelResult: 189 | result['error'] = modelResult['error'] 190 | else: 191 | result['error'] = '' 192 | 193 | if 'time' in modelResult: 194 | result['time'] = modelResult['time'] 195 | else: 196 | result['time'] = (end-start).total_seconds() 197 | 198 | self.currentTrials.remove(currentTrial) 199 | 200 | return result 201 | 202 | def onCompletion(worker, future): 203 | self.occupiedWorkers.remove(worker) 204 | 205 | self.results.append(future.result()) 206 | 207 | self.computeCurrentBest() 208 | 209 | if not self.config.data.get("ui", {}).get("enabled", True): 210 | pprint(future.result()) 211 | 212 | if self.resultsExportFuture is None or (self.resultsExportFuture.done() and len(self.results) > 5): 213 | self.resultsExportFuture = self.threadExecutor.submit( 214 | lambda: self.outputResultsWithBackup(self.config.data.get("results", {}).get("graphs", True))) 215 | else: 216 | self.outputResultsWithBackup(False) 217 | 218 | if 'hypermax_results' in self.config.data: 219 | if self.trialsSinceResultsUpload is None or self.trialsSinceResultsUpload >= self.config.data['hypermax_results']['upload_frequency']: 220 | self.saveResultsToHypermaxResultsRepository() 221 | self.trialsSinceResultsUpload = 1 222 | else: 223 | self.trialsSinceResultsUpload += 1 224 | 225 | self.occupiedWorkers.add(sampleWorker) 226 | sampleFuture = self.threadExecutor.submit(testSample, sample, self.trialNumber, sampleWorker) 227 | sampleFuture.add_done_callback(functools.partial(onCompletion, sampleWorker)) 228 | self.trialNumber += 1 229 | return sampleFuture 230 | 231 | def runOptimizationThread(self): 232 | self.thread.start() 233 | 234 | def outputResultsWithBackup(self, graphs, workers=1): 235 | self.resultsAnalyzer.outputResultsFolder(self, graphs, workers=workers) 236 | directory_head, directory_tail = os.path.split(self.resultsAnalyzer.directory) 237 | backup_directory = os.path.join(directory_head, ".backup_" + directory_tail + "~") 238 | self.resultsAnalyzer.outputResultsFolder(self, graphs, directory=backup_directory, workers=workers) 239 | 240 
| def optimizationThread(self): 241 | # Make sure we output basic results if the process is killed for some reason. 242 | atexit.register(lambda: self.outputResultsWithBackup(False)) 243 | 244 | futures = [] 245 | for worker in range(min(len(self.allWorkers), self.totalTrials - len(self.results))): 246 | futures.append(self.startOptmizationJob()) 247 | time.sleep(1.0) 248 | 249 | while (len(self.results) + len(self.currentTrials)) < self.totalTrials: 250 | completedFuture = list(concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED)[0])[0] 251 | futures.remove(completedFuture) 252 | time.sleep(0.05) 253 | futures.append(self.startOptmizationJob()) 254 | 255 | concurrent.futures.wait(futures) 256 | 257 | # We are completed, so we can allocate a full contingent of workers 258 | self.outputResultsWithBackup(True, workers=4) 259 | 260 | def exportGuidanceJSON(self, fileName): 261 | with open(fileName, 'wt') as file: 262 | json.dump(self.humanGuidedATPEOptimizer.guidanceOptions, file, indent=4, sort_keys=True) 263 | 264 | def importGuidanceJSON(self, fileName): 265 | with open(fileName, 'rt') as file: 266 | self.humanGuidedATPEOptimizer.guidanceOptions = json.load(file) 267 | 268 | def exportResultsCSV(self, fileName): 269 | allKeys = set() 270 | for result in self.results: 271 | for key in result: 272 | allKeys.add(key) 273 | 274 | fieldNames = self.resultInformationKeys + sorted(allKeys.difference(set(self.resultInformationKeys))) # Make sure we keep the order of the field names consistent when writing the csv 275 | with open(fileName, 'wt') as file: 276 | writer = csv.DictWriter(file, fieldnames=fieldNames if len(self.results) > 0 else [], dialect='unix') 277 | writer.writeheader() 278 | writer.writerows(self.results) 279 | 280 | def importResultsCSV(self, fileName): 281 | with open(fileName) as file: 282 | reader = csv.DictReader(file) 283 | results = list(reader) 284 | newResults = [] 285 | for result in results: 286 | newResult = {} 287 | for key,value in result.items(): 288 | if value is not None and value != "": 289 | try: 290 | if '.' 
in value or 'e' in value: 291 | newResult[key] = float(value) 292 | else: 293 | newResult[key] = int(value) 294 | except ValueError: 295 | newResult[key] = value 296 | elif key == 'loss': 297 | newResult[key] = None 298 | elif key == 'log': 299 | newResult[key] = '' 300 | else: 301 | newResult[key] = None 302 | newResults.append(newResult) 303 | self.results = newResults 304 | self.computeCurrentBest() 305 | self.trialNumber = len(self.results) 306 | 307 | def saveResultsToHypermaxResultsRepository(self): 308 | try: 309 | hypermaxResultsConfig = self.config.data['hypermax_results'] 310 | with tempfile.TemporaryDirectory() as directory: 311 | process = subprocess.run(['git', 'clone', 'git@github.com:electricbrainio/hypermax-results.git'], cwd=directory, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 312 | hypermaxResultsDirectory = os.path.join(directory, 'hypermax-results', hypermaxResultsConfig['name']) 313 | self.resultsAnalyzer.outputResultsFolder(self, detailed=False, directory=hypermaxResultsDirectory) 314 | with open(os.path.join(hypermaxResultsDirectory, "metadata.json"), 'wt') as file: 315 | json.dump(self.config.data['hypermax_results'], file, indent=4) 316 | process = subprocess.run(['git', 'add', hypermaxResultsDirectory], cwd=os.path.join(directory, 'hypermax-results')) 317 | process = subprocess.run(['git', 'commit', '-m', 'Hypermax automatically storing results for model ' + hypermaxResultsConfig['name'] + ' with ' + str(len(self.results)) + " trials."], cwd=os.path.join(directory, 'hypermax-results'), stdout=subprocess.PIPE, stderr=subprocess.PIPE) 318 | process = subprocess.run(['git push'], cwd=os.path.join(directory, 'hypermax-results'), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) 319 | except Exception as e: 320 | print(e) 321 | -------------------------------------------------------------------------------- /hypermax/test_model.py: -------------------------------------------------------------------------------- 1 | import sklearn.datasets 2 | import sklearn.metrics 3 | import math 4 | from datetime import datetime 5 | 6 | 7 | def trainModel(params): 8 | inputs, outputs = sklearn.datasets.load_breast_cancer(True) 9 | 10 | startTime = datetime.now() 11 | 12 | targets = { 13 | "first_value": 383, 14 | "second_value": 862, 15 | "third_value": 4, 16 | "fourth_value": 670, 17 | "fifth_value": 106, 18 | "sixth_value": 42, 19 | "seventh_value": 215, 20 | "eighth_value": 921, 21 | } 22 | 23 | accuracy = 0 24 | total = 0 25 | for key in targets.keys(): 26 | accuracy += math.sqrt((params[key] - targets[key]) * (params[key] - targets[key])) 27 | total += targets[key] 28 | 29 | finishTime = datetime.now() 30 | 31 | result = {"accuracy": accuracy/total, "time": (finishTime - startTime).total_seconds()} 32 | print(result) 33 | return result 34 | 35 | 36 | -------------------------------------------------------------------------------- /hypermax/utils.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | def roundPrecision(number, precision=3): 6 | """ Rounds the given floating point number to a certain precision, for output.""" 7 | return float(('{:.' 
+ str(precision) + 'E}').format(number)) 8 | -------------------------------------------------------------------------------- /research/atpe_research_2/benchmark.py: -------------------------------------------------------------------------------- 1 | from hypermax.optimizer import ATPEOptimizer 2 | from hypermax.optimizer import TPEOptimizer 3 | from hypermax.optimizer import RandomSearchOptimizer 4 | import hpolib.benchmarks.synthetic_functions as hpobench 5 | from hpolib.benchmarks.ml import svm_benchmark, logistic_regression 6 | import numpy as np 7 | from pprint import pprint 8 | 9 | atpeOptimizer = ATPEOptimizer() 10 | tpeOptimizer = TPEOptimizer() 11 | randomOptimizer = RandomSearchOptimizer() 12 | 13 | algorithms = { 14 | "ATPE": atpeOptimizer, 15 | "TPE": tpeOptimizer, 16 | "Random": randomOptimizer 17 | } 18 | 19 | # Run Scipy.minimize on artificial testfunctions 20 | 21 | h3 = hpobench.Hartmann3() 22 | h6 = hpobench.Hartmann6() 23 | b = hpobench.Branin() 24 | bo = hpobench.Bohachevsky() 25 | cb = hpobench.Camelback() 26 | fo = hpobench.Forrester() 27 | gp = hpobench.GoldsteinPrice() 28 | le = hpobench.Levy() 29 | rb = hpobench.Rosenbrock() 30 | 31 | logreg = svm_benchmark.SvmOnMnist() 32 | 33 | for f in [logreg]: 34 | info = f.get_meta_information() 35 | 36 | print("=" * 50) 37 | print(info['name']) 38 | 39 | space = { 40 | "type": "object", 41 | "properties": {} 42 | } 43 | 44 | for boundIndex, bound in enumerate(info['bounds']): 45 | space['properties'][str(boundIndex)] = { 46 | "type": "number", 47 | "scaling": "linear", 48 | "mode": "uniform", 49 | "min": bound[0], 50 | "max": bound[1] 51 | } 52 | 53 | increment = 0 54 | for name, optimizer in algorithms.items(): 55 | print("Optimizer", name) 56 | losses = [] 57 | for round in range(1): 58 | best = None 59 | history = [] 60 | for trial in range(100): 61 | params = optimizer.recommendNextParameters(space, history) 62 | evalParams = [params[str(boundIndex)] for boundIndex in range(len(space['properties']))] 63 | val = f(evalParams) 64 | val += increment 65 | print(val) 66 | params['loss'] = val 67 | params['status'] = 'ok' 68 | history.append(params) 69 | if best is None or val < best['loss']: 70 | best = params 71 | print(round, best['loss']) 72 | losses.append(best['loss']) 73 | averageLoss = np.mean(losses) 74 | averageLoss -= increment 75 | print("Average loss: ", averageLoss) 76 | -------------------------------------------------------------------------------- /research/atpe_research_2/process_results.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import json 3 | import traceback 4 | import csv 5 | import copy 6 | import sklearn.preprocessing 7 | import sklearn.neighbors 8 | import random 9 | import pickle 10 | import lightgbm 11 | import numpy 12 | from pprint import pprint 13 | from simulation import AlgorithmSimulation 14 | 15 | dirs = os.listdir('.') 16 | 17 | atpeParameterKeys = [ 18 | 'gamma', 19 | 'nEICandidates', 20 | 'resultFilteringAgeMultiplier', 21 | 'resultFilteringLossRankMultiplier', 22 | 'resultFilteringMode', 23 | 'resultFilteringRandomProbability', 24 | 'secondaryCorrelationExponent', 25 | 'secondaryCorrelationMultiplier', 26 | 'secondaryCutoff', 27 | # 'secondarySorting', 28 | 'secondaryFixedProbability', 29 | 'secondaryLockingMode', 30 | 'secondaryProbabilityMode', 31 | 'secondaryTopLockingPercentile', 32 | ] 33 | 34 | # We cascade the predictions - feeding prior predictions into the next one. 
There is a specific ordering to it, in order to help regularize the model based on the way our results were sampled and how ATPE is supposed to work at runtime 35 | predictorKeyCascadeOrdering = [ 36 | 'resultFilteringMode', 37 | 'secondaryProbabilityMode', 38 | 'secondaryLockingMode', 39 | 'resultFilteringAgeMultiplier', 40 | 'resultFilteringLossRankMultiplier', 41 | 'resultFilteringRandomProbability', 42 | 'secondaryTopLockingPercentile', 43 | 'secondaryCorrelationExponent', 44 | 'secondaryCorrelationMultiplier', 45 | 'secondaryFixedProbability', 46 | 'secondaryCutoff', 47 | 'gamma', 48 | 'nEICandidates' 49 | ] 50 | 51 | 52 | atpeParameterPredictionStandardDeviationRatio = { 53 | 'gamma': 0.9, 54 | 'nEICandidates': 0.8, 55 | 'resultFilteringAgeMultiplier': 1.0, 56 | 'resultFilteringLossRankMultiplier': 1.0, 57 | 'resultFilteringRandomProbability': 1.0, 58 | 'secondaryCorrelationExponent': 1.0, 59 | 'secondaryCorrelationMultiplier': 1.0, 60 | 'secondaryCutoff': 0.9, 61 | 'secondaryFixedProbability': 1.0, 62 | 'secondaryTopLockingPercentile': 1.0, 63 | 'resultFilteringMode': 2.2, 64 | 'secondaryLockingMode': 1.5, 65 | 'secondaryProbabilityMode': 2.0 66 | } 67 | 68 | # Custom params on per atpe param basis for training the lightgbm models 69 | customLightGBMParams = { 70 | 'secondaryCutoff': { 71 | 'feature_fraction': 0.7 # Extra bagging required on these ones for good generalization, since they are late in the cycle and can fit to other atpe parameter predictions 72 | }, 73 | 'gamma': { 74 | 'feature_fraction': 0.7 # Extra bagging required on these ones for good generalization, since they are late in the cycle and can fit to other atpe parameter predictions 75 | } 76 | } 77 | 78 | classPredictorKeys = [ 79 | 'resultFilteringMode', 80 | 'secondaryLockingMode', 81 | 'secondaryProbabilityMode' 82 | ] 83 | 84 | numPredictorClasses = { 85 | 'resultFilteringMode': 4, 86 | 'secondaryLockingMode': 2, 87 | 'secondaryProbabilityMode': 2 88 | } 89 | 90 | atpeParameterValues = { 91 | 'resultFilteringMode': ['age', 'loss_rank', 'none', 'random'], 92 | 'secondaryLockingMode': ['random', 'top'], 93 | 'secondaryProbabilityMode': ['correlation', 'fixed'] 94 | } 95 | 96 | nonFeatureKeys = [ 97 | 'algorithm', 98 | 'contributions_exponential', 99 | 'contributions_linear', 100 | 'contributions_logarithmic', 101 | 'contributions_peakvalley', 102 | 'contributions_random', 103 | 'fail_rate', 104 | 'history', 105 | 'interactions', 106 | 'interactions_index', 107 | 'interactions_linear', 108 | 'interactions_peakvalley', 109 | 'interactions_random', 110 | 'interactions_wave', 111 | 'loss', 112 | 'noise', 113 | 'run', 114 | 'time', 115 | 'trial', 116 | 'log10_trial', 117 | 'num_parameters' 118 | ] 119 | 120 | featureKeys = [ 121 | 'all_correlation_best_percentile25_ratio', 122 | 'all_correlation_best_percentile50_ratio', 123 | 'all_correlation_best_percentile75_ratio', 124 | 'all_correlation_kurtosis', 125 | 'all_correlation_percentile5_percentile25_ratio', 126 | 'all_correlation_skew', 127 | 'all_correlation_stddev_best_ratio', 128 | 'all_correlation_stddev_median_ratio', 129 | 'all_loss_best_percentile25_ratio', 130 | 'all_loss_best_percentile50_ratio', 131 | 'all_loss_best_percentile75_ratio', 132 | 'all_loss_kurtosis', 133 | 'all_loss_percentile5_percentile25_ratio', 134 | 'all_loss_skew', 135 | 'all_loss_stddev_best_ratio', 136 | 'all_loss_stddev_median_ratio', 137 | 'log10_cardinality', 138 | 'recent_10_correlation_best_percentile25_ratio', 139 | 'recent_10_correlation_best_percentile50_ratio', 140 | 
'recent_10_correlation_best_percentile75_ratio', 141 | 'recent_10_correlation_kurtosis', 142 | 'recent_10_correlation_percentile5_percentile25_ratio', 143 | 'recent_10_correlation_skew', 144 | 'recent_10_correlation_stddev_best_ratio', 145 | 'recent_10_correlation_stddev_median_ratio', 146 | 'recent_10_loss_best_percentile25_ratio', 147 | 'recent_10_loss_best_percentile50_ratio', 148 | 'recent_10_loss_best_percentile75_ratio', 149 | 'recent_10_loss_kurtosis', 150 | 'recent_10_loss_percentile5_percentile25_ratio', 151 | 'recent_10_loss_skew', 152 | 'recent_10_loss_stddev_best_ratio', 153 | 'recent_10_loss_stddev_median_ratio', 154 | 'recent_15%_correlation_best_percentile25_ratio', 155 | 'recent_15%_correlation_best_percentile50_ratio', 156 | 'recent_15%_correlation_best_percentile75_ratio', 157 | 'recent_15%_correlation_kurtosis', 158 | 'recent_15%_correlation_percentile5_percentile25_ratio', 159 | 'recent_15%_correlation_skew', 160 | 'recent_15%_correlation_stddev_best_ratio', 161 | 'recent_15%_correlation_stddev_median_ratio', 162 | 'recent_15%_loss_best_percentile25_ratio', 163 | 'recent_15%_loss_best_percentile50_ratio', 164 | 'recent_15%_loss_best_percentile75_ratio', 165 | 'recent_15%_loss_kurtosis', 166 | 'recent_15%_loss_percentile5_percentile25_ratio', 167 | 'recent_15%_loss_skew', 168 | 'recent_15%_loss_stddev_best_ratio', 169 | 'recent_15%_loss_stddev_median_ratio', 170 | 'recent_25_correlation_best_percentile25_ratio', 171 | 'recent_25_correlation_best_percentile50_ratio', 172 | 'recent_25_correlation_best_percentile75_ratio', 173 | 'recent_25_correlation_kurtosis', 174 | 'recent_25_correlation_percentile5_percentile25_ratio', 175 | 'recent_25_correlation_skew', 176 | 'recent_25_correlation_stddev_best_ratio', 177 | 'recent_25_correlation_stddev_median_ratio', 178 | 'recent_25_loss_best_percentile25_ratio', 179 | 'recent_25_loss_best_percentile50_ratio', 180 | 'recent_25_loss_best_percentile75_ratio', 181 | 'recent_25_loss_kurtosis', 182 | 'recent_25_loss_percentile5_percentile25_ratio', 183 | 'recent_25_loss_skew', 184 | 'recent_25_loss_stddev_best_ratio', 185 | 'recent_25_loss_stddev_median_ratio', 186 | 'top_10%_correlation_best_percentile25_ratio', 187 | 'top_10%_correlation_best_percentile50_ratio', 188 | 'top_10%_correlation_best_percentile75_ratio', 189 | 'top_10%_correlation_kurtosis', 190 | 'top_10%_correlation_percentile5_percentile25_ratio', 191 | 'top_10%_correlation_skew', 192 | 'top_10%_correlation_stddev_best_ratio', 193 | 'top_10%_correlation_stddev_median_ratio', 194 | 'top_10%_loss_best_percentile25_ratio', 195 | 'top_10%_loss_best_percentile50_ratio', 196 | 'top_10%_loss_best_percentile75_ratio', 197 | 'top_10%_loss_kurtosis', 198 | 'top_10%_loss_percentile5_percentile25_ratio', 199 | 'top_10%_loss_skew', 200 | 'top_10%_loss_stddev_best_ratio', 201 | 'top_10%_loss_stddev_median_ratio', 202 | 'top_20%_correlation_best_percentile25_ratio', 203 | 'top_20%_correlation_best_percentile50_ratio', 204 | 'top_20%_correlation_best_percentile75_ratio', 205 | 'top_20%_correlation_kurtosis', 206 | 'top_20%_correlation_percentile5_percentile25_ratio', 207 | 'top_20%_correlation_skew', 208 | 'top_20%_correlation_stddev_best_ratio', 209 | 'top_20%_correlation_stddev_median_ratio', 210 | 'top_20%_loss_best_percentile25_ratio', 211 | 'top_20%_loss_best_percentile50_ratio', 212 | 'top_20%_loss_best_percentile75_ratio', 213 | 'top_20%_loss_kurtosis', 214 | 'top_20%_loss_percentile5_percentile25_ratio', 215 | 'top_20%_loss_skew', 216 | 'top_20%_loss_stddev_best_ratio', 217 | 
'top_20%_loss_stddev_median_ratio', 218 | 'top_30%_correlation_best_percentile25_ratio', 219 | 'top_30%_correlation_best_percentile50_ratio', 220 | 'top_30%_correlation_best_percentile75_ratio', 221 | 'top_30%_correlation_kurtosis', 222 | 'top_30%_correlation_percentile5_percentile25_ratio', 223 | 'top_30%_correlation_skew', 224 | 'top_30%_correlation_stddev_best_ratio', 225 | 'top_30%_correlation_stddev_median_ratio', 226 | 'top_30%_loss_best_percentile25_ratio', 227 | 'top_30%_loss_best_percentile50_ratio', 228 | 'top_30%_loss_best_percentile75_ratio', 229 | 'top_30%_loss_kurtosis', 230 | 'top_30%_loss_percentile5_percentile25_ratio', 231 | 'top_30%_loss_skew', 232 | 'top_30%_loss_stddev_best_ratio', 233 | 'top_30%_loss_stddev_median_ratio' 234 | ] 235 | 236 | 237 | def roundPrecision(number, precision=4): 238 | """ Rounds the given floating point number to a certain precision, for output.""" 239 | return float(('{:.' + str(precision) + 'E}').format(number)) 240 | 241 | 242 | def preprocessResult(result): 243 | # Here we preprocess results as early as possible to create different predictor keys 244 | # result['secondarySorting'] = (1 if float(result['secondaryCutoff']) > 0 else (-1 if float(result['secondaryCutoff']) < 0 else 0)) 245 | # result['secondaryCutoff'] = abs(float(result['secondaryCutoff'])) 246 | return result 247 | 248 | 249 | def extractResultsFromLogs(): 250 | # This file merges together all the results 251 | dirs = sorted(os.listdir('.')) 252 | 253 | allFails = [] 254 | allResults = [] 255 | 256 | for dir in dirs: 257 | if 'run' not in dir: 258 | continue 259 | if os.path.exists(os.path.join(dir, 'hypermax', 'nohup.out')): 260 | with open(os.path.join(dir, 'hypermax', 'nohup.out'), 'rt') as file: 261 | text = file.read() 262 | 263 | fails = [] 264 | results = [] 265 | 266 | # Extract each of the results out of the log files 267 | start = text.find('{') 268 | while start != -1: 269 | end = text.find('}', start) 270 | result = text[start:end + 1] 271 | result = result.replace('\'', '"') 272 | result = result.replace('None', 'null') 273 | try: 274 | data = json.loads(result) 275 | data['run'] = dir 276 | results.append(preprocessResult(data)) 277 | except Exception: 278 | fails.append(result) 279 | # traceback.print_exc() 280 | start = text.find('{', end) 281 | 282 | allResults = allResults + results 283 | allFails = allFails + fails 284 | return allResults 285 | 286 | 287 | def extractResultsFromCSVs(): 288 | # This file merges together all the results 289 | dirs = sorted(os.listdir('.')) 290 | 291 | allResults = [] 292 | 293 | for dir in dirs: 294 | if 'run' not in dir: 295 | continue 296 | filePath = os.path.join(dir, 'hypermax', 'results.csv') 297 | if os.path.exists(filePath): 298 | with open(filePath, 'rt') as file: 299 | results = [dict(result) for result in csv.DictReader(file)] 300 | for result in results: 301 | result['run'] = dir 302 | allResults = allResults + [preprocessResult(result) for result in results] 303 | return allResults 304 | 305 | 306 | def getDeduplicatedResults(): 307 | logResults = extractResultsFromLogs() 308 | csvResults = extractResultsFromCSVs() 309 | 310 | logResultsByRun = {} 311 | csvResultsByRun = {} 312 | for result in logResults: 313 | # Make sure its a valid result. 
A very small number of results are missing all the requisite features (for an unknown reason, perhaps because they were killed part way into output 314 | valid = True 315 | for key in featureKeys: 316 | if key not in result: 317 | valid=False 318 | if not valid: 319 | continue 320 | if result['run'] in logResultsByRun: 321 | logResultsByRun[result['run']].append(result) 322 | else: 323 | logResultsByRun[result['run']] = [result] 324 | for result in csvResults: 325 | if result['run'] in csvResultsByRun: 326 | csvResultsByRun[result['run']].append(result) 327 | else: 328 | csvResultsByRun[result['run']] = [result] 329 | 330 | duplicates = [] 331 | additionals = [] 332 | csvResultsByRunCloned = copy.deepcopy(csvResultsByRun) 333 | for run in logResultsByRun.keys(): 334 | runDuplicates = [] 335 | runAdditionals = [] 336 | 337 | model = None 338 | if run in csvResultsByRun: 339 | # Build a nearest neighbor model to help us find the duplicates 340 | model = sklearn.neighbors.NearestNeighbors(n_neighbors=10) 341 | vectors = [] 342 | for result in csvResultsByRunCloned[run]: 343 | vectors.append([result[key] for key in featureKeys]) 344 | model.fit(vectors) 345 | 346 | def testCSVResult(result2): 347 | same = True 348 | for key in result.keys(): 349 | if result[key] is not None and result2[key] is not None: 350 | try: 351 | same = not (abs(float(result[key]) - float(result2[key])) > 0.01) 352 | except ValueError: 353 | same = (result[key] == result2[key]) 354 | 355 | if not same: 356 | break 357 | return same 358 | 359 | for result in logResultsByRun[run]: 360 | found = False 361 | if run in csvResultsByRunCloned: 362 | vector = [result[key] for key in featureKeys] 363 | nearest = model.kneighbors([vector], n_neighbors=10) # The k nearest neighbors is good enough to always find the duplicates. 364 | for neighbor in nearest[1][0]: 365 | found = testCSVResult(csvResultsByRun[run][neighbor]) 366 | if found: 367 | break 368 | if found: 369 | runDuplicates.append(result) 370 | if not found: 371 | runAdditionals.append(result) 372 | 373 | print(run, 'dupes', len(runDuplicates)) 374 | print(run, 'adds', len(runAdditionals)) 375 | duplicates = duplicates + runDuplicates 376 | additionals = additionals + runAdditionals 377 | 378 | allResults = csvResults + additionals 379 | return allResults 380 | 381 | 382 | def mergeResults(): 383 | allResults = getDeduplicatedResults() 384 | 385 | # with open('final/allResults.csv', 'rt') as file: 386 | # results = [dict(result) for result in csv.DictReader(file)] 387 | # for result in results: 388 | # del result['blank1'] 389 | # del result['blank2'] 390 | # allResults = results 391 | 392 | # Convert everything to floats where possible 393 | for result in allResults: 394 | for key in result.keys(): 395 | if result[key] is not None and result[key] != '': 396 | try: 397 | result[key] = float(result[key]) 398 | except ValueError: 399 | pass 400 | 401 | # Filter out all results with a loss of 1.0. 
These results are meaningless for our dataset, since they are caused by our random-failure noise, and not by actually searching the dataset
402 |     allResults = [result for result in allResults if float(result['loss']) < 1.0]
403 | 
404 |     for result in allResults:
405 |         if 'algorithm' not in result:
406 |             result['algorithm'] = ''
407 |         if 'time' not in result:
408 |             result['time'] = ''
409 | 
410 |     if not os.path.exists('final'):
411 |         os.mkdir('final')
412 |     if not os.path.exists('final/algos'):
413 |         os.mkdir('final/algos')
414 | 
415 |     savedAlgorithms = {}
416 | 
417 |     for result in allResults:
418 |         result['blank1'] = ''
419 |         result['blank2'] = ''
420 | 
421 |         if result['algorithm'] and (result['run'] + result['algorithm']) not in savedAlgorithms:
422 |             algoFileName = os.path.join(result['run'], 'hypermax', result['algorithm'])
423 |             if os.path.exists(algoFileName):
424 |                 data = pickle.load(open(algoFileName, 'rb'))['algo']
425 | 
426 |                 scriptName = "algorithm-" + str(len(savedAlgorithms)) + ".py"
427 |                 dataName = "algorithm-" + str(len(savedAlgorithms)) + "-pickle.bin"
428 |                 with open(os.path.join('final', 'algos', scriptName), 'wt') as file:
429 |                     file.write(data.computeScript)
430 |                 with open(os.path.join('final', 'algos', dataName), 'wb') as file:
431 |                     pickle.dump(data, file)
432 |                 savedAlgorithms[result['run'] + result['algorithm']] = scriptName
433 |                 result['algorithm'] = scriptName
434 |             else:
435 |                 result['algorithm'] = ''
436 |         elif result['algorithm']:
437 |             result['algorithm'] = savedAlgorithms[result['run'] + result['algorithm']]
438 | 
439 |     prettyResults = []
440 |     for result in allResults:
441 |         prettyResult = {}
442 |         for key in result.keys():
443 |             prettyResult[key] = result[key]
444 |             if prettyResult[key]:
445 |                 try:
446 |                     number = roundPrecision(float(prettyResult[key]))
447 |                     prettyResult[key] = number
448 |                 except ValueError:
449 |                     pass
450 |                 except TypeError:
451 |                     pass
452 |         prettyResults.append(prettyResult)
453 | 
454 | 
455 |     with open("final/allResults.csv", "wt") as file:
456 |         writer = csv.DictWriter(file, fieldnames=(atpeParameterKeys + ['blank1'] + featureKeys + ['blank2'] + nonFeatureKeys))
457 |         writer.writeheader()
458 |         writer.writerows(prettyResults)
459 | 
460 |     # Compute the normalization region for each predictor
461 |     scalers = {}
462 |     for feature in featureKeys:
463 |         vectors = []
464 |         for result in allResults:
465 |             if float(result[feature]) != 0:
466 |                 vectors.append([result[feature]])
467 | 
468 |         vectors = numpy.array(vectors)
469 |         # Use percentiles to focus our scaler on the most common values, making it more immune to the weird outliers in our dataset
470 |         percentile20 = numpy.percentile(vectors[:,0], q=20)
471 |         percentile80 = numpy.percentile(vectors[:,0], q=80)
472 |         vectors = [vector for vector in vectors if vector[0] > percentile20 and vector[0] < percentile80]
473 | 
474 |         scaler = sklearn.preprocessing.StandardScaler()
475 |         scaler.fit(vectors)
476 | 
477 |         scalers[feature] = {
478 |             'scales': scaler.scale_.tolist(),
479 |             'means': scaler.mean_.tolist(),
480 |             'variances': scaler.var_.tolist()
481 |         }
482 |     with open("scaling_model.json", 'wt') as file:
483 |         json.dump(scalers, file)
484 | 
485 |     for keyIndex, key in enumerate(featureKeys):
486 |         featureScalingModel = sklearn.preprocessing.StandardScaler()
487 |         featureScalingModel.scale_ = numpy.array(scalers[key]['scales'])
488 |         featureScalingModel.mean_ = numpy.array(scalers[key]['means'])
489 |         featureScalingModel.var_ = numpy.array(scalers[key]['variances'])
490 | 
491 |         vectors = []
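# Re-project every result's raw value for this feature through the reconstructed scaler,
# then write the rounded normalized value back onto the result, so the normalized CSV and
# the per-parameter datasets built below all use scaled features.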
492 | for resultIndex, result in enumerate(allResults): 493 | vectors.append([float(result[key])]) 494 | 495 | vectors = featureScalingModel.transform(vectors) 496 | for resultIndex, result in enumerate(allResults): 497 | result[key] = roundPrecision(vectors[resultIndex][0]) 498 | 499 | 500 | # Output the normalized results 501 | prettyResults = [] 502 | for result in allResults: 503 | prettyResult = {} 504 | for key in result.keys(): 505 | prettyResult[key] = result[key] 506 | if prettyResult[key]: 507 | try: 508 | number = roundPrecision(float(prettyResult[key])) 509 | prettyResult[key] = number 510 | except ValueError: 511 | pass 512 | except TypeError: 513 | pass 514 | prettyResults.append(prettyResult) 515 | 516 | with open("final/allResults_normalized.csv", "wt") as file: 517 | writer = csv.DictWriter(file, fieldnames=atpeParameterKeys + ['blank1'] + featureKeys + ['blank2'] + nonFeatureKeys) 518 | writer.writeheader() 519 | writer.writerows(prettyResults) 520 | 521 | # Now put together the dataset for each predictor feature, so its more convenient to build models on them 522 | shuffled = list(allResults) 523 | random.shuffle(shuffled) 524 | cutoff = int(len(shuffled) * 0.2) 525 | testing = shuffled[:cutoff] 526 | training = shuffled[cutoff:] 527 | 528 | def writeDataset(key, filename, dataset): 529 | with open(filename, 'wt') as file: 530 | keyResults = [] 531 | for result in dataset: 532 | if result[key]: 533 | data = {} 534 | data[key] = result[key] 535 | for feature in featureKeys: 536 | data[feature] = result[feature] 537 | keyResults.append(data) 538 | 539 | writer = csv.DictWriter(file, fieldnames=[key] + featureKeys) 540 | writer.writeheader() 541 | writer.writerows(keyResults) 542 | 543 | for key in atpeParameterKeys: 544 | writeDataset(key, "final/" + key + "_testing.csv", testing) 545 | writeDataset(key, "final/" + key + "_training.csv", training) 546 | 547 | return training, testing 548 | 549 | 550 | 551 | def trainATPEModels(): 552 | with open('final/allResults_normalized.csv', 'rt') as file: 553 | allResults = [dict(result) for result in csv.DictReader(file)] 554 | 555 | # Now put together the dataset for each predictor feature, so its more convenient to build models on them 556 | shuffled = list(allResults) 557 | random.shuffle(shuffled) 558 | cutoff = int(len(shuffled) * 0.2) 559 | testing = shuffled[:cutoff] 560 | training = shuffled[cutoff:] 561 | 562 | 563 | def createDataset(key, dataset, atpeParamFeatures): 564 | vectors = [] 565 | targets = [] 566 | allTargets = set() 567 | for result in dataset: 568 | if result[key] not in allTargets: 569 | allTargets.add(result[key]) 570 | 571 | allTargets = sorted(list(allTargets)) 572 | 573 | names = copy.copy(featureKeys) 574 | for atpeParamFeature in atpeParamFeatures: 575 | if atpeParamFeature in atpeParameterValues: 576 | for value in atpeParameterValues[atpeParamFeature]: 577 | names.append(atpeParamFeature + "_" + value) 578 | else: 579 | names.append(atpeParamFeature) 580 | 581 | for result in dataset: 582 | if result[key]: 583 | vector = [] 584 | for feature in featureKeys: 585 | vector.append(float(result[feature])) 586 | for atpeParamFeature in atpeParamFeatures: 587 | if atpeParamFeature in result and result[atpeParamFeature] is not None and result[atpeParamFeature] != '': 588 | if atpeParamFeature in atpeParameterValues: 589 | for value in atpeParameterValues[atpeParamFeature]: 590 | vector.append(1.0 if result[atpeParamFeature] == value else 0) 591 | else: 592 | vector.append(float(result[atpeParamFeature])) 593 
| else: 594 | vector.append(-3) # We use -3 because none of our atpe parameters ever take this value 595 | vectors.append(vector) 596 | 597 | if key in classPredictorKeys: 598 | targets.append(allTargets.index(result[key])) 599 | else: 600 | targets.append(float(result[key])) 601 | return lightgbm.Dataset(numpy.array(vectors), label=numpy.array(targets), feature_name=names) 602 | 603 | 604 | allModels = [] 605 | for keyIndex, key in enumerate(predictorKeyCascadeOrdering): 606 | atpeParamFeatures = predictorKeyCascadeOrdering[:keyIndex] 607 | trainingData = createDataset(key, training, atpeParamFeatures=atpeParamFeatures) 608 | testingData = createDataset(key, testing, atpeParamFeatures=atpeParamFeatures) 609 | 610 | allFeatureNames = trainingData.feature_name 611 | 612 | params = { 613 | 'num_iterations': 100, 614 | 'is_provide_training_metric': True, 615 | "early_stopping_round": 5, 616 | "feature_fraction": 0.85, 617 | "learning_rate": 0.05 618 | } 619 | 620 | if key in customLightGBMParams: 621 | for param in customLightGBMParams[key]: 622 | params[param] = customLightGBMParams[key][param] 623 | 624 | if key in classPredictorKeys: 625 | params['num_class'] = numPredictorClasses[key] 626 | params['objective'] = 'multiclass' 627 | params['metric'] = 'multi_error' 628 | else: 629 | params['objective'] = 'regression_l2' 630 | params['metric'] = 'l2' 631 | 632 | model = lightgbm.train(params, trainingData, valid_sets=[testingData], verbose_eval=False) 633 | 634 | model.save_model("model-" + key + ".txt") 635 | 636 | if key not in classPredictorKeys: 637 | # Now we determine the "adjustment factor". Because these models are trained on an extremely noisy data set, 638 | # We have to eliminate the central tendency that results from training on it, so that the outputs of our model 639 | # Take up the full range of possible ATPE parameter values 640 | print(key) 641 | origStddev = numpy.std([float(result[key]) for result in training if result[key] is not None and result[key] != '']) 642 | origMean = numpy.mean([float(result[key]) for result in training if result[key] is not None and result[key] != '']) 643 | 644 | vectors = [] 645 | for result in training: 646 | vector = [] 647 | for feature in featureKeys: 648 | vector.append(result[feature]) 649 | for atpeParamFeature in atpeParamFeatures: 650 | if atpeParamFeature in result and result[atpeParamFeature] is not None and result[atpeParamFeature] != '': 651 | if atpeParamFeature in atpeParameterValues: 652 | for value in atpeParameterValues[atpeParamFeature]: 653 | vector.append(1.0 if result[atpeParamFeature] == value else 0) 654 | else: 655 | vector.append(float(result[atpeParamFeature])) 656 | else: 657 | vector.append(-3) # We use -3 because none of our atpe parameters ever take this value, so it acts as our signal that this parameter is unfilled 658 | vectors.append(vector) 659 | 660 | trainingPredicted = model.predict(numpy.array(vectors)) 661 | 662 | predStddev = numpy.std(trainingPredicted) 663 | predMean = numpy.mean(trainingPredicted) 664 | 665 | origStddev = origStddev * atpeParameterPredictionStandardDeviationRatio[key] 666 | 667 | with open('model-' + key + "-configuration.json", 'wt') as file: 668 | json.dump({ 669 | "origStddev": origStddev, 670 | "origMean": origMean, 671 | "predStddev": predStddev, 672 | "predMean": predMean 673 | }, file) 674 | 675 | def renormalize(value): 676 | return (((value - predMean) / predStddev) * origStddev) + origMean 677 | 678 | totalL1Error = 0 679 | totalL1NormalizedError = 0 680 | totalCount = 0 
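# Evaluate the regression model on the held-out testing rows: record the raw prediction,
# the renormalized prediction (((pred - predMean) / predStddev) * origStddev + origMean,
# as defined by renormalize() above), and the L1 error of each against the true ATPE
# parameter value, then report the average errors.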
681 | with open('predictions-' + key + '.csv', 'wt') as file: 682 | writer = csv.DictWriter(file, fieldnames=[key, key + "_predicted", key + "_predicted_normalized", key + "_l1_error", key + "_l1_normalized_error"]) 683 | writer.writeheader() 684 | for result in testing: 685 | if result[key] is not None and result[key] != '': 686 | vector = [] 687 | for feature in featureKeys: 688 | vector.append(float(result[feature])) 689 | for atpeParamFeature in atpeParamFeatures: 690 | if atpeParamFeature in result and result[atpeParamFeature] is not None and result[atpeParamFeature] != '': 691 | if atpeParamFeature in atpeParameterValues: 692 | for value in atpeParameterValues[atpeParamFeature]: 693 | vector.append(1.0 if result[atpeParamFeature] == value else 0) 694 | else: 695 | vector.append(float(result[atpeParamFeature])) 696 | else: 697 | vector.append(-3) # We use -3 because none of our atpe parameters ever take this value 698 | value = roundPrecision(float(model.predict([vector])[0])) 699 | predicted = roundPrecision(float(renormalize(value))) 700 | l1_error = roundPrecision(float(abs(value - float(result[key])))) 701 | l1_normalized_error = roundPrecision(float(abs(predicted - float(result[key])))) 702 | totalL1Error += l1_error 703 | totalL1NormalizedError += l1_normalized_error 704 | totalCount += 1 705 | writer.writerow({ 706 | key: result[key], 707 | key + "_predicted": value, 708 | key + "_predicted_normalized": predicted, 709 | key + "_l1_error": l1_error, 710 | key + "_l1_normalized_error": l1_normalized_error, 711 | }) 712 | print("Average L1 Error:", totalL1Error/totalCount) 713 | print("Average Normalized L1 Error:", totalL1NormalizedError/totalCount) 714 | else: 715 | # Now we have to determine the adjustment factor for each of the class decisions. Because the dataset is very noisy, 716 | # our output probabilities tend to be very close to 0.25. Lets renormalize them so we can use them for stochastic 717 | # selection. 
718 | values = atpeParameterValues[key] 719 | origMeans = {} 720 | origStddevs = {} 721 | for value in values: 722 | origMean = numpy.mean([(1.0 if result[key] == value else 0) for result in training if result[key] is not None and result[key] != '']) 723 | origStddev = numpy.std([(1.0 if result[key] == value else 0) for result in training if result[key] is not None and result[key] != '']) 724 | origMeans[value] = origMean 725 | origStddevs[value] = origStddev * atpeParameterPredictionStandardDeviationRatio[key] 726 | 727 | vectors = [] 728 | for result in training: 729 | vector = [] 730 | for feature in featureKeys: 731 | vector.append(result[feature]) 732 | for atpeParamFeature in atpeParamFeatures: 733 | if atpeParamFeature in result and result[atpeParamFeature] is not None and result[atpeParamFeature] != '': 734 | if atpeParamFeature in atpeParameterValues: 735 | for value in atpeParameterValues[atpeParamFeature]: 736 | vector.append(1.0 if result[atpeParamFeature] == value else 0) 737 | else: 738 | vector.append(float(result[atpeParamFeature])) 739 | else: 740 | vector.append(-3) # We use -3 because none of our atpe parameters ever take this value, so it acts as our signal that this parameter is unfilled 741 | vectors.append(vector) 742 | 743 | trainingPredicted = model.predict(numpy.array(vectors)) 744 | trainingPredicted = numpy.array(trainingPredicted) 745 | 746 | predMeans = {} 747 | predStddevs = {} 748 | for valueIndex, value in enumerate(values): 749 | predMean = numpy.mean(trainingPredicted[:, valueIndex]) 750 | predStddev = numpy.std(trainingPredicted[:, valueIndex]) 751 | predMeans[value] = predMean 752 | predStddevs[value] = predStddev 753 | 754 | with open('model-' + key + "-configuration.json", 'wt') as file: 755 | json.dump({ 756 | "origStddevs": origStddevs, 757 | "origMeans": origMeans, 758 | "predStddevs": predStddevs, 759 | "predMeans": predMeans 760 | }, file) 761 | 762 | 763 | def renormalize(predictedClasses): 764 | predictedClasses = copy.copy(predictedClasses) 765 | for valueIndex, value in enumerate(values): 766 | predictedClasses[valueIndex] = (((predictedClasses[valueIndex] - predMeans[value]) / predStddevs[value]) * origStddevs[value]) + origMeans[value] 767 | predictedClasses[valueIndex] = max(0, min(1.0, predictedClasses[valueIndex])) 768 | return predictedClasses 769 | 770 | totalCorrect = 0 771 | totalCount = 0 772 | with open('predictions-' + key + '.csv', 'wt') as file: 773 | writer = csv.DictWriter(file, fieldnames=[key, key + "_predicted", key + "_correct"]) 774 | writer.writeheader() 775 | for resultIndex, result in enumerate(testing): 776 | vector = [] 777 | for feature in featureKeys: 778 | vector.append(float(result[feature])) 779 | for atpeParamFeature in atpeParamFeatures: 780 | if atpeParamFeature in result and result[atpeParamFeature] is not None and result[atpeParamFeature] != '': 781 | if atpeParamFeature in atpeParameterValues: 782 | for value in atpeParameterValues[atpeParamFeature]: 783 | vector.append(1.0 if result[atpeParamFeature] == value else 0) 784 | else: 785 | vector.append(float(result[atpeParamFeature])) 786 | else: 787 | vector.append(-3) # We use -3 because none of our atpe parameters ever take this value, so it acts as our signal that this parameter is unfilled 788 | 789 | predictedClasses = model.predict([vector])[0] 790 | predicted = atpeParameterValues[key][int(numpy.argmax(predictedClasses))] 791 | correct = (predicted == result[key]) 792 | 793 | totalCount += 1 794 | if correct: 795 | totalCorrect += 1 796 | 797 | 
writer.writerow({ 798 | key: result[key], 799 | key + "_predicted": predicted, 800 | key + "_correct": correct 801 | }) 802 | print("Accuracy:", str(totalCorrect * 100 / totalCount), "%") 803 | 804 | importances = zip(allFeatureNames, model.feature_importance()) 805 | importances = sorted(importances, key=lambda r:-r[1]) 806 | print(key) 807 | for importance in importances: 808 | print(" ", importance[0], importance[1]) 809 | 810 | # mergeResults() 811 | trainATPEModels() -------------------------------------------------------------------------------- /research/searches/cifar_resnet/cifar10.json: -------------------------------------------------------------------------------- 1 | { 2 | "ui": { 3 | "enabled": false 4 | }, 5 | "hyperparameters": { 6 | "type": "object", 7 | "properties": { 8 | "learning_rate": { 9 | "type": "number", 10 | "mode": "uniform", 11 | "scaling": "linear", 12 | "min": 0.1, 13 | "max": 3.0 14 | }, 15 | "weight_decay": { 16 | "type": "number", 17 | "mode": "uniform", 18 | "scaling": "logarithmic", 19 | "min": 2e-5, 20 | "max": 2e-3 21 | }, 22 | "activation": { 23 | "type": "string", 24 | "enum": [ 25 | "relu", 26 | "elu", 27 | "selu", 28 | "rrelu" 29 | ] 30 | }, 31 | "layer1_size": { 32 | "type": "number", 33 | "mode": "uniform", 34 | "scaling": "logarithmic", 35 | "min": 32, 36 | "max": 96, 37 | "rounding": 16 38 | }, 39 | "layer1_layers": { 40 | "type": "number", 41 | "mode": "uniform", 42 | "scaling": "linear", 43 | "min": 1, 44 | "max": 3, 45 | "rounding": 1 46 | }, 47 | "layer2_size": { 48 | "type": "number", 49 | "mode": "uniform", 50 | "scaling": "logarithmic", 51 | "min": 64, 52 | "max": 192, 53 | "rounding": 32 54 | }, 55 | "layer2_layers": { 56 | "type": "number", 57 | "mode": "uniform", 58 | "scaling": "linear", 59 | "min": 1, 60 | "max": 3, 61 | "rounding": 1 62 | }, 63 | "layer3_size": { 64 | "type": "number", 65 | "mode": "uniform", 66 | "scaling": "logarithmic", 67 | "min": 128, 68 | "max": 384, 69 | "rounding": 64 70 | }, 71 | "layer3_layers": { 72 | "type": "number", 73 | "mode": "uniform", 74 | "scaling": "linear", 75 | "min": 1, 76 | "max": 3, 77 | "rounding": 1 78 | }, 79 | "layer4_size": { 80 | "type": "number", 81 | "mode": "uniform", 82 | "scaling": "logarithmic", 83 | "min": 256, 84 | "max": 768, 85 | "rounding": 128 86 | }, 87 | "layer4_layers": { 88 | "type": "number", 89 | "mode": "uniform", 90 | "scaling": "linear", 91 | "min": 1, 92 | "max": 3, 93 | "rounding": 1 94 | } 95 | } 96 | }, 97 | "function": { 98 | "type": "remote", 99 | "hosts": [ 100 | "localhost" 101 | ], 102 | "command": "source /home/bradley/imagenet-fast/cifar10/venv/bin/activate; cd /home/bradley/imagenet-fast/cifar10/; killall python -s9; killall python -s9; python test.py", 103 | "parallel": 1 104 | }, 105 | "search": { 106 | "method": "random", 107 | "iterations": 3000 108 | }, 109 | "results": { 110 | "graphs": false, 111 | "directory": "results" 112 | } 113 | } -------------------------------------------------------------------------------- /research/searches/cifar_resnet/cifar_test.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import json 3 | import os 4 | 5 | def test(params): 6 | with open('params.json', 'wt') as file: 7 | json.dump(params, file) 8 | 9 | args = [ 10 | 'python3', 11 | '-m', 12 | 'multiproc', 13 | 'train_cifar10.py ', 14 | '--cycle-len ', 15 | '40', 16 | ' -j', 17 | ' 16', 18 | ' -b', 19 | ' 128', 20 | ' --loss-scale', 21 | ' 512', 22 | ' --use-tta', 23 | ' 1', 24 | ' --fp16', 25 | ' 
--arch', 26 | ' resnet18', 27 | ' --wd', 28 | str(params['weight_decay']), 29 | ' --lr', 30 | str(params['learning_rate']), 31 | ' --use-clr', 32 | ' 50,12.5,0.95,0.85', 33 | ' data/' 34 | ] 35 | 36 | subprocess.run([' '.join(args)], cwd=os.getcwd(), shell=True) 37 | 38 | with open('tta_accuracy.txt') as file: 39 | lines = file.readlines() 40 | accuracies = [float(line) for line in lines] 41 | total = 0 42 | for accuracy in accuracies: 43 | total += accuracy 44 | averageAccuracy = total / len(accuracies) 45 | 46 | subprocess.run(['rm', 'tta_accuracy.txt']) 47 | 48 | return {"loss": 1.0 - averageAccuracy} 49 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | 4 | with open("README.md", "r") as fh: 5 | long_description = fh.read() 6 | 7 | 8 | setup( 9 | name='hypermax', 10 | version='0.5.1', 11 | description='Better, faster hyperparameter optimization by mixing the best of humans and machines.', 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url='https://github.com/electricbrainio/hypermax', 15 | author='Bradley Arsenault (Electric Brain)', 16 | author_email='brad@electricbrain.io', 17 | license='MIT', 18 | python_requires='>=3', 19 | packages=find_packages(), 20 | package_data={ 21 | 'hypermax': ['test', 'atpe_models/*.txt', 'atpe_models/*.json'], 22 | }, 23 | install_requires=[ 24 | 'hyperopt', 25 | 'scikit-learn', 26 | 'numpy', 27 | 'scipy', 28 | 'jsonschema', 29 | 'pyyaml', 30 | 'urwid', 31 | 'panwid==0.2.5', 32 | 'lightgbm', 33 | 'psutil', 34 | 'matplotlib', 35 | 'colors.py', 36 | 'pymongo' 37 | ], 38 | classifiers=[ 39 | 'Intended Audience :: Education', 40 | 'Intended Audience :: Science/Research', 41 | 'Intended Audience :: Developers', 42 | 'Environment :: Console', 43 | 'License :: OSI Approved :: BSD License', 44 | 'Operating System :: MacOS :: MacOS X', 45 | 'Operating System :: POSIX', 46 | 'Operating System :: Unix', 47 | 'Programming Language :: Python', 48 | 'Programming Language :: Python :: 3', 49 | 'Topic :: Scientific/Engineering', 50 | 'Topic :: Software Development', 51 | ], 52 | platforms=['Linux', 'OS-X'], 53 | zip_safe=False, 54 | entry_points={ 55 | 'console_scripts': [ 56 | 'hypermax = hypermax.cli:main', 57 | ] 58 | } 59 | ) 60 | -------------------------------------------------------------------------------- /test/general_test.json: -------------------------------------------------------------------------------- 1 | { 2 | "hyperparameters": { 3 | "type": "object", 4 | "properties": { 5 | "first_value": { 6 | "type": "number", 7 | "mode": "uniform", 8 | "scaling": "logarithmic", 9 | "min": 1, 10 | "max": 100, 11 | "rounding": 1 12 | }, 13 | "second_value": { 14 | "type": "number", 15 | "mode": "uniform", 16 | "scaling": "logarithmic", 17 | "min": 1, 18 | "max": 1000, 19 | "rounding": 1 20 | }, 21 | "third_value": { 22 | "type": "number", 23 | "mode": "uniform", 24 | "scaling": "logarithmic", 25 | "min": 1, 26 | "max": 10000, 27 | "rounding": 1 28 | }, 29 | "fourth_value": { 30 | "type": "number", 31 | "mode": "uniform", 32 | "scaling": "linear", 33 | "min": 1, 34 | "max": 1000, 35 | "rounding": 1 36 | }, 37 | "fifth_value": { 38 | "type": "number", 39 | "mode": "uniform", 40 | "scaling": "logarithmic", 41 | "min": 1, 42 | "max": 1000, 43 | "rounding": 1 44 | }, 45 | "sixth_value": { 46 | "type": "number", 47 | "mode": "uniform", 48 | 
"scaling": "logarithmic", 49 | "min": 1, 50 | "max": 1000, 51 | "rounding": 1 52 | }, 53 | "seventh_value": { 54 | "type": "number", 55 | "mode": "uniform", 56 | "scaling": "logarithmic", 57 | "min": 1, 58 | "max": 1000, 59 | "rounding": 1 60 | }, 61 | "eighth_value": { 62 | "type": "number", 63 | "mode": "uniform", 64 | "scaling": "logarithmic", 65 | "min": 1, 66 | "max": 1000, 67 | "rounding": 1 68 | } 69 | } 70 | }, 71 | "function": { 72 | "type": "python_function", 73 | "module": "hypermax.test_model", 74 | "name": "trainModel", 75 | "parallel": 1 76 | }, 77 | "search": { 78 | "method": "atpe", 79 | "iterations": 1000 80 | }, 81 | "ui": { 82 | "enabled": false 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /tutorials/fashion-MNIST/fashion_mnist.py: -------------------------------------------------------------------------------- 1 | 2 | import hypermax 3 | import tensorflow as tf 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | def measureAccuracy(params): 8 | # ### Downloasd the fashion_mnist data 9 | # Load the fashion-mnist pre-shuffled train data and test data 10 | (x_train,y_train),(x_test,y_test)=tf.keras.datasets.fashion_mnist.load_data() 11 | # Fashion labels 12 | fashion_labels=['T-shirt/Top', #index 0 13 | 'Trouser', #index 1 14 | 'Pullover', #index 2 15 | 'Dress', #index 3 16 | 'Coat', #index 4 17 | 'Sandal', #index 5 18 | 'Shirt', #index 6 19 | 'Sneaker', #index 7 20 | 'Bag', #index 8 21 | 'Ankle boot' #index 9 22 | ] 23 | 24 | 25 | # ### Data normalization 26 | x_train=x_train.astype('float32')/255 27 | x_test=x_test.astype('float32')/255 28 | 29 | 30 | # ### Split the data into train/validation/test data sets 31 | # Split train and validation datasets: 55000 for train and 5000 for validation 32 | (x_train,x_valid)=x_train[5000:],x_train[:5000] 33 | (y_train,y_valid)=y_train[5000:],y_train[:5000] 34 | 35 | # reshape input data from (28,28) to (28,28,1) 36 | w,h=28,28 37 | x_train=x_train.reshape(x_train.shape[0],w,h,1) 38 | x_valid=x_valid.reshape(x_valid.shape[0],w,h,1) 39 | x_test=x_test.reshape(x_test.shape[0],w,h,1) 40 | 41 | # one-hot encode the labels 42 | y_train=tf.keras.utils.to_categorical(y_train,10) 43 | y_valid=tf.keras.utils.to_categorical(y_valid,10) 44 | y_test=tf.keras.utils.to_categorical(y_test,10) 45 | 46 | 47 | # Create the model architecture 48 | # 2 convolutional nerual networks 49 | # 2 max pooling layers 50 | # 2 dropout layers 51 | # 1 fully connected layers 52 | model=tf.keras.Sequential() 53 | model.add(tf.keras.layers.Conv2D( 54 | filters=int(params['layer1_filters']), 55 | kernel_size=int(params['layer1_filter_size']), 56 | padding='same', 57 | activation=params['activation'], 58 | input_shape=(28,28,1))) 59 | model.add(tf.keras.layers.MaxPooling2D(pool_size=2)) 60 | model.add(tf.keras.layers.Dropout(float(params['layer1_dropout']))) 61 | 62 | model.add(tf.keras.layers.Conv2D( 63 | filters=int(params['layer2_filters']), 64 | kernel_size=int(params['layer2_filter_size']), 65 | padding='same', 66 | activation=params['activation'])) 67 | model.add(tf.keras.layers.MaxPooling2D(pool_size=2)) 68 | model.add(tf.keras.layers.Dropout(float(params['layer2_dropout']))) 69 | 70 | model.add(tf.keras.layers.Flatten()) 71 | model.add(tf.keras.layers.Dense(int(params['fully_connected_size']), activation=params['activation'])) 72 | model.add(tf.keras.layers.Dropout(float(params['fully_connected_dropout']))) 73 | model.add(tf.keras.layers.Dense(10,activation='softmax')) 74 | 75 | 
#print('Model summary:') 76 | model.summary() 77 | 78 | 79 | # ### Compile the model 80 | model.compile(loss='categorical_crossentropy', 81 | optimizer='adam', 82 | metrics=['accuracy']) 83 | 84 | 85 | # ### Train the model & save the model 86 | 87 | # Model checkpointer 88 | from tensorflow.keras.callbacks import ModelCheckpoint 89 | from tensorflow.keras.callbacks import EarlyStopping 90 | 91 | checkpointer=ModelCheckpoint(filepath='model.weights.best.hdf5', 92 | verbose=1, 93 | save_best_only=True) 94 | early_stop=EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=1) 95 | 96 | # Train the model 97 | model.fit(x_train, 98 | y_train, 99 | batch_size=64, 100 | epochs=50, 101 | validation_data=(x_valid,y_valid), 102 | callbacks=[checkpointer,early_stop]) 103 | 104 | # ### Load model with the best validation accuracy 105 | model.load_weights('model.weights.best.hdf5') 106 | 107 | 108 | # ### Test Accuracy 109 | # Evaluate the model on the test set 110 | score=model.evaluate(x_test,y_test,verbose=0) 111 | y_hat=model.predict(x_test) 112 | 113 | return {"loss": (1.0 - score[1])} 114 | -------------------------------------------------------------------------------- /tutorials/fashion-MNIST/keras.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "hyperparameters": { 4 | "type": "object", 5 | "properties": { 6 | "activation": { 7 | "type": "string", 8 | "enum": ["relu", "elu"] 9 | }, 10 | "layer1_filters": { 11 | "type": "number", 12 | "min": 16, 13 | "max": 128, 14 | "scaling": "logarithmic", 15 | "rounding": 1 16 | }, 17 | "layer1_filter_size": { 18 | "type": "number", 19 | "min": 2, 20 | "max": 3, 21 | "rounding": 1 22 | }, 23 | "layer1_dropout": { 24 | "type": "number", 25 | "min": 0.1, 26 | "max": 0.5 27 | }, 28 | "layer2_filters": { 29 | "type": "number", 30 | "min": 16, 31 | "max": 128, 32 | "scaling": "logarithmic", 33 | "rounding": 1 34 | }, 35 | "layer2_filter_size": { 36 | "type": "number", 37 | "min": 2, 38 | "max": 3, 39 | "rounding": 1 40 | }, 41 | "layer2_dropout": { 42 | "type": "number", 43 | "min": 0.1, 44 | "max": 0.5 45 | }, 46 | "fully_connected_size": { 47 | "type": "number", 48 | "min": 64, 49 | "max": 384, 50 | "scaling": "logarithmic", 51 | "rounding": 1 52 | }, 53 | "fully_connected_dropout": { 54 | "type": "number", 55 | "min": 0.25, 56 | "max": 0.75 57 | } 58 | } 59 | }, 60 | "function": { 61 | "type": "python_function", 62 | "module": "fashion_mnist", 63 | "name": "measureAccuracy" 64 | }, 65 | "search": { 66 | "method": "atpe", 67 | "iterations": 100 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /tutorials/fashion-MNIST/readme.md: -------------------------------------------------------------------------------- 1 | This is a tutorial illustrating how to tune the hyperparameters of a Keras model that predicts fashion-MNIST labels. 2 | 3 | step 1: 4 | Create the Python file that trains the model: fashion_mnist.py 5 | 6 | step 2: 7 | Create the JSON file that defines the hyperparameter space: keras.json 8 | 9 | step 3: 10 | Run the following command in a shell: 'hypermax keras.json' to tune the hyperparameters 11 | 12 | step 4: 13 | Obtain the optimal hyperparameters and improve the model's accuracy 14 | --------------------------------------------------------------------------------
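A note on the tutorial above: the only contract that hypermax's "python_function" mode imposes on the objective (here "module": "fashion_mnist", "name": "measureAccuracy") is that it accepts a dict of sampled hyperparameters and returns a dict whose "loss" entry is minimized, exactly as fashion_mnist.py and cifar_test.py do. Below is a minimal sketch of that contract using a hypothetical objective.py that is not part of this repository; the toy quadratic loss stands in for real training and evaluation code.

    # objective.py -- minimal sketch of a hypermax objective function
    # (hypothetical file, not part of this repository)
    def measureAccuracy(params):
        # params is a plain dict with one entry per property declared under
        # "hyperparameters" in the JSON config, e.g. params['layer1_dropout'].
        # Replace this toy quadratic with real training/evaluation code.
        loss = (float(params['layer1_dropout']) - 0.3) ** 2
        # hypermax minimizes the returned "loss" value over its search iterations.
        return {"loss": loss}

Pointing the config's "function" block at this module instead ("module": "objective", "name": "measureAccuracy") and running 'hypermax keras.json' would then optimize against the stub rather than the full Keras model, which is a quick way to sanity-check a search space before spending GPU time.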