├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── csaopt.py ├── csaopt ├── __init__.py ├── broker │ └── __init__.py ├── instancemanager │ ├── __init__.py │ ├── awstools.py │ ├── instancemanager.py │ └── local.py ├── internal │ ├── aws_setup_scripts │ │ ├── aws_broker_setup.sh │ │ └── aws_worker_setup.sh │ ├── aws_startup_scripts │ │ ├── aws-userdata-broker.sh │ │ ├── aws-userdata-worker.sh │ │ └── local-userdata-worker.sh │ └── csaopt-internal.conf ├── jobs │ ├── __init__.py │ └── jobmanager.py ├── model │ └── __init__.py ├── model_loader │ ├── __init__.py │ ├── model_loader.py │ └── model_validator.py └── utils │ └── __init__.py ├── docs └── source │ ├── conf.py │ └── index.rst ├── environment.dev.yml ├── environment.yml ├── examples ├── ackley │ ├── ackley.conf │ └── ackley_opt.py ├── bukin_6 │ ├── bukin_6_opt.py │ └── buking_6.conf ├── hp │ ├── hp_opt.py │ └── render.py ├── langermann │ ├── langermann_opt.conf │ └── langermann_opt.py ├── rastrigin │ ├── rastrigin.conf │ ├── rastrigin.docker.conf │ ├── rastrigin_docker_opt.py │ └── rastrigin_opt.py └── rosenbrock │ ├── drop_wave.conf │ └── drop_wave.py ├── setup.cfg ├── setup.py ├── sonar-project.properties └── tests ├── context.py ├── e2e └── csaopt_e2e.conf ├── test_aws.py ├── test_cli.py ├── test_e2e.py ├── test_jobmanager.py ├── test_model_loader.py ├── test_model_validator.py ├── test_runner.py └── test_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | coverage.xml 2 | .pytest_cache/ 3 | .DS_Store 4 | .idea/ 5 | 6 | # Compiled Object files 7 | *.slo 8 | *.lo 9 | *.o 10 | *.obj 11 | 12 | # Precompiled Headers 13 | *.gch 14 | *.pch 15 | 16 | # Compiled Dynamic libraries 17 | *.so 18 | *.dylib 19 | *.dll 20 | 21 | # Fortran module files 22 | *.mod 23 | 24 | # Compiled Static libraries 25 | *.lai 26 | *.la 27 | *.a 28 | *.lib 29 | 30 | # Executables 31 | *.exe 32 | *.out 33 | *.app 34 | 35 | # Eclipse Core 36 | .project 37 | 38 | # External tool builders 39 | .externalToolBuilders/ 40 | 41 | # settings 42 | .settings/ 43 | 44 | # Locally stored "Eclipse launch configurations" 45 | *.launch 46 | 47 | # CDT-specific 48 | .cproject 49 | 50 | # JDT-specific (Eclipse Java Development Tools) 51 | .classpath 52 | 53 | # PDT-specific 54 | .buildpath 55 | 56 | # sbteclipse plugin 57 | .target 58 | 59 | # Generated docs 60 | doc/ 61 | 62 | # Build dir 63 | build/ 64 | cmake-build-debug/ 65 | 66 | # vs code 67 | .vscode/ 68 | 69 | # mypy 70 | .mypy_cache/ 71 | 72 | # pycache 73 | __pycache__ 74 | 75 | # caches 76 | .cache/ 77 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: generic 2 | 3 | services: 4 | - docker 5 | 6 | matrix: 7 | include: 8 | - os: linux 9 | dist: trusty 10 | sudo: true 11 | - os: osx 12 | 13 | addons: 14 | sonarcloud: 15 | organization: "d53dave-github" 16 | 17 | env: 18 | global: 19 | - BOTO_CONFIG=/dev/null # Fix "No module named google_compute_engine" on travis-ci 20 | - CC_TEST_REPORTER_ID=57a2d5465c28a2de7f343c5c859259b2d5b473a6fee413edd1f4503dc911fe38 21 | 22 | before_script: 23 | - | 24 | if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then 25 | curl -L https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh > miniconda.sh; 26 | curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-darwin-amd64 > cc-test-reporter; 27 | else 28 | curl -L 
https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh; 29 | curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > cc-test-reporter; 30 | fi 31 | - bash miniconda.sh -b -p $HOME/miniconda 32 | - export PATH="$HOME/miniconda/bin:$PATH" 33 | - chmod +x ./cc-test-reporter 34 | - hash -r 35 | - conda config --set always_yes yes --set changeps1 no 36 | - conda update -q conda 37 | # Useful for debugging any issues with conda 38 | - conda info -a 39 | - conda env create --name csaopt-travisci -f environment.dev.yml 40 | - source activate csaopt-travisci 41 | - ./cc-test-reporter before-build 42 | 43 | script: py.test --cov=csaopt -v tests 44 | 45 | after_script: 46 | - coverage xml 47 | - | 48 | if [[ "$TRAVIS_PULL_REQUEST" == "false" && $TRAVIS_OS_NAME == "linux" ]]; then ./cc-test-reporter after-build --exit-code $TRAVIS_TEST_RESULT; 49 | fi 50 | 51 | after_success: 52 | - | 53 | if [[ $TRAVIS_OS_NAME == "linux" ]]; then 54 | coveralls; sonar-scanner; 55 | fi 56 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 David Sere 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CSAOpt - Cloud based, GPU accelerated Simulated Annealing Framework 2 | 3 | [![Build Status](https://travis-ci.com/d53dave/csaopt.svg?branch=master)](https://travis-ci.com/d53dave/csaopt) 4 | [![Coverage Status](https://coveralls.io/repos/github/d53dave/csaopt/badge.svg?branch=master)](https://coveralls.io/github/d53dave/csaopt?branch=master) 5 | [![Maintainability](https://api.codeclimate.com/v1/badges/1f269d5aed4a650403ec/maintainability)](https://codeclimate.com/github/d53dave/csaopt/maintainability) 6 | [![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fd53dave%2Fcsaopt.svg?type=shield)](https://app.fossa.io/projects/git%2Bgithub.com%2Fd53dave%2Fcsaopt?ref=badge_shield) [![Join Slack](https://img.shields.io/badge/style-join-green.svg?longCache=true&style=flat&label=slack&logo=slack)](https://join.slack.com/t/csaopt/shared_invite/enQtMzY2ODUyOTEwNDU1LTM3NDIxN2FiZGUzMjQ2YzdhZWIxY2JhZGVkODdlM2RhZWVhMmNjMjEwYTY3YzE2YTc4YmFlYTYyYjRkYzRmNGE) 7 | [![SonarCloud Quality](https://sonarcloud.io/api/project_badges/measure?project=d53dave_csaopt&metric=alert_status)](https://sonarcloud.io/dashboard?id=d53dave_csaopt) 8 | 9 | CSAOpt is a framework that enables you to run cloud based¹ GPU 10 | accelerated optimizations based on a massively parallel flavor of 11 | simulated annealing. You simply provide a Python implementation of 12 | the functions used by simulated annealing, and CSAOpt takes care of 13 | managing instances, deploying code, distributing work and collecting 14 | results. 15 | 16 | ## Usage 17 | 18 | 1. Configure your optimization run and cloud provider 19 | 2. Model your domain 20 | 3. Run CSAOpt for fun and profit 21 | 22 | ### DISCLAIMER 23 | 24 | If you are using CSAOpt together with AWS/EC2, this will incur costs on your 25 | registered payment method (i.e. your credit card). CSAOpt will try to make sure 26 | that instances are only run during normal operation, are properly shut down when 27 | the software terminates, and will print a **big, fat warning** if it cannot 28 | verify that everything was terminated upon exit. 29 | 30 | I will not be responsible for any of the costs generated by using CSAOpt. This 31 | software is provided **as is** and should be handled with the appropriate care. 32 | 33 | **Always** [make sure that no instances are left running after CSAOpt 34 | terminates](https://console.aws.amazon.com/ec2/v2/). 35 | 36 | ## Configuration 37 | 38 | The configuration is based on 39 | [HOCON](https://github.com/typesafehub/config/blob/master/HOCON.md) 40 | (typesafe/lightbend) and integrated with the excellent 41 | [pyhocon](https://github.com/chimpler/pyhocon). The main configuration (i.e. 42 | configuration for running the software) is located in `conf/csaopt.conf`. In 43 | addition, there is an internal configuration file under 44 | `csaopt/internal/csaopt-internal.conf`, which does not need to be modified under 45 | normal circumstances. A detailed description and listing of the supported 46 | configuration options will follow here. 47 | 48 | ## Modelling 49 | 50 | TODO 51 | 52 | ## Requirements 53 | 54 | This software will probably not run on Windows out of the box, but it might run 55 | in the [WSL](https://blogs.msdn.microsoft.com/wsl). It runs fine on macOS and on 56 | recent Linux distributions. The deployed AWS instances are based on Ubuntu 57 | Server 16.04 LTS.
58 | 59 | Required software: 60 | 61 | - A Conda3 distribution (i.e. 62 | [Anaconda](https://docs.anaconda.com/anaconda/install/) or 63 | [Miniconda](https://conda.io/miniconda.html)) 64 | - [AWS](https://aws.amazon.com/) credentials or a local GPU capable of running 65 | [CUDA](https://www.geforce.com/hardware/technology/cuda) computations. 66 | 67 | ## Development 68 | 69 | Formerly based on [pipenv](https://github.com/pypa/pipenv) (which is awesome), 70 | CSAOpt now uses Conda for package management. 71 | Currently, there is no separate development environment, although this would 72 | certainly be possible. So go ahead and 73 | 74 | ```shell 75 | git clone https://github.com/d53dave/csaopt && cd csaopt 76 | conda env create 77 | source activate csaopt 78 | ``` 79 | 80 | for development. 81 | 82 | Development of CSAOpt happens in VSCode, which requires you to set 83 | 84 | - `python.venvPath` to the venv path (see the output of `conda env list`) 85 | - `python.pythonPath` to the `bin/python` inside that venv 86 | 87 | for it to pick up the right interpreter and installed packages. 88 | 89 | ### Running the Test Suite 90 | 91 | From inside the `virtualenv` (i.e. after executing `source activate csaopt`), the 92 | suite can be executed using 93 | 94 | ```bash 95 | pytest 96 | #or 97 | py.test 98 | ``` 99 | 100 | ### End-to-End Test 101 | 102 | The end-to-end test suite is disabled by default, since it requires a complete 103 | setup, i.e. including AWS credentials. Therefore, running the test will incur 104 | some costs. The costs should be relatively low, given that the provided test 105 | optimization should only run for a few seconds. AWS, however, charges a whole 106 | hour even if the instances are terminated after a few seconds. 107 | 108 | The AWS credentials for the end-to-end tests need to be provided as environment 109 | variables, as documented in [awstools.py](csaopt/instancemanager/awstools.py). 110 | 111 | The test suite is activated by setting an environment variable called 112 | `CSAOPT_RUN_E2E`. The exact contents are irrelevant, as long as it evaluates to a 113 | [truthy](https://docs.python.org/3/library/stdtypes.html#truth-value-testing) 114 | value. 115 | 116 | After setting the appropriate environment variables, the whole suite can be 117 | executed and will include the end-to-end tests (see above). 118 | 119 | If you want to run 120 | just the end-to-end tests, you can use the following command from the 121 | `virtualenv`: 122 | 123 | ```bash 124 | py.test -s test_e2e.py::test_end2end 125 | ``` 126 | 127 | ## Cloud Computing Platforms 128 | 129 | At this moment, only Amazon Web Services/EC2² is supported, but it should be easy 130 | to add support for other providers. In a nutshell, any provider that (1) can be 131 | programmatically provisioned via a public API, (2) provides CUDA-capable hardware 132 | and (3) can run the nvidia-docker tool _should_ be able to support CSAOpt, since 133 | deployment and most configuration is done via Docker. On AWS/EC2, CSAOpt uses an 134 | AMI built by me, which has docker and nvidia-docker installed, as well as pulled 135 | images. Without those, a complete installation would take several minutes for 136 | each optimization run, and waiting makes people unhappy. 137 | 138 | The scripts used to set up the CSAOpt AMIs on AWS can be found under 139 | [csaopt/internal/aws_setup_scripts](csaopt/internal/aws_setup_scripts). They can handle 140 | Debian/Ubuntu and Fedora/CentOS based distributions.
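Since provider selection and instance setup are driven entirely by configuration, the sketch below shows what the `remote` section of a run configuration might look like. This is an illustration only: the keys are collected from the configuration lookups in `csaopt/__init__.py` and `csaopt/instancemanager/awstools.py`, and all values are made up.

```hocon
remote {
    platform = aws            // selects the AWSTools instance manager
    terminate_on_exit = true  // terminate all EC2 instances when CSAOpt exits

    aws {
        region = eu-central-1 // optional, falls back to the internal default
        worker_count = 2
        // access_key and secret_key may be omitted, in which case boto3 falls
        // back to the AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY environment
        // variables
    }
}
```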
141 | 142 | Obvious candidates for additional providers would be [Google Cloud Platform](https://cloud.google.com) 143 | as well as [Microsoft Azure](https://azure.microsoft.com/en-us/), both of which 144 | fulfill the 3 requirements stated above. Additionally, both providers 145 | conveniently offer client libraries on PyPI. In case somebody wanted to add 146 | support for another provider, the usual procedure would be: 147 | 148 | 1. Add the client package from a repository (e.g. 149 | [google cloud from PyPI](https://pypi.python.org/pypi/google-cloud), 150 | [azure-mgmt-compute from conda-forge](https://anaconda.org/conda-forge/azure-mgmt-compute)) 151 | 2. Implement the [instancemanager interface](csaopt/instancemanager/instancemanager.py), 152 | see [awstools.py](csaopt/instancemanager/awstools.py) 153 | 3. Add an `elif` branch to `Runner._get_instance_manager` in `csaopt/__init__.py`, which creates the instance manager based on the config 154 | 4. Profit 155 | 156 | ## FAQs 157 | 158 | > Why is this project not using docker to provision the message queue and workers? 159 | 160 | It is! This also has the benefit of not requiring docker to be installed on any 161 | of your local machines, since that is sometimes out of the user's control. 162 | ~~Things are a little bit awkward at the moment, since NVidia uses their 163 | [own tool](https://github.com/NVIDIA/nvidia-docker) called Docker Engine Utility 164 | for NVIDIA GPUs, which is not yet compatible with 165 | [ECS](https://aws.amazon.com/ecs/) or other container services. This means that 166 | we still rely on pre-built AMIs (or however images are called on other cloud 167 | providers), but when nvidia-docker becomes ready to be used with ECS, this will 168 | rock.~~ 169 | 170 | ~~That is a good question and it seems a very good use-case for docker, 171 | especially since NVidia published an official 172 | [Docker Engine Utility for NVIDIA GPUs](https://github.com/NVIDIA/nvidia-docker). 173 | I am considering throwing out ansible (which is not meant to be used the way I 174 | use it).~~ 175 | 176 | ## Change History 177 | 178 | > 0.2.0 Change to Numba for CUDA computations 179 | 180 | With v0.2.0 the remaining `C++` code (i.e. directly interfacing with CUDA) 181 | will be thrown out in favor of [Numba](https://github.com/numba/numba). 182 | This will imply a switch from `pipenv` to `conda`, which is unfortunate, because 183 | pipenv is really nice, IMHO. However, I don't want to compile llvmlite for the 184 | deployments and I certainly don't want to have separate environment managers for 185 | the different parts of this software. 186 | 187 | The move to numba will also allow the project to move much closer to the initial 188 | goal of using a single programming language for all components of CSAOpt, and 189 | Python is a much nicer language than C++11, in my opinion. 190 | 191 | > 0.1.0 Change to Python 192 | 193 | With v0.1.0, most C++ code was abandoned. It became clear 194 | that writing and maintaining this piece of software in C++ 195 | was never a good idea. Or, in other words, after chasing 196 | obscure bugs where they should not be, I gave up. The initial 197 | thought was **not to split the codebase into multiple languages** for 198 | the sake of the current and future developers and maintainers. 199 | This split will gradually be introduced, resulting, ideally, in 200 | a structure where all glue code, i.e. config parsing, command line 201 | interface, user interaction, networking and reporting will be 202 | done in Python.
The core concept of a user writing a small 203 | model as C++ code which will be executed in a simulated annealing 204 | pipeline on graphics processors will remain. 205 | 206 | > 0.0.x C++ prototypes 207 | 208 | Versions 0.0.x were prototypes written in C++, 209 | including the proof of concept which was demo-ed to 210 | my thesis supervisor and colleagues. These versions were 211 | undocumented and development was sporadic. Most changes 212 | did not make it into version control and features 213 | were added and abandoned at will. The last version of the 214 | C++ prototype in this repository was commit [6c922f](https://github.com/d53dave/csaopt/tree/6c922f933eceb8992e9acae36f1767336c56209f). 215 | 216 | ## Notes 217 | 218 | ¹ Only AWS EC2 and local GPUs are currently supported. Pull requests are welcome. 219 | 220 | ² It turns out ECS does not add much value here and is therefore not used. 221 | ~~There are plans to move to [ECS](https://aws.amazon.com/ecs/) once ECS 222 | supports nvidia-docker **or** docker allows more capabilities in plugins so that 223 | nvidia-docker can provide a proper docker plugin.~~ 224 | 225 | ## License 226 | 227 | [![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fd53dave%2Fcsaopt.svg?type=large)](https://app.fossa.io/projects/git%2Bgithub.com%2Fd53dave%2Fcsaopt?ref=badge_large) 228 | -------------------------------------------------------------------------------- /csaopt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import click 5 | from csaopt.utils import internet_connectivity_available, get_configs 6 | from csaopt import Runner 7 | from csaopt import __appname__ as csaopt_name 8 | from csaopt import __version__ as csaopt_version 9 | 10 | CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help']) 11 | 12 | 13 | def eprint(*args, **kwargs): 14 | print(*args, file=sys.stderr, **kwargs) 15 | 16 | 17 | @click.group() 18 | @click.version_option(version=csaopt_version, prog_name=csaopt_name) 19 | @click.pass_context 20 | def cli(ctx): 21 | try: 22 | internal_conf = get_configs('csaopt/internal/csaopt-internal.conf') 23 | ctx.obj['internal_conf'] = internal_conf 24 | except Exception as e: 25 | eprint('Could not load configs', e) 26 | sys.exit(1) 27 | 28 | 29 | @cli.command(name='run', help='Run the optimization based on the provided config and model.') 30 | @click.option( 31 | '--model', 32 | type=click.Path(exists=True, resolve_path=True), 33 | multiple=True, 34 | help='Folder containing the model that should be used for optimization.') 35 | @click.option( 36 | '--conf', 37 | type=click.Path(exists=True, resolve_path=True), 38 | multiple=True, 39 | help='Path to the CSAOpt config. If not provided, \'conf/csaopt.conf\' will be used') 40 | @click.pass_context 41 | def run_opt(ctx, model, conf): 42 | runner = Runner(list(model), list(conf), ctx.obj) 43 | runner.run() 44 | runner.console_printer.print_magenta('Bye.\n') 45 | 46 | 47 | @cli.command(name='check', help='Check and validate the provided configuration and model.') 48 | @click.option( 49 | '--model', 50 | type=click.Path(exists=True, resolve_path=True), 51 | help='Folder containing the model that should be used for optimization.') 52 | @click.option( 53 | '--conf', 54 | default='conf/csaopt.conf', 55 | type=click.Path(exists=True, resolve_path=True), 56 | help='Path to the CSAOpt config. 
If not provided, \'conf/csaopt.conf\' will be used') 57 | @click.option( 58 | '--with-aws', 59 | is_flag=True, 60 | default=False, 61 | help='If enabled, the check will also spin up EC2 instances to verify configuration and communication.') 62 | def run_check(**kwargs): 63 | print('Check called') 64 | 65 | 66 | @cli.command(name='cleanup', help='Clean up generated files and terminate any running EC2 instances') 67 | def cleanup(): 68 | pass 69 | 70 | 71 | if __name__ == '__main__': 72 | cli(obj={}) 73 | -------------------------------------------------------------------------------- /csaopt/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.0' 2 | __appname__ = 'CSAOpt: Cloud based, GPU accelerated Simulated Annealing' 3 | 4 | import asyncio 5 | import logging 6 | import shutil 7 | import sys 8 | import subprocess 9 | import unicodedata 10 | import re 11 | import os 12 | import time 13 | import pathlib 14 | import better_exceptions 15 | 16 | from pyhocon import ConfigTree 17 | from apscheduler.schedulers.asyncio import AsyncIOScheduler 18 | from apscheduler.job import Job as ApJob 19 | from asyncio.selector_events import BaseSelectorEventLoop 20 | from typing import Dict, Optional, List, Any 21 | from sty import fg, ef, rs, Rule, Render 22 | from datetime import datetime, timedelta 23 | from async_timeout import timeout 24 | 25 | from .model_loader.model_loader import ModelLoader 26 | from .model import Model 27 | from .utils import get_configs, internet_connectivity_available 28 | from .instancemanager.instancemanager import InstanceManager 29 | from .instancemanager.awstools import AWSTools 30 | from .jobs.jobmanager import Job, JobManager, ExecutionType 31 | from .broker import Broker 32 | 33 | better_exceptions.hook() 34 | 35 | # logging.basicConfig(level='INFO') 36 | log = logging.getLogger('csaopt.Runner') 37 | fg.set_rule('csaopt_magenta', Rule(Render.rgb_fg, 199, 51, 147)) 38 | 39 | # log.setLevel(logging.DEBUG) 40 | 41 | logging.getLogger('botocore').setLevel(logging.WARN) 42 | logging.getLogger('apscheduler.executors.default').setLevel(logging.WARN) 43 | 44 | 45 | class ConsolePrinter: 46 | 47 | status_done = 'Done.' 48 | status_failed = 'Failed.' 49 | __ANSI_escape_re = re.compile(r'[\x1B|\x1b]\[[0-?]*[ -/]*[@-~]') 50 | 51 | def __init__(self, internal_config, log_level='info') -> None: 52 | self.spinner_idx = 0 53 | self.termsize = shutil.get_terminal_size((80, 20)) 54 | self.last_line: str = '' 55 | self.has_scheduled_print: bool = False 56 | self.print_job: Optional[ApJob] = None 57 | self.scheduler: AsyncIOScheduler = AsyncIOScheduler() 58 | self.scheduler.start() 59 | self.spinner: List[str] = ['◣', '◤', '◥', '◢'] 60 | self.log_level = log_level 61 | 62 | max_columns = internal_config.get('console.width_max') 63 | self.columns = internal_config.get('console.width_default') 64 | 65 | try: 66 | _, columns = subprocess.check_output(['stty', 'size']).split() 67 | if int(columns) < max_columns: 68 | self.columns = int(columns)  # stty output is a byte string; keep self.columns an int for the width arithmetic below 69 | except Exception: 70 | log.exception('Could not get stty size, it seems there is no console available.') 71 | 72 | @staticmethod 73 | def _format_to_width(width: int, txt: str, status: str) -> str: 74 | txt_len = len(ConsolePrinter._remove_special_seqs(txt)) 75 | status_len = len(ConsolePrinter._remove_special_seqs(status)) 76 | if (txt_len + status_len) > width: 77 | return txt[0:(width - status_len - 4)] + '...
' + status 78 | 79 | return txt + ''.join([' '] * (width - status_len - txt_len)) + status 80 | 81 | @staticmethod 82 | def _remove_special_seqs(s): 83 | no_ansi = ConsolePrinter.__ANSI_escape_re.sub('', s) 84 | no_c = ''.join(c for c in no_ansi if unicodedata.category(c)[0] != 'C') 85 | return no_c 86 | 87 | def _advance_spinner(self): 88 | self.spinner_idx = (self.spinner_idx + 1) % len(self.spinner) 89 | 90 | def print(self, txt: str) -> None: 91 | self._advance_spinner() 92 | sys.stdout.write(txt + rs.all) 93 | sys.stdout.flush() 94 | self.last_line = txt 95 | 96 | def println(self, txt: str) -> None: 97 | self.print(txt + rs.all + '\n') 98 | 99 | def print_magenta(self, txt: str) -> None: 100 | self.print(fg.csaopt_magenta + txt) 101 | 102 | def print_with_spinner(self, txt: str) -> None: 103 | self.scheduler.remove_all_jobs() 104 | self.last_line = txt 105 | 106 | self.print_job = self.scheduler.add_job( 107 | lambda: self.print('\r' + ConsolePrinter._format_to_width( 108 | self.columns, 109 | txt, 110 | fg.csaopt_magenta + self.spinner[self.spinner_idx] + ' ')), 111 | 'interval', 112 | seconds=0.42, 113 | id='print_job', 114 | max_instances=1, 115 | next_run_time=datetime.now() + timedelta(milliseconds=50)) # run in 50ms, then periodically 116 | 117 | def spinner_success(self) -> None: 118 | # If log level < warn, just re-print with 'Done.' 119 | # Truncate to console width to fit message 120 | if self.log_level == 'info': 121 | if self.print_job is not None: 122 | self.print_job.pause() 123 | self.print_job.remove() 124 | self.scheduler.remove_all_jobs() 125 | self.println( 126 | ConsolePrinter._format_to_width(self.columns, 127 | self.last_line[0:self.columns - len(ConsolePrinter.status_done)], 128 | fg.green + ConsolePrinter.status_done)) 129 | 130 | def spinner_failure(self) -> None: 131 | # If log level < warn, just re-print with 'Failed.' 132 | # Truncate to console width to fit message 133 | if self.log_level == 'info': 134 | self.scheduler.remove_all_jobs() 135 | self.println( 136 | ConsolePrinter._format_to_width(self.columns, 137 | self.last_line[0:self.columns - len(ConsolePrinter.status_failed)], 138 | fg.red + ConsolePrinter.status_failed)) 139 | 140 | 141 | class Runner: 142 | def __init__(self, model_paths: List[str], conf_paths: List[str], invocation_options: Dict[str, Any]) -> None: 143 | internal_conf = invocation_options['internal_conf'] 144 | 145 | self.console_printer = ConsolePrinter(internal_conf) 146 | self.conf_paths = conf_paths 147 | self.model_paths = model_paths 148 | self.invocation_options = invocation_options 149 | self.loop = asyncio.get_event_loop() 150 | self.models: List[Model] = [] 151 | self.failures: List[str] = [] 152 | 153 | self.console_printer.print_magenta(ef.bold + 'Welcome to CSAOpt v{}\n\n'.format(__version__)) 154 | 155 | def _get_instance_manager(self, context, conf, internal_conf) -> InstanceManager: 156 | if conf.get('remote.local_docker', False) is True: 157 | from .instancemanager.local import Local 158 | return Local(conf, internal_conf) 159 | 160 | if not internet_connectivity_available(): 161 | raise AssertionError('Configured remote/cloud execution but internet connectivity unavailable.') 162 | 163 | cloud_platform = conf['remote.platform'] 164 | if cloud_platform == 'aws': 165 | return AWSTools(conf, internal_conf) 166 | # elif cloud_platform == 'gcp' etc... 
167 | # return GCPTools() 168 | else: 169 | raise AttributeError('Cloud platform ' + cloud_platform + ' unrecognized.') 170 | 171 | def duplicate_remote_configs(self, configs): 172 | for config in configs: 173 | if config.get('remote', None) is not None: 174 | remote_conf = config['remote'] 175 | break 176 | else: 177 | raise AssertionError('No remote configuration found') 178 | 179 | for config in configs: 180 | config['remote'] = remote_conf 181 | 182 | async def _run_async(self, loop): 183 | printer = self.console_printer 184 | printer.print_with_spinner('Loading Config') 185 | try: 186 | configs = [get_configs(conf_path) for conf_path in self.conf_paths] 187 | self.duplicate_remote_configs(configs) 188 | 189 | internal_conf = self.invocation_options['internal_conf'] 190 | 191 | ctx = Context(printer, configs, internal_conf) 192 | except Exception as e: 193 | printer.spinner_failure() 194 | self.failures.append('Error while loading config: ' + str(e)) 195 | raise e 196 | printer.spinner_success() 197 | 198 | printer.print_with_spinner('Loading Models') 199 | for idx, model_path in enumerate(self.model_paths): 200 | configs[idx]['model']['path'] = model_path 201 | log.debug('Loading model {}'.format(model_path)) 202 | loader = ModelLoader(configs[idx], internal_conf) 203 | self.models.insert(idx, loader.get_model()) 204 | log.debug('Models loaded succesfully.') 205 | printer.spinner_success() 206 | 207 | # Get cloud config, create instance manager 208 | self.remote_config = configs[0] 209 | 210 | if self.remote_config.get('remote.local_docker', False): 211 | start_msg = 'Starting local instances with docker' 212 | else: 213 | start_msg = 'Starting instances on {}'.format(self.remote_config['remote.platform'].upper()) 214 | 215 | printer.print_with_spinner(start_msg) 216 | await asyncio.sleep(0.8) 217 | 218 | with self._get_instance_manager(ctx, self.remote_config, internal_conf) as instancemanager: 219 | log.debug('Entered instancemanager block') 220 | printer.spinner_success() 221 | printer.print_with_spinner('Waiting for broker to come online') 222 | 223 | broker_instance, workers = instancemanager.get_running_instances() 224 | log.debug('Got running instances: {}, {}'.format(broker_instance, workers)) 225 | if hasattr(instancemanager, 'broker_password'): 226 | # password is none for local deploys 227 | printer.println('Broker password (in case you intend to re-use instances): ' + 228 | instancemanager.broker_password) 229 | 230 | await asyncio.sleep(5.0) 231 | 232 | queue_ids: List[str] = [] 233 | for worker in workers: 234 | if 'queue_id' in worker.props: 235 | queue_ids.append(worker.props['queue_id']) 236 | log.debug('Got queue IDs (a.k.a. 
active workers): {}'.format(queue_ids)) 237 | 238 | assert len(queue_ids) > 0, 'There should be at least one worker running' 239 | 240 | redis_connect_timeout = configs[0].get('broker.connect_timeout', 241 | internal_conf['broker.defaults.connect_timeout']) 242 | async with timeout(30) as async_timeout: 243 | while not async_timeout.expired: 244 | try: 245 | await asyncio.sleep(5) 246 | broker: Broker = Broker( 247 | host=str(broker_instance.public_ip), 248 | port=broker_instance.port, 249 | queue_ids=queue_ids, 250 | socket_connect_timeout=redis_connect_timeout, 251 | password=broker_instance.props.get('password', None)) 252 | printer.spinner_success() 253 | break 254 | except ConnectionError: 255 | pass 256 | 257 | if async_timeout.expired: 258 | log.debug('Timeout while waiting for Broker') 259 | printer.spinner_failure() 260 | raise TimeoutError('Timed out waiting for broker to come online') 261 | 262 | jobmanager = JobManager(ctx, broker, self.models, configs) 263 | 264 | await asyncio.sleep(5) # wait for redis to start 265 | 266 | printer.print_with_spinner("Waiting for workers to join") 267 | for worker_id in (await jobmanager.wait_for_worker_join()): 268 | printer.println('Worker {} joined'.format(worker_id)) 269 | 270 | printer.spinner_success() 271 | printer.print_with_spinner('Deploying model') 272 | try: 273 | await jobmanager.deploy_model() 274 | except Exception: 275 | msg = 'An exception occured during model deployment.' 276 | log.exception(msg) 277 | printer.spinner_failure() 278 | self.failures.append(msg) 279 | return 280 | 281 | printer.spinner_success() 282 | 283 | await asyncio.sleep(0.8) 284 | 285 | printer.print_with_spinner('Running Simulated Annealing') 286 | 287 | await asyncio.sleep(1) 288 | # TODO: this needs timeouts 289 | jobs: List[Job] = await jobmanager.submit() 290 | 291 | await jobmanager.wait_for_results() 292 | printer.spinner_success() 293 | 294 | printer.print_with_spinner('Retrieving results') 295 | printer.spinner_success() 296 | 297 | printer.print_with_spinner('Scanning for best result') 298 | 299 | best_job, best_value, best_state = jobmanager.scan_for_best_result(jobs) 300 | 301 | # To improve testability 302 | self.best_state = best_state 303 | self.best_value = best_value 304 | 305 | printer.spinner_success() 306 | 307 | printer.println('Evaluated: {} State: {}'.format(best_value, best_state)) 308 | 309 | for index, job in enumerate(jobs): 310 | config = configs[0] if len(configs) == 1 else configs[index] 311 | save_to_file = config.get('save_to_file.type', 'none') 312 | base_path = config.get('save_to_file.path', os.path.dirname(os.path.realpath(__file__))) 313 | conf_name = config.get('name', ('optimization_' + str(int(time.time())))) 314 | path = os.path.join(base_path, conf_name) 315 | 316 | pathlib.Path(base_path, conf_name).mkdir(parents=True, exist_ok=True) 317 | 318 | binary = config.get('save_to_file.binary', False) 319 | if save_to_file == 'all': 320 | job.write_files(path, binary) 321 | elif save_to_file == 'best': 322 | job.write_files(path, binary, only_best=True) 323 | 324 | if save_to_file != 'none': 325 | printer.println('Files have successfully been written.') 326 | 327 | printer.println('Waiting for instances to shutdown. This might take a long time. 
If you configured ' + 328 | 'files to be written to disk, they are now ready for your perusal.') 329 | 330 | def run(self) -> None: 331 | """ 332 | 333 | """ 334 | 335 | loop = asyncio.get_event_loop() 336 | loop.run_until_complete(self._run_async(loop)) 337 | loop.close() 338 | 339 | if self.failures: 340 | self.console_printer.println(fg.red + 'It seems there have been errors. 🌩') 341 | else: 342 | self.console_printer.println(fg.green + 'All done. ✨') 343 | 344 | def cancel(self) -> None: 345 | pass 346 | 347 | 348 | class Context: 349 | def __init__(self, console_printer: ConsolePrinter, configs: ConfigTree, internal_config: ConfigTree) -> None: 350 | self.console_printer: ConsolePrinter = console_printer 351 | self.configs = configs 352 | self.internal_config = internal_config 353 | -------------------------------------------------------------------------------- /csaopt/broker/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module provides all required functionality for the communication between the application master and workers. 3 | """ 4 | 5 | import dramatiq 6 | import os 7 | import msgpack 8 | import msgpack_numpy 9 | import uuid 10 | import logging 11 | import sys 12 | import asyncio 13 | 14 | from async_timeout import timeout 15 | from dramatiq.brokers.redis import RedisBroker 16 | from dramatiq.brokers.stub import StubBroker 17 | from dramatiq.results import Results, ResultMissing 18 | from dramatiq.results.backends import RedisBackend 19 | from dramatiq import Message 20 | from typing import Dict, Any, List, Union 21 | from collections import defaultdict 22 | from enum import Enum 23 | from sortedcontainers import SortedSet 24 | 25 | from ..model import Model 26 | from ..utils import is_pytest_run 27 | 28 | msgpack_numpy.patch() 29 | 30 | log = logging.getLogger(__name__) 31 | 32 | 33 | def _use_stub_broker() -> bool: 34 | """Check whether a real or a stub broker should be used. 35 | 36 | This is useful for testing. 37 | 38 | Returns: 39 | True if USE_STUB_BROKER environemtn variable is set and truthy 40 | """ 41 | return bool(os.environ.get('USE_STUB_BROKER')) is True 42 | 43 | 44 | class WorkerCommand(Enum): 45 | """Enum for supported worker commands""" 46 | 47 | DeployModel = 'deploy_model' 48 | RunOptimization = 'run_optimization' 49 | 50 | 51 | class _MsgPackEncoder(dramatiq.Encoder): 52 | def encode(self, data: dramatiq.encoder.MessageData) -> bytes: 53 | return msgpack.packb(data, use_bin_type=True) 54 | 55 | def decode(self, data: bytes) -> dramatiq.encoder.MessageData: 56 | return msgpack.unpackb(data, raw=False) 57 | 58 | 59 | class Broker(): 60 | """Class wrapping a Dramatiq broker 61 | 62 | This class wraps a Dramatiq Broker and offers sending messages and retrieving results from said broker. Since the 63 | worker code is, by design, separate from this codebase, the standard way of communicating with Dramatiq workers 64 | is unavailable. Therefore, messages are enqueued directly on the broker and kept track of by this class. When 65 | retrieving results, the stored messages are polled for results and then discarded. 66 | 67 | Per convention, there is one optimization worker per queue id, so the size of the list of queue ids must correspond 68 | with the overall number of worker processes, or more specifically, the number of worker instances, since one 69 | Dramatiq worker always only runs one process and therefore one optimization worker. 
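    A minimal usage sketch follows (the host, password and queue ids are made up, and the payload layout is only
    an assumption; see :class:`WorkerCommand` for the available commands)::

        broker = Broker(host='10.0.0.5', port=6379, password='s3cret', queue_ids=['queue-1', 'queue-2'])
        broker.broadcast(WorkerCommand.DeployModel, {'model': model_source})
        results = await broker.get_all_results(timeout=30.0)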
70 | 71 | 72 | Args: 73 | host: Hostname or IP address of Dramatiq broker infrastructure, currently Redis 74 | port: Redis port 75 | password: Redis password 76 | queue_ids: Queue ids of available workers 77 | """ 78 | 79 | def __init__(self, 80 | host: str = 'localhost', 81 | port: int = 6379, 82 | password: str = None, 83 | queue_ids: List[str] = [], 84 | **kwargs: str) -> None: 85 | 86 | if len(queue_ids) < 1: 87 | log.warning('Constructing {} without queue_ids'.format(Broker)) 88 | 89 | if is_pytest_run() and _use_stub_broker(): 90 | self.dramatiq_broker = broker = StubBroker()  # instantiate the stub and keep the reference used below 91 | broker.emit_after('process_boot') 92 | else: 93 | self.dramatiq_broker = broker = RedisBroker(host=host, port=port, password=password, **kwargs) 94 | 95 | msgpack_encoder = _MsgPackEncoder() 96 | self.result_backend = backend = RedisBackend(encoder=msgpack_encoder, client=getattr(broker, 'client', None))  # StubBroker has no redis client; RedisBackend then creates its own 97 | broker.add_middleware(Results(backend=backend)) 98 | 99 | dramatiq.set_broker(broker) 100 | dramatiq.set_encoder(msgpack_encoder) 101 | 102 | self.queue_ids: SortedSet = SortedSet(queue_ids) 103 | self.queue_messages: Dict[str, List[dramatiq.Message]] = defaultdict(list) 104 | 105 | def clear_queue_messages(self): 106 | """Clears messages that were queued on the broker 107 | 108 | The broker can only wait for results on message objects that were submitted to the Dramatiq broker. 109 | It does so by keeping an internal queue of messages. These messages will be polled for results in the 110 | next call to :meth:`~Broker.get_results`. Clearing the queue has the effect of 'forgetting' earlier 111 | messages. 112 | 113 | """ 114 | self.queue_messages.clear() 115 | 116 | def ping(self, queue: Union[str, int]) -> bool: 117 | """Sends a `ping` message to the specified queue. 118 | 119 | The Dramatiq PingActor will receive a `ping` message and should respond with `pong`. This is a blocking call. 120 | 121 | Args: 122 | queue: Index or id of queue 123 | 124 | Returns: 125 | `True` if the actor responded with `pong` 126 | 127 | """ 128 | queue_id = self.__extract_queue_id(queue) 129 | 130 | msg = self.dramatiq_broker.enqueue( 131 | Message( 132 | queue_name=queue_id, 133 | actor_name='PingActor', 134 | args=(), 135 | kwargs={}, 136 | options={}, 137 | )) 138 | 139 | # TODO refactor for dynamic timeout 140 | result = msg.get_result(backend=self.result_backend, block=True, timeout=2000) 141 | 142 | return result == 'pong' 143 | 144 | async def get_queue_results(self, queue: Union[str, int], timeout=10.0) -> List[Dict[str, Any]]: 145 | """Get results for a specific queue. 146 | 147 | This will attempt to fetch all results from the Dramatiq results backend for a specific queue. Calls 148 | :meth:`~Broker.get_results` with the supplied queue index or id. 149 | 150 | Args: 151 | queue: Index or id of queue 152 | timeout: Timeout in seconds 153 | 154 | Returns: 155 | A list of the retrieved results 156 | 157 | """ 158 | queue_id = self.__extract_queue_id(queue) 159 | results = await self.get_results(queues=[queue_id], result_timeout=timeout) 160 | 161 | return results[queue_id] 162 | 163 | async def get_all_results(self, timeout=10.0) -> Dict[str, List[Dict[str, Any]]]: 164 | """Get results for all known queues. 165 | 166 | This will attempt to fetch all results from the Dramatiq results backend for all known queues. Calls 167 | :meth:`~Broker.get_results` with a list of all queue ids. 168 | 169 | 170 | Args: 171 | timeout: Overall timeout in seconds (i.e.
not per-queue) 172 | 173 | Returns: 174 | A dictionary of queue ids and lists of the retrieved results for each queue 175 | 176 | """ 177 | return await self.get_results(queues=self.queue_ids, result_timeout=timeout) 178 | 179 | async def get_results(self, queues: List[str], result_timeout: float = 10.0) -> Dict[str, List[Dict[str, Any]]]: 180 | """Get results for a list of queue indices or ids. 181 | 182 | This will attempt to fetch results from the Dramatiq results backend for the specified queues. Internally, 183 | it constructs a list of messages that need to provide results and tries, in a non-blocking manner, to get 184 | results from the Dramatiq results backend. If a message has not yet received results, it will be retried 185 | after one-tenth of the specified timeout, but no sooner than after 1 second. This means each message is 186 | retried at most 10 times before the overall timeout stops the polling. 187 | 188 | 189 | Args: 190 | queues: List of queue indices or ids 191 | result_timeout: Overall timeout in seconds (i.e. not per-queue) 192 | 193 | Returns: 194 | A dictionary of queue ids and lists of the retrieved results for each queue 195 | 196 | """ 197 | messages_to_process: Dict[str, dramatiq.Message] = {} 198 | for queue in queues:  # only poll the requested queues, not all known queues 199 | for message in self.queue_messages[self.__extract_queue_id(queue)]: 200 | messages_to_process[message.message_id] = message 201 | log.debug('Messages to process is [{}]'.format(messages_to_process)) 202 | 203 | message_ids_processed: List[str] = [] 204 | results: Dict[str, List[Dict[str, Any]]] = defaultdict(list) 205 | 206 | async with timeout(result_timeout) as timeout_cm: 207 | while len(messages_to_process) > 0: 208 | for msg in messages_to_process.values(): 209 | try: 210 | msg_result = msg.get_result( # type: ignore 211 | backend=self.result_backend, timeout=int(result_timeout * 1e3)) 212 | results[msg.queue_name].append(msg_result) # type: ignore 213 | 214 | message_ids_processed.append(msg.message_id) # type: ignore 215 | except ResultMissing: 216 | pass 217 | 218 | # Remove all processed messages from messages_to_process list 219 | for message_id in message_ids_processed: 220 | messages_to_process.pop(message_id, None) 221 | 222 | message_ids_processed.clear() 223 | 224 | log.debug('Sleeping for {} seconds, there are still {} message(s) to be awaited.'.format( 225 | max(1.0, result_timeout / 10.0), len(messages_to_process))) 226 | await asyncio.sleep(max(1.0, result_timeout / 10.0)) 227 | 228 | if timeout_cm.expired: 229 | raise TimeoutError('Timed out while waiting for results') 230 | 231 | return results 232 | 233 | def broadcast(self, command: WorkerCommand, payload: Dict[str, Any]) -> None: 234 | """Send a command and payload to all registered queues. 235 | 236 | Args: 237 | command: Command 238 | payload: Any dictionary that can be serialized by msgpack (and msgpack-numpy) 239 | 240 | Returns: 241 | Nothing 242 | """ 243 | log.debug('Broadcasting cmd [{}] and payload [{}] to queue ids: {}'.format(command, payload, self.queue_ids)) 244 | for queue_id in self.queue_ids: 245 | self.send_to_queue(queue_id, command, payload) 246 | 247 | def send_to_queue(self, queue: Union[str, int], command: WorkerCommand, payload: Dict[str, Any]) -> None: 248 | """Send a command and payload to a queue.
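        For example, `broker.send_to_queue(0, WorkerCommand.RunOptimization, payload)` targets the worker
        listening on the first known queue; the payload layout itself depends on the worker implementation
        and is not prescribed here.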
249 | 250 | Args: 251 | queue: Index or queue id 252 | command: Command 253 | payload: Any dictionary that can be serialized by msgpack (and msgpack-numpy) 254 | 255 | Returns: 256 | Nothing 257 | """ 258 | queue_id = self.__extract_queue_id(queue) 259 | 260 | msg = self.dramatiq_broker.enqueue( 261 | Message( 262 | queue_name=queue_id, 263 | actor_name='OptimizationActor', 264 | args=(command.value, payload), 265 | kwargs={}, 266 | options={}, 267 | )) 268 | log.debug('Appending msg[{}] to queued_messages'.format(msg)) 269 | self.queue_messages[queue_id].append(msg) 270 | 271 | def __extract_queue_id(self, queue: Union[str, int]) -> str: 272 | """Retrieves the queue id for a given index or id 273 | 274 | If the input is an integer, this method will select the queue at the specified index. Otherwise, it will try 275 | to match the input as a string against all known queue ids. 276 | 277 | Args: 278 | queue: Index or queue id 279 | 280 | Returns: 281 | Queue id for the queue at the specified index. 282 | 283 | Raises: 284 | AssertionError if the index is out of bounds or the queue id is unknown. 285 | """ 286 | queue_id = str(queue) 287 | if type(queue) is int: 288 | if not int(queue) < len(self.queue_ids): 289 | raise AssertionError('Queue index out of range: ' + str(queue)) 290 | queue_id = self.queue_ids[int(queue)] 291 | 292 | if queue_id not in self.queue_ids: 293 | raise AssertionError('Queue id not found: ' + queue_id) 294 | 295 | return queue_id 296 | -------------------------------------------------------------------------------- /csaopt/instancemanager/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module provides the necessary functionality to provision instances for use with CSAOpt. 3 | 4 | Instances can be either local, docker-based containers or proper cloud instances. The module offers an abstract base 5 | class :class:`instancemanager.InstanceManager` from which actual instance managers need to inherit. 6 | """ 7 | 8 | import ipaddress 9 | import json 10 | 11 | from typing import Union, Dict, Any 12 | 13 | IpAddress = Union[ipaddress.IPv4Address, ipaddress.IPv6Address] 14 | 15 | 16 | class Instance(): 17 | """Platform-agnostic abstraction for an instance 18 | 19 | Args: 20 | inst_id: An instance identifier 21 | public_ip: IPv4 (or IPv6) address 22 | port: Port associated with this instance, e.g.
Redis 23 | is_broker: Flag distinguishing broker or worker instance 24 | kwargs: Any other keyword arguments will be passed to an internal `props` field 25 | """ 26 | 27 | def __init__(self, inst_id: str, public_ip: str, port=-1, is_broker: bool = False, 28 | **kwargs: Dict[str, Any]) -> None: 29 | self.public_ip = public_ip 30 | self.port: int = port 31 | self.inst_id: str = inst_id 32 | self.is_broker: bool = is_broker 33 | self.props: Dict[str, Any] = kwargs 34 | 35 | @property 36 | def public_ip(self) -> IpAddress: 37 | """ 38 | Public Ip property 39 | """ 40 | return self._public_ip 41 | 42 | @public_ip.setter 43 | def public_ip(self, ip: str) -> None: 44 | self._public_ip = ipaddress.ip_address(ip) 45 | 46 | def __str__(self): 47 | return 'Instance[id={}, public_ip={}, broker={}, props={}'.format(self.inst_id, self.public_ip, 'True' 48 | if self.is_broker else 'False', 49 | json.dumps(self.props)) 50 | -------------------------------------------------------------------------------- /csaopt/instancemanager/awstools.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import logging 3 | import time 4 | 5 | from string import Template 6 | from pyhocon import ConfigTree 7 | from botocore.exceptions import ClientError 8 | from typing import List, Any, Tuple, Dict 9 | 10 | from . import Instance 11 | from .instancemanager import InstanceManager 12 | from ..utils import random_str, random_int 13 | 14 | log = logging.getLogger() 15 | 16 | 17 | def _interpolate_userscript_template_vals(script: bytes, **kwargs: str) -> bytes: 18 | return Template(script.decode('utf-8')).substitute(kwargs).encode() 19 | 20 | 21 | def _has_exit_status(instance) -> bool: 22 | instance.reload() 23 | return instance.state['Name'] == 'shutting-down' or instance.state['Name'] == 'terminated' 24 | 25 | 26 | class AWSTools(InstanceManager): 27 | """The AWSTools class provides an abstraction over boto3 and EC2 for the use with CSAOpt 28 | 29 | This is a context manager and creates required instances on `__enter__()`, disposing of the managed instances in 30 | `__exit__()`. These two methods as well as :meth:`instancemanager.awstools.AWSTools.get_running_instances` are the 31 | only methods called by the Runner (i.e. the only public methods). 32 | 33 | This class will use boto3 to (1) create a security group, (2) configure ingress to the broker backend (currently 34 | Redis, as used by Dramatiq). It then (3) creates as many worker instances as requested and runs 'user-data' scripts 35 | after startup, which is to say, bash scripts that set up and the required software (Redis, CSAOpt Worker, etc.). 36 | After the run AWSTools (4) terminates all managed instances and removes the security group. 37 | 38 | Note: 39 | If the AWS credentials are not provided in the config file, boto3 will look into 40 | the following environment variables: `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` 41 | 42 | How to create IAM credentials (i.e. 
AWS keys): 43 | * Create (or reuse) IAM user with programmatic access 44 | * Assign to a (potentially new) group with AmazonEC2FullAccess 45 | * Store the access key and secret key 46 | 47 | Args: 48 | config: Configuration for current optimization run 49 | internal_conf: Internal CSAOpt configuration 50 | """ 51 | 52 | def __init__(self, config: ConfigTree, internal_conf: ConfigTree) -> None: 53 | self.region = config.get('remote.aws.region', internal_conf['remote.aws.default_region']) 54 | 55 | if config.get('remote.aws.secret_key', False) and config.get('remote.aws.access_key', False): 56 | self.ec2_resource: boto3.session.Session.resource = boto3.resource( 57 | 'ec2', 58 | aws_access_key_id=config['remote.aws.access_key'], 59 | aws_secret_access_key=config['remote.aws.secret_key'], 60 | region_name=self.region) 61 | 62 | else: 63 | # This will look for the env variables 64 | self.ec2_resource: boto3.session.Session.resource = boto3.resource('ec2', region_name=self.region) 65 | 66 | self.ec2_client = self.ec2_resource.meta.client 67 | 68 | # ec2.Instance is of but this cannot be 69 | # used as a type hint here because it is generated by the factory at runtime, I assume. 70 | self.workers: List[Any] = [] 71 | self.broker: Any = None 72 | self.security_group_prefix: str = internal_conf.get('remote.aws.security_group_prefix', 'csaopt_') 73 | self.security_group_id: str = '' 74 | 75 | self.worker_count: int = config['remote.aws.worker_count'] 76 | 77 | worker_ami_key = 'remote.aws.worker_ami' 78 | broker_ami_key = 'remote.aws.broker_ami' 79 | 80 | self.broker_ami = config.get(broker_ami_key, internal_conf[broker_ami_key]) 81 | self.worker_ami = config.get(worker_ami_key, internal_conf[worker_ami_key]) 82 | 83 | self.timeout_provision = config.get('remote.aws.timeout_provision', 84 | internal_conf['remote.aws.timeout_provision']) 85 | self.timeout_startup = config.get('remote.aws.timeout_startup', internal_conf['remote.aws.timeout_startup']) 86 | 87 | self.broker_port = internal_conf.get('broker.defaults.remote_port') 88 | self.broker_password = config.get('remote.aws.instances.broker_password', None) 89 | if self.broker_password is None: 90 | self.broker_password = random_str(32) 91 | 92 | self.debug_on_cpu = config.get('debug.gpu_simulator', '') 93 | self.terminate_on_exit = config.get('remote.terminate_on_exit', False) 94 | 95 | self.use_existing_instances = False 96 | existing_instances = config.get('remote.aws.instances', None) 97 | 98 | if existing_instances is not None: 99 | self.use_existing_instances = True 100 | self.existing_instances = existing_instances 101 | 102 | self.provision_args: Dict[str, str] = { 103 | 'broker_image': 104 | config.get('remote.aws.broker_ami', internal_conf['remote.aws.broker_ami']), 105 | 'worker_image': 106 | config.get('remote.aws.worker_ami', internal_conf['remote.aws.worker_ami']), 107 | 'broker_instance_type': 108 | config.get('remote.aws.broker_instance_type', internal_conf['remote.aws.broker_instance_type']), 109 | 'worker_instance_type': 110 | config.get('remote.aws.worker_instance_type', internal_conf['remote.aws.worker_instance_type']) 111 | } 112 | 113 | data_base = internal_conf['remote.aws.userdata_rel_path'] 114 | with open(data_base + '-broker.sh', 'rb') as broker_data, open(data_base + '-worker.sh', 'rb') as worker_data: 115 | self.user_data_scripts: Dict[str, bytes] = {'broker': broker_data.read(), 'worker': worker_data.read()} 116 | 117 | def _get_from_ids(self, broker_id: str, worker_ids: List[str]) -> Tuple[Any, Any]: 118 | broker = 
self.ec2_resource.Instance(broker_id) 119 | workers = map(lambda worker_id: self.ec2_resource.Instance(worker_id), worker_ids) 120 | 121 | return broker, list(workers) 122 | 123 | def _provision_instances(self, timeout_ms: int, count: int = 2, **kwargs: str) -> Tuple[Any, Any]: 124 | """Start and configure instances 125 | 126 | Args: 127 | timeout_ms: General timeout for the provisioning of requested instances 128 | count: number of worker instances to be created 129 | kwargs: Any other parameters that are required for startup 130 | """ 131 | 132 | broker_userdata = _interpolate_userscript_template_vals( 133 | self.user_data_scripts['broker'], external_port=self.broker_port, redis_password=self.broker_password) 134 | 135 | broker = self.ec2_resource.create_instances( 136 | ImageId=kwargs['broker_image'], 137 | MinCount=1, 138 | MaxCount=1, 139 | UserData=broker_userdata, 140 | SecurityGroupIds=[self.security_group_id], 141 | InstanceType=kwargs['broker_instance_type'])[0] 142 | 143 | worker_userdata = _interpolate_userscript_template_vals( 144 | self.user_data_scripts['worker'], 145 | debug='1' if self.debug_on_cpu else 'off', 146 | redis_host=broker.private_ip_address, 147 | redis_port=self.broker_port, 148 | redis_password=self.broker_password) 149 | 150 | workers = self.ec2_resource.create_instances( 151 | ImageId=kwargs['worker_image'], 152 | MinCount=count, 153 | MaxCount=count, 154 | InstanceType=kwargs['worker_instance_type'], 155 | UserData=worker_userdata, 156 | SecurityGroupIds=[self.security_group_id]) 157 | 158 | return broker, workers 159 | 160 | def __map_ec2_instance(self, instance: Any, is_broker: bool = False, **kwargs: Any) -> Instance: 161 | """Maps a boto/EC2 instance to the internal Instance type 162 | 163 | Args: 164 | instance: Instance object returned by boto3 (which has a runtime type and therefore untyped here) 165 | is_broker: Flag indicating whether a given instance is a broker or not 166 | kwargs: Any other parameters that should be available on the produced object 167 | 168 | Returns: 169 | An abstract instance object 170 | """ 171 | return Instance(instance.id, instance.public_ip_address, is_broker=is_broker, **kwargs) 172 | 173 | def get_running_instances(self) -> Tuple[Instance, List[Instance]]: 174 | """Update and get currently managed instances 175 | 176 | Returns: 177 | A tuple of broker, [worker] 178 | """ 179 | self.broker.reload() 180 | for worker in self.workers: 181 | worker.reload() 182 | 183 | broker_instance = self.__map_ec2_instance( 184 | instance=self.broker, is_broker=True, port=self.broker_port, password=self.broker_password) 185 | worker_instances = [self.__map_ec2_instance(w, queue_id=w.id) for w in self.workers] 186 | 187 | return broker_instance, worker_instances 188 | 189 | def _terminate_instances(self, timeout_ms: int) -> None: 190 | """Terminate all instances managed by AWSTools 191 | 192 | Args: 193 | timeout_ms: Timeout, in milliseconds, for the termination 194 | """ 195 | instance_ids = [self.broker.id] + [instance.id for instance in self.workers] 196 | self.ec2_client.terminate_instances(InstanceIds=instance_ids) 197 | 198 | def _wait_for_instances(self) -> None: 199 | """Block until broker and workers are up""" 200 | self.broker.wait_until_running() 201 | 202 | for worker in self.workers: 203 | worker.wait_until_running() 204 | 205 | def _run_start_scripts(self, timeout_ms: int) -> None: 206 | """Run any required setup procedures after the initial startup of managed instances 207 | 208 | Args: 209 | timeout_ms: Timeout, in 
milliseconds, for the termination 210 | """ 211 | raise NotImplementedError 212 | 213 | def __enter__(self) -> InstanceManager: 214 | """On enter, AWSTools prepares the AWS security group and spins up the required intances 215 | 216 | """ 217 | if not self.use_existing_instances: 218 | self.security_group_id = self._create_sec_group(self.security_group_prefix + random_str(10)) 219 | 220 | self.broker, self.workers = self._provision_instances( 221 | count=self.worker_count, timeout_ms=self.timeout_provision, **self.provision_args) 222 | 223 | log.debug('Provision Instances returned: {}, {}. Waiting for instances now'.format( 224 | self.broker, self.workers)) 225 | else: 226 | self.security_group_id = self.existing_instances['security_group'] 227 | self.broker, self.workers = self._get_from_ids(self.existing_instances['broker'], 228 | self.existing_instances['workers']) 229 | self._wait_for_instances() 230 | 231 | log.debug('Waiting for instances returned') 232 | return self 233 | 234 | def __exit__(self, exc_type, exc_value, traceback): 235 | """On exit, AWSTools terminates the started instances and removes security groups""" 236 | log.debug('Entered awstools\' __exit__ method with traceback: {}'.format(traceback)) 237 | if not self.terminate_on_exit: 238 | return False 239 | 240 | self._terminate_instances(self.timeout_provision) 241 | log.debug('Terminate Instances call returned, waiting for termination') 242 | 243 | all_instances = [self.broker] + self.workers 244 | while (any((not _has_exit_status(instance) for instance in all_instances))): 245 | log.debug('Waiting for instances to enter "shutting-down" or "terminated" state: {}'.format( 246 | [(i.id, i.state) for i in all_instances])) 247 | time.sleep(2.0) 248 | 249 | log.debug('Remove Security Group') 250 | self._remove_sec_group(self.security_group_id) 251 | return False 252 | 253 | def _remove_sec_group(self, group_id: str) -> None: 254 | """Removes the security group created by CSAOpt 255 | 256 | Args: 257 | group_id: Security group Id of group to be deleted 258 | """ 259 | 260 | if group_id is not None: 261 | try: 262 | self.ec2_client.delete_security_group(GroupId=group_id) 263 | log.debug('Security group [{}] deleted'.format(group_id)) 264 | except ClientError as e: 265 | log.error('Could not remove security group: {}'.format(e)) 266 | else: 267 | log.warning('Cannot remove security group, because none was created. 
268 | 
269 |     def _create_sec_group(self, name: str) -> str:
270 |         """Creates an AWS security group and assigns ingress permissions from the current network
271 | 
272 |         Args:
273 |             name: Name of the security group
274 | 
275 |         Returns:
276 |             AWS Identifier `GroupId` of the created security group
277 |         """
278 |         try:
279 |             response = self.ec2_client.create_security_group(GroupName=name, Description='Security Group for CSAOpt')
280 |             security_group_id = response['GroupId']
281 |             log.debug('Created Security Group: ' + security_group_id)
282 | 
283 |             data = self.ec2_client.authorize_security_group_ingress(
284 |                 GroupId=security_group_id,
285 |                 IpPermissions=[
286 |                     {
287 |                         'IpProtocol': 'tcp',
288 |                         'FromPort': self.broker_port,
289 |                         'ToPort': self.broker_port,
290 |                         'IpRanges': [{
291 |                             'CidrIp': '0.0.0.0/0'
292 |                         }]
293 |                     },
294 |                     {  # Allow communication within the sec group
295 |                         'IpProtocol': '-1',
296 |                         'UserIdGroupPairs': [{
297 |                             'GroupId': security_group_id
298 |                         }]
299 |                     }
300 |                 ])
301 |             log.debug('Authorized Security Group Ingress with result: {}'.format(data))
302 | 
303 |             data = self.ec2_client.authorize_security_group_egress(
304 |                 GroupId=security_group_id,
305 |                 IpPermissions=[{  # Allow communication within the sec group
306 |                     'IpProtocol': '-1',
307 |                     'UserIdGroupPairs': [{
308 |                         'GroupId': security_group_id
309 |                     }]
310 |                 }])
311 | 
312 |             log.debug('Authorized Security Group Egress with result: {}'.format(data))
313 | 
314 |             return security_group_id
315 |         except ClientError:
316 |             log.exception('Could not create Security Group')
317 |             raise
318 | 
-------------------------------------------------------------------------------- /csaopt/instancemanager/instancemanager.py: --------------------------------------------------------------------------------
1 | import abc
2 | 
3 | from typing import List, Tuple, Any, TypeVar, Generic
4 | 
5 | from . import Instance
6 | 
7 | T = TypeVar('T')
8 | 
9 | 
10 | class InstanceManager(abc.ABC, Generic[T]):
11 |     """Abstract class for the instance management performed by CSAOpt.
12 | 
13 |     This class provides calls that are usually required for provisioning and configuration of instances running broker
14 |     or worker code. Per Python conventions, methods prefixed by an underscore are not meant to be public. They are
15 |     here to make the developer think about what steps are usually required for a complete setup of cloud or docker
16 |     instances. The public methods of this class are :meth:`~InstanceManager.get_running_instances` as well as its
17 |     context manager methods, `__enter__` and `__exit__`.
18 |     """
19 | 
20 |     def __init__(self):
21 |         pass
22 | 
23 |     @abc.abstractmethod
24 |     def _provision_instances(self, timeout_ms, count=2, **kwargs) -> Tuple[T, List[T]]:
25 |         """Start and configure instances, return queue and list of workers"""
26 | 
27 |     @abc.abstractmethod
28 |     def get_running_instances(self) -> Tuple[Instance, List[Instance]]:
29 |         """Returns the currently managed instances"""
30 | 
31 |     @abc.abstractmethod
32 |     def _terminate_instances(self, timeout_ms) -> None:
33 |         """Terminate managed instances"""
34 | 
35 |     @abc.abstractmethod
36 |     def _run_start_scripts(self, timeout_ms) -> None:
37 |         """Run scripts to start queue and worker applications after startup"""
38 | 
39 |     @abc.abstractmethod
40 |     def __enter__(self):
41 |         """InstanceManager is a ContextManager"""
42 |         # This needs to return an InstanceManager, so the return type should state the same.
43 | # However, Instancemanager cannot be referenced before the class has been evaluated. 44 | 45 | @abc.abstractmethod 46 | def __exit__(self, exc_type, exc_value, traceback) -> bool: 47 | """Cleanup resources on exit""" 48 | -------------------------------------------------------------------------------- /csaopt/instancemanager/local.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import uuid 3 | import time 4 | import sys 5 | 6 | from pyhocon import ConfigTree 7 | from typing import Tuple, List, Any, Dict, Union, Type, Optional 8 | 9 | from .instancemanager import InstanceManager 10 | from . import Instance 11 | from ..utils import get_free_tcp_port, random_str, docker_available 12 | 13 | log = logging.getLogger() 14 | 15 | 16 | def _map_docker_to_instance(container, port=-1, is_broker: bool = False) -> Instance: 17 | return Instance(str(container.name), '127.0.0.1', port=port, is_broker=is_broker, **container.labels) 18 | 19 | 20 | try: 21 | assert docker_available() 22 | import docker 23 | DockerContainerT = Type[docker.models.containers.Container] 24 | 25 | class Local(InstanceManager[DockerContainerT]): # type: ignore 26 | def __init__(self, conf: ConfigTree, internal_conf: ConfigTree) -> None: 27 | if not docker_available(): 28 | raise AssertionError('Trying to instantiate Local InstanceManager, but docker-py is not available.') 29 | self.docker_client = docker.from_env() 30 | 31 | # Because of the dynamic type variable, this fails to typecheck 32 | # since DockerContainerT will be typed to Type[Any] 33 | self.broker: DockerContainerT = ... # type: ignore 34 | self.worker: DockerContainerT = ... # type: ignore 35 | 36 | self.run_id = run_id = random_str(8) 37 | 38 | self.broker_docker_tag = internal_conf['remote.broker_image'] 39 | self.worker_docker_tag = internal_conf['remote.worker_image'] 40 | 41 | self.broker_container_name = 'CSAOpt-Broker-' + run_id 42 | self.worker_container_name = 'CSAOpt-Worker-' + run_id 43 | self.broker_port: Optional[int] = get_free_tcp_port() 44 | self.debug_on_cpu = conf.get('debug.gpu_simulator', False) 45 | 46 | def _provision_instances(self, timeout_ms, count=2, 47 | **kwargs) -> Tuple[DockerContainerT, List[DockerContainerT]]: 48 | self.broker = self.docker_client.containers.run( 49 | self.broker_docker_tag, 50 | ports={'6379/tcp': kwargs['HOST_REDIS_PORT']}, 51 | detach=True, 52 | network=self.docker_network.name, 53 | environment={'ALLOW_EMPTY_PASSWORD': 'yes'}, 54 | name=self.broker_container_name) 55 | 56 | # Sleeping here to give Redis a chance to start up 57 | time.sleep(3) 58 | 59 | # Being on the same docker network means that it automagically 60 | # handles DNS and the broker container name is also its DNS name. 
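            # (Editorial illustration: with the names used in this class, the worker
            # container can reach Redis at redis://CSAOpt-Broker-<run_id>:6379 inside
            # the shared network, without the host port mapping used above.)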
kwargs['REDIS_HOST'] = self.broker_container_name
62 | 
63 |             self.worker = self.docker_client.containers.run(
64 |                 self.worker_docker_tag,
65 |                 detach=True,
66 |                 network=self.docker_network.name,
67 |                 environment=kwargs,
68 |                 labels={'queue_id': self.worker_container_name},
69 |                 name=self.worker_container_name)
70 | 
71 |             while self.broker.status != 'running' or self.worker.status != 'running':
72 |                 # TODO this needs to respect timeout
73 |                 time.sleep(1)
74 |                 self.broker, self.worker = self.__refresh_containers()
75 | 
76 |             return self.broker, [self.worker]
77 | 
78 |         def __refresh_containers(self) -> Tuple[DockerContainerT, DockerContainerT]:
79 |             broker = self.docker_client.containers.get(self.broker_container_name)
80 |             worker = self.docker_client.containers.get(self.worker_container_name)
81 | 
82 |             return broker, worker
83 | 
84 |         def get_running_instances(self) -> Tuple[Instance, List[Instance]]:
85 |             """Returns the currently managed instances"""
86 |             broker, worker = self.__refresh_containers()
87 |             return (_map_docker_to_instance(broker, port=self.broker_port, is_broker=True),
88 |                     [_map_docker_to_instance(worker)])
89 | 
90 |         def _terminate_instances(self, timeout_ms) -> None:
91 |             self.worker.kill()
92 |             self.broker.kill()
93 |             self.worker.wait()
94 |             self.broker.wait()
95 | 
96 |         def _run_start_scripts(self, timeout_ms) -> None:
97 |             pass
98 | 
99 |         def __enter__(self) -> InstanceManager:
100 |             try:
101 |                 # No broker password for the local, docker-driven case
102 |                 env: Dict[str, Union[str, int]] = {
103 |                     'REDIS_HOST': self.broker_container_name,
104 |                     'WORKER_QUEUE_ID': self.worker_container_name
105 |                 }
106 | 
107 |                 if self.broker_port is not None:
108 |                     env['HOST_REDIS_PORT'] = self.broker_port
109 | 
110 |                 if self.debug_on_cpu:
111 |                     env['NUMBA_ENABLE_CUDASIM'] = '1'
112 | 
113 |                 self.docker_network = self.docker_client.networks.create(name='CSAOpt' + self.run_id)
114 | 
115 |                 self.broker, workers = self._provision_instances(timeout_ms=10000, **env)
116 |                 self.worker = workers[0]
117 | 
118 |                 return self
119 |             except Exception as e:
120 |                 log.exception('An exception occurred while starting docker containers')
121 |                 raise SystemError('An exception occurred while starting docker containers: {}'.format(repr(e)))
122 | 
123 |         def __exit__(self, exc_type, exc_value, traceback) -> bool:
124 |             try:
125 |                 log.debug('Broker logs: \n' + self.broker.logs().decode('utf-8'))
126 |                 log.debug('Worker logs: \n' + self.worker.logs().decode('utf-8'))
127 |                 self._terminate_instances(timeout_ms=10000)
128 |             except Exception as e:
129 |                 log.warning('An exception occurred while killing docker containers: ' + str(e))
130 |             finally:
131 |                 self.docker_network.remove()
132 |             return False
133 | 
134 | except Exception:
135 |     pass  # docker (or docker-py) is not available, so the Local instance manager is simply not defined
-------------------------------------------------------------------------------- /csaopt/internal/aws_setup_scripts/aws_broker_setup.sh: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | sudo apt-get update
4 | 
5 | # Docker
6 | sudo apt-get install -y \
7 |     apt-transport-https \
8 |     ca-certificates \
9 |     curl \
10 |     software-properties-common
11 | 
12 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
13 | sudo apt-key fingerprint 0EBFCD88
14 | 
15 | sudo add-apt-repository \
16 |    "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
17 |    $(lsb_release -cs) \
18 |    stable"
19 | 
20 | sudo apt update && sudo apt-get install docker-ce docker-compose -y
21 | sudo usermod -aG docker $USER
22 | 
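# Editorial note (hedged): the usermod group change above only takes effect for new
# login sessions; docker commands run later in this same session may still need sudo
# or, as a workaround, a `newgrp docker` subshell.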
23 | # Docker Images
24 | sudo docker pull bitnami/redis
25 | 
-------------------------------------------------------------------------------- /csaopt/internal/aws_setup_scripts/aws_worker_setup.sh: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | sudo apt-get update
4 | 
5 | # Docker
6 | sudo apt-get install -y \
7 |     apt-transport-https \
8 |     ca-certificates \
9 |     curl \
10 |     software-properties-common
11 | 
12 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
13 | sudo apt-key fingerprint 0EBFCD88
14 | 
15 | sudo add-apt-repository \
16 |    "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
17 |    $(lsb_release -cs) \
18 |    stable"
19 | 
20 | sudo apt update && sudo apt install docker-ce -y
21 | sudo usermod -aG docker $USER
22 | 
23 | # Nvidia Docker
24 | curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | \
25 |     sudo apt-key add -
26 | distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
27 | curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \
28 |     sudo tee /etc/apt/sources.list.d/nvidia-docker.list
29 | 
30 | sudo apt update && sudo apt-get install nvidia-docker2 -y
31 | sudo pkill -SIGHUP dockerd
32 | 
33 | # CUDA
34 | sudo add-apt-repository ppa:graphics-drivers/ppa
35 | sudo apt update && sudo apt install -y nvidia-410 cuda-drivers
36 | 
37 | # Test NVidia Driver
38 | # docker run --runtime=nvidia --rm nvidia/cuda nvidia-smi
39 | 
40 | docker pull d53dave/csaopt-worker:0.1.1
41 | 
-------------------------------------------------------------------------------- /csaopt/internal/aws_startup_scripts/aws-userdata-broker.sh: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | docker run -d -p $external_port:6379 -e REDIS_PASSWORD=$redis_password bitnami/redis
-------------------------------------------------------------------------------- /csaopt/internal/aws_startup_scripts/aws-userdata-worker.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | docker pull d53dave/csaopt-worker:0.1.1
4 | 
5 | docker run --runtime=nvidia -d \
6 |     -e NUMBA_ENABLE_CUDASIM=$debug \
7 |     -e REDIS_HOST=$redis_host \
8 |     -e REDIS_PORT=$redis_port \
9 |     -e REDIS_PWD=$redis_password \
10 |     -e WORKER_QUEUE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id` \
11 |     d53dave/csaopt-worker:0.1.1
-------------------------------------------------------------------------------- /csaopt/internal/aws_startup_scripts/local-userdata-worker.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Get Image
4 | docker pull d53dave/csaopt-worker:0.1.1
5 | 
6 | # Run
7 | # Template engine needs to replace $runtime_complete_flag with "--runtime=nvidia" or an empty string
8 | # and $debug with "1" or "false".
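# Illustrative rendering (editorial; hypothetical values, substituted by the template
# engine at provisioning time):
#   docker run --runtime=nvidia -d -e NUMBA_ENABLE_CUDASIM=1 -e WORKER_QUEUE_ID=worker-abc123 d53dave/csaopt-worker:0.1.1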
9 | docker run $runtime_complete_flag -d -e NUMBA_ENABLE_CUDASIM=$debug -e WORKER_QUEUE_ID=$worker_queue_id d53dave/csaopt-worker:0.1.1
-------------------------------------------------------------------------------- /csaopt/internal/csaopt-internal.conf: --------------------------------------------------------------------------------
1 | {
2 |     console {
3 |         width_default = 80
4 |         width_max = 120
5 |     }
6 | 
7 |     broker {
8 |         defaults {
9 |             local_port = 6379
10 |             remote_port = 63379
11 |             connect_timeout = 20 # seconds
12 |         }
13 |         worker_join_retry_delay = 3 # seconds
14 |         worker_join_retry_count = 30
15 |     }
16 | 
17 |     model {
18 |         validation {
19 |             globals_token = "# -- Globals"
20 |             reserved_keywords = ['import', 'except ', 'except:', 'finally', 'yield']
21 |         }
22 | 
23 |         defaults {
24 |             precision = float32
25 |             distribution = uniform
26 |         }
27 |     }
28 | 
29 |     remote {
30 |         platform = aws
31 | 
32 |         broker_image = bitnami/redis:4.0.11
33 |         worker_image = d53dave/csaopt-worker:0.1.1
34 | 
35 |         aws {
36 |             # This is an AMI prepared specially for CSAOpt and based on Ubuntu 16.04.
37 |             # Contains Nvidia drivers, docker and nvidia-docker. No further setup needed.
38 |             #
39 |             # This AMI currently only exists on region eu-central-1.
40 |             worker_ami = ami-051301414e4e7046e
41 | 
42 |             # This is an AMI provided for the CSAOpt Broker
43 |             #
44 |             # This AMI currently only exists on region eu-central-1.
45 |             broker_ami = ami-0145895d0f153ea65
46 | 
47 |             # At time of writing, the following instances should be supported
48 |             # on the AWS side (but make sure your region supports the selected instance type):
49 |             #   - G2: g2.2xlarge, g2.8xlarge
50 |             #   - P2: p2.xlarge, p2.8xlarge, p2.16xlarge
51 |             #   - G3: g3.4xlarge, g3.8xlarge, g3.16xlarge
52 |             worker_instance_type = g2.2xlarge
53 | 
54 |             # This will be used for the message queue
55 |             broker_instance_type = m5.large
56 | 
57 |             default_region = eu-central-1
58 | 
59 |             timeout_provision = 160000
60 |             timeout_startup = 130000
61 | 
62 |             security_group_prefix = csaopt_
63 | 
64 |             userdata_rel_path = csaopt/internal/aws_startup_scripts/aws-userdata
65 |         }
66 |     }
67 | }
-------------------------------------------------------------------------------- /csaopt/jobs/__init__.py: --------------------------------------------------------------------------------
1 | import uuid
2 | import os
3 | import numpy as np
4 | 
5 | from enum import Enum
6 | from typing import Dict, List, Any, Tuple, Optional, Type
7 | 
8 | from ..model import Model
9 | 
10 | __all__ = ['jobmanager']
11 | 
12 | Failure = Optional[Tuple[Optional[Type[BaseException]], Optional[BaseException], str]]
13 | 
14 | 
15 | class ExecutionType(Enum):
16 |     """
17 |     Enum of the supported combinations of models and configurations for an optimization run
18 |     """
19 |     MultiModelMultiConf = 'MultiModelMultiConf'
20 |     SingleModelMultiConf = 'SingleModelMultiConf'
21 |     SingleModelSingleConf = 'SingleModelSingleConf'
22 |     MultiModelSingleConf = 'MultiModelSingleConf'
23 | 
24 | 
25 | class Job():
26 |     def __init__(self, model: Model, opt_params: Dict[str, Any]) -> None:
27 |         self.id = str(uuid.uuid4())
28 |         self.message_id: str = ''
29 |         self.queue_id: str = ''
30 |         self.model: Model = model
31 |         self.results: List[np.array] = []
32 |         self.values: List[float] = []
33 |         self.completed: bool = False
34 |         self.failure: Failure = None
35 |         self.submitted_to: List[str] = []
36 |         self.params: Dict[str, Any] = opt_params
37 | 
38 |     def __repr__(self):
39 |         return 'Job[{}]: Model={}, Queues={}, Completed={}, Params={}'.format(
40 |             self.id, self.model.name, self.submitted_to, self.completed, self.params)
41 | 
42 |     def to_dict(self):
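        """Serialize the job's id, parameters and model name for transmission to a worker."""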
43 |         return {'id': self.id, 'params': self.params, 'model': self.model.name}
44 | 
45 |     def get_best_results(self) -> Tuple[float, np.array]:
46 |         values_arr = np.asarray(self.values)
47 |         ind = int(np.argmin(values_arr))  # plain int index: self.results is a Python list
48 |         val_min = values_arr[ind]
49 |         best_res = self.results[ind]
50 |         return val_min, best_res
51 | 
52 |     def write_files(self, path: str, binary: bool = False, only_best: bool = False) -> None:
53 |         suffix: str = 'bin' if binary else 'txt'
54 | 
55 |         if (os.path.isdir(path)):
56 |             if only_best:
57 |                 best_val, best_state = self.get_best_results()
58 |                 self._write_file('{}_values_{}.{}'.format(self.id, 'best', suffix), path, binary,
59 |                                  np.asarray([best_val]))
60 |                 self._write_file('{}_states_{}.{}'.format(self.id, 'best', suffix), path, binary, best_state)
61 |             else:
62 |                 for idx, result in enumerate(self.results):
63 |                     self._write_file('{}_values_{}.{}'.format(self.id, idx, suffix), path, binary,
64 |                                      np.asarray([self.values[idx]]))
65 |                     self._write_file('{}_states_{}.{}'.format(self.id, idx, suffix), path, binary, self.results[idx])
66 |         else:
67 |             raise AttributeError('Cannot write to {}: not a directory.'.format(path))
68 | 
69 |     def _write_file(self, name: str, path: str, binary: bool, arr: np.array) -> None:
70 |         output_file = os.path.join(path, name)
71 |         arr.tofile(output_file, sep=('' if binary else ','))
72 | 
-------------------------------------------------------------------------------- /csaopt/jobs/jobmanager.py: --------------------------------------------------------------------------------
1 | import asyncio
2 | import logging
3 | import numpy as np
4 | 
5 | from typing import List, Dict, Tuple, Any
6 | from pyhocon import ConfigTree
7 | 
8 | from . import Job, ExecutionType
9 | from ..model import Model
10 | from ..broker import Broker, WorkerCommand
11 | 
12 | # TODO: this (or somebody else) needs to check for n Models == n Workers in several cases
13 | 
14 | log = logging.getLogger(__name__)
15 | 
16 | 
17 | class JobManager():
18 |     """This class handles submission, tracking and retrieval of optimization jobs.
19 | 
20 |     It abstracts away the detailed communication with the message queue behind
21 |     the Broker client class.
22 | 
23 |     """
24 | 
25 |     def __init__(self, ctx, broker: Broker, models: List[Model], configs: List[ConfigTree]) -> None:
26 | 
27 |         self.broker = broker
28 |         self.models_deployed = False
29 |         self.models = models
30 |         self.configs = configs
31 |         self.execution_type: ExecutionType = self._get_execution_type(models, configs)
32 |         self.jobs: List[Job] = []
33 |         self.worker_join_retry_delay = ctx.internal_config['broker.worker_join_retry_delay']
34 |         self.worker_join_retry_count = ctx.internal_config['broker.worker_join_retry_count']
35 |         if self.broker is not None:
36 |             self.queue_models_deployed: Dict[str, bool] = {queue_id: False for queue_id in self.broker.queue_ids}
37 | 
38 |     async def wait_for_worker_join(self, retry_count=0) -> List[str]:
39 |         """Send ping to each worker and wait for response.
40 | 
41 |         Workers are expected to join immediately as the jobmanager will be called only after the
42 |         instances are initialized. To be somewhat relaxed regarding the startup of instances,
43 |         the ping operation will be retried up to a configured number of times, with a retry delay specified in the internal configuration.
44 | 
45 |         Args:
46 |             retry_count: Number of retries performed so far (i.e. 
this method calls itself recursively when retrying)
47 | 
48 |         Returns:
49 |             A list of worker IDs of the joined workers
50 |         """
51 |         joined_workers = []
52 |         for queue_id in self.broker.queue_ids:
53 |             try:
54 |                 if self.broker.ping(queue_id) is True:
55 |                     joined_workers.append(queue_id)
56 |                 else:
57 |                     raise AssertionError('Worker {} failed to join'.format(queue_id))
58 |             except Exception as e:
59 |                 if retry_count >= self.worker_join_retry_count:
60 |                     log.exception('Exception occurred while waiting for workers to join')
61 |                     raise e
62 | 
63 |                 log.debug('Retrying to contact broker in order to ping workers')
64 |                 await asyncio.sleep(self.worker_join_retry_delay)
65 |                 return await self.wait_for_worker_join(retry_count + 1)  # propagate the retry's result instead of discarding it
66 | 
67 |         self.broker.clear_queue_messages()
68 |         return joined_workers
69 | 
70 |     # TODO: Fix type links in comments
71 |     def _get_execution_type(self, models: List[Model], configs: List[ConfigTree]) -> ExecutionType:
72 |         """Determine the execution type of a given optimization run based on the number of models and configurations
73 | 
74 |         Args:
75 |             models: A list of Models
76 |             configs: A list of Configurations
77 | 
78 |         Returns:
79 |             The ExecutionType of this optimization run
80 |         """
81 |         len_models = len(models)
82 |         len_configs = len(configs)
83 | 
84 |         if len_models < 1:
85 |             raise AssertionError('No models provided')
86 |         if len_configs < 1:
87 |             raise AssertionError('No configs provided')
88 | 
89 |         if len_models > 1 and len_configs > 1 and len_configs != len_models:
90 |             raise AssertionError('For len(models) == {}, there should be {} configs, but found {}'.format(
91 |                 len_models, len_models, len_configs))
92 | 
93 |         if len_models == 1 and len_configs == 1:
94 |             return ExecutionType.SingleModelSingleConf
95 |         elif len_models == 1 and len_configs > 1:
96 |             return ExecutionType.SingleModelMultiConf
97 |         elif len_models > 1 and len_configs == 1:
98 |             return ExecutionType.MultiModelSingleConf
99 |         elif len_models > 1 and len_configs > 1:
100 |             return ExecutionType.MultiModelMultiConf
101 |         else:
102 |             raise AssertionError('Could not determine Exec Type for len(models) == {} and len(configs) == {}'.format(
103 |                 len_models, len_configs))
104 | 
105 |     async def deploy_model(self) -> None:
106 |         """Deploy model to workers.
107 | 
108 |         This method will deploy models depending on the execution type (i.e. the configuration).
109 |         - If the execution type is `SingleModelSingleConf` or `SingleModelMultiConf`, the model is broadcast to all
110 |           workers
111 |         - Otherwise, each worker will receive the deployment information for one model in the same order they were
112 |           passed to CSAOpt.
113 |         """
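        # Editorial illustration (hypothetical run with three workers): the branches
        # below behave roughly as
        #   SingleModel*Conf -> broadcast(DeployModel, model)     # every queue gets the same model
        #   MultiModel*Conf  -> send_to_queue(queue_n, model_n)   # queue n gets the n-th model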
114 |         if self.execution_type is ExecutionType.SingleModelSingleConf or \
115 |            self.execution_type is ExecutionType.SingleModelMultiConf:
116 |             self.broker.broadcast(WorkerCommand.DeployModel, self.models[0].to_dict())
117 |         else:
118 |             for n, queue_id in enumerate(self.broker.queue_ids):
119 |                 log.debug('Deploying model to queue {} with id {}'.format(n, queue_id))
120 |                 self.broker.send_to_queue(queue_id, WorkerCommand.DeployModel, self.models[n].to_dict())
121 | 
122 |         all_results = await self.broker.get_all_results(timeout=10)
123 |         for queue_id, results in all_results.items():
124 |             for message in results:
125 |                 if message == 'model_deployed':
126 |                     self.queue_models_deployed[queue_id] = True
127 |                 else:
128 |                     log.warning('Worker on Queue %s didn\'t successfully deploy model: "%s"', queue_id, message)
129 | 
130 |         assert not any((not model_deployed for queue_id, model_deployed in self.queue_models_deployed.items())), \
131 |             'Not all queues reported a deployed model'
132 | 
133 |         log.debug('queue.models_deployed() finished')
134 | 
135 |         self.models_deployed = True
136 |         self.broker.clear_queue_messages()
137 | 
138 |     async def submit(self) -> List[Job]:
139 |         if not self.models_deployed:
140 |             raise AssertionError('Trying to submit job without deploying model')
141 | 
142 |         cmd = WorkerCommand.RunOptimization
143 | 
144 |         if self.execution_type is ExecutionType.SingleModelSingleConf:
145 |             job = Job(self.models[0], self.configs[0])
146 |             self.broker.broadcast(cmd, job.to_dict())
147 |             job.submitted_to.extend(self.broker.queue_ids)
148 |             self.jobs.append(job)
149 |         elif self.execution_type is ExecutionType.SingleModelMultiConf:
150 |             for n, queue_id in enumerate(self.broker.queue_ids):
151 |                 job = Job(self.models[0], self.configs[n])
152 |                 self.broker.send_to_queue(queue_id, cmd, job.to_dict())
153 |                 job.submitted_to = [queue_id]
154 |                 self.jobs.append(job)
155 |         elif self.execution_type is ExecutionType.MultiModelSingleConf:
156 |             for n, queue_id in enumerate(self.broker.queue_ids):
157 |                 job = Job(self.models[n], self.configs[0])
158 |                 self.broker.send_to_queue(queue_id, cmd, job.to_dict())
159 |                 job.submitted_to = [queue_id]
160 |                 self.jobs.append(job)
161 |         elif self.execution_type is ExecutionType.MultiModelMultiConf:
162 |             for n, queue_id in enumerate(self.broker.queue_ids):
163 |                 job = Job(self.models[n], self.configs[n])
164 |                 self.broker.send_to_queue(queue_id, cmd, job.to_dict())
165 |                 job.submitted_to = [queue_id]
166 |                 self.jobs.append(job)
167 | 
168 |         return self.jobs
169 | 
170 |     async def wait_for_results(self) -> None:
171 |         """
172 |         Wait for all submitted jobs to report back and attach the returned values, states or failures to them.
173 |         """
174 |         if not self.models_deployed:
175 |             raise AssertionError('wait_for_results called but no models are deployed')
176 |         if len(self.jobs) == 0:
177 |             raise AssertionError('wait_for_results called but no jobs submitted')
178 | 
179 |         results = await self.broker.get_all_results(timeout=150.0)
180 |         log.debug('Received results: {}'.format(results))
181 |         for job in self.jobs:
182 |             for queue_id in job.submitted_to:
183 |                 for message in results[queue_id]:
184 |                     log.debug('Processing message on queue {}, result={}'.format(queue_id, message))
185 |                     if message.get('failure') is not None:
186 |                         job.failure = message.get('failure')
187 |                     else:
188 |                         job.values = message['values']
189 |                         job.results = message['states']
190 | 
191 |     def scan_for_best_result(self, jobs: List[Job]) -> Tuple[Job, float, np.array]:
192 |         """Get best performing job and its results from a list of jobs
193 | 
194 |         Args:
195 |             jobs: List of jobs to process
196 | 
197 |         Returns:
198 |             A tuple of (job, best value, state at which that value was evaluated)
199 |         """
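        # Editorial illustration (hypothetical numbers): for three jobs whose best
        # values are [3.2, 0.7, 1.9], the loop below returns the second job together
        # with 0.7 and the state that produced it.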
200 |         if len(jobs) < 1:
201 |             raise AssertionError('Cannot scan for best result on empty jobs list')
202 | 
203 |         best_job = jobs[0]
204 |         best_value, best_state = best_job.get_best_results()
205 | 
206 |         for job in jobs[1:]:
207 |             value, state = job.get_best_results()
208 | 
209 |             if value < best_value:
210 |                 best_value = value
211 |                 best_state = state
212 |                 best_job = job
213 | 
214 |         return best_job, best_value, best_state
-------------------------------------------------------------------------------- /csaopt/model/__init__.py: --------------------------------------------------------------------------------
1 | """
2 | This module offers the core CSAOpt modelling component: the :class:`~model.Model` class.
3 | """
4 | import json
5 | 
6 | from enum import Enum
7 | from typing import Dict, Any
8 | 
9 | 
10 | class Precision(Enum):
11 |     """Enum for available calculation precisions"""
12 |     Float32 = 'float32'
13 |     Float64 = 'float64'
14 | 
15 | 
16 | class RandomDistribution(Enum):
17 |     """Enum for available distributions of random values used during optimization"""
18 |     Normal = 'normal'
19 |     Uniform = 'uniform'
20 | 
21 | 
22 | class RequiredFunctions(Enum):
23 |     """Enum for required functions that a model has to provide"""
24 |     Initialize = 'initialize'
25 |     GenerateNext = 'generate_next'
26 |     Cool = 'cool'
27 |     Evaluate = 'evaluate'
28 |     Acceptance = 'acceptance_func'
29 |     EmptyState = 'empty_state'
30 | 
31 | 
32 | class Model:
33 |     """Core class containing functions and parameters for optimization
34 | 
35 |     Args:
36 |         name: Optimization name
37 |         dimensions: Number of dimensions of optimization domain
38 |         precision: Required precision
39 |         distribution: Required distribution of random values that will be provided by CSAOpt to the optimization
40 |         opt_globals: Global variables available during optimization
41 |         functions: Functions modelling the domain
42 | 
43 |     Attributes:
44 |         name: Optimization name
45 |         dimensions: Number of dimensions of optimization domain
46 |         precision: Required precision
47 |         distribution: Required distribution of random values that will be provided by CSAOpt to the optimization
48 |         opt_globals: Global variables available during optimization
49 |         functions: Functions modelling the domain
50 |     """
51 | 
52 |     @staticmethod
53 |     def from_dict(d: Dict[str, Any]):
54 |         """
55 |         Create model object from a dictionary (i.e. 
the serialized form)
56 | 
57 |         Args:
58 |             d: Serialized model dictionary
59 | 
60 |         Returns:
61 |             Model: A model object
62 |         """
63 |         assert 'distribution' in d
64 |         assert 'precision' in d
65 |         assert 'globals' in d
66 |         assert 'functions' in d
67 | 
68 |         distribution: RandomDistribution = RandomDistribution(d['distribution'])  # the dict stores enum values (strings)
69 |         precision: Precision = Precision(d['precision'])
70 | 
71 |         return Model(d['name'], d['dimensions'], precision, distribution, d.get('globals', {}),
72 |                      d.get('state_shape', 1), d['functions'])
73 | 
74 |     def __init__(self, name: str, dimensions: int, precision: Precision, distribution: RandomDistribution,
75 |                  opt_globals: str, state_shape: int, functions: Dict[str, str]) -> None:
76 |         self.name: str = name
77 |         self.dimensions: int = dimensions
78 |         self.distribution: RandomDistribution = distribution
79 |         self.precision: Precision = precision
80 |         self.globals: str = opt_globals
81 |         self.state_shape: int = state_shape
82 |         self.functions: Dict[str, str] = functions
83 | 
84 |     def to_dict(self) -> Dict[str, Any]:
85 |         """Serializes model to dictionary (e.g. for transmission to workers)
86 | 
87 |         Returns:
88 |             Dictionary representation of model
89 |         """
90 |         return {
91 |             'name': self.name,
92 |             'dimensions': self.dimensions,
93 |             'distribution': self.distribution.value,
94 |             'precision': self.precision.value,
95 |             'globals': self.globals,
96 |             'state_shape': self.state_shape,
97 |             'functions': self.functions
98 |         }
99 | 
100 |     def __repr__(self) -> str:
101 |         return json.dumps(self.to_dict(), indent=4)  # Model itself is not JSON-serializable
102 | 
-------------------------------------------------------------------------------- /csaopt/model_loader/__init__.py: --------------------------------------------------------------------------------
1 | """
2 | This module offers functionality regarding the loading and validation of optimization models.
3 | """
4 | 
5 | __all__ = ['model_loader', 'model_validator']
6 | 
7 | 
8 | class ValidationError(Exception):
9 |     """Exception class for function validation errors
10 | 
11 |     Args:
12 |         message: Error message
13 |     """
14 | 
15 |     def __init__(self, message: str) -> None:
16 |         # Call the base class constructor with the parameters it needs
17 |         super().__init__(message)
-------------------------------------------------------------------------------- /csaopt/model_loader/model_loader.py: --------------------------------------------------------------------------------
1 | import imp
2 | import logging
3 | import inspect
4 | 
5 | from types import ModuleType
6 | from pyhocon import ConfigTree
7 | from typing import Dict, List, Callable, Any, Optional
8 | 
9 | from . import ValidationError
10 | from .model_validator import ModelValidator
11 | from ..model import Model, RequiredFunctions, Precision, RandomDistribution
12 | from ..utils import random_str
13 | 
14 | logger = logging.getLogger(__name__)
15 | 
16 | 
17 | def _get_precision_with_default(name: str, default: str) -> Precision:
18 |     return Precision(name.casefold()) if name.casefold() in [p.value for p in Precision] else Precision(default)  # enum lookup is by value, e.g. 'float32'
19 | 
20 | 
21 | def _get_distribution_with_default(name: str, default: str) -> RandomDistribution:
22 |     return RandomDistribution(name.casefold()) if name.casefold() in [d.value for d in RandomDistribution] \
23 |         else RandomDistribution(default)
24 | 
25 | 
26 | class ModelLoader():
27 |     """Class responsible for loading the provided optimization model into the internal representation of a model.
28 | 
29 |     The input model is loaded as a python module. After validation, each function's source code is extracted and
30 |     packed into a :class:`model.Model` object.
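    Example (editorial sketch using only names defined in this module)::

        loader = ModelLoader(conf, internal_conf)
        model = loader.get_model()  # None if validation reported errors
        if model is None:
            print(loader.errors)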
31 | 
32 |     Args:
33 |         conf: Configuration of optimization run
34 |         internal_conf: Internal CSAOpt Configuration
35 |         validator: Instance of ModelValidator that should be used for validation
36 |     """
37 | 
38 |     def __init__(self, conf: ConfigTree, internal_conf: ConfigTree, validator=ModelValidator()) -> None:
39 |         if 'model' not in conf:
40 |             raise AssertionError('`model` section missing in configuration')
41 | 
42 |         self.model_path = conf['model.path']
43 | 
44 |         model_name = conf.get('model.name', 'optimization_' + random_str(8))
45 | 
46 |         # TODO: interpreting arbitrary code is a bad idea. This should, in the least,
47 |         # do one pass of validation, maybe checking for forbidden keywords.
48 |         self.model_module: ModuleType = self._create_module(model_name, self.model_path)
49 |         self.globals_token = internal_conf.get('model.validation.globals_token', '# -- Globals')
50 | 
51 |         functions: Dict[str, Callable] = self._extract_functions(self.model_module)
52 |         opt_globals = self._extract_globals(self.model_path)
53 |         self.errors: List[ValidationError] = []
54 | 
55 |         if not conf.get('model.skip_typecheck'):
56 |             logger.debug('Running typecheck')
57 |             typecheck_error = validator.validate_typing(self.model_path)
58 |             if typecheck_error is not None:
59 |                 self.errors.append(typecheck_error)
60 | 
61 |         self.errors.extend(validator.validate_functions(functions, internal_conf))
62 | 
63 |         model_params = self._extract_model_params(conf.get('model'), internal_conf)
64 | 
65 |         if len(self.errors) == 0:
66 |             self.model = self._create_model(model_name, model_params['dimensions'], model_params['precision'],
67 |                                             model_params['distribution'], self.model_module, opt_globals, functions)
68 |         else:
69 |             logger.error('Validation failed for model `{}`: {}'.format(self.model_path, self.errors))
70 | 
71 |     def _extract_globals(self, model_path: str) -> str:
72 |         """Extracts the globals section from a model file
73 | 
74 |         Model files may have a globals section that will be carried over to the worker machines. This will be extracted
75 |         here.
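        For example (mirroring the default token from the internal configuration), a
        model file would mark its globals as::

            # -- Globals
            a = 20
            b = 0.2
            # -- Globals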
76 |         """
77 |         with open(model_path, 'r') as model_file:
78 |             model_source_lines = model_file.read().splitlines()
79 |             token_idxs = [idx for idx, line in enumerate(model_source_lines) if self.globals_token in line]
80 |             if len(token_idxs) == 2 and token_idxs[0] != token_idxs[1]:
81 |                 begin, end = token_idxs
82 |                 return '\n'.join(model_source_lines[begin + 1:end])
83 |             return ''
84 | 
85 |     def _extract_model_params(self, m_conf: ConfigTree, internal_conf: ConfigTree) -> Dict[str, Any]:
86 |         if 'dimensions' not in m_conf:
87 |             raise AssertionError('`dimensions` parameter must be provided in model configuration')
88 |         return {
89 |             'dimensions': int(m_conf['dimensions']),
90 |             'precision': Precision(m_conf.get('precision', internal_conf['model.defaults.precision'])),
91 |             'distribution': RandomDistribution(
92 |                 m_conf.get('distribution', internal_conf['model.defaults.distribution']))
93 |         }
94 | 
95 |     def _create_model(self, name: str, dimensions: int, precision: Precision, distribution: RandomDistribution,
96 |                       module: ModuleType, opt_globals: str, functions: Dict[str, Callable]) -> Model:
97 |         """Creates a :class:`model.Model` object containing all relevant information for an optimization run
98 | 
99 |         Args:
100 |             name: Name of optimization
101 |             module: Module containing the optimization functions that were provided by the user
102 |             opt_globals: Global variables that should be available during optimization
103 |             functions: Map of function name to function object of all required optimization functions
104 | 
105 |         Returns:
106 |             Internal representation of a Model. Ready to be transmitted to the workers.
107 |         """
108 |         return Model(
109 |             name,
110 |             dimensions,
111 |             precision,
112 |             distribution,
113 |             opt_globals,
114 |             len(module.empty_state()),  # type: ignore
115 |             # The model is prepared for sending it to the workers
116 |             # and contains raw source instead of the real python functions
117 |             {f_name: inspect.getsource(functions[f_name])
118 |              for f_name in functions.keys()})
119 | 
120 |     def _extract_functions(self, module: ModuleType) -> Dict[str, Callable]:
121 |         """Extracts required functions from the intermediate python module
122 | 
123 |         Args:
124 |             module: Module into which the provided optimization functions were interpreted
125 | 
126 |         Returns:
127 |             Dictionary of function name to function object of all required functions
128 |         """
129 |         functions: Dict[str, Callable] = {}
130 | 
131 |         for func in RequiredFunctions:
132 |             functions[func.value] = module.__getattribute__(func.value)
133 | 
134 |         return functions
135 | 
136 |     def get_model(self) -> Optional[Model]:
137 |         try:
138 |             return self.model
139 |         except AttributeError:
140 |             return None
141 | 
142 |     def _create_module(self, name: str, file: str) -> ModuleType:
143 |         """Interprets a given file into a python module
144 | 
145 |         Args:
146 |             name: Name of module to be created
147 |             file: Path to file
148 | 
149 |         Returns:
150 |             Python module that contains the interpreted code of the input file
151 |         """
152 |         module = imp.load_source(name, file)
153 | 
154 |         if module is None:
155 |             raise AssertionError('Model could not be loaded.')
156 | 
157 |         return module
-------------------------------------------------------------------------------- /csaopt/model_loader/model_validator.py: --------------------------------------------------------------------------------
1 | import inspect
2 | import subprocess
3 | 
4 | from pyhocon import ConfigTree
5 | from typing import Optional, List, Dict, Callable
6 | from ..model import RequiredFunctions
7 | from . import ValidationError
8 | 
9 | 
10 | def _empty_function():
11 |     pass
12 | 
13 | 
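# Editorial note (illustrative, not part of the original module): the validator below
# compares a function's compiled bytecode against _empty_function to detect bodies
# that are effectively empty. Roughly:
#
#     def noop(): pass
#     noop.__code__.co_code == _empty_function.__code__.co_code  # True on the same interpreter
#
# The exact bytecode differs between Python versions, which is why the reference is
# computed at runtime rather than hard-coded.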
14 | class ModelValidator:
15 | 
16 |     empty_function_bytecode = _empty_function.__code__.co_code
17 | 
18 |     # TODO: review these
19 |     required_param_counts = {
20 |         'initialize': 2,
21 |         'generate_next': 4,
22 |         'cool': 3,
23 |         'evaluate': 1,
24 |         'acceptance_func': 4,
25 |         'empty_state': 0
26 |     }
27 | 
28 |     def validate_functions(self, functions: Dict[str, Callable], internal_config: ConfigTree) -> List[ValidationError]:
29 |         """Run validators on optimization functions
30 | 
31 |         Args:
32 |             functions: Dictionary mapping function name to function object
33 |             internal_config: Internal CSAOpt configuration
34 | 
35 |         Returns:
36 |             A list of ValidationErrors. This list will be empty when all restrictions are met.
37 |         """
38 |         errors: List[ValidationError] = []
39 | 
40 |         self.reserved_keywords: List[str] = internal_config['model.validation.reserved_keywords']
41 | 
42 |         for func in RequiredFunctions:
43 |             val_errors = self._validate_function(func.value, functions[func.value],
44 |                                                  self.required_param_counts[func.value])
45 | 
46 |             errors.extend([err for err in val_errors if err is not None])
47 | 
48 |         return errors
49 | 
50 |     def _validate_function(self, name: str, fun: Callable, param_count: int) -> List[Optional[ValidationError]]:
51 |         """Run all validators on the input function
52 | 
53 |         Args:
54 |             name: Name of function
55 |             fun: Function object
56 |             param_count: Number of expected function arguments
57 |         """
58 |         return [
59 |             self._validate_missing_fun(name, fun),
60 |             self._validate_empty_fun(name, fun),
61 |             # TODO review if this is required
62 |             self._validate_return_statement(name, fun),
63 |             self._validate_fun_signature_len(name, fun, param_count),
64 |             self._check_for_reserved_keywords(name, fun)
65 |         ]
66 | 
67 |     def validate_typing(self, file_path: str) -> Optional[ValidationError]:
68 |         """Validates the input file using mypy
69 | 
70 |         Args:
71 |             file_path: Path to file
72 | 
73 |         Returns:
74 |             :class:`~model_loader.ValidationError` if validation fails, otherwise `None`
75 |         """
76 |         mypy_result = subprocess.run(['mypy', file_path], stdout=subprocess.PIPE)
77 | 
78 |         if mypy_result.returncode != 0:
79 |             return ValidationError(mypy_result.stdout.decode('utf-8'))
80 |         return None
81 | 
82 |     def _check_for_reserved_keywords(self, name: str, fun: Callable) -> Optional[ValidationError]:
83 |         """Returns ValidationError if function contains reserved keywords
84 | 
85 |         Args:
86 |             name: Name of function that is checked
87 |             fun: Function object to be checked
88 | 
89 |         Returns:
90 |             :class:`~model_loader.ValidationError` if validation fails, otherwise `None`
91 |         """
92 |         for reserved_keyword in self.reserved_keywords:
93 |             if reserved_keyword in inspect.getsource(fun):
94 |                 return ValidationError('Reserved Keyword {} found in function \'{}\''.format(reserved_keyword, name))
95 |         return None
96 | 
97 |     def _validate_missing_fun(self, name: str, fun: Callable) -> Optional[ValidationError]:
98 |         """Returns a ValidationError if function is missing
99 | 
100 |         Args:
101 |             name: Name of function that is checked
102 |             fun: Function object to be checked
103 | 
104 |         Returns:
105 |             :class:`~model_loader.ValidationError` if validation fails, otherwise `None`
106 |         """
107 |         if fun is None:
108 |             return ValidationError('Definition of function `{}` not found.'.format(name))
109 |         return None
110 | 
111 |     def _validate_empty_fun(self, name: str, fun: Callable) -> Optional[ValidationError]:
112 |         """Returns a 
ValidationError if function has no body (i.e. only pass, return) 113 | 114 | Args: 115 | name: Name of function that is checked 116 | fun: Function object to be checked 117 | 118 | Returns: 119 | :class:`~model_loader.ValidationError` if validation fails, otherwise `None` 120 | """ 121 | if fun.__code__.co_code == self.empty_function_bytecode: 122 | return ValidationError('Definition of function `{}` is empty.'.format(name)) 123 | return None 124 | 125 | def _validate_fun_signature_len(self, name: str, fun: Callable, num_params: int) -> Optional[ValidationError]: 126 | """Validates that a given function accepts the correct number of arguments 127 | 128 | Args: 129 | name: Name of function that is checked 130 | fun: Function object to be checked 131 | 132 | Returns: 133 | :class:`~model_loader.ValidationError` if validation fails, otherwise `None` 134 | """ 135 | if len(inspect.signature(fun).parameters) != num_params: 136 | return ValidationError( 137 | 'Signature of `{}` has an incorrect number of parameters (expected {}, found {})'.format( 138 | name, num_params, len(inspect.signature(fun).parameters))) 139 | return None 140 | 141 | def _validate_return_statement(self, name: str, fun: Callable) -> Optional[ValidationError]: 142 | """Validates that a given function includes a return statement 143 | 144 | Args: 145 | name: Name of function that is checked 146 | fun: Function object to be checked 147 | 148 | Returns: 149 | :class:`~model_loader.ValidationError` if validation fails, otherwise `None` 150 | """ 151 | if 'return' not in inspect.getsource(fun): 152 | return ValidationError('Body of function `{}` does not contain a `return` statement. '.format(name)) 153 | return None 154 | -------------------------------------------------------------------------------- /csaopt/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import string 3 | import requests 4 | import os 5 | import logging 6 | 7 | from typing import Optional 8 | from random import choice, randint 9 | from pyhocon import ConfigFactory 10 | from pyhocon.config_tree import ConfigTree 11 | 12 | log = logging.getLogger(__name__) 13 | 14 | 15 | def docker_available() -> bool: 16 | try: 17 | import docker 18 | df = docker.from_env().df() 19 | return df is not None 20 | except Exception: 21 | return False 22 | 23 | 24 | def is_pytest_run() -> bool: 25 | return os.environ.get('UNIT_TESTS') == '1' 26 | 27 | 28 | def random_int(lower: int, upper: int) -> int: 29 | return randint(lower, upper) 30 | 31 | 32 | def random_str(length: int) -> str: 33 | """ 34 | Generates a random string using ascii letters and digits 35 | """ 36 | chars = string.ascii_letters + string.digits 37 | return ''.join(choice(chars) for x in range(length)) 38 | 39 | 40 | def internet_connectivity_available(host: str = "8.8.8.8", port: int = 53, timeout_seconds: float = 3.0) -> bool: 41 | """ 42 | Checks if internet connectivity is available. 
43 | 44 | Default values opening connection to the Google DNS server at: 45 | Host: 8.8.8.8 (google-public-dns-a.google.com) 46 | OpenPort: 53/tcp 47 | Service: domain (DNS/TCP) 48 | Source: https://stackoverflow.com/a/33117579/2822762 49 | """ 50 | try: 51 | socket.setdefaulttimeout(timeout_seconds) 52 | socket.socket(socket.AF_INET, socket.SOCK_STREAM).connect((host, port)) 53 | return True 54 | except Exception as e: 55 | log.exception('Exception in internet_connectivity_available()') 56 | return False 57 | 58 | 59 | def get_configs(conf_path: str) -> Optional[ConfigTree]: 60 | """Parse a hocon file into a ConfigTree""" 61 | return ConfigFactory.parse_file(conf_path) 62 | 63 | 64 | def get_free_tcp_port() -> Optional[int]: 65 | """Get a free tcp port from the OS""" 66 | try: 67 | tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 68 | tcp.bind(('', 0)) 69 | addr, port = tcp.getsockname() 70 | tcp.close() 71 | return port 72 | except Exception: 73 | log.exception('Exception in get_free_tcp_port()') 74 | return None 75 | 76 | 77 | def clamp(min_val, val, max_val) -> float: 78 | return max(min_val, min(max_val, val)) 79 | 80 | 81 | class FakeCuda(): 82 | def __init__(self): 83 | pass 84 | 85 | def jit(*args, **kwargs): 86 | def f(fun): 87 | return fun 88 | 89 | return f 90 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/stable/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | # import os 16 | # import sys 17 | # sys.path.insert(0, os.path.abspath('.')) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'CSAOpt' 23 | copyright = '2018, David Sere' 24 | author = 'David Sere' 25 | 26 | # The short X.Y version 27 | version = '' 28 | # The full version, including alpha/beta/rc tags 29 | release = '' 30 | 31 | 32 | # -- General configuration --------------------------------------------------- 33 | 34 | # If your documentation needs a minimal Sphinx version, state it here. 35 | # 36 | # needs_sphinx = '1.0' 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | 'sphinx.ext.autodoc', 43 | 'sphinx.ext.intersphinx', 44 | 'sphinx.ext.todo', 45 | 'sphinx.ext.coverage', 46 | 'sphinx.ext.mathjax', 47 | 'sphinx.ext.ifconfig', 48 | 'sphinx.ext.viewcode', 49 | ] 50 | 51 | # Add any paths that contain templates here, relative to this directory. 52 | templates_path = ['_templates'] 53 | 54 | # The suffix(es) of source filenames. 55 | # You can specify multiple suffix as a list of string: 56 | # 57 | # source_suffix = ['.rst', '.md'] 58 | source_suffix = '.rst' 59 | 60 | # The master toctree document. 61 | master_doc = 'index' 62 | 63 | # The language for content autogenerated by Sphinx. 
Refer to documentation 64 | # for a list of supported languages. 65 | # 66 | # This is also used if you do content translation via gettext catalogs. 67 | # Usually you set "language" from the command line for these cases. 68 | language = None 69 | 70 | # List of patterns, relative to source directory, that match files and 71 | # directories to ignore when looking for source files. 72 | # This pattern also affects html_static_path and html_extra_path . 73 | exclude_patterns = [] 74 | 75 | # The name of the Pygments (syntax highlighting) style to use. 76 | pygments_style = 'sphinx' 77 | 78 | 79 | # -- Options for HTML output ------------------------------------------------- 80 | 81 | # The theme to use for HTML and HTML Help pages. See the documentation for 82 | # a list of builtin themes. 83 | # 84 | html_theme = 'alabaster' 85 | 86 | # Theme options are theme-specific and customize the look and feel of a theme 87 | # further. For a list of options available for each theme, see the 88 | # documentation. 89 | # 90 | # html_theme_options = {} 91 | 92 | # Add any paths that contain custom static files (such as style sheets) here, 93 | # relative to this directory. They are copied after the builtin static files, 94 | # so a file named "default.css" will overwrite the builtin "default.css". 95 | html_static_path = ['_static'] 96 | 97 | # Custom sidebar templates, must be a dictionary that maps document names 98 | # to template names. 99 | # 100 | # The default sidebars (for documents that don't match any pattern) are 101 | # defined by theme itself. Builtin themes are using these templates by 102 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 103 | # 'searchbox.html']``. 104 | # 105 | # html_sidebars = {} 106 | 107 | 108 | # -- Options for HTMLHelp output --------------------------------------------- 109 | 110 | # Output file base name for HTML help builder. 111 | htmlhelp_basename = 'CSAOptdoc' 112 | 113 | 114 | # -- Options for LaTeX output ------------------------------------------------ 115 | 116 | latex_elements = { 117 | # The paper size ('letterpaper' or 'a4paper'). 118 | # 119 | # 'papersize': 'letterpaper', 120 | 121 | # The font size ('10pt', '11pt' or '12pt'). 122 | # 123 | # 'pointsize': '10pt', 124 | 125 | # Additional stuff for the LaTeX preamble. 126 | # 127 | # 'preamble': '', 128 | 129 | # Latex figure (float) alignment 130 | # 131 | # 'figure_align': 'htbp', 132 | } 133 | 134 | # Grouping the document tree into LaTeX files. List of tuples 135 | # (source start file, target name, title, 136 | # author, documentclass [howto, manual, or own class]). 137 | latex_documents = [ 138 | (master_doc, 'CSAOpt.tex', 'CSAOpt Documentation', 139 | 'David Sere', 'manual'), 140 | ] 141 | 142 | 143 | # -- Options for manual page output ------------------------------------------ 144 | 145 | # One entry per manual page. List of tuples 146 | # (source start file, name, description, authors, manual section). 147 | man_pages = [ 148 | (master_doc, 'csaopt', 'CSAOpt Documentation', 149 | [author], 1) 150 | ] 151 | 152 | 153 | # -- Options for Texinfo output ---------------------------------------------- 154 | 155 | # Grouping the document tree into Texinfo files. 
List of tuples 156 | # (source start file, target name, title, author, 157 | # dir menu entry, description, category) 158 | texinfo_documents = [ 159 | (master_doc, 'CSAOpt', 'CSAOpt Documentation', 160 | author, 'CSAOpt', 'One line description of project.', 161 | 'Miscellaneous'), 162 | ] 163 | 164 | 165 | # -- Extension configuration ------------------------------------------------- 166 | 167 | # -- Options for intersphinx extension --------------------------------------- 168 | 169 | # Example configuration for intersphinx: refer to the Python standard library. 170 | intersphinx_mapping = {'https://docs.python.org/': None} 171 | 172 | # -- Options for todo extension ---------------------------------------------- 173 | 174 | # If true, `todo` and `todoList` produce output, else they produce nothing. 175 | todo_include_todos = True -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. CSAOpt documentation master file, created by 2 | sphinx-quickstart on Sat May 19 22:02:24 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to CSAOpt's documentation! 7 | ================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | 14 | 15 | Indices and tables 16 | ================== 17 | 18 | * :ref:`genindex` 19 | * :ref:`modindex` 20 | * :ref:`search` 21 | -------------------------------------------------------------------------------- /environment.dev.yml: -------------------------------------------------------------------------------- 1 | name: csaopt 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.6.4 6 | - pip 7 | - pip: 8 | - msgpack==0.5.6 9 | - click==7.0 10 | - numpy==1.16.4 11 | - msgpack-numpy==0.4.4.1 12 | - sortedcontainers==2.0.4 13 | - dramatiq[redis, watch]==1.3.0 14 | - boto3==1.9.161 15 | - moto==1.3.8 16 | - async-timeout==3.0.0 17 | - apscheduler==3.5.3 18 | - sty==1.0.0b7 19 | - pyhocon==0.3.44 20 | - mypy==0.620 21 | - pytest==3.8.1 22 | - pytest-cov==2.6.0 23 | - pytest-timeout==1.3.2 24 | - pytest-mock==1.10.0 25 | - pytest-asyncio==0.10.0 26 | - Sphinx==1.8.1 27 | - coverage==4.5.1 28 | - coveralls==1.5.1 29 | - flake8==3.4.1 30 | - docker==3.5.0 31 | - yapf==0.24.0 32 | - better-exceptions==0.2.2 33 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: csaopt 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.6.4 6 | - pip 7 | - pip: 8 | - msgpack==0.5.6 9 | - click==7.0 10 | - numpy==1.16.4 11 | - msgpack-numpy==0.4.4.1 12 | - sortedcontainers==2.0.4 13 | - dramatiq[redis]==1.3.0 14 | - boto3==1.9.161 15 | - async-timeout==3.0.0 16 | - apscheduler==3.5.3 17 | - sty==1.0.0b7 18 | - pyhocon==0.3.44 19 | - mypy==0.620 20 | - better-exceptions==0.2.2 21 | -------------------------------------------------------------------------------- /examples/ackley/ackley.conf: -------------------------------------------------------------------------------- 1 | { 2 | # This name will be used to TODO 3 | name = ackley_100_2d 4 | 5 | save_to_file { 6 | type = all # or all, none 7 | # base_dir = /home/username/optimization_results/ # This is optional, will use cwd by default 8 | } 9 | 10 | model { 11 | skip_typecheck = true 12 | dimensions = 2 13 | precision = 
float32
14 |         distribution = normal
15 |     }
16 | 
17 |     optimization {
18 |         max_steps = 1000
19 |         initial_temp = 1000.0
20 |         thread_count = 256
21 |         random_seed = 424242
22 |         min_temp = 1e-35
23 |     }
24 | 
25 |     # debug {
26 |     #     gpu_simulator = True
27 |     # }
28 | 
29 |     remote {
30 |         # local_docker = True
31 |         platform = aws
32 |         aws {
33 |             region = eu-central-1
34 |             # These will be picked up from ~/.aws/credentials or ENV
35 |             # secret_key = 123
36 |             # access_key = 123
37 | 
38 |             worker_count = 1
39 |             timeout = 100000
40 |         }
41 |     }
42 | }
-------------------------------------------------------------------------------- /examples/ackley/ackley_opt.py: --------------------------------------------------------------------------------
1 | """Ackley Function
2 | https://www.sfu.ca/~ssurjano/ackley.html
3 | 
4 | Description:
5 | Dimensions: d
6 | 
7 | The Ackley function is widely used for testing optimization algorithms.
8 | In its two-dimensional form, as shown in the plot [at the link above], it is characterized by a nearly flat outer
9 | region, and a large hole at the centre. The function poses a risk for optimization algorithms, particularly
10 | hillclimbing algorithms, to be trapped in one of its many local minima.
11 | 
12 | Recommended variable values are: a = 20, b = 0.2 and c = 2pi.
13 | 
14 | Input Domain:
15 | The function is usually evaluated on the hypercube xi in [-32.768, 32.768], for all i = 1, ..., d,
16 | although it may also be restricted to a smaller domain.
17 | 
18 | Global Minimum:
19 | f(x*) = 0, at x* = (0, ..., 0)
20 | 
21 | 
22 | References:
23 | 
24 | Adorio, E. P., & Diliman, U. P. MVF - Multivariate Test Functions Library in C for Unconstrained Global Optimization
25 | (2005). Retrieved June 2013, from http://www.geocities.ws/eadorio/mvf.pdf.
26 | 
27 | Molga, M., & Smutnicki, C. Test functions for optimization needs (2005).
28 | Retrieved June 2013, from http://www.zsd.ict.pwr.wroc.pl/files/docs/functions.pdf.
29 | 
30 | Back, T. (1996). Evolutionary algorithms in theory and practice: evolution strategies, evolutionary programming,
31 | genetic algorithms. Oxford University Press on Demand. Global Optimization Test Functions Index. Retrieved June
32 | 2013, from http://infinity77.net/global_optimization/test_functions.html#test-functions-index.
33 | """
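# For reference (editorial addition; this is the standard form of the Ackley function
# that evaluate() below implements):
#
#   f(x) = -a * exp(-b * sqrt(1/d * sum_i x_i^2)) - exp(1/d * sum_i cos(c * x_i)) + a + e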
34 | 
35 | import math
36 | 
37 | from csaopt.utils import clamp
38 | from csaopt.utils import FakeCuda as cuda
39 | from typing import MutableSequence, Sequence, Any, Tuple
40 | from math import pi
41 | 
42 | # -- Globals
43 | 
44 | a = 20
45 | b = 0.2
46 | c = 2 * pi
47 | upper_bound = 32.768
48 | lower_bound = -32.768
49 | max_steps = 1000
50 | 
51 | 
52 | @cuda.jit(device=True, inline=True)
53 | def copy_state(b, a):
54 |     for i in range(len(b)):
55 |         a[i] = b[i]
56 | 
57 | 
58 | @cuda.jit(device=True)
59 | def scale(val, old_min, old_max, new_min, new_max):
60 |     return (val - old_min) / (old_max - old_min) * (new_max - new_min) + new_min
61 | 
62 | 
63 | # -- Globals
64 | 
65 | # Configuration
66 | 
67 | 
68 | def empty_state() -> Tuple:
69 |     return (0.0, 0.0)
70 | 
71 | 
72 | # Functions
73 | 
74 | 
75 | def cool(initial_temp: float, old_temp: float, step: int) -> float:
76 |     return (1 - 0.14) * old_temp
77 | 
78 | 
79 | def acceptance_func(e_old: float, e_new: float, temp: float, rnd: float) -> bool:
80 |     # prevent math.exp from under or overflowing, we can anyway constrain 0 < e^x <= (e^0 == 1)
81 |     x = clamp(-80, -(e_new - e_old) / temp, 0.1)
82 |     return math.exp(x) >= rnd
83 | 
84 | 
85 | def initialize(state: MutableSequence, randoms: Sequence[float]) -> None:
86 |     for i in range(len(state)):
87 |         state[i] = scale(randoms[i], 0.0, 1.0, lower_bound, upper_bound)
88 |     return
89 | 
90 | 
91 | def evaluate(state: Sequence) -> float:
92 |     d = len(state)
93 |     t1_sum = 0.0
94 |     t2_sum = 0.0
95 |     for i in range(d):
96 |         t1_sum += state[i] * state[i]
97 |         t2_sum += math.cos(c * state[i])
98 |     t1 = -a * math.exp(-b * math.sqrt(t1_sum / d))
99 |     t2 = math.exp(t2_sum / d)
100 |     return t1 - t2 + a + 2.71828182846  # the trailing constant is e = exp(1)
101 | 
102 | 
103 | def generate_next(state: Sequence, new_state: MutableSequence, randoms: Sequence[float], step) -> Any:
104 |     # i = int(randoms[0] * len(state)) % len(state)
105 |     # delta = (randoms[dim] - 0.5) * 10 * (1 - float(step) / max_steps)
106 |     d = len(state)
107 |     for dim in range(d):
108 |         if ((randoms[dim] * 100000) % 1 < 0.33):
109 |             # skip this dimension with probability ~0.33
110 |             continue
111 | 
112 |         delta = scale(randoms[dim], 0.0, 1.0, lower_bound, upper_bound) * (1 - float(step) / (max_steps * 1.1))
113 |         new_val = state[dim] + delta
114 | 
115 |         if new_val > 10 or new_val < -5:
116 |             new_val = clamp(-5, state[dim] + delta, 10)
117 | 
118 |         new_state[dim] = new_val
119 |     return  # empty return required by validator
-------------------------------------------------------------------------------- /examples/bukin_6/bukin_6_opt.py: --------------------------------------------------------------------------------
1 | # Bukin Function #6
2 | # https://www.sfu.ca/~ssurjano/bukin6.html
3 | #
4 | # Dimensions: 2
5 | # The sixth Bukin function has many local minima, all of which lie in a ridge.
6 | 
7 | # Input Domain:
8 | # The function is usually evaluated on the rectangle x1 in [-15, -5], x2 in [-3, 3].
9 | 
10 | # Global Minimum:
11 | # f(x*) = 0 at x* = (-10, 1)
12 | 
13 | # Reference:
14 | # Global Optimization Test Functions Index. Retrieved June 2013, from http://infinity77.net/global_optimization/test_functions.html#test-functions-index.
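# For reference (editorial addition; evaluate() below implements this):
#   f(x1, x2) = 100 * sqrt(|x2 - 0.01 * x1^2|) + 0.01 * |x1 + 10|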
15 | 16 | import math 17 | 18 | from csaopt.model import RandomDistribution, Precision 19 | from csaopt.utils import clamp 20 | from typing import MutableSequence, Sequence, Any, Tuple 21 | from math import pi 22 | 23 | # -- Globals 24 | 25 | # -- Globals 26 | 27 | # Configuration 28 | 29 | 30 | def distribution() -> RandomDistribution: 31 | return RandomDistribution.Uniform 32 | 33 | 34 | def precision() -> Precision: 35 | return Precision.Float32 36 | 37 | 38 | def dimensions() -> int: 39 | return 2 40 | 41 | 42 | def empty_state() -> Tuple: 43 | return (0.0, 0.0) 44 | 45 | 46 | # Functions 47 | 48 | 49 | def cool(initial_temp: float, old_temp: float, step: int) -> float: 50 | return initial_temp * math.pow(0.97, step) 51 | 52 | 53 | def acceptance_func(e_old: float, e_new: float, temp: float, rnd: float) -> bool: 54 | # prevent math.exp from under or overflowing, we can anyway constrain 0 < e^x <= (e^0 == 1) 55 | x = clamp(-80, (e_old - e_new) / temp, 0.1) 56 | return math.exp(x) > rnd 57 | 58 | 59 | def initialize(state: MutableSequence, randoms: Sequence[float]) -> None: 60 | generate_next(state, state, randoms, 0) # just delegate to generate_next 61 | return 62 | 63 | 64 | def evaluate(state: Sequence) -> float: 65 | x1 = state[0] 66 | x2 = state[1] 67 | return 100 * math.sqrt(abs(x2 - 0.01 * x1 * x1)) + 0.01 * abs(x1 + 10) 68 | 69 | 70 | def generate_next(state: Sequence, new_state: MutableSequence, randoms: Sequence[float], step) -> Any: 71 | new_state[0] = clamp(-15, (randoms[0] * 20) - 15, -5) 72 | new_state[1] = clamp(-3, (randoms[1] * 6) - 3, 3) 73 | return 74 | -------------------------------------------------------------------------------- /examples/bukin_6/buking_6.conf: -------------------------------------------------------------------------------- 1 | { 2 | # This name will be used to TODO 3 | name = bukin6_10_10000 4 | 5 | save_to_file { 6 | type = best # or all, none 7 | # base_dir = /home/username/optimization_results/ # This is optional, will use cwd by default 8 | } 9 | 10 | model { 11 | skip_typecheck = true 12 | } 13 | 14 | optimization { 15 | thread_count = 8 16 | initial_temp = 10.0, 17 | # random_seed = -919, 18 | max_steps = 12000 19 | } 20 | 21 | debug { 22 | gpu_simulator: enabled 23 | } 24 | 25 | remote { 26 | local_docker = True 27 | # platform = aws 28 | # use_existing_instances = false 29 | # terminate_on_exit = true 30 | aws { 31 | region = eu-central-1 32 | # # These will be picked up from ~/.aws/credentials or ENV 33 | # secret_key = 123 34 | # access_key = 123 35 | worker_instance_type = g2.2xlarge 36 | broker_instance_type = m5.2xlarge 37 | worker_count = 1 38 | timeout_provision = 20000 39 | timeout_startup = 10000 40 | timeout_deploy = 10000 41 | timeout_optimization = -1 42 | } 43 | } 44 | } -------------------------------------------------------------------------------- /examples/hp/hp_opt.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from typing import List, Tuple, MutableSequence, Sequence, Collection, Any 4 | 5 | from csaopt.utils import clamp 6 | 7 | Monomer = Tuple[int, int, int, int] 8 | Chain2d = List[Monomer] 9 | 10 | # -- Globals 11 | 12 | hp_str = 'PHHPPHPHPPHPHPHPPHPPHHHHH' 13 | eps = -1 14 | h_idxs = [idx for idx, mm in enumerate(hp_str) if mm == 'H'] 15 | 16 | 17 | # @numba.cuda.jit(inline=True, device=True) 18 | def is_valid_conformation(chain: Chain2d) -> bool: # a conformation is invalid if two monomers occupy the same lattice site 19 | row_max = len(chain) 20 | for i in range(row_max): 21 | for j in range(i + 1, row_max): 22 | d = 
(chain[i][1] - chain[j][1])**2 + (chain[i][2] - chain[j][2])**2 23 | if d < 1.0: 24 | return False 25 | return True 26 | 27 | 28 | # -- Globals 29 | 30 | 31 | def empty_state() -> Collection: 32 | return [(0, 0, 0, 0)] * len(hp_str) 33 | 34 | 35 | def cool(initial_temp: float, old_temp: float, step: int) -> float: 36 | return initial_temp * math.pow(0.97, step) 37 | 38 | 39 | def acceptance_func(e_old: float, e_new: float, temp: float, rnd: float) -> bool: 40 | # prevent math.exp from under or overflowing, we can anyway constrain 0 < e^x <= (e^0 == 1) 41 | x = clamp(-80, (e_old - e_new) / temp, 0.1) 42 | return math.exp(x) > rnd 43 | 44 | 45 | def initialize(state: MutableSequence, randoms: Sequence[float]) -> None: 46 | generate_next(state, state, randoms, 0) # just delegate to generate_next 47 | 48 | 49 | def evaluate(state: Sequence) -> float: 50 | num_hs = len(h_idxs) 51 | num_contacts = 0 52 | # contacts: List[Tuple[int, int]] = [] 53 | for i in range(num_hs): 54 | for j in range(i + 1, num_hs): 55 | h2 = h_idxs[j] 56 | h1 = h_idxs[i] 57 | if (h2 - h1) >= 3: 58 | # two hydrophobic monomers can only form a (non-bonded) contact if they are at least 3 apart along the chain 59 | d = float(state[h1][1] - state[h2][1])**2 +\ 60 | float(state[h1][2] - state[h2][2])**2 # squared euclidean distance 61 | if d < 1.05: # a squared distance of 1 means neighbouring lattice sites, i.e. a contact 62 | num_contacts += 1 63 | 64 | return num_contacts * eps 65 | 66 | 67 | def rigid_rotation(chain: Chain2d, idx: int = 0, clockwise: bool = False): 68 | rot = 1 if clockwise else -1 69 | 70 | # Mutate the rest of the chain by the chosen rotation, starting from idx 71 | for i in range(idx, len(chain)): 72 | chain[i][3] = (chain[i][3] + rot) % 4 # type: ignore 73 | 74 | 75 | def crankshaft(chain: Chain2d, idx: int): 76 | tmp1 = chain[idx][3] 77 | tmp2 = chain[idx + 2][3] 78 | if tmp1 != tmp2: 79 | chain[idx][3] = tmp2 # type: ignore 80 | chain[idx + 2][3] = tmp1 # type: ignore 81 | 82 | 83 | def three_bead_flip(chain: Chain2d, idx: int): 84 | tmp1 = chain[idx][3] 85 | tmp2 = chain[idx + 1][3] 86 | if tmp1 != tmp2: 87 | chain[idx][3] = tmp2 # type: ignore 88 | chain[idx + 1][3] = tmp1 # type: ignore 89 | 90 | 91 | def generate_next(state: Sequence, new_state: Chain2d, randoms: Sequence[float], step) -> Any: 92 | len_randoms = len(randoms) 93 | n = 0 94 | while n <= 100: 95 | idx = int(math.floor((len(state) - 1.0001) * randoms[n % len_randoms])) 96 | for i in range(len(state)): 97 | new_state[i] = state[i] 98 | 99 | if randoms[1] < 0.3 or idx > (len(state) - 3): 100 | # rotate the chain rigidly from idx onward (also used when idx is too close to the chain end for a local move) 101 | clockwise = randoms[2] < 0.5 102 | rigid_rotation(new_state, idx, clockwise=clockwise) 103 | elif randoms[1] < 0.66: 104 | # do a crankshaft move, i.e. 
swap the {n,e,w,s} directions at idx and idx + 2 105 | crankshaft(new_state, idx) 106 | else: 107 | three_bead_flip(new_state, idx) # three-bead flip: swap two adjacent {n,e,w,s} directions 108 | 109 | if is_valid_conformation(new_state): 110 | break 111 | 112 | n += 1 113 | -------------------------------------------------------------------------------- /examples/hp/render.py: -------------------------------------------------------------------------------- 1 | import plotly 2 | import plotly.graph_objs as go 3 | import networkx as nx 4 | import math 5 | import multiprocessing 6 | 7 | from typing import List, Tuple 8 | 9 | Chain = List[List[int]] 10 | 11 | 12 | def render_plotly(chain: Chain, contacts: List[Tuple[int, int]], filename='') -> None: 13 | hp_len = len(chain) 14 | scale_factor = max(8, hp_len / 4) 15 | 16 | contact_edges = [] 17 | for contact in contacts: 18 | print('Processing contact') 19 | coord1 = chain[contact[0]] 20 | coord2 = chain[contact[1]] 21 | contact_edges.append( 22 | go.Scatter( 23 | hoverinfo='none', 24 | x=[coord1[1], coord2[1]], 25 | y=[coord1[2], coord2[2]], 26 | line=dict(width=8, color='rgba(183,183,183,0.3)', dash='dot'), 27 | )) 28 | 29 | Xbe = [coord[1] for coord in chain] 30 | Ybe = [coord[2] for coord in chain] 31 | backbone_edges = go.Scatter( 32 | hoverinfo='none', 33 | x=Xbe, 34 | y=Ybe, 35 | line=dict(width=2 * scale_factor, color='black'), 36 | ) 37 | 38 | Xn = [coord[1] for coord in chain] 39 | Yn = [coord[2] for coord in chain] 40 | # for idx, coord in enumerate(coords): 41 | # print('coord[{}][1] + coord[{}][2] = {}', idx, idx, coord[idx][1] + coord[idx][2]) 42 | print(Xn) 43 | print(Yn) 44 | node_trace = go.Scatter( 45 | hoverinfo='none', 46 | x=Xn, 47 | y=Yn, 48 | text=['{}'.format(i) for i in range(len(chain))], 49 | textposition='middle center', 50 | textfont=dict(size=2 * scale_factor, color='rgb(160,160,160)'), 51 | line={}, 52 | mode='markers+text', 53 | marker=dict( 54 | showscale=False, 55 | # colorscale options 56 | # 'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' | 57 | # 'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' | 58 | # 'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' | 59 | # colorscale='Blackbody', 60 | reversescale=True, 61 | color=[], 62 | line=dict(width=scale_factor, color='black'), 63 | size=8 * scale_factor, 64 | )) 65 | 66 | node_trace['marker']['color'] = list(map(lambda c: 'white' if c[0] == 1 else 'rgb(70,70,70)', chain)) 67 | 68 | fig = go.Figure( 69 | data=[ 70 | backbone_edges, 71 | *contact_edges, 72 | node_trace, 73 | ], 74 | layout=go.Layout( 75 | autosize=False, 76 | width=1000, 77 | height=1000, 78 | showlegend=False, 79 | xaxis=dict(showgrid=True, zeroline=False, showticklabels=False), 80 | yaxis=dict(showgrid=True, zeroline=False, showticklabels=False, scaleanchor="x", scaleratio=1))) 81 | 82 | try: 83 | current_proc = multiprocessing.current_process() 84 | if filename == '': 85 | filename = 'hp_plot_' + str(current_proc.pid) 86 | except Exception: 87 | pass 88 | plotly.offline.plot(fig, filename=filename + '.html') 89 | -------------------------------------------------------------------------------- /examples/langermann/langermann_opt.conf: -------------------------------------------------------------------------------- 1 | { 2 | # This name will be used to TODO 3 | name = langermann_5000_1000 4 | 5 | save_to_file { 6 | type = best # or all, none 7 | # base_dir = /home/username/optimization_results/ # This is optional, will use cwd by default 8 | } 9 | 10 | model { 11 | skip_typecheck = true 12 | precision = float32 13 | dimensions = 2 
14 | distribution = normal 15 | } 16 | 17 | optimization { 18 | initial_temp = 10.0, 19 | random_seed = -919, 20 | max_steps = 10000 21 | } 22 | 23 | remote { 24 | # local_docker = True 25 | platform = aws 26 | # use_existing_instances = false 27 | # terminate_on_exit = true 28 | aws { 29 | region = eu-central-1 30 | # # These will be picked up from ~/.aws/credentials or ENV 31 | # secret_key = 123 32 | # access_key = 123 33 | worker_instance_type = g2.2xlarge 34 | broker_instance_type = m5.2xlarge 35 | worker_count = 1 36 | timeout_provision = 20000 37 | timeout_startup = 10000 38 | timeout_deploy = 10000 39 | timeout_optimization = -1 40 | } 41 | } 42 | } -------------------------------------------------------------------------------- /examples/langermann/langermann_opt.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from csaopt.utils import clamp 4 | from typing import MutableSequence, Sequence, Any, Tuple 5 | from math import pi 6 | 7 | # -- Globals 8 | 9 | m = 5 10 | c = (1, 2, 5, 2, 3) 11 | A = ((3, 5), (5, 2), (2, 1), (1, 4), (7, 9)) 12 | 13 | # -- Globals 14 | 15 | # Configuration 16 | 17 | 18 | def empty_state() -> Tuple: 19 | return (0.0, 0.0) 20 | 21 | 22 | # Functions 23 | 24 | 25 | def cool(initial_temp: float, old_temp: float, step: int) -> float: 26 | return initial_temp * math.pow(0.9, step) 27 | 28 | 29 | def acceptance_func(e_old: float, e_new: float, temp: float, rnd: float) -> bool: 30 | # prevent math.exp from under or overflowing, we can anyway constrain 0 < e^x <= (e^0 == 1) 31 | x = clamp(-80, (e_old - e_new) / temp, 0.1) 32 | return math.exp(x) > rnd 33 | 34 | 35 | def initialize(state: MutableSequence, randoms: Sequence[float]) -> None: 36 | for i in range(len(randoms)): 37 | state[i] = randoms[i] 38 | return 39 | 40 | 41 | def evaluate(state: Sequence) -> float: 42 | result = 0.0 43 | for i in range(m): # sum from 0 to m-1 44 | t2 = 0.0 45 | for j in range(2): # sum from 0..d-1 46 | s_j = state[j] 47 | a_ij = A[i][j] 48 | t2 += (s_j - a_ij)**2 49 | t2 = -(1 / pi) * t2 50 | t3 = 0.0 51 | for j in range(2): # sum from 0..d-1 52 | t3 += (state[j] - A[i][j])**2 53 | t3 = pi * t3 54 | result += c[i] * math.exp(t2) * math.cos(t3) 55 | return -result 56 | 57 | 58 | def generate_next(state: Sequence, new_state: MutableSequence, randoms: Sequence[float], step: int) -> Any: 59 | for i in range(len(state)): 60 | new_state[i] = clamp(0, state[i] + 0.3 * randoms[i], 10) 61 | return 62 | -------------------------------------------------------------------------------- /examples/rastrigin/rastrigin.conf: -------------------------------------------------------------------------------- 1 | { 2 | # This name will be used to TODO 3 | name = rastrigin_2d 4 | 5 | save_to_file { 6 | type = all # or best, none 7 | # base_dir = /home/username/optimization_results/ # This is optional, will use cwd by default 8 | } 9 | 10 | model { 11 | skip_typecheck = true 12 | dimensions = 3 13 | } 14 | 15 | optimization { 16 | thread_count = 256 17 | initial_temp = 1000.0, 18 | max_steps = 3200 19 | min_temp = 1e-30 20 | } 21 | 22 | # debug { 23 | # gpu_simulator: enabled 24 | # } 25 | 26 | remote { 27 | # local_docker = True 28 | # platform = aws 29 | # use_existing_instances = false 30 | terminate_on_exit = false 31 | platform = aws 32 | aws { 33 | region = eu-central-1 34 | # # These will be picked up from ~/.aws/credentials or ENV 35 | # secret_key = 123 36 | # access_key = 123 37 | # worker_instance_type = g2.2xlarge 38 | # 
broker_instance_type = m5.2xlarge 39 | worker_count = 1 40 | timeout_provision = 2000000 41 | timeout_startup = 1000000 42 | timeout_deploy = 1000000 43 | timeout_optimization = -1 44 | instances { # note that these need to be IDs, not names 45 | broker_password = "kuV52Y9RI2s9G6ALmpgYa32Cbk514i2o" 46 | security_group = "sg-093c978f34b9cffc0" 47 | broker = "i-0d9173d6ef56c7314" 48 | workers = ["i-0eb3d791f7945561c"] 49 | } 50 | } 51 | } 52 | } -------------------------------------------------------------------------------- /examples/rastrigin/rastrigin.docker.conf: -------------------------------------------------------------------------------- 1 | { 2 | # This name will be used to TODO 3 | name = rastrigin_2d_docker 4 | 5 | save_to_file { 6 | type = all # or best, none 7 | # base_dir = /home/username/optimization_results/ # This is optional, will use cwd by default 8 | } 9 | 10 | model { 11 | skip_typecheck = true 12 | dimensions = 3 13 | } 14 | 15 | optimization { 16 | thread_count = 256 17 | initial_temp = 1000.0, 18 | max_steps = 3200 19 | min_temp = 1e-30 20 | } 21 | 22 | debug { 23 | gpu_simulator: enabled 24 | } 25 | 26 | remote { 27 | local_docker = True 28 | # platform = aws 29 | # use_existing_instances = false 30 | # terminate_on_exit = false 31 | # platform = aws 32 | # aws { 33 | # region = eu-central-1 34 | # # These will be picked up from ~/.aws/credentials or ENV 35 | # secret_key = 123 36 | # access_key = 123 37 | # worker_instance_type = g2.2xlarge 38 | # broker_instance_type = m5.2xlarge 39 | # worker_count = 1 40 | # timeout_provision = 2000000 41 | # timeout_startup = 1000000 42 | # timeout_deploy = 1000000 43 | # timeout_optimization = -1 44 | # instances { # note that these need to be IDs, not names 45 | # broker_password = "kuV52Y9RI2s9G6ALmpgYa32Cbk514i2o" 46 | # security_group = "sg-093c978f34b9cffc0" 47 | # broker = "i-0d9173d6ef56c7314" 48 | # workers = ["i-0eb3d791f7945561c"] 49 | # } 50 | # } 51 | } 52 | } -------------------------------------------------------------------------------- /examples/rastrigin/rastrigin_docker_opt.py: -------------------------------------------------------------------------------- 1 | # Rastrigin Function 2 | # https://www.sfu.ca/~ssurjano/rastr.html 3 | # 4 | # Dimensions: d 5 | # 6 | # The Rastrigin function has several local minima. It is highly multimodal, but locations of the minima are regularly 7 | # distributed. 8 | # 9 | # Input Domain: 10 | # The function is usually evaluated on the hypercube xi in [-5.12, 5.12], for all i = 1, ..., d. 11 | # 12 | # Global Minimum: 13 | # f(x*) = 0, at x* = (0, ..., 0) 14 | # 15 | # References: 16 | # Global Optimization Test Problems. Retrieved June 2013, from 17 | # http://www-optima.amp.i.kyoto-u.ac.jp/member/student/hedar/Hedar_files/TestGO.htm. 18 | 19 | # Pohlheim, H. GEATbx Examples: Examples of Objective Functions (2005). Retrieved June 2013, from http://www.geatbx.com/download/GEATbx_ObjFunExpl_v37.pdf. 
20 | 21 | import math 22 | 23 | from csaopt.model import RandomDistribution, Precision 24 | from csaopt.utils import clamp 25 | from csaopt.utils import FakeCuda as cuda 26 | from typing import MutableSequence, Sequence, Any, Tuple 27 | from math import pi 28 | 29 | # Configuration 30 | 31 | # -- Globals 32 | 33 | max_steps = 320 34 | 35 | 36 | @cuda.jit(device=True) 37 | def scale(val, old_min, old_max, new_min, new_max): 38 | return (val - old_min) / (old_max - old_min) * (new_max - new_min) + new_min 39 | 40 | 41 | @cuda.jit(device=True, inline=True) 42 | def copy_state(b, a): 43 | for i in range(len(b)): 44 | a[i] = b[i] 45 | 46 | 47 | # -- Globals 48 | 49 | 50 | def distribution() -> RandomDistribution: 51 | return RandomDistribution.Uniform 52 | 53 | 54 | def precision() -> Precision: 55 | return Precision.Float64 56 | 57 | 58 | def dimensions() -> int: 59 | return 2 60 | 61 | 62 | def empty_state() -> Tuple: 63 | return (0.0, 0.0) 64 | 65 | 66 | # Functions 67 | 68 | 69 | def cool(initial_temp: float, old_temp: float, step: int) -> float: 70 | return (1 - 0.3) * old_temp 71 | 72 | 73 | def acceptance_func(e_old: float, e_new: float, temp: float, rnd: float) -> bool: 74 | # prevent math.exp from under or overflowing, we can anyway constrain 0 < e^x <= (e^0 == 1) 75 | x = clamp(-80, -(e_new - e_old) / temp, 0.1) 76 | return math.exp(x) >= rnd 77 | 78 | 79 | def initialize(state: MutableSequence, randoms: Sequence[float]) -> None: 80 | for i in range(len(state)): 81 | state[i] = scale(randoms[i], 0.0, 1.0, -5.12, 5.12) 82 | return 83 | 84 | 85 | def evaluate(state: Sequence) -> float: 86 | d = len(state) 87 | t1 = 0.0 88 | for i in range(d): 89 | x_i = state[i] 90 | t1 += x_i * x_i - 10 * math.cos(2 * pi * x_i) 91 | return 10 * d + t1 92 | 93 | 94 | def generate_next(state: Sequence, new_state: MutableSequence, randoms: Sequence[float], step: int) -> Any: 95 | d = len(state) 96 | for dim in range(d): 97 | delta = (randoms[dim] - 0.5) * 7 * (1 - float(step) / (max_steps * 1.1)) # parenthesized so the step-size factor stays positive for all steps 98 | new_val = state[dim] + delta 99 | # print('New val', new_val, 'at scale 1 -', step, '/', max_steps, '=', 100 | # 1 - (float(step) / max_steps)) 101 | if new_val > 5.12 or new_val < -5.12: 102 | new_val = state[dim] - delta 103 | new_state[dim] = new_val 104 | return -------------------------------------------------------------------------------- /examples/rastrigin/rastrigin_opt.py: -------------------------------------------------------------------------------- 1 | # Rastrigin Function 2 | # https://www.sfu.ca/~ssurjano/rastr.html 3 | # 4 | # Dimensions: d 5 | # 6 | # The Rastrigin function has several local minima. It is highly multimodal, but locations of the minima are regularly 7 | # distributed. 8 | # 9 | # Input Domain: 10 | # The function is usually evaluated on the hypercube xi in [-5.12, 5.12], for all i = 1, ..., d. 11 | # 12 | # Global Minimum: 13 | # f(x*) = 0, at x* = (0, ..., 0) 14 | # 15 | # References: 16 | # Global Optimization Test Problems. Retrieved June 2013, from 17 | # http://www-optima.amp.i.kyoto-u.ac.jp/member/student/hedar/Hedar_files/TestGO.htm. 18 | 19 | # Pohlheim, H. GEATbx Examples: Examples of Objective Functions (2005). Retrieved June 2013, from http://www.geatbx.com/download/GEATbx_ObjFunExpl_v37.pdf. 
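# For reference, the standard form of this function, which evaluate() below implements, is: f(x) = 10 * d + sum(x_i^2 - 10 * cos(2 * pi * x_i)), summed over i = 1, ..., d. 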
20 | 21 | import math 22 | 23 | from csaopt.model import RandomDistribution, Precision 24 | from csaopt.utils import clamp 25 | from csaopt.utils import FakeCuda as cuda 26 | from typing import MutableSequence, Sequence, Any, Tuple 27 | from math import pi 28 | 29 | # Configuration 30 | 31 | # -- Globals 32 | 33 | max_steps = 3200 34 | 35 | 36 | @cuda.jit(device=True) 37 | def scale(val, old_min, old_max, new_min, new_max): 38 | return (val - old_min) / (old_max - old_min) * (new_max - new_min) + new_min 39 | 40 | 41 | @cuda.jit(device=True, inline=True) 42 | def copy_state(b, a): 43 | for i in range(len(b)): 44 | a[i] = b[i] 45 | 46 | 47 | # -- Globals 48 | 49 | 50 | def distribution() -> RandomDistribution: 51 | return RandomDistribution.Uniform 52 | 53 | 54 | def precision() -> Precision: 55 | return Precision.Float64 56 | 57 | 58 | def dimensions() -> int: 59 | return 2 60 | 61 | 62 | def empty_state() -> Tuple: 63 | return (0.0, 0.0) 64 | 65 | 66 | # Functions 67 | 68 | 69 | def cool(initial_temp: float, old_temp: float, step: int) -> float: 70 | return (1 - 0.03) * old_temp 71 | 72 | 73 | def acceptance_func(e_old: float, e_new: float, temp: float, rnd: float) -> bool: 74 | # prevent math.exp from under or overflowing, we can anyway constrain 0 < e^x <= (e^0 == 1) 75 | x = clamp(-80, -(e_new - e_old) / temp, 0.1) 76 | return math.exp(x) >= rnd 77 | 78 | 79 | def initialize(state: MutableSequence, randoms: Sequence[float]) -> None: 80 | for i in range(len(state)): 81 | state[i] = scale(randoms[i], 0.0, 1.0, -5.12, 5.12) 82 | return 83 | 84 | 85 | def evaluate(state: Sequence) -> float: 86 | d = len(state) 87 | t1 = 0.0 88 | for i in range(d): 89 | x_i = state[i] 90 | t1 += x_i * x_i - 10 * math.cos(2 * pi * x_i) 91 | return 10 * d + t1 92 | 93 | 94 | def generate_next(state: Sequence, new_state: MutableSequence, randoms: Sequence[float], step: int) -> Any: 95 | d = len(state) 96 | for dim in range(d): 97 | delta = (randoms[dim] - 0.5) * 7 * (1 - float(step) / (max_steps * 1.1)) # parenthesized so the step-size factor stays positive for all steps 98 | new_val = state[dim] + delta 99 | # print('New val', new_val, 'at scale 1 -', step, '/', max_steps, '=', 100 | # 1 - (float(step) / max_steps)) 101 | if new_val > 5.12 or new_val < -5.12: 102 | new_val = state[dim] - delta 103 | new_state[dim] = new_val 104 | return -------------------------------------------------------------------------------- /examples/rosenbrock/drop_wave.conf: -------------------------------------------------------------------------------- 1 | { 2 | # This name will be used to TODO 3 | name = drop_wave 4 | 5 | save_to_file { 6 | type = best # or all, none 7 | # base_dir = /home/username/optimization_results/ # This is optional, will use cwd by default 8 | } 9 | 10 | model { 11 | skip_typecheck = true 12 | } 13 | 14 | optimization { 15 | max_steps = 1000 16 | initial_temp = 100.0 17 | thread_count = 16 18 | } 19 | 20 | debug { 21 | gpu_simulator = True 22 | } 23 | 24 | remote { 25 | local_docker = True 26 | # platform = aws 27 | # aws { 28 | # region = eu-central-1 29 | # # These will be picked up from ~/.aws/credentials or ENV 30 | # # secret_key = 123 31 | # # access_key = 123 32 | 33 | # worker_count = 1 34 | # timeout = 1000 35 | # } 36 | } 37 | } -------------------------------------------------------------------------------- /examples/rosenbrock/drop_wave.py: -------------------------------------------------------------------------------- 1 | """Drop-Wave Function 2 | https://www.sfu.ca/~ssurjano/drop.html 3 | 4 | Dimensions: 2 5 | 6 | The Drop-Wave function is 
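multimodal and highly complex. For reference, the standard form of this function, which evaluate() below implements, is: f(x1, x2) = -(1 + cos(12 * sqrt(x1^2 + x2^2))) / (0.5 * (x1^2 + x2^2) + 2). 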
7 | 8 | Input Domain: 9 | The function is usually evaluated on the square xi in [-5.12, 5.12], for all i = 1, 2. 10 | 11 | Global Minimum: 12 | f(x*) = -1 at x* = (0, 0) 13 | 14 | Reference: 15 | 16 | Global Optimization Test Functions Index. Retrieved June 2013, from http://infinity77.net/global_optimization/test_functions.html#test-functions-index. 17 | """ 18 | 19 | import math 20 | 21 | from csaopt.model import RandomDistribution, Precision 22 | from typing import MutableSequence, Sequence, Any, Tuple 23 | from csaopt.utils import clamp  # clamp is used below; the previously imported 'pi' was unused 24 | 25 | # Configuration 26 | 27 | 28 | def distribution() -> RandomDistribution: 29 | return RandomDistribution.Normal 30 | 31 | 32 | def precision() -> Precision: 33 | return Precision.Float32 34 | 35 | 36 | def dimensions() -> int: 37 | return 2 38 | 39 | 40 | def empty_state() -> Tuple: 41 | return (0.0, 0.0) 42 | 43 | 44 | # Functions 45 | 46 | 47 | def cool(initial_temp: float, old_temp: float, step: int) -> float: 48 | return initial_temp * math.pow(0.95, step) 49 | 50 | 51 | def acceptance_func(e_old: float, e_new: float, temp: float, rnd: float) -> bool: 52 | return math.exp(clamp(-80, -(e_new - e_old) / temp, 0.1)) > rnd # same signature and overflow guard as the other example models 53 | 54 | 55 | def initialize(state: MutableSequence, randoms: Sequence[float]) -> None: 56 | for i in range(len(randoms)): 57 | state[i] = randoms[i] 58 | return 59 | 60 | 61 | def evaluate(state: Sequence) -> float: 62 | x1 = state[0] 63 | x2 = state[1] 64 | t1 = x1 * x1 + x2 * x2 65 | return -((1 + math.cos(12 * math.sqrt(t1))) / (0.5 * t1 + 2)) 66 | 67 | 68 | def generate_next(state: Sequence, new_state: MutableSequence, randoms: Sequence[float], step: int) -> Any: # step is part of the expected signature, unused here 69 | for i in range(len(state)): 70 | new_state[i] = clamp(-5.12, state[i] + randoms[i], 5.12) 71 | return 72 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | 4 | [mypy] 5 | ignore_missing_imports = true 6 | 7 | [yapf] 8 | based_on_style = pep8 9 | column_limit = 120 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Packaging settings.""" 2 | 3 | from codecs import open 4 | from os.path import abspath, dirname, join 5 | from subprocess import call 6 | 7 | from setuptools import Command, find_packages, setup 8 | 9 | from csaopt import __version__ 10 | from typing import List 11 | 12 | this_dir = abspath(dirname(__file__)) 13 | with open(join(this_dir, 'README.md'), encoding='utf-8') as file: 14 | long_description = file.read() 15 | 16 | 17 | class RunTests(Command): 18 | """Run all tests.""" 19 | description = 'run tests' 20 | user_options: List[str] = [] 21 | 22 | def initialize_options(self): 23 | pass 24 | 25 | def finalize_options(self): 26 | pass 27 | 28 | def run(self): 29 | """Run all tests!""" 30 | errno = call(['py.test', '--cov=csaopt', '--cov-report=term-missing']) 31 | raise SystemExit(errno) 32 | 33 | 34 | setup( 35 | name='CSAOpt', 36 | version=__version__, 37 | description='Cloud based simulated annealing optimization framework', 38 | long_description=long_description, 39 | url='https://github.com/d53dave/csaopt', 40 | author='David Sere', 41 | author_email='dave@d53dev.net', 42 | license='MIT', 43 | classifiers=[ 44 | 'Intended Audience :: Developers', 'Intended Audience :: Science/Research', 45 | 'Topic :: Scientific/Engineering', 46 | 'License :: OSI Approved :: MIT License', 47 | 'Natural Language :: English', 48 | 'Operating System :: 
POSIX :: Linux', 49 | 'Programming Language :: Python :: 3.6', 50 | ], 51 | keywords='cli', 52 | packages=find_packages(exclude=['docs', 'tests*']), 53 | install_requires=['docopt'], 54 | extras_require={ 55 | 'test': ['coverage', 'pytest', 'pytest-cov'], 56 | }, 57 | entry_points={ 58 | 'console_scripts': [ 59 | 'csaopt=csaopt.cli:main', 60 | ], 61 | }, 62 | cmdclass={'test': RunTests}, 63 | ) 64 | -------------------------------------------------------------------------------- /sonar-project.properties: -------------------------------------------------------------------------------- 1 | # must be unique in a given SonarQube instance 2 | sonar.projectKey=d53dave_csaopt 3 | sonar.organization=d53dave-github 4 | sonar.sources=./csaopt 5 | sonar.tests=./tests 6 | sonar.python.coverage.reportPaths=./*coverage-*.xml 7 | sonar.host.url=https://sonarcloud.io 8 | sonar.language=py -------------------------------------------------------------------------------- /tests/context.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | 3 | import os 4 | import sys 5 | from distutils import dir_util 6 | 7 | sys.path.insert(0, os.path.abspath('.')) 8 | 9 | from csaopt import Runner, ExecutionType, ConsolePrinter, Context as AppContext 10 | from csaopt.utils import get_configs, docker_available 11 | from csaopt.model import Model, RandomDistribution, Precision 12 | from csaopt.model_loader.model_loader import ModelLoader, ModelValidator, ValidationError 13 | from csaopt.jobs.jobmanager import JobManager, Job 14 | from csaopt.instancemanager.awstools import AWSTools, Instance 15 | from csaopt.broker import Broker, WorkerCommand 16 | 17 | 18 | def copy_folder_contents(src, dest): 19 | 20 | try: 21 | dir_util.copy_tree(src, dest) 22 | except dir_util.DistutilsFileError as e: 23 | print('Error while copying folder contents from {} to {}: {}'.format(src, dest, e)) 24 | raise 25 | -------------------------------------------------------------------------------- /tests/e2e/csaopt_e2e.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d53dave/csaopt/a655d87a8577d18a7a714431f4237e4c9ebbf7e8/tests/e2e/csaopt_e2e.conf -------------------------------------------------------------------------------- /tests/test_aws.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import responses 3 | import base64 4 | 5 | from moto import mock_ec2 6 | from pyhocon import ConfigFactory 7 | from context import AWSTools, AppContext, ConsolePrinter, Instance 8 | 9 | from ipaddress import IPv4Address 10 | 11 | 12 | @pytest.fixture 13 | def internal_conf(): 14 | return ConfigFactory.parse_file('csaopt/internal/csaopt-internal.conf') 15 | 16 | 17 | @pytest.fixture 18 | def conf(): 19 | return ConfigFactory.parse_string(""" 20 | { 21 | remote { 22 | aws { 23 | region = eu-central-1 24 | secret_key = HQefGxVMHFnKDb7E9SWHlG9RqXFiWHkku2quV8jb 25 | access_key = AKXBREA5T5XLSQWEXB4KQ 26 | worker_count = 2 27 | timeout = 500 28 | broker_ami = ami-dca37ea5 # moto will complain about nonexistent AMIs otherwise 29 | worker_ami = ami-dca37ea5 30 | } 31 | } 32 | } 33 | """) 34 | 35 | 36 | def isBase64(x): 37 | return base64.b64encode(base64.b64decode(x)) == x.replace('\n', '').encode('ascii') 38 | 39 | 40 | @pytest.fixture 41 | def context(conf, internal_conf): 42 | return AppContext(ConsolePrinter(internal_conf), [conf], internal_conf) 43 | 44 | 45 | @pytest.fixture 46 | def 
awstools(context): 47 | return AWSTools(context.configs[0], context.internal_config) 48 | 49 | 50 | def test_loads_userdata(awstools): 51 | assert awstools.user_data_scripts['broker'] is not None 52 | assert awstools.user_data_scripts['worker'] is not None 53 | assert len(awstools.user_data_scripts['broker']) > 0 54 | assert len(awstools.user_data_scripts['worker']) > 0 55 | 56 | 57 | def test_create_security_group(awstools): 58 | with mock_ec2(): 59 | groupId = awstools._create_sec_group(name='testsecgroup') 60 | 61 | assert awstools.ec2_resource.SecurityGroup(groupId).group_name == 'testsecgroup' 62 | 63 | 64 | def test_remove_security_group(awstools): 65 | with mock_ec2(): 66 | response = awstools.ec2_client.create_security_group( 67 | GroupName="test_group", Description='Security Group for CSAOpt') 68 | 69 | security_group_id = response['GroupId'] 70 | awstools._remove_sec_group(security_group_id) 71 | 72 | security_group_iterator = awstools.ec2_resource.security_groups.all() 73 | for sec_grp in security_group_iterator: 74 | print(sec_grp) 75 | assert sec_grp.id != security_group_id 76 | 77 | 78 | def test_start_instances(awstools): 79 | with mock_ec2(): 80 | awstools.security_group_id = 'test_sec_group' 81 | awstools.debug_on_cpu = True 82 | awstools.broker_port = 4242 83 | awstools.broker_password = 'testpassword' 84 | broker, workers = awstools._provision_instances(timeout_ms=100, count=2, **awstools.provision_args) 85 | 86 | assert len(workers) == 2 87 | assert broker is not None 88 | assert sum([len(r['Instances']) for r in awstools.ec2_client.describe_instances()['Reservations']]) == 3 89 | 90 | broker_userdata = base64.b64decode( 91 | awstools.ec2_client.describe_instance_attribute( 92 | Attribute='userData', InstanceId=broker.instance_id)['UserData']['Value']).decode('ascii') 93 | 94 | assert str(awstools.broker_port) in broker_userdata 95 | assert awstools.broker_password in broker_userdata 96 | 97 | worker0_userdata = base64.b64decode( 98 | awstools.ec2_client.describe_instance_attribute( 99 | Attribute='userData', InstanceId=workers[0].instance_id)['UserData']['Value']).decode('ascii') 100 | 101 | assert 'NUMBA_ENABLE_CUDASIM=1' in worker0_userdata 102 | assert str(broker.private_ip_address) in worker0_userdata 103 | assert str(awstools.broker_port) in worker0_userdata 104 | assert awstools.broker_password in worker0_userdata 105 | 106 | 107 | def test_get_instances(awstools): 108 | with mock_ec2(): 109 | awstools.security_group_id = 'test_sec_group' 110 | awstools.broker, awstools.workers = awstools._provision_instances( 111 | timeout_ms=100, count=4, **awstools.provision_args) 112 | 113 | broker, workers = awstools.get_running_instances() 114 | 115 | assert len(workers) == 4 116 | assert broker is not None 117 | 118 | 119 | def test_context_manager(context): 120 | with mock_ec2(): 121 | responses.add(responses.GET, 'https://api.ipify.org/', body='192.168.0.1', status=200) 122 | with AWSTools(context.configs[0], context.internal_config) as awstools: 123 | worker_instance_ids = [w.id for w in awstools.workers] 124 | broker_id = awstools.broker.id 125 | assert len(awstools.ec2_client.describe_instances()) == 2 126 | 127 | for instance in awstools.ec2_resource.instances.all(): 128 | if instance.id in worker_instance_ids or instance.id == broker_id: 129 | assert instance.state['Name'] == 'terminated' 130 | 131 | 132 | def test_instance_ip(): 133 | instance = Instance('id', '192.168.0.1') 134 | 135 | assert instance.public_ip.is_private is True 136 | assert 
instance.public_ip == IPv4Address('192.168.0.1') 137 | 138 | 139 | def test_instance_ip_bad(): 140 | with pytest.raises(ValueError): 141 | Instance('id', '442.168.0.1') 142 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | 5 | def test_invalid_conf(): 6 | pass -------------------------------------------------------------------------------- /tests/test_e2e.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | import subprocess 4 | 5 | from pathlib import Path 6 | 7 | 8 | @pytest.mark.skipif( 9 | not os.getenv('CSAOPT_RUN_E2E'), 10 | reason='E2E Tests are disabled by default. Set the CSAOPT_RUN_E2E env variable to enable') 11 | def test_end2end(): 12 | conf_path = Path('tests/e2e/csaopt_e2e.conf').resolve() 13 | model_path = Path('tests/e2e/model/').resolve() 14 | 15 | try: 16 | csaopt_proc = subprocess.Popen( 17 | ['csaopt', '-conf', str(conf_path), '--model', str(model_path)], 18 | stdout=subprocess.PIPE, 19 | stderr=subprocess.PIPE) 20 | 21 | out, err = csaopt_proc.communicate() 22 | returncode = csaopt_proc.returncode 23 | 24 | assert returncode == 0 # a successful run is expected to exit cleanly 25 | assert len(err) == 0 26 | assert '(0, 0)' in out.decode() 27 | 28 | finally: 29 | cleanup_proc = subprocess.Popen(['csaopt', '--cleanup']) 30 | returncode = cleanup_proc.wait() 31 | 32 | if returncode != 0: 33 | assert not """ 34 | **WARNING** csaopt --cleanup exited with a non-zero result 35 | ========================================================== 36 | 37 | This usually means that it could not successfully terminate 38 | all instances that it provisioned on the cloud service. 39 | 40 | ** MAKE SURE TO MANUALLY CHECK AND TERMINATE INSTANCES ** 41 | ** IN THE CSAOPT GROUP/TAG. RUNNING MACHINES CONTINUE TO ** 42 | ** INCUR COSTS. YOU HAVE BEEN WARNED! 
** 43 | """ 44 | -------------------------------------------------------------------------------- /tests/test_jobmanager.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from mock import call 4 | from dramatiq import Worker 5 | from context import JobManager, AppContext, Broker, ExecutionType, Model 6 | from context import RandomDistribution, Precision, Job, WorkerCommand 7 | from pyhocon import ConfigTree, ConfigFactory 8 | from collections import OrderedDict 9 | 10 | 11 | class MockBroker(): 12 | pass 13 | 14 | 15 | @pytest.fixture 16 | def internal_conf(): 17 | return ConfigFactory.parse_file('csaopt/internal/csaopt-internal.conf') 18 | 19 | 20 | def build_async_mock_results(result): 21 | async def mock_deploy_results(timeout): 22 | return result 23 | 24 | return mock_deploy_results 25 | 26 | 27 | @pytest.fixture 28 | def stub_broker(mocker): 29 | o = MockBroker() 30 | o.send_to_queue = mocker.Mock() 31 | o.broadcast = mocker.Mock() 32 | o.clear_queue_messages = mocker.Mock() 33 | o.queue_ids = [] 34 | return o 35 | 36 | 37 | @pytest.fixture() 38 | def stub_worker(stub_broker): 39 | worker = Worker(stub_broker.dramatiq_broker, worker_timeout=100) 40 | worker.start() 41 | yield worker 42 | worker.stop() 43 | 44 | 45 | @pytest.mark.asyncio 46 | async def test_submit_model_not_deployed(stub_broker, internal_conf): 47 | # TODO Remove ExecutionType from AppContext, it's not required 48 | ctx = AppContext(None, None, internal_conf) 49 | model = Model( 50 | name='testmodel', 51 | dimensions=3, 52 | precision=Precision.Float32, 53 | distribution=RandomDistribution.Uniform, 54 | state_shape=2, 55 | opt_globals=None, 56 | functions=[]) 57 | configs = [{}] 58 | 59 | jobmanager = JobManager(ctx, stub_broker, [model], configs) 60 | 61 | with pytest.raises(AssertionError): 62 | await jobmanager.submit() 63 | 64 | 65 | @pytest.mark.asyncio 66 | async def test_wait_empty_jobs(stub_broker, internal_conf): 67 | ctx = AppContext(None, None, internal_conf) 68 | model = Model( 69 | name='testmodel', 70 | dimensions=3, 71 | precision=Precision.Float32, 72 | distribution=RandomDistribution.Uniform, 73 | state_shape=2, 74 | opt_globals=None, 75 | functions=[]) 76 | configs = [{}] 77 | 78 | jobmanager = JobManager(ctx, stub_broker, [model], configs) 79 | 80 | with pytest.raises(AssertionError): 81 | await jobmanager.wait_for_results() 82 | 83 | 84 | @pytest.mark.asyncio 85 | async def test_deploy_single_model_single_conf(mocker, stub_broker, internal_conf): 86 | ctx = AppContext(None, None, internal_conf) 87 | model = Model( 88 | name='testmodel', 89 | dimensions=3, 90 | precision=Precision.Float32, 91 | distribution=RandomDistribution.Uniform, 92 | state_shape=2, 93 | opt_globals=None, 94 | functions=[]) 95 | 96 | stub_broker.queue_ids = ['queue1'] 97 | 98 | stub_broker.get_all_results = build_async_mock_results({'queue1': ['model_deployed']}) 99 | configs = [{'test': 'deploy_single_model_multi_conf'}] 100 | 101 | jobmanager = JobManager(ctx, stub_broker, [model], configs) 102 | await jobmanager.deploy_model() 103 | 104 | stub_broker.broadcast.assert_called_with(WorkerCommand.DeployModel, model.to_dict()) 105 | assert jobmanager.models_deployed is True 106 | 107 | 108 | @pytest.mark.asyncio 109 | async def test_deploy_single_model_single_conf_failed_missing_response(mocker, stub_broker, internal_conf): 110 | ctx = AppContext(None, None, internal_conf) 111 | model = Model( 112 | name='testmodel', 113 | dimensions=3, 114 | precision=Precision.Float32, 
115 | distribution=RandomDistribution.Uniform, 116 | state_shape=2, 117 | opt_globals=None, 118 | functions=[]) 119 | 120 | stub_broker.queue_ids = ['queue1', 'queue2'] 121 | 122 | stub_broker.get_all_results = build_async_mock_results({'queue1': ['model_deployed']}) 123 | configs = [{'test': 'deploy_single_model_multi_conf'}] 124 | 125 | jobmanager = JobManager(ctx, stub_broker, [model], configs) 126 | with pytest.raises(AssertionError): 127 | await jobmanager.deploy_model() 128 | 129 | stub_broker.broadcast.assert_called_with(WorkerCommand.DeployModel, model.to_dict()) 130 | assert jobmanager.models_deployed is False 131 | 132 | 133 | @pytest.mark.asyncio 134 | async def test_deploy_single_model_single_conf_failed_with_error(mocker, stub_broker, internal_conf): 135 | ctx = AppContext(None, None, internal_conf) 136 | model = Model( 137 | name='testmodel', 138 | dimensions=3, 139 | precision=Precision.Float32, 140 | distribution=RandomDistribution.Uniform, 141 | state_shape=2, 142 | opt_globals=None, 143 | functions=[]) 144 | 145 | stub_broker.queue_ids = ['queue1', 'queue2'] 146 | 147 | stub_broker.get_all_results = build_async_mock_results({'queue1': ['model_deployed'], 'queue2': ['lol, error']}) 148 | configs = [{'test': 'deploy_single_model_multi_conf'}] 149 | 150 | jobmanager = JobManager(ctx, stub_broker, [model], configs) 151 | with pytest.raises(AssertionError): 152 | await jobmanager.deploy_model() 153 | 154 | stub_broker.broadcast.assert_called_with(WorkerCommand.DeployModel, model.to_dict()) 155 | assert jobmanager.models_deployed is False 156 | 157 | 158 | @pytest.mark.asyncio 159 | async def test_deploy_single_model_multi_conf(mocker, stub_broker, internal_conf): 160 | ctx = AppContext(None, None, internal_conf) 161 | model = Model( 162 | name='testmodel', 163 | dimensions=3, 164 | precision=Precision.Float32, 165 | distribution=RandomDistribution.Uniform, 166 | state_shape=2, 167 | opt_globals=None, 168 | functions=[]) 169 | 170 | configs = [{'test': 'deploy_single_model_multi_conf'}, {'test': 'deploy_single_model_multi_conf'}] 171 | 172 | stub_broker.queue_ids = ['queue1', 'queue2'] 173 | 174 | stub_broker.get_all_results = build_async_mock_results({'queue1': ['model_deployed'], 'queue2': ['model_deployed']}) 175 | 176 | jobmanager = JobManager(ctx, stub_broker, [model], configs) 177 | await jobmanager.deploy_model() 178 | 179 | stub_broker.broadcast.assert_called_with(WorkerCommand.DeployModel, model.to_dict()) 180 | assert jobmanager.models_deployed is True 181 | 182 | 183 | @pytest.mark.asyncio 184 | async def test_deploy_multi_model_single_conf(mocker, stub_broker, internal_conf): 185 | ctx = AppContext(None, None, internal_conf) 186 | model = Model( 187 | name='testmodel1', 188 | dimensions=3, 189 | precision=Precision.Float32, 190 | distribution=RandomDistribution.Uniform, 191 | state_shape=2, 192 | opt_globals=None, 193 | functions=[]) 194 | 195 | model2 = Model( 196 | name='testmodel2', 197 | dimensions=3, 198 | precision=Precision.Float32, 199 | distribution=RandomDistribution.Uniform, 200 | state_shape=2, 201 | opt_globals=None, 202 | functions=[]) 203 | 204 | configs = [{'test': 'deploy_single_model_multi_conf'}] 205 | 206 | stub_broker.queue_ids = ['queue1', 'queue2'] 207 | stub_broker.get_all_results = build_async_mock_results({'queue1': ['model_deployed'], 'queue2': ['model_deployed']}) 208 | 209 | jobmanager = JobManager(ctx, stub_broker, [model, model2], configs) 210 | await jobmanager.deploy_model() 211 | 212 | stub_broker.send_to_queue.assert_has_calls([ 
213 | call('queue1', WorkerCommand.DeployModel, model.to_dict()), 214 | call('queue2', WorkerCommand.DeployModel, model2.to_dict()) 215 | ]) 216 | assert jobmanager.models_deployed is True 217 | 218 | 219 | @pytest.mark.asyncio 220 | async def test_deploy_multi_model_multi_conf(mocker, stub_broker, internal_conf): 221 | ctx = AppContext(None, None, internal_conf) 222 | model = Model( 223 | name='testmodel1', 224 | dimensions=3, 225 | precision=Precision.Float32, 226 | distribution=RandomDistribution.Uniform, 227 | state_shape=2, 228 | opt_globals=None, 229 | functions=[]) 230 | 231 | model2 = Model( 232 | name='testmodel2', 233 | dimensions=3, 234 | precision=Precision.Float32, 235 | distribution=RandomDistribution.Uniform, 236 | state_shape=2, 237 | opt_globals=None, 238 | functions=[]) 239 | 240 | configs = [{'test': 'deploy_single_model_multi_conf'}, {'test2': 'deploy_single_model_multi_conf_2'}] 241 | 242 | stub_broker.queue_ids = ['queue1', 'queue2'] 243 | stub_broker.get_all_results = build_async_mock_results({'queue1': ['model_deployed'], 'queue2': ['model_deployed']}) 244 | 245 | jobmanager = JobManager(ctx, stub_broker, [model, model2], configs) 246 | await jobmanager.deploy_model() 247 | 248 | stub_broker.send_to_queue.assert_has_calls([ 249 | call('queue1', WorkerCommand.DeployModel, model.to_dict()), 250 | call('queue2', WorkerCommand.DeployModel, model2.to_dict()) 251 | ]) 252 | assert jobmanager.models_deployed is True 253 | 254 | 255 | @pytest.mark.asyncio 256 | async def test_job_single_model_single_conf(stub_broker, mocker, internal_conf): 257 | ctx = AppContext(None, None, internal_conf) 258 | models = [ 259 | Model( 260 | name='testmodel', 261 | dimensions=3, 262 | precision=Precision.Float32, 263 | distribution=RandomDistribution.Uniform, 264 | state_shape=2, 265 | opt_globals=None, 266 | functions=[]) 267 | ] 268 | 269 | configs = [{'test': 'deploy_single_model_multi_conf'}] 270 | stub_broker.queue_ids = ['queue1'] 271 | 272 | jobmanager = JobManager(ctx, stub_broker, models, configs) 273 | jobmanager.models_deployed = True 274 | 275 | jobs = await jobmanager.submit() 276 | assert len(jobs) == 1 277 | 278 | stub_broker.broadcast.assert_called() 279 | 280 | broadcast = stub_broker.broadcast.call_args[0] 281 | assert broadcast[0] is WorkerCommand.RunOptimization 282 | assert broadcast[1]['params'] == configs[0] 283 | 284 | 285 | @pytest.mark.asyncio 286 | async def test_job_multi_model_single_conf(stub_broker, mocker, internal_conf): 287 | ctx = AppContext(None, None, internal_conf) 288 | models = [ 289 | Model( 290 | name='testmodel1', 291 | dimensions=3, 292 | precision=Precision.Float32, 293 | distribution=RandomDistribution.Uniform, 294 | state_shape=2, 295 | opt_globals=None, 296 | functions=[]), 297 | Model( 298 | name='testmodel2', 299 | dimensions=3, 300 | precision=Precision.Float32, 301 | distribution=RandomDistribution.Uniform, 302 | state_shape=2, 303 | opt_globals=None, 304 | functions=[]) 305 | ] 306 | 307 | configs = [{'test': 'deploy_single_model_multi_conf'}] 308 | 309 | stub_broker.queue_ids = ['queue1', 'queue2'] 310 | 311 | jobmanager = JobManager(ctx, stub_broker, models, configs) 312 | jobmanager.models_deployed = True 313 | 314 | await jobmanager.submit() 315 | assert stub_broker.send_to_queue.call_count == 2 316 | 317 | jobs = stub_broker.send_to_queue.call_args_list 318 | assert jobs[0][0][0] == 'queue1' 319 | assert jobs[0][0][1] == WorkerCommand.RunOptimization 320 | assert jobs[0][0][2]['params'] == configs[0] 321 | assert 
jobs[0][0][2]['model'] == 'testmodel1' 322 | 323 | assert jobs[1][0][0] == 'queue2' 324 | assert jobs[1][0][1] == WorkerCommand.RunOptimization 325 | assert jobs[1][0][2]['params'] == configs[0] 326 | assert jobs[1][0][2]['model'] == 'testmodel2' 327 | 328 | 329 | @pytest.mark.asyncio 330 | async def test_job_single_model_multi_conf(stub_broker, mocker, internal_conf): 331 | ctx = AppContext(None, None, internal_conf) 332 | models = [ 333 | Model( 334 | name='testmodel1', 335 | dimensions=3, 336 | precision=Precision.Float32, 337 | distribution=RandomDistribution.Uniform, 338 | state_shape=2, 339 | opt_globals=None, 340 | functions=[]) 341 | ] 342 | 343 | configs = [{'test': 'deploy_multi_model_multi_conf'}, {'test2': 'deploy_multi_model_multi_conf'}] 344 | 345 | stub_broker.queue_ids = ['queue1', 'queue2'] 346 | 347 | jobmanager = JobManager(ctx, stub_broker, models, configs) 348 | jobmanager.models_deployed = True 349 | 350 | await jobmanager.submit() 351 | assert stub_broker.send_to_queue.call_count == 2 352 | 353 | jobs = stub_broker.send_to_queue.call_args_list 354 | assert jobs[0][0][0] == 'queue1' 355 | assert jobs[0][0][1] == WorkerCommand.RunOptimization 356 | assert jobs[0][0][2]['params'] == configs[0] 357 | assert jobs[0][0][2]['model'] == 'testmodel1' 358 | 359 | assert jobs[1][0][0] == 'queue2' 360 | assert jobs[1][0][1] == WorkerCommand.RunOptimization 361 | assert jobs[1][0][2]['params'] == configs[1] 362 | assert jobs[1][0][2]['model'] == 'testmodel1' 363 | 364 | 365 | @pytest.mark.asyncio 366 | async def test_job_multi_model_multi_conf(stub_broker, mocker, internal_conf): 367 | ctx = AppContext(None, None, internal_conf) 368 | models = [ 369 | Model( 370 | name='testmodel1', 371 | dimensions=3, 372 | precision=Precision.Float32, 373 | distribution=RandomDistribution.Uniform, 374 | state_shape=2, 375 | opt_globals=None, 376 | functions=[]), 377 | Model( 378 | name='testmodel2', 379 | dimensions=3, 380 | precision=Precision.Float32, 381 | distribution=RandomDistribution.Uniform, 382 | state_shape=2, 383 | opt_globals=None, 384 | functions=[]) 385 | ] 386 | 387 | configs = [{'test': 'deploy_multi_model_multi_conf'}, {'test2': 'deploy_multi_model_multi_conf'}] 388 | 389 | stub_broker.queue_ids = ['queue1', 'queue2'] 390 | 391 | jobmanager = JobManager(ctx, stub_broker, models, configs) 392 | jobmanager.models_deployed = True 393 | 394 | await jobmanager.submit() 395 | assert stub_broker.send_to_queue.call_count == 2 396 | 397 | jobs = stub_broker.send_to_queue.call_args_list 398 | assert jobs[0][0][0] == 'queue1' 399 | assert jobs[0][0][1] == WorkerCommand.RunOptimization 400 | assert jobs[0][0][2]['params'] == configs[0] 401 | assert jobs[0][0][2]['model'] == 'testmodel1' 402 | 403 | assert jobs[1][0][0] == 'queue2' 404 | assert jobs[1][0][1] == WorkerCommand.RunOptimization 405 | assert jobs[1][0][2]['params'] == configs[1] 406 | assert jobs[1][0][2]['model'] == 'testmodel2' 407 | 408 | 409 | def test_get_execution_type_multimulti(internal_conf): 410 | ctx = AppContext(None, None, internal_conf) 411 | with pytest.raises(AssertionError): 412 | confs = ['conf/1', 'conf/2', 'conf/3'] 413 | models = ['model/1', 'model/2'] 414 | JobManager(ctx, None, models, confs) 415 | 416 | with pytest.raises(AssertionError): 417 | confs = ['conf/1', 'conf/2'] 418 | models = ['model/1', 'model/2', 'model/3'] 419 | JobManager(ctx, None, models, confs) 420 | 421 | confs = ['conf/1', 'conf/2'] 422 | models = ['model/1', 'model/2'] 423 | exec_type = JobManager(ctx, None, models, 
confs).execution_type 424 | 425 | assert exec_type is ExecutionType.MultiModelMultiConf 426 | 427 | 428 | def test_get_execution_type_singlemulti(internal_conf): 429 | ctx = AppContext(None, None, internal_conf) 430 | confs = ['conf/1', 'conf/2'] 431 | models = ['model/1'] 432 | exec_type = JobManager(ctx, None, models, confs).execution_type 433 | 434 | assert exec_type is ExecutionType.SingleModelMultiConf 435 | 436 | 437 | def test_get_execution_type_singlesingle(internal_conf): 438 | ctx = AppContext(None, None, internal_conf) 439 | confs = ['conf/1'] 440 | models = ['model/1'] 441 | exec_type = JobManager(ctx, None, models, confs).execution_type 442 | 443 | assert exec_type is ExecutionType.SingleModelSingleConf 444 | 445 | 446 | def test_get_execution_type_multisingle(internal_conf): 447 | ctx = AppContext(None, None, internal_conf) 448 | confs = ['conf/1'] 449 | models = ['model/1', 'model/2'] 450 | exec_type = JobManager(ctx, None, models, confs).execution_type 451 | 452 | assert exec_type is ExecutionType.MultiModelSingleConf 453 | 454 | 455 | def test_get_execution_type_nomodel(internal_conf): 456 | ctx = AppContext(None, None, internal_conf) 457 | confs = ['conf/1'] 458 | models = [] 459 | 460 | with pytest.raises(AssertionError): 461 | JobManager(ctx, None, models, confs) 462 | 463 | 464 | def test_get_execution_type_noconf(internal_conf): 465 | ctx = AppContext(None, None, internal_conf) 466 | confs = [] 467 | models = ['model/1'] 468 | 469 | with pytest.raises(AssertionError): 470 | JobManager(ctx, None, models, confs) 471 | -------------------------------------------------------------------------------- /tests/test_model_loader.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import imp 3 | 4 | from pyhocon import ConfigFactory 5 | 6 | from context import ModelLoader, ModelValidator, ValidationError, Precision, RandomDistribution 7 | 8 | 9 | @pytest.fixture 10 | def internal_conf(): 11 | return ConfigFactory.parse_file('csaopt/internal/csaopt-internal.conf') 12 | 13 | 14 | @pytest.fixture 15 | def conf(): 16 | return ConfigFactory.parse_string(""" 17 | { 18 | model { 19 | name = testopt 20 | path = examples/langermann/langermann_opt.py 21 | skip_typecheck = True 22 | dimensions = 2 23 | } 24 | } 25 | """) 26 | 27 | 28 | def test_build_model(conf, internal_conf, mocker): 29 | validator = ModelValidator() 30 | validator.validate_functions = mocker.stub(name='validate_functions_stub') 31 | loader = ModelLoader(conf, internal_conf, validator) 32 | model = loader.get_model() 33 | 34 | assert model is not None 35 | assert len(model.functions) == 6 36 | validator.validate_functions.assert_called_once() 37 | 38 | 39 | def test_validator_has_errors(conf, internal_conf, mocker): 40 | validator = ModelValidator() 41 | validator.validate_functions = mocker.Mock(return_value=[ValidationError('this is a test error')]) 42 | loader = ModelLoader(conf, internal_conf, validator) 43 | model = loader.get_model() 44 | 45 | assert model is None 46 | assert len(loader.errors) == 1 47 | 48 | 49 | def test_should_run_type_check(conf, internal_conf, mocker): 50 | conf['model']['skip_typecheck'] = False 51 | 52 | validator = ModelValidator() 53 | validator.validate_functions = mocker.stub(name='validate_functions_stub') 54 | validator.validate_typing = mocker.stub(name='validate_typing_stub') 55 | 56 | ModelLoader(conf, internal_conf, validator) 57 | validator.validate_functions.assert_called_once() 58 | 
validator.validate_typing.assert_called_once() 59 | 60 | 61 | def test_loading_py_model_failed(conf, internal_conf, mocker): 62 | mocker.patch('imp.load_source', return_value=None) 63 | 64 | validator = ModelValidator() 65 | validator.validate_functions = mocker.stub(name='validate_functions_stub') 66 | 67 | with pytest.raises(AssertionError): 68 | ModelLoader(conf, internal_conf, validator) 69 | 70 | imp.load_source.assert_called_once_with('testopt', 'examples/langermann/langermann_opt.py') 71 | 72 | 73 | def test_globals(conf, internal_conf, mocker): 74 | validator = ModelValidator() 75 | validator.validate_functions = mocker.stub(name='validate_functions_stub') 76 | validator.validate_typing = mocker.stub(name='validate_typing_stub') 77 | 78 | loader = ModelLoader(conf, internal_conf, validator) 79 | validator.validate_functions.assert_called_once() 80 | 81 | model = loader.get_model() 82 | assert 'm = 5' in model.globals 83 | assert 'c = (1, 2, 5, 2, 3)' in model.globals 84 | assert 'A = ((3, 5), (5, 2), (2, 1), (1, 4), (7, 9))' in model.globals 85 | 86 | assert 'from math import pi' not in model.globals 87 | 88 | 89 | def test_model_params(conf, internal_conf, mocker): 90 | validator = ModelValidator() 91 | validator.validate_functions = mocker.stub(name='validate_functions_stub') 92 | validator.validate_typing = mocker.stub(name='validate_typing_stub') 93 | 94 | loader = ModelLoader(conf, internal_conf, validator) 95 | validator.validate_functions.assert_called_once() 96 | 97 | model = loader.get_model() 98 | assert Precision.Float32 == model.precision 99 | assert RandomDistribution.Uniform == model.distribution 100 | assert 2 == model.dimensions 101 | -------------------------------------------------------------------------------- /tests/test_model_validator.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pyhocon import ConfigFactory 4 | 5 | from context import ModelValidator 6 | 7 | 8 | @pytest.fixture 9 | def internal_conf(): 10 | return ConfigFactory.parse_file('csaopt/internal/csaopt-internal.conf') 11 | 12 | 13 | @pytest.fixture 14 | def conf(): 15 | return ConfigFactory.parse_string(""" 16 | { 17 | remote { 18 | aws { 19 | region = eu-central-1 20 | secret_key = a123456 21 | access_key = b123456 22 | worker_count = 2 23 | timeout = 500 24 | } 25 | } 26 | } 27 | """) 28 | 29 | 30 | @pytest.fixture 31 | def validator(): 32 | return ModelValidator() 33 | 34 | 35 | def test_validate_fun_signature_len(validator: ModelValidator): 36 | def f_empty(): 37 | pass 38 | 39 | def f_two(a, b): 40 | return a + b 41 | 42 | error_empty = validator._validate_fun_signature_len('testfun1', f_empty, 0) 43 | error_empty_2 = validator._validate_fun_signature_len('testfun2', f_empty, 1) 44 | error_two = validator._validate_fun_signature_len('testfun3', f_two, 2) 45 | 46 | assert error_empty is None 47 | assert error_empty_2 is not None 48 | assert error_two is None 49 | -------------------------------------------------------------------------------- /tests/test_runner.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from context import Runner, ExecutionType, get_configs, docker_available 5 | 6 | 7 | class MockContext(): 8 | def __init__(self): 9 | self.obj = {} 10 | 11 | 12 | @pytest.mark.skipif(not docker_available(), reason='Docker is not available') 13 | def test_runner_ackley(): 14 | internal_conf = 
get_configs('csaopt/internal/csaopt-internal.conf') 15 | ctx = {} 16 | ctx['internal_conf'] = internal_conf 17 | 18 | runner = Runner(['examples/ackley/ackley_opt.py'], ['examples/ackley/ackley.conf'], ctx) 19 | 20 | runner.run() 21 | if len(runner.failures) > 0: 22 | raise Exception('Runner had failures: %s' % runner.failures) 23 | 24 | assert runner.best_value == pytest.approx(0, abs=0.2) 25 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import context # noqa 2 | --------------------------------------------------------------------------------