├── requirements.txt
├── docs
│   ├── algorithm.rst
│   ├── environment.rst
│   ├── toc.rst
│   ├── api
│   │   ├── srb.rst
│   │   ├── misc.rst
│   │   ├── bench.rst
│   │   ├── measure.rst
│   │   ├── spaces.rst
│   │   ├── envs.rst
│   │   ├── algo.rst
│   │   └── policy.rst
│   ├── index.rst
│   ├── Makefile
│   └── conf.py
├── .gitignore
├── .dockerignore
├── SafeRLBench
│   ├── envs
│   │   ├── _quadrocopter
│   │   │   ├── __init__.py
│   │   │   ├── quaternions.py
│   │   │   ├── quadrocopter_classes.py
│   │   │   └── quadrotor_dynamics.py
│   │   ├── __init__.py
│   │   ├── README.rst
│   │   ├── gym_wrap.py
│   │   ├── linear_car.py
│   │   ├── test.py
│   │   ├── mdp.py
│   │   ├── general_mountaincar.py
│   │   └── quadrocopter.py
│   ├── spaces
│   │   ├── __init__.py
│   │   ├── rd_space.py
│   │   ├── discrete_space.py
│   │   ├── test.py
│   │   └── bounded_space.py
│   ├── policy
│   │   ├── __init__.py
│   │   ├── controller.py
│   │   ├── test.py
│   │   ├── linear_policy.py
│   │   └── neural_network.py
│   ├── algo
│   │   ├── __init__.py
│   │   ├── test.py
│   │   ├── README.rst
│   │   ├── q_learning.py
│   │   ├── safeopt.py
│   │   └── policygradient.py
│   ├── __init__.py
│   ├── test
│   │   ├── test_measure.py
│   │   ├── test_integration.py
│   │   ├── test_configuration.py
│   │   └── test_bench.py
│   ├── error.py
│   ├── measure.py
│   ├── configuration.py
│   ├── base.py
│   └── monitor.py
├── requirements_dev.txt
├── tox.ini
├── .travis.yml
├── setup.py
├── LICENSE
├── misc
│   ├── Dockerfile.python3
│   └── Dockerfile.python2
├── test_code.sh
├── Makefile
└── examples
    ├── GettingStarted.ipynb
    └── SafeOpt.ipynb
/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy >= 1.7 2 | scipy >= 0.19.0 3 | six >= 1.10 4 | futures >= 3.0.5 5 | -------------------------------------------------------------------------------- /docs/algorithm.rst: -------------------------------------------------------------------------------- 1 | Algorithms 2 | ========== 3 | 4 | .. include:: ../SafeRLBench/algo/README.rst 5 | -------------------------------------------------------------------------------- /docs/environment.rst: -------------------------------------------------------------------------------- 1 | Environments 2 | ============ 3 | 4 | .. include:: ../SafeRLBench/envs/README.rst 5 | -------------------------------------------------------------------------------- /docs/toc.rst: -------------------------------------------------------------------------------- 1 | Content 2 | ======= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | algorithm 8 | environment 9 | api/srb 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .ipynb_checkpoints 3 | .DS_Store 4 | .idea 5 | .coverage 6 | covhtml 7 | MANIFEST 8 | _build 9 | 10 | *.pyc 11 | -------------------------------------------------------------------------------- /docs/api/srb.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | .. toctree:: 5 | 6 | algo 7 | envs 8 | policy 9 | spaces 10 | measure 11 | bench 12 | misc 13 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | examples 2 | htmlcov 3 | .travis.yml 4 | .gitignore 5 | .git 6 | *.pyc 7 | .ipynb_checkpoints 8 | __pycache__ 9 | SafeRLBench.egg-info 10 | -------------------------------------------------------------------------------- /docs/api/misc.rst: -------------------------------------------------------------------------------- 1 | Miscellaneous 2 | ============= 3 | 4 | .. 
contents:: Contents 5 | :local: 6 | 7 | Configuration 8 | ------------- 9 | 10 | .. autoclass:: SafeRLBench.SRBConfig 11 | :members: 12 | -------------------------------------------------------------------------------- /SafeRLBench/envs/_quadrocopter/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | from .quadrotor_dynamics import QuadrotorDynamics 4 | from .quadrocopter_classes import StateVector 5 | 6 | __all__ = ['QuadrotorDynamics', 'StateVector'] 7 | -------------------------------------------------------------------------------- /SafeRLBench/spaces/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | from .rd_space import RdSpace 4 | from .bounded_space import BoundedSpace 5 | from .discrete_space import DiscreteSpace 6 | 7 | __all__ = ['RdSpace', 'BoundedSpace', 'DiscreteSpace'] 8 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. SafeRLBench documentation master file, created by 2 | sphinx-quickstart on Mon Mar 27 16:08:01 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. include:: ../README.rst 7 | 8 | .. include:: toc.rst 9 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | gym >= 0.8.0 2 | tensorflow >= 1.0.0 3 | GPy >= 1.6.1 4 | 5 | # Style testing 6 | flake8 >= 3.3.0 7 | pep8 >= 1.7.0 8 | pep8-naming >= 0.4.1 9 | pydocstyle >= 1.1.1 10 | 11 | # Unittesting 12 | nose >= 1.3.7 13 | nose-exclude >= 0.5.0 14 | coverage >= 4.3.4 15 | unittest2 >= 1.1.0 16 | mock >= 2.0.0 17 | 18 | # Documentation 19 | sphinx >= 1.5.3 20 | -------------------------------------------------------------------------------- /docs/api/bench.rst: -------------------------------------------------------------------------------- 1 | Benchmark 2 | ========= 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | Bench 8 | ----- 9 | 10 | .. autoclass:: SafeRLBench.Bench 11 | :members: 12 | 13 | BenchConfig 14 | ----------- 15 | 16 | .. autoclass:: SafeRLBench.BenchConfig 17 | :members: 18 | 19 | BenchRun 20 | -------- 21 | 22 | .. 
autoclass:: SafeRLBench.bench.BenchRun 23 | :members: 24 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27, py35 3 | 4 | [testenv] 5 | deps = 6 | nose 7 | numpy 8 | theano 9 | mock 10 | unittest2 11 | commands = nosetests 12 | 13 | [flake8] 14 | ignore = E402,W503,D105,D413 15 | exclude = 16 | SafeRLBench/envs/_quadrocopter* 17 | 18 | [pydocstyle] 19 | add_ignore = D203,D105,D413 20 | match_dir = '[^\.\_].*' 21 | 22 | [coverage:run] 23 | omit = 24 | */_quadrocopter* 25 | -------------------------------------------------------------------------------- /SafeRLBench/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from .linear_policy import LinearPolicy, NoisyLinearPolicy 2 | from .linear_policy import DiscreteLinearPolicy 3 | from .neural_network import NeuralNetwork 4 | from .controller import NonLinearQuadrocopterController 5 | 6 | __all__ = [ 7 | 'LinearPolicy', 8 | 'NoisyLinearPolicy', 9 | 'DiscreteLinearPolicy', 10 | 'NeuralNetwork', 11 | 'NonLinearQuadrocopterController' 12 | ] 13 | -------------------------------------------------------------------------------- /docs/api/measure.rst: -------------------------------------------------------------------------------- 1 | Measure Module 2 | ============== 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | Measure 8 | ------- 9 | 10 | .. autoclass:: SafeRLBench.measure.Measure 11 | :members: 12 | 13 | BestPerformance 14 | --------------- 15 | 16 | .. autoclass:: SafeRLBench.measure.BestPerformance 17 | :members: 18 | 19 | SafetyMeasure 20 | ------------- 21 | 22 | .. autoclass:: SafeRLBench.measure.SafetyMeasure 23 | :members: 24 | -------------------------------------------------------------------------------- /SafeRLBench/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .general_mountaincar import GeneralMountainCar 4 | from .linear_car import LinearCar 5 | from .gym_wrap import GymWrap 6 | from .quadrocopter import Quadrocopter 7 | from .mdp import MDP 8 | 9 | __all__ = [ 10 | 'GeneralMountainCar', 11 | 'LinearCar', 12 | 'GymWrap', 13 | 'Quadrocopter', 14 | 'MDP' 15 | ] 16 | 17 | # TODO: Envs: Add module docs in __init__ file. 18 | -------------------------------------------------------------------------------- /docs/api/spaces.rst: -------------------------------------------------------------------------------- 1 | Spaces Module 2 | ============= 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | Space 8 | ----- 9 | 10 | .. autoclass:: SafeRLBench.Space 11 | :members: 12 | 13 | BoundedSpace 14 | ------------ 15 | 16 | .. autoclass:: SafeRLBench.spaces.BoundedSpace 17 | :members: 18 | 19 | DiscreteSpace 20 | ------------- 21 | 22 | .. autoclass:: SafeRLBench.spaces.DiscreteSpace 23 | :members: 24 | 25 | RdSpace 26 | ------- 27 | 28 | .. autoclass:: SafeRLBench.spaces.RdSpace 29 | :members: 30 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = SafeRLBench 8 | SOURCEDIR = . 
9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/api/envs.rst: -------------------------------------------------------------------------------- 1 | Environment Module 2 | ================== 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | EnvironmentBase 8 | --------------- 9 | 10 | .. autoclass:: SafeRLBench.EnvironmentBase 11 | :members: 12 | 13 | GeneralMountainCar 14 | ------------------ 15 | 16 | .. autoclass:: SafeRLBench.envs.GeneralMountainCar 17 | :members: 18 | 19 | GymWrap 20 | ------- 21 | 22 | .. autoclass:: SafeRLBench.envs.GymWrap 23 | :members: 24 | 25 | LinearCar 26 | --------- 27 | 28 | .. autoclass:: SafeRLBench.envs.LinearCar 29 | :members: 30 | 31 | MDP 32 | --- 33 | 34 | .. autoclass:: SafeRLBench.envs.MDP 35 | :members: 36 | 37 | Quadrocopter 38 | ------------ 39 | 40 | .. autoclass:: SafeRLBench.envs.Quadrocopter 41 | :members: 42 | -------------------------------------------------------------------------------- /SafeRLBench/algo/__init__.py: -------------------------------------------------------------------------------- 1 | """Algorithm Module. 2 | 3 | =================== ========================================= 4 | Algorithm 5 | ============================================================= 6 | A3C Asynchronous Advantage Actor-Critic 7 | PolicyGradient Different Policy Gradient Implementations 8 | DiscreteQLearning Q-Learning using a table 9 | SafeOpt Bayesian Optimization with SafeOpt 10 | SafeOptSwarm Bayesian Optimization with SafeOptSwarm 11 | =================== ========================================= 12 | """ 13 | 14 | from .policygradient import PolicyGradient 15 | from .safeopt import SafeOpt, SafeOptSwarm 16 | from .a3c import A3C 17 | from .q_learning import DiscreteQLearning 18 | 19 | __all__ = ['PolicyGradient', 'SafeOpt', 'A3C', 'DiscreteQLearning', 20 | 'SafeOptSwarm'] 21 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | sudo: required 4 | 5 | services: 6 | - docker 7 | 8 | env: 9 | - PYTHON=python2 10 | - PYTHON=python3 11 | 12 | # Setup docker container 13 | install: 14 | - docker build -f misc/Dockerfile.${PYTHON} -t test-image . 
15 | - docker ps -a 16 | - ci_env=`bash <(curl -s https://codecov.io/env)` 17 | 18 | # Run tests 19 | script: 20 | - docker run test-image flake8 SafeRLBench --exclude "test*.py,__init__.py,_quadrocopter" --ignore=E402,W503 --show-source 21 | - docker run test-image flake8 SafeRLBench --filename="__init__.py,test*.py" --ignore=F,E402,W503 --show-source 22 | - docker run test-image pydocstyle SafeRLBench --match='(?!__init__).*\.py' 23 | - docker run $ci_env test-image /bin/bash -c "nosetests --with-doctest --with-coverage --cover-package=SafeRLBench --verbosity=2 SafeRLBench ; bash <(curl -s https://codecov.io/bash)" 24 | -------------------------------------------------------------------------------- /docs/api/algo.rst: -------------------------------------------------------------------------------- 1 | Algorithm Module 2 | ================ 3 | 4 | This module contains implementations of different algorithms. Please refer to 5 | the class documentation for detailed instructions on how to use them. 6 | 7 | .. contents:: Contents 8 | :local: 9 | 10 | AlgorithmBase 11 | ------------- 12 | 13 | .. autoclass:: SafeRLBench.AlgorithmBase 14 | :members: 15 | 16 | A3C 17 | --- 18 | 19 | .. autoclass:: SafeRLBench.algo.A3C 20 | :members: 21 | 22 | Policy Gradient 23 | --------------- 24 | 25 | .. autoclass:: SafeRLBench.algo.PolicyGradient 26 | :members: 27 | 28 | Q-Learning 29 | ---------- 30 | 31 | .. autoclass:: SafeRLBench.algo.DiscreteQLearning 32 | :members: 33 | 34 | SafeOpt 35 | ------- 36 | 37 | .. autoclass:: SafeRLBench.algo.SafeOpt 38 | :members: 39 | 40 | SafeOptSwarm 41 | ------------ 42 | 43 | .. autoclass:: SafeRLBench.algo.SafeOptSwarm 44 | :members: 45 | -------------------------------------------------------------------------------- /SafeRLBench/spaces/rd_space.py: -------------------------------------------------------------------------------- 1 | """R^d with any shape.""" 2 | import numpy as np 3 | from SafeRLBench import Space 4 | 5 | 6 | class RdSpace(Space): 7 | """R^d Vectorspace.""" 8 | 9 | def __init__(self, shape): 10 | """Initialize with shape.""" 11 | self.shape = shape 12 | self._dim = None 13 | 14 | def contains(self, x): 15 | """Check if element is contained.""" 16 | return isinstance(x, np.ndarray) and x.shape == self.shape 17 | 18 | def sample(self): 19 | """Return arbitrary element.""" 20 | return np.ones(self.shape) 21 | 22 | @property 23 | def dimension(self): 24 | """Return dimension of the space.""" 25 | if self._dim is None: 26 | d = 1 27 | for i in range(len(self.shape)): 28 | d *= self.shape[i] 29 | self._dim = d 30 | return self._dim 31 | 32 | def __repr__(self): 33 | return 'RdSpace(shape=%s)' % str(self.shape) 34 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='SafeRLBench', 5 | version='1.0.1', 6 | author='Nicolas Ochsner', 7 | author_email='ochsnern@student.ethz.ch', 8 | packages=[ 9 | 'SafeRLBench', 10 | 'SafeRLBench.algo', 11 | 'SafeRLBench.envs', 12 | 'SafeRLBench.spaces', 13 | 'SafeRLBench.policy', 14 | ], 15 | description='Safe Reinforcement Learning Benchmark', 16 | keywords='reinforcement-learning benchmark', 17 | url='https://github.com/befelix/Safe-RL-Benchmark', 18 | install_requires=[ 19 | 'numpy >= 1.7', 20 | 'scipy >= 0.19.0', 21 | 'six >= 1.10', 22 | 'futures >= 3.0.5;python_version<"3.2"' 23 | ], 24 | extras_require={ 25 | 'gym': ['gym >= 0.8.0'], 26 | 
'safeopt': ['GPy >= 1.6.1', 'safeopt >= 0.1'], 27 | 'neural': ['tensorflow >= 1.0.0'], 28 | }, 29 | dependency_links=[ 30 | 'git+https://github.com/befelix/SafeOpt/tarball/master#egg=safeopt-0.1' 31 | ], 32 | ) 33 | -------------------------------------------------------------------------------- /docs/api/policy.rst: -------------------------------------------------------------------------------- 1 | Policy Module 2 | ============= 3 | 4 | .. contents:: 5 | :local: 6 | 7 | Bases 8 | ----- 9 | 10 | Deterministic Policy Base 11 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 12 | 13 | .. autoclass:: SafeRLBench.Policy 14 | :members: 15 | 16 | Probabilistic Policy Base 17 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 18 | 19 | .. autoclass:: SafeRLBench.ProbPolicy 20 | :members: 21 | 22 | Linear Policies 23 | --------------- 24 | 25 | LinearPolicy 26 | ~~~~~~~~~~~~ 27 | 28 | .. autoclass:: SafeRLBench.policy.LinearPolicy 29 | :members: 30 | 31 | DiscreteLinearPolicy 32 | ~~~~~~~~~~~~~~~~~~~~ 33 | 34 | .. autoclass:: SafeRLBench.policy.DiscreteLinearPolicy 35 | :members: 36 | 37 | NoisyLinearPolicy 38 | ~~~~~~~~~~~~~~~~~ 39 | 40 | .. autoclass:: SafeRLBench.policy.NoisyLinearPolicy 41 | :members: 42 | 43 | NonLinearQuadrocopterController 44 | ------------------------------- 45 | 46 | .. autoclass:: SafeRLBench.policy.NonLinearQuadrocopterController 47 | :members: 48 | 49 | NeuralNetwork 50 | ------------- 51 | 52 | .. autoclass:: SafeRLBench.policy.NeuralNetwork 53 | :members: 54 | -------------------------------------------------------------------------------- /SafeRLBench/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import logging 4 | 5 | from .configuration import SRBConfig 6 | 7 | # Initialize configuration 8 | config = SRBConfig(logging.getLogger(__name__)) 9 | 10 | from .monitor import AlgoMonitor, EnvMonitor 11 | from .base import EnvironmentBase, Space, AlgorithmBase, Policy, ProbPolicy 12 | from .bench import Bench, BenchConfig 13 | from . import algo 14 | from . import envs 15 | from . import policy 16 | from . import spaces 17 | from . import error 18 | from . import measure 19 | 20 | # Add things to all 21 | __all__ = ['EnvironmentBase', 22 | 'Space', 23 | 'AlgorithmBase', 24 | 'Policy', 25 | 'ProbPolicy', 26 | 'AlgoMonitor', 27 | 'EnvMonitor', 28 | 'SRBConfig', 29 | 'Bench', 30 | 'BenchConfig', 31 | 'envs', 32 | 'algo', 33 | 'policy', 34 | 'spaces', 35 | 'measure', 36 | 'error'] 37 | 38 | 39 | # Import test after __all__ (no documentation) 40 | # from numpy.testing import Tester 41 | # test = Tester().test 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Felix Berkenkamp 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /misc/Dockerfile.python3: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3 2 | 3 | ENV TF_CPP_MIN_LOG_LEVEL=2 4 | 5 | # Install build essentials and clean up 6 | RUN apt-get update --quiet \ 7 | && apt-get install -y --no-install-recommends --quiet build-essential \ 8 | && apt-get clean 9 | 10 | # Fix matplotlib build issues. 11 | RUN apt-get install -y --quiet libfreetype6-dev pkg-config libpng12-dev \ 12 | && apt-get clean 13 | 14 | # Update conda, install packages, and clean up 15 | RUN conda update conda --yes --quiet \ 16 | && conda install python=3.5 pip numpy scipy nose --yes --quiet \ 17 | && conda clean --yes --all \ 18 | && hash -r 19 | 20 | # Get the requirements files (separate from the main body) 21 | COPY requirements.txt requirements_dev.txt /code/ 22 | 23 | # Install requirements and clean up 24 | RUN pip --no-cache-dir install -r code/requirements.txt \ 25 | && rm -rf /root/.cache 26 | 27 | # Install dev requirements and clean up 28 | RUN pip --no-cache-dir install -r code/requirements_dev.txt \ 29 | && rm -rf /root/.cache 30 | 31 | # Install SafeOpt 32 | RUN git clone https://github.com/befelix/SafeOpt.git \ 33 | && cd SafeOpt \ 34 | && python setup.py install \ 35 | && rm -rf /SafeOpt 36 | 37 | # Copy the main code 38 | COPY . /code 39 | RUN cd /code && python setup.py develop 40 | 41 | WORKDIR /code 42 | -------------------------------------------------------------------------------- /SafeRLBench/spaces/discrete_space.py: -------------------------------------------------------------------------------- 1 | """Discrete space implementation.""" 2 | 3 | from SafeRLBench import Space 4 | 5 | import numpy as np 6 | 7 | 8 | class DiscreteSpace(Space): 9 | """Discrete Space. 10 | 11 | Let dim be the dimension of the space; then it contains the elements 12 | {0, 1, ..., dim-1}. 13 | 14 | Examples 15 | -------- 16 | Create a `DiscreteSpace` with three states: 17 | >>> from SafeRLBench.spaces import DiscreteSpace 18 | >>> discrete_space = DiscreteSpace(3) 19 | """ 20 | 21 | def __init__(self, dim): 22 | """Initialize `DiscreteSpace`. 23 | 24 | Parameters 25 | ---------- 26 | dim : int 27 | Number of states. 
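Examples
--------
A small sketch of how membership behaves; elements are the plain Python
integers ``0 .. dim-1``:

>>> space = DiscreteSpace(3)
>>> space.contains(2)
True
>>> space.contains(3)
False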
28 | """ 29 | assert dim > 0, ("If you need a discrete space without elements, you " 30 | + "do not need this class.") 31 | self._dim = dim 32 | 33 | def contains(self, x): 34 | """Check if element is part of the space.""" 35 | return (isinstance(x, int) and x >= 0 and x < self._dim) 36 | 37 | def sample(self): 38 | """Sample an element of the space.""" 39 | return np.random.randint(self._dim) 40 | 41 | @property 42 | def dimension(self): 43 | """Return dimension of the space.""" 44 | return self._dim 45 | 46 | def __repr__(self): 47 | return 'DiscreteSpace(dim=%d)' % self._dim 48 | -------------------------------------------------------------------------------- /misc/Dockerfile.python2: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda 2 | 3 | ENV TF_CPP_MIN_LOG_LEVEL=2 4 | 5 | # Install build essentials and clean up 6 | RUN apt-get update --quiet \ 7 | && apt-get install -y --no-install-recommends --quiet build-essential \ 8 | && apt-get clean 9 | 10 | # Fix matplotlib build issues. 11 | RUN apt-get install -y --quiet libfreetype6-dev pkg-config libpng12-dev \ 12 | && apt-get clean 13 | 14 | # Update conda, install packages, and clean up 15 | RUN conda update conda --yes --quiet \ 16 | && conda install python=2.7 pip numpy scipy nose --yes --quiet \ 17 | && conda clean --yes --all \ 18 | && hash -r 19 | 20 | # Get the requirements files (separate from the main body) 21 | COPY requirements.txt requirements_dev.txt /code/ 22 | 23 | # Install requirements and clean up 24 | RUN pip --no-cache-dir install -r code/requirements.txt \ 25 | && rm -rf /root/.cache 26 | 27 | # Install dev requirements and clean up 28 | RUN pip --no-cache-dir install -r code/requirements_dev.txt \ 29 | && rm -rf /root/.cache 30 | 31 | # Install extra python2 requirements 32 | RUN pip --no-cache-dir install futures multiprocessing \ 33 | && rm -rf /root/.cache 34 | 35 | # Install SafeOpt 36 | RUN git clone https://github.com/befelix/SafeOpt.git \ 37 | && cd SafeOpt \ 38 | && python setup.py install \ 39 | && rm -rf /SafeOpt 40 | 41 | # Copy the main code 42 | COPY . /code
43 | RUN cd /code && python setup.py develop 44 | 45 | WORKDIR /code 46 | -------------------------------------------------------------------------------- /test_code.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | module="SafeRLBench" 4 | 5 | get_script_dir () { 6 | SOURCE="${BASH_SOURCE[0]}" 7 | # While $SOURCE is a symlink, resolve it 8 | while [ -h "$SOURCE" ]; do 9 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 10 | SOURCE="$( readlink "$SOURCE" )" 11 | # If $SOURCE was a relative symlink (no "/" prefix), resolve it relative to the symlink's base directory 12 | [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" 13 | done 14 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 15 | echo "$DIR" 16 | } 17 | 18 | # tensorflow environment variable 19 | export TF_CPP_MIN_LOG_LEVEL='3' 20 | 21 | # Change to script root 22 | cd $(get_script_dir) 23 | GREEN='\033[0;32m' 24 | NC='\033[0m' 25 | 26 | BOLD=$(tput bold) 27 | NORMAL=$(tput sgr0) 28 | 29 | # Run style tests 30 | echo -e "${GREEN}${BOLD}Running style tests:${NC}" 31 | flake8 $module --exclude test*.py,__init__.py,_quadrocopter --show-source 32 | 33 | # Ignore import errors for __init__ and tests 34 | flake8 $module --filename=__init__.py,test*.py --ignore=F --show-source 35 | 36 | echo -e "${GREEN}${BOLD}Testing docstring conventions:${NC}" 37 | # Test docstring conventions 38 | pydocstyle $module --match='(?!__init__).*\.py' 2>&1 | grep -v "WARNING: __all__" 39 | 40 | echo -e "${GREEN}${BOLD}Running unit tests in current environment.${NC}" 41 | nosetests -v --with-doctest --with-coverage --cover-erase --cover-package=$module $module 2>&1 | grep -v "^Level " 42 | 43 | # Export html 44 | coverage html 45 | -------------------------------------------------------------------------------- /SafeRLBench/algo/test.py: -------------------------------------------------------------------------------- 1 | """Algorithm Tests.""" 2 | 3 | from SafeRLBench.algo import PolicyGradient, A3C 4 | from SafeRLBench.envs import LinearCar 5 | from .policygradient import CentralFDEstimator, estimators 6 | 7 | from SafeRLBench.policy import NeuralNetwork 8 | 9 | from unittest2 import TestCase 10 | from mock import MagicMock, Mock 11 | 12 | 13 | class TestPolicyGradient(TestCase): 14 | """PolicyGradient Test Class.""" 15 | 16 | def test_pg_init(self): 17 | """Test: POLICYGRADIENT: initialization.""" 18 | env_mock = MagicMock() 19 | pol_mock = Mock() 20 | 21 | for key, item in estimators.items(): 22 | pg = PolicyGradient(env_mock, pol_mock, estimator=key) 23 | self.assertIsInstance(pg.estimator, item) 24 | 25 | pg = PolicyGradient(env_mock, pol_mock, estimator=CentralFDEstimator) 26 | self.assertIsInstance(pg.estimator, CentralFDEstimator) 27 | 28 | self.assertRaises(ImportError, PolicyGradient, 29 | env_mock, pol_mock, CentralFDEstimator(env_mock)) 30 | 31 | 32 | class TestA3C(TestCase): 33 | """A3C Test Class.""" 34 | 35 | def test_a3c_init(self): 36 | """Test: A3C: initialization.""" 37 | a3c = A3C(LinearCar(), NeuralNetwork([2, 6, 1])) 38 | 39 | fields = ['environment', 'policy', 'max_it', 'num_workers', 'rate', 40 | 'done', 'policy', 'p_net', 'v_net', 'workers', 'threads', 41 | 'global_counter', 'sess'] 42 | 43 | for field in fields: 44 | assert hasattr(a3c, field) 45 | -------------------------------------------------------------------------------- /SafeRLBench/spaces/test.py: -------------------------------------------------------------------------------- 1 | """Tests for spaces module."""
2 | from __future__ import absolute_import 3 | 4 | from functools import partial 5 | import inspect 6 | 7 | from numpy import array 8 | import SafeRLBench.spaces as spaces 9 | 10 | 11 | """Dictionary storing initialization arguments for classes.""" 12 | class_arguments = { 13 | spaces.BoundedSpace: [array([-1, -2]), array([1, 0])], 14 | spaces.RdSpace: [(3, 2)], 15 | spaces.DiscreteSpace: [5] 16 | } 17 | 18 | 19 | class TestSpaces(object): 20 | """Wrap spaces tests.""" 21 | 22 | classes = [] 23 | 24 | @classmethod 25 | def setUpClass(cls): 26 | """Initialize classes list.""" 27 | for name, c in inspect.getmembers(spaces): 28 | if inspect.isclass(c): 29 | cls.classes.append(c) 30 | 31 | def exhaustive_tests(self): 32 | """Check that every space class has test arguments defined.""" 33 | for c in self.classes: 34 | if c not in class_arguments: 35 | assert(False) 36 | 37 | def generate_tests(self): 38 | """Generate tests for spaces implementations.""" 39 | for c in self.classes: 40 | if c in class_arguments: 41 | check = partial(self.check_contains) 42 | check.description = ('Test: ' + c.__name__.upper() 43 | + ': implementation.') 44 | yield check, c 45 | 46 | def check_contains(self, c): 47 | """Check that `sample` and `contains` are implemented.""" 48 | space = c(*class_arguments[c]) 49 | try: 50 | x = space.sample() 51 | b = space.contains(x) 52 | except NotImplementedError: 53 | assert(False) 54 | assert(b) 55 | -------------------------------------------------------------------------------- /SafeRLBench/test/test_measure.py: -------------------------------------------------------------------------------- 1 | from SafeRLBench.measure import BestPerformance, SafetyMeasure 2 | 3 | from mock import Mock 4 | from unittest2 import TestCase 5 | 6 | 7 | def _mock_run(val): 8 | run = Mock() 9 | monitor = Mock() 10 | monitor.rewards = range(val, val + 4) 11 | run.get_alg_monitor.return_value = monitor 12 | 13 | print(monitor.rewards) 14 | print(run.get_alg_monitor()) 15 | print(monitor) 16 | 17 | return run 18 | 19 | 20 | class TestMeasure(TestCase): 21 | """Test Measure classes.""" 22 | 23 | def test_best_performance(self): 24 | """Test: MEASURE: BestPerformance.""" 25 | run1 = _mock_run(0) 26 | run2 = _mock_run(1) 27 | 28 | measure = BestPerformance() 29 | self.assertIsNone(measure.result) 30 | 31 | measure([run1, run2]) 32 | result = measure.result 33 | 34 | self.assertEquals(result[0][0], run2) 35 | self.assertEquals(result[1][0], run1) 36 | 37 | self.assertEquals(result[0][1], 4) 38 | self.assertEquals(result[1][1], 3) 39 | 40 | best_result = measure.best_result 41 | 42 | self.assertEquals(best_result[0], run2) 43 | self.assertEquals(best_result[1], 4) 44 | 45 | def test_safety_measure(self): 46 | """Test: MEASURE: SafetyMeasure.""" 47 | measure = SafetyMeasure(0) 48 | self.assertIsNone(measure.result) 49 | 50 | run1 = _mock_run(-2) 51 | run2 = _mock_run(0) 52 | 53 | measure([run1, run2]) 54 | 55 | result = measure.result 56 | 57 | self.assertEquals(result[0][0], run1) 58 | self.assertEquals(result[0][1], 2) 59 | self.assertEquals(result[0][2], 3) 60 | 61 | self.assertEquals(result[1][0], run2) 62 | self.assertEquals(result[1][1], 0) 63 | self.assertEquals(result[1][2], 0) 64 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | module="SafeRLBench" 2 | 3 | GREEN=\033[0;32m 4 | NC=\033[0m 5 | 6 | # Flake8 ignore errors 7 | flakeignore='E402,W503' 8 | 9 | # Pydocstyle ignore errors
10 | pydocignore='D105' 11 | 12 | style: 13 | @echo "${GREEN}Running style tests:${NC}" 14 | @flake8 ${module} --exclude test*.py,__init__.py --show-source 15 | @flake8 ${module} --filename=__init__.py,test*.py --ignore=F --show-source 16 | 17 | docstyle: 18 | @echo "${GREEN}Testing docstring conventions:${NC}" 19 | @pydocstyle ${module} --match='(?!__init__).*\.py' 2>&1 | grep -v "WARNING: __all__" 20 | 21 | unittests: 22 | @echo "${GREEN}Running unit tests in current environment.${NC}" 23 | @nosetests -v --with-doctest --with-coverage --cover-erase --cover-package=${module} ${module} 2>&1 | grep -v "^Level 1" 24 | 25 | coverage: unittests 26 | @echo "${GREEN}Create coverage report:${NC}" 27 | @coverage html 28 | 29 | test: style docstyle unittests 30 | 31 | # targets to setup docker images for testing 32 | setup_docker2: 33 | docker build -f misc/Dockerfile.python2 -t srlb-py27-image . 34 | 35 | setup_docker3: 36 | docker build -f misc/Dockerfile.python3 -t srlb-py35-image . 37 | 38 | setup_docker: setup_docker2 setup_docker3 39 | 40 | docker2: 41 | @echo "${GREEN}Running unit tests for 2.7 in docker container:${NC}" 42 | @docker run -e "TF_CPP_MIN_LOG_LEVEL=2" -v $(shell pwd):/code/ srlb-py27-image nosetests --with-doctest --verbosity=2 SafeRLBench 2>&1 | grep -v "^Level " 43 | 44 | docker3: 45 | @echo "${GREEN}Running unit tests for 3.5 in docker container:${NC}" 46 | @docker run -e "TF_CPP_MIN_LOG_LEVEL=2" -v $(shell pwd):/code/ srlb-py35-image nosetests --with-doctest --verbosity=2 SafeRLBench 2>&1 | grep -v "^Level " 47 | 48 | docker: docker2 docker3 49 | 50 | history: 51 | git log --graph --decorate --oneline 52 | 53 | clean: 54 | find . -type f -name '*.pyc' -exec rm -f {} ';' 55 | rm -r htmlcov 56 | -------------------------------------------------------------------------------- /SafeRLBench/test/test_integration.py: -------------------------------------------------------------------------------- 1 | from SafeRLBench import config 2 | 3 | from SafeRLBench import Bench, BenchConfig 4 | from SafeRLBench.algo import PolicyGradient 5 | from SafeRLBench.envs import LinearCar 6 | from SafeRLBench.policy import LinearPolicy 7 | from SafeRLBench.measure import BestPerformance 8 | 9 | 10 | from unittest2 import TestCase 11 | 12 | import logging 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | class TestIntegration(TestCase): 18 | """Test integration with PolicyGradient and LinearCar.""" 19 | 20 | def test_integration(self): 21 | """Integration: bench with pg and lc.""" 22 | # setup config: 23 | config.logger_set_level(logging.DEBUG) 24 | config.monitor_set_verbosity(3) 25 | 26 | policy = LinearPolicy(2, 1, biased=True) 27 | algs = [(PolicyGradient, {'policy': policy, 28 | 'max_it': 10, 29 | 'estimator': 'central_fd'})] 30 | env = [[(LinearCar, {'horizon': 100})]] 31 | 32 | test_config = BenchConfig(algs, env) 33 | 34 | benchmark = Bench(test_config, [BestPerformance()]) 35 | benchmark() 36 | 37 | assert(benchmark.measures[0].result is not None) 38 | 39 | def test_parallel_integration(self): 40 | """Integration: bench with pg and lc (parallel).""" 41 | # setup config: 42 | config.logger_set_level(logging.DEBUG) 43 | config.monitor_set_verbosity(3) 44 | config.jobs_set(2) 45 | 46 | policy = LinearPolicy(2, 1) 47 | algs = [(PolicyGradient, [{'policy': policy, 48 | 'max_it': 10, 49 | 'estimator': 'central_fd'}, 50 | {'policy': policy, 51 | 'max_it': 20, 52 | 'estimator': 'central_fd'}])] 53 | env = [[(LinearCar, {'horizon': 100})]] 54 | 55 | test_config = BenchConfig(algs, env)
56 | 57 | benchmark = Bench(test_config, [BestPerformance()]) 58 | benchmark() 59 | 60 | assert(benchmark.measures[0].result is not None) 61 | assert(len(benchmark.measures[0].result) == 2) 62 | -------------------------------------------------------------------------------- /SafeRLBench/algo/README.rst: -------------------------------------------------------------------------------- 1 | Description 2 | ----------- 3 | 4 | The ``algo`` module contains algorithm implementations based on the 5 | ``AlgorithmBase`` class. 6 | The objects should only be accessed through the interface functions defined 7 | in the base class. 8 | 9 | Overview 10 | -------- 11 | 12 | =============== =============== 13 | Algorithm Policy 14 | =============== =============== 15 | A3C NeuralNetwork 16 | PolicyGradient Any 17 | Q-Learning None 18 | SafeOpt Any 19 | =============== =============== 20 | 21 | Implementing an Algorithm 22 | ------------------------- 23 | 24 | When implementing an algorithm, a couple of things have to be considered. 25 | ``AlgorithmBase`` is an abstract base class. It will require any subclass to 26 | implement the private methods listed below. These will be invoked by the 27 | public interface methods. 28 | 29 | Any algorithm must be structured around four methods. The first is 30 | ``optimize``, which controls the optimization run and is responsible for 31 | invoking the other methods. The three tools ``optimize`` should use are the 32 | methods ``initialize``, ``step`` and ``is_finished``. 33 | 34 | ``initialize`` should be used to initialize the run and all the attributes and 35 | parameters that need to be set up. 36 | ``step`` should compute one step of the optimization run. 37 | ``is_finished`` is supposed to return ``True`` when the optimization run is 38 | finished. 39 | 40 | Requirements 41 | ~~~~~~~~~~~~ 42 | 43 | ================= ============================================================= 44 | Must implement 45 | =============================================================================== 46 | _initialize Initialize any attributes, objects needed. 47 | _step Execute one iteration of the algorithm. 48 | _is_finished Return ``True`` when done. 49 | ================= ============================================================= 50 | 51 | ================= ============================================================= 52 | May implement 53 | =============================================================================== 54 | _optimize(policy) Optimize the policy. Possibly no policy as in Q-learning. 55 | ================= ============================================================= 56 | -------------------------------------------------------------------------------- /SafeRLBench/envs/README.rst: -------------------------------------------------------------------------------- 1 | Description 2 | ----------- 3 | 4 | The ``envs`` module contains environment implementations based on the 5 | ``EnvironmentBase`` class. 6 | The objects should only be accessed through the interface functions defined 7 | in the base class. 
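For illustration, a rollout through this interface might look as follows.
This is a minimal sketch, assuming the public ``rollout`` method provided by
``EnvironmentBase``; any callable that maps states to actions can serve as a
policy::

    import numpy as np
    from SafeRLBench.envs import LinearCar

    env = LinearCar(horizon=10)

    # trivial policy: always accelerate in the positive direction
    trace = env.rollout(lambda state: np.array([1.]))

    # trace is a list of (action, state, reward) tuples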
8 | 9 | Overview 10 | -------- 11 | 12 | =================== =================================== ======================= 13 | Environment State Space Action Space 14 | =================== =================================== ======================= 15 | GeneralMountainCar :math:`[-1,1]\times[-0.07,0.07]` :math:`[-1, 1]` 16 | GymWrap 17 | LinearCar :math:`\mathbb{R}^{2d}` :math:`[-1, 1]^d` 18 | MDP 19 | Quadrocopter 20 | =================== =================================== ======================= 21 | 22 | Implementing an Environment 23 | --------------------------- 24 | 25 | When implementing an environment, a couple of things have to be considered. 26 | `EnvironmentBase` is an abstract base class. It will require any subclass to 27 | implement certain private methods which will be invoked by the public 28 | interface. Further, certain attributes should be initialized, as specified 29 | below, to support monitoring of the execution. 30 | 31 | Requirements 32 | ~~~~~~~~~~~~ 33 | 34 | Environments have to inherit from `SafeRLBench.EnvironmentBase`. 35 | 36 | =============== =============== =============================================== 37 | Initialize Attributes 38 | =============================================================================== 39 | state_space Space object 40 | action_space Space object 41 | horizon Integer Used in default _rollout implementation. 42 | =============== =============== =============================================== 43 | 44 | =============== =============== =============================================== 45 | Must implement 46 | =============================================================================== 47 | _update action Returns (action, state, reward) 48 | _reset 49 | =============== =============== =============================================== 50 | 51 | =============== =============== =============================================== 52 | May implement 53 | =============================================================================== 54 | _rollout policy Returns list of (action, state, reward) 55 | =============== =============== =============================================== 56 | -------------------------------------------------------------------------------- /SafeRLBench/error.py: -------------------------------------------------------------------------------- 1 | """Exceptions and error messages.""" 2 | 3 | import logging 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | 8 | class NotSupportedException(Exception): 9 | """Exception raised when requirements are not installed. 10 | 11 | Attributes 12 | ---------- 13 | dep : Module 14 | The dependent module. 15 | name : String 16 | Name of the dependency for a meaningful error message. 17 | """ 18 | 19 | def __init__(self, dep, name='Some'): 20 | """Initialize NotSupportedException. 21 | 22 | Parameters 23 | ---------- 24 | dep : Module 25 | The dependent module. 26 | name : String 27 | Name of the dependency for a meaningful error message. 28 | """ 29 | msg = name + " is not installed on this system." 30 | 31 | super(NotSupportedException, self).__init__(msg) 32 | 33 | self.dep = dep 34 | self.name = name 35 | 36 | 37 | class MultipleCallsException(Exception): 38 | """Exception raised when a setup method is called multiple times.""" 39 | 40 | pass 41 | 42 | 43 | class IncompatibilityException(Exception): 44 | """Exception raised when any two parts are incompatible with each other. 
45 | 46 | Attributes 47 | ---------- 48 | obj1 : object 49 | Instance of the object calling the exception. 50 | obj2 : object 51 | Instance of the object being incompatible. 52 | """ 53 | 54 | def __init__(self, obj1, obj2): 55 | """Initialize IncompatibilityException. 56 | 57 | Parameters 58 | ---------- 59 | obj1 : object 60 | Instance of the object calling the exception. 61 | obj2 : object 62 | Instance of the object being incompatible. 63 | """ 64 | msg = "%s is incompatible with %s." % (type(obj2).__name__, 65 | type(obj1).__name__) 66 | 67 | super(IncompatibilityException, self).__init__(msg) 68 | 69 | self.obj1 = obj1 70 | self.obj2 = obj2 71 | 72 | 73 | def add_dependency(dep, dep_name='Some'): 74 | """Add dependency. 75 | 76 | Function, that will raise a `NotSupportedException` when `dep` is None. 77 | 78 | Parameters 79 | ---------- 80 | dep : Module 81 | The dependent module. 82 | dep_name : String 83 | Name of the dependency for a meaningful error message. 84 | """ 85 | if dep is None: 86 | raise NotSupportedException(dep, dep_name) 87 | -------------------------------------------------------------------------------- /SafeRLBench/envs/gym_wrap.py: -------------------------------------------------------------------------------- 1 | """Wrapper for OpenAI Gym.""" 2 | 3 | from SafeRLBench import EnvironmentBase 4 | from SafeRLBench.error import add_dependency 5 | 6 | try: 7 | import gym 8 | except ImportError: 9 | gym = None 10 | 11 | 12 | # TODO: GymWrap: Add examples to docs 13 | class GymWrap(EnvironmentBase): 14 | """Wrapper class for the OpenAI Gym. 15 | 16 | Attributes 17 | ---------- 18 | env : gym environment 19 | Environment of the OpenAI Gym created by gym.make(). 20 | horizon : integer 21 | Horizon for rollout. 22 | render : boolean 23 | Default: False. If True, the simulation will be rendered during 24 | rollouts on this instance. 25 | 26 | Notes 27 | ----- 28 | The GymWrap class relies on the complete observability of the state 29 | through a state field in the respective gym environment. For the classic 30 | control problems this is indeed the case, but for other environments it 31 | remains untested. 32 | """ 33 | 34 | def __init__(self, env, horizon=100, render=False): 35 | """Initialize attributes. 36 | 37 | Parameters 38 | ---------- 39 | env : gym environment 40 | Instance of the gym environment that should be optimized on. 41 | horizon : integer 42 | Horizon for rollout. 43 | render : boolean 44 | Default: False. If True, the simulation will be rendered during 45 | rollouts on this instance. 
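Examples
--------
A hypothetical wrapping of a classic control task (a sketch only; it
requires the ``gym`` package to be installed)::

    import gym

    env = GymWrap(gym.make('MountainCar-v0'), horizon=200)

    # random rollout; the wrapped action space provides sample()
    trace = env.rollout(lambda state: env.action_space.sample())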
46 | """ 47 | add_dependency(gym, 'Gym') 48 | 49 | EnvironmentBase.__init__(self, env.observation_space, env.action_space, 50 | horizon) 51 | self.environment = env.unwrapped 52 | self.render = render 53 | self.done = False 54 | 55 | self.environment.reset() 56 | 57 | def _update(self, action): 58 | observation, reward, done, info = self.environment.step(action) 59 | self.done = done 60 | return action, observation, reward 61 | 62 | def _reset(self): 63 | self.environment.reset() 64 | self.done = False 65 | 66 | def _rollout(self, policy): 67 | trace = [] 68 | for n in range(self.horizon): 69 | if self.render: 70 | self.environment.render() 71 | trace.append(self.update(policy(self.state))) 72 | if self.done: 73 | break 74 | return trace 75 | 76 | @property 77 | def state(self): 78 | """Observable system state.""" 79 | return self.environment.state 80 | 81 | @state.setter 82 | def state(self, s): 83 | assert self.state_space.contains(s) 84 | self.environment.state = s 85 | 86 | 87 | def _get_test_args(): 88 | return [gym.make('MountainCar-v0')] 89 | -------------------------------------------------------------------------------- /SafeRLBench/spaces/bounded_space.py: -------------------------------------------------------------------------------- 1 | """Bounded subspace of R^n.""" 2 | import numpy as np 3 | from SafeRLBench import Space 4 | 5 | from numpy.random import rand 6 | 7 | 8 | class BoundedSpace(Space): 9 | """Bounded subspace of R^n. 10 | 11 | Attributes 12 | ---------- 13 | lower : array-like 14 | Lower bound 15 | upper : array-like 16 | Upper bound 17 | 18 | Examples 19 | -------- 20 | The `BoundedSpace` class can be instantiated in two ways. If you have 21 | individual bounds for each dimension, then you can directly pass the 22 | `lower` and `upper` bounds as array-likes. 23 | 24 | >>> space = BoundedSpace(np.array([-1, -2]), np.array([1, 0])) 25 | 26 | In this case the shape argument will be ignored. If you want to create a 27 | box of arbitrary shape in which all bounds are the same, you may pass the 28 | lower and upper bounds as scalars and make sure that you specify the 29 | shape. 30 | 31 | >>> space = BoundedSpace(-1, 1, shape=(2,)) 32 | """ 33 | 34 | def __init__(self, lower, upper, shape=None): 35 | """Initialize BoundedSpace. 36 | 37 | Parameters 38 | ---------- 39 | lower : array-like 40 | Lower bound of the space. Either an array or a scalar. 41 | Must agree with the input of the upper bound. 42 | upper : array-like 43 | Upper bound of the space. Either an array or a scalar. Must 44 | agree with the input of the lower bound. 45 | shape : tuple of int 46 | Shape of the bounds. Ignored if the bounds are arrays; if they 47 | are scalar, it must be set. 48 | """ 49 | if (np.isscalar(lower) and np.isscalar(upper)): 50 | assert shape is not None, "Shape must be set, if bounds are scalar" 51 | self.lower = np.zeros(shape) + lower 52 | self.upper = np.zeros(shape) + upper 53 | else: 54 | self.lower = np.array(lower) 55 | self.upper = np.array(upper) 56 | assert self.lower.shape == self.upper.shape, "Shapes do not agree." 
57 | 58 | self._dim = None 59 | 60 | def contains(self, x): 61 | """Check if element is contained.""" 62 | return (x.shape == self.lower.shape 63 | and (x >= self.lower).all() 64 | and (x <= self.upper).all()) 65 | 66 | def sample(self): 67 | """Return element.""" 68 | element = rand(*self.shape) * (self.upper - self.lower) + self.lower 69 | return element 70 | 71 | @property 72 | def shape(self): 73 | """Return element shape.""" 74 | return self.lower.shape 75 | 76 | @property 77 | def dimension(self): 78 | """Return dimension of the space.""" 79 | if self._dim is None: 80 | d = 1 81 | for i in range(len(self.shape)): 82 | d *= self.shape[i] 83 | self._dim = d 84 | return self._dim 85 | 86 | def __repr__(self): 87 | return 'BoundedSpace(lower=%s, upper=%s)' % (str(self.lower), 88 | str(self.upper)) 89 | -------------------------------------------------------------------------------- /SafeRLBench/test/test_configuration.py: -------------------------------------------------------------------------------- 1 | from SafeRLBench import SRBConfig 2 | 3 | from unittest2 import TestCase 4 | 5 | import sys 6 | import os 7 | 8 | import logging 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class TestSRBConfig(TestCase): 14 | """Test SRBConfig class.""" 15 | 16 | def test_logger_stream_handler(self): 17 | """Test: CONFIG: stream handler.""" 18 | config = SRBConfig(logger) 19 | 20 | self.assertIsNone(config.logger_stream_handler) 21 | 22 | # check if stream handler gets added 23 | config.logger_add_stream_handler() 24 | self.assertIsNotNone(config.logger_stream_handler) 25 | 26 | handler1 = config.logger_stream_handler 27 | handler2 = logging.StreamHandler(sys.stdout) 28 | 29 | # check if handler changes on assignment 30 | config.logger_stream_handler = handler2 31 | self.assertNotEqual(handler1, config.logger_stream_handler) 32 | 33 | def test_logger_file_handler(self): 34 | """Test: CONFIG: file handler.""" 35 | config = SRBConfig(logger) 36 | 37 | self.assertIsNone(config.logger_file_handler) 38 | 39 | # check if file handler gets added 40 | config.logger_add_file_handler('logs.log') 41 | self.assertIsNotNone(config.logger_file_handler) 42 | 43 | handler1 = config.logger_file_handler 44 | handler2 = logging.FileHandler('logs2.log') 45 | 46 | # check if handler changes on assignment 47 | config.logger_file_handler = handler2 48 | self.assertNotEqual(handler1, config.logger_file_handler) 49 | 50 | self.assertTrue(os.path.isfile('logs.log')) 51 | self.assertTrue(os.path.isfile('logs2.log')) 52 | 53 | config.logger_file_handler = None 54 | 55 | def test_logger_format(self): 56 | """Test: CONFIG: logger format.""" 57 | config = SRBConfig(logger) 58 | 59 | config.logger_add_stream_handler() 60 | config.logger_add_file_handler('logs.log') 61 | 62 | fmt = '%(name)s - %(levelname)s - %(message)s' 63 | formatter = logging.Formatter(fmt) 64 | 65 | config.logger_format = fmt 66 | 67 | tst_record = { 68 | 'name': 'test_logger', 69 | 'level': logging.DEBUG, 70 | 'pathname': os.path.realpath(__file__), 71 | 'lineno': 42, 72 | 'msg': 'test_msg', 73 | 'args': None, 74 | 'exc_info': None, 75 | 'func': 'test_logger_format' 76 | } 77 | rec = logging.makeLogRecord(tst_record) 78 | self.assertEqual(formatter.format(rec), 79 | config.logger_stream_handler.format(rec)) 80 | 81 | def test_monitor_verbosity(self): 82 | """Test: CONFIG: monitor verbosity.""" 83 | config = SRBConfig(logger) 84 | 85 | config.monitor_set_verbosity(42) 86 | self.assertEqual(config.monitor_verbosity, 42) 87 | 88 | with 
self.assertRaises(ValueError): 89 | config.monitor_set_verbosity(-1) 90 | 91 | def test_jobs(self): 92 | """Test: CONFIG: jobs set.""" 93 | config = SRBConfig(logger) 94 | 95 | config.jobs_set(42) 96 | self.assertEqual(config.n_jobs, 42) 97 | 98 | with self.assertRaises(ValueError): 99 | config.jobs_set(-1) 100 | 101 | @classmethod 102 | def tearDownClass(cls): 103 | """Clean up created file.""" 104 | if os.path.isfile('logs.log'): 105 | os.remove('logs.log') 106 | if os.path.isfile('logs2.log'): 107 | os.remove('logs2.log') 108 | -------------------------------------------------------------------------------- /SafeRLBench/envs/linear_car.py: -------------------------------------------------------------------------------- 1 | """Linear Car.""" 2 | import numpy as np 3 | from numpy import copy, array 4 | from numpy.linalg import norm 5 | 6 | from SafeRLBench import EnvironmentBase 7 | from SafeRLBench.spaces import RdSpace, BoundedSpace 8 | 9 | 10 | # TODO: LinearCar: add examples 11 | class LinearCar(EnvironmentBase): 12 | """Implementation of LinearCar Environment. 13 | 14 | This is a very simple environment implementing a car in an arbitrarily 15 | dimensioned space. By default it will just be one dimensional, which 16 | results in a two dimensional state space, that is, (pos, vel), and 17 | accordingly in a one dimensional bounded action space, that is, the 18 | acceleration. 19 | 20 | Attributes 21 | ---------- 22 | state : ndarray 23 | Current state of the LinearCar. 24 | initial_state : ndarray 25 | Initial state of the LinearCar. 26 | goal : ndarray 27 | Goal state. 28 | eps : float 29 | Margin for completion. If 0, the goal is to stabilize at the goal 30 | completely. 31 | step : float 32 | Update step. 33 | state_space : Space object 34 | State space as deduced from the state. 35 | action_space : Space object 36 | Action space as deduced from the state. 37 | """ 38 | 39 | def __init__(self, state=array([[0.], [0.]]), goal=array([[1.], [0.]]), 40 | step=0.01, eps=0, horizon=100): 41 | """ 42 | Initialize LinearCar. 43 | 44 | Parameters 45 | ---------- 46 | state : ndarray 47 | Initial state of the LinearCar. The state and action space will be 48 | deduced from this. The shape needs to be (2, d) for d > 0. 49 | goal : ndarray 50 | Goal state of the LinearCar. The shape should comply to the shape 51 | of the initial state. 52 | In case the velocity is non-zero, eps should be strictly greater 53 | than zero, since there is no way for the system to stabilize in 54 | the goal state anyway. 55 | eps : float 56 | Absolute reward below which the goal counts as achieved and the 57 | rollout is aborted. If zero we do not abort at all. 58 | step : float 59 | Update step. 60 | """ 61 | assert state.shape[0] == 2, 'Invalid shape of the initial state.' 62 | assert state.shape == goal.shape, 'State and goal shape have to agree.' 
62 | 63 | # Initialize EnvironmentBase attributes 64 | self.horizon = horizon 65 | self.state_space = RdSpace(state.shape) 66 | self.action_space = BoundedSpace(-1, 1, shape=(state.shape[1],)) 67 | 68 | # Initialize State 69 | self.initial_state = state 70 | self.state = copy(state) 71 | 72 | # Initialize Environment Parameters 73 | self.goal = goal 74 | self.eps = eps 75 | self.step = step 76 | 77 | def _update(self, action): 78 | one = np.ones(self.action_space.shape) 79 | action = np.maximum(np.minimum(action, one), -one) 80 | 81 | self.state[1] += self.step * action 82 | self.state[0] += self.state[1] 83 | 84 | return (action, copy(self.state), self._reward()) 85 | 86 | def _reset(self): 87 | self.state = copy(self.initial_state) 88 | 89 | def _rollout(self, policy): 90 | self.reset() 91 | trace = [] 92 | for n in range(self.horizon): 93 | action = policy(self.state) 94 | trace.append(self.update(action)) 95 | if (self.eps != 0 and self._achieved()): 96 | return trace 97 | return trace 98 | 99 | def _reward(self): 100 | return -norm(self.state - self.goal) 101 | 102 | def _achieved(self): 103 | return (abs(self._reward()) < self.eps) 104 | -------------------------------------------------------------------------------- /SafeRLBench/algo/q_learning.py: -------------------------------------------------------------------------------- 1 | """Q-learning implementations.""" 2 | 3 | from SafeRLBench import AlgorithmBase, Policy 4 | from SafeRLBench.spaces import DiscreteSpace 5 | from SafeRLBench.error import IncompatibilityException 6 | 7 | import numpy as np 8 | 9 | 10 | # TODO: DiscreteQLearning: examples, monitoring, finished, adaptive rate 11 | class DiscreteQLearning(AlgorithmBase): 12 | """Q-Learning Algorithm. 13 | 14 | This algorithm estimates a quality measure that maps every (state, action) 15 | pair to a real number. 16 | 17 | Attributes 18 | ---------- 19 | Q : ndarray 20 | Array representing the quality for each state action pair. 21 | environment : 22 | The environment for which we want to estimate the Q function. Its 23 | state and action space need to be instances of `DiscreteSpace`. 24 | discount : float 25 | Discount factor. 26 | max_it : int 27 | Maximum number of iterations. 28 | rate : float 29 | Update rate. 30 | shape : (int, int) 31 | Tuple containing the dimension of the state and action space. 32 | 33 | Notes 34 | ----- 35 | The environment needs to use a discrete state and action space, because 36 | this Q-Learning implementation uses a table to estimate the Q function. 37 | """ 38 | 39 | def __init__(self, environment, discount, max_it, rate): 40 | """Initialize QLearning. 41 | 42 | Parameters 43 | ---------- 44 | environment : 45 | The environment for which we want to estimate the Q function. Its 46 | state and action space need to be instances of `DiscreteSpace`. 47 | discount : float 48 | Discount factor. 49 | max_it : int 50 | Maximum number of iterations. 51 | rate : float 52 | Update rate. 
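Examples
--------
A hypothetical setup (a sketch only; ``env`` is assumed to be an
environment whose state and action spaces are both instances of
``DiscreteSpace``, and ``optimize`` is the public entry point inherited
from ``AlgorithmBase``)::

    alg = DiscreteQLearning(env, discount=0.95, max_it=1000, rate=0.1)
    alg.optimize()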
53 | """ 54 | # make some sanity checks 55 | if (not isinstance(environment.action_space, DiscreteSpace) 56 | or not isinstance(environment.state_space, DiscreteSpace)): 57 | raise IncompatibilityException(self, environment) 58 | 59 | if discount <= 0: 60 | raise ValueError('discount (%s) needs to be larger than zero.' 61 | % discount) 62 | 63 | if max_it <= 0: 64 | raise ValueError('max_it (%d) needs to be larger than zero.' % max_it) 65 | 66 | # initialize the fields 67 | self.environment = environment 68 | self.discount = discount 69 | self.max_it = max_it 70 | self.rate = rate 71 | 72 | # determine the dimension of the state and action space 73 | d_state = environment.state_space.dimension 74 | d_action = environment.action_space.dimension 75 | 76 | self.shape = (d_state, d_action) 77 | 78 | # initialize the lookup table for the Q function. 79 | self.Q = None 80 | self.policy = _RandomPolicy(environment.action_space) 81 | 82 | def _initialize(self): 83 | self.Q = np.zeros(self.shape) 84 | 85 | def _step(self): 86 | trace = self.environment.rollout(self.policy) 87 | for (action, state, reward) in trace: 88 | dq = (reward + self.discount * self.Q[state, :].max() 89 | - self.Q[state, action]) 90 | self.Q[state, action] += self.rate * dq 91 | 92 | def _is_finished(self): 93 | pass 94 | 95 | # TODO: Q-learning evaluate qlearning performance appropriately 96 | 97 | 98 | class _RandomPolicy(Policy): 99 | 100 | def __init__(self, action_space): 101 | self.action_space = action_space 102 | 103 | def map(self, state): 104 | return self.action_space.sample() 105 | 106 | @property 107 | def parameters(self): 108 | return self.action_space.dimension 109 | 110 | @property 111 | def parameter_space(self): 112 | return None 113 | -------------------------------------------------------------------------------- /SafeRLBench/envs/test.py: -------------------------------------------------------------------------------- 1 | """Tests for envs module. 2 | 3 | Need rework. 4 | """ 5 | from __future__ import absolute_import 6 | 7 | # import unittest 8 | # from numpy.testing import * 9 | import inspect 10 | from functools import partial 11 | 12 | import SafeRLBench.envs as envs 13 | 14 | import numpy as np 15 | 16 | import gym 17 | gym.undo_logger_setup() 18 | 19 | from mock import Mock 20 | 21 | 22 | class TestEnvironments(object): 23 | """ 24 | Test Class for Environment tests. 25 | 26 | Note that you really don't want to inherit from unittest.TestCase here, 27 | because it will break reasonable output with verbose testing. 
28 | """ 29 | 30 | exclude = [] 31 | 32 | args = { 33 | 'GymWrap': envs.gym_wrap._get_test_args(), 34 | 'MDP': envs.mdp._get_test_args() 35 | } 36 | 37 | @classmethod 38 | def setUpClass(cls): 39 | """Generate list of classes.""" 40 | cls.classes = [] 41 | for name, c in inspect.getmembers(envs): 42 | if inspect.isclass(c): 43 | cls.classes.append(c) 44 | 45 | def test_environment_requirements(self): 46 | """Generate tests for environment implementations.""" 47 | for c in self.classes: 48 | if c.__name__ in self.exclude: 49 | pass 50 | else: 51 | # Generate NotImplementedError Test for _update 52 | check_update = partial(self.check_env_update) 53 | check_update.description = ('Test: ' + c.__name__.upper() 54 | + ': update implementation.') 55 | yield check_update, c 56 | 57 | # Generate NotImplementedError Test for _reset 58 | check_reset = partial(self.check_env_reset) 59 | check_reset.description = ('Test: ' + c.__name__.upper() 60 | + ': reset implementation.') 61 | yield check_reset, c 62 | 63 | check_rollout = partial(self.check_env_rollout) 64 | check_rollout.description = ('Test: ' + c.__name__.upper() 65 | + ': rollout implementation.') 66 | yield check_rollout, c 67 | 68 | def check_env_update(self, c): 69 | """Check if _update is implemented.""" 70 | args = self.args.get(c.__name__, []) 71 | env = c(*args) 72 | x = env.action_space.sample() 73 | try: 74 | env._update(x) 75 | except NotImplementedError: 76 | assert False 77 | 78 | def check_env_reset(self, c): 79 | """Check if _reset is implemented.""" 80 | args = self.args.get(c.__name__, []) 81 | env = c(*args) 82 | try: 83 | env._reset() 84 | except NotImplementedError: 85 | assert False 86 | 87 | def check_env_rollout(self, c): 88 | """Check rollout correctness at random positions.""" 89 | args = self.args.get(c.__name__, []) 90 | env = c(*args) 91 | 92 | init_state = env.state 93 | 94 | def policy(state): 95 | return env.action_space.sample() 96 | 97 | policy_mock = Mock(side_effect=policy) 98 | trace = env._rollout(policy_mock) 99 | 100 | # reset the environment 101 | env._reset() 102 | env.state = init_state 103 | 104 | # if the environment depends on a seed, reset it. 105 | if hasattr(env, 'seed'): 106 | env.seed = env.seed 107 | 108 | actions = [t[0] for t in trace] 109 | 110 | policy_mock_redo = Mock(side_effect=actions) 111 | 112 | trace_verify = env._rollout(policy_mock_redo) 113 | 114 | for t, t_verify in zip(trace, trace_verify): 115 | print(t) 116 | print(t_verify) 117 | if isinstance(t[0], np.ndarray): 118 | assert(all(np.isclose(t_verify[0], t[0]))) 119 | else: 120 | assert(np.isclose(t_verify[0], t[0])) 121 | if isinstance(t[1], np.ndarray): 122 | print(t_verify[1] - t[1]) 123 | assert(all(np.isclose(t_verify[1], t[1]))) 124 | else: 125 | assert(np.isclose(t_verify[1], t[1])) 126 | assert(np.isclose(t_verify[2], t[2])) 127 | -------------------------------------------------------------------------------- /SafeRLBench/envs/mdp.py: -------------------------------------------------------------------------------- 1 | """Markov Decision Process Implementations.""" 2 | 3 | import numpy as np 4 | 5 | from SafeRLBench import EnvironmentBase 6 | from SafeRLBench.spaces import DiscreteSpace 7 | 8 | 9 | class MDP(EnvironmentBase): 10 | """Discrete Markov Decision Process Environment. 11 | 12 | Attributes 13 | ---------- 14 | transitions : array-like 15 | Array holding transition matrix for each action. The dimension of 16 | the state and action spaces will be deduced from this array. 
17 |     rewards : array-like
18 |         Array holding the reward matrix for each action. It needs to comply
19 |         with the dimensions deduced from the transitions array.
20 |     action_space : DiscreteSpace object
21 |         Action space as determined from the transitions array.
22 |     state_space : DiscreteSpace object
23 |         State space as determined from the transitions array.
24 |     init_state : int
25 |         Initial state of the process. If None, it will be set to 0.
26 |     state : int
27 |         Current state of the system.
28 |     """
29 | 
30 |     def __init__(self, transitions, rewards, horizon=100, init_state=None,
31 |                  seed=None):
32 |         """MDP initialization.
33 | 
34 |         Parameters
35 |         ----------
36 |         transitions : array-like
37 |             Array holding transition matrix for each action. The dimension of
38 |             the state and action spaces will be deduced from this array.
39 |         rewards : array-like
40 |             Array holding the reward matrix for each action. It needs to comply
41 |             with the dimensions deduced from the transitions array.
42 |         init_state : int
43 |             Initial state of the process. If None, it will be set to 0.
44 |         """
45 |         self.horizon = horizon
46 | 
47 |         self.transitions = transitions
48 |         self.rewards = rewards
49 | 
50 |         # determine state and action space
51 |         self.action_space = DiscreteSpace(len(transitions))
52 |         self.state_space = DiscreteSpace(len(transitions[0]))
53 | 
54 |         # if initial state is none, we will use 0 as an initial state
55 |         if init_state is None:
56 |             init_state = 0
57 |         elif not self.state_space.contains(init_state):
58 |             raise ValueError('Initial state (%d) is not a valid state.'
59 |                              % init_state)
60 | 
61 |         # setup current state and store the initial state for reset
62 |         self.init_state = init_state
63 |         self.state = init_state
64 | 
65 |         # set up the random number generator
66 |         self.random = np.random.RandomState()
67 | 
68 |         if seed is not None:
69 |             self.seed = seed
70 |         else:
71 |             self._seed = None
72 | 
73 |     @property
74 |     def seed(self):
75 |         """Seed for the internal random number generator."""
76 |         return self._seed
77 | 
78 |     @seed.setter
79 |     def seed(self, v):
80 |         self.random.seed(v)
81 |         self._seed = v
82 | 
83 |     def _update(self, action):
84 |         prev_state = self.state
85 | 
86 |         # choose next state
87 |         self.state = self.random.choice(np.arange(self.state_space.dimension),
88 |                                         p=self.transitions[action][self.state])
89 |         # determine reward
90 |         reward = self.rewards[action][prev_state][self.state]
91 | 
92 |         return action, self.state, reward
93 | 
94 |     def _reset(self):
95 |         self.state = self.init_state
96 | 
97 | 
98 | def _get_test_args():
99 |     # private method that will generate arguments for mdp testing.
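    # The MDP below is a 5-state chain with two actions: action 0 tends to
    # advance along the chain (with some probability of falling back to
    # state 0), while action 1 moves to state 0 deterministically and is the
    # only action that ever yields a non-zero reward.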
100 |     transitions = [
101 |         [[.1, .9, 0., 0., 0.],
102 |          [.2, 0., .8, 0., 0.],
103 |          [.3, 0., 0., .7, 0.],
104 |          [.4, 0., 0., 0., .6],
105 |          [.4, 0., 0., 0., .6]],
106 |         [[1., 0., 0., 0., 0.],
107 |          [1., 0., 0., 0., 0.],
108 |          [1., 0., 0., 0., 0.],
109 |          [1., 0., 0., 0., 0.],
110 |          [1., 0., 0., 0., 0.]]
111 |     ]
112 | 
113 |     rewards = [
114 |         [[0., 0., 0., 0., 0.],
115 |          [0., 0., 0., 0., 0.],
116 |          [0., 0., 0., 0., 0.],
117 |          [0., 0., 0., 0., 0.],
118 |          [0., 0., 0., 0., 0.]],
119 |         [[0., 0., 0., 0., 0.],
120 |          [1., 0., 0., 0., 0.],
121 |          [2., 0., 0., 0., 0.],
122 |          [3., 0., 0., 0., 0.],
123 |          [4., 0., 0., 0., 0.]],
124 |     ]
125 | 
126 |     return [transitions, rewards, 100, None, 42]
--------------------------------------------------------------------------------
/SafeRLBench/policy/controller.py:
--------------------------------------------------------------------------------
1 | """Quadrocopter Controller."""
2 | from SafeRLBench import Policy
3 | from SafeRLBench.spaces import BoundedSpace
4 | from SafeRLBench.envs._quadrocopter import StateVector
5 | 
6 | import numpy as np
7 | 
8 | import logging
9 | 
10 | logger = logging.getLogger(__name__)
11 | 
12 | __all__ = ('NonLinearQuadrocopterController',)
13 | 
14 | 
15 | # TODO: Controller: Documentation
16 | class NonLinearQuadrocopterController(Policy):
17 |     """Non-linear quadrocopter controller."""
18 | 
19 |     def __init__(self, zeta_z=0.7, params=[.7, .7, .7, .5, .707],
20 |                  reference=None):
21 |         """Initialize NonLinearQuadrocopterController.
22 | 
23 |         Parameters
24 |         ----------
25 |         zeta_z : float, damping ratio for the vertical (z) velocity command
26 |         params : array-like, the five controller parameters [tau_x, tau_y, tau_z, tau_w, zeta]
27 |         reference : reference trajectory object (see `map`); may be set later
28 |         """
29 |         self._zeta_z = zeta_z
30 |         self._params = np.array(params)
31 |         self.reference = reference
32 | 
33 |         if params is not None:
34 |             self.initialized = True
35 |         else:
36 |             self.initialized = False
37 | 
38 |         self._par_space = BoundedSpace(np.array([0., 0., 0., 0., 0.]),
39 |                                        np.array([1., 1., 1., 1., 1.]))
40 | 
41 |     def map(self, state):
42 |         """Map state to action.
43 | 
44 |         Depends on a reference object. If the environment has a reference
45 |         object it needs to set the reference at the start of the rollout.
46 | 
47 |         Parameters
48 |         ----------
49 |         state : array-like
50 |             Element of state space.
51 | 
52 |         Returns
53 |         -------
54 |         action : ndarray
55 |             Element of action space.
56 |         """
57 |         ref = self.reference.reference
58 |         state = StateVector(state)
59 | 
60 |         # Allocate memory for the 4 outputs of the controller.
61 |         action = np.empty((4,), dtype=np.float32)
62 | 
63 |         # Retrieve the different parameters and make sure the critical ones
64 |         # are non-zero.
65 |         tau_x, tau_y, tau_z, tau_w, zeta = self._params
66 |         if tau_x < 1e-3:
67 |             tau_x = 1e-3
68 |             logger.warning('Parameter `tau_x` too small for controller, '
69 |                            + 'has been clipped to 1e-3.')
70 |         if tau_y < 1e-3:
71 |             tau_y = 1e-3
72 |             logger.warning('Parameter `tau_y` too small for controller, '
73 |                            + 'has been clipped to 1e-3.')
74 |         if tau_w < 1e-3:
75 |             tau_w = 1e-3
76 |             logger.warning('Parameter `tau_w` too small for controller, '
77 |                            + 'has been clipped to 1e-3.')
78 |         if zeta < 1e-3:
79 |             zeta = 1e-3
80 |             logger.warning('Parameter `zeta` too small for controller, '
81 |                            + 'has been clipped to 1e-3.')
82 | 
83 |         # desired acceleration in x and y (global coordinates, [m/s^2])
84 |         ax = (2. * zeta / tau_x * (ref.vel[0] - state.vel[0])
85 |               + 1. / (tau_x**2) * (ref.pos[0] - state.pos[0]))
86 |         ay = (2. * zeta / tau_y * (ref.vel[1] - state.vel[1])
87 |               + 1.
              / (tau_y**2) * (ref.pos[1] - state.pos[1]))
88 | 
89 |         # Normalize by thrust
90 |         thrust = np.linalg.norm(np.array([ax, ay, 9.81 + state.acc[2]]))
91 |         ax /= thrust
92 |         ay /= thrust
93 | 
94 |         # Rotate desired accelerations into the yaw-rotated inertial frame
95 |         ax_b = ax * np.cos(state.euler[2]) + ay * np.sin(state.euler[2])
96 |         ay_b = -ax * np.sin(state.euler[2]) + ay * np.cos(state.euler[2])
97 | 
98 |         # Get euler angles from rotation matrix
99 |         action[1] = np.arcsin(-ay_b)
100 |         action[0] = np.arcsin(ax_b / np.cos(action[1]))
101 | 
102 |         # Z-velocity command (m/s)
103 |         action[2] = (2. * self._zeta_z / tau_z * (ref.vel[2] - state.vel[2])
104 |                      + 1. / (tau_z**2) * (ref.pos[2] - state.pos[2]))
105 | 
106 |         # Yaw rate command (rad/s)
107 |         yaw_err = (np.mod(ref.euler[2] - state.euler[2] + np.pi, 2 * np.pi)
108 |                    - np.pi)
109 |         action[3] = yaw_err / tau_w + ref.omega_b[2]
110 | 
111 |         return action
112 | 
113 |     @property
114 |     def parameters(self):
115 |         """Controller parameters."""
116 |         return self._params
117 | 
118 |     @parameters.setter
119 |     def parameters(self, params):
120 |         self._params = np.array(params)
121 | 
122 |     @property
123 |     def parameter_space(self):
124 |         """Controller parameter space."""
125 |         return self._par_space
126 | 
--------------------------------------------------------------------------------
/SafeRLBench/envs/general_mountaincar.py:
--------------------------------------------------------------------------------
1 | """General Mountain Car."""
2 | import numpy as np
3 | from numpy import pi, array, copy, cos, sin
4 | 
5 | from SafeRLBench.base import EnvironmentBase
6 | from SafeRLBench.spaces import BoundedSpace
7 | 
8 | 
9 | class GeneralMountainCar(EnvironmentBase):
10 |     """Implementation of a GeneralMountainCar Environment.
11 | 
12 |     Attributes
13 |     ----------
14 |     state_space : BoundedSpace
15 |         Space object describing the state space.
16 |     action_space : BoundedSpace
17 |         Space object describing the action space.
18 |     state : array-like
19 |         Current state of the car.
20 |     initial_state : array-like
21 |         Initial state of the car.
22 |     gravitation : double, scale of the gravitational force on the car
23 |     power : double, scale of the force exerted by an action
24 |     goal : double
25 |         Goal along x-coordinate.
26 |     """
27 | 
28 |     def __init__(self,
29 |                  state_space=BoundedSpace(array([-1, -0.07]),
30 |                                           array([1, 0.07])),
31 |                  action_space=BoundedSpace(-1, 1, shape=(1,)),
32 |                  state=np.array([0, 0]),
33 |                  contour=None, gravitation=0.0025, power=0.0015,
34 |                  goal=0.6, horizon=100):
35 |         """Initialize GeneralMountainCar Environment.
36 | 
37 |         Parameters
38 |         ----------
39 |         state_space : BoundedSpace
40 |             Space object describing the state space.
41 |         action_space : BoundedSpace
42 |             Space object describing the action space.
43 |         state : array-like
44 |             Initial state of the car.
45 |         contour : tuple of callables
46 |             If contour is None, a default shape will be generated. A valid
47 |             tuple needs to contain a function for the height at a position
48 |             in the first element and a function for the gradient at a position
49 |             in the second element.
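            For example, ``(lambda x: x ** 2, lambda x: 2 * x)`` would
            describe a parabolic valley.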
50 |         gravitation : double, scale of the gravitational force on the car
51 |         power : double, scale of the force exerted by an action
52 |         goal : double
53 |             Goal along x-coordinate.
54 |         """
55 |         # Initialize Environment Base Parameters
56 |         super(GeneralMountainCar, self).__init__(state_space,
57 |                                                  action_space,
58 |                                                  horizon)
59 | 
60 |         # setup environment parameters
61 |         self.goal = goal
62 |         self.power = power
63 |         self.gravitation = gravitation
64 | 
65 |         # setup contour
66 |         if contour is None:
67 |             def _hx(x):
68 |                 return -cos(pi * x)
69 |             self._hx = _hx
70 | 
71 |             def _dydx(x):
72 |                 return pi * sin(pi * x)
73 |             self._dydx = _dydx
74 |         else:
75 |             self._hx = contour[0]
76 |             self._dydx = contour[1]
77 | 
78 |         # init state
79 |         self.state = copy(state)
80 |         self.initial_state = state
81 | 
82 |     def _update(self, action):
83 |         """Compute step considering the action."""
84 |         action = array(action).flatten()
85 |         action = np.clip(action, -1.0, 1.0)
86 | 
87 |         if hasattr(action, 'size') and action.size == 1:
88 |             action_in = action[0]
89 |         else:
90 |             action_in = action
91 | 
92 |         position = self.state[0]
93 |         velocity = self.state[1]
94 | 
95 |         velocity += (action_in * self.power
96 |                      - self._dydx(position) * self.gravitation)
97 |         position += velocity
98 | 
99 |         bounds = self.state_space
100 | 
101 |         velocity = max(min(velocity, bounds.upper[1]), bounds.lower[1])
102 |         position = max(min(position, bounds.upper[0]), bounds.lower[0])
103 | 
104 |         # make sure outputs have the right form
105 |         self.state = np.array([position, velocity])
106 |         action = np.reshape(action, self.action_space.shape)
107 | 
108 |         return action, copy(self.state), self._reward()
109 | 
110 |     def _reset(self):
111 |         self.state = copy(self.initial_state)
112 | 
113 |     def _reward(self):
114 |         return self.height() - 1
115 | 
116 |     def _rollout(self, policy):
117 |         self.reset()
118 |         trace = []
119 |         for n in range(self.horizon):
120 |             action = policy(self.state)
121 |             trace.append(self.update(action))
122 |             if self.position() >= self.goal:
123 |                 return trace
124 |         return trace
125 | 
126 |     def height(self):
127 |         """Compute current height."""
128 |         return self._hx(self.state[0].item()).item()
129 | 
130 |     def position(self):
131 |         """Compute current position in x."""
132 |         return self.state[0]
133 | 
--------------------------------------------------------------------------------
/SafeRLBench/measure.py:
--------------------------------------------------------------------------------
1 | """Define Measurements."""
2 | 
3 | from abc import ABCMeta, abstractmethod
4 | from six import add_metaclass
5 | 
6 | from operator import itemgetter
7 | 
8 | __all__ = ('Measure', 'BestPerformance', 'SafetyMeasure')
9 | 
10 | 
11 | @add_metaclass(ABCMeta)
12 | class Measure(object):
13 |     """Abstract Base class defining the interface for any measurement.
14 | 
15 |     The methods below are abstract and need to be implemented by any child.
16 | 
17 |     Methods
18 |     -------
19 |     __call__(runs)
20 |         Abstract! Evaluate a list of runs.
21 |     result
22 |         Abstract property! Return the result of the evaluation.
23 |     """
24 | 
25 |     @abstractmethod
26 |     def __call__(self, runs):
27 |         """Evaluate a list of runs.
28 | 
29 |         Parameters
30 |         ----------
31 |         runs : List of BenchRun instances
32 |             May be any subset of BenchRun instances passed in a list.
33 |         """
34 |         pass
35 | 
36 |     @property
37 |     @abstractmethod
38 |     def result(self):
39 |         """Return the result of evaluation."""
40 |         pass
41 | 
42 | 
43 | class BestPerformance(Measure):
44 |     """Find the best performance achieved within runs."""
45 | 
46 |     def __call__(self, runs):
47 |         """Sort content of runs by performance.
48 | 
49 |         This class creates a tuple of each BenchRun and its respective best
50 |         performance, and stores the tuples in a list sorted in descending
51 |         order. The results are accessible through the result method.
52 | 
53 |         Parameters
54 |         ----------
55 |         runs : List of BenchRun instances
56 |             May be any subset of BenchRun instances in a list.
57 |         """
58 |         # create a list of tuples with the max reward for each run
59 |         runs_tup = []
60 |         for run in runs:
61 |             monitor = run.get_alg_monitor()
62 |             max_reward = max(monitor.rewards)
63 |             runs_tup.append((run, max_reward))
64 | 
65 |         # sort list
66 |         self._result = sorted(runs_tup, key=itemgetter(1), reverse=True)
67 | 
68 |     @property
69 |     def result(self):
70 |         """Retrieve result."""
71 |         if not hasattr(self, '_result'):
72 |             self._result = None
73 |         return self._result
74 | 
75 |     @property
76 |     def best_result(self):
77 |         """Retrieve the best run."""
78 |         if self.result is not None:
79 |             return self.result[0]
80 |         return None
81 | 
82 | 
83 | class SafetyMeasure(Measure):
84 |     """Detect Safety violations.
85 | 
86 |     The measure evaluates to a list of 3-tuples, where the first
87 |     element contains the instance that was evaluated, the second one
88 |     the number of violations that occurred and the third the sum of
89 |     those violations, i.e. the sum of the difference between the
90 |     effective reward and the threshold, for every violation.
91 | 
92 |     Attributes
93 |     ----------
94 |     threshold : float or integer
95 |         Reward threshold to detect violations.
96 |     """
97 | 
98 |     def __init__(self, threshold):
99 |         """Initialize SafetyMeasure.
100 | 
101 |         Parameters
102 |         ----------
103 |         threshold : float or integer
104 |             Reward threshold to detect violations.
105 |         """
106 |         self.threshold = threshold
107 | 
108 |     def __call__(self, runs):
109 |         """Evaluate Safety violations.
110 | 
111 |         Parameters
112 |         ----------
113 |         runs : List of BenchRun instances
114 |             May be any subset of BenchRun instances in a list.
115 |         """
116 |         self._result = []
117 | 
118 |         for run in runs:
119 |             num_violations = 0
120 |             sum_violations = 0
121 |             for reward in run.get_alg_monitor().rewards:
122 |                 if reward < self.threshold:
123 |                     num_violations += 1
124 |                     sum_violations += self.threshold - reward
125 |             self._result.append((run, num_violations, sum_violations))
126 | 
127 |     @property
128 |     def result(self):
129 |         """Retrieve result.
130 | 
131 |         If a run has been evaluated, this function will retrieve the
132 |         result, otherwise it will return ``None``.
133 |         The function evaluates to a list of 3-tuples, where the first
134 |         element contains the instance that was evaluated, the second one
135 |         the number of violations that occurred and the third the sum of
136 |         those violations, i.e. the sum of the difference between the
137 |         effective reward and the threshold, for every violation.
138 | 
139 |         Returns
140 |         -------
141 |         (run, count, amount) : 3-tuple
142 |             run: Object that has been evaluated.
143 |             count: Number of rollouts where the safety threshold was violated.
144 |             amount: Sum of the differences between the threshold and each
145 |                 violating reward.
146 |         """
147 |         if not hasattr(self, '_result'):
148 |             self._result = None
149 |         return self._result
--------------------------------------------------------------------------------
/SafeRLBench/envs/_quadrocopter/quaternions.py:
--------------------------------------------------------------------------------
1 | """Some common functions for manipulating quaternions.
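
Quaternions are represented as 4-element ndarrays in ``[x, y, z, w]`` order,
i.e. with the scalar part last.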
2 | 
3 | VERSION HISTORY
4 | Aug 14, 2014 - initially created (Felix Berkenkamp)
5 | """
6 | from __future__ import print_function, division, absolute_import
7 | 
8 | import math
9 | import numpy as np
10 | 
11 | from .transformations import (vector_norm,
12 |                               quaternion_multiply,
13 |                               quaternion_conjugate,
14 |                               quaternion_matrix,
15 |                               quaternion_about_axis)
16 | 
17 | __all__ = ['omega_from_quat_quat', 'apply_omega_to_quat', 'global_to_body',
18 |            'body_to_global']
19 | 
20 | 
21 | def omega_from_quat_quat(q1, q2, dt):
22 |     """
23 |     Convert two quaternions and the time difference to angular velocity.
24 | 
25 |     Parameters:
26 |     -----------
27 |     q1: quaternion
28 |         The old quaternion
29 |     q2: quaternion
30 |         The new quaternion
31 |     dt: float
32 |         The time difference
33 | 
34 |     Returns:
35 |     --------
36 |     omega_g: ndarray
37 |         The angular velocity in global coordinates
38 |     """
39 |     if vector_norm(q1 - q2) < 1e-8:
40 |         # linearly interpolate
41 |         # the quaternion does not stay on the unit sphere -> only valid for
42 |         # very small rotations!
43 | 
44 |         # dq/dt
45 |         dq = (q2 - q1) / dt
46 | 
47 |         # From Diebel: Representing Attitude, 6.6; quaternions are defined
48 |         # differently there: [w, x, y, z] instead of [x, y, z, w]!
49 |         omega = np.array([0.0, 0.0, 0.0], dtype=np.float64)
50 | 
51 |         # Equivalent, but slower:
52 |         # w = np.array([[q2[3], -q2[2], q2[1], -q2[0]],
53 |         #               [q2[2], q2[3], -q2[0], -q2[1]],
54 |         #               [-q2[1], q2[0], q2[3], -q2[2]]], dtype=np.float64)
55 |         #
56 |         # omega = 2 * w.dot(dq)
57 | 
58 |         omega[0] = 2.0 * (
59 |             q2[3] * dq[0] - q2[2] * dq[1] + q2[1] * dq[2] - q2[0] * dq[3])
60 |         omega[1] = 2.0 * (
61 |             q2[2] * dq[0] + q2[3] * dq[1] - q2[0] * dq[2] - q2[1] * dq[3])
62 |         omega[2] = 2.0 * (
63 |             -q2[1] * dq[0] + q2[0] * dq[1] + q2[3] * dq[2] - q2[2] * dq[3])
64 | 
65 |         return omega
66 |     else:
67 |         # This branch becomes numerically unstable for q1 - q2 --> 0
68 | 
69 |         # Find rotation from q1 to q2
70 |         # unit quaternion -> conjugate is the same as inverse
71 |         # q2 = r * q1 --> r = q2 * inv(q1)
72 |         r = quaternion_multiply(q2, quaternion_conjugate(q1))
73 | 
74 |         # Angle of rotation
75 |         angle = 2.0 * math.acos(r[3])
76 | 
77 |         # acos gives a value in [0, pi]; ensure that we take the short path
78 |         # (e.g. rotate by -pi/2 rather than 3pi/2)
79 |         if angle > math.pi:
80 |             angle -= 2.0 * math.pi
81 | 
82 |         # angular velocity = angle / dt
83 |         # axis of rotation corresponds to r[:3]
84 |         return angle / dt * r[:3] / vector_norm(r[:3])
85 | 
86 | 
87 | def apply_omega_to_quat(q, omega, dt):
88 |     """
89 |     Apply the angular velocity omega to the quaternion q over time dt.
90 | 
91 |     Parameters:
92 |     -----------
93 |     q: quaternion
94 |     omega: ndarray
95 |         angular velocity
96 |     dt: float
97 |         time difference
98 | 
99 |     Returns:
100 |     --------
101 |     quaternion
102 |         The quaternion of the orientation after rotation with omega for dt
103 |         seconds.
104 |     """
105 |     # rotation angle around each axis
106 |     w = omega * dt
107 | 
108 |     # only rotate if the angle we rotate through is actually significant
109 |     if vector_norm(w) < np.finfo(float).eps * 4.0:
110 |         return q
111 | 
112 |     # quaternion corresponding to this rotation
113 |     # w = 0 is not a problem; numpy handles the degenerate case gracefully
114 |     r = quaternion_about_axis(vector_norm(w), w)
115 | 
116 |     # return the rotated quaternion closest to original
117 |     return quaternion_multiply(r, q)
118 | 
119 | 
120 | def global_to_body(q, vec):
121 |     """
122 |     Convert a vector from global to body coordinates.
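
    The vector is rotated by the transpose (i.e. the inverse) of the rotation
    matrix corresponding to q: ``v_body = R(q).T.dot(v_global)``.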
123 | 
124 |     Parameters:
125 |     -----------
126 |     q: quaternion
127 |         The rotation quaternion
128 |     vec: ndarray
129 |         The vector in global coordinates
130 | 
131 |     Returns:
132 |     vec: ndarray
133 |         The vector in body coordinates
134 |     """
135 |     # quaternion_matrix(q)[:3,:3] is a homogeneous rotation matrix that
136 |     # rotates a vector by q
137 |     # quaternion_matrix(q)[:3,:3] is rot. matrix from body to global frame
138 |     # its transpose is the trafo matrix from global to body
139 |     # that matrix is applied to vec
140 |     return np.dot(quaternion_matrix(q)[:3, :3].transpose(), vec)
141 | 
142 | 
143 | def body_to_global(q, vec):
144 |     """Convert a vector from body to global coordinates.
145 | 
146 |     Parameters:
147 |     -----------
148 |     q: quaternion
149 |         The rotation quaternion
150 |     vec: ndarray
151 |         The vector in body coordinates
152 | 
153 |     Returns:
154 |     vec: ndarray
155 |         The vector in global coordinates
156 |     """
157 |     # quaternion_matrix(q)[:3,:3] is a homogeneous rotation matrix that
158 |     # rotates a vector by q
159 |     # quaternion_matrix(q)[:3,:3] is the matrix from body to global frame
160 |     # that matrix is applied to vec
161 |     return np.dot(quaternion_matrix(q)[:3, :3], vec)
--------------------------------------------------------------------------------
/SafeRLBench/test/test_bench.py:
--------------------------------------------------------------------------------
1 | from SafeRLBench import Bench, BenchConfig
2 | from SafeRLBench.bench import BenchRun
3 | from SafeRLBench.algo import PolicyGradient
4 | from SafeRLBench.envs import LinearCar
5 | 
6 | 
7 | from mock import Mock, MagicMock, patch
8 | from unittest2 import TestCase
9 | 
10 | import logging
11 | 
12 | logger = logging.getLogger(__name__)
13 | 
14 | 
15 | class TestBench(TestCase):
16 |     """Bench tests."""
17 | 
18 |     def test_bench_init(self):
19 |         """Test: BENCH: initialization."""
20 |         bench = Bench()
21 | 
22 |         self.assertIsInstance(bench.config, BenchConfig)
23 |         self.assertIsInstance(bench.runs, list)
24 | 
25 |         bench = Bench(BenchConfig())
26 | 
27 |         self.assertIsInstance(bench.config, BenchConfig)
28 |         self.assertIsInstance(bench.runs, list)
29 | 
30 |     @patch('SafeRLBench.bench.BenchRun')
31 |     def test_bench_benchmark(self, bench_run_mock):
32 |         """Test: BENCH: benchmark invocation."""
33 |         # setup mocks
34 |         bench_run_obj_mock = Mock()
35 |         bench_conf_mock = MagicMock(spec=BenchConfig)
36 | 
37 |         def create_run_obj_mock(a, b, c, d):
38 |             return bench_run_obj_mock
39 | 
40 |         bench_run_mock.side_effect = create_run_obj_mock
41 |         bench_conf_mock.__iter__.return_value = [(Mock(), Mock(), {}, {})]
42 | 
43 |         bench = Bench(bench_conf_mock)
44 |         bench()
45 | 
46 |         bench_run_obj_mock.alg.optimize.assert_called_once_with()
47 | 
48 | 
49 | class TestBenchConfig(TestCase):
50 |     """BenchConfig tests."""
51 | 
52 |     # setup test configuration
53 |     alg_config = [[
54 |         (PolicyGradient, [{}]),
55 |         (PolicyGradient, {})
56 |     ], [
57 |         (PolicyGradient, {})
58 |     ]]
59 | 
60 |     env_config = [
61 |         (LinearCar, {'horizon': 100}),
62 |         (LinearCar, {'horizon': 200})
63 |     ]
64 | 
65 |     alg_config_add = [
66 |         (PolicyGradient, [{}, {}]),
67 |     ]
68 | 
69 |     env_config_add = [
70 |         (LinearCar, {'horizon': 100}),
71 |         (LinearCar, {'horizon': 200})
72 |     ]
73 | 
74 |     @staticmethod
75 |     def _check_structure(lst):
76 |         # loop through entire structure checking types.
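        # expected shape: [[(class, [dict, ...]), ...], ...], i.e. a list of
        # lists of (algorithm or environment class, list of config dicts)
        # tuples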
77 | assert(isinstance(lst, list)) 78 | for lst_elem in lst: 79 | assert(isinstance(lst_elem, list)) 80 | for tup_elem in lst_elem: 81 | assert(isinstance(tup_elem, tuple)) 82 | assert (tup_elem[0] is PolicyGradient 83 | or tup_elem[0] is LinearCar) 84 | assert(isinstance(tup_elem[1], list)) 85 | for dict_elem in tup_elem[1]: 86 | assert(isinstance(dict_elem, dict)) 87 | 88 | def test_benchconfig_init(self): 89 | """Test: BENCHCONFIG: initialization structure.""" 90 | # apply test configuration 91 | config = BenchConfig(self.alg_config, self.env_config) 92 | 93 | # verify structure 94 | self._check_structure(config.algs) 95 | self._check_structure(config.envs) 96 | 97 | def test_benchconfig_add_tests(self): 98 | """Test: BENCHCONFIG: add_tests.""" 99 | # setup test configuration 100 | config = BenchConfig() 101 | 102 | # apply test configuration 103 | config.add_tests(self.alg_config_add, self.env_config_add) 104 | 105 | # verify structure 106 | self._check_structure(config.algs) 107 | self._check_structure(config.envs) 108 | 109 | def test_benchconfig_exceptions(self): 110 | """Test: BENCHCONFIG: exceptions.""" 111 | # setup bad test configurations 112 | alg_bad_tuple = [PolicyGradient, {}] 113 | env_bad_tuple = (LinearCar, {}) 114 | bad_tuple = [alg_bad_tuple, env_bad_tuple] 115 | 116 | alg_bad_alg = [(Mock(), {})] 117 | env_bad_alg = [(LinearCar, {})] 118 | bad_alg = [alg_bad_alg, env_bad_alg] 119 | 120 | alg_bad_env = [(PolicyGradient, {})] 121 | env_bad_env = [(Mock, {})] 122 | bad_env = [alg_bad_env, env_bad_env] 123 | 124 | alg_bad_len = [(PolicyGradient, {})] 125 | env_bad_len = [] 126 | bad_len = [alg_bad_len, env_bad_len] 127 | 128 | tests = [bad_tuple, bad_alg, bad_env, bad_len] 129 | 130 | # apply tests 131 | for test in tests: 132 | with self.subTest(test=test): 133 | self.assertRaises(ValueError, BenchConfig, *test) 134 | 135 | def test_benchconfig_iterator(self): 136 | """Test: BENCHCONFIG: Iterator.""" 137 | conf = BenchConfig(self.alg_config, self.env_config) 138 | 139 | for alg, env, alg_conf, env_conf in conf: 140 | assert alg is PolicyGradient 141 | assert env is LinearCar 142 | self.assertIsInstance(alg_conf, dict) 143 | self.assertIsInstance(env_conf, dict) 144 | 145 | 146 | class TestBenchRun(TestCase): 147 | """Test BenchRun class.""" 148 | 149 | def test_benchrun_init(self): 150 | """Test: BENCHRUN: initialization.""" 151 | args = [MagicMock() for i in range(4)] 152 | attr = ['alg', 'env', 'alg_conf', 'env_conf'] 153 | 154 | run = BenchRun(*args) 155 | 156 | for a, m in zip(attr, args): 157 | assert getattr(run, a) is m 158 | 159 | def test_benchrun_get_monitor(self): 160 | """Test: BENCHRUN: monitor getters.""" 161 | env = LinearCar() 162 | alg = PolicyGradient(env, Mock()) 163 | 164 | run = BenchRun(alg, env, None, None) 165 | 166 | alg_monitor = run.get_alg_monitor() 167 | self.assertEqual(alg_monitor, alg.monitor) 168 | 169 | env_monitor = run.get_env_monitor() 170 | self.assertEqual(env_monitor, env.monitor) 171 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # SafeRLBench documentation build configuration file, created by 5 | # sphinx-quickstart on Mon Mar 27 16:08:01 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 
9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | import os 21 | import sys 22 | sys.path.insert(0, os.path.abspath('..')) 23 | 24 | import sphinx_rtd_theme 25 | 26 | 27 | # -- General configuration ------------------------------------------------ 28 | 29 | # If your documentation needs a minimal Sphinx version, state it here. 30 | # 31 | # needs_sphinx = '1.0' 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 35 | # ones. 36 | extensions = [ 37 | 'sphinx.ext.autodoc', 38 | 'sphinx.ext.mathjax', 39 | 'sphinx.ext.viewcode', 40 | 'sphinx.ext.napoleon', 41 | 'sphinx.ext.autosummary', 42 | 'sphinx.ext.doctest', 43 | ] 44 | 45 | numpydoc_class_members_toctree = False 46 | 47 | # Add any paths that contain templates here, relative to this directory. 48 | templates_path = ['_templates'] 49 | 50 | # The suffix(es) of source filenames. 51 | # You can specify multiple suffix as a list of string: 52 | # 53 | # source_suffix = ['.rst', '.md'] 54 | source_suffix = '.rst' 55 | 56 | # The master toctree document. 57 | master_doc = 'index' 58 | 59 | # General information about the project. 60 | project = 'SafeRLBench' 61 | copyright = '2017, Nicolas Ochsner' 62 | author = 'Nicolas Ochsner' 63 | 64 | # The version info for the project you're documenting, acts as replacement for 65 | # |version| and |release|, also used in various other places throughout the 66 | # built documents. 67 | # 68 | # The short X.Y version. 69 | version = '0.1' 70 | # The full version, including alpha/beta/rc tags. 71 | release = '0.1.0' 72 | 73 | # The language for content autogenerated by Sphinx. Refer to documentation 74 | # for a list of supported languages. 75 | # 76 | # This is also used if you do content translation via gettext catalogs. 77 | # Usually you set "language" from the command line for these cases. 78 | language = None 79 | 80 | # List of patterns, relative to source directory, that match files and 81 | # directories to ignore when looking for source files. 82 | # This patterns also effect to html_static_path and html_extra_path 83 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 84 | 85 | # The name of the Pygments (syntax highlighting) style to use. 86 | pygments_style = 'sphinx' 87 | 88 | # If true, `todo` and `todoList` produce output, else they produce nothing. 89 | todo_include_todos = False 90 | 91 | 92 | # -- Options for HTML output ---------------------------------------------- 93 | 94 | # The theme to use for HTML and HTML Help pages. See the documentation for 95 | # a list of builtin themes. 96 | # 97 | html_theme = "sphinx_rtd_theme" 98 | 99 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 100 | 101 | # Theme options are theme-specific and customize the look and feel of a theme 102 | # further. For a list of options available for each theme, see the 103 | # documentation. 104 | # 105 | # html_theme_options = {} 106 | 107 | # Add any paths that contain custom static files (such as style sheets) here, 108 | # relative to this directory. 
They are copied after the builtin static files,
109 | # so a file named "default.css" will overwrite the builtin "default.css".
110 | html_static_path = ['_static']
111 | 
112 | 
113 | # -- Options for HTMLHelp output ------------------------------------------
114 | 
115 | # Output file base name for HTML help builder.
116 | htmlhelp_basename = 'SafeRLBenchdoc'
117 | 
118 | 
119 | # -- Options for LaTeX output ---------------------------------------------
120 | 
121 | latex_elements = {
122 |     # The paper size ('letterpaper' or 'a4paper').
123 |     #
124 |     # 'papersize': 'letterpaper',
125 | 
126 |     # The font size ('10pt', '11pt' or '12pt').
127 |     #
128 |     # 'pointsize': '10pt',
129 | 
130 |     # Additional stuff for the LaTeX preamble.
131 |     #
132 |     # 'preamble': '',
133 | 
134 |     # Latex figure (float) alignment
135 |     #
136 |     # 'figure_align': 'htbp',
137 | }
138 | 
139 | # Grouping the document tree into LaTeX files. List of tuples
140 | # (source start file, target name, title,
141 | #  author, documentclass [howto, manual, or own class]).
142 | latex_documents = [
143 |     (master_doc, 'SafeRLBench.tex', 'SafeRLBench Documentation',
144 |      'Nicolas Ochsner', 'howto'),
145 | ]
146 | 
147 | 
148 | # -- Options for manual page output ---------------------------------------
149 | 
150 | # One entry per manual page. List of tuples
151 | # (source start file, name, description, authors, manual section).
152 | man_pages = [
153 |     (master_doc, 'saferlbench', 'SafeRLBench Documentation',
154 |      [author], 1)
155 | ]
156 | 
157 | 
158 | # -- Options for Texinfo output -------------------------------------------
159 | 
160 | # Grouping the document tree into Texinfo files. List of tuples
161 | # (source start file, target name, title, author,
162 | #  dir menu entry, description, category)
163 | texinfo_documents = [
164 |     (master_doc, 'SafeRLBench', 'SafeRLBench Documentation',
165 |      author, 'SafeRLBench', 'One line description of project.',
166 |      'Miscellaneous'),
167 | ]
--------------------------------------------------------------------------------
/SafeRLBench/policy/test.py:
--------------------------------------------------------------------------------
1 | """Policy tests."""
2 | from __future__ import division, print_function, absolute_import
3 | 
4 | from SafeRLBench.spaces import BoundedSpace
5 | from SafeRLBench.envs.quadrocopter import Reference
6 | from SafeRLBench.envs._quadrocopter import StateVector
7 | from SafeRLBench.policy import (NeuralNetwork,
8 |                                 LinearPolicy,
9 |                                 DiscreteLinearPolicy,
10 |                                 NonLinearQuadrocopterController)
11 | 
12 | import numpy as np
13 | from numpy import isclose
14 | 
15 | import tensorflow as tf
16 | 
17 | from unittest2 import TestCase
18 | from mock import Mock
19 | 
20 | import logging
21 | 
22 | logger = logging.getLogger(__name__)
23 | 
24 | 
25 | class TestNeuralNetwork(TestCase):
26 |     """Test the Neural Network Policy."""
27 | 
28 |     fields = ['args', 'kwargs', 'action_space', 'state_space', 'dtype',
29 |               'layers', 'scope', 'init_weights', 'activation', 'X', 'a',
30 |               'W_action', 'W_var', 'a_pred', 'var', 'h', 'is_set_up']
31 | 
32 |     def test_initialization(self):
33 |         """Test: NEURALNETWORK: initialization."""
34 |         # test bad layer size:
35 |         args = [[2]]
36 |         with self.assertRaises(ValueError):
37 |             NeuralNetwork(*args)
38 | 
39 |         # test field existence
40 |         args = [[2, 6, 1]]
41 | 
42 |         nn = NeuralNetwork(*args)
43 | 
44 |         for field in self.fields:
45 |             assert hasattr(nn, field)
46 | 
47 |         # test network setup
48 |         kwargs = {
49 |             'do_setup': True
50 |         }
51 | 
52 |         nn = 
NeuralNetwork(*args, **kwargs)
53 | 
54 |         # check field contents.
55 |         assert(all([a == b for a, b in zip(args, nn.args)]))
56 |         self.assertEqual(nn.layers, args[0])
57 |         self.assertEqual(nn.dtype, 'float')
58 | 
59 |         self.assertEqual(len(nn.W_action), 2)
60 |         self.assertEqual(len(nn.W_var), 1)
61 | 
62 |         # compare string representations; a direct type comparison does not
63 |         # work here.
63 |         self.assertEqual(str(type(nn.a_pred)), str(tf.Tensor))
64 |         self.assertIn(str(type(nn.var)), (str(tf.Tensor), str(tf.constant)))
65 | 
66 |         self.assertEqual(len(nn.h), 2)
67 | 
68 |     def test_mapping(self):
69 |         """Test: NEURALNETWORK: mapping."""
70 |         args = [[2, 1]]
71 | 
72 |         kwargs = {
73 |             'weights': [tf.constant([2., 1.], shape=(2, 1))],
74 |             'do_setup': True,
75 |         }
76 | 
77 |         nn = NeuralNetwork(*args, **kwargs)
78 | 
79 |         sess = tf.Session()
80 | 
81 |         with sess.as_default():
82 |             self.assertEqual(nn(np.array([2., 1.])), [5.])
83 | 
84 |     def test_variable_assignment(self):
85 |         """Test: NEURALNETWORK: parameter assignment."""
86 |         args = [[2, 1]]
87 |         kwargs = {'do_setup': True}
88 | 
89 |         nn = NeuralNetwork(*args, **kwargs)
90 | 
91 |         with tf.Session().as_default():
92 |             nn.parameters = nn.W_action[0].assign([[2.], [1.]])
93 |             assert((np.array([[2.], [1.]]) == nn.parameters).all())
94 |             self.assertEqual(nn(np.array([2., 1.])), [5.])
95 | 
96 |     def test_copy(self):
97 |         """Test: NEURALNETWORK: copy."""
98 |         nn = NeuralNetwork([2, 6, 1])
99 |         nn_copy = nn.copy(scope='copy', do_setup=False)
100 | 
101 |         exclude = ('scope', 'kwargs')
102 | 
103 |         for field in self.fields:
104 |             if field not in exclude and field in nn.kwargs.keys():
105 |                 print(field)
106 |                 self.assertEqual(getattr(nn, field, None),
107 |                                  getattr(nn_copy, field, None))
108 | 
109 | 
110 | class TestLinearPolicy(TestCase):
111 |     """Test the Linear Policy."""
112 | 
113 |     def test_initialization(self):
114 |         """Test: LINEARPOLICY: initialization."""
115 |         lp = LinearPolicy(2, 1)
116 | 
117 |         self.assertEqual(lp.d_state, 2)
118 |         self.assertEqual(lp.d_action, 1)
119 | 
120 |         self.assertEqual(lp.par_dim, 2)
121 |         self.assertIs(lp._par_space, None)
122 | 
123 |         self.assertFalse(lp.initialized)
124 | 
125 |         self.assertIs(lp._parameters, None)
126 |         self.assertTrue(lp.biased)
127 |         self.assertEqual(lp._bias, 0)
128 |         self.assertIs(lp._par, None)
129 | 
130 |         par_mock = Mock()
131 |         par_space_mock = Mock()
132 | 
133 |         with self.assertRaises(ValueError):
134 |             lp_mocked = LinearPolicy(2, 1, par_mock, par_space_mock)
135 | 
136 |         par_mock = [2, 1]
137 | 
138 |         lp_mocked = LinearPolicy(2, 1, par_mock, par_space_mock)
139 | 
140 |         self.assertTrue(lp_mocked.initialized)
141 |         assert(all(par_mock == lp_mocked.parameters))
142 | 
143 |         self.assertEqual(par_space_mock, lp_mocked.parameter_space)
144 | 
145 |     def test_discrete_map(self):
146 |         """Test: DISCRETELINEARPOLICY: map."""
147 |         dp = DiscreteLinearPolicy(2, 1, biased=False)
148 |         dp.parameters = np.array([1, 1])
149 |         self.assertEqual(dp([1, 1]), 1)
150 |         self.assertEqual(dp([-1, -1]), 0)
151 | 
152 |         dp2 = DiscreteLinearPolicy(2, 2, biased=False)
153 |         dp2.parameters = np.array([1, 1, -1, -1])
154 |         assert(all(dp2([1, 1]) == [1, 0]))
155 |         assert(all(dp2([-1, -1]) == [0, 1]))
156 | 
157 | 
158 | class TestController(TestCase):
159 |     """Test NonLinearQuadrocopterController."""
160 | 
161 |     def test_controller_init(self):
162 |         """Test: CONTROLLER: initialization."""
163 |         ctrl = NonLinearQuadrocopterController()
164 | 
165 |         self.assertEqual(ctrl._zeta_z, .7)
166 |         assert(all(isclose(ctrl._params, [.7, .7, .7, .5, .707])))
167 | 
self.assertIsNone(ctrl.reference) 168 | self.assertTrue(ctrl.initialized) 169 | self.assertIsInstance(ctrl._par_space, BoundedSpace) 170 | 171 | def test_controller_map(self): 172 | """Test: CONTROLLER: mapping.""" 173 | ref = Reference('circle', 1 / 70.) 174 | ref.reset(StateVector()) 175 | ctrl = NonLinearQuadrocopterController(reference=ref) 176 | 177 | action = ctrl(StateVector()) 178 | 179 | print(action) 180 | assert all(isclose(action, [0.20510876, -0.30667618, 0., -6.28318548])) 181 | 182 | def test_controller_properties(self): 183 | """Test: CONTROLLER: properties.""" 184 | ctrl = NonLinearQuadrocopterController() 185 | 186 | ctrl.parameters = [0., 1., 0., 1., 0.] 187 | assert(all(np.isclose(ctrl.parameters, [0., 1., 0., 1., 0.]))) 188 | 189 | self.assertEquals(ctrl.parameter_space, ctrl._par_space) 190 | -------------------------------------------------------------------------------- /SafeRLBench/envs/_quadrocopter/quadrocopter_classes.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | 3 | """ 4 | quadrocopter_classes.py 5 | 6 | Written By: Adrian Esser and David Wu 7 | 8 | Changes: 9 | - Aug 2015 - Vectorized quadrotor state, moved state conversions here 10 | 11 | This file contains all classes for the quadrocopter simulation! 12 | 13 | This class defines an object for the state of the drone. 14 | The state contains the position, velocity, and acceleration information, 15 | the rotation matrix (which implies pitch, roll, and yaw), and the angular 16 | velocity information. 17 | """ 18 | 19 | from __future__ import print_function, division, absolute_import 20 | 21 | import numpy as np 22 | 23 | from .transformations import (quaternion_from_euler, euler_from_matrix, 24 | euler_matrix, euler_from_quaternion) 25 | 26 | 27 | __all__ = ['State', 'Parameters', 'StateVector'] 28 | 29 | 30 | class StateVector(np.ndarray): 31 | 32 | def __new__(cls, input=None): 33 | obj = np.zeros(22).view(cls) 34 | obj[-1] = 1 35 | 36 | if input is not None: 37 | obj[:len(input)] = input 38 | 39 | return obj 40 | 41 | @property 42 | def pos(self): 43 | return self[0:3] 44 | 45 | @pos.setter 46 | def pos(self, pos): 47 | self[0:3] = pos 48 | 49 | @property 50 | def vel(self): 51 | return self[3:6] 52 | 53 | @vel.setter 54 | def vel(self, vel): 55 | self[3:6] = vel 56 | 57 | @property 58 | def acc(self): 59 | return self[6:9] 60 | 61 | @acc.setter 62 | def acc(self, acc): 63 | self[6:9] = acc 64 | 65 | @property 66 | def euler(self): 67 | return self[9:12] 68 | 69 | @euler.setter 70 | def euler(self, euler): 71 | self[9:12] = euler 72 | 73 | @property 74 | def omega_g(self): 75 | return self[12:15] 76 | 77 | @omega_g.setter 78 | def omega_g(self, omega_g): 79 | self[12:15] = omega_g 80 | 81 | @property 82 | def omega_b(self): 83 | return self[15:18] 84 | 85 | @omega_b.setter 86 | def omega_b(self, omega_b): 87 | self[15:18] = omega_b 88 | 89 | @property 90 | def quat(self): 91 | return self[18:22] 92 | 93 | @quat.setter 94 | def quat(self, quat): 95 | self[18:22] = quat 96 | 97 | 98 | class State: 99 | 100 | def __init__(self): 101 | 102 | self.R = np.eye(3) 103 | self.pos = np.zeros(3) 104 | self.vel = np.zeros(3) 105 | self.acc = np.zeros(3) 106 | self.omega = np.zeros(3) 107 | 108 | @property 109 | def quaternion(self): 110 | """Rotation quaternion corresponding to R.""" 111 | return quaternion_from_euler(*self.rpy) 112 | 113 | @property 114 | def rpy(self): 115 | """Roll, pitch, yaw corresponding to R.""" 116 | return 
np.array(euler_from_matrix(self.R)) 117 | 118 | @property 119 | def state_vector(self): 120 | """Return the state as a StateVector.""" 121 | state = StateVector() 122 | state.pos = self.pos 123 | state.vel = self.vel 124 | state.acc = self.acc 125 | state.quat = self.quaternion 126 | state.euler = self.rpy 127 | state.omega_b = self.omega 128 | state.omega_g = self.R.dot(self.omega) 129 | return state 130 | 131 | @state_vector.setter 132 | def state_vector(self, state): 133 | self.pos = state.pos 134 | self.vel = state.vel 135 | self.acc = state.acc 136 | self.omega = state.omega_b 137 | self.R = self.rpy_to_R(euler_from_quaternion(state.quat)) 138 | 139 | def rpy_to_R(self, rpy): 140 | return euler_matrix(*rpy)[:3, :3] 141 | 142 | 143 | class Parameters: 144 | """Parameters for quadrotor the define the physics.""" 145 | 146 | def __init__(self): 147 | 148 | # m, mass of vehicle (kg) 149 | self.m = 1.477 150 | # g, mass normalized gravitational force (m/s^2) 151 | self.g = 9.8 152 | # L, vehicle arm length (m) 153 | self.L = 0.18 154 | # K, motor constant, determined experimentally 155 | self.K = 0.26 156 | # Ix, inertia around the body's x-axis (kg-m^2) 157 | self.Ix = 0.01152 158 | # Iy, inertia around the body's y-axis (kg-m^2) 159 | self.Iy = 0.01152 160 | # Iz, inertia around the body's z-axis (kg-m^2) 161 | self.Iz = 0.0218 162 | # fmin, mass normalized minimum rotor force (m/s^2) 163 | self.fmin = 0.17 164 | # fmax, mass normalized maximum rotor force (m/s^2) 165 | self.fmax = 6.0 166 | # vmax, maximum quadrotor velocity (m/s) 167 | self.vmax = 2.0 168 | # eta, damping ratio 169 | self.eta = 0.707 170 | # tau_z, time constant for vertical direction 171 | self.tau_z = 1.0 172 | # tau_Iz, integral time constant for vertical direction 173 | self.tau_Iz = 0.05 174 | # tau_yaw, time constant for yaw rate 175 | self.tau_yaw = 0.55 176 | # tau_Iyaw, integral time constant for yaw rate 177 | self.tau_Iyaw = 0.01 178 | # eta_y, damping ratio 179 | self.eta_y = 0.707 180 | # tau_y, time constant for x and y direction 181 | self.tau_y = 1.7 182 | # tau_Iu, integral time constant for x and y dir. 
183 | self.tau_Iu = 2.5 184 | # tau_p, time constant for roll rate 185 | self.tau_p = 0.18 186 | # tau_q, time constant for pitch rate 187 | self.tau_q = 0.18 188 | # tau_r, time constant for yaw rate 189 | self.tau_r = 0.1 190 | # tau_rp, time constant 191 | self.tau_rp = 0.18 192 | # tau_f, time constant for force integration 193 | self.tau_f = 0.1 194 | 195 | # Air drag factor in body x direction [dimensionless] 196 | self.CD_bx = 0.55 197 | # Air drag factor in body y direction [dimensionless] 198 | self.CD_by = 1.25 199 | # Air drag factor in body z direction [dimensionless] 200 | self.CD_bz = 0.3 201 | 202 | # Air drag factor in body x direction [-] 203 | self.CD_bx = 0.35 204 | # Air drag factor in body y direction [-] 205 | self.CD_by = 1.25 206 | # Air drag factor in body z direction [-] 207 | self.CD_bz = 0.3 208 | 209 | # Delay in the signal being sent from quad to computer (us) 210 | self.incoming_delay = 0.0 211 | # Delay in signal being sent from computer to quad (us) 212 | self.outgoing_delay = 100000.0 213 | # Update rate of inner loop (us) 214 | self.inner_loop_cycle = 8000.0 215 | # Update rate of outer loop (us) 216 | self.outer_loop_cycle = 15000.0 217 | 218 | # Takeoff height (m) 219 | self.takeoff_height = 1.0 220 | # Takeoff speed (m/s) 221 | self.takeoff_speed = 0.25 222 | -------------------------------------------------------------------------------- /examples/GettingStarted.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "# Getting Started\n", 11 | "\n", 12 | "This is an Notebook containing the examples from the **Getting Started** section in the documentation. Refer to the documentation for very verbose description of this code." 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": { 18 | "deletable": true, 19 | "editable": true 20 | }, 21 | "source": [ 22 | "### Optimizing a Policy" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": true, 30 | "deletable": true, 31 | "editable": true 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "# import the classes we need\n", 36 | "from SafeRLBench.envs import LinearCar\n", 37 | "from SafeRLBench.policy import LinearPolicy\n", 38 | "from SafeRLBench.algo import PolicyGradient" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "collapsed": false, 46 | "deletable": true, 47 | "editable": true 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "# get an instance of `LinearCar` with the default arguments.\n", 52 | "linear_car = LinearCar()\n", 53 | "# we need a policy which maps R^2 to R\n", 54 | "policy = LinearPolicy(2, 1)\n", 55 | "# setup parameters\n", 56 | "policy.parameters = [-1, -1, 1]\n", 57 | "\n", 58 | "# plug the environment and policy into the algorithm\n", 59 | "optimizer = PolicyGradient(linear_car, policy, estimator='central_fd')\n", 60 | "\n", 61 | "# run optimization\n", 62 | "optimizer.optimize()" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": { 68 | "deletable": true, 69 | "editable": true 70 | }, 71 | "source": [ 72 | "Lets take a look at what happened during the run. For this we can access the monitor and generate some plots. 
" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "collapsed": false, 80 | "deletable": true, 81 | "editable": true 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "import matplotlib.pyplot as plt\n", 86 | "\n", 87 | "y = optimizer.monitor.rewards\n", 88 | "\n", 89 | "plt.plot(range(len(y)), y)\n", 90 | "plt.show()" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": { 96 | "deletable": true, 97 | "editable": true 98 | }, 99 | "source": [ 100 | "### Configuration" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "collapsed": true, 108 | "deletable": true, 109 | "editable": true 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | "# import the configuration object\n", 114 | "from SafeRLBench import config" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "collapsed": true, 122 | "deletable": true, 123 | "editable": true 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "# setup stream handler\n", 128 | "config.logger_add_stream_handler()\n", 129 | "# setup logger level\n", 130 | "config.logger_set_level(config.DEBUG)\n", 131 | "# raise monitor verbosity\n", 132 | "config.monitor_set_verbosity(2)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": { 138 | "deletable": true, 139 | "editable": true 140 | }, 141 | "source": [ 142 | "After changing these values, please run the cell which invokes `optimizer.optimize` again to see what happens." 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": { 148 | "collapsed": true, 149 | "deletable": true, 150 | "editable": true 151 | }, 152 | "source": [ 153 | "### Benchmark" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": false, 161 | "deletable": true, 162 | "editable": true 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "# import the best performance measure\n", 167 | "from SafeRLBench.measure import BestPerformance\n", 168 | "# import the Bench and BenchConfig\n", 169 | "from SafeRLBench import Bench, BenchConfig" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": { 176 | "collapsed": true, 177 | "deletable": true, 178 | "editable": true 179 | }, 180 | "outputs": [], 181 | "source": [ 182 | "# define environment configuration.\n", 183 | "envs = [[(LinearCar, {'horizon': 100})]]\n", 184 | "# define algorithms configuration.\n", 185 | "algs = [[\n", 186 | " (PolicyGradient, [{\n", 187 | " 'policy': LinearPolicy(2, 1, par=[-1, -1, 1]),\n", 188 | " 'estimator': 'central_fd',\n", 189 | " 'var': var\n", 190 | " } for var in [1, 1.5, 2, 2.5]])\n", 191 | "]]\n", 192 | "\n", 193 | "# instantiate BenchConfig\n", 194 | "config = BenchConfig(algs, envs)\n", 195 | "\n", 196 | "# instantiate the bench\n", 197 | "bench = Bench(config, BestPerformance())\n", 198 | "\n", 199 | "# configure to run in parallel\n", 200 | "config.jobs_set(4)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "collapsed": false, 208 | "deletable": true, 209 | "editable": true 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "bench()" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": { 220 | "collapsed": false, 221 | "deletable": true, 222 | "editable": true 223 | }, 224 | "outputs": [], 225 | "source": [ 226 
| "bench.measures[0]" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": { 233 | "collapsed": false, 234 | "deletable": true, 235 | "editable": true 236 | }, 237 | "outputs": [], 238 | "source": [ 239 | "best_run = bench.measures[0].result[0][0]\n", 240 | "monitor = best_run.get_alg_monitor()\n", 241 | "best_trace = monitor.traces[monitor.rewards.index(max(monitor.rewards))]\n", 242 | "y = [t[1][0] for t in best_trace]\n", 243 | "x = range(len(y))\n", 244 | "\n", 245 | "import matplotlib.pyplot as plt\n", 246 | "\n", 247 | "plt.plot(x, y)\n", 248 | "plt.show()" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": { 255 | "collapsed": true, 256 | "deletable": true, 257 | "editable": true 258 | }, 259 | "outputs": [], 260 | "source": [] 261 | } 262 | ], 263 | "metadata": { 264 | "kernelspec": { 265 | "display_name": "Python (py36-srb)", 266 | "language": "python", 267 | "name": "py36-srb" 268 | }, 269 | "language_info": { 270 | "codemirror_mode": { 271 | "name": "ipython", 272 | "version": 3 273 | }, 274 | "file_extension": ".py", 275 | "mimetype": "text/x-python", 276 | "name": "python", 277 | "nbconvert_exporter": "python", 278 | "pygments_lexer": "ipython3", 279 | "version": "3.6.1" 280 | } 281 | }, 282 | "nbformat": 4, 283 | "nbformat_minor": 2 284 | } 285 | -------------------------------------------------------------------------------- /examples/SafeOpt.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "# Using SafeOpt" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "collapsed": false, 18 | "deletable": true, 19 | "editable": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "import GPy, safeopt\n", 24 | "\n", 25 | "from SafeRLBench.algo import SafeOptSwarm\n", 26 | "from SafeRLBench.envs import Quadrocopter, LinearCar\n", 27 | "from SafeRLBench.policy import NonLinearQuadrocopterController, LinearPolicy\n", 28 | "\n", 29 | "from SafeRLBench.measure import BestPerformance, SafetyMeasure\n", 30 | "\n", 31 | "from SafeRLBench import Bench\n", 32 | "\n", 33 | "# set up logging\n", 34 | "from SafeRLBench import config\n", 35 | "\n", 36 | "config.logger_set_level(config.INFO)\n", 37 | "config.logger_add_stream_handler()\n", 38 | "config.monitor_set_verbosity(2)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "#### Linear Car" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": { 52 | "collapsed": false, 53 | "deletable": true, 54 | "editable": true 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "noise_var = 0.05 ** 2\n", 59 | "\n", 60 | "bounds = [(-1., 0.), (-1., 0.), (0., 1.)]\n", 61 | "\n", 62 | "algos = [(SafeOptSwarm, [{\n", 63 | " 'policy': LinearPolicy(2, 1, par=[-1, 0, 1]),\n", 64 | " 'kernel': GPy.kern.RBF(input_dim=len(bounds), variance=std**2, lengthscale=.4, ARD=True),\n", 65 | " 'likelihood': GPy.likelihoods.gaussian.Gaussian(variance=noise_var),\n", 66 | " 'max_it': 20,\n", 67 | " 'avg_reward': -20,\n", 68 | " 'window': 3,\n", 69 | " 'fmin': -100,\n", 70 | " 'bounds': bounds, \n", 71 | " 'info': std\n", 72 | "} for std in [30, 35, 40, 45, 50]])]\n", 73 | "\n", 74 | "envs = [(LinearCar, {})]\n", 75 | "\n", 76 | "bench = Bench.make_bench(algos, envs, 
[BestPerformance(), SafetyMeasure(-100)])" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": false, 84 | "deletable": true, 85 | "editable": true 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "bench()" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "Below we output the results of the safety measure. List comprehension is used to get a more readable format for the\n", 97 | "tuples.\n", 98 | "The first element shows the standard deviation used, the second the number of violations and the last one the sum over\n", 99 | "all violations, just as documented in the `SafetyMeasure` class.\n", 100 | "\n", 101 | "We can see that increasing the standard deviation will ensure that the safty constraints will not be violated." 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": false, 109 | "deletable": true, 110 | "editable": true 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "print([(t[0].alg_conf['info'], t[1], t[2]) for t in bench.measures[1].result])" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "#### Quadrocopter" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false, 129 | "deletable": true, 130 | "editable": true 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "noise_var = 0.05 ** 2\n", 135 | "\n", 136 | "# Set fixed Gaussian measurement noise\n", 137 | "likelihood = GPy.likelihoods.gaussian.Gaussian(variance=noise_var)\n", 138 | "\n", 139 | "# Bounds on the inputs variable\n", 140 | "bounds = [(0., 1.), (0., 1.), (0., 1.), (0., 1.), (0., 1.)]\n", 141 | "\n", 142 | "# Define Kernel\n", 143 | "kernel = GPy.kern.RBF(input_dim=len(bounds), variance=1000.*2, lengthscale=1.0, ARD=True)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": { 150 | "collapsed": false, 151 | "deletable": true, 152 | "editable": true 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "noise_var = 0.05 ** 2\n", 157 | "\n", 158 | "fmin = -2400\n", 159 | "\n", 160 | "# Bounds on the inputs variable\n", 161 | "# bounds = [(1e-2, .9), (1e-2, .9), (1e-1, .9), (.2, .7), (1e-2, .9)]\n", 162 | "bounds = [(1e-2, 1.), (1e-2, 1.), (1e-2, 1.), (1e-2, 1.), (1e-2, 1.)]\n", 163 | "\n", 164 | "algos = [(SafeOptSwarm, [{\n", 165 | " 'policy': NonLinearQuadrocopterController(),\n", 166 | " 'kernel': GPy.kern.RBF(input_dim=len(bounds), variance=std**2, lengthscale=0.2, ARD=True),\n", 167 | " 'likelihood': GPy.likelihoods.gaussian.Gaussian(variance=noise_var),\n", 168 | " 'max_it': 20,\n", 169 | " 'avg_reward': -1500,\n", 170 | " 'window': 3,\n", 171 | " 'fmin': fmin,\n", 172 | " 'bounds': bounds,\n", 173 | " 'swarm_size': 1000,\n", 174 | " 'info': std\n", 175 | "} for std in [1000, 1250, 1500, 1750, 2000]])]\n", 176 | "\n", 177 | "envs = [(Quadrocopter, {})]\n", 178 | "\n", 179 | "bench = Bench.make_bench(algos, envs, [BestPerformance(), SafetyMeasure(fmin)])" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "collapsed": false, 187 | "deletable": true, 188 | "editable": true 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "bench()" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "Below we output the results of the safety measure and performance. 
List comprehension is used to get a more readable format for the tuples.\n", 200 | "The first element shows the standard deviation used, the second the number of violations and the last one the sum over\n", 201 | "all violations, just as documented in the `SafetyMeasure` class." 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": { 208 | "collapsed": false, 209 | "deletable": true, 210 | "editable": true 211 | }, 212 | "outputs": [], 213 | "source": [ 214 | "print([(t[0].alg_conf['info'], t[1], t[2]) for t in bench.measures[1].result])" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": { 221 | "collapsed": false 222 | }, 223 | "outputs": [], 224 | "source": [ 225 | "print([(t[0].alg_conf['info'], int(t[1])) for t in bench.measures[0].result])" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": { 232 | "collapsed": true 233 | }, 234 | "outputs": [], 235 | "source": [] 236 | } 237 | ], 238 | "metadata": { 239 | "kernelspec": { 240 | "display_name": "Python (py36-srb)", 241 | "language": "python", 242 | "name": "py36-srb" 243 | }, 244 | "language_info": { 245 | "codemirror_mode": { 246 | "name": "ipython", 247 | "version": 3 248 | }, 249 | "file_extension": ".py", 250 | "mimetype": "text/x-python", 251 | "name": "python", 252 | "nbconvert_exporter": "python", 253 | "pygments_lexer": "ipython3", 254 | "version": "3.6.1" 255 | } 256 | }, 257 | "nbformat": 4, 258 | "nbformat_minor": 2 259 | } 260 | -------------------------------------------------------------------------------- /SafeRLBench/policy/linear_policy.py: -------------------------------------------------------------------------------- 1 | """Linear Policy Class.""" 2 | 3 | from SafeRLBench import Policy, ProbPolicy 4 | from SafeRLBench.spaces import BoundedSpace 5 | 6 | import numpy as np 7 | 8 | __all__ = ('LinearPolicy', 'DiscreteLinearPolicy', 'NoisyLinearPolicy') 9 | 10 | 11 | class LinearPolicy(Policy): 12 | """Policy implementing a linear mapping from state to action space. 13 | 14 | Attributes 15 | ---------- 16 | d_state : positive integer 17 | Dimension of the state space. 18 | d_action : positive integer 19 | Dimension of the action space 20 | parameters : nd-array 21 | Array containing initial parameters. 22 | initialized : boolean 23 | Boolean indicating if parameters have been initialized. 24 | biased : boolean 25 | Flag indicating if the policy is supposed to be biased or not. 26 | """ 27 | 28 | def __init__(self, d_state, d_action, 29 | par=None, par_space=None, biased=True): 30 | """Initialize LinearPolicy. 31 | 32 | Parameters 33 | ---------- 34 | d_state : positive integer 35 | Dimension of the state space. 36 | d_action : positive integer 37 | Dimension of the action space 38 | par : ndarray 39 | Array containing initial parameters. If there is a constant bias, 40 | the array needs to be flat with shape (d_state * d_action + 1,). 41 | Otherwise it may either have shape (d_action, d_state) or 42 | (d_state * d_action,) 43 | biased : boolean 44 | Flag indicating if the policy is supposed to be biased or not. 45 | """ 46 | assert(d_state > 0 and d_action > 0) 47 | self.d_state = d_state 48 | self.d_action = d_action 49 | 50 | self.par_dim = d_state * d_action 51 | 52 | self._par_space = None 53 | 54 | self.initialized = False 55 | 56 | if par is not None: 57 | self.parameters = par 58 | else: 59 | # make sure some fields exist. 
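# (they are populated on the first assignment to ``parameters``;
# see the property setter below)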
60 | self._parameters = None
61 | self.biased = biased
62 | self._bias = 0
63 | self._par = None
64 |
65 | if par_space is not None:
66 | self.parameter_space = par_space
67 |
68 | def map(self, state):
69 | """Map a state to an action.
70 |
71 | Parameters
72 | ----------
73 | state : array-like
74 | Element of state space.
75 |
76 | Returns
77 | -------
78 | action : ndarray
79 | Element of action space.
80 | """
81 | if self.d_action == 1:
82 | ret = self._parameters.dot(state).item() + self._bias
83 | else:
84 | ret = self._parameters.dot(state) + self._bias
85 | return ret
86 |
87 | @property
88 | def parameters(self):
89 | """Property to access parameters.
90 |
91 | The property returns the same representation as used when set.
92 | If the mapping contains a bias, then the input needs to be an ndarray
93 | with shape (d_action * d_state + 1,) otherwise it may either be a
94 | (d_action, d_state) or (d_action * d_state,) shaped array.
95 | """
96 | if not self.initialized:
97 | raise NameError('Policy parameters not initialized yet.')
98 | return self._par
99 |
100 | @parameters.setter
101 | def parameters(self, par):
102 | par = np.array(par).copy()
103 |
104 | if not self.initialized:
105 | shape = par.shape
106 | if (shape == (self.d_action, self.d_state)
107 | or shape == (self.par_dim,)):
108 | self.biased = False
109 | self._bias = 0
110 | elif shape == (self.par_dim + 1,):
111 | self.biased = True
112 | else:
113 | raise ValueError("Parameters with shape %s invalid."
114 | % str(shape))
115 |
116 | self.initialized = True
117 |
118 | # store parameter in original representation.
119 | self._par = par
120 |
121 | if self.d_action == 1:
122 | shape = (self.d_state,)
123 | else:
124 | shape = (self.d_action, self.d_state)
125 |
126 | if not self.biased:
127 | self._parameters = par.reshape(shape)
128 | else:
129 | self._bias = par[-1]
130 | self._parameters = par[0:-1].reshape(shape)
131 |
132 | @property
133 | def parameter_space(self):
134 | """Property storing the parameter space.
135 |
136 | By default the parameter space will be assigned to be a BoundedSpace
137 | between [0,1]^d. However, it might be necessary to change this. A user
138 | may thus assign a new parameter space.
139 |
140 | WARNING: Currently there is no sanity check for manually assigned
141 | parameter spaces.
142 | """
143 | if self._par_space is None:
144 | if self.biased:
145 | shape = (self.par_dim + 1,)
146 | else:
147 | shape = (self.par_dim,)
148 | self._par_space = BoundedSpace(0, 1, shape)
149 |
150 | return self._par_space
151 |
152 | @parameter_space.setter
153 | def parameter_space(self, par_space):
154 | self._par_space = par_space
155 |
156 |
157 | class DiscreteLinearPolicy(LinearPolicy):
158 | """LinearPolicy on a discrete action space of {0, 1}^d."""
159 |
160 | def map(self, state):
161 | """Map to discrete action space.
162 |
163 | Parameters
164 | ----------
165 | state : element of state space
166 | state to be mapped.
167 |
168 | Returns
169 | -------
170 | action : ndarray
171 | Element of {0, 1}^d_action
172 | """
173 | cont_action = super(DiscreteLinearPolicy, self).map(state)
174 | if self.d_action == 1:
175 | if (cont_action < 0):
176 | action = 0
177 | else:
178 | action = 1
179 | else:
180 | action = np.zeros(cont_action.shape, dtype=int)
181 | action[cont_action > 0] += 1
182 |
183 | return action
184 |
185 |
186 | class NoisyLinearPolicy(LinearPolicy, ProbPolicy):
187 | """
188 | Policy implementing a linear mapping from state to action space with noise.
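Actions are the linear map of ``LinearPolicy`` plus additive Gaussian
noise, i.e. ``a = par . state + eps`` with ``eps ~ N(0, sigma**2)``.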
189 |
190 | Attributes
191 | ----------
192 | d_state : positive integer
193 | Dimension of the state space.
194 | d_action : positive integer
195 | Dimension of the action space.
196 | sigma : double
197 | Sigma for Gaussian noise.
198 | parameters : nd-array
199 | Array containing initial parameters.
200 | initialized : boolean
201 | Boolean indicating if parameters have been initialized.
202 | biased : boolean
203 | Flag indicating if the policy is supposed to be biased or not.
204 | """
205 |
206 | def __init__(self, d_state, d_action, sigma,
207 | par=None, par_space=None, biased=False):
208 | """Initialize Noisy Linear Policy.
209 |
210 | Parameters
211 | ----------
212 | d_state : positive integer
213 | Dimension of the state space.
214 | d_action : positive integer
215 | Dimension of the action space.
216 | sigma : double
217 | Sigma for Gaussian noise.
218 | par : ndarray
219 | Array containing initial parameters. If there is a constant bias,
220 | the array needs to be flat with shape (d_state * d_action + 1,).
221 | Otherwise it may either have shape (d_action, d_state) or
222 | (d_state * d_action,).
223 | biased : boolean
224 | Flag indicating if the policy is supposed to be biased or not.
225 | """
226 | assert(d_state > 0 and d_action > 0)
227 |
228 | self.sigma = sigma
229 |
230 | self.random_state = np.random.RandomState()
231 |
232 | super(NoisyLinearPolicy, self).__init__(d_state, d_action, par,
233 | par_space, biased)
234 |
235 | def map(self, state):
236 | """Map a state to an action.
237 |
238 | Parameters
239 | ----------
240 | state : array-like
241 | Element of state space.
242 |
243 | Returns
244 | -------
245 | action : ndarray
246 | Element of action space.
247 | """
248 | noise = self.random_state.normal(0, self.sigma)
249 | return super(NoisyLinearPolicy, self).map(state) + noise
250 |
251 | def grad_log_prob(self, state, action):
252 | """Compute the gradient of the logarithm of the probability dist."""
253 | noise = action - super(NoisyLinearPolicy, self).map(state)
254 | # gradient of log N(action; par . state, sigma**2) wrt the
255 | # parameters: (action - par . state) * state / sigma**2
256 | return noise * np.asarray(state) / self.sigma ** 2
257 |
--------------------------------------------------------------------------------
/SafeRLBench/configuration.py:
--------------------------------------------------------------------------------
1 | """Global Configuration Class."""
2 | import logging
3 | import sys
4 |
5 |
6 | class SRBConfig(object):
7 | """SafeRLBench configuration class.
8 |
9 | This is a configuration class providing a container for global variables
10 | and configuration functions.
11 |
12 | In general this class should not be instantiated directly, but rather
13 | accessed through the global variable ``SafeRLBench.config``, which is
14 | created when the package is imported and will contain the root logger of
15 | the package.
16 |
17 | Attributes
18 | ----------
19 | logger_stream_handler :
20 | This is a property wrapping the current stream handler. The current
21 | stream handler can be accessed through this property, or it may even
22 | be replaced with a new stream handler. In case of resetting the stream
23 | handler, the old handler will be removed from the logger
24 | automatically.
25 | logger_file_handler :
26 | This is a property wrapping the current file handler. The current
27 | file handler can be accessed through this property, or it may even
28 | be replaced with a new file handler. In case of resetting the file
29 | handler, the old handler will be removed from the logger
30 | automatically.
31 | logger_format :
32 | This is a property to access the format stored. This is the default
33 | format that will be used when adding the default handlers.
34 | When assigned to, the formats of already set handlers will be changed
35 | to the new format.
36 | log :
37 | The logger object.
38 | n_jobs :
39 | Number of jobs used by the library.
40 | monitor_verbosity :
41 | Verbosity of the monitor.
42 |
43 | Methods
44 | -------
45 | monitor_set_verbosity(verbosity)
46 | Set monitor verbosity level.
47 | jobs_set(n_jobs)
48 | Set the number of jobs used by a worker pool.
49 | logger_set_level(level=logging.INFO)
50 | Set the logger level package wide.
51 | logger_add_stream_handler()
52 | Set a handler to print logs to stdout.
53 | logger_add_file_handler(path)
54 | Set a handler to print to file.
55 |
56 | Notes
57 | -----
58 | Access logger levels through the static variables:
59 |
60 | +-----------+------------------+
61 | |DEBUG | logging.DEBUG |
62 | +-----------+------------------+
63 | |INFO | logging.INFO |
64 | +-----------+------------------+
65 | |WARNING | logging.WARNING |
66 | +-----------+------------------+
67 | |ERROR | logging.ERROR |
68 | +-----------+------------------+
69 | |CRITICAL | logging.CRITICAL |
70 | +-----------+------------------+
71 | """
72 |
73 | DEBUG = logging.DEBUG
74 | INFO = logging.INFO
75 | WARNING = logging.WARNING
76 | ERROR = logging.ERROR
77 | CRITICAL = logging.CRITICAL
78 |
79 | def __init__(self, log):
80 | """Initialize default configuration."""
81 | # some libraries think it is a good idea to add handlers by default
82 | # without documenting that at all, thanks gpy...
83 | log.propagate = False
84 |
85 | self.log = log
86 | self.n_jobs = 1
87 | self.monitor_verbosity = 0
88 |
89 | self._stream_handler = None
90 | self._file_handler = None
91 | self._fmt = ('%(process)d - %(asctime)s - %(name)s - %(levelname)s'
92 | + ' - %(message)s')
93 | self._formatter = logging.Formatter(self._fmt)
94 |
95 | def monitor_set_verbosity(self, verbosity):
96 | """Set monitor verbosity level.
97 |
98 | Parameters
99 | ----------
100 | verbosity : int
101 | Non-negative verbosity level.
102 | """
103 | if verbosity < 0:
104 | raise ValueError('Verbosity level cannot be negative.')
105 | self.monitor_verbosity = verbosity
106 |
107 | def jobs_set(self, n_jobs):
108 | """Set the number of jobs used by a worker pool.
109 |
110 | Parameters
111 | ----------
112 | n_jobs : int
113 | Number of jobs, needs to be larger than 0.
114 | """
115 | if n_jobs <= 0:
116 | raise ValueError('Number of jobs needs to be larger than 0.')
117 | self.n_jobs = n_jobs
118 |
119 | def logger_set_level(self, level=logging.INFO):
120 | """Set the logger level package wide.
121 |
122 | Parameters
123 | ----------
124 | level :
125 | Logger level as defined in logging.
126 | """
127 | self.log.setLevel(level)
128 |
129 | @property
130 | def logger_stream_handler(self):
131 | """Property storing the current stream handler.
132 |
133 | If overwritten with a new stream handler, the logger will be updated
134 | with the new stream handler.
135 |
136 | Examples
137 | --------
138 | Set up a stream handler for the logger.
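Assigning a handler to this property replaces any previously attached
stream handler on the package logger.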
139 |
140 | >>> from SafeRLBench import config
141 | >>> import logging
142 | >>> # configure the stream handler
143 | >>> ch = logging.StreamHandler(sys.stdout)
144 | >>> config.logger_stream_handler = ch
145 |
146 | To use the default format:
147 |
148 | >>> formatter = logging.Formatter(config.logger_format)
149 | >>> ch.setFormatter(formatter)
150 |
151 | which is equivalent to using `logger_add_stream_handler`.
152 | """
153 | return self._stream_handler
154 |
155 | @logger_stream_handler.setter
156 | def logger_stream_handler(self, ch):
157 | """Setter method for logger_stream_handler property."""
158 | if self._stream_handler is not None:
159 | self.log.removeHandler(self._stream_handler)
160 |
161 | self._stream_handler = ch
162 | if ch is not None:
163 | self.log.addHandler(ch)
164 |
165 | @property
166 | def logger_file_handler(self):
167 | """Property storing the current file handler.
168 |
169 | If overwritten with a new file handler, the logger will be updated with
170 | the new file handler.
171 |
172 | Examples
173 | --------
174 | Set up a file handler for the logger.
175 |
176 | >>> from SafeRLBench import config
177 | >>> import logging
178 | >>> # configure the file handler
179 | >>> fh = logging.FileHandler('logs.log')
180 | >>> config.logger_file_handler = fh
181 |
182 | To use the default format:
183 |
184 | >>> formatter = logging.Formatter(config.logger_format)
185 | >>> fh.setFormatter(formatter)
186 |
187 | which is equivalent to using `logger_add_file_handler`.
188 | """
189 | return self._file_handler
190 |
191 | @logger_file_handler.setter
192 | def logger_file_handler(self, fh):
193 | """Setter method for logger_file_handler property."""
194 | if self._file_handler is not None:
195 | self.log.removeHandler(self._file_handler)
196 |
197 | self._file_handler = fh
198 | if fh is not None:
199 | self.log.addHandler(fh)
200 |
201 | @property
202 | def logger_format(self):
203 | """Property for default logger format.
204 |
205 | If overwritten, stream and file handlers will be updated accordingly.
206 | However, if stream or file handlers are updated manually, logger_format
207 | will be ignored.
208 | """
209 | return self._fmt
210 |
211 | @logger_format.setter
212 | def logger_format(self, fmt):
213 | """Setter method for logger_format property."""
214 | self._formatter = logging.Formatter(fmt)
215 |
216 | self._fmt = fmt
217 |
218 | if self.logger_stream_handler is not None:
219 | self.logger_stream_handler.setFormatter(self._formatter)
220 |
221 | if self.logger_file_handler is not None:
222 | self.logger_file_handler.setFormatter(self._formatter)
223 |
224 | def logger_add_stream_handler(self):
225 | """Set a handler to print logs to stdout."""
226 | if self._stream_handler is not None:
227 | self.log.removeHandler(self._stream_handler)
228 |
229 | ch = logging.StreamHandler(sys.stdout)
230 | ch.setFormatter(self._formatter)
231 |
232 | self._stream_handler = ch
233 | self.log.addHandler(ch)
234 |
235 | def logger_add_file_handler(self, path):
236 | """Set a handler to print to file.
237 |
238 | Parameters
239 | ----------
240 | path :
241 | Path to log file.
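
Examples
--------
Attach the default-format file handler in one call (equivalent to the
manual setup shown for ``logger_file_handler`` above):

>>> from SafeRLBench import config
>>> config.logger_add_file_handler('logs.log')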
242 | """ 243 | if self._file_handler is not None: 244 | self.log.removeHandler(self._file_handler) 245 | 246 | fh = logging.FileHandler(path) 247 | fh.setFormatter(self._formatter) 248 | 249 | self._file_handler = fh 250 | self.log.addHandler(fh) 251 | -------------------------------------------------------------------------------- /SafeRLBench/policy/neural_network.py: -------------------------------------------------------------------------------- 1 | """Neural Network Policy implementation.""" 2 | 3 | from SafeRLBench import Policy 4 | from SafeRLBench.error import add_dependency, MultipleCallsException 5 | from SafeRLBench.spaces import RdSpace 6 | 7 | import numpy as np 8 | from numpy.random import normal 9 | 10 | try: 11 | import tensorflow as tf 12 | except ModuleNotFoundError: 13 | tf = None 14 | 15 | import logging 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | def default_init_weights(shape): 21 | """Initialize default weights.""" 22 | weights = tf.random_normal(shape, mean=0, stddev=0.1, name='weights') 23 | return tf.Variable(weights) 24 | 25 | 26 | class NeuralNetwork(Policy): 27 | """Fully connected Neural Network Policy. 28 | 29 | Attributes 30 | ---------- 31 | args : list 32 | Contains the args used to initialize the policy. 33 | kwargs : dict 34 | Contains the kwargs used to initialize the policy. 35 | layers : list of integers 36 | A list describing the layer sizes. The first element represents the 37 | size of the input layer, the last element the size of the output 38 | layer. 39 | state_space : space instance 40 | action_space : space instance 41 | weights : tf.Variable 42 | If none the init_weights function will be used to initialize the 43 | weights. 44 | init_weights : callable 45 | Takes a shape as an argument and returns a tf.Variable according to 46 | this shape. 47 | activation : list of activation functions 48 | An activation function which will be used to construct the respective 49 | layer. If only one activation function is passed, it will be used for 50 | every layer. If the argument is None by default the sigmoid function 51 | will be used. 52 | dtype : string 53 | Data type of input and output. 54 | W_action : list of tf.Variable 55 | The list contains the `tf.Variable` instances describing the mapping 56 | between the hidden layers. The i-th entry describes the connection 57 | between layer i and layer i+1. 58 | W_var : list of tf.Variable 59 | This list contains the weights used to compute the variance estimation. 60 | Each entry corresponds to one layer and contains weights of shape 61 | (layer[i], 1). 62 | a_pred : 63 | Action estimate of the fully connected neural network defined by 64 | `W_action` and activation. 65 | var : 66 | Variance estimate which is a weighted sum of all hidden units. 67 | The weights are described by `W_var`. 
68 | h : list of tf.Tensor
69 | Hidden layers.
70 | """
71 |
72 | def __init__(self,
73 | layers, weights=None, init_weights=None, activation=None,
74 | dtype='float', scope='global', do_setup=False):
75 | """Initialize Neural Network wrapper."""
76 | add_dependency(tf, 'TensorFlow')
77 |
78 | if (len(layers) < 2):
79 | raise ValueError('At least two layers needed.')
80 |
81 | # determine state and action space
82 | state_space = RdSpace((layers[0],))
83 | action_space = RdSpace((layers[-1],))
84 |
85 | # store arguments convenient for copy operation
86 | self.args = [layers]
87 | self.kwargs = {
88 | 'weights': weights,
89 | 'init_weights': init_weights,
90 | 'activation': activation,
91 | 'dtype': dtype
92 | }
93 |
94 | self.state_space = state_space
95 | self.action_space = action_space
96 |
97 | self.dtype = dtype
98 | self.layers = layers
99 | self.scope = scope
100 |
101 | self.is_set_up = False
102 |
103 | if init_weights is None:
104 | self.init_weights = default_init_weights
105 | else:
106 | self.init_weights = init_weights
107 |
108 | # Activation function: a list must match the hidden layer count and
109 | # is used as is; a single callable is replicated for every layer.
109 | if activation is None:
110 | activation = (len(layers) - 2) * [tf.sigmoid]
111 | elif isinstance(activation, list):
112 | if len(activation) != len(layers) - 2:
113 | raise ValueError('Activation list has wrong size.')
114 | else:
115 | activation = (len(layers) - 2) * [activation]
116 |
117 | self.activation = activation
118 |
119 | # Symbols
120 | self.X = tf.placeholder(dtype, shape=[None, layers[0]], name='X')
121 | self.a = tf.placeholder(dtype, shape=[None, layers[-1]], name='a')
122 |
123 | if do_setup:
124 | with tf.variable_scope(self.scope):
125 | self.setup()
126 | else:
127 | # Make sure all fields exist
128 | self.W_action = None
129 | self.W_var = None
130 | self.a_pred = None
131 | self.var = None
132 | self.h = None
133 |
134 | self.sess = None
135 |
136 | def setup(self):
137 | """Set up the network graph.
138 |
139 | The weights and graph will be initialized by this function. If do_setup
140 | is True, setup will automatically be called when instantiating the
141 | class.
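
Examples
--------
A minimal sketch of deferred setup (the layer sizes are illustrative):

>>> nn = NeuralNetwork([2, 6, 1])
>>> with tf.variable_scope(nn.scope):
...     nn.setup()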
142 | """ 143 | if self.is_set_up: 144 | raise MultipleCallsException('Network is already set up.') 145 | 146 | layers = self.layers 147 | weights = self.kwargs['weights'] 148 | 149 | # Weights for the action estimation 150 | with tf.variable_scope('action_estimator'): 151 | if weights is None: 152 | w = [] 153 | for i in range(len(layers) - 1): 154 | w.append(self.init_weights((layers[i], layers[i + 1]))) 155 | else: 156 | w = weights 157 | 158 | self.W_action = w 159 | 160 | # generate network 161 | self.a_pred = self._generate_network() 162 | 163 | # Weights for variance estimation 164 | with tf.variable_scope('variance_estimator'): 165 | self.W_var = [] 166 | for i in range(1, len(layers) - 1): 167 | self.W_var.append(self.init_weights((layers[i], 1))) 168 | 169 | # generate variance network 170 | self.var = self._generate_variance() 171 | 172 | self.is_set_up = True 173 | 174 | def _generate_network(self): 175 | self.h = [self.X] 176 | for i, act in enumerate(self.activation): 177 | h_i = self.h[i] 178 | w_i = self.W_action[i] 179 | self.h.append(act(tf.matmul(h_i, w_i))) 180 | 181 | return tf.matmul(self.h[-1], self.W_action[-1]) 182 | 183 | def _generate_variance(self): 184 | var = [] 185 | if not self.W_var: 186 | return tf.constant(0, name='variance') 187 | for h_i, w_i in zip(self.W_var, self.h[1:]): 188 | var.append(tf.reduce_sum(tf.matmul(w_i, h_i))) 189 | return tf.abs(tf.reduce_sum(var, name='variance')) 190 | 191 | def copy(self, scope, do_setup=True): 192 | """Generate a copy of the network. 193 | 194 | The copy will instantiate the class with the same arguments, but 195 | replace `scope` and `do_setup` with the respective arguments passed 196 | to this function. 197 | 198 | Parameters 199 | ---------- 200 | scope : String 201 | Indication the scope that should be used when initializing the 202 | network. 203 | do_setup : Boolean 204 | Default: True ; Indicating if the `setup` method, should be called 205 | when instantiating. 206 | """ 207 | self.kwargs['scope'] = scope 208 | self.kwargs['do_setup'] = do_setup 209 | return NeuralNetwork(*self.args, **self.kwargs) 210 | 211 | def map(self, state): 212 | """Compute output in session. 213 | 214 | Make sure a default session is set when calling. 215 | """ 216 | state = state.flatten() 217 | assert(self.state_space.contains(state)) 218 | 219 | if self.sess is None: 220 | sess = tf.get_default_session() 221 | else: 222 | sess = self.sess 223 | mean, var = sess.run([self.a_pred, self.var], {self.X: [state]}) 224 | 225 | action = np.array(normal(mean, var)) 226 | action = action.reshape(self.action_space.shape) 227 | 228 | return action 229 | 230 | @property 231 | def parameters(self): 232 | """Return weights of the neural network. 233 | 234 | This returns a list of tf.Variables. Please note that these can not 235 | simply be updated by assignment. See the parameters.setter docstring 236 | for more information. 237 | The list of tf.Variables can be directly accessed through the 238 | attribute `W`. 239 | """ 240 | if self.sess is None: 241 | return tf.get_default_session().run(self.W_action + self.W_var) 242 | else: 243 | return self.sess.run(self.W_action + self.W_var) 244 | 245 | @parameters.setter 246 | def parameters(self, update): 247 | """Setter function for parameters. 248 | 249 | Since the parameters are a list of `tf.Variable`, we need to feed them 250 | into an assign operator. Thus the argument, needs to be a list 251 | containing an element for each Variable in `W_action` and `W_var` in 252 | that order, i.e. 
`W_var` will be the last element.
253 |
254 | Parameters
255 | ----------
256 | update :
257 | List of parameters for each `tf.Variable`.
258 |
259 | Notes
260 | -----
261 | Make sure there is a default session or `self.sess` is set.
262 | """
263 | if not isinstance(update, list):
264 | update = [update]
265 |
266 | variables = self.W_action + self.W_var
267 | assign_op = []
268 |
269 | for (var, val) in zip(variables, update):
270 | assign_op.append(var.assign(val))
271 |
272 | if self.sess is None:
273 | sess = tf.get_default_session()
274 | else:
275 | sess = self.sess
276 |
277 | sess.run(assign_op)
278 |
279 | @property
280 | def parameter_space(self):
281 | """Return parameter space."""
282 | pass
283 |
--------------------------------------------------------------------------------
/SafeRLBench/base.py:
--------------------------------------------------------------------------------
1 | """Module implementing base classes."""
2 |
3 | from __future__ import division, print_function, absolute_import
4 |
5 | from abc import ABCMeta, abstractmethod
6 | from six import add_metaclass
7 |
8 | from SafeRLBench import AlgoMonitor, EnvMonitor
9 |
10 | __all__ = ('EnvironmentBase', 'Space')
11 |
12 |
13 | @add_metaclass(ABCMeta)
14 | class EnvironmentBase(EnvMonitor):
15 | """Environment Base Class.
16 |
17 | This base class defines and implements an interface to any environment
18 | implementation part of the environment module. Subclasses inheriting
19 | from EnvironmentBase need to make sure they meet the requirements below.
20 |
21 | Any subclass must implement:
22 | * _update(action)
23 | * _reset()
24 |
25 | Any subclass might override:
26 | * _rollout(policy)
27 |
28 | Make sure the `state_space`, `action_space` and `horizon` attributes will
29 | be set in any subclass, as the default implementation and / or the monitor
30 | may access them to retrieve information.
31 |
32 | Attributes
33 | ----------
34 | state_space :
35 | State space of the environment.
36 | action_space :
37 | Action space of the environment.
38 | horizon :
39 | Maximum number of iterations until rollout will stop.
40 | monitor : EnvData instance
41 | Contains the monitoring data. The monitor will be automatically
42 | initialized during creation.
43 |
44 | Methods
45 | -------
46 | rollout(policy)
47 | Perform a rollout according to the actions selected by policy.
48 | update(action)
49 | Update the environment state according to the action.
50 | reset()
51 | Reset the environment to the initial state.
52 |
53 | Notes
54 | -----
55 | When overwriting _rollout(policy) use the provided interface functions
56 | and do not directly call the private implementation.
57 | """
58 |
59 | def __init__(self, state_space, action_space, horizon=0):
60 | """Initialize EnvironmentBase.
61 |
62 | Parameters
63 | ----------
64 | state_space :
65 | State space of the environment.
66 | action_space :
67 | Action space of the environment.
68 | horizon :
69 | Maximum number of iterations until rollout will stop.
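
Examples
--------
A minimal sketch of a subclass; only ``_update`` and ``_reset`` are
required (state and action spaces are omitted here for brevity):

>>> class DummyEnv(EnvironmentBase):
...     def _update(self, action):
...         self.state += action
...         return action, self.state, -abs(self.state)
...     def _reset(self):
...         self.state = 0.
>>> env = DummyEnv(None, None, horizon=5)
>>> trace = env.rollout(lambda state: 1.)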
70 | """ 71 | super(EnvironmentBase, self).__init__() 72 | self.state_space = state_space 73 | self.action_space = action_space 74 | self.horizon = horizon 75 | 76 | # Implement in subclasses: 77 | # See update(self, action) for more information 78 | @abstractmethod 79 | def _update(self, action): 80 | raise NotImplementedError 81 | 82 | # See reset(self) for more information 83 | @abstractmethod 84 | def _reset(self): 85 | raise NotImplementedError 86 | 87 | # Override in subclasses if necessary 88 | def _rollout(self, policy): 89 | self.reset() 90 | trace = [] 91 | for n in range(self.horizon): 92 | action = policy(self.state) 93 | trace.append(self.update(action)) 94 | return trace 95 | 96 | def update(self, action): 97 | """Update the environment state according to the action. 98 | 99 | Wraps the subclass implementation _update(action) providing 100 | monitoring capabilities. 101 | 102 | Parameters 103 | ---------- 104 | action: array-like 105 | Element of action_space 106 | 107 | Returns 108 | ------- 109 | tuple : 3-tuple 110 | action : array-like 111 | element of action space as it has been applied in update 112 | state : array-like 113 | element of state_space which is the resulting state after 114 | applying action 115 | reward : float 116 | reward for resulting state 117 | """ 118 | with self.monitor_update(): 119 | t = self._update(action) 120 | return t 121 | 122 | def reset(self): 123 | """Reset the environment to initial state. 124 | 125 | Reset wraps the subclass implementation _reset() providing monitoring 126 | capabilities. 127 | """ 128 | with self.monitor_reset(): 129 | self._reset() 130 | 131 | def rollout(self, policy): 132 | """Perform a rollout according to the actions selected by policy. 133 | 134 | Wraps the implementation _rollout(policy) providing monitoring 135 | capabilities. 136 | 137 | Parameters 138 | ---------- 139 | Policy : callable 140 | Maps element of state_space to element of action_space 141 | 142 | Returns 143 | ------- 144 | trace : list of 3-tuple 145 | List of (action, state, reward)-tuple as returned by update(). 146 | """ 147 | with self.monitor_rollout(): 148 | trace = self._rollout(policy) 149 | return trace 150 | 151 | def __repr__(self): 152 | """Return class name.""" 153 | return self.__class__.__name__ 154 | 155 | 156 | @add_metaclass(ABCMeta) 157 | class Space(object): 158 | """Baseclass for Spaceobject. 159 | 160 | All methods have to be implemented in any subclass. 161 | 162 | Methods 163 | ------- 164 | contains(x) 165 | Check if x is an element of space. 166 | element 167 | Return arbitray element in space. 168 | """ 169 | 170 | @abstractmethod 171 | def contains(self, x): 172 | """Check if x is an element of space.""" 173 | pass 174 | 175 | @abstractmethod 176 | def sample(self): 177 | """Return an arbitrary element in space for unit testing.""" 178 | pass 179 | 180 | @property 181 | @abstractmethod 182 | def dimension(self): 183 | """Return the dimension of the space.""" 184 | pass 185 | 186 | 187 | @add_metaclass(ABCMeta) 188 | class AlgorithmBase(AlgoMonitor): 189 | """Baseclass for any algorithm. 190 | 191 | This baseclass defines a uniform interface for any algorithm part of 192 | the algorithm module SafeRLBench.algo. It features monitoring capabilities 193 | for tracking and evaluating the execution of the algorithm. 194 | 195 | Inheriting from `AlgorithmBase` is suspect to some constraints, i.e. any 196 | algorithm needs to be implemented using the following functions. 
197 | 198 | Any subclass must overwrite: 199 | * _initialize(policy) 200 | * _step(policy) 201 | * _is_finished() 202 | 203 | Any subclass may overwrite: 204 | * _optimize(policy) 205 | 206 | In case one does overwrite _optimize, the functions _initialize(), 207 | _step(parameter), _is_finished() may just pass unless they are used. 208 | This may however change the information tracked by the monitor. 209 | 210 | Attributes 211 | ---------- 212 | environment : 213 | Environment we want to optimize on 214 | policy : 215 | Policy to be optimized 216 | max_it : int 217 | Maximum number of iterations 218 | monitor : AlgoData instance 219 | Contains monitoring data. The monitor will automatically initialize 220 | on creation of an object. 221 | 222 | Methods 223 | ------- 224 | optimize() 225 | Optimize a policy with respective algorithm. 226 | initialize() 227 | Initialize policy parameter. 228 | step() 229 | Update policy parameters. 230 | is_finished() 231 | Return true when algorithm is finished. 232 | 233 | Notes 234 | ----- 235 | Specification of the private functions. 236 | 237 | _initialize(self): 238 | Initialize the algorithm. 239 | _step(): 240 | Compute one step of the algorithm. 241 | _is_finished(): 242 | Return True when algorithm is supposed to finish. 243 | """ 244 | 245 | def __init__(self, environment, policy, max_it): 246 | super(AlgorithmBase, self).__init__() 247 | 248 | self.environment = environment 249 | self.policy = policy 250 | self.max_it = max_it 251 | 252 | self.grad = None 253 | 254 | # Have to be overwritten. 255 | @abstractmethod 256 | def _initialize(self): 257 | pass 258 | 259 | @abstractmethod 260 | def _step(self): 261 | pass 262 | 263 | @abstractmethod 264 | def _is_finished(self): 265 | pass 266 | 267 | # May be overwritten 268 | def _optimize(self): 269 | self.initialize() 270 | 271 | for n in range(self.max_it): 272 | self.step() 273 | if self.is_finished(): 274 | break 275 | 276 | def optimize(self): 277 | """Optimize policy parameter. 278 | 279 | Wraps subclass implementation in _optimize(policy). 280 | 281 | Parameters 282 | ---------- 283 | policy: PolicyBase subclass 284 | """ 285 | with self.monitor_optimize(): 286 | self._optimize() 287 | 288 | def initialize(self): 289 | """Initialize policy parameter. 290 | 291 | Wraps subclass implementation in _initialize(policy) 292 | 293 | Parameters 294 | ---------- 295 | policy: PolicyBase subclass 296 | """ 297 | with self.monitor_initialize(): 298 | self._initialize() 299 | 300 | def step(self): 301 | """Update policy parameter. 302 | 303 | Wraps subclass implementation in _step(policy). 304 | 305 | Parameters 306 | ---------- 307 | policy: PolicyBase subclass 308 | """ 309 | with self.monitor_step(): 310 | self._step() 311 | 312 | def is_finished(self): 313 | """Return True when algorithm is supposed to finish. 314 | 315 | Wraps subclass implementation in _is_finished(). 
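
Returns
-------
bool :
    True as soon as the algorithm considers the optimization finished
    and the loop in ``optimize`` should stop.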
316 | """ 317 | stop = self._is_finished() 318 | return stop 319 | 320 | def reset(self): 321 | """Reset the monitor.""" 322 | self._alg_reset() 323 | 324 | def __repr__(self): 325 | if hasattr(self, '_info'): 326 | return self._info() 327 | return self.__class__.__name__ 328 | 329 | 330 | @add_metaclass(ABCMeta) 331 | class Policy(object): 332 | """Minimal policy interface.""" 333 | 334 | def __call__(self, state): 335 | return self.map(state) 336 | 337 | @abstractmethod 338 | def map(self, state): 339 | """Map element of state space to action space.""" 340 | pass 341 | 342 | @property 343 | @abstractmethod 344 | def parameters(self): 345 | """Access current parameters.""" 346 | pass 347 | 348 | @parameters.setter 349 | @abstractmethod 350 | def parameters(self, par): 351 | pass 352 | 353 | @property 354 | @abstractmethod 355 | def parameter_space(self): 356 | """Return parameter space.""" 357 | 358 | 359 | @add_metaclass(ABCMeta) 360 | class ProbPolicy(Policy): 361 | """Probabilistic policy interface.""" 362 | 363 | @abstractmethod 364 | def grad_log_prob(self, state, action): 365 | """Return the :math:log(grad p(action | state)):math:.""" 366 | pass 367 | -------------------------------------------------------------------------------- /SafeRLBench/envs/_quadrocopter/quadrotor_dynamics.py: -------------------------------------------------------------------------------- 1 | """Quadrotor Dynamics.""" 2 | 3 | from __future__ import print_function 4 | from __future__ import division 5 | from __future__ import absolute_import 6 | 7 | import numpy as np 8 | 9 | from .quadrocopter_classes import State, Parameters 10 | 11 | __all__ = ['QuadrotorDynamics', 'wind_creator', 'random_disturbance_creator'] 12 | 13 | 14 | class QuadrotorDynamics(object): 15 | """Implement the quadrotor dynamics and states (independent of gazebo). 16 | 17 | Attributes 18 | ---------- 19 | pos: 3d array 20 | Initial position of quadrotor 21 | vel: 3d array 22 | Initial velocity of quadrotor 23 | acc: 3d array 24 | Initial acceleration of quadrotor 25 | R: 3x3 array 26 | Initial rotation matrix 27 | external_forces: list 28 | a list of callables that take the state as input and return forces on 29 | the quadrotor in global coordinates. 30 | 31 | Notes 32 | ----- 33 | There seems to be an instability where the acceleration overflows and then 34 | causes issues in the controller. 35 | """ 36 | 37 | def __init__(self, pos=None, vel=None, acc=None, R=None, 38 | external_forces=None): 39 | """Initialize quadrocopter dynamics. 40 | 41 | Parameters 42 | ---------- 43 | pos: 3d array 44 | Initial position of quadrotor 45 | vel: 3d array 46 | Initial velocity of quadrotor 47 | acc: 3d array 48 | Initial acceleration of quadrotor 49 | R: 3x3 array 50 | Initial rotation matrix 51 | external_forces: list 52 | a list of callables that take the state as input and return forces 53 | on the quadrotor in global coordinates. 
54 | """ 55 | self.state = State() 56 | self.params = Parameters() 57 | 58 | if external_forces is None: 59 | self.external_forces = () 60 | else: 61 | self.external_forces = external_forces 62 | 63 | if pos is not None: 64 | self.state.pos = pos.copy() 65 | if vel is not None: 66 | self.state.vel = vel.copy() 67 | if acc is not None: 68 | self.state.acc = acc.copy() 69 | if R is not None: 70 | self.state.R = R.copy() 71 | 72 | def dynamics_derivative(self, pitch, roll, z_vel, yaw_vel): 73 | """Return the state derivatives for the current state and input.""" 74 | rates = self._inputs_to_desired_rates(pitch, roll, z_vel, yaw_vel) 75 | 76 | forces = self._determine_forces(*rates) 77 | 78 | return self._forces_to_derivatives(forces) 79 | 80 | def update_position(self, inputs): 81 | """Compute the derivatives and integrate them based on inputs.""" 82 | pitch, roll, z_vel, yaw_vel = inputs 83 | derivatives = self.dynamics_derivative(pitch, roll, z_vel, yaw_vel) 84 | self._integrate_derivatives(derivatives, 85 | self.params.inner_loop_cycle * 1e-6) 86 | 87 | def _inputs_to_desired_rates(self, pitch, roll, z_vel, yaw_vel): 88 | """Convert inputs to desired angular rates and thrust.""" 89 | # Current roll, and yaw angles 90 | roll_cur, _, yaw_cur = self.state.rpy 91 | 92 | # r_des is simply the commanded yaw rate 93 | r_des = yaw_vel 94 | 95 | # calculate the commanded acceleration in the z direction, 96 | # (z_dot_des - z_dot) / tau_z 97 | z_ddot_des = (z_vel - self.state.vel[2]) / self.params.tau_Iz 98 | 99 | # And from this we may find the commanded thrust, (g + z_ddot_cmd)/R33 100 | c_des = (self.params.g + z_ddot_des) / self.state.R[2, 2] 101 | 102 | # Calculate the commanded yaw angle from: 103 | yaw_des = yaw_vel * self.params.tau_Iyaw + yaw_cur 104 | 105 | # R13_des = sin(yaw_des) * sin(roll_cmd) 106 | # + cos(yaw_des) * cos(roll_cmd) * sin(pitch_cmd) 107 | r_13_des = (np.sin(yaw_des) * np.sin(roll) + 108 | np.cos(yaw_des) * np.cos(roll) * np.sin(pitch)) 109 | 110 | # R23_des = cos(roll_cmd) * sin(yaw_des) * sin(pitch_cmd) 111 | # - cos(yaw_des) * sin(roll_cmd) 112 | r_23_des = (np.cos(roll) * np.sin(yaw_des) * np.sin(pitch) - 113 | np.cos(yaw_des) * np.sin(roll)) 114 | 115 | # p_des = (R21*(R13_des-R13) - R11*(R23_des-R23))/(R33*tau_rp) 116 | p_des = (self.state.R[1, 0] * (r_13_des - self.state.R[0, 2]) - 117 | self.state.R[0, 0] * (r_23_des - self.state.R[1, 2])) 118 | p_des /= self.state.R[2, 2] * self.params.tau_rp 119 | 120 | # q_des = (R22*(R13_des-R13) - R12*(R23_des-R23))/(R33*tau_rp) 121 | q_des = (self.state.R[1, 1] * (r_13_des - self.state.R[0, 2]) - 122 | self.state.R[0, 1] * (r_23_des - self.state.R[1, 2])) 123 | q_des /= self.state.R[2, 2] * self.params.tau_rp 124 | 125 | # Return everything! 
126 | return p_des, q_des, r_des, c_des
127 |
128 | def _determine_forces(self, p_des, q_des, r_des, c_des):
129 | """Convert desired angular rates and thrust to rotor forces."""
130 | L = self.params.L
131 | K = self.params.K
132 | m = self.params.m
133 |
134 | a = np.array(((0, L, 0, -L),
135 | (-L, 0, L, 0),
136 | (K, -K, K, -K),
137 | (1 / m, 1 / m, 1 / m, 1 / m)),
138 | dtype=np.float64)
139 |
140 | # The inertial matrix
141 | j = np.diag((self.params.Ix, self.params.Iy, self.params.Iz))
142 |
143 | # The current angular velocity vector
144 | omega = self.state.omega
145 |
146 | # The rate vector (our approximation of omega_dot)
147 | rate_vector = np.array(
148 | (((1 / self.params.tau_p) * (p_des - self.state.omega[0])),
149 | ((1 / self.params.tau_q) * (q_des - self.state.omega[1])),
150 | ((1 / self.params.tau_r) * (r_des - self.state.omega[2])))).T
151 |
152 | b = j.dot(rate_vector) + np.cross(omega, j.dot(omega))
153 |
154 | # Add c_des to the bottom of the vector
155 | b = np.concatenate((b, [c_des]))
156 |
157 | # Return the four rotor forces
158 | return np.linalg.solve(a, b)
159 |
160 | def _forces_to_derivatives(self, forces):
161 | """Compute the state derivatives based on applied forces."""
162 | # Update position
163 | derivatives = State()
164 |
165 | derivatives.pos[:] = self.state.vel
166 |
167 | drag = self._compute_drag()
168 |
169 | # Update accelerations
170 | derivatives.acc = np.sum(forces) * self.state.R[:, 2] - drag
171 |
172 | # Add external forces
173 | for force in self.external_forces:
174 | derivatives.acc += force(self.state)
175 |
176 | # Normalize with mass and add gravity
177 | derivatives.acc /= self.params.m
178 | derivatives.acc[2] -= self.params.g
179 |
180 | # Update velocities
181 | derivatives.vel[:] = self.state.acc
182 |
183 | p, q, r = self.state.omega
184 | derivatives.R = self.state.R.dot(np.array([[0, -r, q],
185 | [r, 0, -p],
186 | [-q, p, 0]]))
187 |
188 | # Angular velocity changes
189 | f1, f2, f3, f4 = forces
190 |
191 | # p' = (1/Ix)*(L*(f2-f4) + (Iy-Iz)*r*q)
192 | p_dot = (self.params.L * (f2 - f4) +
193 | (self.params.Iy - self.params.Iz) *
194 | self.state.omega[2] * self.state.omega[1]) / self.params.Ix
195 |
196 | # q' = (1/Iy)*(L*(f3-f1) + (Iz-Ix)*r*p)
197 | q_dot = (self.params.L * (f3 - f1) +
198 | (self.params.Iz - self.params.Ix) *
199 | self.state.omega[2] * self.state.omega[0]) / self.params.Iy
200 |
201 | # r' = (1/Iz)*(K*(f1-f2+f3-f4) + (Ix-Iy)*p*q)
202 | r_dot = (self.params.K * (f1 - f2 + f3 - f4) +
203 | (self.params.Ix - self.params.Iy) *
204 | self.state.omega[0] * self.state.omega[1]) / self.params.Iz
205 |
206 | derivatives.omega = np.array([p_dot, q_dot, r_dot])
207 |
208 | return derivatives
209 |
210 | def _integrate_derivatives(self, derivatives, dt):
211 | """Simple Euler integration to determine new states."""
212 | self.state.pos += dt * derivatives.pos
213 | self.state.vel += dt * derivatives.vel
214 | self.state.acc[:] = derivatives.acc
215 |
216 | self.state.R += dt * derivatives.R
217 | self.state.omega += dt * derivatives.omega
218 |
219 | def _compute_drag(self):
220 | """Compute body velocities and apply the linear drag model.
221 |
222 | Inverts the current rotation matrix and solves for the components of
223 | quadrocopter velocities in the body coordinates. Then a simple linear
224 | drag model equation is applied. This is done because the quadrocopter
225 | platform areas don't change in this reference frame. The drag forces are
226 | returned in global coordinates.
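
Returns
-------
ndarray :
    Drag force vector in global coordinates.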
227 | """ 228 | v_b = np.linalg.solve(self.state.R, self.state.vel) 229 | 230 | drag_model = np.array((self.params.CD_bx, 231 | self.params.CD_by, 232 | self.params.CD_bz)) * v_b 233 | 234 | return self.state.R.dot(drag_model) 235 | 236 | 237 | def wind_creator(direction, strength): 238 | """ 239 | Return callable that computes the wind force on the quadrotor. 240 | 241 | Parameters: 242 | direction: 3d-array 243 | Direction vector for the wind. 244 | strength: float 245 | Strength of the wind in N / m^2 246 | """ 247 | direction = np.asarray(direction, dtype=np.float).squeeze() 248 | direction /= np.linalg.norm(direction) 249 | 250 | quadrotor_length = 0.3 251 | quadrotor_height = 0.05 252 | 253 | norm_area = np.array((quadrotor_length * quadrotor_height, 254 | quadrotor_length * quadrotor_height, 255 | quadrotor_length ** 2)) 256 | 257 | def wind_force(state): 258 | """Return wind force. 259 | 260 | Homogeneous wind, this does not create any torques. 261 | 262 | Parameters 263 | ---------- 264 | state : 265 | """ 266 | # Project surface areas into the wind direction 267 | area = np.abs(direction.dot(state.R)) * norm_area 268 | force = np.sum(area) * strength * direction 269 | return force 270 | 271 | return wind_force 272 | 273 | 274 | def random_disturbance_creator(covariance, mean=None): 275 | """Add gaussian disturbance forces with a certain covariance function. 276 | 277 | Parameters 278 | ---------- 279 | covariance: np.array 280 | A 3x3 array of the covariance matrix 281 | mean: np.array 282 | A 1d array of the 3 mean values (defaults to zero-mean) 283 | 284 | Returns 285 | ------- 286 | disturbance: callable 287 | A function that can be used as an external force in quadsim 288 | """ 289 | if mean is None: 290 | mean = np.zeros((3,)) 291 | 292 | def random_force(state): 293 | """Return wind force. 294 | 295 | Parameters 296 | ---------- 297 | state: State 298 | 299 | Returns 300 | ------- 301 | force: np.array 302 | """ 303 | return np.random.multivariate_normal(mean, covariance) 304 | 305 | return random_force 306 | -------------------------------------------------------------------------------- /SafeRLBench/envs/quadrocopter.py: -------------------------------------------------------------------------------- 1 | """Quadrocopter environment wrapper.""" 2 | 3 | from __future__ import division, print_function, absolute_import 4 | 5 | from SafeRLBench import EnvironmentBase 6 | from SafeRLBench.spaces import RdSpace 7 | 8 | from ._quadrocopter import QuadrotorDynamics 9 | from ._quadrocopter import StateVector 10 | 11 | from functools import partial 12 | 13 | from six import string_types 14 | 15 | import numpy as np 16 | from numpy import array 17 | from numpy import pi, cos, sin 18 | from numpy.linalg import norm 19 | 20 | import logging 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | # Available reference functions. 25 | REFERENCE_TYPES = ['circle', 'stationary', 'oscillate'] 26 | 27 | 28 | class Quadrocopter(EnvironmentBase): 29 | """Quadrocopter simulation. 30 | 31 | Attributes 32 | ---------- 33 | horizon : int 34 | Number of iterations for the main simulation 35 | pre_sim_horizon : int 36 | Number of iterations for the pre-simulation. 37 | _model : model object 38 | Object simulating the quadrotor dynamics. 39 | """ 40 | 41 | def __init__(self, 42 | init_pos=None, init_vel=None, num_sec=9, 43 | num_init_sec=4, ref='circle', period=1 / 70., 44 | seed=None): 45 | """Quadrocopter initialization. 
46 |
47 | Parameters
48 | ----------
49 | init_pos : array-like
50 | Initial position of the quadrocopter. Default: None, which will
51 | set init_pos to [1, 0, 0].
52 | init_vel : array-like
53 | Initial velocity of the quadrocopter. Default: None, which will
54 | set init_vel to [0, pi / 2, 0].
55 | num_sec : integer
56 | num_init_sec : integer
57 | ref : string or reference object
58 | Name of the reference. Currently supported are 'circle',
59 | 'stationary' or 'oscillate'.
60 | period : float
61 | seed : int
62 | """
63 | # spaces
64 | self.state_space = RdSpace((22,))
65 | self.action_space = RdSpace((4,))
66 |
67 | # seed
68 | if seed is not None:
69 | np.random.seed(seed)
70 | self._seed = seed
71 |
72 | # initial position
73 | if init_pos is None:
74 | init_pos = array([cos(0), sin(0), 0.])
75 |
76 | if len(init_pos) != 3:
77 | raise ValueError("init_pos with invalid length %d." % len(init_pos))
78 |
79 | # initial velocity
80 | if init_vel is None:
81 | init_vel = array([-pi / 2. * sin(0), pi / 2. * cos(0), 0.])
82 |
83 | if len(init_vel) != 3:
84 | raise ValueError("init_vel with invalid length %d." % len(init_vel))
85 |
86 | # initialize model
87 | self._model = QuadrotorDynamics(init_pos, init_vel)
88 |
89 | if isinstance(ref, string_types):
90 | self.reference = Reference(ref, period)
91 | else:
92 | self.reference = ref
93 | self.period = ref.period
94 |
95 | self.reference.reset(self.state)
96 |
97 | self.horizon = int(1. / period) * num_sec
98 | self.pre_sim_horizon = int(1. / period) * num_init_sec
99 |
100 | self.period = self.reference.period
101 |
102 | self._init_pos = init_pos
103 | self._init_vel = init_vel
104 |
105 | self._trajectory = np.atleast_2d(np.zeros(3))
106 | self._time = []
107 | self._step = 0
108 |
109 | def _update(self, action):
110 | assert self.action_space.contains(action), "Invalid action."
111 |
112 | self._model.update_position(action)
113 |
114 | self._step += 1
115 | time = self._step * self.period
116 |
117 | self._time.append(time)
118 | self._trajectory = np.vstack((self._trajectory, self.state.pos))
119 |
120 | reward = self._reward()
121 | self.reference.update(self.state, time)
122 |
123 | return action, self.state.copy(), reward
124 |
125 | def _reset(self):
126 | self._model = QuadrotorDynamics(self._init_pos, self._init_vel)
127 | self.reference.reset(self.state)
128 | self._trajectory = np.atleast_2d(np.zeros(3))
129 | self._time = []
130 | self._step = 0
131 |
132 | def _rollout(self, policy):
133 | if hasattr(policy, 'reference'):
134 | policy.reference = self.reference
135 | self.reset()
136 | trace = []
137 | for n in range(self.horizon):
138 | action = policy(self.state)
139 | trace.append(self.update(action))
140 | return trace
141 |
142 | def _reward(self):
143 | state = self.state
144 | ref = self.reference.reference
145 |
146 | reward = -norm(state.pos - ref.pos) - norm(state.vel - ref.vel)
147 |
148 | if np.isnan(reward):
149 | reward = -1.79769313e+308
150 |
151 | return reward
152 |
153 | @property
154 | def seed(self):
155 | """Seed."""
156 | return self._seed
157 |
158 | @seed.setter
159 | def seed(self, value):
160 | np.random.seed(value)
161 | self._seed = value
162 |
163 | @property
164 | def state(self):
165 | """Provide access to state_vector."""
166 | # this whole state vector implementation is annoyingly inefficient.
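# (every access goes through the model's ``StateVector`` view; see
# the setter below)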
167 | return self._model.state.state_vector
168 |
169 | @state.setter
170 | def state(self, state):
171 | self._model.state.state_vector = state.view(StateVector)
172 |
173 |
174 | class Reference(object):
175 | """Reference object for quadrocopter environment."""
176 |
177 | def __init__(self, name='circle', period=1. / 70, keep_record=True,
178 | **kwargs):
179 | """Initialize Reference.
180 |
181 | Parameters
182 | ----------
183 | name : str
184 | The name of the reference function.
185 | period : float
186 | The time step that the simulation takes every iteration.
187 | keep_record : bool
188 | Whether the history of the reference object should be saved.
189 | **kwargs : dict
190 | """
191 | # name type checking.
192 | if not isinstance(name, string_types):
193 | raise ValueError('Invalid type for argument name.')
194 | if name not in REFERENCE_TYPES:
195 | raise ValueError(name + ' is not a valid reference.')
196 |
197 | self._name = name
198 | self.period = period
199 | self._iter = 0
200 | self._reference_function = self._reference_chooser(**kwargs)
201 | self._current_ref = None
202 | self.keep_record = keep_record
203 | if keep_record:
204 | self._record = []
205 |
206 | @property
207 | def name(self):
208 | """Return the type of reference function."""
209 | return self._name
210 |
211 | @name.setter
212 | def name(self, value):
213 | if value not in REFERENCE_TYPES:
214 | raise ValueError(value + ' is not a valid reference.')
215 |
216 | self._name = value
217 | self._reference_function = self._reference_chooser()
218 |
219 | self.reset()
220 |
221 | @property
222 | def record(self):
223 | """Return the reference record of the simulation."""
224 | if self.keep_record:
225 | return np.atleast_2d(self._record)
226 | else:
227 | logger.warning("Reference record has not been saved.")
228 |
229 | def reset(self, state=None):
230 | """Reset internal state."""
231 | self._iter = 0
232 | self._current_ref = self._reference_function(state, 0, False)
233 | if self.keep_record:
234 | self._record = []
235 |
236 | def update(self, state, time, finished=False):
237 | """Compute the state of the reference object."""
238 | ref = self._reference_function(state, time, finished)
239 | self._iter += 1
240 |
241 | if self.keep_record:
242 | ref_value = np.hstack((ref.pos, ref.vel, ref.euler, ref.omega_b))
243 | self._update_record(ref_value)
244 |
245 | self._current_ref = ref
246 |
247 | return ref
248 |
249 | @property
250 | def reference(self):
251 | """Return the reference."""
252 | return self._current_ref
253 |
254 | def _update_record(self, ref_value):
255 | self._record.append(ref_value)
256 | assert self._iter == len(self._record)
257 |
258 | def _reference_chooser(self, **kwargs):
259 | # CIRCLE
260 | if self._name == 'circle':
261 | if kwargs.get('speed', False):
262 | speed = kwargs['speed']
263 | else:
264 | speed = pi / 2.
265 | if kwargs.get('initial_angle', False):
266 | init_angle = kwargs['initial_angle']
267 | else:
268 | init_angle = 0.
269 | if kwargs.get('radius', False):
270 | radius = kwargs['radius']
271 | else:
272 | radius = 1.
273 | if kwargs.get('z_vel', False):
274 | z_vel = kwargs['z_vel']
275 | else:
276 | z_vel = 0.
277 | return partial(_circle_reference,
278 | speed=speed,
279 | init_angle=init_angle,
280 | radius=radius,
281 | z_vel=z_vel)
282 | # STATIONARY
283 | elif self._name == 'stationary':
284 | if kwargs.get('position', False):
285 | position = kwargs['position']
286 | else:
287 | position = [1., 0., 0.]
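# default target; matches the environment's default initial
# position [1, 0, 0] on the unit circle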
288 | return partial(_stationary_reference, 289 | position=position) 290 | # OSCILLATE 291 | elif self._name == 'oscillate': 292 | if kwargs.get('x_velocity', False): 293 | x_vel = kwargs['x_velocity'] 294 | else: 295 | x_vel = 0.5 296 | if kwargs.get('omega', False): 297 | omega = kwargs['omega'] 298 | else: 299 | omega = 1. 300 | if kwargs.get('radius', False): 301 | radius = kwargs['radius'] 302 | else: 303 | radius = 0.5 304 | return partial(_oscillate_reference, 305 | x_vel=x_vel, 306 | omega=omega, 307 | radius=radius) 308 | 309 | 310 | # private circle reference function 311 | def _circle_reference(state, 312 | time, 313 | finished, 314 | radius=None, 315 | speed=None, 316 | init_angle=None, 317 | z_vel=None): 318 | ref = StateVector() 319 | angle = init_angle + speed / radius * time 320 | 321 | ref.pos = array([radius * cos(angle), 322 | radius * sin(angle), 323 | z_vel * time]) 324 | ref.vel[:] = [-speed * sin(angle), speed * cos(angle), z_vel] 325 | ref.euler[2] = pi + np.arctan2(state.pos[1], state.pos[0]) 326 | # reference.omega_b[2] = speed / radius 327 | return ref 328 | 329 | 330 | # private stationary reference function 331 | def _stationary_reference(state, 332 | time, 333 | finished, 334 | position=None): 335 | ref = StateVector() 336 | ref.pos[0] = position[0] 337 | ref.pos[1] = position[1] 338 | ref.pos[2] = position[2] 339 | return ref 340 | 341 | 342 | # private oscillation reference function 343 | def _oscillate_reference(state, 344 | time, 345 | finished, 346 | x_vel=None, 347 | omega=None, 348 | radius=None): 349 | ref = StateVector() 350 | angle = omega * time 351 | ref.pos[0] = x_vel * time 352 | ref.pos[1] = radius * sin(angle) 353 | ref.pos[2] = 0. 354 | ref.vel[0] = x_vel 355 | ref.vel[1] = radius * omega * cos(angle) 356 | ref.vel[2] = 0. 357 | return ref 358 | -------------------------------------------------------------------------------- /SafeRLBench/monitor.py: -------------------------------------------------------------------------------- 1 | """Monitoring implementations.""" 2 | 3 | import logging 4 | import time 5 | 6 | from SafeRLBench import config 7 | 8 | from contextlib import contextmanager 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | __all__ = ('EnvMonitor', 'AlgoMonitor') 13 | 14 | 15 | class EnvMonitor(object): 16 | """ 17 | Environment Monitor, providing tracking for environments. 18 | 19 | Attributes 20 | ---------- 21 | monitor : 22 | This is the container where monitoring data will be stored. 23 | 24 | Methods 25 | ------- 26 | monitor_update() 27 | Context manager for monitoring environment updates. It should be used 28 | when invoking the private ``_update`` implementation from the interface 29 | method. 30 | monitor_rollout() 31 | Context manager for monitoring environment rollout. It should be used 32 | when invoking the private ``_rollout`` implementation from the 33 | interface method. 34 | monitor_reset() 35 | Context manager for monitoring environment resets. It should be used 36 | when invoking the private ``_reset`` implementation from the interface 37 | method. 
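
Examples
--------
``EnvironmentBase`` wraps the private implementations with these
managers, e.g.::

    def update(self, action):
        with self.monitor_update():
            return self._update(action)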
38 | """ 39 | 40 | def __new__(cls, *args, **kwargs): 41 | """Create monitor in subclasses.""" 42 | obj = object.__new__(cls) 43 | obj.monitor = EnvData() 44 | return obj 45 | 46 | @contextmanager 47 | def monitor_update(self): 48 | """Context monitoring update.""" 49 | self._before_update() 50 | yield self 51 | self._after_update() 52 | 53 | @contextmanager 54 | def monitor_rollout(self): 55 | """Context monitoring rollout.""" 56 | self._before_rollout() 57 | yield self 58 | self._after_rollout() 59 | 60 | @contextmanager 61 | def monitor_reset(self): 62 | """Context monitoring reset.""" 63 | self._before_reset() 64 | yield self 65 | self._after_reset() 66 | 67 | def _before_update(self): 68 | """Monitor environment before update. 69 | 70 | Parameters 71 | ---------- 72 | env : 73 | Environment instance to be monitored. 74 | """ 75 | pass 76 | 77 | def _after_update(self): 78 | """Monitor environment after update. 79 | 80 | Parameters 81 | ---------- 82 | env : 83 | Environment instance to be monitored. 84 | """ 85 | pass 86 | 87 | def _before_rollout(self): 88 | """Monitor environment before rollout. 89 | 90 | Parameters 91 | ---------- 92 | env : 93 | Environment instance to be monitored. 94 | """ 95 | pass 96 | 97 | def _after_rollout(self): 98 | """ 99 | Monitor environment after rollout. 100 | 101 | Parameters 102 | ---------- 103 | env : 104 | Environment instance to be monitored. 105 | """ 106 | self.monitor.rollout_cnt += 1 107 | 108 | def _before_reset(self): 109 | """Monitor environment before reset. 110 | 111 | Parameters 112 | ---------- 113 | env : 114 | Environment instance to be monitored. 115 | """ 116 | pass 117 | 118 | def _after_reset(self): 119 | """Monitor environment after reset. 120 | 121 | Parameters 122 | ---------- 123 | env : 124 | Environment instance to be monitored. 125 | """ 126 | pass 127 | 128 | 129 | class AlgoMonitor(object): 130 | """Algorithm monitor tracks algorithms' activity. 131 | 132 | This class is inherited by the `AlgorithmBase` class and will provide it 133 | with tracking capabilities. 134 | 135 | Attributes 136 | ---------- 137 | monitor : 138 | This is the container where monitoring data will be stored. 139 | grad : 140 | The Alogrithm can set this field, to provide information about the 141 | current gradient to the monitor. 142 | has_policy : 143 | In case the algorithm does not depend on a policy and does not need 144 | any parameters, this can me set to False, to prevent issues with 145 | tracking data that does not exist. 146 | 147 | Methods 148 | ------- 149 | monitor_optimize() 150 | Context manager for monitoring algorithm optimizations. It should be 151 | used when invoking the private ``_optimize`` implementation from the 152 | interface method. 153 | monitor_initialize() 154 | Context manager for monitoring algorithm initializations. It should be 155 | used when invoking the private ``_initialize`` implementation from the 156 | interface method. 157 | monitor_step() 158 | Context manager for monitoring algorithm step. It should be used when 159 | invoking the private ``_step`` implementation from the interface 160 | method. 
161 | """ 162 | 163 | def __new__(cls, *args, **kwargs): 164 | """Create monitor in subclasses.""" 165 | obj = object.__new__(cls) 166 | obj.monitor = AlgoData() 167 | obj.grad = None 168 | obj.has_policy = True 169 | return obj 170 | 171 | @contextmanager 172 | def monitor_optimize(self): 173 | """Context monitoring optimization.""" 174 | self._before_optimize() 175 | yield self 176 | self._after_optimize() 177 | 178 | @contextmanager 179 | def monitor_initialize(self): 180 | """Context monitoring initialize.""" 181 | yield self 182 | if self.has_policy: 183 | self.monitor.parameters.append(self.policy.parameters) 184 | 185 | @contextmanager 186 | def monitor_step(self): 187 | """Context monitoring stepping.""" 188 | self._before_step() 189 | yield self 190 | self._after_step() 191 | 192 | def _before_optimize(self): 193 | """Set monitor up for optimization run. 194 | 195 | Parameters 196 | ---------- 197 | alg : 198 | the algorithm instance to be monitored 199 | """ 200 | if config.monitor_verbosity > 0: 201 | logger.info('Starting optimization of %s...', str(self)) 202 | 203 | # reset monitor object in case of rerun 204 | self.monitor.reset() 205 | 206 | # init monitor dict for algorithm 207 | self.monitor.t = time.time() 208 | 209 | # init optimization time control 210 | self.monitor.optimize_start = time.time() 211 | 212 | def _after_optimize(self): 213 | """Catch data after optimization run.""" 214 | # retrieve time of optimization 215 | optimize_end = time.time() 216 | optimize_time = optimize_end - self.monitor.optimize_start 217 | 218 | if self.monitor.optimize_start == 0: 219 | logger.warning('Time measure for optimize corrupted') 220 | 221 | self.monitor.optimize_start = 0 222 | 223 | self.monitor.optimize_time = optimize_time 224 | 225 | # if the gradient attribute has been set 226 | if self.grad is not None: 227 | logger.debug('Finished optimization after %d steps with grad %s.', 228 | self.monitor.step_cnt, str(self.grad)) 229 | else: 230 | logger.debug('Finished optimization after %d steps.', 231 | self.monitor.step_cnt) 232 | 233 | if self.has_policy: 234 | # independently compute traces after optimization is finished 235 | if config.monitor_verbosity > 0: 236 | logger.info('Computing traces for %s run...', str(self)) 237 | 238 | for parameters in self.monitor.parameters: 239 | 240 | self.policy.parameters = parameters 241 | 242 | # compute trace 243 | trace = self.environment._rollout(self.policy) 244 | self.monitor.traces.append(trace) 245 | 246 | # compute total reward 247 | reward = sum([t[2] for t in trace]) 248 | self.monitor.rewards.append(reward) 249 | 250 | def _before_step(self): 251 | """Monitor algorithm before step. 252 | 253 | Parameters 254 | ---------- 255 | alg : 256 | Algorithm instance to be monitored. 257 | """ 258 | # count the number of rollouts for each step 259 | self.environment.monitor.rollout_cnt = 0 260 | 261 | if config.monitor_verbosity > 2: 262 | logger.info('Computing step %d for %s...', self.monitor.step_cnt, 263 | str(self)) 264 | 265 | def _after_step(self): 266 | """Monitor algorithm after step. 267 | 268 | Parameters 269 | ---------- 270 | alg : 271 | Algorithm instance to be monitored. 
272 | """ 273 | emonitor = self.environment.monitor 274 | 275 | self.monitor.step_cnt += 1 276 | 277 | # store the number of rollouts 278 | self.monitor.rollout_cnts.append(emonitor.rollout_cnt) 279 | 280 | # retrieve information from the policy 281 | if self.has_policy: 282 | # retrieve current parameters 283 | parameters = self.policy.parameters 284 | # store information 285 | self.monitor.parameters.append(parameters) 286 | 287 | # log if wanted 288 | self._step_log() 289 | 290 | def _step_log(self): 291 | # print information if wanted 292 | monitor = self.monitor 293 | n = monitor.step_cnt 294 | log = 0 295 | 296 | # check verbosity level 297 | if config.monitor_verbosity > 0: 298 | if monitor.step_cnt % 1000 == 0: 299 | log = 1000 300 | 301 | if config.monitor_verbosity > 1: 302 | if monitor.step_cnt % 100 == 0: 303 | log = 100 304 | 305 | if config.monitor_verbosity > 2: 306 | log = 1 307 | 308 | if log: 309 | # generate time strings 310 | now = time.time() 311 | t = now - monitor.optimize_start 312 | t_s = "{:.2f}".format(t) 313 | avg_s = "{:.3f}".format(t / n) 314 | 315 | # generate log message 316 | msg = 'Status for ' + self.__class__.__name__ + ' on ' 317 | msg += self.environment.__class__.__name__ + ':\n\n' 318 | msg += '\tRun: %d\tTime: %s\t Avg: %s\n' % (n, t_s, avg_s) 319 | if self.has_policy: 320 | # retrieve current state 321 | par_s = str(self.policy.parameters) 322 | msg += '\tParameter: \t%s\n' % (par_s) 323 | 324 | logger.info(msg) 325 | 326 | def _alg_reset(self): 327 | """Reset the algorithm monitor.""" 328 | self.monitor.reset() 329 | 330 | 331 | class EnvData(object): 332 | """Class to store environment tracking data. 333 | 334 | Attributes 335 | ---------- 336 | rollout_cnt : Int 337 | number of rollouts performed on environment. 338 | """ 339 | 340 | def __init__(self): 341 | """Initialize attributes.""" 342 | self.rollout_cnt = 0 343 | 344 | 345 | class AlgoData(object): 346 | """Class used to store algorithm tracking data. 347 | 348 | Attributes 349 | ---------- 350 | optimize_start : Float 351 | Start time of the optimization. 352 | optimize_time : Float 353 | Start time of intermediate runs. 354 | step_cnt : Int 355 | Number of steps performed since initialization. 356 | rollout_cnts : List 357 | Number of rollouts during one step. 358 | parameters : List 359 | List of parameters found during optimization. 360 | traces : List 361 | List of traces for parameters. 362 | rewards : List 363 | List of rewards for parameters. 
364 | """ 365 | 366 | def __init__(self): 367 | """Initialize attributes.""" 368 | self.reset() 369 | 370 | def reset(self): 371 | """Reset monitor data.""" 372 | self.optimize_start = 0 373 | self.optimize_time = 0 374 | 375 | self.step_cnt = 0 376 | self.rollout_cnts = [] 377 | 378 | self.parameters = [] 379 | self.traces = [] 380 | self.rewards = [] 381 | -------------------------------------------------------------------------------- /SafeRLBench/algo/safeopt.py: -------------------------------------------------------------------------------- 1 | """SafeOpt Wrapper.""" 2 | 3 | from SafeRLBench import AlgorithmBase 4 | from SafeRLBench.error import add_dependency 5 | 6 | from numpy import mean, array 7 | 8 | try: 9 | import safeopt 10 | except ModuleNotFoundError: 11 | safeopt = None 12 | 13 | try: 14 | import GPy 15 | except ModuleNotFoundError: 16 | GPy = None 17 | 18 | import logging 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | __all__ = ('SafeOpt', 'SafeOptSwarm') 23 | 24 | 25 | class _SafeOptWrap(AlgorithmBase): 26 | 27 | def __init__(self, opt, gp_opt_par, gp_par, environment, policy, max_it, 28 | avg_reward, window): 29 | super(_SafeOptWrap, self).__init__(environment, policy, max_it) 30 | 31 | self._opt = opt 32 | 33 | self.gp_opt = None 34 | 35 | self.gp_opt_par = gp_opt_par 36 | self.gp_par = gp_par 37 | 38 | self.avg_reward = avg_reward 39 | self.window = window 40 | self.rewards = [] 41 | 42 | def _initialize(self): 43 | logger.debug("Initializing Policy.") 44 | # check if policy is already initialized by the user 45 | if self.policy.initialized: 46 | logger.debug("Use pre-set policy parameters.") 47 | parameters = self.policy.parameters 48 | else: 49 | logger.debug("Draw parameters at random.") 50 | parameters = self.policy.parameter_space.sample() 51 | self.policy.parameters = parameters 52 | 53 | # Compute a rollout 54 | trace = self.environment.rollout(self.policy) 55 | reward = sum([t[2] for t in trace]) 56 | 57 | # Initialize gaussian process with args: 58 | gp = [] 59 | for pars in zip(*self.gp_par): 60 | gp.append(GPy.core.GP(array([parameters]), array([[reward]]), 61 | *pars)) 62 | 63 | # Initialize SafeOpt 64 | self.gp_opt = self._opt(gp, **self.gp_opt_par) 65 | 66 | def _step(self): 67 | parameters = self.gp_opt.optimize() 68 | self.policy.parameters = parameters 69 | 70 | trace = self.environment.rollout(self.policy) 71 | reward = sum([t[2] for t in trace]) 72 | 73 | self.gp_opt.add_new_data_point(parameters, reward) 74 | self.rewards.append(reward) 75 | 76 | def _is_finished(self): 77 | if ((len(self.rewards) > self.window) 78 | and mean(self.rewards[(len(self.rewards) - self.window):-1]) 79 | > self.avg_reward): 80 | return True 81 | else: 82 | return False 83 | 84 | 85 | class SafeOpt(_SafeOptWrap): 86 | """Wrap SafeOpt algorithm. 87 | 88 | This class wraps the `SafeOpt` algorithm. It relies on the original 89 | implementation of `SafeOpt` which has to be installed before using this 90 | wrapper. 91 | 92 | Attributes 93 | ---------- 94 | environment : 95 | Environment to be optimized. 96 | policy : 97 | Policy to be optimized. 98 | max_it : 99 | Maximal number of iterations before we abort. 100 | avg_reward : integer 101 | Average reward at which the optimization will be finished. 102 | window : integer 103 | Window for the average reward 104 | gp : GPy Gaussian process 105 | A Gaussian process which is initialized with safe, initial data points. 
/SafeRLBench/algo/safeopt.py:
--------------------------------------------------------------------------------
1 | """SafeOpt Wrapper."""
2 | 
3 | from SafeRLBench import AlgorithmBase
4 | from SafeRLBench.error import add_dependency
5 | 
6 | from numpy import mean, array
7 | 
8 | try:
9 |     import safeopt
10 | except ImportError:
11 |     safeopt = None
12 | 
13 | try:
14 |     import GPy
15 | except ImportError:
16 |     GPy = None
17 | 
18 | import logging
19 | 
20 | logger = logging.getLogger(__name__)
21 | 
22 | __all__ = ('SafeOpt', 'SafeOptSwarm')
23 | 
24 | 
25 | class _SafeOptWrap(AlgorithmBase):
26 | 
27 |     def __init__(self, opt, gp_opt_par, gp_par, environment, policy, max_it,
28 |                  avg_reward, window):
29 |         super(_SafeOptWrap, self).__init__(environment, policy, max_it)
30 | 
31 |         self._opt = opt
32 | 
33 |         self.gp_opt = None
34 | 
35 |         self.gp_opt_par = gp_opt_par
36 |         self.gp_par = gp_par
37 | 
38 |         self.avg_reward = avg_reward
39 |         self.window = window
40 |         self.rewards = []
41 | 
42 |     def _initialize(self):
43 |         logger.debug("Initializing Policy.")
44 |         # check if policy is already initialized by the user
45 |         if self.policy.initialized:
46 |             logger.debug("Use pre-set policy parameters.")
47 |             parameters = self.policy.parameters
48 |         else:
49 |             logger.debug("Draw parameters at random.")
50 |             parameters = self.policy.parameter_space.sample()
51 |             self.policy.parameters = parameters
52 | 
53 |         # compute a rollout
54 |         trace = self.environment.rollout(self.policy)
55 |         reward = sum([t[2] for t in trace])
56 | 
57 |         # initialize one Gaussian process per kernel/likelihood pair
58 |         gp = []
59 |         for pars in zip(*self.gp_par):
60 |             gp.append(GPy.core.GP(array([parameters]), array([[reward]]),
61 |                                   *pars))
62 | 
63 |         # initialize SafeOpt
64 |         self.gp_opt = self._opt(gp, **self.gp_opt_par)
65 | 
66 |     def _step(self):
67 |         parameters = self.gp_opt.optimize()
68 |         self.policy.parameters = parameters
69 | 
70 |         trace = self.environment.rollout(self.policy)
71 |         reward = sum([t[2] for t in trace])
72 | 
73 |         self.gp_opt.add_new_data_point(parameters, reward)
74 |         self.rewards.append(reward)
75 | 
76 |     def _is_finished(self):
77 |         # finished once the mean reward over the trailing window of
78 |         # rollouts exceeds the target average reward
79 |         if len(self.rewards) <= self.window:
80 |             return False
81 |         return (mean(self.rewards[-self.window:])
82 |                 > self.avg_reward)
83 | 
84 | 
85 | class SafeOpt(_SafeOptWrap):
86 |     """Wrap SafeOpt algorithm.
87 | 
88 |     This class wraps the `SafeOpt` algorithm. It relies on the original
89 |     implementation of `SafeOpt` which has to be installed before using this
90 |     wrapper.
91 | 
92 |     Attributes
93 |     ----------
94 |     environment :
95 |         Environment to be optimized.
96 |     policy :
97 |         Policy to be optimized.
98 |     max_it :
99 |         Maximal number of iterations before we abort.
100 |     avg_reward : integer
101 |         Average reward at which the optimization will be finished.
102 |     window : integer
103 |         Window for the average reward.
104 |     gp : GPy Gaussian process
105 |         A Gaussian process which is initialized with safe, initial data points.
106 |         If a list of GPs is given, the first one is the value, while all the
107 |         other ones are safety constraints.
108 |     gp_opt : SafeOpt object
109 |         Instance of `SafeOpt` used for optimization.
110 |     gp_opt_par : dict
111 |         Dictionary of parameters to initialize `SafeOpt`.
112 |     """
113 | 
114 |     def __init__(self,
115 |                  environment, policy, max_it, avg_reward, window,
116 |                  kernel, likelihood, parameter_set, fmin,
117 |                  lipschitz=None, beta=3.0, num_contexts=0, threshold=0,
118 |                  scaling='auto', info=None):
119 |         """Initialize Attributes.
120 | 
121 |         Parameters
122 |         ----------
123 |         environment :
124 |             Environment to be optimized.
125 |         policy :
126 |             Policy to be optimized.
127 |         max_it :
128 |             Maximal number of iterations before we abort.
129 |         avg_reward : integer
130 |             Average reward at which the optimization will be finished.
131 |         window : integer
132 |             Window for the average reward.
133 |         kernel : GPy kernel
134 |             Kernel used to initialize the Gaussian process. If this is a list,
135 |             multiple kernels will be initialized. The size of this argument
136 |             has to agree with the size of the likelihood.
137 |         likelihood : GPy likelihood
138 |             Likelihood used to initialize kernels. If this is a list, multiple
139 |             likelihoods will be initialized. The size of this argument has to
140 |             agree with the size of the kernel.
141 |         parameter_set : 2d-array
142 |             Discretized set of candidate parameters, one vector per row.
143 |         fmin : list of floats
144 |             Safety threshold for the function value. With multiple safety
145 |             constraints this can also be a list of floats; the first entry
146 |             is the one for the values and can be set to None if not wanted.
147 |         lipschitz : list of floats
148 |             The Lipschitz constant of the system; if None, the GP confidence
149 |             intervals are used directly.
150 |         beta : float or callable
151 |             A constant or a function of the time step that scales the
152 |             confidence interval of the acquisition function.
153 |         num_contexts : int
154 |             Number of contexts of the optimization problem.
155 |         threshold : float or list of floats
156 |             The algorithm will not try to expand any points below this
157 |             threshold; this makes the expansion stop eventually. If a list,
158 |             it is the stopping criterion for all the GPs (scaling is ignored).
159 |         scaling : list of floats or "auto"
160 |             A list used to scale the GP uncertainties to compensate for
161 |             different input sizes. This should be set to the maximal variance
162 |             of each kernel. You should probably leave this to "auto" unless
163 |             your kernel is non-stationary.
164 |         info :
165 |             Dummy argument that can hold anything usable to identify the
166 |             configuration.
167 |         """
168 |         add_dependency(safeopt, 'SafeOpt')
169 |         add_dependency(GPy, 'GPy')
170 | 
171 |         # store the `SafeOpt` arguments.
172 |         gp_opt_par = {
173 |             'parameter_set': parameter_set,
174 |             'fmin': fmin,
175 |             'lipschitz': lipschitz,
176 |             'beta': beta,
177 |             'num_contexts': num_contexts,
178 |             'threshold': threshold,
179 |             'scaling': scaling}
180 | 
181 |         # store the kernel arguments
182 |         if not isinstance(kernel, list):
183 |             kernel = [kernel]
184 |         if not isinstance(likelihood, list):
185 |             likelihood = [likelihood]
186 |         assert len(likelihood) == len(kernel), (
187 |             'kernel and likelihood need to have the same length (%d != %d)'
188 |             % (len(likelihood), len(kernel)))
189 | 
190 |         gp_par = (kernel, likelihood)
191 | 
192 |         super(SafeOpt, self).__init__(safeopt.SafeOpt, gp_opt_par, gp_par,
193 |                                       environment, policy, max_it, avg_reward,
194 |                                       window)
195 | 
196 | 
197 | class SafeOptSwarm(_SafeOptWrap):
198 |     """Wrap SafeOptSwarm algorithm.
199 | 
200 |     This class wraps the `SafeOptSwarm` algorithm. It relies on the original
201 |     implementation of `SafeOptSwarm` which is part of the `safeopt` package
202 |     and has to be installed before using this class.
203 | 
204 |     Attributes
205 |     ----------
206 |     environment :
207 |         Environment to be optimized.
208 |     policy :
209 |         Policy to be optimized.
210 |     max_it :
211 |         Maximal number of iterations before we abort.
212 |     avg_reward : integer
213 |         Average reward at which the optimization will be finished.
214 |     window : integer
215 |         Window for the average reward.
216 |     gp : GPy Gaussian process
217 |         A Gaussian process which is initialized with safe, initial data points.
218 |         If a list of GPs is given, the first one is the value, while all the
219 |         other ones are safety constraints.
220 |     gp_opt : SafeOptSwarm object
221 |         Instance of SafeOptSwarm used for optimization.
222 |     gp_opt_par : dict
223 |         Dictionary of parameters to initialize `SafeOptSwarm`.
224 |     """
225 | 
226 |     def __init__(self,
227 |                  environment, policy, max_it, avg_reward, window,
228 |                  kernel, likelihood, fmin, bounds, beta=3.0, threshold=0,
229 |                  scaling='auto', swarm_size=20, info=None):
230 |         """Initialize Attributes.
231 | 
232 |         Parameters
233 |         ----------
234 |         environment :
235 |             Environment to be optimized.
236 |         policy :
237 |             Policy to be optimized.
238 |         max_it :
239 |             Maximal number of iterations before we abort.
240 |         avg_reward : integer
241 |             Average reward at which the optimization will be finished.
242 |         window : integer
243 |             Window for the average reward.
244 |         kernel : GPy kernel
245 |             Kernel used to initialize the Gaussian process. If this is a list,
246 |             multiple kernels will be initialized. The size of this argument
247 |             has to agree with the size of the likelihood.
248 |         likelihood : GPy likelihood
249 |             Likelihood used to initialize kernels. If this is a list, multiple
250 |             likelihoods will be initialized. The size of this argument has to
251 |             agree with the size of the kernel.
252 |         fmin : list of floats
253 |             Safety threshold for the function value. With multiple safety
254 |             constraints this can also be a list of floats; the first
255 |             entry is the one for the values and can be set to None
256 |             if not wanted.
257 |         bounds : pair of floats or list of pairs of floats
258 |             If a list is given, then each pair represents the lower/upper bound
259 |             in each dimension. Otherwise, we assume the same bounds for all
260 |             dimensions. This is mostly important for plotting or to restrict
261 |             particles to a certain domain.
262 |         beta : float or callable
263 |             A constant or a function of the time step that scales the
264 |             confidence interval of the acquisition function.
265 |         threshold : float or list of floats
266 |             The algorithm will not try to expand any points that are below
267 |             this threshold. This makes the algorithm stop expanding points
268 |             eventually. If a list, this represents the stopping criterion for
269 |             all the GPs. This ignores the scaling factor.
270 |         scaling : list of floats or "auto"
271 |             A list used to scale the GP uncertainties to compensate for
272 |             different input sizes. This should be set to the maximal variance
273 |             of each kernel. You should probably leave this to "auto" unless
274 |             your kernel is non-stationary.
275 |         swarm_size : int
276 |             The number of particles in each of the optimization swarms.
277 |         info :
278 |             Dummy argument that can hold anything usable to identify the
279 |             configuration.
280 |         """
281 |         add_dependency(safeopt, 'SafeOpt')
282 |         add_dependency(GPy, 'GPy')
283 | 
284 |         # store the `SafeOptSwarm` arguments.
285 |         gp_opt_par = {
286 |             'fmin': fmin,
287 |             'bounds': bounds,
288 |             'beta': beta,
289 |             'threshold': threshold,
290 |             'scaling': scaling,
291 |             'swarm_size': swarm_size
292 |         }
293 | 
294 |         # store the kernel arguments
295 |         if not isinstance(kernel, list):
296 |             kernel = [kernel]
297 |         if not isinstance(likelihood, list):
298 |             likelihood = [likelihood]
299 |         assert len(likelihood) == len(kernel), (
300 |             'kernel and likelihood need to have the same length (%d != %d)'
301 |             % (len(likelihood), len(kernel)))
302 | 
303 |         gp_par = (kernel, likelihood)
304 | 
305 |         super(SafeOptSwarm, self).__init__(safeopt.SafeOptSwarm, gp_opt_par,
306 |                                            gp_par, environment, policy, max_it,
307 |                                            avg_reward, window)
308 | 
--------------------------------------------------------------------------------
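A hedged construction sketch for the SafeOptSwarm wrapper above. LinearCar
and LinearPolicy stand in for any environment/policy pair shipped with the
package (their constructor signatures are assumed), the numeric values are
arbitrary, and optimize() is assumed to be the public entry point inherited
from AlgorithmBase, which is not shown in this file:

    import GPy  # standard GPy API

    from SafeRLBench.algo import SafeOptSwarm
    from SafeRLBench.envs import LinearCar      # assumed exports
    from SafeRLBench.policy import LinearPolicy

    policy = LinearPolicy(2, 1)  # assumed (state_dim, action_dim) signature
    d = policy.parameter_space.dimension

    # one kernel/likelihood pair -> a single value GP, no safety constraints
    kernel = GPy.kern.RBF(input_dim=d, variance=100., lengthscale=1.)
    likelihood = GPy.likelihoods.Gaussian(variance=0.05 ** 2)

    opt = SafeOptSwarm(LinearCar(), policy, max_it=50,
                       avg_reward=-20., window=5,
                       kernel=kernel, likelihood=likelihood,
                       fmin=[-100.], bounds=[(-1., 1.)] * d)
    opt.optimize()  # assumed AlgorithmBase entry point

    print(opt.rewards[-1])  # reward of the last evaluated parameters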
/SafeRLBench/algo/policygradient.py:
--------------------------------------------------------------------------------
1 | """Policy Gradient implementations."""
2 | 
3 | from SafeRLBench import AlgorithmBase
4 | from SafeRLBench.spaces import BoundedSpace
5 | 
6 | import numpy as np
7 | from numpy.linalg import solve, norm
8 | 
9 | from abc import ABCMeta, abstractmethod
10 | from six import add_metaclass
11 | 
12 | import logging
13 | 
14 | logger = logging.getLogger(__name__)
15 | 
16 | 
17 | class PolicyGradient(AlgorithmBase):
18 |     """Implementation of several policy gradient methods.
19 | 
20 |     This performs standard gradient ascent on the expected return, using
21 |     different policy gradient estimators.
22 | 
23 |     Attributes
24 |     ----------
25 |     environment :
26 |         Environment we want to optimize the policy on. This should be a
27 |         subclass of `EnvironmentBase`.
28 |     policy :
29 |         Policy we want to find parameters for. This should be a subclass of
30 |         `Policy`.
31 |     estimator :
32 |         Either a subclass of `PolicyGradientEstimator` or a string
33 |         naming one of the implemented estimators. A list of possible
34 |         estimator strings can be found in the Notes section. By default
35 |         'reinforce' will be used.
36 |     eps : float
37 |         The optimizer will stop optimization once the norm of the gradient is
38 |         smaller than `eps`.
39 |     rate : float
40 |         This is the learning rate we use for the update in each step.
41 | 
42 |     Notes
43 |     -----
44 |     These strings can be used to access the implemented estimators.
45 | 
46 |     +------------+---------------------------------+
47 |     |'forward_fd'| Uses forward finite differences.|
48 |     +------------+---------------------------------+
49 |     |'central_fd'| Uses central finite differences.|
50 |     +------------+---------------------------------+
51 |     |'reinforce' | Classic reinforce estimator.    |
52 |     +------------+---------------------------------+
53 |     |'gpomdp'    | Uses GPOMDP estimator.          |
54 |     +------------+---------------------------------+
55 |     """
56 | 
57 |     def __init__(self,
58 |                  environment, policy, estimator='reinforce',
59 |                  max_it=1000, eps=0.0001, est_eps=0.001,
60 |                  parameter_space=BoundedSpace(0, 1, (3,)),
61 |                  rate=1, var=0.5):
62 |         """Initialize PolicyGradient.
63 | 
64 |         Parameters
65 |         ----------
66 |         environment :
67 |             Environment we want to optimize the policy on. This should be a
68 |             subclass of `EnvironmentBase`.
69 |         policy :
70 |             Policy we want to find parameters for. This should be a subclass of
71 |             `Policy`.
72 |         estimator :
73 |             Either a subclass of `PolicyGradientEstimator` or a string. A
74 |             list of possible estimator strings can be found in the Notes
75 |             section. By default 'reinforce' will be used.
76 |         eps : float
77 |             The optimizer will stop optimization once the norm of the
78 |             gradient is smaller than `eps`.
79 |         est_eps : float
80 |             In case an estimator needs to converge, this is the margin it
81 |             will use to stop.
82 |         parameter_space :
83 |             Unused; the parameter space of the policy is used instead.
84 |         rate : float
85 |             This is the learning rate we use for the update in each step.
86 |         var : float
87 |             This parameter will be used depending on the estimator type, e.g.
88 |             for central differences this value corresponds to the grid size
89 |             that is used.
90 |         """
91 |         super(PolicyGradient, self).__init__(environment, policy, max_it)
92 | 
93 |         self.parameter_space = policy.parameter_space
94 | 
95 |         self.eps = eps
96 |         self.rate = rate
97 | 
98 |         if isinstance(estimator, str):
99 |             estimator = estimators[estimator]
100 |         elif issubclass(estimator, PolicyGradientEstimator):
101 |             pass
102 |         else:
103 |             raise ValueError('Invalid estimator.')
104 | 
105 |         self.estimator = estimator(environment, self.parameter_space, max_it,
106 |                                    est_eps, var)
107 | 
108 |     def _initialize(self):
109 |         logger.debug("Initializing Policy.")
110 |         # check if policy is already initialized by the user
111 |         if self.policy.initialized:
112 |             logger.debug("Use pre-set policy parameters.")
113 |             return self.policy.parameters
114 | 
115 |         # otherwise draw an element at random from the parameter space
116 |         parameter = self.parameter_space.sample()
117 | 
118 |         for _ in range(1000):
119 |             self.policy.parameters = parameter
120 |             grad = self.estimator(self.policy)
121 | 
122 |             if norm(grad) >= 1000 * self.eps:
123 |                 return parameter
124 | 
125 |             parameter = self.parameter_space.sample()
126 | 
127 |         logger.error('Unable to find non-zero gradient.')
128 | 
129 |     def _step(self):
130 |         grad = self.estimator(self.policy)
131 | 
132 |         parameter = self.policy.parameters
133 | 
134 |         self.policy.parameters = parameter + self.rate * grad
135 | 
136 |         self.grad = grad
137 | 
138 |     def _is_finished(self):
139 |         done = False
140 |         if np.isnan(self.grad).any():
141 |             done = True
142 |             logger.warning('Abort optimization: gradient contains NaN.')
143 |         done = done or (norm(self.grad) < self.eps)
144 |         return done
145 | 
146 | 
147 | @add_metaclass(ABCMeta)
148 | class PolicyGradientEstimator(object):
149 |     """Interface for gradient estimators."""
150 | 
151 |     name = 'Policy Gradient'
152 | 
153 |     def __init__(self, environment, parameter_space, max_it=200, eps=0.001):
154 |         """Initialize."""
155 |         self.environment = environment
156 |         self.state_dim = environment.state.shape[0]
157 |         self.par_dim = parameter_space.dimension
158 | 
159 |         self.eps = eps
160 |         self.max_it = max_it
161 | 
162 |     def __repr__(self):
163 |         return self.__class__.__name__
164 | 
165 |     def __call__(self, policy):
166 |         """Invoke _estimate_gradient(policy)."""
167 |         return self._estimate_gradient(policy)
168 | 
169 |     @abstractmethod
170 |     def _estimate_gradient(self, policy):
171 |         pass
172 | 
173 | 
174 | class ForwardFDEstimator(PolicyGradientEstimator):
175 |     """Forward Finite Differences Gradient Estimator."""
176 | 
177 |     name = 'Forward Finite Differences'
178 | 
179 |     def __init__(self, environment, parameter_space=BoundedSpace(0, 1, (3,)),
180 |                  max_it=200, eps=0.001, var=1):
181 |         """Initialize."""
182 |         super(ForwardFDEstimator, self).__init__(environment, parameter_space,
183 |                                                  max_it, eps)
184 |         self.var = var
185 | 
186 |     def _estimate_gradient(self, policy):
187 |         env = self.environment
188 |         var = self.var
189 |         # store current policy parameter
190 |         parameter = policy.parameters
191 |         par_dim = policy.parameter_space.dimension
192 | 
193 |         # reference return of the unperturbed policy
194 |         trace = env.rollout(policy)
195 |         j_ref = sum([x[2] for x in trace]) / len(trace)
196 | 
197 |         dj = np.zeros(2 * par_dim)
198 |         dv = np.append(np.eye(par_dim), -np.eye(par_dim), axis=0)
199 |         dv *= var
200 | 
201 |         for n in range(2 * par_dim):
202 |             variation = dv[n]
203 | 
204 |             policy.parameters = parameter + variation
205 |             trace_n = env.rollout(policy)
206 | 
207 |             jn = sum([x[2] for x in trace_n]) / len(trace_n)
208 | 
209 |             dj[n] = jn - j_ref  # approximates dv[n].dot(grad)
210 | 
211 |         grad = solve(dv.T.dot(dv), dv.T.dot(dj))
212 | 
213 |         # reset current policy parameter
214 |         policy.parameters = parameter
215 | 
216 |         return grad
217 | 
218 | 
219 | class CentralFDEstimator(PolicyGradientEstimator):
220 |     """Central Finite Differences Gradient Estimator."""
221 | 
222 |     name = 'Central Finite Differences'
223 | 
224 |     def __init__(self, environment, parameter_space=BoundedSpace(0, 1, (3,)),
225 |                  max_it=200, eps=0.001, var=1):
226 |         """Initialize."""
227 |         super(CentralFDEstimator, self).__init__(environment, parameter_space,
228 |                                                  max_it, eps)
229 |         self.var = var
230 | 
231 |     def _estimate_gradient(self, policy):
232 |         env = self.environment
233 | 
234 |         parameter = policy.parameters
235 |         par_dim = policy.parameter_space.dimension
236 | 
237 |         dj = np.zeros((par_dim,))
238 |         dv = np.eye(par_dim) * self.var / 2
239 | 
240 |         for n in range(par_dim):
241 |             variation = dv[n]
242 | 
243 |             policy.parameters = parameter + variation
244 |             trace_n = env.rollout(policy)
245 | 
246 |             policy.parameters = parameter - variation
247 |             trace_n_ref = env.rollout(policy)
248 | 
249 |             jn = sum([x[2] for x in trace_n]) / len(trace_n)
250 |             jn_ref = sum([x[2] for x in trace_n_ref]) / len(trace_n_ref)
251 | 
252 |             dj[n] = jn - jn_ref
253 | 
254 |         grad = solve(dv.T.dot(dv), dv.T.dot(dj))
255 |         policy.parameters = parameter
256 | 
257 |         return grad
258 | 
259 | 
260 | class ReinforceEstimator(PolicyGradientEstimator):
261 |     """Reinforce Gradient Estimator."""
262 | 
263 |     name = 'Reinforce'
264 | 
265 |     def __init__(self, environment, parameter_space=BoundedSpace(0, 1, (3,)),
266 |                  max_it=200, eps=0.001, lam=0.5):
267 |         """Initialize."""
268 |         super(ReinforceEstimator, self).__init__(environment, parameter_space,
269 |                                                  max_it, eps)
270 |         self.lam = lam
271 | 
272 |     def _estimate_gradient(self, policy):
273 |         env = self.environment
274 |         par_shape = policy.parameters.shape
275 |         max_it = self.max_it
276 | 
277 |         b_div = np.zeros(par_shape)
278 |         b_nom = np.zeros(par_shape)
279 | 
280 |         grads = np.zeros(par_shape)
281 |         grad = np.zeros(par_shape)
282 | 
283 |         for n in range(max_it):
284 |             trace = env.rollout(policy)
285 | 
286 |             lam = self.lam
287 | 
288 |             actions = [x[0] for x in trace]
289 |             states = [x[1] for x in trace]
290 | 
291 |             rewards_sum = sum([x[2] * lam**k for k, x in enumerate(trace)])
292 | 
293 |             lg_sum = sum(list(map(policy.grad_log_prob, states, actions)))
294 | 
295 |             b_div_n = lg_sum**2
296 |             b_nom_n = b_div_n * rewards_sum
297 | 
298 |             b_div += b_div_n
299 |             b_nom += b_nom_n
300 | 
301 |             b = b_nom / b_div
302 |             grad_n = lg_sum * (rewards_sum - b)
303 | 
304 |             grads += grad_n
305 | 
306 |             grad_old = grad
307 |             grad = grads / (n + 1)
308 | 
309 |             if n > 2 and norm(grad_old - grad) < self.eps:
310 |                 return grad
311 | 
312 |         logger.warning('ReinforceEstimator did not converge! '
313 |                        'You may want to raise max_it.')
314 |         return grad
315 | 
316 | 
317 | class GPOMDPEstimator(PolicyGradientEstimator):
318 |     """GPOMDP Gradient Estimator."""
319 | 
320 |     name = 'GPOMDP'
321 | 
322 |     def __init__(self, environment, parameter_space=BoundedSpace(0, 1, (3,)),
323 |                  max_it=200, eps=0.001, lam=0.5):
324 |         """Initialize."""
325 |         super(GPOMDPEstimator, self).__init__(environment, parameter_space,
326 |                                               max_it, eps)
327 |         self.lam = lam
328 | 
329 |     def _estimate_gradient(self, policy):
330 |         env = self.environment
331 |         h = env.horizon
332 |         shape = policy.parameters.shape
333 | 
334 |         b_nom = np.zeros((h,) + shape)  # per-time-step baseline numerator
335 |         b_div = np.zeros((h,) + shape)  # per-time-step baseline denominator
336 |         b = np.zeros((h,) + shape)
337 |         grad = np.zeros(shape)
338 | 
339 |         lam = self.lam
340 | 
341 |         for n in range(self.max_it):
342 |             trace = env.rollout(policy)
343 |             b_n = np.zeros((h,) + shape)
344 | 
345 |             for k, state in enumerate(trace):
346 |                 update = policy.grad_log_prob(state[1], state[0])
347 |                 for j in range(k + 1):
348 |                     b_n[j] += update
349 | 
350 |             fac = n / (n + 1.)  # true division, also under Python 2
351 | 
352 |             b_n = b_n**2
353 |             b_div = fac * b_div + b_n / (n + 1)
354 | 
355 |             for k, state in enumerate(trace):
356 |                 b_nom[k] = fac * b_nom[k]
357 |                 b_nom[k] += b_n[k] * state[2] * lam**k / (n + 1)
358 | 
359 |             b = b_nom / b_div
360 | 
361 |             grad_update = np.zeros(shape)
362 |             update = np.zeros(shape)
363 |             for k, state in enumerate(trace):
364 |                 update += policy.grad_log_prob(state[1], state[0])
365 |                 grad_update += update * (-b[k] + state[2] * lam**k)
366 | 
367 |             if n > 2 and norm(grad_update / (n + 1)) < self.eps:
368 |                 grad /= (n + 1)
369 |                 return grad
370 |             grad += np.nan_to_num(grad_update)
371 | 
372 |         logger.warning('GPOMDP did not converge! '
373 |                        'You may want to raise max_it.')
374 |         grad /= n + 1
375 |         return grad
376 | 
377 | 
378 | # dictionary for resolving estimator strings
379 | estimators = {
380 |     'forward_fd': ForwardFDEstimator,
381 |     'central_fd': CentralFDEstimator,
382 |     'reinforce': ReinforceEstimator,
383 |     'gpomdp': GPOMDPEstimator
384 | }
385 | 
--------------------------------------------------------------------------------
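And a matching sketch for the PolicyGradient algorithm above, under the same
assumptions as before: LinearCar and LinearPolicy are assumed stand-ins for
the package's environment and policy classes, and optimize() is assumed to
be the public entry point inherited from AlgorithmBase.

    from SafeRLBench.algo import PolicyGradient
    from SafeRLBench.envs import LinearCar      # assumed exports
    from SafeRLBench.policy import LinearPolicy

    env = LinearCar()
    policy = LinearPolicy(2, 1)  # assumed (state_dim, action_dim) signature

    # 'central_fd' resolves to CentralFDEstimator via the estimators dict
    algo = PolicyGradient(env, policy, estimator='central_fd',
                          max_it=500, eps=1e-4, rate=0.1, var=0.1)
    algo.optimize()  # assumed AlgorithmBase entry point

    # the AlgoMonitor mixin records one parameter vector per step and, after
    # the run, one trace and one accumulated reward per recorded parameter
    print(algo.monitor.step_cnt, algo.monitor.rewards[-1])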