├── requirements.txt
├── docs
│   ├── algorithm.rst
│   ├── environment.rst
│   ├── toc.rst
│   ├── api
│   │   ├── srb.rst
│   │   ├── misc.rst
│   │   ├── bench.rst
│   │   ├── measure.rst
│   │   ├── spaces.rst
│   │   ├── envs.rst
│   │   ├── algo.rst
│   │   └── policy.rst
│   ├── index.rst
│   ├── Makefile
│   └── conf.py
├── .gitignore
├── .dockerignore
├── SafeRLBench
│   ├── envs
│   │   ├── _quadrocopter
│   │   │   ├── __init__.py
│   │   │   ├── quaternions.py
│   │   │   ├── quadrocopter_classes.py
│   │   │   └── quadrotor_dynamics.py
│   │   ├── __init__.py
│   │   ├── README.rst
│   │   ├── gym_wrap.py
│   │   ├── linear_car.py
│   │   ├── test.py
│   │   ├── mdp.py
│   │   ├── general_mountaincar.py
│   │   └── quadrocopter.py
│   ├── spaces
│   │   ├── __init__.py
│   │   ├── rd_space.py
│   │   ├── discrete_space.py
│   │   ├── test.py
│   │   └── bounded_space.py
│   ├── policy
│   │   ├── __init__.py
│   │   ├── controller.py
│   │   ├── test.py
│   │   ├── linear_policy.py
│   │   └── neural_network.py
│   ├── algo
│   │   ├── __init__.py
│   │   ├── test.py
│   │   ├── README.rst
│   │   ├── q_learning.py
│   │   ├── safeopt.py
│   │   └── policygradient.py
│   ├── __init__.py
│   ├── test
│   │   ├── test_measure.py
│   │   ├── test_integration.py
│   │   ├── test_configuration.py
│   │   └── test_bench.py
│   ├── error.py
│   ├── measure.py
│   ├── configuration.py
│   ├── base.py
│   └── monitor.py
├── requirements_dev.txt
├── tox.ini
├── .travis.yml
├── setup.py
├── LICENSE
├── misc
│   ├── Dockerfile.python3
│   └── Dockerfile.python2
├── test_code.sh
├── Makefile
└── examples
    ├── GettingStarted.ipynb
    └── SafeOpt.ipynb
/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy >= 1.7 2 | scipy >= 0.19.0 3 | six >= 1.10 4 | futures >= 3.0.5 5 | -------------------------------------------------------------------------------- /docs/algorithm.rst: -------------------------------------------------------------------------------- 1 | Algorithms 2 | ========== 3 | 4 | .. include:: ../SafeRLBench/algo/README.rst 5 | -------------------------------------------------------------------------------- /docs/environment.rst: -------------------------------------------------------------------------------- 1 | Environments 2 | ============ 3 | 4 | .. include:: ../SafeRLBench/envs/README.rst 5 | -------------------------------------------------------------------------------- /docs/toc.rst: -------------------------------------------------------------------------------- 1 | Content 2 | ======= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | algorithm 8 | environment 9 | api/srb 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .ipynb_checkpoints 3 | .DS_Store 4 | .idea 5 | .coverage 6 | covhtml 7 | MANIFEST 8 | _build 9 | 10 | *.pyc 11 | -------------------------------------------------------------------------------- /docs/api/srb.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | .. toctree:: 5 | 6 | algo 7 | envs 8 | policy 9 | spaces 10 | measure 11 | bench 12 | misc 13 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | examples 2 | htmlcov 3 | .travis.yml 4 | .gitignore 5 | .git 6 | *.pyc 7 | .ipynb_checkpoints 8 | __pycache__ 9 | SafeRLBench.egg-info 10 | -------------------------------------------------------------------------------- /docs/api/misc.rst: -------------------------------------------------------------------------------- 1 | Miscellaneous 2 | ============= 3 | 4 | .. 
contents:: Contents 5 | :local: 6 | 7 | Configuration 8 | ------------- 9 | 10 | .. autoclass:: SafeRLBench.SRBConfig 11 | :members: 12 | -------------------------------------------------------------------------------- /SafeRLBench/envs/_quadrocopter/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | from .quadrotor_dynamics import QuadrotorDynamics 4 | from .quadrocopter_classes import StateVector 5 | 6 | __all__ = ['QuadrotorDynamics', 'StateVector'] 7 | -------------------------------------------------------------------------------- /SafeRLBench/spaces/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | from .rd_space import RdSpace 4 | from .bounded_space import BoundedSpace 5 | from .discrete_space import DiscreteSpace 6 | 7 | __all__ = ['RdSpace', 'BoundedSpace', 'DiscreteSpace'] 8 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. SafeRLBench documentation master file, created by 2 | sphinx-quickstart on Mon Mar 27 16:08:01 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. include:: ../README.rst 7 | 8 | .. include:: toc.rst 9 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | gym >= 0.8.0 2 | tensorflow >= 1.0.0 3 | GPy >= 1.6.1 4 | 5 | # Style testing 6 | flake8 >= 3.3.0 7 | pep8 >= 1.7.0 8 | pep8-naming >= 0.4.1 9 | pydocstyle >= 1.1.1 10 | 11 | # Unittesting 12 | nose >= 1.3.7 13 | nose-exclude >= 0.5.0 14 | coverage >= 4.3.4 15 | unittest2 >= 1.1.0 16 | mock >= 2.0.0 17 | 18 | # Documentation 19 | sphinx >= 1.5.3 20 | -------------------------------------------------------------------------------- /docs/api/bench.rst: -------------------------------------------------------------------------------- 1 | Benchmark 2 | ========= 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | Bench 8 | ----- 9 | 10 | .. autoclass:: SafeRLBench.Bench 11 | :members: 12 | 13 | BenchConfig 14 | ----------- 15 | 16 | .. autoclass:: SafeRLBench.BenchConfig 17 | :members: 18 | 19 | BenchRun 20 | -------- 21 | 22 | .. 
autoclass:: SafeRLBench.bench.BenchRun 23 | :members: 24 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27, py35 3 | 4 | [testenv] 5 | deps = 6 | nose 7 | numpy 8 | theano 9 | mock 10 | unittest2 11 | commands = nosetests 12 | 13 | [flake8] 14 | ignore = E402,W503,D105,D413 15 | exclude = 16 | SafeRLBench/envs/_quadrocopter* 17 | 18 | [pydocstyle] 19 | add_ignore = D203,D105,D413 20 | match_dir = '[^\.\_].*' 21 | 22 | [coverage:run] 23 | omit = 24 | */_quadrocopter* 25 | -------------------------------------------------------------------------------- /SafeRLBench/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from .linear_policy import LinearPolicy, NoisyLinearPolicy 2 | from .linear_policy import DiscreteLinearPolicy 3 | from .neural_network import NeuralNetwork 4 | from .controller import NonLinearQuadrocopterController 5 | 6 | __all__ = [ 7 | 'LinearPolicy', 8 | 'NoisyLinearPolicy', 9 | 'DiscreteLinearPolicy', 10 | 'NeuralNetwork', 11 | 'NonLinearQuadrocopterController' 12 | ] 13 | -------------------------------------------------------------------------------- /docs/api/measure.rst: -------------------------------------------------------------------------------- 1 | Measure Module 2 | ============== 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | Measure 8 | ------- 9 | 10 | .. autoclass:: SafeRLBench.measure.Measure 11 | :members: 12 | 13 | BestPerformance 14 | --------------- 15 | 16 | .. autoclass:: SafeRLBench.measure.BestPerformance 17 | :members: 18 | 19 | SafetyMeasure 20 | ------------- 21 | 22 | .. autoclass:: SafeRLBench.measure.SafetyMeasure 23 | :members: 24 | -------------------------------------------------------------------------------- /SafeRLBench/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .general_mountaincar import GeneralMountainCar 4 | from .linear_car import LinearCar 5 | from .gym_wrap import GymWrap 6 | from .quadrocopter import Quadrocopter 7 | from .mdp import MDP 8 | 9 | __all__ = [ 10 | 'GeneralMountainCar', 11 | 'LinearCar', 12 | 'GymWrap', 13 | 'Quadrocopter', 14 | 'MDP' 15 | ] 16 | 17 | # TODO: Envs: Add module docs in __init__ file. 18 | -------------------------------------------------------------------------------- /docs/api/spaces.rst: -------------------------------------------------------------------------------- 1 | Spaces Module 2 | ============= 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | Space 8 | ----- 9 | 10 | .. autoclass:: SafeRLBench.Space 11 | :members: 12 | 13 | BoundedSpace 14 | ------------ 15 | 16 | .. autoclass:: SafeRLBench.spaces.BoundedSpace 17 | :members: 18 | 19 | DiscreteSpace 20 | ------------- 21 | 22 | .. autoclass:: SafeRLBench.spaces.DiscreteSpace 23 | :members: 24 | 25 | RdSpace 26 | ------- 27 | 28 | .. autoclass:: SafeRLBench.spaces.RdSpace 29 | :members: 30 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = SafeRLBench 8 | SOURCEDIR = . 
9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/api/envs.rst: -------------------------------------------------------------------------------- 1 | Environment Module 2 | ================== 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | EnvironmentBase 8 | --------------- 9 | 10 | .. autoclass:: SafeRLBench.EnvironmentBase 11 | :members: 12 | 13 | GeneralMountainCar 14 | ------------------ 15 | 16 | .. autoclass:: SafeRLBench.envs.GeneralMountainCar 17 | :members: 18 | 19 | GymWrap 20 | ------- 21 | 22 | .. autoclass:: SafeRLBench.envs.GymWrap 23 | :members: 24 | 25 | LinearCar 26 | --------- 27 | 28 | .. autoclass:: SafeRLBench.envs.LinearCar 29 | :members: 30 | 31 | MDP 32 | --- 33 | 34 | .. autoclass:: SafeRLBench.envs.MDP 35 | :members: 36 | 37 | Quadrocopter 38 | ------------ 39 | 40 | .. autoclass:: SafeRLBench.envs.Quadrocopter 41 | :members: 42 | -------------------------------------------------------------------------------- /SafeRLBench/algo/__init__.py: -------------------------------------------------------------------------------- 1 | """Algorithm Module. 2 | 3 | =================== ========================================= 4 | Algorithm 5 | ============================================================= 6 | A3C Asynchronous Advantage Actor-Critic 7 | PolicyGradient Different Policy Gradient Implementations 8 | DiscreteQLearning Q-Learning using a table 9 | SafeOpt Bayesian Optimization with SafeOpt 10 | SafeOptSwarm Bayesian Optimization with SafeOptSwarm 11 | =================== ========================================= 12 | """ 13 | 14 | from .policygradient import PolicyGradient 15 | from .safeopt import SafeOpt, SafeOptSwarm 16 | from .a3c import A3C 17 | from .q_learning import DiscreteQLearning 18 | 19 | __all__ = ['PolicyGradient', 'SafeOpt', 'A3C', 'DiscreteQLearning', 20 | 'SafeOptSwarm'] 21 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | sudo: required 4 | 5 | services: 6 | - docker 7 | 8 | env: 9 | - PYTHON=python2 10 | - PYTHON=python3 11 | 12 | # Setup docker container 13 | install: 14 | - docker build -f misc/Dockerfile.${PYTHON} -t test-image . 
15 | - docker ps -a 16 | - ci_env=`bash <(curl -s https://codecov.io/env)` 17 | 18 | # Run tests 19 | script: 20 | - docker run test-image flake8 SafeRLBench --exclude "test*.py,__init__.py,_quadrocopter" --ignore=E402,W503 --show-source 21 | - docker run test-image flake8 SafeRLBench --filename="__init__.py,test*.py" --ignore=F,E402,W503 --show-source 22 | - docker run test-image pydocstyle SafeRLBench --match='(?!__init__).*\.py' 23 | - docker run $ci_env test-image /bin/bash -c "nosetests --with-doctest --with-coverage --cover-package=SafeRLBench --verbosity=2 SafeRLBench ; bash <(curl -s https://codecov.io/bash)" 24 | -------------------------------------------------------------------------------- /docs/api/algo.rst: -------------------------------------------------------------------------------- 1 | Algorithm Module 2 | ================ 3 | 4 | This module contains implementations of different algorithms. Please refer to 5 | the class documentation for detailed instructions on how to use them. 6 | 7 | .. contents:: Contents 8 | :local: 9 | 10 | AlgorithmBase 11 | ------------- 12 | 13 | .. autoclass:: SafeRLBench.AlgorithmBase 14 | :members: 15 | 16 | A3C 17 | --- 18 | 19 | .. autoclass:: SafeRLBench.algo.A3C 20 | :members: 21 | 22 | Policy Gradient 23 | --------------- 24 | 25 | .. autoclass:: SafeRLBench.algo.PolicyGradient 26 | :members: 27 | 28 | Q-Learning 29 | ---------- 30 | 31 | .. autoclass:: SafeRLBench.algo.DiscreteQLearning 32 | :members: 33 | 34 | SafeOpt 35 | ------- 36 | 37 | .. autoclass:: SafeRLBench.algo.SafeOpt 38 | :members: 39 | 40 | SafeOptSwarm 41 | ------------ 42 | 43 | .. autoclass:: SafeRLBench.algo.SafeOptSwarm 44 | :members: 45 | -------------------------------------------------------------------------------- /SafeRLBench/spaces/rd_space.py: -------------------------------------------------------------------------------- 1 | """R^d with any shape.""" 2 | import numpy as np 3 | from SafeRLBench import Space 4 | 5 | 6 | class RdSpace(Space): 7 | """R^d Vectorspace.""" 8 | 9 | def __init__(self, shape): 10 | """Initialize with shape.""" 11 | self.shape = shape 12 | self._dim = None 13 | 14 | def contains(self, x): 15 | """Check if element is contained.""" 16 | return isinstance(x, np.ndarray) and x.shape == self.shape 17 | 18 | def sample(self): 19 | """Return arbitrary element.""" 20 | return np.ones(self.shape) 21 | 22 | @property 23 | def dimension(self): 24 | """Return dimension of the space.""" 25 | if self._dim is None: 26 | d = 1 27 | for i in range(len(self.shape)): 28 | d *= self.shape[i] 29 | self._dim = d 30 | return self._dim 31 | 32 | def __repr__(self): 33 | return 'RdSpace(shape=%s)' % str(self.shape) 34 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='SafeRLBench', 5 | version='1.0.1', 6 | author='Nicolas Ochsner', 7 | author_email='ochsnern@student.ethz.ch', 8 | packages=[ 9 | 'SafeRLBench', 10 | 'SafeRLBench.algo', 11 | 'SafeRLBench.envs', 12 | 'SafeRLBench.spaces', 13 | 'SafeRLBench.policy', 14 | ], 15 | description='Safe Reinforcement Learning Benchmark', 16 | keywords='reinforcement-learning benchmark', 17 | url='https://github.com/befelix/Safe-RL-Benchmark', 18 | install_requires=[ 19 | 'numpy >= 1.7', 20 | 'scipy >= 0.19.0', 21 | 'six >= 1.10', 22 | 'futures >= 3.0.5;python_version<"3.2"' 23 | ], 24 | extras_require={ 25 | 'gym': ['gym >= 0.8.0'], 26 | 
'safeopt': ['GPy >= 1.6.1', 'safeopt >= 0.1'], 27 | 'neural': ['tensorflow >= 1.0.0'], 28 | }, 29 | dependency_links=[ 30 | 'git+https://github.com/befelix/SafeOpt/tarball/master#egg=safeopt-0.1' 31 | ], 32 | ) 33 | -------------------------------------------------------------------------------- /docs/api/policy.rst: -------------------------------------------------------------------------------- 1 | Policy Module 2 | ============= 3 | 4 | .. contents:: 5 | :local: 6 | 7 | Bases 8 | ----- 9 | 10 | Deterministic Policy Base 11 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 12 | 13 | .. autoclass:: SafeRLBench.Policy 14 | :members: 15 | 16 | Probabilistic Policy Base 17 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 18 | 19 | .. autoclass:: SafeRLBench.ProbPolicy 20 | :members: 21 | 22 | Linear Policies 23 | --------------- 24 | 25 | LinearPolicy 26 | ~~~~~~~~~~~~ 27 | 28 | .. autoclass:: SafeRLBench.policy.LinearPolicy 29 | :members: 30 | 31 | DiscreteLinearPolicy 32 | ~~~~~~~~~~~~~~~~~~~~ 33 | 34 | .. autoclass:: SafeRLBench.policy.DiscreteLinearPolicy 35 | :members: 36 | 37 | NoisyLinearPolicy 38 | ~~~~~~~~~~~~~~~~~ 39 | 40 | .. autoclass:: SafeRLBench.policy.NoisyLinearPolicy 41 | :members: 42 | 43 | NonLinearQuadrocopterController 44 | ------------------------------- 45 | 46 | .. autoclass:: SafeRLBench.policy.NonLinearQuadrocopterController 47 | :members: 48 | 49 | NeuralNetwork 50 | ------------- 51 | 52 | .. autoclass:: SafeRLBench.policy.NeuralNetwork 53 | :members: 54 | -------------------------------------------------------------------------------- /SafeRLBench/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import logging 4 | 5 | from .configuration import SRBConfig 6 | 7 | # Initialize configuration 8 | config = SRBConfig(logging.getLogger(__name__)) 9 | 10 | from .monitor import AlgoMonitor, EnvMonitor 11 | from .base import EnvironmentBase, Space, AlgorithmBase, Policy, ProbPolicy 12 | from .bench import Bench, BenchConfig 13 | from . import algo 14 | from . import envs 15 | from . import policy 16 | from . import spaces 17 | from . import error 18 | from . import measure 19 | 20 | # Add things to all 21 | __all__ = ['EnvironmentBase', 22 | 'Space', 23 | 'AlgorithmBase', 24 | 'Policy', 25 | 'ProbPolicy', 26 | 'AlgoMonitor', 27 | 'EnvMonitor', 28 | 'SRBConfig', 29 | 'Bench', 30 | 'BenchConfig', 31 | 'envs', 32 | 'algo', 33 | 'policy', 34 | 'spaces', 35 | 'measure', 36 | 'error'] 37 | 38 | 39 | # Import test after __all__ (no documentation) 40 | # from numpy.testing import Tester 41 | # test = Tester().test 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Felix Berkenkamp 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /misc/Dockerfile.python3: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3 2 | 3 | ENV TF_CPP_MIN_LOG_LEVEL=2 4 | 5 | # Install build essentials and clean up 6 | RUN apt-get update --quiet \ 7 | && apt-get install -y --no-install-recommends --quiet build-essential \ 8 | && apt-get clean 9 | 10 | # Fix matplotlib build issues. 11 | RUN apt-get install -y --quiet libfreetype6-dev pkg-config libpng12-dev \ 12 | && apt-get clean 13 | 14 | # Update conda, install packages, and clean up 15 | RUN conda update conda --yes --quiet \ 16 | && conda install python=3.5 pip numpy scipy nose --yes --quiet \ 17 | && conda clean --yes --all \ 18 | && hash -r 19 | 20 | # Get the requirements files (separate from the main body) 21 | COPY requirements.txt requirements_dev.txt /code/ 22 | 23 | # Install requirements and clean up 24 | RUN pip --no-cache-dir install -r code/requirements.txt \ 25 | && rm -rf /root/.cache 26 | 27 | # Install dev requirements and clean up 28 | RUN pip --no-cache-dir install -r code/requirements_dev.txt \ 29 | && rm -rf /root/.cache 30 | 31 | # Install SafeOpt 32 | RUN git clone https://github.com/befelix/SafeOpt.git \ 33 | && cd SafeOpt \ 34 | && python setup.py install \ 35 | && rm -rf /SafeOpt 36 | 37 | # Copy the main code 38 | COPY . /code 39 | RUN cd /code && python setup.py develop 40 | 41 | WORKDIR /code 42 | -------------------------------------------------------------------------------- /SafeRLBench/spaces/discrete_space.py: -------------------------------------------------------------------------------- 1 | """Discrete space implementation.""" 2 | 3 | from SafeRLBench import Space 4 | 5 | import numpy as np 6 | 7 | 8 | class DiscreteSpace(Space): 9 | """Discrete Space. 10 | 11 | Let dim be the dimension of the space; then it contains the elements 12 | {0, 1, ..., dim-1}. 13 | 14 | Examples 15 | -------- 16 | Create a `DiscreteSpace` with three states: 17 | >>> from SafeRLBench.spaces import DiscreteSpace 18 | >>> discrete_space = DiscreteSpace(3) 19 | """ 20 | 21 | def __init__(self, dim): 22 | """Initialize `DiscreteSpace`. 23 | 24 | Parameters 25 | ---------- 26 | dim : int 27 | Number of states. 
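Examples
--------
A small sketch of how membership behaves; elements are the plain Python
integers ``0 .. dim-1``:

>>> space = DiscreteSpace(3)
>>> space.contains(2)
True
>>> space.contains(3)
False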
28 | """ 29 | assert dim > 0, ("If you need a discrete space without elements, you " 30 | + "do not need this class.") 31 | self._dim = dim 32 | 33 | def contains(self, x): 34 | """Check if element is part of the space.""" 35 | return (isinstance(x, int) and x >= 0 and x < self._dim) 36 | 37 | def sample(self): 38 | """Sample an element of the space.""" 39 | return np.random.randint(self._dim) 40 | 41 | @property 42 | def dimension(self): 43 | """Return dimension of the space.""" 44 | return self._dim 45 | 46 | def __repr__(self): 47 | return 'DiscreteSpace(dim=%d)' % self._dim 48 | -------------------------------------------------------------------------------- /misc/Dockerfile.python2: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda 2 | 3 | ENV TF_CPP_MIN_LOG_LEVEL=2 4 | 5 | # Install build essentials and clean up 6 | RUN apt-get update --quiet \ 7 | && apt-get install -y --no-install-recommends --quiet build-essential \ 8 | && apt-get clean 9 | 10 | # Fix matplotlib build issues. 11 | RUN apt-get install -y --quiet libfreetype6-dev pkg-config libpng12-dev \ 12 | && apt-get clean 13 | 14 | # Update conda, install packages, and clean up 15 | RUN conda update conda --yes --quiet \ 16 | && conda install python=2.7 pip numpy scipy nose --yes --quiet \ 17 | && conda clean --yes --all \ 18 | && hash -r 19 | 20 | # Get the requirements files (separate from the main body) 21 | COPY requirements.txt requirements_dev.txt /code/ 22 | 23 | # Install requirements and clean up 24 | RUN pip --no-cache-dir install -r code/requirements.txt \ 25 | && rm -rf /root/.cache 26 | 27 | # Install dev requirements and clean up 28 | RUN pip --no-cache-dir install -r code/requirements_dev.txt \ 29 | && rm -rf /root/.cache 30 | 31 | # Install extra python2 requirements 32 | RUN pip --no-cache-dir install futures multiprocessing \ 33 | && rm -rf /root/.cache 34 | 35 | # Install SafeOpt 36 | RUN git clone https://github.com/befelix/SafeOpt.git \ 37 | && cd SafeOpt \ 38 | && python setup.py install \ 39 | && rm -rf /SafeOpt 40 | 41 | # Copy the main code 42 | COPY . /code
43 | RUN cd /code && python setup.py develop 44 | 45 | WORKDIR /code 46 | -------------------------------------------------------------------------------- /test_code.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | module="SafeRLBench" 4 | 5 | get_script_dir () { 6 | SOURCE="${BASH_SOURCE[0]}" 7 | # While $SOURCE is a symlink, resolve it 8 | while [ -h "$SOURCE" ]; do 9 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 10 | SOURCE="$( readlink "$SOURCE" )" 11 | # If $SOURCE was a relative symlink (no "/" prefix), resolve it relative to the symlink's base directory 12 | [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" 13 | done 14 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 15 | echo "$DIR" 16 | } 17 | 18 | # tensorflow environment variable 19 | export TF_CPP_MIN_LOG_LEVEL='3' 20 | 21 | # Change to script root 22 | cd $(get_script_dir) 23 | GREEN='\033[0;32m' 24 | NC='\033[0m' 25 | 26 | BOLD=$(tput bold) 27 | NORMAL=$(tput sgr0) 28 | 29 | # Run style tests 30 | echo -e "${GREEN}${BOLD}Running style tests:${NC}" 31 | flake8 $module --exclude test*.py,__init__.py,_quadrocopter --show-source 32 | 33 | # Ignore import errors for __init__ and tests 34 | flake8 $module --filename=__init__.py,test*.py --ignore=F --show-source 35 | 36 | echo -e "${GREEN}${BOLD}Testing docstring conventions:${NC}" 37 | # Test docstring conventions 38 | pydocstyle $module --match='(?!__init__).*\.py' 2>&1 | grep -v "WARNING: __all__" 39 | 40 | echo -e "${GREEN}${BOLD}Running unit tests in current environment.${NC}" 41 | nosetests -v --with-doctest --with-coverage --cover-erase --cover-package=$module $module 2>&1 | grep -v "^Level " 42 | 43 | # Export html 44 | coverage html 45 | -------------------------------------------------------------------------------- /SafeRLBench/algo/test.py: -------------------------------------------------------------------------------- 1 | """Algorithm Tests.""" 2 | 3 | from SafeRLBench.algo import PolicyGradient, A3C 4 | from SafeRLBench.envs import LinearCar 5 | from .policygradient import CentralFDEstimator, estimators 6 | 7 | from SafeRLBench.policy import NeuralNetwork 8 | 9 | from unittest2 import TestCase 10 | from mock import MagicMock, Mock 11 | 12 | 13 | class TestPolicyGradient(TestCase): 14 | """PolicyGradient Test Class.""" 15 | 16 | def test_pg_init(self): 17 | """Test: POLICYGRADIENT: initialization.""" 18 | env_mock = MagicMock() 19 | pol_mock = Mock() 20 | 21 | for key, item in estimators.items(): 22 | pg = PolicyGradient(env_mock, pol_mock, estimator=key) 23 | self.assertIsInstance(pg.estimator, item) 24 | 25 | pg = PolicyGradient(env_mock, pol_mock, estimator=CentralFDEstimator) 26 | self.assertIsInstance(pg.estimator, CentralFDEstimator) 27 | 28 | self.assertRaises(ImportError, PolicyGradient, 29 | env_mock, pol_mock, CentralFDEstimator(env_mock)) 30 | 31 | 32 | class TestA3C(TestCase): 33 | """A3C Test Class.""" 34 | 35 | def test_a3c_init(self): 36 | """Test: A3C: initialization.""" 37 | a3c = A3C(LinearCar(), NeuralNetwork([2, 6, 1])) 38 | 39 | fields = ['environment', 'policy', 'max_it', 'num_workers', 'rate', 40 | 'done', 'policy', 'p_net', 'v_net', 'workers', 'threads', 41 | 'global_counter', 'sess'] 42 | 43 | for field in fields: 44 | assert hasattr(a3c, field) 45 | -------------------------------------------------------------------------------- /SafeRLBench/spaces/test.py: -------------------------------------------------------------------------------- 1 | """Tests for spaces module."""
2 | from __future__ import absolute_import 3 | 4 | from functools import partial 5 | import inspect 6 | 7 | from numpy import array 8 | import SafeRLBench.spaces as spaces 9 | 10 | 11 | """Dictionary storing initialization arguments for classes.""" 12 | class_arguments = { 13 | spaces.BoundedSpace: [array([-1, -2]), array([1, 0])], 14 | spaces.RdSpace: [(3, 2)], 15 | spaces.DiscreteSpace: [5] 16 | } 17 | 18 | 19 | class TestSpaces(object): 20 | """Wrap spaces tests.""" 21 | 22 | classes = [] 23 | 24 | @classmethod 25 | def setUpClass(cls): 26 | """Initialize classes list.""" 27 | for name, c in inspect.getmembers(spaces): 28 | if inspect.isclass(c): 29 | cls.classes.append(c) 30 | 31 | def exhaustive_tests(self): 32 | """Check that every space class has test arguments defined.""" 33 | for c in self.classes: 34 | if c not in class_arguments: 35 | assert(False) 36 | 37 | def generate_tests(self): 38 | """Generate tests for spaces implementations.""" 39 | for c in self.classes: 40 | if c in class_arguments: 41 | check = partial(self.check_contains) 42 | check.description = ('Test: ' + c.__name__.upper() 43 | + ': implementation.') 44 | yield check, c 45 | 46 | def check_contains(self, c): 47 | """Check that `sample` and `contains` are implemented.""" 48 | space = c(*class_arguments[c]) 49 | try: 50 | x = space.sample() 51 | b = space.contains(x) 52 | except NotImplementedError: 53 | assert(False) 54 | assert(b) 55 | -------------------------------------------------------------------------------- /SafeRLBench/test/test_measure.py: -------------------------------------------------------------------------------- 1 | from SafeRLBench.measure import BestPerformance, SafetyMeasure 2 | 3 | from mock import Mock 4 | from unittest2 import TestCase 5 | 6 | 7 | def _mock_run(val): 8 | run = Mock() 9 | monitor = Mock() 10 | monitor.rewards = range(val, val + 4) 11 | run.get_alg_monitor.return_value = monitor 12 | 13 | print(monitor.rewards) 14 | print(run.get_alg_monitor()) 15 | print(monitor) 16 | 17 | return run 18 | 19 | 20 | class TestMeasure(TestCase): 21 | """Test Measure classes.""" 22 | 23 | def test_best_performance(self): 24 | """Test: MEASURE: BestPerformance.""" 25 | run1 = _mock_run(0) 26 | run2 = _mock_run(1) 27 | 28 | measure = BestPerformance() 29 | self.assertIsNone(measure.result) 30 | 31 | measure([run1, run2]) 32 | result = measure.result 33 | 34 | self.assertEquals(result[0][0], run2) 35 | self.assertEquals(result[1][0], run1) 36 | 37 | self.assertEquals(result[0][1], 4) 38 | self.assertEquals(result[1][1], 3) 39 | 40 | best_result = measure.best_result 41 | 42 | self.assertEquals(best_result[0], run2) 43 | self.assertEquals(best_result[1], 4) 44 | 45 | def test_safety_measure(self): 46 | """Test: MEASURE: SafetyMeasure.""" 47 | measure = SafetyMeasure(0) 48 | self.assertIsNone(measure.result) 49 | 50 | run1 = _mock_run(-2) 51 | run2 = _mock_run(0) 52 | 53 | measure([run1, run2]) 54 | 55 | result = measure.result 56 | 57 | self.assertEquals(result[0][0], run1) 58 | self.assertEquals(result[0][1], 2) 59 | self.assertEquals(result[0][2], 3) 60 | 61 | self.assertEquals(result[1][0], run2) 62 | self.assertEquals(result[1][1], 0) 63 | self.assertEquals(result[1][2], 0) 64 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | module="SafeRLBench" 2 | 3 | GREEN=\033[0;32m 4 | NC=\033[0m 5 | 6 | # Flake8 ignore errors 7 | flakeignore='E402,W503' 8 | 9 | # Pydocstyle ignore errors
10 | pydocignore='D105' 11 | 12 | style: 13 | @echo "${GREEN}Running style tests:${NC}" 14 | @flake8 ${module} --exclude test*.py,__init__.py --show-source 15 | @flake8 ${module} --filename=__init__.py,test*.py --ignore=F --show-source 16 | 17 | docstyle: 18 | @echo "${GREEN}Testing docstring conventions:${NC}" 19 | @pydocstyle ${module} --match='(?!__init__).*\.py' 2>&1 | grep -v "WARNING: __all__" 20 | 21 | unittests: 22 | @echo "${GREEN}Running unit tests in current environment.${NC}" 23 | @nosetests -v --with-doctest --with-coverage --cover-erase --cover-package=${module} ${module} 2>&1 | grep -v "^Level 1" 24 | 25 | coverage: unittests 26 | @echo "${GREEN}Create coverage report:${NC}" 27 | @coverage html 28 | 29 | test: style docstyle unittests 30 | 31 | # targets to setup docker images for testing 32 | setup_docker2: 33 | docker build -f misc/Dockerfile.python2 -t srlb-py27-image . 34 | 35 | setup_docker3: 36 | docker build -f misc/Dockerfile.python3 -t srlb-py35-image . 37 | 38 | setup_docker: setup_docker2 setup_docker3 39 | 40 | docker2: 41 | @echo "${GREEN}Running unit tests for 2.7 in docker container:${NC}" 42 | @docker run -e "TF_CPP_MIN_LOG_LEVEL=2" -v $(shell pwd):/code/ srlb-py27-image nosetests --with-doctest --verbosity=2 SafeRLBench 2>&1 | grep -v "^Level " 43 | 44 | docker3: 45 | @echo "${GREEN}Running unit tests for 3.5 in docker container:${NC}" 46 | @docker run -e "TF_CPP_MIN_LOG_LEVEL=2" -v $(shell pwd):/code/ srlb-py35-image nosetests --with-doctest --verbosity=2 SafeRLBench 2>&1 | grep -v "^Level " 47 | 48 | docker: docker2 docker3 49 | 50 | history: 51 | git log --graph --decorate --oneline 52 | 53 | clean: 54 | find . -type f -name '*.pyc' -exec rm -f {} ';' 55 | rm -r htmlcov 56 | -------------------------------------------------------------------------------- /SafeRLBench/test/test_integration.py: -------------------------------------------------------------------------------- 1 | from SafeRLBench import config 2 | 3 | from SafeRLBench import Bench, BenchConfig 4 | from SafeRLBench.algo import PolicyGradient 5 | from SafeRLBench.envs import LinearCar 6 | from SafeRLBench.policy import LinearPolicy 7 | from SafeRLBench.measure import BestPerformance 8 | 9 | 10 | from unittest2 import TestCase 11 | 12 | import logging 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | class TestIntegration(TestCase): 18 | """Test integration with PolicyGradient and LinearCar.""" 19 | 20 | def test_integration(self): 21 | """Integration: bench with pg and lc.""" 22 | # setup config: 23 | config.logger_set_level(logging.DEBUG) 24 | config.monitor_set_verbosity(3) 25 | 26 | policy = LinearPolicy(2, 1, biased=True) 27 | algs = [(PolicyGradient, {'policy': policy, 28 | 'max_it': 10, 29 | 'estimator': 'central_fd'})] 30 | env = [[(LinearCar, {'horizon': 100})]] 31 | 32 | test_config = BenchConfig(algs, env) 33 | 34 | benchmark = Bench(test_config, [BestPerformance()]) 35 | benchmark() 36 | 37 | assert(benchmark.measures[0].result is not None) 38 | 39 | def test_parallel_integration(self): 40 | """Integration: bench with pg and lc (parallel).""" 41 | # setup config: 42 | config.logger_set_level(logging.DEBUG) 43 | config.monitor_set_verbosity(3) 44 | config.jobs_set(2) 45 | 46 | policy = LinearPolicy(2, 1) 47 | algs = [(PolicyGradient, [{'policy': policy, 48 | 'max_it': 10, 49 | 'estimator': 'central_fd'}, 50 | {'policy': policy, 51 | 'max_it': 20, 52 | 'estimator': 'central_fd'}])] 53 | env = [[(LinearCar, {'horizon': 100})]] 54 | 55 | test_config = BenchConfig(algs, env)
56 | 57 | benchmark = Bench(test_config, [BestPerformance()]) 58 | benchmark() 59 | 60 | assert(benchmark.measures[0].result is not None) 61 | assert(len(benchmark.measures[0].result) == 2) 62 | -------------------------------------------------------------------------------- /SafeRLBench/algo/README.rst: -------------------------------------------------------------------------------- 1 | Description 2 | ----------- 3 | 4 | The ``algo`` module contains algorithm implementations based on the 5 | ``AlgorithmBase`` class. 6 | The objects should only be accessed through the interface functions defined 7 | in the base class. 8 | 9 | Overview 10 | -------- 11 | 12 | =============== =============== 13 | Algorithm Policy 14 | =============== =============== 15 | A3C NeuralNetwork 16 | PolicyGradient Any 17 | Q-Learning None 18 | SafeOpt Any 19 | =============== =============== 20 | 21 | Implementing an Algorithm 22 | ------------------------- 23 | 24 | When implementing an algorithm, a couple of things have to be considered. 25 | ``AlgorithmBase`` is an abstract base class. It will require any subclass to 26 | implement the private methods listed below. These will be invoked by the 27 | public interface methods. 28 | 29 | Any algorithm must be structured around four methods. The first is 30 | ``optimize``, which controls the optimization run and is responsible for 31 | invoking the other methods. The three tools ``optimize`` should use are the 32 | methods ``initialize``, ``step`` and ``is_finished``. 33 | 34 | ``initialize`` should be used to initialize the run and all the attributes and 35 | parameters that need to be set up. 36 | ``step`` should compute one step of the optimization run. 37 | ``is_finished`` is supposed to return ``True`` when the optimization run is 38 | finished. 39 | 40 | Requirements 41 | ~~~~~~~~~~~~ 42 | 43 | ================= ============================================================= 44 | Must implement 45 | =============================================================================== 46 | _initialize Initialize any attributes, objects needed. 47 | _step Execute one iteration of the algorithm. 48 | _is_finished Return ``True`` when done. 49 | ================= ============================================================= 50 | 51 | ================= ============================================================= 52 | May implement 53 | =============================================================================== 54 | _optimize(policy) Optimize the policy. Possibly no policy as in Q-learning. 55 | ================= ============================================================= 56 | -------------------------------------------------------------------------------- /SafeRLBench/envs/README.rst: -------------------------------------------------------------------------------- 1 | Description 2 | ----------- 3 | 4 | The ``envs`` module contains environment implementations based on the 5 | ``EnvironmentBase`` class. 6 | The objects should only be accessed through the interface functions defined 7 | in the base class. 
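For illustration, a rollout through this interface might look as follows.
This is a minimal sketch, assuming the public ``rollout`` method provided by
``EnvironmentBase``; any callable that maps states to actions can serve as a
policy::

    import numpy as np
    from SafeRLBench.envs import LinearCar

    env = LinearCar(horizon=10)

    # trivial policy: always accelerate in the positive direction
    trace = env.rollout(lambda state: np.array([1.]))

    # trace is a list of (action, state, reward) tuples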
8 | 9 | Overview 10 | -------- 11 | 12 | =================== =================================== ======================= 13 | Environment State Space Action Space 14 | =================== =================================== ======================= 15 | GeneralMountainCar :math:`[-1,1]\times[-0.07,0.07]` :math:`[-1, 1]` 16 | GymWrap 17 | LinearCar :math:`\mathbb{R}^{2d}` :math:`[-1, 1]^d` 18 | MDP 19 | Quadrocopter 20 | =================== =================================== ======================= 21 | 22 | Implementing an Environment 23 | --------------------------- 24 | 25 | When implementing an environment, a couple of things have to be considered. 26 | `EnvironmentBase` is an abstract base class. It will require any subclass to 27 | implement certain private methods which will be invoked by the public 28 | interface. Further, certain attributes should be initialized, as specified 29 | below, to support monitoring of the execution. 30 | 31 | Requirements 32 | ~~~~~~~~~~~~ 33 | 34 | Environments have to inherit from `SafeRLBench.EnvironmentBase`. 35 | 36 | =============== =============== =============================================== 37 | Initialize Attributes 38 | =============================================================================== 39 | state_space Space object 40 | action_space Space object 41 | horizon Integer Used in default _rollout implementation. 42 | =============== =============== =============================================== 43 | 44 | =============== =============== =============================================== 45 | Must implement 46 | =============================================================================== 47 | _update action Returns (action, state, reward) 48 | _reset 49 | =============== =============== =============================================== 50 | 51 | =============== =============== =============================================== 52 | May implement 53 | =============================================================================== 54 | _rollout policy Returns list of (action, state, reward) 55 | =============== =============== =============================================== 56 | -------------------------------------------------------------------------------- /SafeRLBench/error.py: -------------------------------------------------------------------------------- 1 | """Exceptions and error messages.""" 2 | 3 | import logging 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | 8 | class NotSupportedException(Exception): 9 | """Exception raised when requirements are not installed. 10 | 11 | Attributes 12 | ---------- 13 | dep : Module 14 | The dependent module. 15 | name : String 16 | Name of the dependency for a meaningful error message. 17 | """ 18 | 19 | def __init__(self, dep, name='Some'): 20 | """Initialize NotSupportedException. 21 | 22 | Parameters 23 | ---------- 24 | dep : Module 25 | The dependent module. 26 | name : String 27 | Name of the dependency for a meaningful error message. 28 | """ 29 | msg = name + " is not installed on this system." 30 | 31 | super(NotSupportedException, self).__init__(msg) 32 | 33 | self.dep = dep 34 | self.name = name 35 | 36 | 37 | class MultipleCallsException(Exception): 38 | """Exception raised when a setup method is called multiple times.""" 39 | 40 | pass 41 | 42 | 43 | class IncompatibilityException(Exception): 44 | """Exception raised when any two parts are incompatible with each other. 
45 | 46 | Attributes 47 | ---------- 48 | obj1 : object 49 | Instance of the object calling the exception. 50 | obj2 : object 51 | Instance of the object being incompatible. 52 | """ 53 | 54 | def __init__(self, obj1, obj2): 55 | """Initialize IncompatibilityException. 56 | 57 | Parameters 58 | ---------- 59 | obj1 : object 60 | Instance of the object calling the exception. 61 | obj2 : object 62 | Instance of the object being incompatible. 63 | """ 64 | msg = "%s is incompatible with %s." % (type(obj2).__name__, 65 | type(obj1).__name__) 66 | 67 | super(IncompatibilityException, self).__init__(msg) 68 | 69 | self.obj1 = obj1 70 | self.obj2 = obj2 71 | 72 | 73 | def add_dependency(dep, dep_name='Some'): 74 | """Add dependency. 75 | 76 | Function, that will raise a `NotSupportedException` when `dep` is None. 77 | 78 | Parameters 79 | ---------- 80 | dep : Module 81 | The dependent module. 82 | dep_name : String 83 | Name of the dependency for a meaningful error message. 84 | """ 85 | if dep is None: 86 | raise NotSupportedException(dep, dep_name) 87 | -------------------------------------------------------------------------------- /SafeRLBench/envs/gym_wrap.py: -------------------------------------------------------------------------------- 1 | """Wrapper for OpenAI Gym.""" 2 | 3 | from SafeRLBench import EnvironmentBase 4 | from SafeRLBench.error import add_dependency 5 | 6 | try: 7 | import gym 8 | except ImportError: 9 | gym = None 10 | 11 | 12 | # TODO: GymWrap: Add examples to docs 13 | class GymWrap(EnvironmentBase): 14 | """Wrapper class for the OpenAI Gym. 15 | 16 | Attributes 17 | ---------- 18 | env : gym environment 19 | Environment of the OpenAI Gym created by gym.make(). 20 | horizon : integer 21 | Horizon for rollout. 22 | render : boolean 23 | Default: False. If True, the simulation will be rendered during 24 | rollouts on this instance. 25 | 26 | Notes 27 | ----- 28 | The GymWrap class relies on the complete observability of the state 29 | through a state field in the respective gym environment. For the classic 30 | control problems this is indeed the case, but for other environments it 31 | remains untested. 32 | """ 33 | 34 | def __init__(self, env, horizon=100, render=False): 35 | """Initialize attributes. 36 | 37 | Parameters 38 | ---------- 39 | env : gym environment 40 | Instance of the gym environment that should be optimized on. 41 | horizon : integer 42 | Horizon for rollout. 43 | render : boolean 44 | Default: False. If True, the simulation will be rendered during 45 | rollouts on this instance. 
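Examples
--------
A hypothetical wrapping of a classic control task (a sketch only; it
requires the ``gym`` package to be installed)::

    import gym

    env = GymWrap(gym.make('MountainCar-v0'), horizon=200)

    # random rollout; the wrapped action space provides sample()
    trace = env.rollout(lambda state: env.action_space.sample())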
46 | """ 47 | add_dependency(gym, 'Gym') 48 | 49 | EnvironmentBase.__init__(self, env.observation_space, env.action_space, 50 | horizon) 51 | self.environment = env.unwrapped 52 | self.render = render 53 | self.done = False 54 | 55 | self.environment.reset() 56 | 57 | def _update(self, action): 58 | observation, reward, done, info = self.environment.step(action) 59 | self.done = done 60 | return action, observation, reward 61 | 62 | def _reset(self): 63 | self.environment.reset() 64 | self.done = False 65 | 66 | def _rollout(self, policy): 67 | trace = [] 68 | for n in range(self.horizon): 69 | if self.render: 70 | self.environment.render() 71 | trace.append(self.update(policy(self.state))) 72 | if self.done: 73 | break 74 | return trace 75 | 76 | @property 77 | def state(self): 78 | """Observable system state.""" 79 | return self.environment.state 80 | 81 | @state.setter 82 | def state(self, s): 83 | assert self.state_space.contains(s) 84 | self.environment.state = s 85 | 86 | 87 | def _get_test_args(): 88 | return [gym.make('MountainCar-v0')] 89 | -------------------------------------------------------------------------------- /SafeRLBench/spaces/bounded_space.py: -------------------------------------------------------------------------------- 1 | """Bounded subspace of R^n.""" 2 | import numpy as np 3 | from SafeRLBench import Space 4 | 5 | from numpy.random import rand 6 | 7 | 8 | class BoundedSpace(Space): 9 | """Bounded subspace of R^n. 10 | 11 | Attributes 12 | ---------- 13 | lower : array-like 14 | Lower bound 15 | upper : array-like 16 | Upper bound 17 | 18 | Examples 19 | -------- 20 | The `BoundedSpace` class can be instantiated in two ways. If you have 21 | individual bounds for each dimension, then you can directly pass the 22 | `lower` and `upper` bounds as array-likes. 23 | 24 | >>> space = BoundedSpace(np.array([-1, -2]), np.array([1, 0])) 25 | 26 | In this case the shape argument will be ignored. If you want to create a 27 | box of arbitrary shape in which all bounds are the same, you may pass the 28 | lower and upper bounds as scalars and make sure that you specify the 29 | shape. 30 | 31 | >>> space = BoundedSpace(-1, 1, shape=(2,)) 32 | """ 33 | 34 | def __init__(self, lower, upper, shape=None): 35 | """Initialize BoundedSpace. 36 | 37 | Parameters 38 | ---------- 39 | lower : array-like 40 | Lower bound of the space. Either an array or a scalar. 41 | Must agree with the input of the upper bound. 42 | upper : array-like 43 | Upper bound of the space. Either an array or a scalar. Must 44 | agree with the input of the lower bound. 45 | shape : tuple of int 46 | Shape of the bounds. Ignored if the bounds are arrays; if they 47 | are scalar, it must be set. 48 | """ 49 | if (np.isscalar(lower) and np.isscalar(upper)): 50 | assert shape is not None, "Shape must be set, if bounds are scalar" 51 | self.lower = np.zeros(shape) + lower 52 | self.upper = np.zeros(shape) + upper 53 | else: 54 | self.lower = np.array(lower) 55 | self.upper = np.array(upper) 56 | assert self.lower.shape == self.upper.shape, "Shapes do not agree." 
57 | 58 | self._dim = None 59 | 60 | def contains(self, x): 61 | """Check if element is contained.""" 62 | return (x.shape == self.lower.shape 63 | and (x >= self.lower).all() 64 | and (x <= self.upper).all()) 65 | 66 | def sample(self): 67 | """Return element.""" 68 | element = rand(*self.shape) * (self.upper - self.lower) + self.lower 69 | return element 70 | 71 | @property 72 | def shape(self): 73 | """Return element shape.""" 74 | return self.lower.shape 75 | 76 | @property 77 | def dimension(self): 78 | """Return dimension of the space.""" 79 | if self._dim is None: 80 | d = 1 81 | for i in range(len(self.shape)): 82 | d *= self.shape[i] 83 | self._dim = d 84 | return self._dim 85 | 86 | def __repr__(self): 87 | return 'BoundedSpace(lower=%s, upper=%s)' % (str(self.lower), 88 | str(self.upper)) 89 | -------------------------------------------------------------------------------- /SafeRLBench/test/test_configuration.py: -------------------------------------------------------------------------------- 1 | from SafeRLBench import SRBConfig 2 | 3 | from unittest2 import TestCase 4 | 5 | import sys 6 | import os 7 | 8 | import logging 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class TestSRBConfig(TestCase): 14 | """Test SRBConfig class.""" 15 | 16 | def test_logger_stream_handler(self): 17 | """Test: CONFIG: stream handler.""" 18 | config = SRBConfig(logger) 19 | 20 | self.assertIsNone(config.logger_stream_handler) 21 | 22 | # check if stream handler gets added 23 | config.logger_add_stream_handler() 24 | self.assertIsNotNone(config.logger_stream_handler) 25 | 26 | handler1 = config.logger_stream_handler 27 | handler2 = logging.StreamHandler(sys.stdout) 28 | 29 | # check if handler changes on assignment 30 | config.logger_stream_handler = handler2 31 | self.assertNotEqual(handler1, config.logger_stream_handler) 32 | 33 | def test_logger_file_handler(self): 34 | """Test: CONFIG: file handler.""" 35 | config = SRBConfig(logger) 36 | 37 | self.assertIsNone(config.logger_file_handler) 38 | 39 | # check if file handler gets added 40 | config.logger_add_file_handler('logs.log') 41 | self.assertIsNotNone(config.logger_file_handler) 42 | 43 | handler1 = config.logger_file_handler 44 | handler2 = logging.FileHandler('logs2.log') 45 | 46 | # check if handler changes on assignment 47 | config.logger_file_handler = handler2 48 | self.assertNotEqual(handler1, config.logger_file_handler) 49 | 50 | self.assertTrue(os.path.isfile('logs.log')) 51 | self.assertTrue(os.path.isfile('logs2.log')) 52 | 53 | config.logger_file_handler = None 54 | 55 | def test_logger_format(self): 56 | """Test: CONFIG: logger format.""" 57 | config = SRBConfig(logger) 58 | 59 | config.logger_add_stream_handler() 60 | config.logger_add_file_handler('logs.log') 61 | 62 | fmt = '%(name)s - %(levelname)s - %(message)s' 63 | formatter = logging.Formatter(fmt) 64 | 65 | config.logger_format = fmt 66 | 67 | tst_record = { 68 | 'name': 'test_logger', 69 | 'level': logging.DEBUG, 70 | 'pathname': os.path.realpath(__file__), 71 | 'lineno': 42, 72 | 'msg': 'test_msg', 73 | 'args': None, 74 | 'exc_info': None, 75 | 'func': 'test_logger_format' 76 | } 77 | rec = logging.makeLogRecord(tst_record) 78 | self.assertEqual(formatter.format(rec), 79 | config.logger_stream_handler.format(rec)) 80 | 81 | def test_monitor_verbosity(self): 82 | """Test: CONFIG: monitor verbosity.""" 83 | config = SRBConfig(logger) 84 | 85 | config.monitor_set_verbosity(42) 86 | self.assertEqual(config.monitor_verbosity, 42) 87 | 88 | with 
self.assertRaises(ValueError): 89 | config.monitor_set_verbosity(-1) 90 | 91 | def test_jobs(self): 92 | """Test: CONFIG: jobs set.""" 93 | config = SRBConfig(logger) 94 | 95 | config.jobs_set(42) 96 | self.assertEqual(config.n_jobs, 42) 97 | 98 | with self.assertRaises(ValueError): 99 | config.jobs_set(-1) 100 | 101 | @classmethod 102 | def tearDownClass(cls): 103 | """Clean up created file.""" 104 | if os.path.isfile('logs.log'): 105 | os.remove('logs.log') 106 | if os.path.isfile('logs2.log'): 107 | os.remove('logs2.log') 108 | -------------------------------------------------------------------------------- /SafeRLBench/envs/linear_car.py: -------------------------------------------------------------------------------- 1 | """Linear Car.""" 2 | import numpy as np 3 | from numpy import copy, array 4 | from numpy.linalg import norm 5 | 6 | from SafeRLBench import EnvironmentBase 7 | from SafeRLBench.spaces import RdSpace, BoundedSpace 8 | 9 | 10 | # TODO: LinearCar: add examples 11 | class LinearCar(EnvironmentBase): 12 | """Implementation of LinearCar Environment. 13 | 14 | This is a very simple environment implementing a car in an arbitrarily 15 | dimensioned space. By default it will just be one dimensional, which 16 | results in a two dimensional state space, that is, (pos, vel), and 17 | accordingly in a one dimensional bounded action space, that is, the 18 | acceleration. 19 | 20 | Attributes 21 | ---------- 22 | state : ndarray 23 | Current state of the LinearCar. 24 | initial_state : ndarray 25 | Initial state of the LinearCar. 26 | goal : ndarray 27 | Goal state. 28 | eps : float 29 | Margin for completion. If 0, the goal is to stabilize at the goal 30 | completely. 31 | step : float 32 | Update step. 33 | state_space : Space object 34 | State space as deduced from the state. 35 | action_space : Space object 36 | Action space as deduced from the state. 37 | """ 38 | 39 | def __init__(self, state=array([[0.], [0.]]), goal=array([[1.], [0.]]), 40 | step=0.01, eps=0, horizon=100): 41 | """ 42 | Initialize LinearCar. 43 | 44 | Parameters 45 | ---------- 46 | state : ndarray 47 | Initial state of the LinearCar. The state and action space will be 48 | deduced from this. The shape needs to be (2, d) for d > 0. 49 | goal : ndarray 50 | Goal state of the LinearCar. The shape should comply to the shape 51 | of the initial state. 52 | In case the velocity is non-zero, eps should be strictly greater 53 | than zero, since there is no way for the system to stabilize in 54 | the goal state anyway. 55 | eps : float 56 | Absolute reward below which the goal counts as achieved and the 57 | rollout is aborted. If zero we do not abort at all. 58 | step : float 59 | Update step. 60 | """ 61 | assert state.shape[0] == 2, 'Invalid shape of the initial state.' 62 | assert state.shape == goal.shape, 'State and goal shape have to agree.' 
62 | 63 | # Initialize EnvironmentBase attributes 64 | self.horizon = horizon 65 | self.state_space = RdSpace(state.shape) 66 | self.action_space = BoundedSpace(-1, 1, shape=(state.shape[1],)) 67 | 68 | # Initialize State 69 | self.initial_state = state 70 | self.state = copy(state) 71 | 72 | # Initialize Environment Parameters 73 | self.goal = goal 74 | self.eps = eps 75 | self.step = step 76 | 77 | def _update(self, action): 78 | one = np.ones(self.action_space.shape) 79 | action = np.maximum(np.minimum(action, one), -one) 80 | 81 | self.state[1] += self.step * action 82 | self.state[0] += self.state[1] 83 | 84 | return (action, copy(self.state), self._reward()) 85 | 86 | def _reset(self): 87 | self.state = copy(self.initial_state) 88 | 89 | def _rollout(self, policy): 90 | self.reset() 91 | trace = [] 92 | for n in range(self.horizon): 93 | action = policy(self.state) 94 | trace.append(self.update(action)) 95 | if (self.eps != 0 and self._achieved()): 96 | return trace 97 | return trace 98 | 99 | def _reward(self): 100 | return -norm(self.state - self.goal) 101 | 102 | def _achieved(self): 103 | return (abs(self._reward()) < self.eps) 104 | -------------------------------------------------------------------------------- /SafeRLBench/algo/q_learning.py: -------------------------------------------------------------------------------- 1 | """Q-learning implementations.""" 2 | 3 | from SafeRLBench import AlgorithmBase, Policy 4 | from SafeRLBench.spaces import DiscreteSpace 5 | from SafeRLBench.error import IncompatibilityException 6 | 7 | import numpy as np 8 | 9 | 10 | # TODO: DiscreteQLearning: examples, monitoring, finished, adaptive rate 11 | class DiscreteQLearning(AlgorithmBase): 12 | """Q-Learning Algorithm. 13 | 14 | This algorithm estimates a quality measure that maps every (state, action) 15 | pair to a real number. 16 | 17 | Attributes 18 | ---------- 19 | Q : ndarray 20 | Array representing the quality for each state action pair. 21 | environment : 22 | The environment for which we want to estimate the Q function. Its 23 | state and action space need to be instances of `DiscreteSpace`. 24 | discount : float 25 | Discount factor. 26 | max_it : int 27 | Maximum number of iterations. 28 | rate : float 29 | Update rate. 30 | shape : (int, int) 31 | Tuple containing the dimension of the state and action space. 32 | 33 | Notes 34 | ----- 35 | The environment needs to use a discrete state and action space, because 36 | this Q-Learning implementation uses a table to estimate the Q function. 37 | """ 38 | 39 | def __init__(self, environment, discount, max_it, rate): 40 | """Initialize QLearning. 41 | 42 | Parameters 43 | ---------- 44 | environment : 45 | The environment for which we want to estimate the Q function. Its 46 | state and action space need to be instances of `DiscreteSpace`. 47 | discount : float 48 | Discount factor. 49 | max_it : int 50 | Maximum number of iterations. 51 | rate : float 52 | Update rate. 
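Examples
--------
A hypothetical setup (a sketch only; ``env`` is assumed to be an
environment whose state and action spaces are both instances of
``DiscreteSpace``, and ``optimize`` is the public entry point inherited
from ``AlgorithmBase``)::

    alg = DiscreteQLearning(env, discount=0.95, max_it=1000, rate=0.1)
    alg.optimize()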
53 | """ 54 | # make some sanity checks 55 | if (not isinstance(environment.action_space, DiscreteSpace) 56 | or not isinstance(environment.state_space, DiscreteSpace)): 57 | raise IncompatibilityException(self, environment) 58 | 59 | if discount <= 0: 60 | raise ValueError('discount (%s) needs to be larger than zero.' 61 | % discount) 62 | 63 | if max_it <= 0: 64 | raise ValueError('max_it (%d) needs to be larger than zero.' % max_it) 65 | 66 | # initialize the fields 67 | self.environment = environment 68 | self.discount = discount 69 | self.max_it = max_it 70 | self.rate = rate 71 | 72 | # determine the dimension of the state and action space 73 | d_state = environment.state_space.dimension 74 | d_action = environment.action_space.dimension 75 | 76 | self.shape = (d_state, d_action) 77 | 78 | # initialize the lookup table for the Q function. 79 | self.Q = None 80 | self.policy = _RandomPolicy(environment.action_space) 81 | 82 | def _initialize(self): 83 | self.Q = np.zeros(self.shape) 84 | 85 | def _step(self): 86 | trace = self.environment.rollout(self.policy) 87 | for (action, state, reward) in trace: 88 | dq = (reward + self.discount * self.Q[state, :].max() 89 | - self.Q[state, action]) 90 | self.Q[state, action] += self.rate * dq 91 | 92 | def _is_finished(self): 93 | pass 94 | 95 | # TODO: Q-learning evaluate qlearning performance appropriately 96 | 97 | 98 | class _RandomPolicy(Policy): 99 | 100 | def __init__(self, action_space): 101 | self.action_space = action_space 102 | 103 | def map(self, state): 104 | return self.action_space.sample() 105 | 106 | @property 107 | def parameters(self): 108 | return self.action_space.dimension 109 | 110 | @property 111 | def parameter_space(self): 112 | return None 113 | -------------------------------------------------------------------------------- /SafeRLBench/envs/test.py: -------------------------------------------------------------------------------- 1 | """Tests for envs module. 2 | 3 | Need rework. 4 | """ 5 | from __future__ import absolute_import 6 | 7 | # import unittest 8 | # from numpy.testing import * 9 | import inspect 10 | from functools import partial 11 | 12 | import SafeRLBench.envs as envs 13 | 14 | import numpy as np 15 | 16 | import gym 17 | gym.undo_logger_setup() 18 | 19 | from mock import Mock 20 | 21 | 22 | class TestEnvironments(object): 23 | """ 24 | Test Class for Environment tests. 25 | 26 | Note that you really don't want to inherit from unittest.TestCase here, 27 | because it will break reasonable output with verbose testing. 
28 | """ 29 | 30 | exclude = [] 31 | 32 | args = { 33 | 'GymWrap': envs.gym_wrap._get_test_args(), 34 | 'MDP': envs.mdp._get_test_args() 35 | } 36 | 37 | @classmethod 38 | def setUpClass(cls): 39 | """Generate list of classes.""" 40 | cls.classes = [] 41 | for name, c in inspect.getmembers(envs): 42 | if inspect.isclass(c): 43 | cls.classes.append(c) 44 | 45 | def test_environment_requirements(self): 46 | """Generate tests for environment implementations.""" 47 | for c in self.classes: 48 | if c.__name__ in self.exclude: 49 | pass 50 | else: 51 | # Generate NotImplementedError Test for _update 52 | check_update = partial(self.check_env_update) 53 | check_update.description = ('Test: ' + c.__name__.upper() 54 | + ': update implementation.') 55 | yield check_update, c 56 | 57 | # Generate NotImplementedError Test for _reset 58 | check_reset = partial(self.check_env_reset) 59 | check_reset.description = ('Test: ' + c.__name__.upper() 60 | + ': reset implementation.') 61 | yield check_reset, c 62 | 63 | check_rollout = partial(self.check_env_rollout) 64 | check_rollout.description = ('Test: ' + c.__name__.upper() 65 | + ': rollout implementation.') 66 | yield check_rollout, c 67 | 68 | def check_env_update(self, c): 69 | """Check if _update is implemented.""" 70 | args = self.args.get(c.__name__, []) 71 | env = c(*args) 72 | x = env.action_space.sample() 73 | try: 74 | env._update(x) 75 | except NotImplementedError: 76 | assert False 77 | 78 | def check_env_reset(self, c): 79 | """Check if _reset is implemented.""" 80 | args = self.args.get(c.__name__, []) 81 | env = c(*args) 82 | try: 83 | env._reset() 84 | except NotImplementedError: 85 | assert False 86 | 87 | def check_env_rollout(self, c): 88 | """Check rollout correctness at random positions.""" 89 | args = self.args.get(c.__name__, []) 90 | env = c(*args) 91 | 92 | init_state = env.state 93 | 94 | def policy(state): 95 | return env.action_space.sample() 96 | 97 | policy_mock = Mock(side_effect=policy) 98 | trace = env._rollout(policy_mock) 99 | 100 | # reset the environment 101 | env._reset() 102 | env.state = init_state 103 | 104 | # if the environment depends on a seed, reset it. 105 | if hasattr(env, 'seed'): 106 | env.seed = env.seed 107 | 108 | actions = [t[0] for t in trace] 109 | 110 | policy_mock_redo = Mock(side_effect=actions) 111 | 112 | trace_verify = env._rollout(policy_mock_redo) 113 | 114 | for t, t_verify in zip(trace, trace_verify): 115 | print(t) 116 | print(t_verify) 117 | if isinstance(t[0], np.ndarray): 118 | assert(all(np.isclose(t_verify[0], t[0]))) 119 | else: 120 | assert(np.isclose(t_verify[0], t[0])) 121 | if isinstance(t[1], np.ndarray): 122 | print(t_verify[1] - t[1]) 123 | assert(all(np.isclose(t_verify[1], t[1]))) 124 | else: 125 | assert(np.isclose(t_verify[1], t[1])) 126 | assert(np.isclose(t_verify[2], t[2])) 127 | -------------------------------------------------------------------------------- /SafeRLBench/envs/mdp.py: -------------------------------------------------------------------------------- 1 | """Markov Decision Process Implementations.""" 2 | 3 | import numpy as np 4 | 5 | from SafeRLBench import EnvironmentBase 6 | from SafeRLBench.spaces import DiscreteSpace 7 | 8 | 9 | class MDP(EnvironmentBase): 10 | """Discrete Markov Decision Process Environment. 11 | 12 | Attributes 13 | ---------- 14 | transitions : array-like 15 | Array holding transition matrix for each action. The dimension of 16 | the state and action spaces will be deduced from this array. 
17 |     rewards : array-like
18 |         Array holding the reward matrix for each action. It needs to comply
19 |         with the dimensions deduced from the transitions array.
20 |     action_space : DiscreteSpace object
21 |         Action space as determined from the transitions array.
22 |     state_space : DiscreteSpace object
23 |         State space as determined from the transitions array.
24 |     init_state : int
25 |         Initial state of the process. If None, it will be set to 0.
26 |     state : int
27 |         Current state of the system.
28 |     """
29 | 
30 |     def __init__(self, transitions, rewards, horizon=100, init_state=None,
31 |                  seed=None):
32 |         """MDP initialization.
33 | 
34 |         Parameters
35 |         ----------
36 |         transitions : array-like
37 |             Array holding transition matrix for each action. The dimension of
38 |             the state and action spaces will be deduced from this array.
39 |         rewards : array-like
40 |             Array holding the reward matrix for each action. It needs to comply
41 |             with the dimensions deduced from the transitions array.
42 |         init_state : int
43 |             Initial state of the process. If None, it will be set to 0.
44 |         """
45 |         self.horizon = horizon
46 | 
47 |         self.transitions = transitions
48 |         self.rewards = rewards
49 | 
50 |         # determine state and action space
51 |         self.action_space = DiscreteSpace(len(transitions))
52 |         self.state_space = DiscreteSpace(len(transitions[0]))
53 | 
54 |         # if initial state is none, we will use 0 as an initial state
55 |         if init_state is None:
56 |             init_state = 0
57 |         elif not self.state_space.contains(init_state):
58 |             raise ValueError('Initial state (%d) is not a valid state.'
59 |                              % init_state)
60 | 
61 |         # setup current state and store the initial state for reset
62 |         self.init_state = init_state
63 |         self.state = init_state
64 | 
65 |         # set up the random number generator
66 |         self.random = np.random.RandomState()
67 | 
68 |         if seed is not None:
69 |             self.seed = seed
70 |         else:
71 |             self._seed = None
72 | 
73 |     @property
74 |     def seed(self):
75 |         """Seed for the internal random number generator."""
76 |         return self._seed
77 | 
78 |     @seed.setter
79 |     def seed(self, v):
80 |         self.random.seed(v)
81 |         self._seed = v
82 | 
83 |     def _update(self, action):
84 |         prev_state = self.state
85 | 
86 |         # choose next state
87 |         self.state = self.random.choice(np.arange(self.state_space.dimension),
88 |                                         p=self.transitions[action][self.state])
89 |         # determine reward
90 |         reward = self.rewards[action][prev_state][self.state]
91 | 
92 |         return action, self.state, reward
93 | 
94 |     def _reset(self):
95 |         self.state = self.init_state
96 | 
97 | 
98 | def _get_test_args():
99 |     # private method that will generate arguments for mdp testing.
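    # The MDP below is a 5-state chain with two actions: action 0 tends to
    # advance along the chain (with some probability of falling back to
    # state 0), while action 1 moves to state 0 deterministically and is the
    # only action that ever yields a non-zero reward.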
100 |     transitions = [
101 |         [[.1, .9, 0., 0., 0.],
102 |          [.2, 0., .8, 0., 0.],
103 |          [.3, 0., 0., .7, 0.],
104 |          [.4, 0., 0., 0., .6],
105 |          [.4, 0., 0., 0., .6]],
106 |         [[1., 0., 0., 0., 0.],
107 |          [1., 0., 0., 0., 0.],
108 |          [1., 0., 0., 0., 0.],
109 |          [1., 0., 0., 0., 0.],
110 |          [1., 0., 0., 0., 0.]]
111 |     ]
112 | 
113 |     rewards = [
114 |         [[0., 0., 0., 0., 0.],
115 |          [0., 0., 0., 0., 0.],
116 |          [0., 0., 0., 0., 0.],
117 |          [0., 0., 0., 0., 0.],
118 |          [0., 0., 0., 0., 0.]],
119 |         [[0., 0., 0., 0., 0.],
120 |          [1., 0., 0., 0., 0.],
121 |          [2., 0., 0., 0., 0.],
122 |          [3., 0., 0., 0., 0.],
123 |          [4., 0., 0., 0., 0.]],
124 |     ]
125 | 
126 |     return [transitions, rewards, 100, None, 42]
--------------------------------------------------------------------------------
/SafeRLBench/policy/controller.py:
--------------------------------------------------------------------------------
1 | """Quadrocopter Controller."""
2 | from SafeRLBench import Policy
3 | from SafeRLBench.spaces import BoundedSpace
4 | from SafeRLBench.envs._quadrocopter import StateVector
5 | 
6 | import numpy as np
7 | 
8 | import logging
9 | 
10 | logger = logging.getLogger(__name__)
11 | 
12 | __all__ = ('NonLinearQuadrocopterController',)
13 | 
14 | 
15 | # TODO: Controller: Documentation
16 | class NonLinearQuadrocopterController(Policy):
17 |     """Non-linear quadrocopter controller."""
18 | 
19 |     def __init__(self, zeta_z=0.7, params=[.7, .7, .7, .5, .707],
20 |                  reference=None):
21 |         """Initialize NonLinearQuadrocopterController.
22 | 
23 |         Parameters
24 |         ----------
25 |         zeta_z : float, damping ratio for the vertical (z) velocity command
26 |         params : array-like, the five controller parameters [tau_x, tau_y, tau_z, tau_w, zeta]
27 |         reference : reference trajectory object (see `map`); may be set later
28 |         """
29 |         self._zeta_z = zeta_z
30 |         self._params = np.array(params)
31 |         self.reference = reference
32 | 
33 |         if params is not None:
34 |             self.initialized = True
35 |         else:
36 |             self.initialized = False
37 | 
38 |         self._par_space = BoundedSpace(np.array([0., 0., 0., 0., 0.]),
39 |                                        np.array([1., 1., 1., 1., 1.]))
40 | 
41 |     def map(self, state):
42 |         """Map state to action.
43 | 
44 |         Depends on a reference object. If the environment has a reference
45 |         object it needs to set the reference at the start of the rollout.
46 | 
47 |         Parameters
48 |         ----------
49 |         state : array-like
50 |             Element of state space.
51 | 
52 |         Returns
53 |         -------
54 |         action : ndarray
55 |             Element of action space.
56 |         """
57 |         ref = self.reference.reference
58 |         state = StateVector(state)
59 | 
60 |         # Allocate memory for the 4 outputs of the controller.
61 |         action = np.empty((4,), dtype=np.float32)
62 | 
63 |         # Retrieve the different parameters and make sure the critical ones
64 |         # are non-zero.
65 |         tau_x, tau_y, tau_z, tau_w, zeta = self._params
66 |         if tau_x < 1e-3:
67 |             tau_x = 1e-3
68 |             logger.warning('Parameter `tau_x` too small for controller, '
69 |                            + 'has been clipped to 1e-3.')
70 |         if tau_y < 1e-3:
71 |             tau_y = 1e-3
72 |             logger.warning('Parameter `tau_y` too small for controller, '
73 |                            + 'has been clipped to 1e-3.')
74 |         if tau_w < 1e-3:
75 |             tau_w = 1e-3
76 |             logger.warning('Parameter `tau_w` too small for controller, '
77 |                            + 'has been clipped to 1e-3.')
78 |         if zeta < 1e-3:
79 |             zeta = 1e-3
80 |             logger.warning('Parameter `zeta` too small for controller, '
81 |                            + 'has been clipped to 1e-3.')
82 | 
83 |         # desired acceleration in x and y (global coordinates, [m/s^2])
84 |         ax = (2. * zeta / tau_x * (ref.vel[0] - state.vel[0])
85 |               + 1. / (tau_x**2) * (ref.pos[0] - state.pos[0]))
86 |         ay = (2. * zeta / tau_y * (ref.vel[1] - state.vel[1])
87 |               + 1.
              / (tau_y**2) * (ref.pos[1] - state.pos[1]))
88 | 
89 |         # Normalize by thrust
90 |         thrust = np.linalg.norm(np.array([ax, ay, 9.81 + state.acc[2]]))
91 |         ax /= thrust
92 |         ay /= thrust
93 | 
94 |         # Rotate desired accelerations into the yaw-rotated inertial frame
95 |         ax_b = ax * np.cos(state.euler[2]) + ay * np.sin(state.euler[2])
96 |         ay_b = -ax * np.sin(state.euler[2]) + ay * np.cos(state.euler[2])
97 | 
98 |         # Get euler angles from rotation matrix
99 |         action[1] = np.arcsin(-ay_b)
100 |         action[0] = np.arcsin(ax_b / np.cos(action[1]))
101 | 
102 |         # Z-velocity command (m/s)
103 |         action[2] = (2. * self._zeta_z / tau_z * (ref.vel[2] - state.vel[2])
104 |                      + 1. / (tau_z**2) * (ref.pos[2] - state.pos[2]))
105 | 
106 |         # Yaw rate command (rad/s)
107 |         yaw_err = (np.mod(ref.euler[2] - state.euler[2] + np.pi, 2 * np.pi)
108 |                    - np.pi)
109 |         action[3] = yaw_err / tau_w + ref.omega_b[2]
110 | 
111 |         return action
112 | 
113 |     @property
114 |     def parameters(self):
115 |         """Controller parameters."""
116 |         return self._params
117 | 
118 |     @parameters.setter
119 |     def parameters(self, params):
120 |         self._params = np.array(params)
121 | 
122 |     @property
123 |     def parameter_space(self):
124 |         """Controller parameter space."""
125 |         return self._par_space
126 | 
--------------------------------------------------------------------------------
/SafeRLBench/envs/general_mountaincar.py:
--------------------------------------------------------------------------------
1 | """General Mountain Car."""
2 | import numpy as np
3 | from numpy import pi, array, copy, cos, sin
4 | 
5 | from SafeRLBench.base import EnvironmentBase
6 | from SafeRLBench.spaces import BoundedSpace
7 | 
8 | 
9 | class GeneralMountainCar(EnvironmentBase):
10 |     """Implementation of a GeneralMountainCar Environment.
11 | 
12 |     Attributes
13 |     ----------
14 |     state_space : BoundedSpace
15 |         Space object describing the state space.
16 |     action_space : BoundedSpace
17 |         Space object describing the action space.
18 |     state : array-like
19 |         Current state of the car.
20 |     initial_state : array-like
21 |         Initial state of the car.
22 |     gravitation : double, scale of the gravitational force on the car
23 |     power : double, scale of the force exerted by an action
24 |     goal : double
25 |         Goal along x-coordinate.
26 |     """
27 | 
28 |     def __init__(self,
29 |                  state_space=BoundedSpace(array([-1, -0.07]),
30 |                                           array([1, 0.07])),
31 |                  action_space=BoundedSpace(-1, 1, shape=(1,)),
32 |                  state=np.array([0, 0]),
33 |                  contour=None, gravitation=0.0025, power=0.0015,
34 |                  goal=0.6, horizon=100):
35 |         """Initialize GeneralMountainCar Environment.
36 | 
37 |         Parameters
38 |         ----------
39 |         state_space : BoundedSpace
40 |             Space object describing the state space.
41 |         action_space : BoundedSpace
42 |             Space object describing the action space.
43 |         state : array-like
44 |             Initial state of the car.
45 |         contour : tuple of callables
46 |             If contour is None, a default shape will be generated. A valid
47 |             tuple needs to contain a function for the height at a position
48 |             in the first element and a function for the gradient at a position
49 |             in the second element.
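            For example, ``(lambda x: x ** 2, lambda x: 2 * x)`` would
            describe a parabolic valley.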
50 |         gravitation : double, scale of the gravitational force on the car
51 |         power : double, scale of the force exerted by an action
52 |         goal : double
53 |             Goal along x-coordinate.
54 |         """
55 |         # Initialize Environment Base Parameters
56 |         super(GeneralMountainCar, self).__init__(state_space,
57 |                                                  action_space,
58 |                                                  horizon)
59 | 
60 |         # setup environment parameters
61 |         self.goal = goal
62 |         self.power = power
63 |         self.gravitation = gravitation
64 | 
65 |         # setup contour
66 |         if contour is None:
67 |             def _hx(x):
68 |                 return -cos(pi * x)
69 |             self._hx = _hx
70 | 
71 |             def _dydx(x):
72 |                 return pi * sin(pi * x)
73 |             self._dydx = _dydx
74 |         else:
75 |             self._hx = contour[0]
76 |             self._dydx = contour[1]
77 | 
78 |         # init state
79 |         self.state = copy(state)
80 |         self.initial_state = state
81 | 
82 |     def _update(self, action):
83 |         """Compute step considering the action."""
84 |         action = array(action).flatten()
85 |         action = np.clip(action, -1.0, 1.0)
86 | 
87 |         if hasattr(action, 'size') and action.size == 1:
88 |             action_in = action[0]
89 |         else:
90 |             action_in = action
91 | 
92 |         position = self.state[0]
93 |         velocity = self.state[1]
94 | 
95 |         velocity += (action_in * self.power
96 |                      - self._dydx(position) * self.gravitation)
97 |         position += velocity
98 | 
99 |         bounds = self.state_space
100 | 
101 |         velocity = max(min(velocity, bounds.upper[1]), bounds.lower[1])
102 |         position = max(min(position, bounds.upper[0]), bounds.lower[0])
103 | 
104 |         # make sure outputs have the right form
105 |         self.state = np.array([position, velocity])
106 |         action = np.reshape(action, self.action_space.shape)
107 | 
108 |         return action, copy(self.state), self._reward()
109 | 
110 |     def _reset(self):
111 |         self.state = copy(self.initial_state)
112 | 
113 |     def _reward(self):
114 |         return self.height() - 1
115 | 
116 |     def _rollout(self, policy):
117 |         self.reset()
118 |         trace = []
119 |         for n in range(self.horizon):
120 |             action = policy(self.state)
121 |             trace.append(self.update(action))
122 |             if self.position() >= self.goal:
123 |                 return trace
124 |         return trace
125 | 
126 |     def height(self):
127 |         """Compute current height."""
128 |         return self._hx(self.state[0].item()).item()
129 | 
130 |     def position(self):
131 |         """Compute current position in x."""
132 |         return self.state[0]
133 | 
--------------------------------------------------------------------------------
/SafeRLBench/measure.py:
--------------------------------------------------------------------------------
1 | """Define Measurements."""
2 | 
3 | from abc import ABCMeta, abstractmethod
4 | from six import add_metaclass
5 | 
6 | from operator import itemgetter
7 | 
8 | __all__ = ('Measure', 'BestPerformance', 'SafetyMeasure')
9 | 
10 | 
11 | @add_metaclass(ABCMeta)
12 | class Measure(object):
13 |     """Abstract Base class defining the interface for any measurement.
14 | 
15 |     The methods below are abstract and need to be implemented by any child.
16 | 
17 |     Methods
18 |     -------
19 |     __call__(runs)
20 |         Abstract! Evaluate a list of runs.
21 |     result
22 |         Abstract property! Return the result of the evaluation.
23 |     """
24 | 
25 |     @abstractmethod
26 |     def __call__(self, runs):
27 |         """Evaluate a list of runs.
28 | 
29 |         Parameters
30 |         ----------
31 |         runs : List of BenchRun instances
32 |             May be any subset of BenchRun instances passed in a list.
33 |         """
34 |         pass
35 | 
36 |     @property
37 |     @abstractmethod
38 |     def result(self):
39 |         """Return the result of evaluation."""
40 |         pass
41 | 
42 | 
43 | class BestPerformance(Measure):
44 |     """Find the best performance achieved within runs."""
45 | 
46 |     def __call__(self, runs):
47 |         """Sort content of runs by performance.
48 | 
49 |         This class creates a tuple of each BenchRun and its respective best
50 |         performance, and stores the tuples in a list sorted in descending
51 |         order. The results are accessible through the result method.
52 | 
53 |         Parameters
54 |         ----------
55 |         runs : List of BenchRun instances
56 |             May be any subset of BenchRun instances in a list.
57 |         """
58 |         # create a list of tuples with the max reward for each run
59 |         runs_tup = []
60 |         for run in runs:
61 |             monitor = run.get_alg_monitor()
62 |             max_reward = max(monitor.rewards)
63 |             runs_tup.append((run, max_reward))
64 | 
65 |         # sort list
66 |         self._result = sorted(runs_tup, key=itemgetter(1), reverse=True)
67 | 
68 |     @property
69 |     def result(self):
70 |         """Retrieve result."""
71 |         if not hasattr(self, '_result'):
72 |             self._result = None
73 |         return self._result
74 | 
75 |     @property
76 |     def best_result(self):
77 |         """Retrieve the best run."""
78 |         if self.result is not None:
79 |             return self.result[0]
80 |         return None
81 | 
82 | 
83 | class SafetyMeasure(Measure):
84 |     """Detect Safety violations.
85 | 
86 |     The measure evaluates to a list of 3-tuples, where the first
87 |     element contains the instance that was evaluated, the second one
88 |     the number of violations that occurred and the third the sum of
89 |     those violations, i.e. the sum of the difference between the
90 |     effective reward and the threshold, for every violation.
91 | 
92 |     Attributes
93 |     ----------
94 |     threshold : float or integer
95 |         Reward threshold to detect violations.
96 |     """
97 | 
98 |     def __init__(self, threshold):
99 |         """Initialize SafetyMeasure.
100 | 
101 |         Parameters
102 |         ----------
103 |         threshold : float or integer
104 |             Reward threshold to detect violations.
105 |         """
106 |         self.threshold = threshold
107 | 
108 |     def __call__(self, runs):
109 |         """Evaluate Safety violations.
110 | 
111 |         Parameters
112 |         ----------
113 |         runs : List of BenchRun instances
114 |             May be any subset of BenchRun instances in a list.
115 |         """
116 |         self._result = []
117 | 
118 |         for run in runs:
119 |             num_violations = 0
120 |             sum_violations = 0
121 |             for reward in run.get_alg_monitor().rewards:
122 |                 if reward < self.threshold:
123 |                     num_violations += 1
124 |                     sum_violations += self.threshold - reward
125 |             self._result.append((run, num_violations, sum_violations))
126 | 
127 |     @property
128 |     def result(self):
129 |         """Retrieve result.
130 | 
131 |         If a run has been evaluated, this function will retrieve the
132 |         result, otherwise it will return ``None``.
133 |         The function evaluates to a list of 3-tuples, where the first
134 |         element contains the instance that was evaluated, the second one
135 |         the number of violations that occurred and the third the sum of
136 |         those violations, i.e. the sum of the difference between the
137 |         effective reward and the threshold, for every violation.
138 | 
139 |         Returns
140 |         -------
141 |         (run, count, amount) : 3-tuple
142 |             run: Object that has been evaluated.
143 |             count: Number of rollouts where the safety threshold was violated.
144 |             amount: Sum of the differences between the threshold and each
145 |                 violating reward.
146 |         """
147 |         if not hasattr(self, '_result'):
148 |             self._result = None
149 |         return self._result
--------------------------------------------------------------------------------
/SafeRLBench/envs/_quadrocopter/quaternions.py:
--------------------------------------------------------------------------------
1 | """Some common functions for manipulating quaternions.
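
Quaternions are represented as 4-element ndarrays in ``[x, y, z, w]`` order,
i.e. with the scalar part last.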
2 | 
3 | VERSION HISTORY
4 | Aug 14, 2014 - initially created (Felix Berkenkamp)
5 | """
6 | from __future__ import print_function, division, absolute_import
7 | 
8 | import math
9 | import numpy as np
10 | 
11 | from .transformations import (vector_norm,
12 |                               quaternion_multiply,
13 |                               quaternion_conjugate,
14 |                               quaternion_matrix,
15 |                               quaternion_about_axis)
16 | 
17 | __all__ = ['omega_from_quat_quat', 'apply_omega_to_quat', 'global_to_body',
18 |            'body_to_global']
19 | 
20 | 
21 | def omega_from_quat_quat(q1, q2, dt):
22 |     """
23 |     Convert two quaternions and the time difference to angular velocity.
24 | 
25 |     Parameters:
26 |     -----------
27 |     q1: quaternion
28 |         The old quaternion
29 |     q2: quaternion
30 |         The new quaternion
31 |     dt: float
32 |         The time difference
33 | 
34 |     Returns:
35 |     --------
36 |     omega_g: ndarray
37 |         The angular velocity in global coordinates
38 |     """
39 |     if vector_norm(q1 - q2) < 1e-8:
40 |         # linearly interpolate
41 |         # the quaternion does not stay on the unit sphere -> only valid for
42 |         # very small rotations!
43 | 
44 |         # dq/dt
45 |         dq = (q2 - q1) / dt
46 | 
47 |         # From Diebel: Representing Attitude, 6.6; quaternions are defined
48 |         # differently there: [w, x, y, z] instead of [x, y, z, w]!
49 |         omega = np.array([0.0, 0.0, 0.0], dtype=np.float64)
50 | 
51 |         # Equivalent, but slower:
52 |         # w = np.array([[q2[3], -q2[2], q2[1], -q2[0]],
53 |         #               [q2[2], q2[3], -q2[0], -q2[1]],
54 |         #               [-q2[1], q2[0], q2[3], -q2[2]]], dtype=np.float64)
55 |         #
56 |         # omega = 2 * w.dot(dq)
57 | 
58 |         omega[0] = 2.0 * (
59 |             q2[3] * dq[0] - q2[2] * dq[1] + q2[1] * dq[2] - q2[0] * dq[3])
60 |         omega[1] = 2.0 * (
61 |             q2[2] * dq[0] + q2[3] * dq[1] - q2[0] * dq[2] - q2[1] * dq[3])
62 |         omega[2] = 2.0 * (
63 |             -q2[1] * dq[0] + q2[0] * dq[1] + q2[3] * dq[2] - q2[2] * dq[3])
64 | 
65 |         return omega
66 |     else:
67 |         # This branch becomes numerically unstable for q1 - q2 --> 0
68 | 
69 |         # Find rotation from q1 to q2
70 |         # unit quaternion -> conjugate is the same as inverse
71 |         # q2 = r * q1 --> r = q2 * inv(q1)
72 |         r = quaternion_multiply(q2, quaternion_conjugate(q1))
73 | 
74 |         # Angle of rotation
75 |         angle = 2.0 * math.acos(r[3])
76 | 
77 |         # acos gives a value in [0, pi]; ensure that we take the short path
78 |         # (e.g. rotate by -pi/2 rather than 3pi/2)
79 |         if angle > math.pi:
80 |             angle -= 2.0 * math.pi
81 | 
82 |         # angular velocity = angle / dt
83 |         # axis of rotation corresponds to r[:3]
84 |         return angle / dt * r[:3] / vector_norm(r[:3])
85 | 
86 | 
87 | def apply_omega_to_quat(q, omega, dt):
88 |     """
89 |     Apply the angular velocity omega to the quaternion q over time dt.
90 | 
91 |     Parameters:
92 |     -----------
93 |     q: quaternion
94 |     omega: ndarray
95 |         angular velocity
96 |     dt: float
97 |         time difference
98 | 
99 |     Returns:
100 |     --------
101 |     quaternion
102 |         The quaternion of the orientation after rotation with omega for dt
103 |         seconds.
104 |     """
105 |     # rotation angle around each axis
106 |     w = omega * dt
107 | 
108 |     # only rotate if the angle we rotate through is actually significant
109 |     if vector_norm(w) < np.finfo(float).eps * 4.0:
110 |         return q
111 | 
112 |     # quaternion corresponding to this rotation
113 |     # w = 0 is not a problem; numpy handles the degenerate case gracefully
114 |     r = quaternion_about_axis(vector_norm(w), w)
115 | 
116 |     # return the rotated quaternion closest to original
117 |     return quaternion_multiply(r, q)
118 | 
119 | 
120 | def global_to_body(q, vec):
121 |     """
122 |     Convert a vector from global to body coordinates.
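
    The vector is rotated by the transpose (i.e. the inverse) of the rotation
    matrix corresponding to q: ``v_body = R(q).T.dot(v_global)``.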
123 | 
124 |     Parameters:
125 |     -----------
126 |     q: quaternion
127 |         The rotation quaternion
128 |     vec: ndarray
129 |         The vector in global coordinates
130 | 
131 |     Returns:
132 |     vec: ndarray
133 |         The vector in body coordinates
134 |     """
135 |     # quaternion_matrix(q)[:3,:3] is a homogeneous rotation matrix that
136 |     # rotates a vector by q
137 |     # quaternion_matrix(q)[:3,:3] is rot. matrix from body to global frame
138 |     # its transpose is the trafo matrix from global to body
139 |     # that matrix is applied to vec
140 |     return np.dot(quaternion_matrix(q)[:3, :3].transpose(), vec)
141 | 
142 | 
143 | def body_to_global(q, vec):
144 |     """Convert a vector from body to global coordinates.
145 | 
146 |     Parameters:
147 |     -----------
148 |     q: quaternion
149 |         The rotation quaternion
150 |     vec: ndarray
151 |         The vector in body coordinates
152 | 
153 |     Returns:
154 |     vec: ndarray
155 |         The vector in global coordinates
156 |     """
157 |     # quaternion_matrix(q)[:3,:3] is a homogeneous rotation matrix that
158 |     # rotates a vector by q
159 |     # quaternion_matrix(q)[:3,:3] is the matrix from body to global frame
160 |     # that matrix is applied to vec
161 |     return np.dot(quaternion_matrix(q)[:3, :3], vec)
--------------------------------------------------------------------------------
/SafeRLBench/test/test_bench.py:
--------------------------------------------------------------------------------
1 | from SafeRLBench import Bench, BenchConfig
2 | from SafeRLBench.bench import BenchRun
3 | from SafeRLBench.algo import PolicyGradient
4 | from SafeRLBench.envs import LinearCar
5 | 
6 | 
7 | from mock import Mock, MagicMock, patch
8 | from unittest2 import TestCase
9 | 
10 | import logging
11 | 
12 | logger = logging.getLogger(__name__)
13 | 
14 | 
15 | class TestBench(TestCase):
16 |     """Bench tests."""
17 | 
18 |     def test_bench_init(self):
19 |         """Test: BENCH: initialization."""
20 |         bench = Bench()
21 | 
22 |         self.assertIsInstance(bench.config, BenchConfig)
23 |         self.assertIsInstance(bench.runs, list)
24 | 
25 |         bench = Bench(BenchConfig())
26 | 
27 |         self.assertIsInstance(bench.config, BenchConfig)
28 |         self.assertIsInstance(bench.runs, list)
29 | 
30 |     @patch('SafeRLBench.bench.BenchRun')
31 |     def test_bench_benchmark(self, bench_run_mock):
32 |         """Test: BENCH: benchmark invocation."""
33 |         # setup mocks
34 |         bench_run_obj_mock = Mock()
35 |         bench_conf_mock = MagicMock(spec=BenchConfig)
36 | 
37 |         def create_run_obj_mock(a, b, c, d):
38 |             return bench_run_obj_mock
39 | 
40 |         bench_run_mock.side_effect = create_run_obj_mock
41 |         bench_conf_mock.__iter__.return_value = [(Mock(), Mock(), {}, {})]
42 | 
43 |         bench = Bench(bench_conf_mock)
44 |         bench()
45 | 
46 |         bench_run_obj_mock.alg.optimize.assert_called_once_with()
47 | 
48 | 
49 | class TestBenchConfig(TestCase):
50 |     """BenchConfig tests."""
51 | 
52 |     # setup test configuration
53 |     alg_config = [[
54 |         (PolicyGradient, [{}]),
55 |         (PolicyGradient, {})
56 |     ], [
57 |         (PolicyGradient, {})
58 |     ]]
59 | 
60 |     env_config = [
61 |         (LinearCar, {'horizon': 100}),
62 |         (LinearCar, {'horizon': 200})
63 |     ]
64 | 
65 |     alg_config_add = [
66 |         (PolicyGradient, [{}, {}]),
67 |     ]
68 | 
69 |     env_config_add = [
70 |         (LinearCar, {'horizon': 100}),
71 |         (LinearCar, {'horizon': 200})
72 |     ]
73 | 
74 |     @staticmethod
75 |     def _check_structure(lst):
76 |         # loop through entire structure checking types.
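        # expected shape: [[(class, [dict, ...]), ...], ...], i.e. a list of
        # lists of (algorithm or environment class, list of config dicts)
        # tuples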
77 | assert(isinstance(lst, list)) 78 | for lst_elem in lst: 79 | assert(isinstance(lst_elem, list)) 80 | for tup_elem in lst_elem: 81 | assert(isinstance(tup_elem, tuple)) 82 | assert (tup_elem[0] is PolicyGradient 83 | or tup_elem[0] is LinearCar) 84 | assert(isinstance(tup_elem[1], list)) 85 | for dict_elem in tup_elem[1]: 86 | assert(isinstance(dict_elem, dict)) 87 | 88 | def test_benchconfig_init(self): 89 | """Test: BENCHCONFIG: initialization structure.""" 90 | # apply test configuration 91 | config = BenchConfig(self.alg_config, self.env_config) 92 | 93 | # verify structure 94 | self._check_structure(config.algs) 95 | self._check_structure(config.envs) 96 | 97 | def test_benchconfig_add_tests(self): 98 | """Test: BENCHCONFIG: add_tests.""" 99 | # setup test configuration 100 | config = BenchConfig() 101 | 102 | # apply test configuration 103 | config.add_tests(self.alg_config_add, self.env_config_add) 104 | 105 | # verify structure 106 | self._check_structure(config.algs) 107 | self._check_structure(config.envs) 108 | 109 | def test_benchconfig_exceptions(self): 110 | """Test: BENCHCONFIG: exceptions.""" 111 | # setup bad test configurations 112 | alg_bad_tuple = [PolicyGradient, {}] 113 | env_bad_tuple = (LinearCar, {}) 114 | bad_tuple = [alg_bad_tuple, env_bad_tuple] 115 | 116 | alg_bad_alg = [(Mock(), {})] 117 | env_bad_alg = [(LinearCar, {})] 118 | bad_alg = [alg_bad_alg, env_bad_alg] 119 | 120 | alg_bad_env = [(PolicyGradient, {})] 121 | env_bad_env = [(Mock, {})] 122 | bad_env = [alg_bad_env, env_bad_env] 123 | 124 | alg_bad_len = [(PolicyGradient, {})] 125 | env_bad_len = [] 126 | bad_len = [alg_bad_len, env_bad_len] 127 | 128 | tests = [bad_tuple, bad_alg, bad_env, bad_len] 129 | 130 | # apply tests 131 | for test in tests: 132 | with self.subTest(test=test): 133 | self.assertRaises(ValueError, BenchConfig, *test) 134 | 135 | def test_benchconfig_iterator(self): 136 | """Test: BENCHCONFIG: Iterator.""" 137 | conf = BenchConfig(self.alg_config, self.env_config) 138 | 139 | for alg, env, alg_conf, env_conf in conf: 140 | assert alg is PolicyGradient 141 | assert env is LinearCar 142 | self.assertIsInstance(alg_conf, dict) 143 | self.assertIsInstance(env_conf, dict) 144 | 145 | 146 | class TestBenchRun(TestCase): 147 | """Test BenchRun class.""" 148 | 149 | def test_benchrun_init(self): 150 | """Test: BENCHRUN: initialization.""" 151 | args = [MagicMock() for i in range(4)] 152 | attr = ['alg', 'env', 'alg_conf', 'env_conf'] 153 | 154 | run = BenchRun(*args) 155 | 156 | for a, m in zip(attr, args): 157 | assert getattr(run, a) is m 158 | 159 | def test_benchrun_get_monitor(self): 160 | """Test: BENCHRUN: monitor getters.""" 161 | env = LinearCar() 162 | alg = PolicyGradient(env, Mock()) 163 | 164 | run = BenchRun(alg, env, None, None) 165 | 166 | alg_monitor = run.get_alg_monitor() 167 | self.assertEqual(alg_monitor, alg.monitor) 168 | 169 | env_monitor = run.get_env_monitor() 170 | self.assertEqual(env_monitor, env.monitor) 171 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # SafeRLBench documentation build configuration file, created by 5 | # sphinx-quickstart on Mon Mar 27 16:08:01 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 
9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | import os 21 | import sys 22 | sys.path.insert(0, os.path.abspath('..')) 23 | 24 | import sphinx_rtd_theme 25 | 26 | 27 | # -- General configuration ------------------------------------------------ 28 | 29 | # If your documentation needs a minimal Sphinx version, state it here. 30 | # 31 | # needs_sphinx = '1.0' 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 35 | # ones. 36 | extensions = [ 37 | 'sphinx.ext.autodoc', 38 | 'sphinx.ext.mathjax', 39 | 'sphinx.ext.viewcode', 40 | 'sphinx.ext.napoleon', 41 | 'sphinx.ext.autosummary', 42 | 'sphinx.ext.doctest', 43 | ] 44 | 45 | numpydoc_class_members_toctree = False 46 | 47 | # Add any paths that contain templates here, relative to this directory. 48 | templates_path = ['_templates'] 49 | 50 | # The suffix(es) of source filenames. 51 | # You can specify multiple suffix as a list of string: 52 | # 53 | # source_suffix = ['.rst', '.md'] 54 | source_suffix = '.rst' 55 | 56 | # The master toctree document. 57 | master_doc = 'index' 58 | 59 | # General information about the project. 60 | project = 'SafeRLBench' 61 | copyright = '2017, Nicolas Ochsner' 62 | author = 'Nicolas Ochsner' 63 | 64 | # The version info for the project you're documenting, acts as replacement for 65 | # |version| and |release|, also used in various other places throughout the 66 | # built documents. 67 | # 68 | # The short X.Y version. 69 | version = '0.1' 70 | # The full version, including alpha/beta/rc tags. 71 | release = '0.1.0' 72 | 73 | # The language for content autogenerated by Sphinx. Refer to documentation 74 | # for a list of supported languages. 75 | # 76 | # This is also used if you do content translation via gettext catalogs. 77 | # Usually you set "language" from the command line for these cases. 78 | language = None 79 | 80 | # List of patterns, relative to source directory, that match files and 81 | # directories to ignore when looking for source files. 82 | # This patterns also effect to html_static_path and html_extra_path 83 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 84 | 85 | # The name of the Pygments (syntax highlighting) style to use. 86 | pygments_style = 'sphinx' 87 | 88 | # If true, `todo` and `todoList` produce output, else they produce nothing. 89 | todo_include_todos = False 90 | 91 | 92 | # -- Options for HTML output ---------------------------------------------- 93 | 94 | # The theme to use for HTML and HTML Help pages. See the documentation for 95 | # a list of builtin themes. 96 | # 97 | html_theme = "sphinx_rtd_theme" 98 | 99 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 100 | 101 | # Theme options are theme-specific and customize the look and feel of a theme 102 | # further. For a list of options available for each theme, see the 103 | # documentation. 104 | # 105 | # html_theme_options = {} 106 | 107 | # Add any paths that contain custom static files (such as style sheets) here, 108 | # relative to this directory. 
They are copied after the builtin static files,
109 | # so a file named "default.css" will overwrite the builtin "default.css".
110 | html_static_path = ['_static']
111 | 
112 | 
113 | # -- Options for HTMLHelp output ------------------------------------------
114 | 
115 | # Output file base name for HTML help builder.
116 | htmlhelp_basename = 'SafeRLBenchdoc'
117 | 
118 | 
119 | # -- Options for LaTeX output ---------------------------------------------
120 | 
121 | latex_elements = {
122 |     # The paper size ('letterpaper' or 'a4paper').
123 |     #
124 |     # 'papersize': 'letterpaper',
125 | 
126 |     # The font size ('10pt', '11pt' or '12pt').
127 |     #
128 |     # 'pointsize': '10pt',
129 | 
130 |     # Additional stuff for the LaTeX preamble.
131 |     #
132 |     # 'preamble': '',
133 | 
134 |     # Latex figure (float) alignment
135 |     #
136 |     # 'figure_align': 'htbp',
137 | }
138 | 
139 | # Grouping the document tree into LaTeX files. List of tuples
140 | # (source start file, target name, title,
141 | #  author, documentclass [howto, manual, or own class]).
142 | latex_documents = [
143 |     (master_doc, 'SafeRLBench.tex', 'SafeRLBench Documentation',
144 |      'Nicolas Ochsner', 'howto'),
145 | ]
146 | 
147 | 
148 | # -- Options for manual page output ---------------------------------------
149 | 
150 | # One entry per manual page. List of tuples
151 | # (source start file, name, description, authors, manual section).
152 | man_pages = [
153 |     (master_doc, 'saferlbench', 'SafeRLBench Documentation',
154 |      [author], 1)
155 | ]
156 | 
157 | 
158 | # -- Options for Texinfo output -------------------------------------------
159 | 
160 | # Grouping the document tree into Texinfo files. List of tuples
161 | # (source start file, target name, title, author,
162 | #  dir menu entry, description, category)
163 | texinfo_documents = [
164 |     (master_doc, 'SafeRLBench', 'SafeRLBench Documentation',
165 |      author, 'SafeRLBench', 'One line description of project.',
166 |      'Miscellaneous'),
167 | ]
--------------------------------------------------------------------------------
/SafeRLBench/policy/test.py:
--------------------------------------------------------------------------------
1 | """Policy tests."""
2 | from __future__ import division, print_function, absolute_import
3 | 
4 | from SafeRLBench.spaces import BoundedSpace
5 | from SafeRLBench.envs.quadrocopter import Reference
6 | from SafeRLBench.envs._quadrocopter import StateVector
7 | from SafeRLBench.policy import (NeuralNetwork,
8 |                                 LinearPolicy,
9 |                                 DiscreteLinearPolicy,
10 |                                 NonLinearQuadrocopterController)
11 | 
12 | import numpy as np
13 | from numpy import isclose
14 | 
15 | import tensorflow as tf
16 | 
17 | from unittest2 import TestCase
18 | from mock import Mock
19 | 
20 | import logging
21 | 
22 | logger = logging.getLogger(__name__)
23 | 
24 | 
25 | class TestNeuralNetwork(TestCase):
26 |     """Test the Neural Network Policy."""
27 | 
28 |     fields = ['args', 'kwargs', 'action_space', 'state_space', 'dtype',
29 |               'layers', 'scope', 'init_weights', 'activation', 'X', 'a',
30 |               'W_action', 'W_var', 'a_pred', 'var', 'h', 'is_set_up']
31 | 
32 |     def test_initialization(self):
33 |         """Test: NEURALNETWORK: initialization."""
34 |         # test bad layer size:
35 |         args = [[2]]
36 |         with self.assertRaises(ValueError):
37 |             NeuralNetwork(*args)
38 | 
39 |         # test field existence
40 |         args = [[2, 6, 1]]
41 | 
42 |         nn = NeuralNetwork(*args)
43 | 
44 |         for field in self.fields:
45 |             assert hasattr(nn, field)
46 | 
47 |         # test network setup
48 |         kwargs = {
49 |             'do_setup': True
50 |         }
51 | 
52 |         nn = 
NeuralNetwork(*args, **kwargs)
53 | 
54 |         # check field contents.
55 |         assert(all([a == b for a, b in zip(args, nn.args)]))
56 |         self.assertEqual(nn.layers, args[0])
57 |         self.assertEqual(nn.dtype, 'float')
58 | 
59 |         self.assertEqual(len(nn.W_action), 2)
60 |         self.assertEqual(len(nn.W_var), 1)
61 | 
62 |         # compare string representations; a direct type comparison does not
63 |         # work here.
63 |         self.assertEqual(str(type(nn.a_pred)), str(tf.Tensor))
64 |         self.assertIn(str(type(nn.var)), (str(tf.Tensor), str(tf.constant)))
65 | 
66 |         self.assertEqual(len(nn.h), 2)
67 | 
68 |     def test_mapping(self):
69 |         """Test: NEURALNETWORK: mapping."""
70 |         args = [[2, 1]]
71 | 
72 |         kwargs = {
73 |             'weights': [tf.constant([2., 1.], shape=(2, 1))],
74 |             'do_setup': True,
75 |         }
76 | 
77 |         nn = NeuralNetwork(*args, **kwargs)
78 | 
79 |         sess = tf.Session()
80 | 
81 |         with sess.as_default():
82 |             self.assertEqual(nn(np.array([2., 1.])), [5.])
83 | 
84 |     def test_variable_assignment(self):
85 |         """Test: NEURALNETWORK: parameter assignment."""
86 |         args = [[2, 1]]
87 |         kwargs = {'do_setup': True}
88 | 
89 |         nn = NeuralNetwork(*args, **kwargs)
90 | 
91 |         with tf.Session().as_default():
92 |             nn.parameters = nn.W_action[0].assign([[2.], [1.]])
93 |             assert((np.array([[2.], [1.]]) == nn.parameters).all())
94 |             self.assertEqual(nn(np.array([2., 1.])), [5.])
95 | 
96 |     def test_copy(self):
97 |         """Test: NEURALNETWORK: copy."""
98 |         nn = NeuralNetwork([2, 6, 1])
99 |         nn_copy = nn.copy(scope='copy', do_setup=False)
100 | 
101 |         exclude = ('scope', 'kwargs')
102 | 
103 |         for field in self.fields:
104 |             if field not in exclude and field in nn.kwargs.keys():
105 |                 print(field)
106 |                 self.assertEqual(getattr(nn, field, None),
107 |                                  getattr(nn_copy, field, None))
108 | 
109 | 
110 | class TestLinearPolicy(TestCase):
111 |     """Test the Linear Policy."""
112 | 
113 |     def test_initialization(self):
114 |         """Test: LINEARPOLICY: initialization."""
115 |         lp = LinearPolicy(2, 1)
116 | 
117 |         self.assertEqual(lp.d_state, 2)
118 |         self.assertEqual(lp.d_action, 1)
119 | 
120 |         self.assertEqual(lp.par_dim, 2)
121 |         self.assertIs(lp._par_space, None)
122 | 
123 |         self.assertFalse(lp.initialized)
124 | 
125 |         self.assertIs(lp._parameters, None)
126 |         self.assertTrue(lp.biased)
127 |         self.assertEqual(lp._bias, 0)
128 |         self.assertIs(lp._par, None)
129 | 
130 |         par_mock = Mock()
131 |         par_space_mock = Mock()
132 | 
133 |         with self.assertRaises(ValueError):
134 |             lp_mocked = LinearPolicy(2, 1, par_mock, par_space_mock)
135 | 
136 |         par_mock = [2, 1]
137 | 
138 |         lp_mocked = LinearPolicy(2, 1, par_mock, par_space_mock)
139 | 
140 |         self.assertTrue(lp_mocked.initialized)
141 |         assert(all(par_mock == lp_mocked.parameters))
142 | 
143 |         self.assertEqual(par_space_mock, lp_mocked.parameter_space)
144 | 
145 |     def test_discrete_map(self):
146 |         """Test: DISCRETELINEARPOLICY: map."""
147 |         dp = DiscreteLinearPolicy(2, 1, biased=False)
148 |         dp.parameters = np.array([1, 1])
149 |         self.assertEqual(dp([1, 1]), 1)
150 |         self.assertEqual(dp([-1, -1]), 0)
151 | 
152 |         dp2 = DiscreteLinearPolicy(2, 2, biased=False)
153 |         dp2.parameters = np.array([1, 1, -1, -1])
154 |         assert(all(dp2([1, 1]) == [1, 0]))
155 |         assert(all(dp2([-1, -1]) == [0, 1]))
156 | 
157 | 
158 | class TestController(TestCase):
159 |     """Test NonLinearQuadrocopterController."""
160 | 
161 |     def test_controller_init(self):
162 |         """Test: CONTROLLER: initialization."""
163 |         ctrl = NonLinearQuadrocopterController()
164 | 
165 |         self.assertEqual(ctrl._zeta_z, .7)
166 |         assert(all(isclose(ctrl._params, [.7, .7, .7, .5, .707])))
167 | 
self.assertIsNone(ctrl.reference) 168 | self.assertTrue(ctrl.initialized) 169 | self.assertIsInstance(ctrl._par_space, BoundedSpace) 170 | 171 | def test_controller_map(self): 172 | """Test: CONTROLLER: mapping.""" 173 | ref = Reference('circle', 1 / 70.) 174 | ref.reset(StateVector()) 175 | ctrl = NonLinearQuadrocopterController(reference=ref) 176 | 177 | action = ctrl(StateVector()) 178 | 179 | print(action) 180 | assert all(isclose(action, [0.20510876, -0.30667618, 0., -6.28318548])) 181 | 182 | def test_controller_properties(self): 183 | """Test: CONTROLLER: properties.""" 184 | ctrl = NonLinearQuadrocopterController() 185 | 186 | ctrl.parameters = [0., 1., 0., 1., 0.] 187 | assert(all(np.isclose(ctrl.parameters, [0., 1., 0., 1., 0.]))) 188 | 189 | self.assertEquals(ctrl.parameter_space, ctrl._par_space) 190 | -------------------------------------------------------------------------------- /SafeRLBench/envs/_quadrocopter/quadrocopter_classes.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | 3 | """ 4 | quadrocopter_classes.py 5 | 6 | Written By: Adrian Esser and David Wu 7 | 8 | Changes: 9 | - Aug 2015 - Vectorized quadrotor state, moved state conversions here 10 | 11 | This file contains all classes for the quadrocopter simulation! 12 | 13 | This class defines an object for the state of the drone. 14 | The state contains the position, velocity, and acceleration information, 15 | the rotation matrix (which implies pitch, roll, and yaw), and the angular 16 | velocity information. 17 | """ 18 | 19 | from __future__ import print_function, division, absolute_import 20 | 21 | import numpy as np 22 | 23 | from .transformations import (quaternion_from_euler, euler_from_matrix, 24 | euler_matrix, euler_from_quaternion) 25 | 26 | 27 | __all__ = ['State', 'Parameters', 'StateVector'] 28 | 29 | 30 | class StateVector(np.ndarray): 31 | 32 | def __new__(cls, input=None): 33 | obj = np.zeros(22).view(cls) 34 | obj[-1] = 1 35 | 36 | if input is not None: 37 | obj[:len(input)] = input 38 | 39 | return obj 40 | 41 | @property 42 | def pos(self): 43 | return self[0:3] 44 | 45 | @pos.setter 46 | def pos(self, pos): 47 | self[0:3] = pos 48 | 49 | @property 50 | def vel(self): 51 | return self[3:6] 52 | 53 | @vel.setter 54 | def vel(self, vel): 55 | self[3:6] = vel 56 | 57 | @property 58 | def acc(self): 59 | return self[6:9] 60 | 61 | @acc.setter 62 | def acc(self, acc): 63 | self[6:9] = acc 64 | 65 | @property 66 | def euler(self): 67 | return self[9:12] 68 | 69 | @euler.setter 70 | def euler(self, euler): 71 | self[9:12] = euler 72 | 73 | @property 74 | def omega_g(self): 75 | return self[12:15] 76 | 77 | @omega_g.setter 78 | def omega_g(self, omega_g): 79 | self[12:15] = omega_g 80 | 81 | @property 82 | def omega_b(self): 83 | return self[15:18] 84 | 85 | @omega_b.setter 86 | def omega_b(self, omega_b): 87 | self[15:18] = omega_b 88 | 89 | @property 90 | def quat(self): 91 | return self[18:22] 92 | 93 | @quat.setter 94 | def quat(self, quat): 95 | self[18:22] = quat 96 | 97 | 98 | class State: 99 | 100 | def __init__(self): 101 | 102 | self.R = np.eye(3) 103 | self.pos = np.zeros(3) 104 | self.vel = np.zeros(3) 105 | self.acc = np.zeros(3) 106 | self.omega = np.zeros(3) 107 | 108 | @property 109 | def quaternion(self): 110 | """Rotation quaternion corresponding to R.""" 111 | return quaternion_from_euler(*self.rpy) 112 | 113 | @property 114 | def rpy(self): 115 | """Roll, pitch, yaw corresponding to R.""" 116 | return 
np.array(euler_from_matrix(self.R)) 117 | 118 | @property 119 | def state_vector(self): 120 | """Return the state as a StateVector.""" 121 | state = StateVector() 122 | state.pos = self.pos 123 | state.vel = self.vel 124 | state.acc = self.acc 125 | state.quat = self.quaternion 126 | state.euler = self.rpy 127 | state.omega_b = self.omega 128 | state.omega_g = self.R.dot(self.omega) 129 | return state 130 | 131 | @state_vector.setter 132 | def state_vector(self, state): 133 | self.pos = state.pos 134 | self.vel = state.vel 135 | self.acc = state.acc 136 | self.omega = state.omega_b 137 | self.R = self.rpy_to_R(euler_from_quaternion(state.quat)) 138 | 139 | def rpy_to_R(self, rpy): 140 | return euler_matrix(*rpy)[:3, :3] 141 | 142 | 143 | class Parameters: 144 | """Parameters for quadrotor the define the physics.""" 145 | 146 | def __init__(self): 147 | 148 | # m, mass of vehicle (kg) 149 | self.m = 1.477 150 | # g, mass normalized gravitational force (m/s^2) 151 | self.g = 9.8 152 | # L, vehicle arm length (m) 153 | self.L = 0.18 154 | # K, motor constant, determined experimentally 155 | self.K = 0.26 156 | # Ix, inertia around the body's x-axis (kg-m^2) 157 | self.Ix = 0.01152 158 | # Iy, inertia around the body's y-axis (kg-m^2) 159 | self.Iy = 0.01152 160 | # Iz, inertia around the body's z-axis (kg-m^2) 161 | self.Iz = 0.0218 162 | # fmin, mass normalized minimum rotor force (m/s^2) 163 | self.fmin = 0.17 164 | # fmax, mass normalized maximum rotor force (m/s^2) 165 | self.fmax = 6.0 166 | # vmax, maximum quadrotor velocity (m/s) 167 | self.vmax = 2.0 168 | # eta, damping ratio 169 | self.eta = 0.707 170 | # tau_z, time constant for vertical direction 171 | self.tau_z = 1.0 172 | # tau_Iz, integral time constant for vertical direction 173 | self.tau_Iz = 0.05 174 | # tau_yaw, time constant for yaw rate 175 | self.tau_yaw = 0.55 176 | # tau_Iyaw, integral time constant for yaw rate 177 | self.tau_Iyaw = 0.01 178 | # eta_y, damping ratio 179 | self.eta_y = 0.707 180 | # tau_y, time constant for x and y direction 181 | self.tau_y = 1.7 182 | # tau_Iu, integral time constant for x and y dir. 
183 | self.tau_Iu = 2.5 184 | # tau_p, time constant for roll rate 185 | self.tau_p = 0.18 186 | # tau_q, time constant for pitch rate 187 | self.tau_q = 0.18 188 | # tau_r, time constant for yaw rate 189 | self.tau_r = 0.1 190 | # tau_rp, time constant 191 | self.tau_rp = 0.18 192 | # tau_f, time constant for force integration 193 | self.tau_f = 0.1 194 | 195 | # Air drag factor in body x direction [dimensionless] 196 | self.CD_bx = 0.55 197 | # Air drag factor in body y direction [dimensionless] 198 | self.CD_by = 1.25 199 | # Air drag factor in body z direction [dimensionless] 200 | self.CD_bz = 0.3 201 | 202 | # Air drag factor in body x direction [-] 203 | self.CD_bx = 0.35 204 | # Air drag factor in body y direction [-] 205 | self.CD_by = 1.25 206 | # Air drag factor in body z direction [-] 207 | self.CD_bz = 0.3 208 | 209 | # Delay in the signal being sent from quad to computer (us) 210 | self.incoming_delay = 0.0 211 | # Delay in signal being sent from computer to quad (us) 212 | self.outgoing_delay = 100000.0 213 | # Update rate of inner loop (us) 214 | self.inner_loop_cycle = 8000.0 215 | # Update rate of outer loop (us) 216 | self.outer_loop_cycle = 15000.0 217 | 218 | # Takeoff height (m) 219 | self.takeoff_height = 1.0 220 | # Takeoff speed (m/s) 221 | self.takeoff_speed = 0.25 222 | -------------------------------------------------------------------------------- /examples/GettingStarted.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "# Getting Started\n", 11 | "\n", 12 | "This is an Notebook containing the examples from the **Getting Started** section in the documentation. Refer to the documentation for very verbose description of this code." 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": { 18 | "deletable": true, 19 | "editable": true 20 | }, 21 | "source": [ 22 | "### Optimizing a Policy" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": true, 30 | "deletable": true, 31 | "editable": true 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "# import the classes we need\n", 36 | "from SafeRLBench.envs import LinearCar\n", 37 | "from SafeRLBench.policy import LinearPolicy\n", 38 | "from SafeRLBench.algo import PolicyGradient" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "collapsed": false, 46 | "deletable": true, 47 | "editable": true 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "# get an instance of `LinearCar` with the default arguments.\n", 52 | "linear_car = LinearCar()\n", 53 | "# we need a policy which maps R^2 to R\n", 54 | "policy = LinearPolicy(2, 1)\n", 55 | "# setup parameters\n", 56 | "policy.parameters = [-1, -1, 1]\n", 57 | "\n", 58 | "# plug the environment and policy into the algorithm\n", 59 | "optimizer = PolicyGradient(linear_car, policy, estimator='central_fd')\n", 60 | "\n", 61 | "# run optimization\n", 62 | "optimizer.optimize()" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": { 68 | "deletable": true, 69 | "editable": true 70 | }, 71 | "source": [ 72 | "Lets take a look at what happened during the run. For this we can access the monitor and generate some plots. 
" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "collapsed": false, 80 | "deletable": true, 81 | "editable": true 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "import matplotlib.pyplot as plt\n", 86 | "\n", 87 | "y = optimizer.monitor.rewards\n", 88 | "\n", 89 | "plt.plot(range(len(y)), y)\n", 90 | "plt.show()" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": { 96 | "deletable": true, 97 | "editable": true 98 | }, 99 | "source": [ 100 | "### Configuration" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "collapsed": true, 108 | "deletable": true, 109 | "editable": true 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | "# import the configuration object\n", 114 | "from SafeRLBench import config" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "collapsed": true, 122 | "deletable": true, 123 | "editable": true 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "# setup stream handler\n", 128 | "config.logger_add_stream_handler()\n", 129 | "# setup logger level\n", 130 | "config.logger_set_level(config.DEBUG)\n", 131 | "# raise monitor verbosity\n", 132 | "config.monitor_set_verbosity(2)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": { 138 | "deletable": true, 139 | "editable": true 140 | }, 141 | "source": [ 142 | "After changing these values, please run the cell which invokes `optimizer.optimize` again to see what happens." 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": { 148 | "collapsed": true, 149 | "deletable": true, 150 | "editable": true 151 | }, 152 | "source": [ 153 | "### Benchmark" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": false, 161 | "deletable": true, 162 | "editable": true 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "# import the best performance measure\n", 167 | "from SafeRLBench.measure import BestPerformance\n", 168 | "# import the Bench and BenchConfig\n", 169 | "from SafeRLBench import Bench, BenchConfig" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": { 176 | "collapsed": true, 177 | "deletable": true, 178 | "editable": true 179 | }, 180 | "outputs": [], 181 | "source": [ 182 | "# define environment configuration.\n", 183 | "envs = [[(LinearCar, {'horizon': 100})]]\n", 184 | "# define algorithms configuration.\n", 185 | "algs = [[\n", 186 | " (PolicyGradient, [{\n", 187 | " 'policy': LinearPolicy(2, 1, par=[-1, -1, 1]),\n", 188 | " 'estimator': 'central_fd',\n", 189 | " 'var': var\n", 190 | " } for var in [1, 1.5, 2, 2.5]])\n", 191 | "]]\n", 192 | "\n", 193 | "# instantiate BenchConfig\n", 194 | "config = BenchConfig(algs, envs)\n", 195 | "\n", 196 | "# instantiate the bench\n", 197 | "bench = Bench(config, BestPerformance())\n", 198 | "\n", 199 | "# configure to run in parallel\n", 200 | "config.jobs_set(4)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "collapsed": false, 208 | "deletable": true, 209 | "editable": true 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "bench()" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": { 220 | "collapsed": false, 221 | "deletable": true, 222 | "editable": true 223 | }, 224 | "outputs": [], 225 | "source": [ 226 
| "bench.measures[0]" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": { 233 | "collapsed": false, 234 | "deletable": true, 235 | "editable": true 236 | }, 237 | "outputs": [], 238 | "source": [ 239 | "best_run = bench.measures[0].result[0][0]\n", 240 | "monitor = best_run.get_alg_monitor()\n", 241 | "best_trace = monitor.traces[monitor.rewards.index(max(monitor.rewards))]\n", 242 | "y = [t[1][0] for t in best_trace]\n", 243 | "x = range(len(y))\n", 244 | "\n", 245 | "import matplotlib.pyplot as plt\n", 246 | "\n", 247 | "plt.plot(x, y)\n", 248 | "plt.show()" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": { 255 | "collapsed": true, 256 | "deletable": true, 257 | "editable": true 258 | }, 259 | "outputs": [], 260 | "source": [] 261 | } 262 | ], 263 | "metadata": { 264 | "kernelspec": { 265 | "display_name": "Python (py36-srb)", 266 | "language": "python", 267 | "name": "py36-srb" 268 | }, 269 | "language_info": { 270 | "codemirror_mode": { 271 | "name": "ipython", 272 | "version": 3 273 | }, 274 | "file_extension": ".py", 275 | "mimetype": "text/x-python", 276 | "name": "python", 277 | "nbconvert_exporter": "python", 278 | "pygments_lexer": "ipython3", 279 | "version": "3.6.1" 280 | } 281 | }, 282 | "nbformat": 4, 283 | "nbformat_minor": 2 284 | } 285 | -------------------------------------------------------------------------------- /examples/SafeOpt.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "# Using SafeOpt" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "collapsed": false, 18 | "deletable": true, 19 | "editable": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "import GPy, safeopt\n", 24 | "\n", 25 | "from SafeRLBench.algo import SafeOptSwarm\n", 26 | "from SafeRLBench.envs import Quadrocopter, LinearCar\n", 27 | "from SafeRLBench.policy import NonLinearQuadrocopterController, LinearPolicy\n", 28 | "\n", 29 | "from SafeRLBench.measure import BestPerformance, SafetyMeasure\n", 30 | "\n", 31 | "from SafeRLBench import Bench\n", 32 | "\n", 33 | "# set up logging\n", 34 | "from SafeRLBench import config\n", 35 | "\n", 36 | "config.logger_set_level(config.INFO)\n", 37 | "config.logger_add_stream_handler()\n", 38 | "config.monitor_set_verbosity(2)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "#### Linear Car" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": { 52 | "collapsed": false, 53 | "deletable": true, 54 | "editable": true 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "noise_var = 0.05 ** 2\n", 59 | "\n", 60 | "bounds = [(-1., 0.), (-1., 0.), (0., 1.)]\n", 61 | "\n", 62 | "algos = [(SafeOptSwarm, [{\n", 63 | " 'policy': LinearPolicy(2, 1, par=[-1, 0, 1]),\n", 64 | " 'kernel': GPy.kern.RBF(input_dim=len(bounds), variance=std**2, lengthscale=.4, ARD=True),\n", 65 | " 'likelihood': GPy.likelihoods.gaussian.Gaussian(variance=noise_var),\n", 66 | " 'max_it': 20,\n", 67 | " 'avg_reward': -20,\n", 68 | " 'window': 3,\n", 69 | " 'fmin': -100,\n", 70 | " 'bounds': bounds, \n", 71 | " 'info': std\n", 72 | "} for std in [30, 35, 40, 45, 50]])]\n", 73 | "\n", 74 | "envs = [(LinearCar, {})]\n", 75 | "\n", 76 | "bench = Bench.make_bench(algos, envs, 
[BestPerformance(), SafetyMeasure(-100)])" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": false, 84 | "deletable": true, 85 | "editable": true 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "bench()" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "Below we output the results of the safety measure. List comprehension is used to get a more readable format for the\n", 97 | "tuples.\n", 98 | "The first element shows the standard deviation used, the second the number of violations and the last one the sum over\n", 99 | "all violations, just as documented in the `SafetyMeasure` class.\n", 100 | "\n", 101 | "We can see that increasing the standard deviation will ensure that the safty constraints will not be violated." 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": false, 109 | "deletable": true, 110 | "editable": true 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "print([(t[0].alg_conf['info'], t[1], t[2]) for t in bench.measures[1].result])" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "#### Quadrocopter" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false, 129 | "deletable": true, 130 | "editable": true 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "noise_var = 0.05 ** 2\n", 135 | "\n", 136 | "# Set fixed Gaussian measurement noise\n", 137 | "likelihood = GPy.likelihoods.gaussian.Gaussian(variance=noise_var)\n", 138 | "\n", 139 | "# Bounds on the inputs variable\n", 140 | "bounds = [(0., 1.), (0., 1.), (0., 1.), (0., 1.), (0., 1.)]\n", 141 | "\n", 142 | "# Define Kernel\n", 143 | "kernel = GPy.kern.RBF(input_dim=len(bounds), variance=1000.*2, lengthscale=1.0, ARD=True)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": { 150 | "collapsed": false, 151 | "deletable": true, 152 | "editable": true 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "noise_var = 0.05 ** 2\n", 157 | "\n", 158 | "fmin = -2400\n", 159 | "\n", 160 | "# Bounds on the inputs variable\n", 161 | "# bounds = [(1e-2, .9), (1e-2, .9), (1e-1, .9), (.2, .7), (1e-2, .9)]\n", 162 | "bounds = [(1e-2, 1.), (1e-2, 1.), (1e-2, 1.), (1e-2, 1.), (1e-2, 1.)]\n", 163 | "\n", 164 | "algos = [(SafeOptSwarm, [{\n", 165 | " 'policy': NonLinearQuadrocopterController(),\n", 166 | " 'kernel': GPy.kern.RBF(input_dim=len(bounds), variance=std**2, lengthscale=0.2, ARD=True),\n", 167 | " 'likelihood': GPy.likelihoods.gaussian.Gaussian(variance=noise_var),\n", 168 | " 'max_it': 20,\n", 169 | " 'avg_reward': -1500,\n", 170 | " 'window': 3,\n", 171 | " 'fmin': fmin,\n", 172 | " 'bounds': bounds,\n", 173 | " 'swarm_size': 1000,\n", 174 | " 'info': std\n", 175 | "} for std in [1000, 1250, 1500, 1750, 2000]])]\n", 176 | "\n", 177 | "envs = [(Quadrocopter, {})]\n", 178 | "\n", 179 | "bench = Bench.make_bench(algos, envs, [BestPerformance(), SafetyMeasure(fmin)])" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "collapsed": false, 187 | "deletable": true, 188 | "editable": true 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "bench()" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "Below we output the results of the safety measure and performance. 
List comprehension is used to get a more readable format for the tuples.\n", 200 | "The first element shows the standard deviation used, the second the number of violations and the last one the sum over\n", 201 | "all violations, just as documented in the `SafetyMeasure` class." 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": { 208 | "collapsed": false, 209 | "deletable": true, 210 | "editable": true 211 | }, 212 | "outputs": [], 213 | "source": [ 214 | "print([(t[0].alg_conf['info'], t[1], t[2]) for t in bench.measures[1].result])" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": { 221 | "collapsed": false 222 | }, 223 | "outputs": [], 224 | "source": [ 225 | "print([(t[0].alg_conf['info'], int(t[1])) for t in bench.measures[0].result])" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": { 232 | "collapsed": true 233 | }, 234 | "outputs": [], 235 | "source": [] 236 | } 237 | ], 238 | "metadata": { 239 | "kernelspec": { 240 | "display_name": "Python (py36-srb)", 241 | "language": "python", 242 | "name": "py36-srb" 243 | }, 244 | "language_info": { 245 | "codemirror_mode": { 246 | "name": "ipython", 247 | "version": 3 248 | }, 249 | "file_extension": ".py", 250 | "mimetype": "text/x-python", 251 | "name": "python", 252 | "nbconvert_exporter": "python", 253 | "pygments_lexer": "ipython3", 254 | "version": "3.6.1" 255 | } 256 | }, 257 | "nbformat": 4, 258 | "nbformat_minor": 2 259 | } 260 | -------------------------------------------------------------------------------- /SafeRLBench/policy/linear_policy.py: -------------------------------------------------------------------------------- 1 | """Linear Policy Class.""" 2 | 3 | from SafeRLBench import Policy, ProbPolicy 4 | from SafeRLBench.spaces import BoundedSpace 5 | 6 | import numpy as np 7 | 8 | __all__ = ('LinearPolicy', 'DiscreteLinearPolicy', 'NoisyLinearPolicy') 9 | 10 | 11 | class LinearPolicy(Policy): 12 | """Policy implementing a linear mapping from state to action space. 13 | 14 | Attributes 15 | ---------- 16 | d_state : positive integer 17 | Dimension of the state space. 18 | d_action : positive integer 19 | Dimension of the action space 20 | parameters : nd-array 21 | Array containing initial parameters. 22 | initialized : boolean 23 | Boolean indicating if parameters have been initialized. 24 | biased : boolean 25 | Flag indicating if the policy is supposed to be biased or not. 26 | """ 27 | 28 | def __init__(self, d_state, d_action, 29 | par=None, par_space=None, biased=True): 30 | """Initialize LinearPolicy. 31 | 32 | Parameters 33 | ---------- 34 | d_state : positive integer 35 | Dimension of the state space. 36 | d_action : positive integer 37 | Dimension of the action space 38 | par : ndarray 39 | Array containing initial parameters. If there is a constant bias, 40 | the array needs to be flat with shape (d_state * d_action + 1,). 41 | Otherwise it may either have shape (d_action, d_state) or 42 | (d_state * d_action,) 43 | biased : boolean 44 | Flag indicating if the policy is supposed to be biased or not. 45 | """ 46 | assert(d_state > 0 and d_action > 0) 47 | self.d_state = d_state 48 | self.d_action = d_action 49 | 50 | self.par_dim = d_state * d_action 51 | 52 | self._par_space = None 53 | 54 | self.initialized = False 55 | 56 | if par is not None: 57 | self.parameters = par 58 | else: 59 | # make sure some fields exist. 
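# (they are populated on the first assignment to ``parameters``;
# see the property setter below)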
60 | self._parameters = None
61 | self.biased = biased
62 | self._bias = 0
63 | self._par = None
64 |
65 | if par_space is not None:
66 | self.parameter_space = par_space
67 |
68 | def map(self, state):
69 | """Map a state to an action.
70 |
71 | Parameters
72 | ----------
73 | state : array-like
74 | Element of state space.
75 |
76 | Returns
77 | -------
78 | action : ndarray
79 | Element of action space.
80 | """
81 | if self.d_action == 1:
82 | ret = self._parameters.dot(state).item() + self._bias
83 | else:
84 | ret = self._parameters.dot(state) + self._bias
85 | return ret
86 |
87 | @property
88 | def parameters(self):
89 | """Property to access parameters.
90 |
91 | The property returns the same representation as used when set.
92 | If the mapping contains a bias, then the input needs to be an ndarray
93 | with shape (d_action * d_state + 1,) otherwise it may either be a
94 | (d_action, d_state) or (d_action * d_state,) shaped array.
95 | """
96 | if not self.initialized:
97 | raise NameError('Policy parameters not initialized yet.')
98 | return self._par
99 |
100 | @parameters.setter
101 | def parameters(self, par):
102 | par = np.array(par).copy()
103 |
104 | if not self.initialized:
105 | shape = par.shape
106 | if (shape == (self.d_action, self.d_state)
107 | or shape == (self.par_dim,)):
108 | self.biased = False
109 | self._bias = 0
110 | elif shape == (self.par_dim + 1,):
111 | self.biased = True
112 | else:
113 | raise ValueError("Parameters with shape %s invalid."
114 | % str(shape))
115 |
116 | self.initialized = True
117 |
118 | # store parameter in original representation.
119 | self._par = par
120 |
121 | if self.d_action == 1:
122 | shape = (self.d_state,)
123 | else:
124 | shape = (self.d_action, self.d_state)
125 |
126 | if not self.biased:
127 | self._parameters = par.reshape(shape)
128 | else:
129 | self._bias = par[-1]
130 | self._parameters = par[0:-1].reshape(shape)
131 |
132 | @property
133 | def parameter_space(self):
134 | """Property storing the parameter space.
135 |
136 | By default the parameter space will be assigned to be a BoundedSpace
137 | between [0,1]^d. However, it might be necessary to change this. A user
138 | may thus assign a new parameter space.
139 |
140 | WARNING: Currently there is no sanity check for manually assigned
141 | parameter spaces.
142 | """
143 | if self._par_space is None:
144 | if self.biased:
145 | shape = (self.par_dim + 1,)
146 | else:
147 | shape = (self.par_dim,)
148 | self._par_space = BoundedSpace(0, 1, shape)
149 |
150 | return self._par_space
151 |
152 | @parameter_space.setter
153 | def parameter_space(self, par_space):
154 | self._par_space = par_space
155 |
156 |
157 | class DiscreteLinearPolicy(LinearPolicy):
158 | """LinearPolicy on a discrete action space of {0, 1}^d."""
159 |
160 | def map(self, state):
161 | """Map to discrete action space.
162 |
163 | Parameters
164 | ----------
165 | state : element of state space
166 | state to be mapped.
167 |
168 | Returns
169 | -------
170 | action : ndarray
171 | Element of {0, 1}^d_action
172 | """
173 | cont_action = super(DiscreteLinearPolicy, self).map(state)
174 | if self.d_action == 1:
175 | if (cont_action < 0):
176 | action = 0
177 | else:
178 | action = 1
179 | else:
180 | action = np.zeros(cont_action.shape, dtype=int)
181 | action[cont_action > 0] += 1
182 |
183 | return action
184 |
185 |
186 | class NoisyLinearPolicy(LinearPolicy, ProbPolicy):
187 | """
188 | Policy implementing a linear mapping from state to action space with noise.
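Actions are the linear map of ``LinearPolicy`` plus additive Gaussian
noise, i.e. ``a = par . state + eps`` with ``eps ~ N(0, sigma**2)``.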
189 |
190 | Attributes
191 | ----------
192 | d_state : positive integer
193 | Dimension of the state space.
194 | d_action : positive integer
195 | Dimension of the action space.
196 | sigma : double
197 | Sigma for Gaussian noise.
198 | parameters : nd-array
199 | Array containing initial parameters.
200 | initialized : boolean
201 | Boolean indicating if parameters have been initialized.
202 | biased : boolean
203 | Flag indicating if the policy is supposed to be biased or not.
204 | """
205 |
206 | def __init__(self, d_state, d_action, sigma,
207 | par=None, par_space=None, biased=False):
208 | """Initialize Noisy Linear Policy.
209 |
210 | Parameters
211 | ----------
212 | d_state : positive integer
213 | Dimension of the state space.
214 | d_action : positive integer
215 | Dimension of the action space.
216 | sigma : double
217 | Sigma for Gaussian noise.
218 | par : ndarray
219 | Array containing initial parameters. If there is a constant bias,
220 | the array needs to be flat with shape (d_state * d_action + 1,).
221 | Otherwise it may either have shape (d_action, d_state) or
222 | (d_state * d_action,).
223 | biased : boolean
224 | Flag indicating if the policy is supposed to be biased or not.
225 | """
226 | assert(d_state > 0 and d_action > 0)
227 |
228 | self.sigma = sigma
229 |
230 | self.random_state = np.random.RandomState()
231 |
232 | super(NoisyLinearPolicy, self).__init__(d_state, d_action, par,
233 | par_space, biased)
234 |
235 | def map(self, state):
236 | """Map a state to an action.
237 |
238 | Parameters
239 | ----------
240 | state : array-like
241 | Element of state space.
242 |
243 | Returns
244 | -------
245 | action : ndarray
246 | Element of action space.
247 | """
248 | noise = self.random_state.normal(0, self.sigma)
249 | return super(NoisyLinearPolicy, self).map(state) + noise
250 |
251 | def grad_log_prob(self, state, action):
252 | """Compute the gradient of the logarithm of the probability dist."""
253 | noise = action - super(NoisyLinearPolicy, self).map(state)
254 | # gradient of log N(action; par . state, sigma**2) wrt the
255 | # parameters: (action - par . state) * state / sigma**2
256 | return noise * np.asarray(state) / self.sigma ** 2
257 |
--------------------------------------------------------------------------------
/SafeRLBench/configuration.py:
--------------------------------------------------------------------------------
1 | """Global Configuration Class."""
2 | import logging
3 | import sys
4 |
5 |
6 | class SRBConfig(object):
7 | """SafeRLBench configuration class.
8 |
9 | This is a configuration class providing a container for global variables
10 | and configuration functions.
11 |
12 | In general this class should not be instantiated directly, but rather
13 | accessed through the global variable ``SafeRLBench.config``, which is
14 | created when the package is imported and will contain the root logger of
15 | the package.
16 |
17 | Attributes
18 | ----------
19 | logger_stream_handler :
20 | This is a property wrapping the current stream handler. The current
21 | stream handler can be accessed through this property, or it may even
22 | be replaced with a new stream handler. In case of resetting the stream
23 | handler, the old handler will be removed from the logger
24 | automatically.
25 | logger_file_handler :
26 | This is a property wrapping the current file handler. The current
27 | file handler can be accessed through this property, or it may even
28 | be replaced with a new file handler. In case of resetting the file
29 | handler, the old handler will be removed from the logger
30 | automatically.
31 | logger_format :
32 | This is a property to access the format stored. This is the default
33 | format that will be used when adding the default handlers.
34 | When assigned to, the formats of already set handlers will be changed
35 | to the new format.
36 | log :
37 | The logger object.
38 | n_jobs :
39 | Number of jobs used by the library.
40 | monitor_verbosity :
41 | Verbosity of the monitor.
42 |
43 | Methods
44 | -------
45 | monitor_set_verbosity(verbosity)
46 | Set monitor verbosity level.
47 | jobs_set(n_jobs)
48 | Set the number of jobs used by a worker pool.
49 | logger_set_level(level=logging.INFO)
50 | Set the logger level package wide.
51 | logger_add_stream_handler()
52 | Set a handler to print logs to stdout.
53 | logger_add_file_handler(path)
54 | Set a handler to print to file.
55 |
56 | Notes
57 | -----
58 | Access logger levels through the static variables:
59 |
60 | +-----------+------------------+
61 | |DEBUG | logging.DEBUG |
62 | +-----------+------------------+
63 | |INFO | logging.INFO |
64 | +-----------+------------------+
65 | |WARNING | logging.WARNING |
66 | +-----------+------------------+
67 | |ERROR | logging.ERROR |
68 | +-----------+------------------+
69 | |CRITICAL | logging.CRITICAL |
70 | +-----------+------------------+
71 | """
72 |
73 | DEBUG = logging.DEBUG
74 | INFO = logging.INFO
75 | WARNING = logging.WARNING
76 | ERROR = logging.ERROR
77 | CRITICAL = logging.CRITICAL
78 |
79 | def __init__(self, log):
80 | """Initialize default configuration."""
81 | # some libraries think it is a good idea to add handlers by default
82 | # without documenting that at all, thanks gpy...
83 | log.propagate = False
84 |
85 | self.log = log
86 | self.n_jobs = 1
87 | self.monitor_verbosity = 0
88 |
89 | self._stream_handler = None
90 | self._file_handler = None
91 | self._fmt = ('%(process)d - %(asctime)s - %(name)s - %(levelname)s'
92 | + ' - %(message)s')
93 | self._formatter = logging.Formatter(self._fmt)
94 |
95 | def monitor_set_verbosity(self, verbosity):
96 | """Set monitor verbosity level.
97 |
98 | Parameters
99 | ----------
100 | verbosity : int
101 | Non-negative verbosity level.
102 | """
103 | if verbosity < 0:
104 | raise ValueError('Verbosity level cannot be negative.')
105 | self.monitor_verbosity = verbosity
106 |
107 | def jobs_set(self, n_jobs):
108 | """Set the number of jobs used by a worker pool.
109 |
110 | Parameters
111 | ----------
112 | n_jobs : int
113 | Number of jobs, needs to be larger than 0.
114 | """
115 | if n_jobs <= 0:
116 | raise ValueError('Number of jobs needs to be larger than 0.')
117 | self.n_jobs = n_jobs
118 |
119 | def logger_set_level(self, level=logging.INFO):
120 | """Set the logger level package wide.
121 |
122 | Parameters
123 | ----------
124 | level :
125 | Logger level as defined in logging.
126 | """
127 | self.log.setLevel(level)
128 |
129 | @property
130 | def logger_stream_handler(self):
131 | """Property storing the current stream handler.
132 |
133 | If overwritten with a new stream handler, the logger will be updated
134 | with the new stream handler.
135 |
136 | Examples
137 | --------
138 | Set up a stream handler for the logger.
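Assigning a handler to this property replaces any previously attached
stream handler on the package logger.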
139 |
140 | >>> from SafeRLBench import config
141 | >>> import logging
142 | >>> # configure the stream handler
143 | >>> ch = logging.StreamHandler(sys.stdout)
144 | >>> config.logger_stream_handler = ch
145 |
146 | To use the default format:
147 |
148 | >>> formatter = logging.Formatter(config.logger_format)
149 | >>> ch.setFormatter(formatter)
150 |
151 | which is equivalent to using `logger_add_stream_handler`.
152 | """
153 | return self._stream_handler
154 |
155 | @logger_stream_handler.setter
156 | def logger_stream_handler(self, ch):
157 | """Setter method for logger_stream_handler property."""
158 | if self._stream_handler is not None:
159 | self.log.removeHandler(self._stream_handler)
160 |
161 | self._stream_handler = ch
162 | if ch is not None:
163 | self.log.addHandler(ch)
164 |
165 | @property
166 | def logger_file_handler(self):
167 | """Property storing the current file handler.
168 |
169 | If overwritten with a new file handler, the logger will be updated with
170 | the new file handler.
171 |
172 | Examples
173 | --------
174 | Set up a file handler for the logger.
175 |
176 | >>> from SafeRLBench import config
177 | >>> import logging
178 | >>> # configure the file handler
179 | >>> fh = logging.FileHandler('logs.log')
180 | >>> config.logger_file_handler = fh
181 |
182 | To use the default format:
183 |
184 | >>> formatter = logging.Formatter(config.logger_format)
185 | >>> fh.setFormatter(formatter)
186 |
187 | which is equivalent to using `logger_add_file_handler`.
188 | """
189 | return self._file_handler
190 |
191 | @logger_file_handler.setter
192 | def logger_file_handler(self, fh):
193 | """Setter method for logger_file_handler property."""
194 | if self._file_handler is not None:
195 | self.log.removeHandler(self._file_handler)
196 |
197 | self._file_handler = fh
198 | if fh is not None:
199 | self.log.addHandler(fh)
200 |
201 | @property
202 | def logger_format(self):
203 | """Property for default logger format.
204 |
205 | If overwritten, stream and file handlers will be updated accordingly.
206 | However, if stream or file handlers are updated manually, logger_format
207 | will be ignored.
208 | """
209 | return self._fmt
210 |
211 | @logger_format.setter
212 | def logger_format(self, fmt):
213 | """Setter method for logger_format property."""
214 | self._formatter = logging.Formatter(fmt)
215 |
216 | self._fmt = fmt
217 |
218 | if self.logger_stream_handler is not None:
219 | self.logger_stream_handler.setFormatter(self._formatter)
220 |
221 | if self.logger_file_handler is not None:
222 | self.logger_file_handler.setFormatter(self._formatter)
223 |
224 | def logger_add_stream_handler(self):
225 | """Set a handler to print logs to stdout."""
226 | if self._stream_handler is not None:
227 | self.log.removeHandler(self._stream_handler)
228 |
229 | ch = logging.StreamHandler(sys.stdout)
230 | ch.setFormatter(self._formatter)
231 |
232 | self._stream_handler = ch
233 | self.log.addHandler(ch)
234 |
235 | def logger_add_file_handler(self, path):
236 | """Set a handler to print to file.
237 |
238 | Parameters
239 | ----------
240 | path :
241 | Path to log file.
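
Examples
--------
Attach the default-format file handler in one call (equivalent to the
manual setup shown for ``logger_file_handler`` above):

>>> from SafeRLBench import config
>>> config.logger_add_file_handler('logs.log')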
242 | """ 243 | if self._file_handler is not None: 244 | self.log.removeHandler(self._file_handler) 245 | 246 | fh = logging.FileHandler(path) 247 | fh.setFormatter(self._formatter) 248 | 249 | self._file_handler = fh 250 | self.log.addHandler(fh) 251 | -------------------------------------------------------------------------------- /SafeRLBench/policy/neural_network.py: -------------------------------------------------------------------------------- 1 | """Neural Network Policy implementation.""" 2 | 3 | from SafeRLBench import Policy 4 | from SafeRLBench.error import add_dependency, MultipleCallsException 5 | from SafeRLBench.spaces import RdSpace 6 | 7 | import numpy as np 8 | from numpy.random import normal 9 | 10 | try: 11 | import tensorflow as tf 12 | except ModuleNotFoundError: 13 | tf = None 14 | 15 | import logging 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | def default_init_weights(shape): 21 | """Initialize default weights.""" 22 | weights = tf.random_normal(shape, mean=0, stddev=0.1, name='weights') 23 | return tf.Variable(weights) 24 | 25 | 26 | class NeuralNetwork(Policy): 27 | """Fully connected Neural Network Policy. 28 | 29 | Attributes 30 | ---------- 31 | args : list 32 | Contains the args used to initialize the policy. 33 | kwargs : dict 34 | Contains the kwargs used to initialize the policy. 35 | layers : list of integers 36 | A list describing the layer sizes. The first element represents the 37 | size of the input layer, the last element the size of the output 38 | layer. 39 | state_space : space instance 40 | action_space : space instance 41 | weights : tf.Variable 42 | If none the init_weights function will be used to initialize the 43 | weights. 44 | init_weights : callable 45 | Takes a shape as an argument and returns a tf.Variable according to 46 | this shape. 47 | activation : list of activation functions 48 | An activation function which will be used to construct the respective 49 | layer. If only one activation function is passed, it will be used for 50 | every layer. If the argument is None by default the sigmoid function 51 | will be used. 52 | dtype : string 53 | Data type of input and output. 54 | W_action : list of tf.Variable 55 | The list contains the `tf.Variable` instances describing the mapping 56 | between the hidden layers. The i-th entry describes the connection 57 | between layer i and layer i+1. 58 | W_var : list of tf.Variable 59 | This list contains the weights used to compute the variance estimation. 60 | Each entry corresponds to one layer and contains weights of shape 61 | (layer[i], 1). 62 | a_pred : 63 | Action estimate of the fully connected neural network defined by 64 | `W_action` and activation. 65 | var : 66 | Variance estimate which is a weighted sum of all hidden units. 67 | The weights are described by `W_var`. 
68 | h : list of tf.Tensor
69 | Hidden layers.
70 | """
71 |
72 | def __init__(self,
73 | layers, weights=None, init_weights=None, activation=None,
74 | dtype='float', scope='global', do_setup=False):
75 | """Initialize Neural Network wrapper."""
76 | add_dependency(tf, 'TensorFlow')
77 |
78 | if (len(layers) < 2):
79 | raise ValueError('At least two layers needed.')
80 |
81 | # determine state and action space
82 | state_space = RdSpace((layers[0],))
83 | action_space = RdSpace((layers[-1],))
84 |
85 | # store arguments convenient for copy operation
86 | self.args = [layers]
87 | self.kwargs = {
88 | 'weights': weights,
89 | 'init_weights': init_weights,
90 | 'activation': activation,
91 | 'dtype': dtype
92 | }
93 |
94 | self.state_space = state_space
95 | self.action_space = action_space
96 |
97 | self.dtype = dtype
98 | self.layers = layers
99 | self.scope = scope
100 |
101 | self.is_set_up = False
102 |
103 | if init_weights is None:
104 | self.init_weights = default_init_weights
105 | else:
106 | self.init_weights = init_weights
107 |
108 | # Activation function: a list must match the hidden layer count and
109 | # is used as is; a single callable is replicated for every layer.
109 | if activation is None:
110 | activation = (len(layers) - 2) * [tf.sigmoid]
111 | elif isinstance(activation, list):
112 | if len(activation) != len(layers) - 2:
113 | raise ValueError('Activation list has wrong size.')
114 | else:
115 | activation = (len(layers) - 2) * [activation]
116 |
117 | self.activation = activation
118 |
119 | # Symbols
120 | self.X = tf.placeholder(dtype, shape=[None, layers[0]], name='X')
121 | self.a = tf.placeholder(dtype, shape=[None, layers[-1]], name='a')
122 |
123 | if do_setup:
124 | with tf.variable_scope(self.scope):
125 | self.setup()
126 | else:
127 | # Make sure all fields exist
128 | self.W_action = None
129 | self.W_var = None
130 | self.a_pred = None
131 | self.var = None
132 | self.h = None
133 |
134 | self.sess = None
135 |
136 | def setup(self):
137 | """Set up the network graph.
138 |
139 | The weights and graph will be initialized by this function. If do_setup
140 | is True, setup will automatically be called when instantiating the
141 | class.
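
Examples
--------
A minimal sketch of deferred setup (the layer sizes are illustrative):

>>> nn = NeuralNetwork([2, 6, 1])
>>> with tf.variable_scope(nn.scope):
...     nn.setup()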
142 | """ 143 | if self.is_set_up: 144 | raise MultipleCallsException('Network is already set up.') 145 | 146 | layers = self.layers 147 | weights = self.kwargs['weights'] 148 | 149 | # Weights for the action estimation 150 | with tf.variable_scope('action_estimator'): 151 | if weights is None: 152 | w = [] 153 | for i in range(len(layers) - 1): 154 | w.append(self.init_weights((layers[i], layers[i + 1]))) 155 | else: 156 | w = weights 157 | 158 | self.W_action = w 159 | 160 | # generate network 161 | self.a_pred = self._generate_network() 162 | 163 | # Weights for variance estimation 164 | with tf.variable_scope('variance_estimator'): 165 | self.W_var = [] 166 | for i in range(1, len(layers) - 1): 167 | self.W_var.append(self.init_weights((layers[i], 1))) 168 | 169 | # generate variance network 170 | self.var = self._generate_variance() 171 | 172 | self.is_set_up = True 173 | 174 | def _generate_network(self): 175 | self.h = [self.X] 176 | for i, act in enumerate(self.activation): 177 | h_i = self.h[i] 178 | w_i = self.W_action[i] 179 | self.h.append(act(tf.matmul(h_i, w_i))) 180 | 181 | return tf.matmul(self.h[-1], self.W_action[-1]) 182 | 183 | def _generate_variance(self): 184 | var = [] 185 | if not self.W_var: 186 | return tf.constant(0, name='variance') 187 | for h_i, w_i in zip(self.W_var, self.h[1:]): 188 | var.append(tf.reduce_sum(tf.matmul(w_i, h_i))) 189 | return tf.abs(tf.reduce_sum(var, name='variance')) 190 | 191 | def copy(self, scope, do_setup=True): 192 | """Generate a copy of the network. 193 | 194 | The copy will instantiate the class with the same arguments, but 195 | replace `scope` and `do_setup` with the respective arguments passed 196 | to this function. 197 | 198 | Parameters 199 | ---------- 200 | scope : String 201 | Indication the scope that should be used when initializing the 202 | network. 203 | do_setup : Boolean 204 | Default: True ; Indicating if the `setup` method, should be called 205 | when instantiating. 206 | """ 207 | self.kwargs['scope'] = scope 208 | self.kwargs['do_setup'] = do_setup 209 | return NeuralNetwork(*self.args, **self.kwargs) 210 | 211 | def map(self, state): 212 | """Compute output in session. 213 | 214 | Make sure a default session is set when calling. 215 | """ 216 | state = state.flatten() 217 | assert(self.state_space.contains(state)) 218 | 219 | if self.sess is None: 220 | sess = tf.get_default_session() 221 | else: 222 | sess = self.sess 223 | mean, var = sess.run([self.a_pred, self.var], {self.X: [state]}) 224 | 225 | action = np.array(normal(mean, var)) 226 | action = action.reshape(self.action_space.shape) 227 | 228 | return action 229 | 230 | @property 231 | def parameters(self): 232 | """Return weights of the neural network. 233 | 234 | This returns a list of tf.Variables. Please note that these can not 235 | simply be updated by assignment. See the parameters.setter docstring 236 | for more information. 237 | The list of tf.Variables can be directly accessed through the 238 | attribute `W`. 239 | """ 240 | if self.sess is None: 241 | return tf.get_default_session().run(self.W_action + self.W_var) 242 | else: 243 | return self.sess.run(self.W_action + self.W_var) 244 | 245 | @parameters.setter 246 | def parameters(self, update): 247 | """Setter function for parameters. 248 | 249 | Since the parameters are a list of `tf.Variable`, we need to feed them 250 | into an assign operator. Thus the argument, needs to be a list 251 | containing an element for each Variable in `W_action` and `W_var` in 252 | that order, i.e. 
`W_var` will be the last element.
253 |
254 | Parameters
255 | ----------
256 | update :
257 | List of parameters for each `tf.Variable`.
258 |
259 | Notes
260 | -----
261 | Make sure there is a default session or `self.sess` is set.
262 | """
263 | if not isinstance(update, list):
264 | update = [update]
265 |
266 | variables = self.W_action + self.W_var
267 | assign_op = []
268 |
269 | for (var, val) in zip(variables, update):
270 | assign_op.append(var.assign(val))
271 |
272 | if self.sess is None:
273 | sess = tf.get_default_session()
274 | else:
275 | sess = self.sess
276 |
277 | sess.run(assign_op)
278 |
279 | @property
280 | def parameter_space(self):
281 | """Return parameter space."""
282 | pass
283 |
--------------------------------------------------------------------------------
/SafeRLBench/base.py:
--------------------------------------------------------------------------------
1 | """Module implementing base classes."""
2 |
3 | from __future__ import division, print_function, absolute_import
4 |
5 | from abc import ABCMeta, abstractmethod
6 | from six import add_metaclass
7 |
8 | from SafeRLBench import AlgoMonitor, EnvMonitor
9 |
10 | __all__ = ('EnvironmentBase', 'Space')
11 |
12 |
13 | @add_metaclass(ABCMeta)
14 | class EnvironmentBase(EnvMonitor):
15 | """Environment Base Class.
16 |
17 | This base class defines and implements an interface to any environment
18 | implementation part of the environment module. Subclasses inheriting
19 | from EnvironmentBase need to make sure they meet the requirements below.
20 |
21 | Any subclass must implement:
22 | * _update(action)
23 | * _reset()
24 |
25 | Any subclass might override:
26 | * _rollout(policy)
27 |
28 | Make sure the `state_space`, `action_space` and `horizon` attributes will
29 | be set in any subclass, as the default implementation and / or the monitor
30 | may access them to retrieve information.
31 |
32 | Attributes
33 | ----------
34 | state_space :
35 | State space of the environment.
36 | action_space :
37 | Action space of the environment.
38 | horizon :
39 | Maximum number of iterations until rollout will stop.
40 | monitor : EnvData instance
41 | Contains the monitoring data. The monitor will be automatically
42 | initialized during creation.
43 |
44 | Methods
45 | -------
46 | rollout(policy)
47 | Perform a rollout according to the actions selected by policy.
48 | update(action)
49 | Update the environment state according to the action.
50 | reset()
51 | Reset the environment to the initial state.
52 |
53 | Notes
54 | -----
55 | When overwriting _rollout(policy) use the provided interface functions
56 | and do not directly call the private implementation.
57 | """
58 |
59 | def __init__(self, state_space, action_space, horizon=0):
60 | """Initialize EnvironmentBase.
61 |
62 | Parameters
63 | ----------
64 | state_space :
65 | State space of the environment.
66 | action_space :
67 | Action space of the environment.
68 | horizon :
69 | Maximum number of iterations until rollout will stop.
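
Examples
--------
A minimal sketch of a subclass; only ``_update`` and ``_reset`` are
required (state and action spaces are omitted here for brevity):

>>> class DummyEnv(EnvironmentBase):
...     def _update(self, action):
...         self.state += action
...         return action, self.state, -abs(self.state)
...     def _reset(self):
...         self.state = 0.
>>> env = DummyEnv(None, None, horizon=5)
>>> trace = env.rollout(lambda state: 1.)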
70 | """ 71 | super(EnvironmentBase, self).__init__() 72 | self.state_space = state_space 73 | self.action_space = action_space 74 | self.horizon = horizon 75 | 76 | # Implement in subclasses: 77 | # See update(self, action) for more information 78 | @abstractmethod 79 | def _update(self, action): 80 | raise NotImplementedError 81 | 82 | # See reset(self) for more information 83 | @abstractmethod 84 | def _reset(self): 85 | raise NotImplementedError 86 | 87 | # Override in subclasses if necessary 88 | def _rollout(self, policy): 89 | self.reset() 90 | trace = [] 91 | for n in range(self.horizon): 92 | action = policy(self.state) 93 | trace.append(self.update(action)) 94 | return trace 95 | 96 | def update(self, action): 97 | """Update the environment state according to the action. 98 | 99 | Wraps the subclass implementation _update(action) providing 100 | monitoring capabilities. 101 | 102 | Parameters 103 | ---------- 104 | action: array-like 105 | Element of action_space 106 | 107 | Returns 108 | ------- 109 | tuple : 3-tuple 110 | action : array-like 111 | element of action space as it has been applied in update 112 | state : array-like 113 | element of state_space which is the resulting state after 114 | applying action 115 | reward : float 116 | reward for resulting state 117 | """ 118 | with self.monitor_update(): 119 | t = self._update(action) 120 | return t 121 | 122 | def reset(self): 123 | """Reset the environment to initial state. 124 | 125 | Reset wraps the subclass implementation _reset() providing monitoring 126 | capabilities. 127 | """ 128 | with self.monitor_reset(): 129 | self._reset() 130 | 131 | def rollout(self, policy): 132 | """Perform a rollout according to the actions selected by policy. 133 | 134 | Wraps the implementation _rollout(policy) providing monitoring 135 | capabilities. 136 | 137 | Parameters 138 | ---------- 139 | Policy : callable 140 | Maps element of state_space to element of action_space 141 | 142 | Returns 143 | ------- 144 | trace : list of 3-tuple 145 | List of (action, state, reward)-tuple as returned by update(). 146 | """ 147 | with self.monitor_rollout(): 148 | trace = self._rollout(policy) 149 | return trace 150 | 151 | def __repr__(self): 152 | """Return class name.""" 153 | return self.__class__.__name__ 154 | 155 | 156 | @add_metaclass(ABCMeta) 157 | class Space(object): 158 | """Baseclass for Spaceobject. 159 | 160 | All methods have to be implemented in any subclass. 161 | 162 | Methods 163 | ------- 164 | contains(x) 165 | Check if x is an element of space. 166 | element 167 | Return arbitray element in space. 168 | """ 169 | 170 | @abstractmethod 171 | def contains(self, x): 172 | """Check if x is an element of space.""" 173 | pass 174 | 175 | @abstractmethod 176 | def sample(self): 177 | """Return an arbitrary element in space for unit testing.""" 178 | pass 179 | 180 | @property 181 | @abstractmethod 182 | def dimension(self): 183 | """Return the dimension of the space.""" 184 | pass 185 | 186 | 187 | @add_metaclass(ABCMeta) 188 | class AlgorithmBase(AlgoMonitor): 189 | """Baseclass for any algorithm. 190 | 191 | This baseclass defines a uniform interface for any algorithm part of 192 | the algorithm module SafeRLBench.algo. It features monitoring capabilities 193 | for tracking and evaluating the execution of the algorithm. 194 | 195 | Inheriting from `AlgorithmBase` is suspect to some constraints, i.e. any 196 | algorithm needs to be implemented using the following functions. 
197 | 198 | Any subclass must overwrite: 199 | * _initialize(policy) 200 | * _step(policy) 201 | * _is_finished() 202 | 203 | Any subclass may overwrite: 204 | * _optimize(policy) 205 | 206 | In case one does overwrite _optimize, the functions _initialize(), 207 | _step(parameter), _is_finished() may just pass unless they are used. 208 | This may however change the information tracked by the monitor. 209 | 210 | Attributes 211 | ---------- 212 | environment : 213 | Environment we want to optimize on 214 | policy : 215 | Policy to be optimized 216 | max_it : int 217 | Maximum number of iterations 218 | monitor : AlgoData instance 219 | Contains monitoring data. The monitor will automatically initialize 220 | on creation of an object. 221 | 222 | Methods 223 | ------- 224 | optimize() 225 | Optimize a policy with respective algorithm. 226 | initialize() 227 | Initialize policy parameter. 228 | step() 229 | Update policy parameters. 230 | is_finished() 231 | Return true when algorithm is finished. 232 | 233 | Notes 234 | ----- 235 | Specification of the private functions. 236 | 237 | _initialize(self): 238 | Initialize the algorithm. 239 | _step(): 240 | Compute one step of the algorithm. 241 | _is_finished(): 242 | Return True when algorithm is supposed to finish. 243 | """ 244 | 245 | def __init__(self, environment, policy, max_it): 246 | super(AlgorithmBase, self).__init__() 247 | 248 | self.environment = environment 249 | self.policy = policy 250 | self.max_it = max_it 251 | 252 | self.grad = None 253 | 254 | # Have to be overwritten. 255 | @abstractmethod 256 | def _initialize(self): 257 | pass 258 | 259 | @abstractmethod 260 | def _step(self): 261 | pass 262 | 263 | @abstractmethod 264 | def _is_finished(self): 265 | pass 266 | 267 | # May be overwritten 268 | def _optimize(self): 269 | self.initialize() 270 | 271 | for n in range(self.max_it): 272 | self.step() 273 | if self.is_finished(): 274 | break 275 | 276 | def optimize(self): 277 | """Optimize policy parameter. 278 | 279 | Wraps subclass implementation in _optimize(policy). 280 | 281 | Parameters 282 | ---------- 283 | policy: PolicyBase subclass 284 | """ 285 | with self.monitor_optimize(): 286 | self._optimize() 287 | 288 | def initialize(self): 289 | """Initialize policy parameter. 290 | 291 | Wraps subclass implementation in _initialize(policy) 292 | 293 | Parameters 294 | ---------- 295 | policy: PolicyBase subclass 296 | """ 297 | with self.monitor_initialize(): 298 | self._initialize() 299 | 300 | def step(self): 301 | """Update policy parameter. 302 | 303 | Wraps subclass implementation in _step(policy). 304 | 305 | Parameters 306 | ---------- 307 | policy: PolicyBase subclass 308 | """ 309 | with self.monitor_step(): 310 | self._step() 311 | 312 | def is_finished(self): 313 | """Return True when algorithm is supposed to finish. 314 | 315 | Wraps subclass implementation in _is_finished(). 
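
Returns
-------
bool :
    True as soon as the algorithm considers the optimization finished
    and the loop in ``optimize`` should stop.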
316 | """ 317 | stop = self._is_finished() 318 | return stop 319 | 320 | def reset(self): 321 | """Reset the monitor.""" 322 | self._alg_reset() 323 | 324 | def __repr__(self): 325 | if hasattr(self, '_info'): 326 | return self._info() 327 | return self.__class__.__name__ 328 | 329 | 330 | @add_metaclass(ABCMeta) 331 | class Policy(object): 332 | """Minimal policy interface.""" 333 | 334 | def __call__(self, state): 335 | return self.map(state) 336 | 337 | @abstractmethod 338 | def map(self, state): 339 | """Map element of state space to action space.""" 340 | pass 341 | 342 | @property 343 | @abstractmethod 344 | def parameters(self): 345 | """Access current parameters.""" 346 | pass 347 | 348 | @parameters.setter 349 | @abstractmethod 350 | def parameters(self, par): 351 | pass 352 | 353 | @property 354 | @abstractmethod 355 | def parameter_space(self): 356 | """Return parameter space.""" 357 | 358 | 359 | @add_metaclass(ABCMeta) 360 | class ProbPolicy(Policy): 361 | """Probabilistic policy interface.""" 362 | 363 | @abstractmethod 364 | def grad_log_prob(self, state, action): 365 | """Return the :math:log(grad p(action | state)):math:.""" 366 | pass 367 | -------------------------------------------------------------------------------- /SafeRLBench/envs/_quadrocopter/quadrotor_dynamics.py: -------------------------------------------------------------------------------- 1 | """Quadrotor Dynamics.""" 2 | 3 | from __future__ import print_function 4 | from __future__ import division 5 | from __future__ import absolute_import 6 | 7 | import numpy as np 8 | 9 | from .quadrocopter_classes import State, Parameters 10 | 11 | __all__ = ['QuadrotorDynamics', 'wind_creator', 'random_disturbance_creator'] 12 | 13 | 14 | class QuadrotorDynamics(object): 15 | """Implement the quadrotor dynamics and states (independent of gazebo). 16 | 17 | Attributes 18 | ---------- 19 | pos: 3d array 20 | Initial position of quadrotor 21 | vel: 3d array 22 | Initial velocity of quadrotor 23 | acc: 3d array 24 | Initial acceleration of quadrotor 25 | R: 3x3 array 26 | Initial rotation matrix 27 | external_forces: list 28 | a list of callables that take the state as input and return forces on 29 | the quadrotor in global coordinates. 30 | 31 | Notes 32 | ----- 33 | There seems to be an instability where the acceleration overflows and then 34 | causes issues in the controller. 35 | """ 36 | 37 | def __init__(self, pos=None, vel=None, acc=None, R=None, 38 | external_forces=None): 39 | """Initialize quadrocopter dynamics. 40 | 41 | Parameters 42 | ---------- 43 | pos: 3d array 44 | Initial position of quadrotor 45 | vel: 3d array 46 | Initial velocity of quadrotor 47 | acc: 3d array 48 | Initial acceleration of quadrotor 49 | R: 3x3 array 50 | Initial rotation matrix 51 | external_forces: list 52 | a list of callables that take the state as input and return forces 53 | on the quadrotor in global coordinates. 
54 | """ 55 | self.state = State() 56 | self.params = Parameters() 57 | 58 | if external_forces is None: 59 | self.external_forces = () 60 | else: 61 | self.external_forces = external_forces 62 | 63 | if pos is not None: 64 | self.state.pos = pos.copy() 65 | if vel is not None: 66 | self.state.vel = vel.copy() 67 | if acc is not None: 68 | self.state.acc = acc.copy() 69 | if R is not None: 70 | self.state.R = R.copy() 71 | 72 | def dynamics_derivative(self, pitch, roll, z_vel, yaw_vel): 73 | """Return the state derivatives for the current state and input.""" 74 | rates = self._inputs_to_desired_rates(pitch, roll, z_vel, yaw_vel) 75 | 76 | forces = self._determine_forces(*rates) 77 | 78 | return self._forces_to_derivatives(forces) 79 | 80 | def update_position(self, inputs): 81 | """Compute the derivatives and integrate them based on inputs.""" 82 | pitch, roll, z_vel, yaw_vel = inputs 83 | derivatives = self.dynamics_derivative(pitch, roll, z_vel, yaw_vel) 84 | self._integrate_derivatives(derivatives, 85 | self.params.inner_loop_cycle * 1e-6) 86 | 87 | def _inputs_to_desired_rates(self, pitch, roll, z_vel, yaw_vel): 88 | """Convert inputs to desired angular rates and thrust.""" 89 | # Current roll, and yaw angles 90 | roll_cur, _, yaw_cur = self.state.rpy 91 | 92 | # r_des is simply the commanded yaw rate 93 | r_des = yaw_vel 94 | 95 | # calculate the commanded acceleration in the z direction, 96 | # (z_dot_des - z_dot) / tau_z 97 | z_ddot_des = (z_vel - self.state.vel[2]) / self.params.tau_Iz 98 | 99 | # And from this we may find the commanded thrust, (g + z_ddot_cmd)/R33 100 | c_des = (self.params.g + z_ddot_des) / self.state.R[2, 2] 101 | 102 | # Calculate the commanded yaw angle from: 103 | yaw_des = yaw_vel * self.params.tau_Iyaw + yaw_cur 104 | 105 | # R13_des = sin(yaw_des) * sin(roll_cmd) 106 | # + cos(yaw_des) * cos(roll_cmd) * sin(pitch_cmd) 107 | r_13_des = (np.sin(yaw_des) * np.sin(roll) + 108 | np.cos(yaw_des) * np.cos(roll) * np.sin(pitch)) 109 | 110 | # R23_des = cos(roll_cmd) * sin(yaw_des) * sin(pitch_cmd) 111 | # - cos(yaw_des) * sin(roll_cmd) 112 | r_23_des = (np.cos(roll) * np.sin(yaw_des) * np.sin(pitch) - 113 | np.cos(yaw_des) * np.sin(roll)) 114 | 115 | # p_des = (R21*(R13_des-R13) - R11*(R23_des-R23))/(R33*tau_rp) 116 | p_des = (self.state.R[1, 0] * (r_13_des - self.state.R[0, 2]) - 117 | self.state.R[0, 0] * (r_23_des - self.state.R[1, 2])) 118 | p_des /= self.state.R[2, 2] * self.params.tau_rp 119 | 120 | # q_des = (R22*(R13_des-R13) - R12*(R23_des-R23))/(R33*tau_rp) 121 | q_des = (self.state.R[1, 1] * (r_13_des - self.state.R[0, 2]) - 122 | self.state.R[0, 1] * (r_23_des - self.state.R[1, 2])) 123 | q_des /= self.state.R[2, 2] * self.params.tau_rp 124 | 125 | # Return everything! 
126 | return p_des, q_des, r_des, c_des
127 |
128 | def _determine_forces(self, p_des, q_des, r_des, c_des):
129 | """Convert desired angular rates and thrust to rotor forces."""
130 | L = self.params.L
131 | K = self.params.K
132 | m = self.params.m
133 |
134 | a = np.array(((0, L, 0, -L),
135 | (-L, 0, L, 0),
136 | (K, -K, K, -K),
137 | (1 / m, 1 / m, 1 / m, 1 / m)),
138 | dtype=np.float64)
139 |
140 | # The inertial matrix
141 | j = np.diag((self.params.Ix, self.params.Iy, self.params.Iz))
142 |
143 | # The current angular velocity vector
144 | omega = self.state.omega
145 |
146 | # The rate vector (our approximation of omega_dot)
147 | rate_vector = np.array(
148 | (((1 / self.params.tau_p) * (p_des - self.state.omega[0])),
149 | ((1 / self.params.tau_q) * (q_des - self.state.omega[1])),
150 | ((1 / self.params.tau_r) * (r_des - self.state.omega[2])))).T
151 |
152 | b = j.dot(rate_vector) + np.cross(omega, j.dot(omega))
153 |
154 | # Add c_des to the bottom of the vector
155 | b = np.concatenate((b, [c_des]))
156 |
157 | # Return the four rotor forces
158 | return np.linalg.solve(a, b)
159 |
160 | def _forces_to_derivatives(self, forces):
161 | """Compute the state derivatives based on applied forces."""
162 | # Update position
163 | derivatives = State()
164 |
165 | derivatives.pos[:] = self.state.vel
166 |
167 | drag = self._compute_drag()
168 |
169 | # Update accelerations
170 | derivatives.acc = np.sum(forces) * self.state.R[:, 2] - drag
171 |
172 | # Add external forces
173 | for force in self.external_forces:
174 | derivatives.acc += force(self.state)
175 |
176 | # Normalize with mass and add gravity
177 | derivatives.acc /= self.params.m
178 | derivatives.acc[2] -= self.params.g
179 |
180 | # Update velocities
181 | derivatives.vel[:] = self.state.acc
182 |
183 | p, q, r = self.state.omega
184 | derivatives.R = self.state.R.dot(np.array([[0, -r, q],
185 | [r, 0, -p],
186 | [-q, p, 0]]))
187 |
188 | # Angular velocity changes
189 | f1, f2, f3, f4 = forces
190 |
191 | # p' = (1/Ix)*(L*(f2-f4) + (Iy-Iz)*r*q)
192 | p_dot = (self.params.L * (f2 - f4) +
193 | (self.params.Iy - self.params.Iz) *
194 | self.state.omega[2] * self.state.omega[1]) / self.params.Ix
195 |
196 | # q' = (1/Iy)*(L*(f3-f1) + (Iz-Ix)*r*p)
197 | q_dot = (self.params.L * (f3 - f1) +
198 | (self.params.Iz - self.params.Ix) *
199 | self.state.omega[2] * self.state.omega[0]) / self.params.Iy
200 |
201 | # r' = (1/Iz)*(K*(f1-f2+f3-f4) + (Ix-Iy)*p*q)
202 | r_dot = (self.params.K * (f1 - f2 + f3 - f4) +
203 | (self.params.Ix - self.params.Iy) *
204 | self.state.omega[0] * self.state.omega[1]) / self.params.Iz
205 |
206 | derivatives.omega = np.array([p_dot, q_dot, r_dot])
207 |
208 | return derivatives
209 |
210 | def _integrate_derivatives(self, derivatives, dt):
211 | """Simple Euler integration to determine new states."""
212 | self.state.pos += dt * derivatives.pos
213 | self.state.vel += dt * derivatives.vel
214 | self.state.acc[:] = derivatives.acc
215 |
216 | self.state.R += dt * derivatives.R
217 | self.state.omega += dt * derivatives.omega
218 |
219 | def _compute_drag(self):
220 | """Compute body velocities and apply the linear drag model.
221 |
222 | Inverts the current rotation matrix and solves for the components of
223 | quadrocopter velocities in the body coordinates. Then a simple linear
224 | drag model equation is applied. This is done because the quadrocopter
225 | platform areas don't change in this reference frame. The drag forces are
226 | returned in global coordinates.
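
Returns
-------
ndarray :
    Drag force vector in global coordinates.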
227 | """ 228 | v_b = np.linalg.solve(self.state.R, self.state.vel) 229 | 230 | drag_model = np.array((self.params.CD_bx, 231 | self.params.CD_by, 232 | self.params.CD_bz)) * v_b 233 | 234 | return self.state.R.dot(drag_model) 235 | 236 | 237 | def wind_creator(direction, strength): 238 | """ 239 | Return callable that computes the wind force on the quadrotor. 240 | 241 | Parameters: 242 | direction: 3d-array 243 | Direction vector for the wind. 244 | strength: float 245 | Strength of the wind in N / m^2 246 | """ 247 | direction = np.asarray(direction, dtype=np.float).squeeze() 248 | direction /= np.linalg.norm(direction) 249 | 250 | quadrotor_length = 0.3 251 | quadrotor_height = 0.05 252 | 253 | norm_area = np.array((quadrotor_length * quadrotor_height, 254 | quadrotor_length * quadrotor_height, 255 | quadrotor_length ** 2)) 256 | 257 | def wind_force(state): 258 | """Return wind force. 259 | 260 | Homogeneous wind, this does not create any torques. 261 | 262 | Parameters 263 | ---------- 264 | state : 265 | """ 266 | # Project surface areas into the wind direction 267 | area = np.abs(direction.dot(state.R)) * norm_area 268 | force = np.sum(area) * strength * direction 269 | return force 270 | 271 | return wind_force 272 | 273 | 274 | def random_disturbance_creator(covariance, mean=None): 275 | """Add gaussian disturbance forces with a certain covariance function. 276 | 277 | Parameters 278 | ---------- 279 | covariance: np.array 280 | A 3x3 array of the covariance matrix 281 | mean: np.array 282 | A 1d array of the 3 mean values (defaults to zero-mean) 283 | 284 | Returns 285 | ------- 286 | disturbance: callable 287 | A function that can be used as an external force in quadsim 288 | """ 289 | if mean is None: 290 | mean = np.zeros((3,)) 291 | 292 | def random_force(state): 293 | """Return wind force. 294 | 295 | Parameters 296 | ---------- 297 | state: State 298 | 299 | Returns 300 | ------- 301 | force: np.array 302 | """ 303 | return np.random.multivariate_normal(mean, covariance) 304 | 305 | return random_force 306 | -------------------------------------------------------------------------------- /SafeRLBench/envs/quadrocopter.py: -------------------------------------------------------------------------------- 1 | """Quadrocopter environment wrapper.""" 2 | 3 | from __future__ import division, print_function, absolute_import 4 | 5 | from SafeRLBench import EnvironmentBase 6 | from SafeRLBench.spaces import RdSpace 7 | 8 | from ._quadrocopter import QuadrotorDynamics 9 | from ._quadrocopter import StateVector 10 | 11 | from functools import partial 12 | 13 | from six import string_types 14 | 15 | import numpy as np 16 | from numpy import array 17 | from numpy import pi, cos, sin 18 | from numpy.linalg import norm 19 | 20 | import logging 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | # Available reference functions. 25 | REFERENCE_TYPES = ['circle', 'stationary', 'oscillate'] 26 | 27 | 28 | class Quadrocopter(EnvironmentBase): 29 | """Quadrocopter simulation. 30 | 31 | Attributes 32 | ---------- 33 | horizon : int 34 | Number of iterations for the main simulation 35 | pre_sim_horizon : int 36 | Number of iterations for the pre-simulation. 37 | _model : model object 38 | Object simulating the quadrotor dynamics. 39 | """ 40 | 41 | def __init__(self, 42 | init_pos=None, init_vel=None, num_sec=9, 43 | num_init_sec=4, ref='circle', period=1 / 70., 44 | seed=None): 45 | """Quadrocopter initialization. 
46 |
47 | Parameters
48 | ----------
49 | init_pos : array-like
50 | Initial position of the quadrocopter. Default: None, which will
51 | set init_pos to [1, 0, 0].
52 | init_vel : array-like
53 | Initial velocity of the quadrocopter. Default: None, which will
54 | set init_vel to [0, pi / 2, 0].
55 | num_sec : integer
56 | num_init_sec : integer
57 | ref : string or reference object
58 | Name of the reference. Currently supported are 'circle',
59 | 'stationary' or 'oscillate'.
60 | period : float
61 | seed : int
62 | """
63 | # spaces
64 | self.state_space = RdSpace((22,))
65 | self.action_space = RdSpace((4,))
66 |
67 | # seed
68 | if seed is not None:
69 | np.random.seed(seed)
70 | self._seed = seed
71 |
72 | # initial position
73 | if init_pos is None:
74 | init_pos = array([cos(0), sin(0), 0.])
75 |
76 | if len(init_pos) != 3:
77 | raise ValueError("init_pos with invalid length %d." % len(init_pos))
78 |
79 | # initial velocity
80 | if init_vel is None:
81 | init_vel = array([-pi / 2. * sin(0), pi / 2. * cos(0), 0.])
82 |
83 | if len(init_vel) != 3:
84 | raise ValueError("init_vel with invalid length %d." % len(init_vel))
85 |
86 | # initialize model
87 | self._model = QuadrotorDynamics(init_pos, init_vel)
88 |
89 | if isinstance(ref, string_types):
90 | self.reference = Reference(ref, period)
91 | else:
92 | self.reference = ref
93 | self.period = ref.period
94 |
95 | self.reference.reset(self.state)
96 |
97 | self.horizon = int(1. / period) * num_sec
98 | self.pre_sim_horizon = int(1. / period) * num_init_sec
99 |
100 | self.period = self.reference.period
101 |
102 | self._init_pos = init_pos
103 | self._init_vel = init_vel
104 |
105 | self._trajectory = np.atleast_2d(np.zeros(3))
106 | self._time = []
107 | self._step = 0
108 |
109 | def _update(self, action):
110 | assert self.action_space.contains(action), "Invalid action."
111 |
112 | self._model.update_position(action)
113 |
114 | self._step += 1
115 | time = self._step * self.period
116 |
117 | self._time.append(time)
118 | self._trajectory = np.vstack((self._trajectory, self.state.pos))
119 |
120 | reward = self._reward()
121 | self.reference.update(self.state, time)
122 |
123 | return action, self.state.copy(), reward
124 |
125 | def _reset(self):
126 | self._model = QuadrotorDynamics(self._init_pos, self._init_vel)
127 | self.reference.reset(self.state)
128 | self._trajectory = np.atleast_2d(np.zeros(3))
129 | self._time = []
130 | self._step = 0
131 |
132 | def _rollout(self, policy):
133 | if hasattr(policy, 'reference'):
134 | policy.reference = self.reference
135 | self.reset()
136 | trace = []
137 | for n in range(self.horizon):
138 | action = policy(self.state)
139 | trace.append(self.update(action))
140 | return trace
141 |
142 | def _reward(self):
143 | state = self.state
144 | ref = self.reference.reference
145 |
146 | reward = -norm(state.pos - ref.pos) - norm(state.vel - ref.vel)
147 |
148 | if np.isnan(reward):
149 | reward = -1.79769313e+308
150 |
151 | return reward
152 |
153 | @property
154 | def seed(self):
155 | """Seed."""
156 | return self._seed
157 |
158 | @seed.setter
159 | def seed(self, value):
160 | np.random.seed(value)
161 | self._seed = value
162 |
163 | @property
164 | def state(self):
165 | """Provide access to state_vector."""
166 | # this whole state vector implementation is annoyingly inefficient.
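# (every access goes through the model's ``StateVector`` view; see
# the setter below)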
167 | return self._model.state.state_vector
168 |
169 | @state.setter
170 | def state(self, state):
171 | self._model.state.state_vector = state.view(StateVector)
172 |
173 |
174 | class Reference(object):
175 | """Reference object for quadrocopter environment."""
176 |
177 | def __init__(self, name='circle', period=1. / 70, keep_record=True,
178 | **kwargs):
179 | """Initialize Reference.
180 |
181 | Parameters
182 | ----------
183 | name : str
184 | The name of the reference function.
185 | period : float
186 | The time step that the simulation takes every iteration.
187 | keep_record : bool
188 | Whether the history of the reference object should be saved.
189 | **kwargs : dict
190 | """
191 | # name type checking.
192 | if not isinstance(name, string_types):
193 | raise ValueError('Invalid type for argument name.')
194 | if name not in REFERENCE_TYPES:
195 | raise ValueError(name + ' is not a valid reference.')
196 |
197 | self._name = name
198 | self.period = period
199 | self._iter = 0
200 | self._reference_function = self._reference_chooser(**kwargs)
201 | self._current_ref = None
202 | self.keep_record = keep_record
203 | if keep_record:
204 | self._record = []
205 |
206 | @property
207 | def name(self):
208 | """Return the type of reference function."""
209 | return self._name
210 |
211 | @name.setter
212 | def name(self, value):
213 | if value not in REFERENCE_TYPES:
214 | raise ValueError(value + ' is not a valid reference.')
215 |
216 | self._name = value
217 | self._reference_function = self._reference_chooser()
218 |
219 | self.reset()
220 |
221 | @property
222 | def record(self):
223 | """Return the reference record of the simulation."""
224 | if self.keep_record:
225 | return np.atleast_2d(self._record)
226 | else:
227 | logger.warning("Reference record has not been saved.")
228 |
229 | def reset(self, state=None):
230 | """Reset internal state."""
231 | self._iter = 0
232 | self._current_ref = self._reference_function(state, 0, False)
233 | if self.keep_record:
234 | self._record = []
235 |
236 | def update(self, state, time, finished=False):
237 | """Compute the state of the reference object."""
238 | ref = self._reference_function(state, time, finished)
239 | self._iter += 1
240 |
241 | if self.keep_record:
242 | ref_value = np.hstack((ref.pos, ref.vel, ref.euler, ref.omega_b))
243 | self._update_record(ref_value)
244 |
245 | self._current_ref = ref
246 |
247 | return ref
248 |
249 | @property
250 | def reference(self):
251 | """Return the reference."""
252 | return self._current_ref
253 |
254 | def _update_record(self, ref_value):
255 | self._record.append(ref_value)
256 | assert self._iter == len(self._record)
257 |
258 | def _reference_chooser(self, **kwargs):
259 | # CIRCLE
260 | if self._name == 'circle':
261 | if kwargs.get('speed', False):
262 | speed = kwargs['speed']
263 | else:
264 | speed = pi / 2.
265 | if kwargs.get('initial_angle', False):
266 | init_angle = kwargs['initial_angle']
267 | else:
268 | init_angle = 0.
269 | if kwargs.get('radius', False):
270 | radius = kwargs['radius']
271 | else:
272 | radius = 1.
273 | if kwargs.get('z_vel', False):
274 | z_vel = kwargs['z_vel']
275 | else:
276 | z_vel = 0.
277 | return partial(_circle_reference,
278 | speed=speed,
279 | init_angle=init_angle,
280 | radius=radius,
281 | z_vel=z_vel)
282 | # STATIONARY
283 | elif self._name == 'stationary':
284 | if kwargs.get('position', False):
285 | position = kwargs['position']
286 | else:
287 | position = [1., 0., 0.]
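# default target; matches the environment's default initial
# position [1, 0, 0] on the unit circle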
288 | return partial(_stationary_reference, 289 | position=position) 290 | # OSCILLATE 291 | elif self._name == 'oscillate': 292 | if kwargs.get('x_velocity', False): 293 | x_vel = kwargs['x_velocity'] 294 | else: 295 | x_vel = 0.5 296 | if kwargs.get('omega', False): 297 | omega = kwargs['omega'] 298 | else: 299 | omega = 1. 300 | if kwargs.get('radius', False): 301 | radius = kwargs['radius'] 302 | else: 303 | radius = 0.5 304 | return partial(_oscillate_reference, 305 | x_vel=x_vel, 306 | omega=omega, 307 | radius=radius) 308 | 309 | 310 | # private circle reference function 311 | def _circle_reference(state, 312 | time, 313 | finished, 314 | radius=None, 315 | speed=None, 316 | init_angle=None, 317 | z_vel=None): 318 | ref = StateVector() 319 | angle = init_angle + speed / radius * time 320 | 321 | ref.pos = array([radius * cos(angle), 322 | radius * sin(angle), 323 | z_vel * time]) 324 | ref.vel[:] = [-speed * sin(angle), speed * cos(angle), z_vel] 325 | ref.euler[2] = pi + np.arctan2(state.pos[1], state.pos[0]) 326 | # reference.omega_b[2] = speed / radius 327 | return ref 328 | 329 | 330 | # private stationary reference function 331 | def _stationary_reference(state, 332 | time, 333 | finished, 334 | position=None): 335 | ref = StateVector() 336 | ref.pos[0] = position[0] 337 | ref.pos[1] = position[1] 338 | ref.pos[2] = position[2] 339 | return ref 340 | 341 | 342 | # private oscillation reference function 343 | def _oscillate_reference(state, 344 | time, 345 | finished, 346 | x_vel=None, 347 | omega=None, 348 | radius=None): 349 | ref = StateVector() 350 | angle = omega * time 351 | ref.pos[0] = x_vel * time 352 | ref.pos[1] = radius * sin(angle) 353 | ref.pos[2] = 0. 354 | ref.vel[0] = x_vel 355 | ref.vel[1] = radius * omega * cos(angle) 356 | ref.vel[2] = 0. 357 | return ref 358 | -------------------------------------------------------------------------------- /SafeRLBench/monitor.py: -------------------------------------------------------------------------------- 1 | """Monitoring implementations.""" 2 | 3 | import logging 4 | import time 5 | 6 | from SafeRLBench import config 7 | 8 | from contextlib import contextmanager 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | __all__ = ('EnvMonitor', 'AlgoMonitor') 13 | 14 | 15 | class EnvMonitor(object): 16 | """ 17 | Environment Monitor, providing tracking for environments. 18 | 19 | Attributes 20 | ---------- 21 | monitor : 22 | This is the container where monitoring data will be stored. 23 | 24 | Methods 25 | ------- 26 | monitor_update() 27 | Context manager for monitoring environment updates. It should be used 28 | when invoking the private ``_update`` implementation from the interface 29 | method. 30 | monitor_rollout() 31 | Context manager for monitoring environment rollout. It should be used 32 | when invoking the private ``_rollout`` implementation from the 33 | interface method. 34 | monitor_reset() 35 | Context manager for monitoring environment resets. It should be used 36 | when invoking the private ``_reset`` implementation from the interface 37 | method. 
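
Examples
--------
``EnvironmentBase`` wraps the private implementations with these
managers, e.g.::

    def update(self, action):
        with self.monitor_update():
            return self._update(action)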
38 | """ 39 | 40 | def __new__(cls, *args, **kwargs): 41 | """Create monitor in subclasses.""" 42 | obj = object.__new__(cls) 43 | obj.monitor = EnvData() 44 | return obj 45 | 46 | @contextmanager 47 | def monitor_update(self): 48 | """Context monitoring update.""" 49 | self._before_update() 50 | yield self 51 | self._after_update() 52 | 53 | @contextmanager 54 | def monitor_rollout(self): 55 | """Context monitoring rollout.""" 56 | self._before_rollout() 57 | yield self 58 | self._after_rollout() 59 | 60 | @contextmanager 61 | def monitor_reset(self): 62 | """Context monitoring reset.""" 63 | self._before_reset() 64 | yield self 65 | self._after_reset() 66 | 67 | def _before_update(self): 68 | """Monitor environment before update. 69 | 70 | Parameters 71 | ---------- 72 | env : 73 | Environment instance to be monitored. 74 | """ 75 | pass 76 | 77 | def _after_update(self): 78 | """Monitor environment after update. 79 | 80 | Parameters 81 | ---------- 82 | env : 83 | Environment instance to be monitored. 84 | """ 85 | pass 86 | 87 | def _before_rollout(self): 88 | """Monitor environment before rollout. 89 | 90 | Parameters 91 | ---------- 92 | env : 93 | Environment instance to be monitored. 94 | """ 95 | pass 96 | 97 | def _after_rollout(self): 98 | """ 99 | Monitor environment after rollout. 100 | 101 | Parameters 102 | ---------- 103 | env : 104 | Environment instance to be monitored. 105 | """ 106 | self.monitor.rollout_cnt += 1 107 | 108 | def _before_reset(self): 109 | """Monitor environment before reset. 110 | 111 | Parameters 112 | ---------- 113 | env : 114 | Environment instance to be monitored. 115 | """ 116 | pass 117 | 118 | def _after_reset(self): 119 | """Monitor environment after reset. 120 | 121 | Parameters 122 | ---------- 123 | env : 124 | Environment instance to be monitored. 125 | """ 126 | pass 127 | 128 | 129 | class AlgoMonitor(object): 130 | """Algorithm monitor tracks algorithms' activity. 131 | 132 | This class is inherited by the `AlgorithmBase` class and will provide it 133 | with tracking capabilities. 134 | 135 | Attributes 136 | ---------- 137 | monitor : 138 | This is the container where monitoring data will be stored. 139 | grad : 140 | The Alogrithm can set this field, to provide information about the 141 | current gradient to the monitor. 142 | has_policy : 143 | In case the algorithm does not depend on a policy and does not need 144 | any parameters, this can me set to False, to prevent issues with 145 | tracking data that does not exist. 146 | 147 | Methods 148 | ------- 149 | monitor_optimize() 150 | Context manager for monitoring algorithm optimizations. It should be 151 | used when invoking the private ``_optimize`` implementation from the 152 | interface method. 153 | monitor_initialize() 154 | Context manager for monitoring algorithm initializations. It should be 155 | used when invoking the private ``_initialize`` implementation from the 156 | interface method. 157 | monitor_step() 158 | Context manager for monitoring algorithm step. It should be used when 159 | invoking the private ``_step`` implementation from the interface 160 | method. 
161 | """ 162 | 163 | def __new__(cls, *args, **kwargs): 164 | """Create monitor in subclasses.""" 165 | obj = object.__new__(cls) 166 | obj.monitor = AlgoData() 167 | obj.grad = None 168 | obj.has_policy = True 169 | return obj 170 | 171 | @contextmanager 172 | def monitor_optimize(self): 173 | """Context monitoring optimization.""" 174 | self._before_optimize() 175 | yield self 176 | self._after_optimize() 177 | 178 | @contextmanager 179 | def monitor_initialize(self): 180 | """Context monitoring initialize.""" 181 | yield self 182 | if self.has_policy: 183 | self.monitor.parameters.append(self.policy.parameters) 184 | 185 | @contextmanager 186 | def monitor_step(self): 187 | """Context monitoring stepping.""" 188 | self._before_step() 189 | yield self 190 | self._after_step() 191 | 192 | def _before_optimize(self): 193 | """Set monitor up for optimization run. 194 | 195 | Parameters 196 | ---------- 197 | alg : 198 | the algorithm instance to be monitored 199 | """ 200 | if config.monitor_verbosity > 0: 201 | logger.info('Starting optimization of %s...', str(self)) 202 | 203 | # reset monitor object in case of rerun 204 | self.monitor.reset() 205 | 206 | # init monitor dict for algorithm 207 | self.monitor.t = time.time() 208 | 209 | # init optimization time control 210 | self.monitor.optimize_start = time.time() 211 | 212 | def _after_optimize(self): 213 | """Catch data after optimization run.""" 214 | # retrieve time of optimization 215 | optimize_end = time.time() 216 | optimize_time = optimize_end - self.monitor.optimize_start 217 | 218 | if self.monitor.optimize_start == 0: 219 | logger.warning('Time measure for optimize corrupted') 220 | 221 | self.monitor.optimize_start = 0 222 | 223 | self.monitor.optimize_time = optimize_time 224 | 225 | # if the gradient attribute has been set 226 | if self.grad is not None: 227 | logger.debug('Finished optimization after %d steps with grad %s.', 228 | self.monitor.step_cnt, str(self.grad)) 229 | else: 230 | logger.debug('Finished optimization after %d steps.', 231 | self.monitor.step_cnt) 232 | 233 | if self.has_policy: 234 | # independently compute traces after optimization is finished 235 | if config.monitor_verbosity > 0: 236 | logger.info('Computing traces for %s run...', str(self)) 237 | 238 | for parameters in self.monitor.parameters: 239 | 240 | self.policy.parameters = parameters 241 | 242 | # compute trace 243 | trace = self.environment._rollout(self.policy) 244 | self.monitor.traces.append(trace) 245 | 246 | # compute total reward 247 | reward = sum([t[2] for t in trace]) 248 | self.monitor.rewards.append(reward) 249 | 250 | def _before_step(self): 251 | """Monitor algorithm before step. 252 | 253 | Parameters 254 | ---------- 255 | alg : 256 | Algorithm instance to be monitored. 257 | """ 258 | # count the number of rollouts for each step 259 | self.environment.monitor.rollout_cnt = 0 260 | 261 | if config.monitor_verbosity > 2: 262 | logger.info('Computing step %d for %s...', self.monitor.step_cnt, 263 | str(self)) 264 | 265 | def _after_step(self): 266 | """Monitor algorithm after step. 267 | 268 | Parameters 269 | ---------- 270 | alg : 271 | Algorithm instance to be monitored. 
272 | """ 273 | emonitor = self.environment.monitor 274 | 275 | self.monitor.step_cnt += 1 276 | 277 | # store the number of rollouts 278 | self.monitor.rollout_cnts.append(emonitor.rollout_cnt) 279 | 280 | # retrieve information from the policy 281 | if self.has_policy: 282 | # retrieve current parameters 283 | parameters = self.policy.parameters 284 | # store information 285 | self.monitor.parameters.append(parameters) 286 | 287 | # log if wanted 288 | self._step_log() 289 | 290 | def _step_log(self): 291 | # print information if wanted 292 | monitor = self.monitor 293 | n = monitor.step_cnt 294 | log = 0 295 | 296 | # check verbosity level 297 | if config.monitor_verbosity > 0: 298 | if monitor.step_cnt % 1000 == 0: 299 | log = 1000 300 | 301 | if config.monitor_verbosity > 1: 302 | if monitor.step_cnt % 100 == 0: 303 | log = 100 304 | 305 | if config.monitor_verbosity > 2: 306 | log = 1 307 | 308 | if log: 309 | # generate time strings 310 | now = time.time() 311 | t = now - monitor.optimize_start 312 | t_s = "{:.2f}".format(t) 313 | avg_s = "{:.3f}".format(t / n) 314 | 315 | # generate log message 316 | msg = 'Status for ' + self.__class__.__name__ + ' on ' 317 | msg += self.environment.__class__.__name__ + ':\n\n' 318 | msg += '\tRun: %d\tTime: %s\t Avg: %s\n' % (n, t_s, avg_s) 319 | if self.has_policy: 320 | # retrieve current state 321 | par_s = str(self.policy.parameters) 322 | msg += '\tParameter: \t%s\n' % (par_s) 323 | 324 | logger.info(msg) 325 | 326 | def _alg_reset(self): 327 | """Reset the algorithm monitor.""" 328 | self.monitor.reset() 329 | 330 | 331 | class EnvData(object): 332 | """Class to store environment tracking data. 333 | 334 | Attributes 335 | ---------- 336 | rollout_cnt : Int 337 | number of rollouts performed on environment. 338 | """ 339 | 340 | def __init__(self): 341 | """Initialize attributes.""" 342 | self.rollout_cnt = 0 343 | 344 | 345 | class AlgoData(object): 346 | """Class used to store algorithm tracking data. 347 | 348 | Attributes 349 | ---------- 350 | optimize_start : Float 351 | Start time of the optimization. 352 | optimize_time : Float 353 | Start time of intermediate runs. 354 | step_cnt : Int 355 | Number of steps performed since initialization. 356 | rollout_cnts : List 357 | Number of rollouts during one step. 358 | parameters : List 359 | List of parameters found during optimization. 360 | traces : List 361 | List of traces for parameters. 362 | rewards : List 363 | List of rewards for parameters. 
364 | """ 365 | 366 | def __init__(self): 367 | """Initialize attributes.""" 368 | self.reset() 369 | 370 | def reset(self): 371 | """Reset monitor data.""" 372 | self.optimize_start = 0 373 | self.optimize_time = 0 374 | 375 | self.step_cnt = 0 376 | self.rollout_cnts = [] 377 | 378 | self.parameters = [] 379 | self.traces = [] 380 | self.rewards = [] 381 | -------------------------------------------------------------------------------- /SafeRLBench/algo/safeopt.py: -------------------------------------------------------------------------------- 1 | """SafeOpt Wrapper.""" 2 | 3 | from SafeRLBench import AlgorithmBase 4 | from SafeRLBench.error import add_dependency 5 | 6 | from numpy import mean, array 7 | 8 | try: 9 | import safeopt 10 | except ModuleNotFoundError: 11 | safeopt = None 12 | 13 | try: 14 | import GPy 15 | except ModuleNotFoundError: 16 | GPy = None 17 | 18 | import logging 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | __all__ = ('SafeOpt', 'SafeOptSwarm') 23 | 24 | 25 | class _SafeOptWrap(AlgorithmBase): 26 | 27 | def __init__(self, opt, gp_opt_par, gp_par, environment, policy, max_it, 28 | avg_reward, window): 29 | super(_SafeOptWrap, self).__init__(environment, policy, max_it) 30 | 31 | self._opt = opt 32 | 33 | self.gp_opt = None 34 | 35 | self.gp_opt_par = gp_opt_par 36 | self.gp_par = gp_par 37 | 38 | self.avg_reward = avg_reward 39 | self.window = window 40 | self.rewards = [] 41 | 42 | def _initialize(self): 43 | logger.debug("Initializing Policy.") 44 | # check if policy is already initialized by the user 45 | if self.policy.initialized: 46 | logger.debug("Use pre-set policy parameters.") 47 | parameters = self.policy.parameters 48 | else: 49 | logger.debug("Draw parameters at random.") 50 | parameters = self.policy.parameter_space.sample() 51 | self.policy.parameters = parameters 52 | 53 | # Compute a rollout 54 | trace = self.environment.rollout(self.policy) 55 | reward = sum([t[2] for t in trace]) 56 | 57 | # Initialize gaussian process with args: 58 | gp = [] 59 | for pars in zip(*self.gp_par): 60 | gp.append(GPy.core.GP(array([parameters]), array([[reward]]), 61 | *pars)) 62 | 63 | # Initialize SafeOpt 64 | self.gp_opt = self._opt(gp, **self.gp_opt_par) 65 | 66 | def _step(self): 67 | parameters = self.gp_opt.optimize() 68 | self.policy.parameters = parameters 69 | 70 | trace = self.environment.rollout(self.policy) 71 | reward = sum([t[2] for t in trace]) 72 | 73 | self.gp_opt.add_new_data_point(parameters, reward) 74 | self.rewards.append(reward) 75 | 76 | def _is_finished(self): 77 | if ((len(self.rewards) > self.window) 78 | and mean(self.rewards[(len(self.rewards) - self.window):-1]) 79 | > self.avg_reward): 80 | return True 81 | else: 82 | return False 83 | 84 | 85 | class SafeOpt(_SafeOptWrap): 86 | """Wrap SafeOpt algorithm. 87 | 88 | This class wraps the `SafeOpt` algorithm. It relies on the original 89 | implementation of `SafeOpt` which has to be installed before using this 90 | wrapper. 91 | 92 | Attributes 93 | ---------- 94 | environment : 95 | Environment to be optimized. 96 | policy : 97 | Policy to be optimized. 98 | max_it : 99 | Maximal number of iterations before we abort. 100 | avg_reward : integer 101 | Average reward at which the optimization will be finished. 102 | window : integer 103 | Window for the average reward 104 | gp : GPy Gaussian process 105 | A Gaussian process which is initialized with safe, initial data points. 
/SafeRLBench/algo/safeopt.py:
--------------------------------------------------------------------------------
1 | """SafeOpt Wrapper."""
2 | 
3 | from SafeRLBench import AlgorithmBase
4 | from SafeRLBench.error import add_dependency
5 | 
6 | from numpy import mean, array
7 | 
8 | try:
9 |     import safeopt
10 | except ImportError:
11 |     safeopt = None
12 | 
13 | try:
14 |     import GPy
15 | except ImportError:
16 |     GPy = None
17 | 
18 | import logging
19 | 
20 | logger = logging.getLogger(__name__)
21 | 
22 | __all__ = ('SafeOpt', 'SafeOptSwarm')
23 | 
24 | 
25 | class _SafeOptWrap(AlgorithmBase):
26 | 
27 |     def __init__(self, opt, gp_opt_par, gp_par, environment, policy, max_it,
28 |                  avg_reward, window):
29 |         super(_SafeOptWrap, self).__init__(environment, policy, max_it)
30 | 
31 |         self._opt = opt
32 | 
33 |         self.gp_opt = None
34 | 
35 |         self.gp_opt_par = gp_opt_par
36 |         self.gp_par = gp_par
37 | 
38 |         self.avg_reward = avg_reward
39 |         self.window = window
40 |         self.rewards = []
41 | 
42 |     def _initialize(self):
43 |         logger.debug("Initializing Policy.")
44 |         # check if policy is already initialized by the user
45 |         if self.policy.initialized:
46 |             logger.debug("Use pre-set policy parameters.")
47 |             parameters = self.policy.parameters
48 |         else:
49 |             logger.debug("Draw parameters at random.")
50 |             parameters = self.policy.parameter_space.sample()
51 |             self.policy.parameters = parameters
52 | 
53 |         # compute a rollout
54 |         trace = self.environment.rollout(self.policy)
55 |         reward = sum([t[2] for t in trace])
56 | 
57 |         # initialize one Gaussian process per kernel/likelihood pair
58 |         gp = []
59 |         for pars in zip(*self.gp_par):
60 |             gp.append(GPy.core.GP(array([parameters]), array([[reward]]),
61 |                                   *pars))
62 | 
63 |         # initialize SafeOpt
64 |         self.gp_opt = self._opt(gp, **self.gp_opt_par)
65 | 
66 |     def _step(self):
67 |         parameters = self.gp_opt.optimize()
68 |         self.policy.parameters = parameters
69 | 
70 |         trace = self.environment.rollout(self.policy)
71 |         reward = sum([t[2] for t in trace])
72 | 
73 |         self.gp_opt.add_new_data_point(parameters, reward)
74 |         self.rewards.append(reward)
75 | 
76 |     def _is_finished(self):
77 |         # finished once the mean reward over the trailing window of
78 |         # rollouts exceeds the target average reward
79 |         if len(self.rewards) <= self.window:
80 |             return False
81 |         return (mean(self.rewards[-self.window:])
82 |                 > self.avg_reward)
83 | 
84 | 
85 | class SafeOpt(_SafeOptWrap):
86 |     """Wrap SafeOpt algorithm.
87 | 
88 |     This class wraps the `SafeOpt` algorithm. It relies on the original
89 |     implementation of `SafeOpt` which has to be installed before using this
90 |     wrapper.
91 | 
92 |     Attributes
93 |     ----------
94 |     environment :
95 |         Environment to be optimized.
96 |     policy :
97 |         Policy to be optimized.
98 |     max_it :
99 |         Maximal number of iterations before we abort.
100 |     avg_reward : integer
101 |         Average reward at which the optimization will be finished.
102 |     window : integer
103 |         Window for the average reward.
104 |     gp : GPy Gaussian process
105 |         A Gaussian process which is initialized with safe, initial data points.
106 |         If a list of GPs is given, the first one is the value, while all the
107 |         other ones are safety constraints.
108 |     gp_opt : SafeOpt object
109 |         Instance of `SafeOpt` used for optimization.
110 |     gp_opt_par : dict
111 |         Dictionary of parameters to initialize `SafeOpt`.
112 |     """
113 | 
114 |     def __init__(self,
115 |                  environment, policy, max_it, avg_reward, window,
116 |                  kernel, likelihood, parameter_set, fmin,
117 |                  lipschitz=None, beta=3.0, num_contexts=0, threshold=0,
118 |                  scaling='auto', info=None):
119 |         """Initialize Attributes.
120 | 
121 |         Parameters
122 |         ----------
123 |         environment :
124 |             Environment to be optimized.
125 |         policy :
126 |             Policy to be optimized.
127 |         max_it :
128 |             Maximal number of iterations before we abort.
129 |         avg_reward : integer
130 |             Average reward at which the optimization will be finished.
131 |         window : integer
132 |             Window for the average reward.
133 |         kernel : GPy kernel
134 |             Kernel used to initialize the Gaussian process. If this is a list,
135 |             multiple kernels will be initialized. The size of this argument
136 |             has to agree with the size of the likelihood.
137 |         likelihood : GPy likelihood
138 |             Likelihood used to initialize kernels. If this is a list, multiple
139 |             likelihoods will be initialized. The size of this argument has to
140 |             agree with the size of the kernel.
141 |         parameter_set : 2d-array
142 |             Discretized set of candidate parameters, one vector per row.
143 |         fmin : list of floats
144 |             Safety threshold for the function value. With multiple safety
145 |             constraints this can also be a list of floats; the first entry
146 |             is the one for the values and can be set to None if not wanted.
147 |         lipschitz : list of floats
148 |             The Lipschitz constant of the system; if None, the GP confidence
149 |             intervals are used directly.
150 |         beta : float or callable
151 |             A constant or a function of the time step that scales the
152 |             confidence interval of the acquisition function.
153 |         num_contexts : int
154 |             Number of contexts of the optimization problem.
155 |         threshold : float or list of floats
156 |             The algorithm will not try to expand any points below this
157 |             threshold; this makes the expansion stop eventually. If a list,
158 |             it is the stopping criterion for all the GPs (scaling is ignored).
159 |         scaling : list of floats or "auto"
160 |             A list used to scale the GP uncertainties to compensate for
161 |             different input sizes. This should be set to the maximal variance
162 |             of each kernel. You should probably leave this to "auto" unless
163 |             your kernel is non-stationary.
164 |         info :
165 |             Dummy argument that can hold anything usable to identify the
166 |             configuration.
167 |         """
168 |         add_dependency(safeopt, 'SafeOpt')
169 |         add_dependency(GPy, 'GPy')
170 | 
171 |         # store the `SafeOpt` arguments.
172 |         gp_opt_par = {
173 |             'parameter_set': parameter_set,
174 |             'fmin': fmin,
175 |             'lipschitz': lipschitz,
176 |             'beta': beta,
177 |             'num_contexts': num_contexts,
178 |             'threshold': threshold,
179 |             'scaling': scaling}
180 | 
181 |         # store the kernel arguments
182 |         if not isinstance(kernel, list):
183 |             kernel = [kernel]
184 |         if not isinstance(likelihood, list):
185 |             likelihood = [likelihood]
186 |         assert len(likelihood) == len(kernel), (
187 |             'kernel and likelihood need to have the same length (%d != %d)'
188 |             % (len(likelihood), len(kernel)))
189 | 
190 |         gp_par = (kernel, likelihood)
191 | 
192 |         super(SafeOpt, self).__init__(safeopt.SafeOpt, gp_opt_par, gp_par,
193 |                                       environment, policy, max_it, avg_reward,
194 |                                       window)
195 | 
196 | 
197 | class SafeOptSwarm(_SafeOptWrap):
198 |     """Wrap SafeOptSwarm algorithm.
199 | 
200 |     This class wraps the `SafeOptSwarm` algorithm. It relies on the original
201 |     implementation of `SafeOptSwarm` which is part of the `safeopt` package
202 |     and has to be installed before using this class.
203 | 
204 |     Attributes
205 |     ----------
206 |     environment :
207 |         Environment to be optimized.
208 |     policy :
209 |         Policy to be optimized.
210 |     max_it :
211 |         Maximal number of iterations before we abort.
212 |     avg_reward : integer
213 |         Average reward at which the optimization will be finished.
214 |     window : integer
215 |         Window for the average reward.
216 |     gp : GPy Gaussian process
217 |         A Gaussian process which is initialized with safe, initial data points.
218 |         If a list of GPs is given, the first one is the value, while all the
219 |         other ones are safety constraints.
220 |     gp_opt : SafeOptSwarm object
221 |         Instance of SafeOptSwarm used for optimization.
222 |     gp_opt_par : dict
223 |         Dictionary of parameters to initialize `SafeOptSwarm`.
224 |     """
225 | 
226 |     def __init__(self,
227 |                  environment, policy, max_it, avg_reward, window,
228 |                  kernel, likelihood, fmin, bounds, beta=3.0, threshold=0,
229 |                  scaling='auto', swarm_size=20, info=None):
230 |         """Initialize Attributes.
231 | 
232 |         Parameters
233 |         ----------
234 |         environment :
235 |             Environment to be optimized.
236 |         policy :
237 |             Policy to be optimized.
238 |         max_it :
239 |             Maximal number of iterations before we abort.
240 |         avg_reward : integer
241 |             Average reward at which the optimization will be finished.
242 |         window : integer
243 |             Window for the average reward.
244 |         kernel : GPy kernel
245 |             Kernel used to initialize the Gaussian process. If this is a list,
246 |             multiple kernels will be initialized. The size of this argument
247 |             has to agree with the size of the likelihood.
248 |         likelihood : GPy likelihood
249 |             Likelihood used to initialize kernels. If this is a list, multiple
250 |             likelihoods will be initialized. The size of this argument has to
251 |             agree with the size of the kernel.
252 |         fmin : list of floats
253 |             Safety threshold for the function value. With multiple safety
254 |             constraints this can also be a list of floats; the first
255 |             entry is the one for the values and can be set to None
256 |             if not wanted.
257 |         bounds : pair of floats or list of pairs of floats
258 |             If a list is given, then each pair represents the lower/upper bound
259 |             in each dimension. Otherwise, we assume the same bounds for all
260 |             dimensions. This is mostly important for plotting or to restrict
261 |             particles to a certain domain.
262 |         beta : float or callable
263 |             A constant or a function of the time step that scales the
264 |             confidence interval of the acquisition function.
265 |         threshold : float or list of floats
266 |             The algorithm will not try to expand any points that are below
267 |             this threshold. This makes the algorithm stop expanding points
268 |             eventually. If a list, this represents the stopping criterion for
269 |             all the GPs. This ignores the scaling factor.
270 |         scaling : list of floats or "auto"
271 |             A list used to scale the GP uncertainties to compensate for
272 |             different input sizes. This should be set to the maximal variance
273 |             of each kernel. You should probably leave this to "auto" unless
274 |             your kernel is non-stationary.
275 |         swarm_size : int
276 |             The number of particles in each of the optimization swarms.
277 |         info :
278 |             Dummy argument that can hold anything usable to identify the
279 |             configuration.
280 |         """
281 |         add_dependency(safeopt, 'SafeOpt')
282 |         add_dependency(GPy, 'GPy')
283 | 
284 |         # store the `SafeOptSwarm` arguments.
285 |         gp_opt_par = {
286 |             'fmin': fmin,
287 |             'bounds': bounds,
288 |             'beta': beta,
289 |             'threshold': threshold,
290 |             'scaling': scaling,
291 |             'swarm_size': swarm_size
292 |         }
293 | 
294 |         # store the kernel arguments
295 |         if not isinstance(kernel, list):
296 |             kernel = [kernel]
297 |         if not isinstance(likelihood, list):
298 |             likelihood = [likelihood]
299 |         assert len(likelihood) == len(kernel), (
300 |             'kernel and likelihood need to have the same length (%d != %d)'
301 |             % (len(likelihood), len(kernel)))
302 | 
303 |         gp_par = (kernel, likelihood)
304 | 
305 |         super(SafeOptSwarm, self).__init__(safeopt.SafeOptSwarm, gp_opt_par,
306 |                                            gp_par, environment, policy, max_it,
307 |                                            avg_reward, window)
308 | 
--------------------------------------------------------------------------------
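A hedged construction sketch for the SafeOptSwarm wrapper above. LinearCar
and LinearPolicy stand in for any environment/policy pair shipped with the
package (their constructor signatures are assumed), the numeric values are
arbitrary, and optimize() is assumed to be the public entry point inherited
from AlgorithmBase, which is not shown in this file:

    import GPy  # standard GPy API

    from SafeRLBench.algo import SafeOptSwarm
    from SafeRLBench.envs import LinearCar      # assumed exports
    from SafeRLBench.policy import LinearPolicy

    policy = LinearPolicy(2, 1)  # assumed (state_dim, action_dim) signature
    d = policy.parameter_space.dimension

    # one kernel/likelihood pair -> a single value GP, no safety constraints
    kernel = GPy.kern.RBF(input_dim=d, variance=100., lengthscale=1.)
    likelihood = GPy.likelihoods.Gaussian(variance=0.05 ** 2)

    opt = SafeOptSwarm(LinearCar(), policy, max_it=50,
                       avg_reward=-20., window=5,
                       kernel=kernel, likelihood=likelihood,
                       fmin=[-100.], bounds=[(-1., 1.)] * d)
    opt.optimize()  # assumed AlgorithmBase entry point

    print(opt.rewards[-1])  # reward of the last evaluated parameters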
/SafeRLBench/algo/policygradient.py:
--------------------------------------------------------------------------------
1 | """Policy Gradient implementations."""
2 | 
3 | from SafeRLBench import AlgorithmBase
4 | from SafeRLBench.spaces import BoundedSpace
5 | 
6 | import numpy as np
7 | from numpy.linalg import solve, norm
8 | 
9 | from abc import ABCMeta, abstractmethod
10 | from six import add_metaclass
11 | 
12 | import logging
13 | 
14 | logger = logging.getLogger(__name__)
15 | 
16 | 
17 | class PolicyGradient(AlgorithmBase):
18 |     """Implementation of several policy gradient methods.
19 | 
20 |     This performs standard gradient ascent on the expected return, using
21 |     different policy gradient estimators.
22 | 
23 |     Attributes
24 |     ----------
25 |     environment :
26 |         Environment we want to optimize the policy on. This should be a
27 |         subclass of `EnvironmentBase`.
28 |     policy :
29 |         Policy we want to find parameters for. This should be a subclass of
30 |         `Policy`.
31 |     estimator :
32 |         Either a subclass of `PolicyGradientEstimator` or a string
33 |         naming one of the implemented estimators. A list of possible
34 |         estimator strings can be found in the Notes section. By default
35 |         'reinforce' will be used.
36 |     eps : float
37 |         The optimizer will stop optimization once the norm of the gradient is
38 |         smaller than `eps`.
39 |     rate : float
40 |         This is the learning rate we use for the update in each step.
41 | 
42 |     Notes
43 |     -----
44 |     These strings can be used to access the implemented estimators.
45 | 
46 |     +------------+---------------------------------+
47 |     |'forward_fd'| Uses forward finite differences.|
48 |     +------------+---------------------------------+
49 |     |'central_fd'| Uses central finite differences.|
50 |     +------------+---------------------------------+
51 |     |'reinforce' | Classic reinforce estimator.    |
52 |     +------------+---------------------------------+
53 |     |'gpomdp'    | Uses GPOMDP estimator.          |
54 |     +------------+---------------------------------+
55 |     """
56 | 
57 |     def __init__(self,
58 |                  environment, policy, estimator='reinforce',
59 |                  max_it=1000, eps=0.0001, est_eps=0.001,
60 |                  parameter_space=BoundedSpace(0, 1, (3,)),
61 |                  rate=1, var=0.5):
62 |         """Initialize PolicyGradient.
63 | 
64 |         Parameters
65 |         ----------
66 |         environment :
67 |             Environment we want to optimize the policy on. This should be a
68 |             subclass of `EnvironmentBase`.
69 |         policy :
70 |             Policy we want to find parameters for. This should be a subclass of
71 |             `Policy`.
72 |         estimator :
73 |             Either a subclass of `PolicyGradientEstimator` or a string. A
74 |             list of possible estimator strings can be found in the Notes
75 |             section. By default 'reinforce' will be used.
76 |         eps : float
77 |             The optimizer will stop optimization once the norm of the
78 |             gradient is smaller than `eps`.
79 |         est_eps : float
80 |             In case an estimator needs to converge, this is the margin it
81 |             will use to stop.
82 |         parameter_space :
83 |             Unused; the parameter space of the policy is used instead.
84 |         rate : float
85 |             This is the learning rate we use for the update in each step.
86 |         var : float
87 |             This parameter will be used depending on the estimator type, e.g.
88 |             for central differences this value corresponds to the grid size
89 |             that is used.
90 |         """
91 |         super(PolicyGradient, self).__init__(environment, policy, max_it)
92 | 
93 |         self.parameter_space = policy.parameter_space
94 | 
95 |         self.eps = eps
96 |         self.rate = rate
97 | 
98 |         if isinstance(estimator, str):
99 |             estimator = estimators[estimator]
100 |         elif issubclass(estimator, PolicyGradientEstimator):
101 |             pass
102 |         else:
103 |             raise ValueError('Invalid estimator.')
104 | 
105 |         self.estimator = estimator(environment, self.parameter_space, max_it,
106 |                                    est_eps, var)
107 | 
108 |     def _initialize(self):
109 |         logger.debug("Initializing Policy.")
110 |         # check if policy is already initialized by the user
111 |         if self.policy.initialized:
112 |             logger.debug("Use pre-set policy parameters.")
113 |             return self.policy.parameters
114 | 
115 |         # otherwise draw an element at random from the parameter space
116 |         parameter = self.parameter_space.sample()
117 | 
118 |         for _ in range(1000):
119 |             self.policy.parameters = parameter
120 |             grad = self.estimator(self.policy)
121 | 
122 |             if norm(grad) >= 1000 * self.eps:
123 |                 return parameter
124 | 
125 |             parameter = self.parameter_space.sample()
126 | 
127 |         logger.error('Unable to find non-zero gradient.')
128 | 
129 |     def _step(self):
130 |         grad = self.estimator(self.policy)
131 | 
132 |         parameter = self.policy.parameters
133 | 
134 |         self.policy.parameters = parameter + self.rate * grad
135 | 
136 |         self.grad = grad
137 | 
138 |     def _is_finished(self):
139 |         done = False
140 |         if np.isnan(self.grad).any():
141 |             done = True
142 |             logger.warning('Abort optimization: gradient contains NaN.')
143 |         done = done or (norm(self.grad) < self.eps)
144 |         return done
145 | 
146 | 
147 | @add_metaclass(ABCMeta)
148 | class PolicyGradientEstimator(object):
149 |     """Interface for gradient estimators."""
150 | 
151 |     name = 'Policy Gradient'
152 | 
153 |     def __init__(self, environment, parameter_space, max_it=200, eps=0.001):
154 |         """Initialize."""
155 |         self.environment = environment
156 |         self.state_dim = environment.state.shape[0]
157 |         self.par_dim = parameter_space.dimension
158 | 
159 |         self.eps = eps
160 |         self.max_it = max_it
161 | 
162 |     def __repr__(self):
163 |         return self.__class__.__name__
164 | 
165 |     def __call__(self, policy):
166 |         """Invoke _estimate_gradient(policy)."""
167 |         return self._estimate_gradient(policy)
168 | 
169 |     @abstractmethod
170 |     def _estimate_gradient(self, policy):
171 |         pass
172 | 
173 | 
174 | class ForwardFDEstimator(PolicyGradientEstimator):
175 |     """Forward Finite Differences Gradient Estimator."""
176 | 
177 |     name = 'Forward Finite Differences'
178 | 
179 |     def __init__(self, environment, parameter_space=BoundedSpace(0, 1, (3,)),
180 |                  max_it=200, eps=0.001, var=1):
181 |         """Initialize."""
182 |         super(ForwardFDEstimator, self).__init__(environment, parameter_space,
183 |                                                  max_it, eps)
184 |         self.var = var
185 | 
186 |     def _estimate_gradient(self, policy):
187 |         env = self.environment
188 |         var = self.var
189 |         # store current policy parameter
190 |         parameter = policy.parameters
191 |         par_dim = policy.parameter_space.dimension
192 | 
193 |         # reference return of the unperturbed policy
194 |         trace = env.rollout(policy)
195 |         j_ref = sum([x[2] for x in trace]) / len(trace)
196 | 
197 |         dj = np.zeros(2 * par_dim)
198 |         dv = np.append(np.eye(par_dim), -np.eye(par_dim), axis=0)
199 |         dv *= var
200 | 
201 |         for n in range(2 * par_dim):
202 |             variation = dv[n]
203 | 
204 |             policy.parameters = parameter + variation
205 |             trace_n = env.rollout(policy)
206 | 
207 |             jn = sum([x[2] for x in trace_n]) / len(trace_n)
208 | 
209 |             dj[n] = jn - j_ref  # approximates dv[n].dot(grad)
210 | 
211 |         grad = solve(dv.T.dot(dv), dv.T.dot(dj))
212 | 
213 |         # reset current policy parameter
214 |         policy.parameters = parameter
215 | 
216 |         return grad
217 | 
218 | 
219 | class CentralFDEstimator(PolicyGradientEstimator):
220 |     """Central Finite Differences Gradient Estimator."""
221 | 
222 |     name = 'Central Finite Differences'
223 | 
224 |     def __init__(self, environment, parameter_space=BoundedSpace(0, 1, (3,)),
225 |                  max_it=200, eps=0.001, var=1):
226 |         """Initialize."""
227 |         super(CentralFDEstimator, self).__init__(environment, parameter_space,
228 |                                                  max_it, eps)
229 |         self.var = var
230 | 
231 |     def _estimate_gradient(self, policy):
232 |         env = self.environment
233 | 
234 |         parameter = policy.parameters
235 |         par_dim = policy.parameter_space.dimension
236 | 
237 |         dj = np.zeros((par_dim,))
238 |         dv = np.eye(par_dim) * self.var / 2
239 | 
240 |         for n in range(par_dim):
241 |             variation = dv[n]
242 | 
243 |             policy.parameters = parameter + variation
244 |             trace_n = env.rollout(policy)
245 | 
246 |             policy.parameters = parameter - variation
247 |             trace_n_ref = env.rollout(policy)
248 | 
249 |             jn = sum([x[2] for x in trace_n]) / len(trace_n)
250 |             jn_ref = sum([x[2] for x in trace_n_ref]) / len(trace_n_ref)
251 | 
252 |             dj[n] = jn - jn_ref
253 | 
254 |         grad = solve(dv.T.dot(dv), dv.T.dot(dj))
255 |         policy.parameters = parameter
256 | 
257 |         return grad
258 | 
259 | 
260 | class ReinforceEstimator(PolicyGradientEstimator):
261 |     """Reinforce Gradient Estimator."""
262 | 
263 |     name = 'Reinforce'
264 | 
265 |     def __init__(self, environment, parameter_space=BoundedSpace(0, 1, (3,)),
266 |                  max_it=200, eps=0.001, lam=0.5):
267 |         """Initialize."""
268 |         super(ReinforceEstimator, self).__init__(environment, parameter_space,
269 |                                                  max_it, eps)
270 |         self.lam = lam
271 | 
272 |     def _estimate_gradient(self, policy):
273 |         env = self.environment
274 |         par_shape = policy.parameters.shape
275 |         max_it = self.max_it
276 | 
277 |         b_div = np.zeros(par_shape)
278 |         b_nom = np.zeros(par_shape)
279 | 
280 |         grads = np.zeros(par_shape)
281 |         grad = np.zeros(par_shape)
282 | 
283 |         for n in range(max_it):
284 |             trace = env.rollout(policy)
285 | 
286 |             lam = self.lam
287 | 
288 |             actions = [x[0] for x in trace]
289 |             states = [x[1] for x in trace]
290 | 
291 |             rewards_sum = sum([x[2] * lam**k for k, x in enumerate(trace)])
292 | 
293 |             lg_sum = sum(list(map(policy.grad_log_prob, states, actions)))
294 | 
295 |             b_div_n = lg_sum**2
296 |             b_nom_n = b_div_n * rewards_sum
297 | 
298 |             b_div += b_div_n
299 |             b_nom += b_nom_n
300 | 
301 |             b = b_nom / b_div
302 |             grad_n = lg_sum * (rewards_sum - b)
303 | 
304 |             grads += grad_n
305 | 
306 |             grad_old = grad
307 |             grad = grads / (n + 1)
308 | 
309 |             if n > 2 and norm(grad_old - grad) < self.eps:
310 |                 return grad
311 | 
312 |         logger.warning('ReinforceEstimator did not converge! '
313 |                        'You may want to raise max_it.')
314 |         return grad
315 | 
316 | 
317 | class GPOMDPEstimator(PolicyGradientEstimator):
318 |     """GPOMDP Gradient Estimator."""
319 | 
320 |     name = 'GPOMDP'
321 | 
322 |     def __init__(self, environment, parameter_space=BoundedSpace(0, 1, (3,)),
323 |                  max_it=200, eps=0.001, lam=0.5):
324 |         """Initialize."""
325 |         super(GPOMDPEstimator, self).__init__(environment, parameter_space,
326 |                                               max_it, eps)
327 |         self.lam = lam
328 | 
329 |     def _estimate_gradient(self, policy):
330 |         env = self.environment
331 |         h = env.horizon
332 |         shape = policy.parameters.shape
333 | 
334 |         b_nom = np.zeros((h,) + shape)  # per-time-step baseline numerator
335 |         b_div = np.zeros((h,) + shape)  # per-time-step baseline denominator
336 |         b = np.zeros((h,) + shape)
337 |         grad = np.zeros(shape)
338 | 
339 |         lam = self.lam
340 | 
341 |         for n in range(self.max_it):
342 |             trace = env.rollout(policy)
343 |             b_n = np.zeros((h,) + shape)
344 | 
345 |             for k, state in enumerate(trace):
346 |                 update = policy.grad_log_prob(state[1], state[0])
347 |                 for j in range(k + 1):
348 |                     b_n[j] += update
349 | 
350 |             fac = n / (n + 1.)  # true division, also under Python 2
351 | 
352 |             b_n = b_n**2
353 |             b_div = fac * b_div + b_n / (n + 1)
354 | 
355 |             for k, state in enumerate(trace):
356 |                 b_nom[k] = fac * b_nom[k]
357 |                 b_nom[k] += b_n[k] * state[2] * lam**k / (n + 1)
358 | 
359 |             b = b_nom / b_div
360 | 
361 |             grad_update = np.zeros(shape)
362 |             update = np.zeros(shape)
363 |             for k, state in enumerate(trace):
364 |                 update += policy.grad_log_prob(state[1], state[0])
365 |                 grad_update += update * (-b[k] + state[2] * lam**k)
366 | 
367 |             if n > 2 and norm(grad_update / (n + 1)) < self.eps:
368 |                 grad /= (n + 1)
369 |                 return grad
370 |             grad += np.nan_to_num(grad_update)
371 | 
372 |         logger.warning('GPOMDP did not converge! '
373 |                        'You may want to raise max_it.')
374 |         grad /= n + 1
375 |         return grad
376 | 
377 | 
378 | # dictionary for resolving estimator strings
379 | estimators = {
380 |     'forward_fd': ForwardFDEstimator,
381 |     'central_fd': CentralFDEstimator,
382 |     'reinforce': ReinforceEstimator,
383 |     'gpomdp': GPOMDPEstimator
384 | }
385 | 
--------------------------------------------------------------------------------
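And a matching sketch for the PolicyGradient algorithm above, under the same
assumptions as before: LinearCar and LinearPolicy are assumed stand-ins for
the package's environment and policy classes, and optimize() is assumed to
be the public entry point inherited from AlgorithmBase.

    from SafeRLBench.algo import PolicyGradient
    from SafeRLBench.envs import LinearCar      # assumed exports
    from SafeRLBench.policy import LinearPolicy

    env = LinearCar()
    policy = LinearPolicy(2, 1)  # assumed (state_dim, action_dim) signature

    # 'central_fd' resolves to CentralFDEstimator via the estimators dict
    algo = PolicyGradient(env, policy, estimator='central_fd',
                          max_it=500, eps=1e-4, rate=0.1, var=0.1)
    algo.optimize()  # assumed AlgorithmBase entry point

    # the AlgoMonitor mixin records one parameter vector per step and, after
    # the run, one trace and one accumulated reward per recorded parameter
    print(algo.monitor.step_cnt, algo.monitor.rewards[-1])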