├── docs
│   ├── introduction.rst
│   ├── requirements.txt
│   ├── api.rst
│   ├── _templates
│   │   └── template.rst
│   ├── index.rst
│   ├── Makefile
│   ├── make.bat
│   └── conf.py
├── requirements_dev.txt
├── .dockerignore
├── requirements.txt
├── .gitignore
├── .travis.yml
├── scripts
│   ├── jupyter_output.py
│   └── test_code.sh
├── Dockerfile.python2
├── Dockerfile.python3
├── Makefile
├── safe_learning
│   ├── configuration.py
│   ├── __init__.py
│   ├── tests
│   │   ├── test_lyapunov.py
│   │   ├── test_utilities.py
│   │   ├── test_rl.py
│   │   └── test_functions.py
│   ├── reinforcement_learning.py
│   └── utilities.py
├── LICENSE
├── Dockerfile.dev
├── examples
│   ├── README.rst
│   ├── plotting.py
│   ├── basic_dynamic_programming.ipynb
│   ├── 1d_region_of_attraction_estimate.ipynb
│   ├── 1d_example.ipynb
│   ├── inverted_pendulum.ipynb
│   ├── reinforcement_learning_cartpole.ipynb
│   └── adaptive_safety_verification.ipynb
├── setup.py
└── README.rst

--------------------------------------------------------------------------------
/docs/introduction.rst:
--------------------------------------------------------------------------------
Introduction
============

TODO

--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
sphinx
numpydoc >= 0.6
sphinx_rtd_theme >= 0.1.8
mock

--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
API Documentation
*****************

.. automodule:: safe_learning

--------------------------------------------------------------------------------
/requirements_dev.txt:
--------------------------------------------------------------------------------
mock
flake8>=3.0,<=3.5.0
pytest==4.6.9
pytest-cov==2.8.1
pydocstyle>=2.0,<2.1

--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
examples
htmlcov
.travis.yml
.gitignore
.git
*.pyc
.ipynb_checkpoints
**/__pycache__
safe_learning.egg-info

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
numpy>=1.0,<1.15
scipy>=1.0.0,<=1.2.1
gpflow==0.4.0
matplotlib<=4.0.0
scs==2.0.2
cvxpy>=1,<=1.0.15
tensorflow>=1.6.0,<=1.12.0
future<=0.18.0

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.pyc
.idea
.ipynb_checkpoints
htmlcov
.coverage
.cache
safe_learning.egg-info
__pycache__
docs/safe_learning.*
docs/_build
*.swp
*.DS_Store
.pytest_cache

--------------------------------------------------------------------------------
/docs/_templates/template.rst:
--------------------------------------------------------------------------------
{{ name }}
{{ underline }}

.. currentmodule:: {{ module }}
.. auto{{ objtype }}:: {{ objname }} {% if objtype == "class" %}
   :members:
   :inherited-members:
{% endif %}

--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
Welcome to the Safe Learning documentation!
2 | =========================================== 3 | 4 | .. include:: introduction.rst 5 | 6 | .. toctree:: 7 | :caption: Contents 8 | :maxdepth: 3 9 | 10 | api 11 | 12 | Indices and tables 13 | ================== 14 | 15 | * :ref:`genindex` 16 | * :ref:`modindex` 17 | * :ref:`search` 18 | 19 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | sudo: required 4 | services: 5 | - docker 6 | 7 | env: 8 | - PYTHON=python2 9 | - PYTHON=python3 10 | 11 | # Setup anaconda 12 | install: 13 | # Disabled since docker pull does not affect cache 14 | # Fixed in Docker 1.13 with --cache-from 15 | # - docker pull befelix/lyapunov-learning-private:${PYTHON} || true 16 | - docker build -f Dockerfile.${PYTHON} -t test-image . 17 | - docker ps -a 18 | 19 | # Run tests 20 | script: 21 | - docker run test-image scripts/test_code.sh 22 | 23 | -------------------------------------------------------------------------------- /scripts/jupyter_output.py: -------------------------------------------------------------------------------- 1 | def scrub_output_pre_save(model, **kwargs): 2 | """scrub output before saving notebooks""" 3 | # only run on notebooks 4 | if model['type'] != 'notebook': 5 | return 6 | # only run on nbformat v4 7 | if model['content']['nbformat'] != 4: 8 | return 9 | 10 | for cell in model['content']['cells']: 11 | if cell['cell_type'] != 'code': 12 | continue 13 | cell['outputs'] = [] 14 | cell['execution_count'] = None 15 | 16 | c.FileContentsManager.pre_save_hook = scrub_output_pre_save 17 | -------------------------------------------------------------------------------- /Dockerfile.python2: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda:4.5.11 2 | 3 | # Install build essentials and clean up 4 | RUN apt-get update --quiet \ 5 | && apt-get install -y --no-install-recommends --quiet build-essential \ 6 | && apt-get clean \ 7 | && rm -rf /var/lib/apt/lists/* 8 | 9 | # Update conda, install packages, and clean up 10 | RUN conda install python=2.7 --yes --quiet \ 11 | && conda clean --yes --all \ 12 | && hash -r 13 | 14 | # Copy the main code 15 | COPY . /code 16 | RUN cd /code \ 17 | && pip install pip==18.1 \ 18 | && pip install numpy==1.14.5 \ 19 | && pip install -e .[test] --process-dependency-links \ 20 | && rm -rf /root/.cache 21 | 22 | WORKDIR /code 23 | -------------------------------------------------------------------------------- /Dockerfile.python3: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3:4.5.11 2 | 3 | # Install build essentials and clean up 4 | RUN apt-get update --quiet \ 5 | && apt-get install -y --no-install-recommends --quiet build-essential \ 6 | && apt-get clean \ 7 | && rm -rf /var/lib/apt/lists/* 8 | 9 | # Update conda, install packages, and clean up 10 | RUN conda install python=3.5 --yes --quiet \ 11 | # && conda clean --yes --all \ 12 | && hash -r 13 | 14 | # Copy the main code 15 | COPY . 
/code 16 | RUN cd /code \ 17 | && pip install pip==18.1 \ 18 | && pip install numpy==1.14.5 \ 19 | && pip install -e .[test] --process-dependency-links \ 20 | && rm -rf /root/.cache 21 | 22 | WORKDIR /code 23 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = SafeLearning 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help 2 | 3 | help: 4 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 5 | 6 | doc: ## Build documentation (docs/_build/html/index.html) 7 | cd docs && $(MAKE) html 8 | 9 | coverage: ## Construct coverage (htmlcov/index.html) 10 | coverage html 11 | 12 | test-local: ## Test the local installation of the code 13 | ./scripts/test_code.sh 14 | 15 | test: docker ## Test the docker images 16 | docker run safe_learning_py2 make test-local 17 | docker run safe_learning_py3 make test-local 18 | 19 | dev: ## Mount current code as volume and run jupyterlab for development 20 | docker build -f Dockerfile.dev -t safe_learning_dev . 21 | docker run -p 8888:8888 -v $(shell pwd):/code safe_learning_dev 22 | 23 | docker: ## Build the docker images 24 | docker build -f Dockerfile.python2 -t safe_learning_py2 . 25 | docker build -f Dockerfile.python3 -t safe_learning_py3 . 26 | 27 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=SafeLearning 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /safe_learning/configuration.py: -------------------------------------------------------------------------------- 1 | """General configuration class for dtypes.""" 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import tensorflow as tf 6 | 7 | 8 | class Configuration(object): 9 | """Configuration class.""" 10 | 11 | def __init__(self): 12 | """Initialization.""" 13 | super(Configuration, self).__init__() 14 | 15 | # Dtype for computations 16 | self.dtype = tf.float64 17 | 18 | # Batch size for stability verification 19 | self.gp_batch_size = 10000 20 | 21 | @property 22 | def np_dtype(self): 23 | """Return the numpy dtype.""" 24 | return self.dtype.as_numpy_dtype 25 | 26 | def __repr__(self): 27 | """Print the parameters.""" 28 | params = ['Configuration parameters:', ''] 29 | for param, value in self.__dict__.items(): 30 | params.append('{}: {}'.format(param, value.__repr__())) 31 | 32 | return '\n'.join(params) 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Felix Berkenkamp 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /scripts/test_code.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | module="safe_learning" 4 | 5 | get_script_dir () { 6 | SOURCE="${BASH_SOURCE[0]}" 7 | # While $SOURCE is a symlink, resolve it 8 | while [ -h "$SOURCE" ]; do 9 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 10 | SOURCE="$( readlink "$SOURCE" )" 11 | # If $SOURCE was a relative symlink (so no "/" as prefix, need to resolve it relative to the symlink base directory 12 | [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" 13 | done 14 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 15 | echo "$DIR" 16 | } 17 | 18 | # Change to script root 19 | cd $(get_script_dir)/.. 
GREEN='\033[0;32m'
NC='\033[0m'

# Run style tests
echo -e "${GREEN}Running style tests.${NC}"
flake8 $module --exclude test*.py,__init__.py --ignore=E402,E731,W503 --show-source || { exit 1; }

# Ignore import errors for __init__ and tests
flake8 $module --filename=__init__.py,test*.py --ignore=F,E402,W503 --show-source || { exit 1; }

# Test docstring conventions
echo -e "${GREEN}Testing docstring conventions.${NC}"
pydocstyle $module --convention=numpy || { exit 1; }

# Run unit tests
echo -e "${GREEN}Running unit tests.${NC}"
pytest --doctest-modules --cov --cov-fail-under=80 $module || { exit 1; }

--------------------------------------------------------------------------------
/Dockerfile.dev:
--------------------------------------------------------------------------------
FROM continuumio/miniconda3

# Install build essentials and clean up
RUN apt-get update --quiet \
    && apt-get install -y --no-install-recommends --quiet build-essential \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Update conda, install packages, and clean up
RUN conda update conda --yes --quiet \
    && conda install python=3.5 pip numpy scipy pandas --yes --quiet \
    && conda clean --yes --all \
    && hash -r

# Get the requirements files (separate from the main body)
COPY requirements.txt requirements_dev.txt /reqs/

# Install requirements and clean up
RUN pip --no-cache-dir install -r /reqs/requirements.txt \
    && pip --no-cache-dir install -r /reqs/requirements_dev.txt \
    && pip install jupyter jupyterlab dumb-init \
    && rm -rf /root/.cache \
    && rm -rf /reqs

# Manually install GPflow and clean up
RUN git clone --depth=1 --branch=0.4.0 https://github.com/GPflow/GPflow.git \
    && cd GPflow \
    && python setup.py install \
    && rm -rf /GPflow

# Output scrubber for jupyter
ADD scripts/jupyter_output.py /

RUN jupyter notebook --generate-config \
    && cat /jupyter_output.py >> /root/.jupyter/jupyter_notebook_config.py \
    && rm /jupyter_output.py

WORKDIR /code

# Make sure Ctrl+C commands can be forwarded
ENTRYPOINT ["dumb-init", "--"]

CMD python setup.py develop \
    && jupyter lab --ip="0.0.0.0" --no-browser --allow-root

--------------------------------------------------------------------------------
/examples/README.rst:
--------------------------------------------------------------------------------
Example notebooks for the library
=================================

Introductions
-------------
- `1d_region_of_attraction_estimate.ipynb <./1d_region_of_attraction_estimate.ipynb>`_ shows how to estimate and learn the region of attraction for a fixed policy.
- `basic_dynamic_programming.ipynb <./basic_dynamic_programming.ipynb>`_ does basic dynamic programming with piecewise linear function approximators for the mountain car example.
- `reinforcement_learning_pendulum.ipynb <./reinforcement_learning_pendulum.ipynb>`_ does approximate policy iteration in an actor-critic framework with neural networks for the inverted pendulum.
- `reinforcement_learning_cartpole.ipynb <./reinforcement_learning_cartpole.ipynb>`_ does the same as above for the cart-pole (i.e., the inverted pendulum on a cart).
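
To run these notebooks locally, one convenient route (assuming Docker is
installed) is the ``dev`` target in the repository Makefile, which builds the
development image, mounts the current checkout under ``/code``, and starts
JupyterLab on port 8888::

    make dev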

Experiments
-----------
- `1d_example.ipynb <./1d_example.ipynb>`_ contains a 1D example including plots of the sets.
- `inverted_pendulum.ipynb <./inverted_pendulum.ipynb>`_ contains a full neural network example with an inverted pendulum.
- `adaptive_safety_verification.ipynb <./adaptive_safety_verification.ipynb>`_ investigates the benefits of an adaptive discretization in identifying safe sets for the inverted pendulum.
- `lyapunov_function_learning.ipynb <./lyapunov_function_learning.ipynb>`_ demonstrates how a parameterized Lyapunov candidate for the inverted pendulum can be trained with the machine learning approach in [1]_.

.. [1] S. M. Richards, F. Berkenkamp, A. Krause,
   `The Lyapunov Neural Network: Adaptive Stability Certification for Safe Learning of Dynamical Systems <https://arxiv.org/abs/1808.00924>`_. Conference on Robot Learning (CoRL), 2018.

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup, find_packages
from setuptools.command.test import test as TestCommand
import sys
import pip

class PyTest(TestCommand):
    user_options = [('pytest-args=', 'a', "Arguments to pass to pytest")]

    def initialize_options(self):
        TestCommand.initialize_options(self)
        self.pytest_args = ''

    def run_tests(self):
        import shlex
        # Import here, because outside the eggs aren't loaded
        import pytest
        errno = pytest.main(shlex.split(self.pytest_args))
        sys.exit(errno)

with open('requirements.txt', 'r') as f:
    requirements = f.read().splitlines()

with open('requirements_dev.txt', 'r') as f:
    test_requirements = f.read().splitlines()

setup(
    name="safe_learning",
    version="0.0.1",
    author="Felix Berkenkamp",
    author_email="fberkenkamp@gmail.com",
    description="Safe reinforcement learning with stability guarantees.",
    license="MIT",
    keywords="safe reinforcement learning Lyapunov",
    url="https://github.com/befelix/lyapunov-learning",
    packages=find_packages(exclude=['docs']),
    setup_requires=['numpy'],
    install_requires=requirements,
    extras_require={'test': list(test_requirements)},
    tests_require=test_requirements,
    dependency_links=['git+https://github.com/GPflow/GPflow.git@0.4.0#egg=gpflow-0.4.0'],
    cmdclass={'test': PyTest},
    classifiers=[
        # How mature is this project? Common values are
        #   3 - Alpha
        #   4 - Beta
        #   5 - Production/Stable
        'Development Status :: 3 - Alpha',

        # Indicate who your project is intended for
        'Intended Audience :: Developers',
        'Topic :: Software Development :: Build Tools',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.5',
    ],
)

--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
=====================================================
Safe Reinforcement Learning with Stability Guarantees
=====================================================

.. image:: https://travis-ci.org/befelix/safe_learning.svg?branch=master
    :target: https://travis-ci.org/befelix/safe_learning
    :alt: Build status
.. image:: https://readthedocs.org/projects/safe-learning/badge/?version=latest
    :target: http://safe-learning.readthedocs.io/en/latest/?badge=latest
    :alt: Documentation Status

This code accompanies the paper [1]_ and implements methods to estimate the region of attraction of a policy and to optimize the policy subject to stability constraints. For the old numpy-based code to estimate the region of attraction in [2]_, see the `lyapunov-learning <https://github.com/befelix/lyapunov-learning>`_ repository. The code for learning Lyapunov functions from [3]_ can be found in the `examples <./examples>`_ folder.

.. [1] F. Berkenkamp, M. Turchetta, A. P. Schoellig, A. Krause,
   `Safe Model-based Reinforcement Learning with Stability Guarantees <https://arxiv.org/abs/1705.08551>`_
   in Proc. of the Conference on Neural Information Processing Systems (NIPS), 2017.

.. [2] F. Berkenkamp, R. Moriconi, A. P. Schoellig, A. Krause,
   `Safe Learning of Regions of Attraction in Uncertain, Nonlinear Systems with Gaussian Processes <https://arxiv.org/abs/1603.04915>`_
   in Proc. of the Conference on Decision and Control (CDC), 2016.

.. [3] S. M. Richards, F. Berkenkamp, A. Krause,
   `The Lyapunov Neural Network: Adaptive Stability Certification for Safe Learning of Dynamical Systems <https://arxiv.org/abs/1808.00924>`_. Conference on Robot Learning (CoRL), 2018.

Getting started
---------------

This library is tested with both Python 2.7 and 3.5. Since ``pip>=19`` does not support ``--process-dependency-links`` (see below), first install the following pinned dependencies::

    pip install pip==18.1
    pip install numpy==1.14.5

You can then install the library by cloning the repository and running

``pip install . --process-dependency-links``

To run the tests with the bash script in ``scripts/test_code.sh``, you need to install additional dependencies with

``pip install ".[test]" --process-dependency-links``

The ``--process-dependency-links`` flag is needed to install ``gpflow==0.4.0``, which is not on pypi. You can skip it if that particular version of the library is already installed.

You can then find example jupyter notebooks and the experiments from the paper in the `examples <./examples>`_ folder.

--------------------------------------------------------------------------------
/safe_learning/__init__.py:
--------------------------------------------------------------------------------
"""
The `safe_learning` package implements tools for safe reinforcement learning.

Stability verification
----------------------

The :class:`Lyapunov` class provides the main point of entry for the stability
analysis. It can be used to compute the region of attraction and together with
:func:`get_safe_sample` sets up the safe sampling scheme.

.. autosummary::

   :template: template.rst
   :toctree:

   Lyapunov
   get_safe_sample
   smallest_boundary_value
   get_lyapunov_region


Approximate Dynamic Programming
-------------------------------

We use approximate dynamic programming to compute value functions.

.. autosummary::

   :template: template.rst
   :toctree:

   PolicyIteration


Functions
---------

These are generic function classes for convenience.
They are all compatible 39 | with :class:`Lyapunov` and :class:`PolicyIteration` and can be added, 40 | multiplied, and stacked as needed. 41 | 42 | .. autosummary:: 43 | 44 | :template: template.rst 45 | :toctree: 46 | 47 | GridWorld 48 | FunctionStack 49 | Triangulation 50 | PiecewiseConstant 51 | LinearSystem 52 | QuadraticFunction 53 | Saturation 54 | NeuralNetwork 55 | GaussianProcess 56 | GPRCached 57 | sample_gp_function 58 | 59 | 60 | Utilities 61 | --------- 62 | 63 | These are utilities to make working with tensorflow more pleasant. 64 | 65 | .. autosummary:: 66 | 67 | :template: template.rst 68 | :toctree: 69 | 70 | utilities.combinations 71 | utilities.linearly_spaced_combinations 72 | utilities.lqr 73 | utilities.dlqr 74 | utilities.ellipse_bounds 75 | utilities.concatenate_inputs 76 | utilities.make_tf_fun 77 | utilities.with_scope 78 | utilities.use_parent_scope 79 | utilities.add_weight_constraint 80 | utilities.batchify 81 | utilities.get_storage 82 | utilities.set_storage 83 | utilities.unique_rows 84 | utilities.gradient_clipping 85 | 86 | """ 87 | 88 | from __future__ import absolute_import 89 | 90 | # Add the configuration settings 91 | from .configuration import Configuration 92 | config = Configuration() 93 | del Configuration 94 | 95 | from .functions import * 96 | from .lyapunov import * 97 | from .reinforcement_learning import * 98 | from . import utilities 99 | 100 | try: 101 | from pytest import main as run_tests 102 | except ImportError: 103 | def run_tests(): 104 | """Run the test package.""" 105 | raise ImportError('Testing requires the pytest package.') 106 | -------------------------------------------------------------------------------- /safe_learning/tests/test_lyapunov.py: -------------------------------------------------------------------------------- 1 | """Unit tests for the Lyapunov functions.""" 2 | 3 | from __future__ import division, print_function, absolute_import 4 | 5 | from numpy.testing import assert_allclose, assert_equal 6 | import pytest 7 | import unittest 8 | import numpy as np 9 | import tensorflow as tf 10 | import sys 11 | 12 | from safe_learning.functions import (LinearSystem, GridWorld) 13 | from safe_learning.lyapunov import (Lyapunov, smallest_boundary_value) 14 | 15 | if sys.version_info.major <= 2: 16 | import mock 17 | else: 18 | from unittest import mock 19 | 20 | 21 | class TestLyapunov(object): 22 | """Test the Lyapunov base class.""" 23 | 24 | def test_safe_set_init(self): 25 | """Test the safe set initialization.""" 26 | with tf.Session(): 27 | discretization = GridWorld([[0, 1], [0, 1]], 3) 28 | lyap_fun = lambda x: tf.reduce_sum(tf.square(x), axis=1) 29 | 30 | dynamics = LinearSystem(np.array([[1, 0.01], 31 | [0., 1.]])) 32 | lf = 0.4 33 | lv = 0.3 34 | eps = 0.5 35 | 36 | policy = lambda x: 0. 
* x 37 | lyap = Lyapunov(discretization, lyap_fun, dynamics, lf, lv, 38 | eps, policy) 39 | 40 | initial_set = [1, 3] 41 | lyap = Lyapunov(discretization, lyap_fun, dynamics, lf, lv, 42 | eps, policy, initial_set=initial_set) 43 | 44 | initial_set = np.array([False, True, False, True, False, 45 | False, False, False, False]) 46 | assert_equal(initial_set, lyap.safe_set) 47 | 48 | def test_update(self): 49 | """Test the update step.""" 50 | with tf.Session(): 51 | discretization = GridWorld([[-1, 1]], 3) 52 | lyap_fun = lambda x: tf.reduce_sum(tf.square(x), 53 | axis=1, 54 | keep_dims=True) 55 | policy = lambda x: -.1 * x 56 | 57 | dynamics = LinearSystem(np.array([[1, 1.]])) 58 | lf = 0.4 59 | lv = 0.3 60 | eps = .5 61 | 62 | initial_set = [1] 63 | 64 | lyap = Lyapunov(discretization, lyap_fun, dynamics, lf, lv, 65 | eps, policy, initial_set=initial_set) 66 | 67 | lyap.update_safe_set() 68 | assert_equal(lyap.safe_set, np.array([False, True, False])) 69 | 70 | eps = 0. 71 | lyap = Lyapunov(discretization, lyap_fun, dynamics, lf, lv, 72 | eps, policy, initial_set=initial_set) 73 | lyap.update_safe_set() 74 | assert_equal(lyap.safe_set, np.ones(3, dtype=np.bool)) 75 | 76 | 77 | def test_smallest_boundary_value(): 78 | """Test the boundary value function.""" 79 | with tf.Session(): 80 | fun = lambda x: 2 * tf.reduce_sum(tf.abs(x), axis=1) 81 | discretization = GridWorld([[-1.5, 1], [-1, 1.5]], [3, 3]) 82 | min_value = smallest_boundary_value(fun, discretization) 83 | assert min_value == 2.5 84 | 85 | 86 | if __name__ == '__main__': 87 | unittest.main() 88 | -------------------------------------------------------------------------------- /safe_learning/tests/test_utilities.py: -------------------------------------------------------------------------------- 1 | """Test the utilities.""" 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import pytest 6 | import numpy as np 7 | import tensorflow as tf 8 | from numpy.testing import assert_allclose 9 | 10 | from safe_learning.utilities import (dlqr, get_storage, set_storage, 11 | get_feed_dict, unique_rows, 12 | compute_trajectory) 13 | 14 | from safe_learning import LinearSystem 15 | 16 | 17 | def test_dlqr(): 18 | """Test the dlqr function.""" 19 | true_k = np.array([[0.61803399]]) 20 | true_p = np.array([[1.61803399]]) 21 | 22 | k, p = dlqr(1, 1, 1, 1) 23 | assert_allclose(k, true_k) 24 | assert_allclose(p, true_p) 25 | 26 | k, p = dlqr([[1]], [[1]], [[1]], [[1]]) 27 | assert_allclose(k, true_k) 28 | assert_allclose(p, true_p) 29 | 30 | 31 | class TestStorage(object): 32 | """Test the class storage.""" 33 | 34 | @pytest.fixture 35 | def sample_class(self): 36 | """Sample class for testing.""" 37 | class A(object): 38 | """Some class.""" 39 | 40 | def __init__(self): 41 | """Initialize.""" 42 | super(A, self).__init__() 43 | self.storage = {} 44 | 45 | def method(self, value, index=None): 46 | storage = get_storage(self.storage, index=index) 47 | set_storage(self.storage, [('value', value)], index=index) 48 | return storage 49 | 50 | return A() 51 | 52 | def test_storage(self, sample_class): 53 | """Test the storage.""" 54 | storage = sample_class.method(5) 55 | assert storage is None 56 | storage = sample_class.method(4) 57 | assert storage['value'] == 5 58 | storage = sample_class.method(None) 59 | assert storage['value'] == 4 60 | 61 | # Test index 62 | storage = sample_class.method(3, index='test') 63 | assert storage is None 64 | storage = sample_class.method(4, index='test') 65 | assert storage['value'] == 3 
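        # A fresh index starts out empty; an existing index returns what
        # the previous call stored under that index.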
66 | storage = sample_class.method(3, index='test2') 67 | assert storage is None 68 | storage = sample_class.method(3, index='test') 69 | assert storage['value'] is 4 70 | 71 | 72 | def test_get_feed_dict(): 73 | """Test the global get_feed_dict method.""" 74 | graph = tf.Graph() 75 | feed_dict = get_feed_dict(graph) 76 | # Initialized new dictionary 77 | assert feed_dict == {} 78 | 79 | # Test assignment 80 | feed_dict['test'] = 5 81 | 82 | # Make sure we keep getting the same object 83 | assert feed_dict is get_feed_dict(graph) 84 | 85 | 86 | def test_unique_rows(): 87 | """Test the unique_rows function.""" 88 | a = np.array([[1, 1], [1, 2], [1, 3], [1, 2], [1, 3], [1, 4], [2, 3]]) 89 | uniques = np.array([[1, 1], [1, 2], [1, 3], [1, 4], [2, 3]]) 90 | 91 | assert_allclose(unique_rows(a), uniques) 92 | 93 | 94 | def test_compute_trajectory(): 95 | """Test the compute_trajectory function.""" 96 | A = np.array([[1., 0.1], 97 | [0., 1.]]) 98 | B = np.array([[0.01], 99 | [0.1]]) 100 | 101 | dynamics = LinearSystem((A, B)) 102 | Q = np.diag([1., 0.01]) 103 | R = np.array([[0.01]]) 104 | K, _ = dlqr(A, B, Q, R) 105 | policy = LinearSystem([-K]) 106 | 107 | x0 = np.array([[0.1, 0.]]) 108 | with tf.Session() as sess: 109 | res = compute_trajectory(dynamics, policy, x0, num_steps=20) 110 | 111 | states, actions = res 112 | assert_allclose(states[[0], :], x0) 113 | assert_allclose(states[-1, :], np.array([0., 0.]), atol=0.01) 114 | assert_allclose(actions, states[:-1].dot(-K.T)) -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Safe Learning documentation build configuration file, created by 5 | # sphinx-quickstart on Tue May 23 07:02:08 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | from __future__ import absolute_import 21 | 22 | import sys 23 | import os 24 | import shlex 25 | import mock 26 | 27 | MOCK_MODULES = ['tensorflow', 28 | 'gpflow', 29 | 'future', 30 | 'future.builtins', 31 | 'future.backports', 32 | 'mpl_toolkits', 33 | 'mpl_toolkits.mplot3d', 34 | 'matplotlib', 35 | 'matplotlib.pyplot', 36 | 'numpy', 37 | 'scipy', 38 | 'scipy.interpolate', 39 | 'scipy.spatial', 40 | 'scipy.linalg', 41 | 'scipy.spatial.distance', 42 | 'scipy.special', 43 | 'scipy.stats', 44 | ] 45 | 46 | for mod_name in MOCK_MODULES: 47 | sys.modules[mod_name] = mock.Mock() 48 | 49 | sys.path.insert(0, os.path.abspath('../')) 50 | # -- General configuration ------------------------------------------------ 51 | 52 | # If your documentation needs a minimal Sphinx version, state it here. 53 | # 54 | # needs_sphinx = '1.0' 55 | 56 | # Add any Sphinx extension module names here, as strings. They can be 57 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 58 | # ones. 
59 | extensions = ['sphinx.ext.autodoc', 60 | 'numpydoc', 61 | 'sphinx.ext.autosummary'] 62 | 63 | # Add any paths that contain templates here, relative to this directory. 64 | templates_path = ['_templates'] 65 | 66 | # Generate an autosummary with one file per function. 67 | autosummary_generate = True 68 | autodoc_default_flags = [] 69 | 70 | # The suffix(es) of source filenames. 71 | # You can specify multiple suffix as a list of string: 72 | # 73 | # source_suffix = ['.rst', '.md'] 74 | source_suffix = '.rst' 75 | 76 | # The master toctree document. 77 | master_doc = 'index' 78 | 79 | # General information about the project. 80 | project = 'Safe Learning' 81 | copyright = '2017, Felix Berkenkamp, Matteo Turchetta, Angela P. Schoellig, Andreas Krause' 82 | author = 'Felix Berkenkamp, Matteo Turchetta, Angela P. Schoellig, Andreas Krause' 83 | 84 | # The version info for the project you're documenting, acts as replacement for 85 | # |version| and |release|, also used in various other places throughout the 86 | # built documents. 87 | # 88 | # The short X.Y version. 89 | version = '0.1' 90 | # The full version, including alpha/beta/rc tags. 91 | release = '0.1' 92 | 93 | # The language for content autogenerated by Sphinx. Refer to documentation 94 | # for a list of supported languages. 95 | # 96 | # This is also used if you do content translation via gettext catalogs. 97 | # Usually you set "language" from the command line for these cases. 98 | language = None 99 | 100 | # List of patterns, relative to source directory, that match files and 101 | # directories to ignore when looking for source files. 102 | # This patterns also effect to html_static_path and html_extra_path 103 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 104 | 105 | # The name of the Pygments (syntax highlighting) style to use. 106 | pygments_style = 'sphinx' 107 | 108 | # If true, `todo` and `todoList` produce output, else they produce nothing. 109 | todo_include_todos = False 110 | 111 | 112 | # -- Options for HTML output ---------------------------------------------- 113 | 114 | # The theme to use for HTML and HTML Help pages. See the documentation for 115 | # a list of builtin themes. 116 | # 117 | html_theme = 'sphinx_rtd_theme' 118 | 119 | # Theme options are theme-specific and customize the look and feel of a theme 120 | # further. For a list of options available for each theme, see the 121 | # documentation. 122 | # 123 | # html_theme_options = {} 124 | 125 | # Add any paths that contain custom static files (such as style sheets) here, 126 | # relative to this directory. They are copied after the builtin static files, 127 | # so a file named "default.css" will overwrite the builtin "default.css". 128 | html_static_path = ['_static'] 129 | 130 | 131 | # -- Options for HTMLHelp output ------------------------------------------ 132 | 133 | # Output file base name for HTML help builder. 134 | htmlhelp_basename = 'SafeLearningdoc' 135 | 136 | 137 | # -- Options for LaTeX output --------------------------------------------- 138 | 139 | latex_elements = { 140 | # The paper size ('letterpaper' or 'a4paper'). 141 | # 142 | # 'papersize': 'letterpaper', 143 | 144 | # The font size ('10pt', '11pt' or '12pt'). 145 | # 146 | # 'pointsize': '10pt', 147 | 148 | # Additional stuff for the LaTeX preamble. 149 | # 150 | # 'preamble': '', 151 | 152 | # Latex figure (float) alignment 153 | # 154 | # 'figure_align': 'htbp', 155 | } 156 | 157 | # Grouping the document tree into LaTeX files. 
List of tuples 158 | # (source start file, target name, title, 159 | # author, documentclass [howto, manual, or own class]). 160 | latex_documents = [ 161 | (master_doc, 'SafeLearning.tex', 'Safe Learning Documentation', 162 | 'Felix Berkenkamp, Matteo Turchetta, Angela P. Schoellig, Andreas Krause', 'manual'), 163 | ] 164 | 165 | 166 | # -- Options for manual page output --------------------------------------- 167 | 168 | # One entry per manual page. List of tuples 169 | # (source start file, name, description, authors, manual section). 170 | man_pages = [ 171 | (master_doc, 'safelearning', 'Safe Learning Documentation', 172 | [author], 1) 173 | ] 174 | 175 | 176 | # -- Options for Texinfo output ------------------------------------------- 177 | 178 | # Grouping the document tree into Texinfo files. List of tuples 179 | # (source start file, target name, title, author, 180 | # dir menu entry, description, category) 181 | texinfo_documents = [ 182 | (master_doc, 'SafeLearning', 'Safe Learning Documentation', 183 | author, 'SafeLearning', 'One line description of project.', 184 | 'Miscellaneous'), 185 | ] 186 | 187 | 188 | 189 | -------------------------------------------------------------------------------- /safe_learning/tests/test_rl.py: -------------------------------------------------------------------------------- 1 | """Unit tests for treinforcement learning.""" 2 | 3 | from __future__ import division, print_function, absolute_import 4 | 5 | from numpy.testing import assert_allclose 6 | import sys 7 | import pytest 8 | import tensorflow as tf 9 | import numpy as np 10 | import scipy.linalg 11 | from safe_learning.utilities import dlqr 12 | 13 | from safe_learning import (PolicyIteration, Triangulation, GridWorld, 14 | QuadraticFunction, LinearSystem) 15 | 16 | if sys.version_info.major <= 2: 17 | import mock 18 | else: 19 | from unittest import mock 20 | 21 | try: 22 | import cvxpy 23 | except ImportError: 24 | cvxpy = None 25 | 26 | 27 | class TestPolicyIteration(object): 28 | """Test the policy iteration.""" 29 | def test_integration(self): 30 | """Test the values.""" 31 | with tf.Session(graph=tf.Graph()) as sess: 32 | a = np.array([[1.2]]) 33 | b = np.array([[0.9]]) 34 | q = np.array([[1]]) 35 | r = np.array([[0.1]]) 36 | 37 | k, p = dlqr(a, b, q, r) 38 | true_value = QuadraticFunction(-p) 39 | 40 | discretization = GridWorld([[-1, 1]], 19) 41 | value_function = Triangulation(discretization, 42 | 0. 
* discretization.all_points, 43 | project=True) 44 | 45 | dynamics = LinearSystem((a, b)) 46 | 47 | policy_discretization = GridWorld([-1, 1], 5) 48 | policy = Triangulation(policy_discretization, 49 | -k / 2 * policy_discretization.all_points) 50 | reward_function = QuadraticFunction(-scipy.linalg.block_diag(q, r)) 51 | 52 | rl = PolicyIteration(policy, 53 | dynamics, 54 | reward_function, 55 | value_function) 56 | 57 | value_iter = rl.value_iteration() 58 | 59 | loss = -tf.reduce_sum(rl.future_values(rl.state_space)) 60 | optimizer = tf.train.GradientDescentOptimizer(0.01) 61 | adapt_policy = optimizer.minimize(loss, 62 | var_list=rl.policy.parameters) 63 | 64 | sess.run(tf.global_variables_initializer()) 65 | 66 | for _ in range(10): 67 | sess.run(value_iter) 68 | for _ in range(5): 69 | sess.run(adapt_policy) 70 | 71 | values = rl.value_function.parameters[0].eval() 72 | true_values = true_value(rl.state_space).eval() 73 | policy_values = rl.policy.parameters[0].eval() 74 | 75 | assert_allclose(values, true_values, atol=0.1) 76 | assert_allclose(policy_values, -k * policy_discretization.all_points, 77 | atol=0.1) 78 | # 79 | # assert(max_error < disc_error) 80 | # assert_allclose(rl.values, value_function.parameters[:, 0]) 81 | 82 | @pytest.mark.skipif(cvxpy is None, reason='Cvxpy is not installed.') 83 | def test_optimization(self): 84 | """Test the value function optimization.""" 85 | dynamics = mock.Mock() 86 | dynamics.return_value = np.arange(4, dtype=np.float)[:, None] 87 | 88 | rewards = mock.Mock() 89 | rewards.return_value = np.arange(4, dtype=np.float)[:, None] 90 | 91 | # transition probabilities 92 | trans_probs = np.array([[0, .5, .5, 0], 93 | [.2, .1, .3, .5], 94 | [.3, .2, .4, .1], 95 | [0, 0, 0, 1]], 96 | dtype=np.float) 97 | 98 | value_function = mock.Mock() 99 | value_function.tri.parameter_derivative.return_value = trans_probs 100 | value_function.nindex = 4 101 | value_function.parameters = [tf.Variable(np.zeros((4, 1), 102 | dtype=np.float))] 103 | 104 | states = np.arange(4, dtype=np.float)[:, None] 105 | value_function.discretization.all_points = states 106 | 107 | policy = mock.Mock() 108 | policy.return_value = 'actions' 109 | 110 | rl = PolicyIteration(policy, 111 | dynamics, 112 | rewards, 113 | value_function) 114 | 115 | true_values = np.linalg.solve(np.eye(4) - rl.gamma * trans_probs, 116 | rewards.return_value.ravel())[:, None] 117 | 118 | with tf.Session() as sess: 119 | sess.run(tf.variables_initializer(value_function.parameters)) 120 | sess.run(rl.optimize_value_function()) 121 | values = rl.value_function.parameters[0].eval() 122 | 123 | # Confirm result 124 | assert_allclose(values, true_values) 125 | 126 | dynamics.assert_called_with(rl.state_space, 'actions') 127 | rewards.assert_called_with(rl.state_space, 'actions') 128 | 129 | # rl.terminal_states = np.array([0, 0, 0, 1], dtype=np.bool) 130 | # rl.optimize_value_function() 131 | # 132 | # trans_probs2 = np.array([[0, .5, .5, 0, 0], 133 | # [.2, .1, .3, .5, 0], 134 | # [.3, .2, .4, .1, 0], 135 | # [0, 0, 0, 0, 1], 136 | # [0, 0, 0, 0, 1]], 137 | # dtype=np.float) 138 | # rewards2 = np.zeros(5) 139 | # rewards2[:4] = rewards() 140 | # true_values = np.linalg.solve(np.eye(5) - rl.gamma * trans_probs2, 141 | # rewards2) 142 | # 143 | # assert_allclose(rl.values, true_values[:4]) 144 | 145 | def test_future_values(self): 146 | """Test future values.""" 147 | dynamics = mock.Mock() 148 | dynamics.return_value = 'next_states' 149 | 150 | rewards = mock.Mock() 151 | rewards.return_value = np.arange(4, 
dtype=np.float)[:, None] 152 | 153 | value_function = mock.Mock() 154 | value_function.return_value = np.arange(4, dtype=np.float)[:, None] 155 | value_function.discretization.all_points = \ 156 | np.arange(4, dtype=np.float)[:, None] 157 | 158 | policy = mock.Mock() 159 | policy.return_value = 'actions' 160 | 161 | rl = PolicyIteration(policy, 162 | dynamics, 163 | rewards, 164 | value_function) 165 | 166 | true_values = np.arange(4, dtype=np.float)[:, None] * (1 + rl.gamma) 167 | 168 | future_values = rl.future_values('states') 169 | 170 | dynamics.assert_called_with('states', 'actions') 171 | rewards.assert_called_with('states', 'actions') 172 | assert_allclose(future_values, true_values) 173 | 174 | # rl.terminal_states = np.array([0, 0, 0, 1], dtype=np.bool) 175 | # future_values = rl.get_future_values(rl.policy) 176 | # true_values[rl.terminal_states] = rewards()[rl.terminal_states] 177 | # 178 | # assert_allclose(future_values, true_values) 179 | 180 | 181 | if __name__ == '__main__': 182 | pytest.main() 183 | -------------------------------------------------------------------------------- /examples/plotting.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import tensorflow as tf 4 | from IPython.display import display, HTML 5 | from mpl_toolkits.mplot3d import Axes3D 6 | 7 | from safe_learning.utilities import (with_scope, get_storage, set_storage, 8 | get_feed_dict) 9 | 10 | 11 | __all__ = ['plot_lyapunov_1d', 'plot_triangulation', 'show_graph'] 12 | 13 | 14 | # An object to store graph elements 15 | _STORAGE = {} 16 | 17 | 18 | @with_scope('plot_lyapunov_1d') 19 | def plot_lyapunov_1d(lyapunov, true_dynamics, legend=False): 20 | """Plot the lyapunov function of a 1D system 21 | 22 | Parameters 23 | ---------- 24 | lyapunov : instance of `Lyapunov` 25 | true_dynamics : callable 26 | legend : bool, optional 27 | """ 28 | sess = tf.get_default_session() 29 | feed_dict = get_feed_dict(sess.graph) 30 | 31 | # Get the storage (specific to the lyapunov function) 32 | storage = get_storage(_STORAGE, index=lyapunov) 33 | 34 | if storage is None: 35 | # Lyapunov function 36 | states = lyapunov.discretization.all_points 37 | actions = lyapunov.policy(states) 38 | next_states = lyapunov.dynamics(states, actions) 39 | v_bounds = lyapunov.v_decrease_confidence(states, next_states) 40 | true_next_states = true_dynamics(states, actions, noise=False) 41 | delta_v_true, _ = lyapunov.v_decrease_confidence(states, 42 | true_next_states) 43 | 44 | storage = [('states', states), 45 | ('next_states', next_states), 46 | ('v_bounds', v_bounds), 47 | ('true_next_states', true_next_states), 48 | ('delta_v_true', delta_v_true)] 49 | set_storage(_STORAGE, storage, index=lyapunov) 50 | else: 51 | (states, next_states, v_bounds, 52 | true_next_states, delta_v_true) = storage.values() 53 | 54 | extent = [np.min(states), np.max(states)] 55 | safe_set = lyapunov.safe_set 56 | threshold = lyapunov.threshold(states) 57 | 58 | # Create figure axes 59 | fig, axes = plt.subplots(2, 1, figsize=(10, 12)) 60 | 61 | # Format axes 62 | axes[0].set_title('GP model of the dynamics') 63 | axes[0].set_xlim(extent) 64 | axes[1].set_xlim(extent) 65 | axes[1].set_xlabel('$x$') 66 | axes[1].set_ylabel(r'Upper bound of $\Delta V(x)$') 67 | axes[1].set_title(r'Determining stability with $\Delta V(x)$') 68 | 69 | # Plot dynamics 70 | axes[0].plot(states, 71 | true_next_states.eval(feed_dict=feed_dict), 72 | color='black', alpha=0.8) 73 
| 74 | mean, bound = sess.run(next_states, feed_dict=feed_dict) 75 | axes[0].fill_between(states[:, 0], 76 | mean[:, 0] - bound[:, 0], 77 | mean[:, 0] + bound[:, 0], 78 | color=(0.8, 0.8, 1)) 79 | 80 | if hasattr(lyapunov.dynamics, 'X'): 81 | axes[0].plot(lyapunov.dynamics.X[:, 0], 82 | lyapunov.dynamics.Y[:, 0], 83 | 'x', ms=8, mew=2) 84 | 85 | v_dot_mean, v_dot_bound = sess.run(v_bounds, feed_dict=feed_dict) 86 | # # Plot V_dot 87 | print(v_dot_mean.shape) 88 | print(v_dot_bound.shape) 89 | plt.fill_between(states[:, 0], 90 | v_dot_mean[:, 0] - v_dot_bound[:, 0], 91 | v_dot_mean[:, 0] + v_dot_bound[:, 0], 92 | color=(0.8, 0.8, 1)) 93 | 94 | threshold_plot = plt.plot(extent, [threshold, threshold], 95 | 'k-.', label=r'Safety threshold ($L \tau$ )') 96 | 97 | # # Plot the true V_dot or Delta_V 98 | delta_v = delta_v_true.eval(feed_dict=feed_dict) 99 | v_dot_true_plot = axes[1].plot(states[:, 0], 100 | delta_v, 101 | color='k', 102 | label=r'True $\Delta V(x)$') 103 | 104 | # # Create twin axis 105 | ax2 = axes[1].twinx() 106 | ax2.set_ylabel(r'$V(x)$') 107 | ax2.set_xlim(extent) 108 | 109 | # # Plot Lyapunov function 110 | V_unsafe = np.ma.masked_where(safe_set, lyapunov.values) 111 | V_safe = np.ma.masked_where(~safe_set, lyapunov.values) 112 | unsafe_plot = ax2.plot(states, V_unsafe, 113 | color='b', 114 | label=r'$V(x)$ (unsafe, $\Delta V(x) > L \tau$)') 115 | safe_plot = ax2.plot(states, V_safe, 116 | color='r', 117 | label=r'$V(x)$ (safe, $\Delta V(x) \leq L \tau$)') 118 | 119 | if legend: 120 | lns = unsafe_plot + safe_plot + threshold_plot + v_dot_true_plot 121 | labels = [x.get_label() for x in lns] 122 | plt.legend(lns, labels, loc=4, fancybox=True, framealpha=0.75) 123 | 124 | # Create helper lines 125 | if np.any(safe_set): 126 | max_id = np.argmax(lyapunov.values[safe_set]) 127 | x_safe = states[safe_set][max_id] 128 | y_range = axes[1].get_ylim() 129 | axes[1].plot([x_safe, x_safe], y_range, 'k-.') 130 | axes[1].plot([-x_safe, -x_safe], y_range, 'k-.') 131 | 132 | # Show plot 133 | plt.show() 134 | 135 | 136 | def plot_triangulation(triangulation, axis=None, three_dimensional=False, 137 | xlabel=None, ylabel=None, zlabel=None, **kwargs): 138 | """Plot a triangulation. 139 | 140 | Parameters 141 | ---------- 142 | values: ndarray 143 | axis: optional 144 | three_dimensional: bool, optional 145 | Whether to plot 3D 146 | 147 | Returns 148 | ------- 149 | axis: 150 | The axis on which we plotted. 
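
    Examples
    --------
    A usage sketch mirroring the example notebooks (assumes an active
    TensorFlow session and a trained piecewise-linear value function)::

        plot_triangulation(rl.value_function, xlabel='position',
                           ylabel='velocity')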
151 | """ 152 | values = triangulation.parameters[0].eval() 153 | 154 | if three_dimensional: 155 | if axis is None: 156 | axis = Axes3D(plt.figure()) 157 | 158 | # Get the simplices and plot 159 | delaunay = triangulation.tri 160 | state_space = triangulation.discretization.all_points 161 | 162 | simplices = delaunay.simplices(np.arange(delaunay.nsimplex)) 163 | c = axis.plot_trisurf(state_space[:, 0], state_space[:, 1], values[:, 0], 164 | triangles=simplices.copy(), 165 | cmap='viridis', lw=0.1, **kwargs) 166 | cbar = plt.colorbar(c) 167 | else: 168 | if axis is None: 169 | axis = plt.figure().gca() 170 | 171 | domain = triangulation.discretization.limits.tolist() 172 | num_points = triangulation.discretization.num_points 173 | 174 | # Some magic reshaping to go to physical coordinates 175 | vals = values.reshape(num_points[0], num_points[1]).T[::-1] 176 | axis = plt.imshow(vals, origin='upper', 177 | extent=domain[0] + domain[1], 178 | aspect='auto', cmap='viridis', interpolation='bilinear', **kwargs) 179 | cbar = plt.colorbar(axis) 180 | axis = axis.axes 181 | 182 | if xlabel is not None: 183 | axis.set_xlabel(xlabel) 184 | if ylabel is not None: 185 | axis.set_ylabel(ylabel) 186 | if zlabel is not None: 187 | cbar.set_label(zlabel) 188 | 189 | return axis 190 | 191 | 192 | def strip_consts(graph_def, max_const_size=32): 193 | """Strip large constant values from graph_def. 194 | 195 | Taken from 196 | http://stackoverflow.com/questions/38189119/simple-way-to-visualize-a- 197 | tensorflow-graph-in-jupyter 198 | """ 199 | strip_def = tf.GraphDef() 200 | for n0 in graph_def.node: 201 | n = strip_def.node.add() 202 | n.MergeFrom(n0) 203 | if n.op == 'Const': 204 | tensor = n.attr['value'].tensor 205 | size = len(tensor.tensor_content) 206 | if size > max_const_size: 207 | tensor.tensor_content = str.encode("" % size) 208 | return strip_def 209 | 210 | 211 | def show_graph(graph_def, max_const_size=32): 212 | """Visualize TensorFlow graph. 213 | 214 | Taken from 215 | http://stackoverflow.com/questions/38189119/simple-way-to-visualize-a- 216 | tensorflow-graph-in-jupyter 217 | """ 218 | if hasattr(graph_def, 'as_graph_def'): 219 | graph_def = graph_def.as_graph_def() 220 | strip_def = strip_consts(graph_def, max_const_size=max_const_size) 221 | code = """ 222 | 223 | 228 | 229 |
230 | 231 |
232 | """.format(data=repr(str(strip_def)), 233 | id='graph'+str(np.random.rand())) 234 | 235 | iframe = """ 236 | 237 | """.format(code.replace('"', '"')) 238 | display(HTML(iframe)) 239 | 240 | -------------------------------------------------------------------------------- /examples/basic_dynamic_programming.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "import tensorflow as tf\n", 12 | "%matplotlib inline\n", 13 | "\n", 14 | "import safe_learning\n", 15 | "import plotting" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Problem definition\n", 23 | "\n", 24 | "We define a reinforcement learning problem with piecewise linear function approximators. These rely on a regular discretization of the grid into cells." 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "domain = [[-1.2, 0.7], [-.07, .07]]\n", 34 | "n_points = [20, 20]\n", 35 | "\n", 36 | "# Define a discretization on the domain\n", 37 | "discretization = safe_learning.GridWorld(domain, n_points)\n", 38 | "\n", 39 | "# Value function is piecewise linear\n", 40 | "value_function = safe_learning.Triangulation(discretization, np.zeros(discretization.nindex), project=True,\n", 41 | " name='tri_value_function')\n", 42 | "\n", 43 | "# Policy is piecewise linear and saturated\n", 44 | "policy = safe_learning.Triangulation(discretization, np.zeros(discretization.nindex), project=True,\n", 45 | " name='tri_policy')\n", 46 | "policy = safe_learning.Saturation(policy, -1., 1.)\n", 47 | "\n", 48 | "# Discount factor\n", 49 | "gamma = .99\n", 50 | "terminal_reward = 1 - gamma\n", 51 | "\n", 52 | "@safe_learning.utilities.with_scope('true_dynamics')\n", 53 | "def dynamics(states, actions):\n", 54 | " \"\"\"Return future states of the car\"\"\" \n", 55 | " x0 = states[:, 0] + states[:, 1]\n", 56 | " x1 = states[:, 1] + 0.001 * actions[:, 0] - 0.0025 * tf.cos(3 * states[:, 0])\n", 57 | " \n", 58 | " return tf.stack((x0, x1), axis=1)\n", 59 | "\n", 60 | "\n", 61 | "@safe_learning.utilities.with_scope('reward_function')\n", 62 | "def reward_function(states, actions):\n", 63 | " \"\"\"Reward function for the mountain car\"\"\"\n", 64 | " zeros = tf.zeros((states.shape[0], 1), tf.float64)\n", 65 | " ones = tf.ones_like(zeros)\n", 66 | " # Reward is zero except at terminal states\n", 67 | " return tf.where(tf.greater(states[:, 0], 0.6), terminal_reward * ones, zeros)\n" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "## Define the policy optimization problems\n", 75 | "\n", 76 | "Based on the dynamics we define the tensorflow operations to optimize the policy." 
77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "# Define the reinforcement learning setup\n", 86 | "rl = safe_learning.PolicyIteration(\n", 87 | " policy,\n", 88 | " dynamics,\n", 89 | " reward_function,\n", 90 | " value_function,\n", 91 | " gamma=gamma)\n", 92 | "\n", 93 | "# Create a tensorflow session\n", 94 | "session = tf.InteractiveSession()\n", 95 | "session.run(tf.global_variables_initializer())\n", 96 | "\n", 97 | "# Optimize over policy variables\n", 98 | "with tf.name_scope('dynamics_programming'):\n", 99 | " # For triangulations we can solve a linear programm to determine the value function\n", 100 | " # value_opt = rl.value_iteration()\n", 101 | " value_opt = rl.optimize_value_function()\n", 102 | " \n", 103 | " # The policy is optimized using gradient descent\n", 104 | " policy_loss = -1 / (1-gamma) * tf.reduce_mean(rl.future_values(rl.state_space))\n", 105 | "\n", 106 | " optimizer = tf.train.GradientDescentOptimizer(1.)\n", 107 | " adapt_policy = optimizer.minimize(policy_loss,\n", 108 | " var_list=[rl.policy.parameters])\n" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "\n", 118 | "old_values = np.zeros_like(rl.value_function.parameters[0].eval())\n", 119 | "old_actions = np.zeros_like(rl.policy.parameters[0].eval())\n", 120 | "converged = False\n", 121 | "action_space = np.array([[-1.], [1.]])\n", 122 | "\n", 123 | "\n", 124 | "for i in range(30):\n", 125 | " # Optimize value function\n", 126 | " value_opt.eval()\n", 127 | "\n", 128 | " # Optimize policy (discrete over grid or gradient descent)\n", 129 | " # rl.discrete_policy_optimization(action_space)\n", 130 | " for _ in range(200):\n", 131 | " session.run(adapt_policy)\n", 132 | "\n", 133 | " # Get new parameters\n", 134 | " values, actions = session.run([rl.value_function.parameters[0],\n", 135 | " rl.policy.parameters[0]])\n", 136 | "\n", 137 | " # Compute errors\n", 138 | " value_change = np.max(np.abs(old_values - values))\n", 139 | " actions_change = np.max(np.abs(old_actions - actions))\n", 140 | "\n", 141 | " # Break if converged\n", 142 | " if value_change <= 1e-1 and actions_change <= 1e-1:\n", 143 | " converged = True\n", 144 | " break\n", 145 | " else:\n", 146 | " old_values = values\n", 147 | " old_actions = actions\n", 148 | "\n", 149 | "\n", 150 | "if converged:\n", 151 | " print('converged after {} iterations. 
\\nerror: {}, \\npolicy: {}'\n", 152 | " .format(i + 1, value_change, actions_change))\n", 153 | "else:\n", 154 | " print('didnt converge, error: {} and policy: {}'\n", 155 | " .format(value_change, actions_change))" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "# Plot the resulting value function and policy" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "plotting.plot_triangulation(rl.value_function, xlabel='position', ylabel='velocity')\n", 172 | "plt.show()\n", 173 | "\n", 174 | "plotting.plot_triangulation(rl.value_function, three_dimensional=True,\n", 175 | " xlabel='position', ylabel='velocity', zlabel='values')\n", 176 | "plt.show()" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "plotting.plot_triangulation(rl.policy, zlabel='policy', xlabel='position', ylabel='velocity')\n", 186 | "plt.show()" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "# Visualize the trajectory" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "with tf.name_scope('compute_trajectory'):\n", 203 | " states = np.zeros((1000, 2), dtype=np.float)\n", 204 | " states[0, 0] = -0.5\n", 205 | "\n", 206 | " state = tf.placeholder(tf.float64, [1, 2])\n", 207 | " next_states = rl.dynamics(state, rl.policy(state))" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "# Compute the trajectories.\n", 217 | "for i in range(len(states) - 1):\n", 218 | " states[i+1, :] = next_states.eval(feed_dict={state: states[[i], :]})\n", 219 | "\n", 220 | " # break if terminal\n", 221 | " if states[i+1, 0] >= 0.6:\n", 222 | " states[i+1:] = states[i+1]\n", 223 | " break" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [ 232 | "ax = plotting.plot_triangulation(rl.value_function, xlabel='position', ylabel='velocity')\n", 233 | "ax.plot(states[:,0], states[:, 1], lw=3, color='k')\n", 234 | "ax.plot(np.ones(2) * 0.6, ax.get_ylim(), lw=2, color='r')\n", 235 | "\n", 236 | "plt.show()" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": {}, 242 | "source": [ 243 | "# Visualize the computation graph" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "plotting.show_graph(tf.get_default_graph())" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [] 261 | } 262 | ], 263 | "metadata": { 264 | "anaconda-cloud": {}, 265 | "kernelspec": { 266 | "display_name": "Python 3", 267 | "language": "python", 268 | "name": "python3" 269 | }, 270 | "language_info": { 271 | "codemirror_mode": { 272 | "name": "ipython", 273 | "version": 3 274 | }, 275 | "file_extension": ".py", 276 | "mimetype": "text/x-python", 277 | "name": "python", 278 | "nbconvert_exporter": "python", 279 | "pygments_lexer": "ipython3", 280 | "version": "3.6.4" 281 | } 282 | }, 283 | "nbformat": 4, 284 | "nbformat_minor": 2 285 | } 286 | 
-------------------------------------------------------------------------------- /safe_learning/reinforcement_learning.py: -------------------------------------------------------------------------------- 1 | """Classes for reinforcement learning.""" 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | from types import ModuleType 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | try: 10 |     import cvxpy 11 | except ImportError as exception: 12 |     cvxpy = exception 13 | 14 | from .utilities import (make_tf_fun, with_scope, get_storage, set_storage, 15 |                         get_feed_dict) 16 | 17 | from safe_learning import config 18 | 19 | __all__ = ['PolicyIteration'] 20 | 21 | 22 | class OptimizationError(Exception): 23 |     """Exception raised when the value function optimization fails.""" 24 | 25 | 26 | class PolicyIteration(object): 27 |     """A class for policy iteration. 28 | 29 |     Parameters 30 |     ---------- 31 |     policy : callable 32 |         The policy that maps states to actions. 33 |     dynamics : callable 34 |         A function that can be called with states and actions as inputs and 35 |         returns future states. 36 |     reward_function : callable 37 |         A function that takes the state, action, and next state as input and 38 |         returns the reward corresponding to this transition. 39 |     value_function : instance of `DeterministicFunction` 40 |         The function approximator for the value function. It is used to 41 |         evaluate the value function at states. 42 |     gamma : float 43 |         The discount factor for reinforcement learning. 44 |     """ 45 | 46 |     def __init__(self, policy, dynamics, reward_function, value_function, 47 |                  gamma=0.98): 48 |         """Initialization. 49 | 50 |         See `PolicyIteration` for details. 51 |         """ 52 |         super(PolicyIteration, self).__init__() 53 |         self.dynamics = dynamics 54 |         self.reward_function = reward_function 55 |         self.value_function = value_function 56 |         self.gamma = gamma 57 | 58 |         state_space = self.value_function.discretization.all_points 59 |         self.state_space = tf.stack(state_space, name='state_space') 60 | 61 |         self.policy = policy 62 |         self.feed_dict = get_feed_dict(tf.get_default_graph()) 63 |         self._storage = {} 64 | 65 |     @with_scope('future_values') 66 |     def future_values(self, states, policy=None, actions=None, lyapunov=None, 67 |                       lagrange_multiplier=1.): 68 |         """Return the value at the current states. 69 | 70 |         Parameters 71 |         ---------- 72 |         states : ndarray 73 |             The states at which to compute future values. 74 |         policy : callable, optional 75 |             The policy to evaluate. Defaults to `self.policy`. This 76 |             argument is ignored if actions is not None. 77 |         actions : array or tensor, optional 78 |             The actions to be taken for the states. 79 |         lyapunov : instance of `Lyapunov` 80 |             A Lyapunov function that acts as a constraint for the optimization. 81 |         lagrange_multiplier : float 82 |             A scaling factor for the `slack` of the optimization problem. 83 | 84 |         Returns 85 |         ------- 86 |         The expected long-term reward when taking an action according to the 87 |         policy and then receiving the value of self.value_function.
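Examples -------- A minimal usage sketch (assuming `rl` is an already-constructed `PolicyIteration` and a default tensorflow session is active): >>> values = rl.future_values(rl.state_space)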
88 | """ 89 | if actions is None: 90 | if policy is None: 91 | policy = self.policy 92 | actions = policy(states) 93 | 94 | next_states = self.dynamics(states, actions) 95 | rewards = self.reward_function(states, actions) 96 | 97 | # Only use the mean dynamics 98 | if isinstance(next_states, tuple): 99 | next_states, var = next_states 100 | 101 | expected_values = self.value_function(next_states) 102 | 103 | # Perform value update 104 | updated_values = rewards + self.gamma * expected_values 105 | 106 | # Adjust the cost for the Lyapunov decrease 107 | if lyapunov is not None: 108 | decrease = lyapunov.v_decrease_bound(states, (next_states, var)) 109 | 110 | # Want to enfore `constraint <= 0` 111 | constraint = decrease - lyapunov.threshold(states) 112 | updated_values -= lagrange_multiplier * constraint 113 | 114 | return updated_values 115 | 116 | @with_scope('bellmann_error') 117 | def bellmann_error(self, states): 118 | """Compute the squared bellmann error. 119 | 120 | Parameters 121 | ---------- 122 | states : array 123 | 124 | Returns 125 | ------- 126 | error : float 127 | """ 128 | # Make sure we do not compute the gradient with respect to the 129 | # training target. 130 | target = tf.stop_gradient(self.future_values(states)) 131 | # Squared bellmann error 132 | return tf.reduce_sum(tf.square(target - self.value_function(states)), 133 | name='bellmann_error') 134 | 135 | @with_scope('value_iteration') 136 | def value_iteration(self): 137 | """Perform one step of value iteration.""" 138 | future_values = self.future_values(self.state_space) 139 | return tf.assign(self.value_function.parameters[0], future_values, 140 | name='value_iteration_update') 141 | 142 | @make_tf_fun(tf.float64) 143 | def _run_cvx_optimization(self, next_states, rewards, **solver_options): 144 | """Tensorflow wrapper around a cvxpy value function optimization. 145 | 146 | Parameters 147 | ---------- 148 | next_states : ndarray 149 | rewards : ndarray 150 | 151 | Returns 152 | ------- 153 | values : ndarray 154 | The optimal values at the states. 155 | """ 156 | # Define random variables; convert index from np.int64 to regular 157 | # python int to avoid strange cvxpy error; see: 158 | # https://github.com/cvxgrp/cvxpy/issues/380 159 | values = cvxpy.Variable(rewards.shape) 160 | 161 | value_matrix = self.value_function.tri.parameter_derivative( 162 | next_states) 163 | # Make cvxpy work with sparse matrices 164 | value_matrix = cvxpy.Constant(value_matrix) 165 | 166 | objective = cvxpy.Maximize(cvxpy.sum(values)) 167 | constraints = [values <= rewards + self.gamma * value_matrix * values] 168 | prob = cvxpy.Problem(objective, constraints) 169 | 170 | # Solve optimization problem 171 | prob.solve(**solver_options) 172 | 173 | # Some error checking 174 | if not prob.status == cvxpy.OPTIMAL: 175 | raise OptimizationError('Optimization problem is {}' 176 | .format(prob.status)) 177 | 178 | return np.array(values.value) 179 | 180 | @with_scope('optimize_value_function') 181 | def optimize_value_function(self, **solver_options): 182 | """Optimize the value function using cvx. 183 | 184 | Parameters 185 | ---------- 186 | solver_options : kwargs, optional 187 | Additional solver options passes to cvxpy.Problem.solve. 188 | 189 | Returns 190 | ------- 191 | assign_op : tf.Tensor 192 | An assign operation that updates the value function. 
193 | """ 194 | if not isinstance(cvxpy, ModuleType): 195 | raise cvxpy 196 | 197 | actions = self.policy(self.state_space) 198 | next_states = self.dynamics(self.state_space, actions) 199 | 200 | # Only use the mean dynamics 201 | if isinstance(next_states, tuple): 202 | next_states, var = next_states 203 | 204 | rewards = self.reward_function(self.state_space, 205 | actions) 206 | 207 | values = self._run_cvx_optimization(next_states, 208 | rewards, 209 | **solver_options) 210 | 211 | return tf.assign(self.value_function.parameters[0], values) 212 | 213 | @with_scope('discrete_policy_optimization') 214 | def discrete_policy_optimization(self, action_space, constraint=None): 215 | """Optimize the policy for a given value function. 216 | 217 | Parameters 218 | ---------- 219 | action_space : ndarray 220 | The parameter value to evaluate (for each parameter). This is 221 | geared towards piecewise linear functions. 222 | constraint : callable 223 | A function that can be called with a policy. Returns the slack of 224 | the safety constraint for each state. A policy is safe if the slack 225 | is >=0 for all constraints. 226 | """ 227 | states = self.policy.discretization.all_points 228 | n_states = states.shape[0] 229 | n_options, n_actions = action_space.shape 230 | 231 | # Initialize 232 | values = np.empty((n_states, n_options), dtype=config.np_dtype) 233 | action_array = np.broadcast_to(np.zeros(n_actions, 234 | dtype=config.np_dtype), 235 | (n_states, n_actions)) 236 | 237 | # Create tensorflow operations, but reuse previous graph elements 238 | storage = get_storage(self._storage) 239 | 240 | if storage is None: 241 | # Computation of future values 242 | actions = tf.placeholder(config.dtype, 243 | shape=action_array.shape, 244 | name='actions') 245 | future_values = self.future_values(states, 246 | actions=actions) 247 | 248 | # Assigning new parameters 249 | parameters = tf.placeholder(config.dtype, action_array.shape) 250 | assign_op = tf.assign(self.policy.parameters[0], parameters) 251 | 252 | # Put things into storage 253 | storage = [('actions', actions), 254 | ('future_values', future_values), 255 | ('parameters', parameters), 256 | ('assign_op', assign_op)] 257 | set_storage(self._storage, storage) 258 | else: 259 | # Get items out of storage 260 | actions, future_values, parameters, assign_op = storage.values() 261 | 262 | feed_dict = self.feed_dict 263 | feed_dict[actions] = action_array 264 | 265 | # Compute values for each action 266 | for i, action in enumerate(action_space): 267 | # Update feed dict 268 | action_array.base[:] = action 269 | # Compute values 270 | values[:, i] = future_values.eval(feed_dict=feed_dict)[:, 0] 271 | 272 | if constraint is not None: 273 | # TODO: optimize safety if unsafe 274 | unsafe = constraint(action_array) < 0 275 | values[unsafe, i] = -np.inf 276 | 277 | # Select best action for policy 278 | best_actions = action_space[np.argmax(values, axis=1)] 279 | assign_op.eval({parameters: best_actions}) 280 | -------------------------------------------------------------------------------- /examples/1d_region_of_attraction_estimate.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Stability verification of a fixed uncertain system (without dynamic programming)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from 
__future__ import division, print_function\n", 17 | "\n", 18 | "import tensorflow as tf\n", 19 | "import gpflow\n", 20 | "import numpy as np\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "from future.builtins import *\n", 23 | "from functools import partial\n", 24 | "%matplotlib inline\n", 25 | "\n", 26 | "import plotting\n", 27 | "import safe_learning\n", 28 | "\n", 29 | "try:\n", 30 | "    session.close()\n", 31 | "except NameError:\n", 32 | "    pass\n", 33 | "\n", 34 | "graph = tf.Graph()\n", 35 | "session = tf.InteractiveSession(graph=graph)\n", 36 | "session.run(tf.global_variables_initializer())" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "We start by defining a discretization of the space $[-1, 1]$ with discretization constant $\\tau$" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "discretization = safe_learning.GridWorld([-1, 1], 1001)\n", 53 | "tau = 1 / discretization.nindex\n", 54 | "\n", 55 | "print('Grid size: {0}'.format(discretization.nindex))" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "We define a GP model of the dynamics that consists of a stable, closed-loop, linear prior model plus an a priori unknown function $g_\\pi$,\n", 63 | "$$x_{k+1} = 0.25 x_k + g_\\pi(x_k).$$\n", 64 | "\n", 65 | "One particular sample of this GP serves as the true dynamics. The prior dynamics are locally asymptotically stable. Moreover, in the one-dimensional case, the dynamics are stable as long as $|x_{k+1}| \\leq |x_{k}|$." 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "# Observation noise\n", 75 | "noise_var = 0.01 ** 2\n", 76 | "\n", 77 | "with tf.variable_scope('gp'):\n", 78 | "    # Mean dynamics\n", 79 | "    mean_function = safe_learning.LinearSystem((0.25, 0.), name='mean_dynamics')\n", 80 | "\n", 81 | "    kernel = (gpflow.kernels.Matern32(1, lengthscales=1, variance=0.4**2, active_dims=[0])\n", 82 | "              * gpflow.kernels.Linear(1, active_dims=[0]))\n", 83 | "\n", 84 | "    gp = safe_learning.GPRCached(np.empty((0, 2), dtype=safe_learning.config.np_dtype),\n", 85 | "                                 np.empty((0, 1), dtype=safe_learning.config.np_dtype),\n", 86 | "                                 kernel,\n", 87 | "                                 mean_function=mean_function)\n", 88 | "    gp.likelihood.variance = noise_var\n", 89 | "\n", 90 | "    gpfun = safe_learning.GaussianProcess(gp, name='gp_dynamics')" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "# Define one sample as the true dynamics\n", 100 | "np.random.seed(5)\n", 101 | "\n", 102 | "# Set up a discretization\n", 103 | "sample_disc = np.hstack((np.linspace(-1, 1, 50)[:, None],\n", 104 | "                         np.zeros((50, 1))))\n", 105 | "\n", 106 | "# Draw samples\n", 107 | "fs = safe_learning.sample_gp_function(sample_disc, gpfun, number=10, return_function=False)\n", 108 | "plt.plot(sample_disc[:, 0], fs.T)\n", 109 | "\n", 110 | "plt.ylabel('$g(x)$')\n", 111 | "plt.xlabel('x')\n", 112 | "plt.title('Samples drawn from the GP model of the dynamics')\n", 113 | "plt.show()\n", 114 | "\n", 115 | "\n", 116 | "true_dynamics = safe_learning.sample_gp_function(\n", 117 | "    sample_disc,\n", 118 | "    gpfun)[0]\n", 119 | "\n", 120 | "# Plot the basic model\n", 121 | "with tf.variable_scope('plot_true_dynamics'):\n", 122 | "    true_y = true_dynamics(sample_disc, noise=False).eval(feed_dict=true_dynamics.feed_dict)\n", 123 | "plt.plot(sample_disc[:, 0], true_y,
color='black', alpha=0.8)\n", 124 | "plt.title('GP model of the dynamics')\n", 125 | "plt.show()" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "# lyapunov_function = safe_learning.QuadraticFunction(np.array([[1]]))\n", 135 | "lyapunov_disc = safe_learning.GridWorld([-1., 1.], 3)\n", 136 | "lyapunov_function = safe_learning.Triangulation(lyapunov_disc, [1, 0, 1], name='lyapunov_function')\n", 137 | "\n", 138 | "dynamics = gpfun\n", 139 | "policy = safe_learning.LinearSystem(np.array([0.]), name='policy')\n", 140 | "\n", 141 | "# Lipschitz constant\n", 142 | "# L_dyn = 0.25 + dynamics.beta(0) * np.sqrt(gp.kern.Mat32.variance) / gp.kern.Mat32.lengthscale * np.max(np.abs(extent))\n", 143 | "# L_V = np.max(lyapunov_function.gradient(grid))\n", 144 | "\n", 145 | "L_dyn = 0.25\n", 146 | "L_V = 1.\n", 147 | "\n", 148 | "lyapunov = safe_learning.Lyapunov(discretization, lyapunov_function, dynamics, L_dyn, L_V, tau, policy)\n", 149 | "\n", 150 | "# Specify the desired accuracy\n", 151 | "# accuracy = np.max(lyapunov.V) / 1e10" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "## Safety based on GP model\n", 159 | "\n", 160 | "Let's start by plotting the prior over the dynamics and the associated prior over $\\dot{V}(x)$." 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "lyapunov.update_safe_set()\n", 170 | "plotting.plot_lyapunov_1d(lyapunov, true_dynamics, legend=True)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "Clearly the model does not allow us to classify any states as safe ($\\dot{V} < -L \\tau$). However, as a starting point, we assume that we know that the system is asymptotically stable within some initial set, $\\mathcal{S}_0$:\n", 178 | "\n", 179 | "$$\\mathcal{S}_0 = \\{ x \\in \\mathbb{R} \\,|\\, |x| < 0.2 \\}$$" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "lyapunov.initial_safe_set = np.abs(lyapunov.discretization.all_points.squeeze()) < 0.2" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "## Online learning\n", 196 | "As we sample within this initial safe set, we gain more knowledge about the system. In particular, we iteratively select the state within the safe set, $\\mathcal{S}_n$, where the dynamics are the most uncertain (highest variance).\n",
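"We then take a measurement of the true dynamics at this state, add it to the GP model, and recompute the safe set."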
197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "grid = lyapunov.discretization.all_points\n", 206 | "lyapunov.update_safe_set()\n", 207 | "\n", 208 | "with tf.variable_scope('sample_new_safe_point'):\n", 209 | " safe_set = tf.placeholder(safe_learning.config.dtype, [None, None])\n", 210 | " _, dynamics_std_tf = lyapunov.dynamics(safe_set, lyapunov.policy(safe_set))\n", 211 | " \n", 212 | " \n", 213 | " tf_max_state = tf.placeholder(safe_learning.config.dtype, [1, None])\n", 214 | " tf_max_action = lyapunov.policy(tf_max_state)\n", 215 | " tf_measurement = true_dynamics(tf_max_state, tf_max_action)\n", 216 | " \n", 217 | "feed_dict = lyapunov.dynamics.feed_dict\n", 218 | " \n", 219 | "def update_gp():\n", 220 | " \"\"\"Update the GP model based on an actively selected data point.\"\"\"\n", 221 | " # Maximum uncertainty in safe set\n", 222 | " safe_grid = grid[lyapunov.safe_set]\n", 223 | " \n", 224 | " feed_dict[safe_set] = safe_grid\n", 225 | " dynamics_std = dynamics_std_tf.eval(feed_dict=feed_dict)\n", 226 | " \n", 227 | " max_id = np.argmax(dynamics_std)\n", 228 | " max_state = safe_grid[[max_id], :].copy()\n", 229 | " \n", 230 | " feed_dict[tf_max_state] = max_state\n", 231 | " max_action, measurement = session.run([tf_max_action, tf_measurement],\n", 232 | " feed_dict=feed_dict)\n", 233 | " \n", 234 | " arg = np.hstack((max_state, max_action))\n", 235 | " lyapunov.dynamics.add_data_point(arg, measurement)\n", 236 | " lyapunov.update_safe_set()" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "# Update the GP model a couple of times\n", 246 | "for i in range(4):\n", 247 | " update_gp()" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "# Plot the new safe set\n", 257 | "plotting.plot_lyapunov_1d(lyapunov, true_dynamics, legend=True)" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": {}, 263 | "source": [ 264 | "We continue to sample like this, until we find the maximum safe set" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [ 273 | "for i in range(20):\n", 274 | " update_gp()\n", 275 | "\n", 276 | "lyapunov.update_safe_set()\n", 277 | "plotting.plot_lyapunov_1d(lyapunov, true_dynamics, legend=False)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "plotting.show_graph(tf.get_default_graph())" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [] 295 | } 296 | ], 297 | "metadata": { 298 | "anaconda-cloud": {}, 299 | "kernelspec": { 300 | "display_name": "Python 3", 301 | "language": "python", 302 | "name": "python3" 303 | }, 304 | "language_info": { 305 | "codemirror_mode": { 306 | "name": "ipython", 307 | "version": 3 308 | }, 309 | "file_extension": ".py", 310 | "mimetype": "text/x-python", 311 | "name": "python", 312 | "nbconvert_exporter": "python", 313 | "pygments_lexer": "ipython3", 314 | "version": "3.6.4" 315 | } 316 | }, 317 | "nbformat": 4, 318 | "nbformat_minor": 2 319 | } 320 | -------------------------------------------------------------------------------- 
/examples/1d_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from __future__ import division, print_function\n", 10 | "\n", 11 | "import tensorflow as tf\n", 12 | "import gpflow\n", 13 | "import numpy as np\n", 14 | "from scipy import linalg\n", 15 | "import matplotlib.pyplot as plt\n", 16 | "from matplotlib import colors\n", 17 | "%matplotlib inline\n", 18 | "\n", 19 | "import safe_learning\n", 20 | "import plotting\n", 21 | "np.random.seed(0)\n", 22 | "\n", 23 | "try:\n", 24 | " session.close()\n", 25 | "except NameError:\n", 26 | " pass\n", 27 | "\n", 28 | "session = tf.InteractiveSession()\n", 29 | "session.run(tf.global_variables_initializer())" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "### Goal:\n", 37 | "\n", 38 | "Optimize over the policy such that the safe set does not shrink" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "We start by defining a discretization of the space $[-1, 1]$ with discretization constant $\\tau$" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# x_min, x_max, discretization\\\n", 55 | "state_limits = np.array([[-1., 1.]])\n", 56 | "action_limits = np.array([[-.5, .5]])\n", 57 | "num_states = 1000\n", 58 | "num_actions = 101\n", 59 | "\n", 60 | "safety_disc = safe_learning.GridWorld(state_limits, num_states)\n", 61 | "\n", 62 | "# Discretization for optimizing the policy (discrete action space)\n", 63 | "# This is not necessary if one uses gradients to optimize the policy\n", 64 | "action_disc = safe_learning.GridWorld(action_limits, num_actions)\n", 65 | "\n", 66 | "# Discretization constant\n", 67 | "tau = np.max(safety_disc.unit_maxes)\n", 68 | "\n", 69 | "# Initial policy: All zeros\n", 70 | "policy_disc = safe_learning.GridWorld(state_limits, 51)\n", 71 | "policy = safe_learning.Triangulation(policy_disc, np.zeros(len(policy_disc)), name='policy')\n", 72 | "\n", 73 | "print('Grid size: {0}'.format(len(safety_disc)))" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "### Define GP dynamics model" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "kernel = (gpflow.kernels.Matern32(2, lengthscales=1, active_dims=[0, 1]) *\n", 90 | " gpflow.kernels.Linear(2, variance=[0.2, 1], ARD=True))\n", 91 | "\n", 92 | "noise_var = 0.01 ** 2\n", 93 | "\n", 94 | "# Mean dynamics\n", 95 | "mean_function = safe_learning.LinearSystem(([1, 0.1]), name='prior_dynamics')\n", 96 | "\n", 97 | "mean_lipschitz = 0.8\n", 98 | "gp_lipschitz = 0.5 # beta * np.sqrt(kernel.Mat32.variance) / kernel.Mat32.lengthscale * np.max(np.abs(state_limits))\n", 99 | "lipschitz_dynamics = mean_lipschitz + gp_lipschitz\n", 100 | "\n", 101 | "a = 1.2\n", 102 | "b = 1.\n", 103 | "q = 1.\n", 104 | "r = 1.\n", 105 | "\n", 106 | "true_dynamics = safe_learning.LinearSystem((a, b), name='true_dynamics')\n", 107 | "\n", 108 | "# Define a GP model over the dynamics\n", 109 | "gp = gpflow.gpr.GPR(np.empty((0, 2), dtype=safe_learning.config.np_dtype),\n", 110 | " np.empty((0, 1), dtype=safe_learning.config.np_dtype),\n", 111 | " kernel,\n", 112 | " mean_function=mean_function)\n", 113 | 
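"# Set the observation noise of the GP likelihood\n",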
"gp.likelihood.variance = noise_var\n", 114 | "\n", 115 | "dynamics = safe_learning.GaussianProcess(gp, name='gp_dynamics')\n", 116 | "\n", 117 | "k_opt, s_opt = safe_learning.utilities.dlqr(a, b, q, r)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "### Define Lyapunov function" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "lyapunov_function = safe_learning.Triangulation(safe_learning.GridWorld(state_limits, 3),\n", 134 | " vertex_values=[1, 0, 1],\n", 135 | " name='lyapunov_function')\n", 136 | "lipschitz_lyapunov = 1.\n", 137 | "\n", 138 | "lyapunov = safe_learning.Lyapunov(safety_disc,\n", 139 | " lyapunov_function,\n", 140 | " dynamics,\n", 141 | " lipschitz_dynamics,\n", 142 | " lipschitz_lyapunov,\n", 143 | " tau,\n", 144 | " policy)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "### Initial safe set " 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "lyapunov.initial_safe_set = np.abs(lyapunov.discretization.all_points.squeeze()) < 0.05\n", 161 | "\n", 162 | "lyapunov.update_safe_set()\n", 163 | "noisy_dynamics = lambda x, u, noise: true_dynamics(x, u)\n", 164 | "plotting.plot_lyapunov_1d(lyapunov, noisy_dynamics, legend=True)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "### Reinforcement learning for the mean dynamics" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "# mean_dynamics = dynamics.to_mean_function()\n", 181 | "\n", 182 | "reward = safe_learning.QuadraticFunction(linalg.block_diag(-q, -r), name='reward_function')\n", 183 | "\n", 184 | "value_function = safe_learning.Triangulation(policy_disc,\n", 185 | " np.zeros(len(policy_disc)),\n", 186 | " project=True,\n", 187 | " name='value_function')\n", 188 | "\n", 189 | "rl = safe_learning.PolicyIteration(policy, dynamics, reward, value_function)" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "### Plot the dynamics\n", 197 | "\n", 198 | "Note that the initial policy is just all zeros!!!" 
199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "_STORAGE = {}\n", 208 | "\n", 209 | "plotting_discretization = safe_learning.GridWorld(np.vstack((state_limits, action_limits)),\n", 210 | " [num_states, num_actions])\n", 211 | "\n", 212 | "@safe_learning.utilities.with_scope('get_safe_set')\n", 213 | "def get_safe_sets(lyapunov, positive=True):\n", 214 | " \n", 215 | " state_disc = lyapunov.discretization\n", 216 | " \n", 217 | " safe_states = state_disc.index_to_state(np.where(lyapunov.safe_set))\n", 218 | " safe_actions = action_disc.all_points\n", 219 | " feed_dict = lyapunov.feed_dict\n", 220 | "\n", 221 | " state_actions = np.column_stack([arr.ravel() for arr in\n", 222 | " np.meshgrid(safe_states, safe_actions, indexing='ij')])\n", 223 | " safe_set = lyapunov.safe_set.reshape(state_disc.num_points)\n", 224 | " \n", 225 | " storage = safe_learning.utilities.get_storage(_STORAGE, index=lyapunov)\n", 226 | " \n", 227 | " if storage is None:\n", 228 | " tf_state_actions = tf.placeholder(safe_learning.config.dtype,\n", 229 | " shape=[None, state_actions.shape[1]])\n", 230 | " \n", 231 | " next_states = lyapunov.dynamics(tf_state_actions)\n", 232 | " \n", 233 | " mean, bound = next_states\n", 234 | " bound = tf.reduce_sum(bound, axis=1)\n", 235 | " lv = lyapunov.lipschitz_lyapunov(mean)\n", 236 | " values = tf.squeeze(lyapunov.lyapunov_function(mean), 1) + lv * bound\n", 237 | " maps_inside = tf.less(values, lyapunov.c_max, name='maps_inside_levelset')\n", 238 | " \n", 239 | " state, actions = tf.split(tf_state_actions, [1, 1], axis=1)\n", 240 | " \n", 241 | " dec = lyapunov.v_decrease_bound(state, next_states)\n", 242 | " \n", 243 | " decreases = tf.less(dec, lyapunov.threshold(state))\n", 244 | " \n", 245 | " storage = [('tf_state_actions', tf_state_actions),\n", 246 | " ('maps_inside', maps_inside),\n", 247 | " ('mean', mean),\n", 248 | " ('decreases', decreases)]\n", 249 | " safe_learning.utilities.set_storage(_STORAGE, storage, index=lyapunov)\n", 250 | " else:\n", 251 | " tf_state_actions, maps_inside, mean, decreases = storage.values()\n", 252 | "\n", 253 | " # Put placeholder values inside feed_dict and evaluate\n", 254 | " feed_dict[tf_state_actions] = state_actions\n", 255 | " maps_inside, mean, decreases = session.run([maps_inside, mean, decreases],\n", 256 | " feed_dict=feed_dict)\n", 257 | " \n", 258 | " # Add the mean safe set on top\n", 259 | " if not positive:\n", 260 | " next_state_index = lyapunov.discretization.state_to_index(mean)\n", 261 | " safe_in_expectation = lyapunov.safe_set[next_state_index]\n", 262 | " maps_inside &= safe_in_expectation\n", 263 | " \n", 264 | " maps_inside_total = np.zeros(plotting_discretization.nindex, dtype=np.bool)\n", 265 | " maps_inside_total = maps_inside_total.reshape(plotting_discretization.num_points)\n", 266 | " decreases_total = np.zeros_like(maps_inside_total)\n", 267 | " \n", 268 | " maps_inside_total[safe_set, :] = maps_inside.reshape(len(safe_states), len(safe_actions))\n", 269 | " decreases_total[safe_set, :] = decreases.reshape(len(safe_states), len(safe_actions))\n", 270 | "\n", 271 | " return maps_inside_total, decreases_total\n", 272 | "\n", 273 | "\n", 274 | "@safe_learning.utilities.with_scope('plot_lyapunov_2d')\n", 275 | "def plot_things():\n", 276 | " fig, axes = plt.subplots(2, 2, figsize=(10, 10), gridspec_kw={'width_ratios': [30, 1]})\n", 277 | "\n", 278 | " # Hide fake cax\n", 279 | " cax, cax1 = axes[:, 1]\n", 280 
| " cax1.set_visible(False)\n", 281 | " cax.set_ylabel('Standard deviation $\\sigma$')\n", 282 | "\n", 283 | " ax0, ax1 = axes[:, 0]\n", 284 | " ax0.set_ylabel('action')\n", 285 | " ax1.set_xlabel('state')\n", 286 | " ax1.set_ylabel('$v(\\mathbf{x})$')\n", 287 | "\n", 288 | " ax1.set_ylim(0, np.max(lyapunov.values))\n", 289 | " ax1.set_xlim(state_limits.squeeze())\n", 290 | " ax0.set_xlim(state_limits.squeeze())\n", 291 | " ax0.set_ylim(action_limits.squeeze())\n", 292 | " ax0.set_xticks([])\n", 293 | "\n", 294 | " # Hide x-ticks of ax0\n", 295 | " plt.setp(ax0.get_xticklabels(), visible=False)\n", 296 | "\n", 297 | " # width between cax and main axis\n", 298 | " plt.subplots_adjust(wspace=.05)\n", 299 | " feed_dict = lyapunov.feed_dict\n", 300 | " \n", 301 | " # Plot the dynamics\n", 302 | " states = lyapunov.discretization.all_points\n", 303 | " state_actions = plotting_discretization.all_points\n", 304 | " \n", 305 | " storage = safe_learning.utilities.get_storage(_STORAGE, index=lyapunov)\n", 306 | " if storage is None:\n", 307 | " actions = lyapunov.policy(states)\n", 308 | " next_states = lyapunov.dynamics(state_actions)\n", 309 | " \n", 310 | " storage = [('actions', actions),\n", 311 | " ('next_states', next_states)]\n", 312 | " \n", 313 | " safe_learning.utilities.set_storage(_STORAGE, storage, index=lyapunov)\n", 314 | " else:\n", 315 | " actions, next_states = storage.values()\n", 316 | " \n", 317 | " mean, bound = session.run(next_states, feed_dict=feed_dict)\n", 318 | " \n", 319 | " # Show the GP variance\n", 320 | " img = ax0.imshow(bound.reshape(plotting_discretization.num_points).T,\n", 321 | " origin='lower',\n", 322 | " extent=plotting_discretization.limits.ravel(),\n", 323 | " aspect='auto')\n", 324 | " \n", 325 | " # Plot the dynamics\n", 326 | " ax0.plot(lyapunov.dynamics.X[:, 0],\n", 327 | " lyapunov.dynamics.X[:, 1], 'x')\n", 328 | " cbar = plt.colorbar(img, cax=cax)\n", 329 | "\n", 330 | " safe, safe_expanders = get_safe_sets(lyapunov) \n", 331 | " safe = safe.reshape(plotting_discretization.num_points)\n", 332 | " v_dec = safe_expanders.reshape(plotting_discretization.num_points)\n", 333 | " \n", 334 | " safe_mask = np.ma.masked_where(~safe, safe)\n", 335 | " \n", 336 | "\n", 337 | " # Overlay the safety feature\n", 338 | " img = ax0.imshow(safe_mask.T,\n", 339 | " origin='lower',\n", 340 | " extent=plotting_discretization.limits.ravel(),\n", 341 | " alpha=0.2,\n", 342 | " cmap=colors.ListedColormap(['white']),\n", 343 | " aspect='auto',\n", 344 | " vmin=0,\n", 345 | " vmax=1) \n", 346 | " \n", 347 | " # Overlay the safety feature\n", 348 | " if np.any(v_dec):\n", 349 | " v_dec_mask = np.ma.masked_where(~v_dec, v_dec)\n", 350 | " img = ax0.imshow(v_dec_mask.T,\n", 351 | " origin='lower',\n", 352 | " extent=plotting_discretization.limits.ravel(),\n", 353 | " alpha=0.5,\n", 354 | " cmap=colors.ListedColormap(['red']),\n", 355 | " aspect='auto',\n", 356 | " vmin=0,\n", 357 | " vmax=1)\n", 358 | " \n", 359 | " is_safe = lyapunov.safe_set\n", 360 | " # Plot the Lyapunov function\n", 361 | " lyap_safe = np.ma.masked_where(~is_safe, lyapunov.values)\n", 362 | " lyap_unsafe = np.ma.masked_where(is_safe, lyapunov.values)\n", 363 | "\n", 364 | " # Plot lines for the boundary of the safety feature\n", 365 | " x_min_safe = np.min(states[is_safe])\n", 366 | " x_max_safe = np.max(states[is_safe])\n", 367 | "\n", 368 | " ax1.plot(states, lyap_safe, 'r')\n", 369 | " ax1.plot(states, lyap_unsafe, 'b')\n", 370 | "\n", 371 | " kw_axv = {'color': 'red',\n", 372 | " 'alpha': 0.5}\n", 
373 | " ax0.axvline(x=x_min_safe, ymin=-0.2, ymax=1, clip_on=False, **kw_axv)\n", 374 | " ax1.axvline(x=x_min_safe, ymin=0, ymax=1, clip_on=False, **kw_axv)\n", 375 | "\n", 376 | " ax0.axvline(x=x_max_safe, ymin=-0.2, ymax=1, clip_on=False, **kw_axv)\n", 377 | " ax1.axvline(x=x_max_safe, ymin=0, ymax=1, clip_on=False, **kw_axv)\n", 378 | " \n", 379 | " # Plot the current policy\n", 380 | " actions = actions.eval(feed_dict=feed_dict)\n", 381 | " ax0.step(states, actions, label='safe policy', alpha=0.5)\n", 382 | "\n", 383 | " ax0.legend()\n", 384 | " plt.show()\n", 385 | "\n", 386 | "# optimize_safe_policy(lyapunov)\n", 387 | "lyapunov.update_safe_set()\n", 388 | "plot_things()" 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "metadata": {}, 394 | "source": [ 395 | "## Online learning\n", 396 | "As we sample within this initial safe set, we gain more knowledge about the system. In particular, we iteratively select the state withing the safe set, $\\mathcal{S}_n$, where the dynamics are the most uncertain (highest variance)." 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": {}, 403 | "outputs": [], 404 | "source": [ 405 | "action_space = action_disc.all_points\n", 406 | "action_variation = safe_learning.GridWorld(np.array(action_limits) / 20, 11).all_points\n", 407 | "\n", 408 | "rl_opt_value_function = rl.optimize_value_function()\n", 409 | "for i in range(3):\n", 410 | " rl_opt_value_function.eval(feed_dict=lyapunov.feed_dict)\n", 411 | " rl.discrete_policy_optimization(action_space)\n", 412 | "\n", 413 | "\n", 414 | "with tf.variable_scope('add_new_measurement'):\n", 415 | " action_dim = rl.policy.output_dim\n", 416 | " tf_max_state_action = tf.placeholder(safe_learning.config.dtype,\n", 417 | " shape=[1, safety_disc.ndim + action_dim])\n", 418 | " tf_measurement = true_dynamics(tf_max_state_action)\n", 419 | " \n", 420 | "def update_gp():\n", 421 | " \"\"\"Update the GP model based on an actively selected data point.\"\"\"\n", 422 | " # Optimize the value/function and policy\n", 423 | " rl_opt_value_function.eval(feed_dict=lyapunov.feed_dict)\n", 424 | " rl.discrete_policy_optimization(action_space)\n", 425 | " \n", 426 | " # Get a new sample location\n", 427 | " lyapunov.update_safe_set()\n", 428 | " max_state_action, _ = safe_learning.get_safe_sample(lyapunov,\n", 429 | " action_variation,\n", 430 | " action_limits)\n", 431 | "\n", 432 | " # Obtain a measurement of the true dynamics\n", 433 | " lyapunov.feed_dict[tf_max_state_action] = max_state_action\n", 434 | " measurement = tf_measurement.eval(feed_dict=lyapunov.feed_dict)\n", 435 | "\n", 436 | " # Add the measurement to our GP dynamics\n", 437 | " lyapunov.dynamics.add_data_point(max_state_action, measurement)\n", 438 | " \n", 439 | "\n", 440 | "update_gp()\n", 441 | "plot_things()" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": null, 447 | "metadata": {}, 448 | "outputs": [], 449 | "source": [ 450 | "for i in range(20):\n", 451 | " update_gp()\n", 452 | " \n", 453 | "lyapunov.update_safe_set()\n", 454 | "plot_things()" 455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [ 463 | "plotting.show_graph(tf.get_default_graph())" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": null, 469 | "metadata": {}, 470 | "outputs": [], 471 | "source": [] 472 | } 473 | ], 474 | "metadata": { 475 | "anaconda-cloud": {}, 476 | 
"kernelspec": { 477 | "display_name": "Python 3", 478 | "language": "python", 479 | "name": "python3" 480 | }, 481 | "language_info": { 482 | "codemirror_mode": { 483 | "name": "ipython", 484 | "version": 3 485 | }, 486 | "file_extension": ".py", 487 | "mimetype": "text/x-python", 488 | "name": "python", 489 | "nbconvert_exporter": "python", 490 | "pygments_lexer": "ipython3", 491 | "version": "3.6.4" 492 | } 493 | }, 494 | "nbformat": 4, 495 | "nbformat_minor": 2 496 | } 497 | -------------------------------------------------------------------------------- /safe_learning/utilities.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities for plotting, function definitions, and GPs. 3 | 4 | This file defines utilities needed for the experiments, such as creating 5 | parameter grids, computing LQR controllers, Lyapunov functions, sample 6 | functions of Gaussian processes, and plotting ellipses. 7 | 8 | Author: Felix Berkenkamp, Learning & Adaptive Systems Group, ETH Zurich 9 | (GitHub: befelix) 10 | """ 11 | 12 | from __future__ import absolute_import, division, print_function 13 | 14 | import itertools 15 | import inspect 16 | from functools import wraps, partial 17 | 18 | import numpy as np 19 | import scipy.interpolate 20 | import scipy.linalg 21 | import tensorflow as tf 22 | from future.builtins import zip, range 23 | from future.backports import OrderedDict 24 | 25 | from safe_learning import config 26 | 27 | __all__ = ['combinations', 'linearly_spaced_combinations', 'lqr', 'dlqr', 28 | 'ellipse_bounds', 'concatenate_inputs', 'make_tf_fun', 29 | 'with_scope', 'use_parent_scope', 'add_weight_constraint', 30 | 'batchify', 'get_storage', 'set_storage', 'unique_rows', 31 | 'gradient_clipping'] 32 | 33 | 34 | _STORAGE = {} 35 | 36 | 37 | def make_tf_fun(return_type, gradient=None, stateful=True): 38 | """Convert a python function to a tensorflow function. 39 | 40 | Parameters 41 | ---------- 42 | return_type : list 43 | A list of tensorflow return types. Needs to match with the gradient. 44 | gradient : callable, optional 45 | A function that provides the gradient. It takes `op` and one gradient 46 | per output of the function as inputs and returns one gradient for each 47 | input of the function. If stateful is `False` then tensorflow does not 48 | seem to compute gradients at all. 49 | 50 | Returns 51 | ------- 52 | A tensorflow function with gradients registered. 
53 | """ 54 | def wrap(function): 55 | """Create a new function.""" 56 | # Function name with stipped underscore (not allowed by tensorflow) 57 | name = function.__name__.lstrip('_') 58 | 59 | # Without gradients we can take the short route here 60 | if gradient is None: 61 | @wraps(function) 62 | def wrapped_function(self, *args, **kwargs): 63 | method = partial(function, self, **kwargs) 64 | return tf.py_func(method, args, return_type, 65 | stateful=stateful, name=name) 66 | 67 | return wrapped_function 68 | 69 | # Name for the gradient operation 70 | grad_name = name + '_gradient' 71 | 72 | @wraps(function) 73 | def wrapped_function(self, *args): 74 | # Overwrite the gradient 75 | graph = tf.get_default_graph() 76 | 77 | # Make sure the name we specify is unique 78 | unique_grad_name = graph.unique_name(grad_name) 79 | 80 | # Register the new gradient method with tensorflow 81 | tf.RegisterGradient(unique_grad_name)(gradient) 82 | 83 | # Remove self: Tensorflow does not allow for non-tensor inputs 84 | method = partial(function, self) 85 | 86 | with graph.gradient_override_map({"PyFunc": unique_grad_name}): 87 | return tf.py_func(method, args, return_type, 88 | stateful=stateful, name=name) 89 | 90 | return wrapped_function 91 | return wrap 92 | 93 | 94 | def with_scope(name): 95 | """Set the tensorflow scope for the function. 96 | 97 | Parameters 98 | ---------- 99 | name : string, optional 100 | 101 | Returns 102 | ------- 103 | The tensorflow function with scope name. 104 | """ 105 | def wrap(function): 106 | @wraps(function) 107 | def wrapped_function(*args, **kwargs): 108 | with tf.name_scope(name): 109 | return function(*args, **kwargs) 110 | return wrapped_function 111 | return wrap 112 | 113 | 114 | def use_parent_scope(function): 115 | """Use the parent scope for tensorflow.""" 116 | @wraps(function) 117 | def wrapped_function(self, *args, **kwargs): 118 | with tf.variable_scope(self.scope_name): 119 | return function(self, *args, **kwargs) 120 | return wrapped_function 121 | 122 | 123 | def concatenate_inputs(start=0): 124 | """Concatenate the numpy array inputs to the functions. 125 | 126 | Parameters 127 | ---------- 128 | start : int, optional 129 | The attribute number at which to start concatenating. 130 | """ 131 | def wrap(function): 132 | @wraps(function) 133 | def wrapped_function(*args, **kwargs): 134 | """Concatenate the input arguments.""" 135 | nargs = len(args) - start 136 | # Check for tensorflow objects 137 | tf_objects = (tf.Tensor, tf.Variable) 138 | if any(isinstance(arg, tf_objects) for arg in args[start:]): 139 | # reduce number of function calls in graph 140 | if nargs == 1: 141 | return function(*args, **kwargs) 142 | # concatenate extra arguments 143 | args = args[:start] + (tf.concat(args[start:], axis=1),) 144 | return function(*args, **kwargs) 145 | else: 146 | # Map to 2D objects 147 | to_concatenate = map(np.atleast_2d, args[start:]) 148 | 149 | if nargs == 1: 150 | concatenated = tuple(to_concatenate) 151 | else: 152 | concatenated = (np.hstack(to_concatenate),) 153 | 154 | args = args[:start] + concatenated 155 | return function(*args, **kwargs) 156 | 157 | return wrapped_function 158 | 159 | return wrap 160 | 161 | 162 | def add_weight_constraint(optimization, var_list, bound_list): 163 | """Add weight constraints to an optimization step. 164 | 165 | Parameters 166 | ---------- 167 | optimization : tf.Tensor 168 | The optimization routine that updates the parameters. 169 | var_list : list 170 | A list of variables that should be bounded. 
171 |     bound_list : list 172 |         A list of bounds (lower, upper) for each variable in var_list. 173 | 174 |     Returns 175 |     ------- 176 |     assign_operations : list 177 |         A list of assign operations that correspond to one step of the 178 |         constrained optimization. 179 |     """ 180 |     with tf.control_dependencies([optimization]): 181 |         new_list = [] 182 |         for var, bound in zip(var_list, bound_list): 183 |             clipped_var = tf.clip_by_value(var, bound[0], bound[1]) 184 |             assign = tf.assign(var, clipped_var) 185 |             new_list.append(assign) 186 |         return new_list 187 | 188 | 189 | def gradient_clipping(optimizer, loss, var_list, limits): 190 |     """Clip the gradients for the optimization problem. 191 | 192 |     Parameters 193 |     ---------- 194 |     optimizer : instance of tensorflow optimizer 195 |     loss : tf.Tensor 196 |         The loss that we want to optimize. 197 |     var_list : tuple 198 |         A list of variables for which we want to compute gradients. 199 |     limits : tuple 200 |         A list of tuples with lower/upper bounds for each variable. 201 | 202 |     Returns 203 |     ------- 204 |     opt : tf.Tensor 205 |         One optimization step with clipped gradients. 206 | 207 |     Examples 208 |     -------- 209 |     >>> from safe_learning.utilities import gradient_clipping 210 |     >>> var = tf.Variable(1.) 211 |     >>> loss = tf.square(var - 1.) 212 |     >>> optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01) 213 |     >>> opt_loss = gradient_clipping(optimizer, loss, [var], [(-1, 1)]) 214 |     """ 215 |     gradients = optimizer.compute_gradients(loss, var_list=var_list) 216 | 217 |     clipped_gradients = [(tf.clip_by_value(grad, low, up), var) 218 |                          for (grad, var), (low, up) in zip(gradients, limits)] 219 | 220 |     # Return optimization step 221 |     return optimizer.apply_gradients(clipped_gradients) 222 | 223 | 224 | def batchify(arrays, batch_size): 225 |     """Yield the arrays in batches and in order. 226 | 227 |     The last batch might be smaller than batch_size. 228 | 229 |     Parameters 230 |     ---------- 231 |     arrays : list of ndarray 232 |         The arrays that we want to convert to batches. 233 |     batch_size : int 234 |         The size of each individual batch. 235 |     """ 236 |     if not isinstance(arrays, (list, tuple)): 237 |         arrays = (arrays,) 238 | 239 |     # Iterate over array in batches 240 |     for i, i_next in zip(itertools.count(start=0, step=batch_size), 241 |                          itertools.count(start=batch_size, step=batch_size)): 242 | 243 |         batches = [array[i:i_next] for array in arrays] 244 | 245 |         # Break if there are no points left 246 |         if batches[0].size: 247 |             yield i, batches 248 |         else: 249 |             break 250 | 251 | 252 | def combinations(arrays): 253 |     """Return a single array with combinations of parameters. 254 | 255 |     Parameters 256 |     ---------- 257 |     arrays : list of np.array 258 | 259 |     Returns 260 |     ------- 261 |     array : np.array 262 |         An array that contains all combinations of the input arrays 263 |     """ 264 |     return np.array(np.meshgrid(*arrays)).T.reshape(-1, len(arrays)) 265 | 266 | 267 | def linearly_spaced_combinations(bounds, num_samples): 268 |     """ 269 |     Return 2-D array with all linearly spaced combinations with the bounds. 270 | 271 |     Parameters 272 |     ---------- 273 |     bounds : sequence of tuples 274 |         The bounds for the variables, [(x1_min, x1_max), (x2_min, x2_max), ...] 275 |     num_samples : integer or array_like 276 |         Number of samples to use for every dimension. Can be a constant if 277 |         the same number should be used for all, or an array to fine-tune 278 |         precision. Total number of data points is num_samples ** len(bounds).
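For example, bounds=[(0, 1), (0, 1)] with num_samples=3 yields 3 ** 2 = 9 points.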
279 | 280 |     Returns 281 |     ------- 282 |     combinations : 2-d array 283 |         A 2-d array. If d = len(bounds) and l = prod(num_samples) then it 284 |         is of size l x d, that is, every row contains one combination of 285 |         inputs. 286 |     """ 287 |     bounds = np.atleast_2d(bounds) 288 |     num_vars = len(bounds) 289 |     num_samples = np.broadcast_to(num_samples, num_vars) 290 | 291 |     # Create linearly spaced test inputs 292 |     inputs = [np.linspace(b[0], b[1], n) for b, n in zip(bounds, 293 |                                                          num_samples)] 294 | 295 |     # Convert to 2-D array 296 |     return combinations(inputs) 297 | 298 | 299 | def lqr(a, b, q, r): 300 |     """Compute the continuous-time LQR controller. 301 | 302 |     The optimal control input is `u = -k.dot(x)`. 303 | 304 |     Parameters 305 |     ---------- 306 |     a : np.array 307 |     b : np.array 308 |     q : np.array 309 |     r : np.array 310 | 311 |     Returns 312 |     ------- 313 |     k : np.array 314 |         Controller matrix 315 |     p : np.array 316 |         Cost to go matrix 317 |     """ 318 |     a, b, q, r = map(np.atleast_2d, (a, b, q, r)) 319 |     p = scipy.linalg.solve_continuous_are(a, b, q, r) 320 | 321 |     # LQR gain 322 |     k = np.linalg.solve(r, b.T.dot(p)) 323 | 324 |     return k, p 325 | 326 | 327 | def dlqr(a, b, q, r): 328 |     """Compute the discrete-time LQR controller. 329 | 330 |     The optimal control input is `u = -k.dot(x)`. 331 | 332 |     Parameters 333 |     ---------- 334 |     a : np.array 335 |     b : np.array 336 |     q : np.array 337 |     r : np.array 338 | 339 |     Returns 340 |     ------- 341 |     k : np.array 342 |         Controller matrix 343 |     p : np.array 344 |         Cost to go matrix 345 |     """ 346 |     a, b, q, r = map(np.atleast_2d, (a, b, q, r)) 347 |     p = scipy.linalg.solve_discrete_are(a, b, q, r) 348 | 349 |     # LQR gain 350 |     # k = (b.T * p * b + r)^-1 * (b.T * p * a) 351 |     bp = b.T.dot(p) 352 |     tmp1 = bp.dot(b) 353 |     tmp1 += r 354 |     tmp2 = bp.dot(a) 355 |     k = np.linalg.solve(tmp1, tmp2) 356 | 357 |     return k, p 358 | 359 | 360 | def ellipse_bounds(P, level, n=100): 361 |     """Compute the bounds of a 2D ellipse. 362 | 363 |     The levelset of the ellipsoid is given by 364 |     level = x' P x. Given the coordinates of the first 365 |     dimension, this function computes the corresponding 366 |     lower and upper values of the second dimension and 367 |     removes any values of x0 that are outside of the ellipse. 368 | 369 |     Parameters 370 |     ---------- 371 |     P : np.array 372 |         The matrix of the ellipsoid 373 |     level : float 374 |         The value of the levelset 375 |     n : int 376 |         Number of data points 377 | 378 |     Returns 379 |     ------- 380 |     x : np.array 381 |         1D array of x positions of the ellipse 382 |     yu : np.array 383 |         The upper bound of the ellipse 384 |     yl : np.array 385 |         The lower bound of the ellipse 386 | 387 |     Notes 388 |     ----- 389 |     This can be used as 390 |     ```plt.fill_between(*ellipse_bounds(P, level))``` 391 |     """ 392 |     # Round up to multiple of 2 393 |     n += n % 2 394 | 395 |     # Principal axes of ellipsoid 396 |     eigval, eigvec = np.linalg.eig(P) 397 |     eigvec *= np.sqrt(level / eigval) 398 | 399 |     # set zero angle at maximum x 400 |     angle = np.linspace(0, 2 * np.pi, n)[:, None] 401 |     angle += np.arctan(eigvec[0, 1] / eigvec[0, 0]) 402 | 403 |     # Compute positions 404 |     pos = np.cos(angle) * eigvec[:, 0] + np.sin(angle) * eigvec[:, 1] 405 |     n //= 2 406 | 407 |     # Return x-position (symmetric) and upper/lower bounds 408 |     return pos[:n, 0], pos[:n, 1], pos[:n - 1:-1, 1] 409 | 410 | 411 | def get_storage(dictionary, index=None): 412 |     """Get a unique storage point within a class method.
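The storage is indexed by the name of the calling function, so each method that uses this helper gets its own entry in `dictionary`.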
413 | 414 |     Parameters 415 |     ---------- 416 |     dictionary : dict 417 |         A dictionary used for storage. 418 |     index : hashable 419 |         An index under which to store the element. Needs to be hashable. 420 |         This is useful for functions which might be accessed with multiple 421 |         different arguments. 422 | 423 |     Returns 424 |     ------- 425 |     storage : OrderedDict 426 |         The storage object, or None if no storage exists. Otherwise it 427 |         is the OrderedDict that was previously put in storage. 428 |     """ 429 |     # Use function name as storage name 430 |     frame = inspect.currentframe() 431 |     storage_name = inspect.getframeinfo(frame.f_back).function 432 | 433 |     storage = dictionary.get(storage_name) 434 | 435 |     if index is None: 436 |         return storage 437 |     elif storage is not None: 438 |         # Return directly the indexed object 439 |         try: 440 |             return storage[index] 441 |         except KeyError: 442 |             pass 443 | 444 | 445 | def set_storage(dictionary, name_value, index=None): 446 |     """Set the storage point within a class method. 447 | 448 |     Parameters 449 |     ---------- 450 |     dictionary : dict 451 |     name_value : list of tuples 452 |         A list of tuples, where each tuple contains a string with the name 453 |         of the storage object and the corresponding value that is to be put 454 |         in storage. These are stored as OrderedDicts. 455 |     index : hashable 456 |         An index under which to store the element. Needs to be hashable. 457 |         This is useful for functions which might be accessed with multiple 458 |         different arguments. 459 |     """ 460 |     # Use function name as storage name 461 |     frame = inspect.currentframe() 462 |     storage_name = inspect.getframeinfo(frame.f_back).function 463 | 464 |     storage = OrderedDict(name_value) 465 |     if index is None: 466 |         dictionary[storage_name] = storage 467 |     else: 468 |         # Make sure the storage is initialized 469 |         if storage_name not in dictionary: 470 |             dictionary[storage_name] = {} 471 |         # Set the indexed storage 472 |         dictionary[storage_name][index] = storage 473 | 474 | 475 | def get_feed_dict(graph): 476 |     """Return the global feed_dict used for this graph. 477 | 478 |     Parameters 479 |     ---------- 480 |     graph : tf.Graph 481 | 482 |     Returns 483 |     ------- 484 |     feed_dict : dict 485 |         The feed_dict for this graph. 486 |     """ 487 |     try: 488 |         # Just return the feed_dict 489 |         return graph.feed_dict_sl 490 |     except AttributeError: 491 |         # Create a new feed_dict for this graph 492 |         graph.feed_dict_sl = {} 493 |         return graph.feed_dict_sl 494 | 495 | 496 | def unique_rows(array): 497 |     """Return the unique rows of the array. 498 | 499 |     Parameters 500 |     ---------- 501 |     array : ndarray 502 |         A 2D numpy array. 503 | 504 |     Returns 505 |     ------- 506 |     unique_array : ndarray 507 |         A 2D numpy array that contains all the unique rows of array. 508 |     """ 509 |     array = np.ascontiguousarray(array) 510 |     # Combine all the rows into a single element of the flexible void datatype 511 |     dtype = np.dtype((np.void, array.dtype.itemsize * array.shape[1])) 512 |     combined_array = array.view(dtype=dtype) 513 |     # Get all the unique rows of the combined array 514 |     _, idx = np.unique(combined_array, return_index=True) 515 | 516 |     return array[idx] 517 | 518 | 519 | def compute_trajectory(dynamics, policy, initial_state, num_steps): 520 |     """Compute a state trajectory given dynamics and a policy. 521 | 522 |     Parameters 523 |     ---------- 524 |     dynamics : callable 525 |         A function that takes the current state and action as input and returns 526 |         the next state.
527 |     policy : callable 528 |         A function that takes the current state as input and returns the 529 |         action. 530 |     initial_state : Tensor or ndarray 531 |         The initial state at which to start simulating. 532 |     num_steps : int 533 |         The number of steps for which to simulate the system. 534 | 535 |     Returns 536 |     ------- 537 |     states : ndarray 538 |         A (num_steps x state_dim) array with one state on each row. 539 |     actions : ndarray 540 |         A ((num_steps - 1) x action_dim) array with the corresponding action 541 |         on each row. 542 |     """ 543 |     initial_state = np.atleast_2d(initial_state) 544 |     state_dim = initial_state.shape[1] 545 | 546 |     # Get storage (indexed by dynamics and policy) 547 |     index = (dynamics, policy) 548 |     storage = get_storage(_STORAGE, index=index) 549 | 550 |     if storage is None: 551 |         # Compute next state under the policy 552 |         tf_state = tf.placeholder(config.dtype, [1, state_dim]) 553 |         tf_action = policy(tf_state) 554 |         tf_next_state = dynamics(tf_state, tf_action) 555 | 556 |         storage = [('tf_state', tf_state), 557 |                    ('tf_action', tf_action), 558 |                    ('tf_next_state', tf_next_state)] 559 | 560 |         set_storage(_STORAGE, storage, index=index) 561 |     else: 562 |         tf_state, tf_action, tf_next_state = storage.values() 563 | 564 |     # Initialize 565 |     dtype = config.np_dtype 566 |     states = np.empty((num_steps, state_dim), dtype=dtype) 567 |     actions = np.empty((num_steps - 1, policy.output_dim), dtype=dtype) 568 | 569 |     states[0, :] = initial_state 570 | 571 |     # Get the feed dict 572 |     session = tf.get_default_session() 573 |     feed_dict = get_feed_dict(session.graph) 574 | 575 |     next_data = [tf_next_state, tf_action] 576 | 577 |     # Run simulation 578 |     for i in range(num_steps - 1): 579 |         feed_dict[tf_state] = states[[i], :] 580 |         states[i + 1, :], actions[i, :] = session.run(next_data, 581 |                                                       feed_dict=feed_dict) 582 | 583 |     return states, actions 584 | -------------------------------------------------------------------------------- /examples/inverted_pendulum.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from __future__ import division, print_function\n", 10 | "\n", 11 | "from functools import partial\n", 12 | "\n", 13 | "import gpflow\n", 14 | "import tensorflow as tf\n", 15 | "import numpy as np\n", 16 | "\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "from scipy import signal, linalg\n", 19 | "\n", 20 | "# Nice progress bars\n", 21 | "try:\n", 22 | "    from tqdm import tqdm\n", 23 | "except ImportError:\n", 24 | "    tqdm = lambda x: x\n", 25 | "\n", 26 | "import safe_learning\n", 27 | "import plotting\n", 28 | "from utilities import InvertedPendulum\n", 29 | "\n", 30 | "%matplotlib inline\n", 31 | "\n", 32 | "# Open a new session (close old one if exists)\n", 33 | "try:\n", 34 | "    session.close()\n", 35 | "except NameError:\n", 36 | "    pass\n", 37 | "\n", 38 | "session = tf.InteractiveSession()\n", 39 | "session.run(tf.global_variables_initializer())" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "# Define underlying dynamic system and costs/rewards\n", 47 | "Define the dynamics of the true system and of the (wrong) model used for control\n" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "n = 2\n", 57 | "m = 1\n", 58 | "\n", 59 | "# 'Wrong' model parameters\n", 60 | "mass = 0.1\n",
61 | "friction = 0.\n", 62 | "length = 0.5\n", 63 | "gravity = 9.81\n", 64 | "inertia = mass * length ** 2\n", 65 | "\n", 66 | "# True model parameters\n", 67 | "true_mass = 0.15\n", 68 | "true_friction = 0.1\n", 69 | "true_length = length\n", 70 | "true_inertia = true_mass * true_length ** 2\n", 71 | "\n", 72 | "# Input saturation\n", 73 | "x_max = np.deg2rad(30)\n", 74 | "u_max = gravity * true_mass * true_length * np.sin(x_max)\n", 75 | "\n", 76 | "# Normalization\n", 77 | "norm_state = np.array([x_max, np.sqrt(gravity / length)])\n", 78 | "norm_action = np.array([u_max])\n", 79 | "\n", 80 | "# Corresponding dynamic systems\n", 81 | "true_dynamics = InvertedPendulum(mass=true_mass, length=true_length, friction=true_friction,\n", 82 | " normalization=(norm_state, norm_action))\n", 83 | "\n", 84 | "wrong_pendulum = InvertedPendulum(mass=mass, length=length, friction=friction,\n", 85 | " normalization=(norm_state, norm_action))\n", 86 | "\n", 87 | "# LQR cost matrices\n", 88 | "q = 1 * np.diag([1., 2.])\n", 89 | "r = 1.2 * np.array([[1]], dtype=safe_learning.config.np_dtype)\n", 90 | "\n", 91 | "# Quadratic (LQR) reward function\n", 92 | "gamma = 0.98\n", 93 | "reward_function = safe_learning.QuadraticFunction(linalg.block_diag(-q, -r))" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "# Set up a discretization for safety verification" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "# x_min, x_max, discretization\\\n", 110 | "state_limits = np.array([[-2., 2.], [-1.5, 1.5]])\n", 111 | "action_limits = np.array([[-1, 1]])\n", 112 | "num_states = [2001, 1501]\n", 113 | "\n", 114 | "safety_disc = safe_learning.GridWorld(state_limits, num_states)\n", 115 | "policy_disc = safe_learning.GridWorld(state_limits, [55, 55])\n", 116 | "\n", 117 | "# Discretization constant\n", 118 | "tau = np.min(safety_disc.unit_maxes)\n", 119 | "\n", 120 | "print('Grid size: {0}'.format(safety_disc.nindex))" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "# Define the GP dynamics model\n", 128 | "\n", 129 | "We use a combination of kernels to model the errors in the dynamics" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "A, B = wrong_pendulum.linearize()\n", 139 | "lipschitz_dynamics = 1\n", 140 | "\n", 141 | "noise_var = 0.001 ** 2\n", 142 | "\n", 143 | "m_true = np.hstack((true_dynamics.linearize()))\n", 144 | "m = np.hstack((A, B))\n", 145 | "\n", 146 | "variances = (m_true - m) ** 2\n", 147 | "\n", 148 | "# Make sure things remain \n", 149 | "np.clip(variances, 1e-5, None, out=variances)\n", 150 | "\n", 151 | "# Kernels\n", 152 | "kernel1 = (gpflow.kernels.Linear(3, variance=variances[0, :], ARD=True)\n", 153 | " + gpflow.kernels.Matern32(1, lengthscales=1, active_dims=[0])\n", 154 | " * gpflow.kernels.Linear(1, variance=variances[0, 1]))\n", 155 | "\n", 156 | "kernel2 = (gpflow.kernels.Linear(3, variance=variances[1, :], ARD=True)\n", 157 | " + gpflow.kernels.Matern32(1, lengthscales=1, active_dims=[0])\n", 158 | " * gpflow.kernels.Linear(1, variance=variances[1, 1]))\n", 159 | "\n", 160 | "# Mean dynamics\n", 161 | "\n", 162 | "mean_dynamics = safe_learning.LinearSystem((A, B), name='mean_dynamics')\n", 163 | "mean_function1 = safe_learning.LinearSystem((A[[0], :], B[[0], :]), name='mean_dynamics_1')\n", 
164 | "mean_function2 = safe_learning.LinearSystem((A[[1], :], B[[1], :]), name='mean_dynamics_2')\n", 165 | "\n", 166 | "# Define a GP model over the dynamics\n", 167 | "gp1 = gpflow.gpr.GPR(np.empty((0, 3), dtype=safe_learning.config.np_dtype),\n", 168 | " np.empty((0, 1), dtype=safe_learning.config.np_dtype),\n", 169 | " kernel1,\n", 170 | " mean_function=mean_function1)\n", 171 | "gp1.likelihood.variance = noise_var\n", 172 | "\n", 173 | "gp2 = gpflow.gpr.GPR(np.empty((0, 3), dtype=safe_learning.config.np_dtype),\n", 174 | " np.empty((0, 1), dtype=safe_learning.config.np_dtype),\n", 175 | " kernel2,\n", 176 | " mean_function=mean_function2)\n", 177 | "gp2.likelihood.variance = noise_var\n", 178 | "\n", 179 | "gp1_fun = safe_learning.GaussianProcess(gp1)\n", 180 | "gp2_fun = safe_learning.GaussianProcess(gp2)\n", 181 | "\n", 182 | "dynamics = safe_learning.FunctionStack((gp1_fun, gp2_fun))" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "# Compute the optimal policy for the linear (and wrong) mean dynamics\n", 192 | "k, s = safe_learning.utilities.dlqr(A, B, q, r)\n", 193 | "init_policy = safe_learning.LinearSystem((-k), name='initial_policy')\n", 194 | "init_policy = safe_learning.Saturation(init_policy, -1, 1)\n", 195 | "\n", 196 | "# Define the Lyapunov function corresponding to the initial policy\n", 197 | "init_lyapunov = safe_learning.QuadraticFunction(s)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "# Set up the dynamic programming problem" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [ 213 | "# Define a neural network policy\n", 214 | "relu = tf.nn.relu\n", 215 | "policy = safe_learning.NeuralNetwork(layers=[32, 32, 1],\n", 216 | " nonlinearities=[relu, relu, tf.nn.tanh],\n", 217 | " output_scale=action_limits[0, 1])\n", 218 | "\n", 219 | "# Define value function approximation\n", 220 | "value_function = safe_learning.Triangulation(policy_disc,\n", 221 | " -init_lyapunov(policy_disc.all_points).eval(),\n", 222 | " project=True)\n", 223 | "\n", 224 | "# Define policy optimization problem\n", 225 | "rl = safe_learning.PolicyIteration(\n", 226 | " policy,\n", 227 | " dynamics,\n", 228 | " reward_function,\n", 229 | " value_function,\n", 230 | " gamma=gamma)\n", 231 | " \n", 232 | "\n", 233 | "with tf.name_scope('rl_mean_optimization'):\n", 234 | " rl_opt_value_function = rl.optimize_value_function()\n", 235 | " \n", 236 | " # Placeholder for states\n", 237 | " tf_states_mean = tf.placeholder(safe_learning.config.dtype, [None, 2])\n", 238 | " \n", 239 | " # Optimize for expected gain\n", 240 | " values = rl.future_values(tf_states_mean)\n", 241 | " policy_loss = -tf.reduce_mean(values)\n", 242 | " \n", 243 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)\n", 244 | " adapt_policy_mean = optimizer.minimize(policy_loss, var_list=rl.policy.parameters)\n" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "# Start the session\n", 254 | "session.run(tf.global_variables_initializer())" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "### Run initial dynamic programming for the mean dynamics" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 
null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "for i in tqdm(range(3000)):\n", 271 | " \n", 272 | " # select random training batches\n", 273 | " rl.feed_dict[tf_states_mean] = policy_disc.sample_continuous(1000)\n", 274 | "\n", 275 | " session.run(adapt_policy_mean, feed_dict=rl.feed_dict)" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "# Define the Lyapunov function\n", 283 | "\n", 284 | "Here we use the fact that the optimal value function is a Lyapunov function for the optimal policy if the dynamics are deterministic. As uncertainty about the dynamics decreases, the value function for the mean dynamics will thus converge to a Lyapunov function." 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "lyapunov_function = -rl.value_function\n", 294 | "lipschitz_lyapunov = lambda x: tf.reduce_max(tf.abs(rl.value_function.gradient(x)),\n", 295 | " axis=1, keepdims=True)\n", 296 | "\n", 297 | "lipschitz_policy = lambda x: policy.lipschitz() \n", 298 | "\n", 299 | "a_true, b_true = true_dynamics.linearize()\n", 300 | "lipschitz_dynamics = lambda x: np.max(np.abs(a_true)) + np.max(np.abs(b_true)) * lipschitz_policy(x)\n", 301 | "\n", 302 | "# Lyapunov function definition\n", 303 | "lyapunov = safe_learning.Lyapunov(safety_disc,\n", 304 | " lyapunov_function,\n", 305 | " dynamics,\n", 306 | " lipschitz_dynamics,\n", 307 | " lipschitz_lyapunov,\n", 308 | " tau,\n", 309 | " policy=rl.policy,\n", 310 | " initial_set=None)\n", 311 | "\n", 312 | "# Set initial safe set (level set) based on initial Lyapunov candidate\n", 313 | "values = init_lyapunov(safety_disc.all_points).eval()\n", 314 | "cutoff = np.max(values) * 0.005\n", 315 | "\n", 316 | "lyapunov.initial_safe_set = np.squeeze(values, axis=1) <= cutoff" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "def plot_safe_set(lyapunov, show=True):\n", 326 | " \"\"\"Plot the safe set for a given Lyapunov function.\"\"\"\n", 327 | " plt.imshow(lyapunov.safe_set.reshape(num_states).T,\n", 328 | " origin='lower',\n", 329 | " extent=lyapunov.discretization.limits.ravel(),\n", 330 | " vmin=0,\n", 331 | " vmax=1)\n", 332 | " \n", 333 | " if isinstance(lyapunov.dynamics, safe_learning.UncertainFunction):\n", 334 | " X = lyapunov.dynamics.functions[0].X\n", 335 | " plt.plot(X[:, 0], X[:, 1], 'rx')\n", 336 | " \n", 337 | " plt.title('safe set')\n", 338 | " plt.colorbar()\n", 339 | " if show:\n", 340 | " plt.show()\n", 341 | " \n", 342 | "lyapunov.update_safe_set()\n", 343 | "plot_safe_set(lyapunov)" 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "## Safe policy update\n", 351 | "\n", 352 | "We do dynamic programming, but enforce the decrease condition on the Lyapunov function using a Lagrange multiplier." 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "with tf.name_scope('policy_optimization'):\n", 362 | " \n", 363 | " # Placeholder for states\n", 364 | " tf_states = tf.placeholder(safe_learning.config.dtype, [None, 2])\n", 365 | " \n", 366 | " # Add Lyapunov uncertainty (but only if safety-relevant)\n", 367 | " values = rl.future_values(tf_states, lyapunov=lyapunov)\n", 368 | " \n", 369 | " policy_loss = -tf.reduce_mean(values)\n",
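" # Passing lyapunov= above makes future_values enforce the Lyapunov decrease\n", " # condition through a Lagrange multiplier (see the markdown cell above), so\n", " # minimizing policy_loss trades off expected return against safety.\n",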
370 | " \n", 371 | "\n", 372 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)\n", 373 | " adapt_policy = optimizer.minimize(policy_loss, var_list=rl.policy.parameters)\n", 374 | " \n", 375 | " \n", 376 | "def rl_optimize_policy(num_iter):\n", 377 | " # Optimize value function\n", 378 | " session.run(rl_opt_value_function, feed_dict=rl.feed_dict)\n", 379 | "\n", 380 | " # select random training batches\n", 381 | " for i in tqdm(range(num_iter)):\n", 382 | " rl.feed_dict[tf_states] = lyapunov.discretization.sample_continuous(1000)\n", 383 | "\n", 384 | " session.run(adapt_policy, feed_dict=rl.feed_dict)" 385 | ] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "metadata": {}, 390 | "source": [ 391 | "# Exploration\n", 392 | "\n", 393 | "We explore close to the current policy by sampling the most uncertain state that does not leave the current level set." 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": null, 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [ 402 | "action_variation = np.array([[-0.02], [0.], [0.02]], dtype=safe_learning.config.np_dtype)\n", 403 | "\n", 404 | "\n", 405 | "with tf.name_scope('add_new_measurement'):\n", 406 | " action_dim = lyapunov.policy.output_dim\n", 407 | " tf_max_state_action = tf.placeholder(safe_learning.config.dtype,\n", 408 | " shape=[1, safety_disc.ndim + action_dim])\n", 409 | " tf_measurement = true_dynamics(tf_max_state_action)\n", 410 | " \n", 411 | "def update_gp():\n", 412 | " \"\"\"Update the GP model based on an actively selected data point.\"\"\"\n", 413 | " # Get a new sample location\n", 414 | " max_state_action, _ = safe_learning.get_safe_sample(lyapunov,\n", 415 | " action_variation,\n", 416 | " action_limits,\n", 417 | " num_samples=1000)\n", 418 | "\n", 419 | " # Obtain a measurement of the true dynamics\n", 420 | " lyapunov.feed_dict[tf_max_state_action] = max_state_action\n", 421 | " measurement = tf_measurement.eval(feed_dict=lyapunov.feed_dict)\n", 422 | "\n", 423 | " # Add the measurement to our GP dynamics\n", 424 | " lyapunov.dynamics.add_data_point(max_state_action, measurement)\n", 425 | " " 426 | ] 427 | }, 428 | { 429 | "cell_type": "markdown", 430 | "metadata": {}, 431 | "source": [ 432 | "# Run the optimization" 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": null, 438 | "metadata": {}, 439 | "outputs": [], 440 | "source": [ 441 | "# lyapunov.update_safe_set()\n", 442 | "rl_optimize_policy(num_iter=200)\n", 443 | "rl_optimize_policy(num_iter=200)\n", 444 | "\n", 445 | "lyapunov.update_safe_set()\n", 446 | "plot_safe_set(lyapunov)" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": null, 452 | "metadata": {}, 453 | "outputs": [], 454 | "source": [ 455 | "for i in range(5):\n", 456 | " print('iteration {} with c_max: {}'.format(i, lyapunov.feed_dict[lyapunov.c_max]))\n", 457 | " for _ in tqdm(range(10)):\n", 458 | " update_gp()\n", 459 | " \n", 460 | " rl_optimize_policy(num_iter=200)\n", 461 | " lyapunov.update_values()\n", 462 | " \n", 463 | " # Update safe set and plot\n", 464 | " lyapunov.update_safe_set()\n", 465 | " plot_safe_set(lyapunov) " 466 | ] 467 | }, 468 | { 469 | "cell_type": "markdown", 470 | "metadata": {}, 471 | "source": [ 472 | "# Plot trajectories and analyse improvement" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": [ 481 | "x0 = np.array([[1., -.5]])\n", 482 | "\n", 483 | "states_new, 
actions_new = safe_learning.utilities.compute_trajectory(true_dynamics, rl.policy, x0, 100)\n", 484 | "states_old, actions_old = safe_learning.utilities.compute_trajectory(true_dynamics, init_policy, x0, 100)\n", 485 | "\n", 486 | "t = np.arange(len(states_new)) * true_dynamics.dt" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": null, 492 | "metadata": {}, 493 | "outputs": [], 494 | "source": [ 495 | "plt.plot(t, states_new[:, 0], label='new')\n", 496 | "plt.plot(t, states_old[:, 0], label='old')\n", 497 | "plt.xlabel('time [s]')\n", 498 | "plt.ylabel('angle [rad]')\n", 499 | "plt.legend()\n", 500 | "plt.show()\n", 501 | "\n", 502 | "plt.plot(t, states_new[:, 1], label='new')\n", 503 | "plt.plot(t, states_old[:, 1], label='old')\n", 504 | "plt.xlabel('time [s]')\n", 505 | "plt.ylabel('angular velocity [rad/s]')\n", 506 | "plt.legend()\n", 507 | "plt.show()" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": null, 513 | "metadata": {}, 514 | "outputs": [], 515 | "source": [ 516 | "plt.plot(t[:-1], actions_new, label='new')\n", 517 | "plt.plot(t[:-1], actions_old, label='old')\n", 518 | "plt.xlabel('time [s]')\n", 519 | "plt.ylabel('actions')\n", 520 | "plt.legend()" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": null, 526 | "metadata": {}, 527 | "outputs": [], 528 | "source": [ 529 | "print('reward old:', tf.reduce_sum(rl.reward_function(states_old[:-1], actions_old)).eval(feed_dict=rl.feed_dict))\n", 530 | "print('reward new:', tf.reduce_sum(rl.reward_function(states_new[:-1], actions_new)).eval(feed_dict=rl.feed_dict))" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": null, 536 | "metadata": {}, 537 | "outputs": [], 538 | "source": [] 539 | } 540 | ], 541 | "metadata": { 542 | "anaconda-cloud": {}, 543 | "kernelspec": { 544 | "display_name": "Python 3", 545 | "language": "python", 546 | "name": "python3" 547 | }, 548 | "language_info": { 549 | "codemirror_mode": { 550 | "name": "ipython", 551 | "version": 3 552 | }, 553 | "file_extension": ".py", 554 | "mimetype": "text/x-python", 555 | "name": "python", 556 | "nbconvert_exporter": "python", 557 | "pygments_lexer": "ipython3", 558 | "version": "3.6.4" 559 | } 560 | }, 561 | "nbformat": 4, 562 | "nbformat_minor": 2 563 | } 564 | -------------------------------------------------------------------------------- /examples/reinforcement_learning_cartpole.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Reinforcement Learning for the Cart-Pole\n", 8 | "\n", 9 | "Perform approximate policy iteration in an actor-critic framework for the cart-pole (i.e., inverted pendulum on a cart)." 
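, "\n", "\n", "Concretely (see the TensorFlow graph below), the policy-evaluation step minimizes the temporal-difference loss $\\mathbb{E}\\left[\\,\\left|V_{\\bf \\theta}({\\bf x}) - \\left(r({\\bf x}, \\pi_{\\bf \\delta}({\\bf x})) + \\gamma V_{\\bf \\theta}({\\bf x}')\\right)\\right|\\,\\right]$ with the target held fixed, while the policy-improvement step maximizes $\\mathbb{E}\\left[r({\\bf x}, \\pi_{\\bf \\delta}({\\bf x})) + \\gamma V_{\\bf \\theta}({\\bf x}')\\right]$ over the policy parameters ${\\bf \\delta}$, where ${\\bf x}'$ denotes the next state under the current policy."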
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "from __future__ import division, print_function\n", 19 | "\n", 20 | "import numpy as np\n", 21 | "import tensorflow as tf\n", 22 | "import gpflow\n", 23 | "import safe_learning\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "import time\n", 26 | "import os\n", 27 | "\n", 28 | "from matplotlib.colors import ListedColormap\n", 29 | "from mpl_toolkits.mplot3d import Axes3D\n", 30 | "from scipy.linalg import block_diag\n", 31 | "from utilities import CartPole, compute_closedloop_response, get_parameter_change, find_nearest, reward_rollout, compute_roa, binary_cmap\n", 32 | "\n", 33 | "# Nice progress bars\n", 34 | "try:\n", 35 | " from tqdm import tqdm\n", 36 | "except ImportError:\n", 37 | " tqdm = lambda x: x\n" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "## User Options" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "class Options(object):\n", 54 | " def __init__(self, **kwargs):\n", 55 | " super(Options, self).__init__()\n", 56 | " self.__dict__.update(kwargs)\n", 57 | "\n", 58 | "OPTIONS = Options(np_dtype = safe_learning.config.np_dtype,\n", 59 | " tf_dtype = safe_learning.config.dtype,\n", 60 | " saturate = True, # apply saturation constraints to the control input\n", 61 | " eps = 1e-8, # numerical tolerance\n", 62 | " use_linear_dynamics = False, # use the linearized form of the dynamics as the true dynamics (for testing)\n", 63 | " dpi = 200,\n", 64 | " num_cores = 4,\n", 65 | " num_sockets = 1)\n" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "## TensorFlow Session\n", 73 | "\n", 74 | "Customize the TensorFlow session for the current device." 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "os.environ[\"KMP_BLOCKTIME\"] = str(0)\n", 84 | "os.environ[\"KMP_SETTINGS\"] = str(1)\n", 85 | "os.environ[\"KMP_AFFINITY\"] = 'granularity=fine,noverbose,compact,1,0'\n", 86 | "os.environ[\"OMP_NUM_THREADS\"] = str(OPTIONS.num_cores)\n", 87 | "\n", 88 | "config = tf.ConfigProto(intra_op_parallelism_threads = OPTIONS.num_cores,\n", 89 | " inter_op_parallelism_threads = OPTIONS.num_sockets,\n", 90 | " allow_soft_placement = False,\n", 91 | " device_count = {'CPU': OPTIONS.num_cores})\n", 92 | "\n", 93 | "try:\n", 94 | " session.close()\n", 95 | "except NameError:\n", 96 | " pass\n", 97 | "session = tf.InteractiveSession(config=config)\n" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "## Dynamics\n", 105 | "\n", 106 | "Define the nonlinear and linearized forms of the cart-pole dynamics."
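, "\n", "\n", "The state ${\\bf x} = (x, \\theta, \\dot{x}, \\dot{\\theta})$ and the control input $u$ are normalized by the constants defined below, so the learning problem is posed on the unit domains $[-1, 1]^4$ and $[-1, 1]$."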
107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "# Constants\n", 116 | "dt = 0.01 # sampling time\n", 117 | "g = 9.81 # gravity\n", 118 | "\n", 119 | "# System parameters\n", 120 | "m = 0.175 # pendulum mass\n", 121 | "M = 1.732 # cart mass\n", 122 | "L = 0.28 # pole length\n", 123 | "b = 0.01 # rotational friction\n", 124 | "\n", 125 | "# State and action normalizers\n", 126 | "x_max = 0.5 # linear position [m]\n", 127 | "theta_max = np.deg2rad(30) # angular position [rad]\n", 128 | "x_dot_max = 2 # linear velocity [m/s]\n", 129 | "theta_dot_max = np.deg2rad(30) # angular velocity [rad/s]\n", 130 | "u_max = (m + M) * (x_dot_max ** 2) / x_max # linear force [N], control action\n", 131 | "\n", 132 | "state_norm = (x_max, theta_max, x_dot_max, theta_dot_max)\n", 133 | "action_norm = (u_max,)\n", 134 | "\n", 135 | "# Dimensions and domains\n", 136 | "state_dim = 4\n", 137 | "action_dim = 1\n", 138 | "state_limits = np.array([[-1., 1.]] * state_dim)\n", 139 | "action_limits = np.array([[-1., 1.]] * action_dim)\n", 140 | "\n", 141 | "# Initialize system class and its linearization\n", 142 | "cartpole = CartPole(m, M, L, b, dt, [state_norm, action_norm])\n", 143 | "A, B = cartpole.linearize()\n", 144 | "\n", 145 | "if OPTIONS.use_linear_dynamics:\n", 146 | " dynamics = safe_learning.functions.LinearSystem((A, B), name='dynamics')\n", 147 | "else:\n", 148 | " dynamics = cartpole.__call__\n", 149 | "\n" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "## Reward Function\n", 157 | "\n", 158 | "Define a positive-definite reward function over the state-action space $\\mathcal{X} \\times \\mathcal{U}$." 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "Q = 0.1 * np.identity(state_dim).astype(OPTIONS.np_dtype) # state cost matrix\n", 168 | "R = 0.1 * np.identity(action_dim).astype(OPTIONS.np_dtype) # action cost matrix\n", 169 | "\n", 170 | "# Quadratic reward (- cost) function\n", 171 | "reward_function = safe_learning.QuadraticFunction(block_diag(- Q, - R), name='reward_function')\n" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "## Parametric Policy and Value Function\n", 179 | "\n", 180 | "Define a parametric value function $V_{\\bf \\theta} : \\mathcal{X} \\to \\mathbb{R}$ and policy $\\pi_{\\bf \\delta} : \\mathcal{X} \\to \\mathcal{U}$ as neural networks." 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "# Policy\n", 190 | "layer_dims = [64, 64, action_dim]\n", 191 | "activations = [tf.nn.relu, tf.nn.relu, None]\n", 192 | "if OPTIONS.saturate:\n", 193 | " activations[-1] = tf.nn.tanh\n", 194 | "policy = safe_learning.functions.NeuralNetwork(layer_dims, activations, name='policy', use_bias=False)\n", 195 | "\n", 196 | "# Value function\n", 197 | "layer_dims = [64, 64, 1]\n", 198 | "activations = [tf.nn.relu, tf.nn.relu, None]\n", 199 | "value_function = safe_learning.functions.NeuralNetwork(layer_dims, activations, name='value_function', use_bias=False)\n" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "## LQR Policy\n", 207 | "\n", 208 | "We compare our results to the LQR solution for the linearized system later." 
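, "\n", "\n", "Here `dlqr` solves the standard discrete-time LQR problem: $K = (R + B^\\top P B)^{-1} B^\\top P A$ is the gain of the controller $u_k = -K {\\bf x}_k$, and $P$ is the cost-to-go matrix that satisfies the discrete algebraic Riccati equation $P = A^\\top P A - A^\\top P B (R + B^\\top P B)^{-1} B^\\top P A + Q$."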
209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "K, P = safe_learning.utilities.dlqr(A, B, Q, R)\n", 218 | "policy_lqr = safe_learning.functions.LinearSystem((-K, ), name='policy_lqr')\n", 219 | "if OPTIONS.saturate:\n", 220 | " policy_lqr = safe_learning.Saturation(policy_lqr, -1, 1)\n" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "## TensorFlow Graph" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "# Use parametric policy and value function\n", 237 | "states = tf.placeholder(OPTIONS.tf_dtype, shape=[None, state_dim], name='states')\n", 238 | "actions = policy(states)\n", 239 | "rewards = reward_function(states, actions)\n", 240 | "values = value_function(states)\n", 241 | "future_states = dynamics(states, actions)\n", 242 | "future_values = value_function(future_states)\n", 243 | "\n", 244 | "# Compare with LQR solution, possibly with saturation constraints\n", 245 | "actions_lqr = policy_lqr(states)\n", 246 | "rewards_lqr = reward_function(states, actions_lqr)\n", 247 | "future_states_lqr = dynamics(states, actions_lqr)\n", 248 | "\n", 249 | "# Discount factor and scaling\n", 250 | "max_state = np.ones((1, state_dim))\n", 251 | "max_action = np.ones((1, action_dim))\n", 252 | "r_max = np.linalg.multi_dot((max_state, Q, max_state.T)) + np.linalg.multi_dot((max_action, R, max_action.T))\n", 253 | "gamma = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='discount_factor')\n", 254 | "\n", 255 | "val_scaling = 1 / r_max.ravel()\n", 256 | "pol_scaling = (1 - gamma) / r_max.ravel()\n", 257 | "\n", 258 | "# Policy evaluation\n", 259 | "with tf.name_scope('value_optimization'):\n", 260 | " value_learning_rate = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='learning_rate')\n", 261 | " target = tf.stop_gradient(rewards + gamma * future_values, name='target')\n", 262 | " value_objective = pol_scaling * tf.reduce_mean(tf.abs(values - target), name='objective')\n", 263 | " optimizer = tf.train.GradientDescentOptimizer(value_learning_rate)\n", 264 | " value_update = optimizer.minimize(value_objective, var_list=value_function.parameters)\n", 265 | "\n", 266 | "# Policy improvement\n", 267 | "with tf.name_scope('policy_optimization'):\n", 268 | " policy_learning_rate = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='learning_rate')\n", 269 | " policy_objective = - pol_scaling * tf.reduce_mean(rewards + gamma * future_values, name='objective')\n", 270 | " optimizer = tf.train.GradientDescentOptimizer(policy_learning_rate)\n", 271 | " policy_update = optimizer.minimize(policy_objective, var_list=policy.parameters)\n", 272 | " \n", 273 | "# Sampling \n", 274 | "with tf.name_scope('state_sampler'):\n", 275 | " batch_size = tf.placeholder(tf.int32, shape=[], name='batch_size')\n", 276 | " batch = tf.random_uniform([batch_size, state_dim], -1, 1, dtype=OPTIONS.tf_dtype, name='batch')\n" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "## Approximate Policy Iteration\n", 284 | "\n", 285 | "Train the policy $\\pi_{\\bf \\delta}$ and value function $V_{\\bf \\theta}$ in tandem with approximate policy iteration. 
Changing the discount factor strongly affects the results; a low discount factor encourages a well-behaved value function, while a high discount factor encourages the policy to yield a larger ROA. We compare $\\pi_{\\bf \\delta}$ to the LQR policy $\\pi$ with saturation constraints, and $V_{\\bf \\theta}$ to the LQR value function $V_\\pi$ and the value function $V_{\\pi_{\\bf \\delta}}$ induced by the parametric policy $\\pi_{\\bf \\delta}$. We compute $V_{\\pi_{\\bf \\delta}}$ as a rollout sum of discounted rewards at states in a state space discretization." 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "### Initialization" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [ 301 | "session.run(tf.global_variables_initializer())\n", 302 | "\n", 303 | "# Uniformly sampled test set\n", 304 | "test_size = 1e3\n", 305 | "test_set = batch.eval({batch_size: test_size})\n", 306 | "\n", 307 | "# Keep track of the test set loss and parameter changes during training\n", 308 | "value_test_loss = []\n", 309 | "value_param_changes = []\n", 310 | "policy_test_loss = []\n", 311 | "policy_param_changes = []\n" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "### Training" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "# Training hyperparameters\n", 328 | "max_iters = 200\n", 329 | "value_iters = 100\n", 330 | "policy_iters = 10\n", 331 | "feed_dict = {\n", 332 | " states: test_set,\n", 333 | " gamma: 0.99,\n", 334 | " value_learning_rate: 0.2,\n", 335 | " policy_learning_rate: 0.5,\n", 336 | " batch_size: 1e2,\n", 337 | "}\n", 338 | "\n", 339 | "old_value_params = session.run(value_function.parameters)\n", 340 | "old_policy_params = session.run(policy.parameters)\n", 341 | "\n", 342 | "for i in tqdm(range(max_iters)):\n", 343 | " # Policy evaluation (value update)\n", 344 | " for _ in range(value_iters):\n", 345 | " feed_dict[states] = batch.eval(feed_dict)\n", 346 | " session.run(value_update, feed_dict)\n", 347 | " new_value_params = session.run(value_function.parameters)\n", 348 | " value_param_changes.append(get_parameter_change(old_value_params, new_value_params))\n", 349 | " old_value_params = new_value_params\n", 350 | "\n", 351 | " # Policy improvement (policy update)\n", 352 | " for _ in range(policy_iters):\n", 353 | " feed_dict[states] = batch.eval(feed_dict)\n", 354 | " session.run(policy_update, feed_dict)\n", 355 | " new_policy_params = session.run(policy.parameters)\n", 356 | " policy_param_changes.append(get_parameter_change(old_policy_params, new_policy_params))\n", 357 | " old_policy_params = new_policy_params\n", 358 | " \n", 359 | " # Record objectives\n", 360 | " feed_dict[states] = test_set\n", 361 | " value_test_loss.append(value_objective.eval(feed_dict))\n", 362 | " policy_test_loss.append(policy_objective.eval(feed_dict))\n" 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": [ 369 | "### Training Results" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [ 378 | "fig, axes = plt.subplots(2, 2, figsize=(12, 5), dpi=OPTIONS.dpi)\n", 379 | "fig.subplots_adjust(wspace=0.3, hspace=0.4)\n", 380 | "\n", 381 | "ax = axes[0,0]\n", 382 | 
"ax.plot(value_test_loss, '.-r')\n", 383 | "ax.set_xlabel(r'Policy iteration $k$')\n", 384 | "ax.set_ylabel(r'test loss (policy evaluation)')\n", 385 | "\n", 386 | "ax = axes[0,1]\n", 387 | "ax.plot(value_param_changes, '.-r')\n", 388 | "ax.set_xlabel(r'Policy iteration $k$')\n", 389 | "ax.set_ylabel(r'$||{\\bf \\theta}_k - {\\bf \\theta}_{k-1}||_\\infty$')\n", 390 | "\n", 391 | "ax = axes[1,0]\n", 392 | "ax.plot(policy_test_loss, '.-b')\n", 393 | "ax.set_xlabel(r'Policy iteration $k$')\n", 394 | "ax.set_ylabel(r'test loss (policy improvement)')\n", 395 | "\n", 396 | "ax = axes[1,1]\n", 397 | "ax.plot(policy_param_changes, '.-b')\n", 398 | "ax.set_xlabel(r'Policy iteration $k$')\n", 399 | "ax.set_ylabel(r'$||{\\bf \\delta}_k - {\\bf \\delta}_{k-1}||_\\infty$')\n", 400 | "\n", 401 | "plt.show()\n" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "metadata": {}, 407 | "source": [ 408 | "## Estimated Value Functions and ROAs" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": null, 414 | "metadata": {}, 415 | "outputs": [], 416 | "source": [ 417 | "# Number of states along each dimension\n", 418 | "num_states = 51\n", 419 | "\n", 420 | "# State grid\n", 421 | "grid_limits = np.array([[-1., 1.], ] * state_dim)\n", 422 | "grid = safe_learning.GridWorld(grid_limits, num_states)\n", 423 | "\n", 424 | "# Estimate value functions and ROAs with rollout\n", 425 | "roa_horizon = 2000\n", 426 | "rollout_horizon = 500\n", 427 | "roa_tol = 0.1\n", 428 | "rollout_tol = 0.01\n", 429 | "discount = feed_dict[gamma] # use the same discount factor from training!\n", 430 | "pivot_state = np.asarray([0., 0., 0., 0.], dtype=OPTIONS.np_dtype)\n", 431 | "\n", 432 | "# Snap pivot_state to the closest grid point\n", 433 | "pivot_index = np.zeros_like(pivot_state, dtype=int)\n", 434 | "for d in range(grid.ndim):\n", 435 | " pivot_index[d], pivot_state[d] = find_nearest(grid.discrete_points[d], pivot_state[d])\n", 436 | "\n", 437 | "# Get 2d-planes of the discretization (x vs. v, theta vs. 
omega) according to pivot_state\n", 438 | "planes = [[1, 3], [0, 2]]\n", 439 | "grid_slices = []\n", 440 | "for p in planes:\n", 441 | " grid_slices.append(np.logical_and(grid.all_points[:, p[0]] == pivot_state[p[0]], \n", 442 | " grid.all_points[:, p[1]] == pivot_state[p[1]]).ravel())\n", 443 | "\n", 444 | "# LQR solution (\\pi and V_\\pi)\n", 445 | "closed_loop_dynamics = lambda x: future_states_lqr.eval({states: x})\n", 446 | "reward_eval = lambda x: rewards_lqr.eval({states: x})\n", 447 | "true_values = [reward_rollout(grid.all_points[mask], closed_loop_dynamics, reward_eval, discount, rollout_horizon, rollout_tol) for mask in grid_slices]\n", 448 | "true_roas = [compute_roa(grid.all_points[mask], closed_loop_dynamics, roa_horizon, roa_tol) for mask in grid_slices]\n", 449 | "\n", 450 | "# Parametric policy's value function V_{\\pi_\\delta}\n", 451 | "closed_loop_dynamics = lambda x: future_states.eval({states: x})\n", 452 | "reward_eval = lambda x: rewards.eval({states: x})\n", 453 | "est_values = [reward_rollout(grid.all_points[mask], closed_loop_dynamics, reward_eval, discount, rollout_horizon, rollout_tol) for mask in grid_slices]\n", 454 | "est_roas = [compute_roa(grid.all_points[mask], closed_loop_dynamics, roa_horizon, roa_tol) for mask in grid_slices]\n", 455 | "\n", 456 | "# Parametric value function V_\\theta\n", 457 | "par_values = [values.eval({states: grid.all_points[mask]}) for mask in grid_slices]\n" 458 | ] 459 | }, 460 | { 461 | "cell_type": "markdown", 462 | "metadata": {}, 463 | "source": [ 464 | "### Plotting" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": null, 470 | "metadata": {}, 471 | "outputs": [], 472 | "source": [ 473 | "planes = [[0, 2], [1, 3]]\n", 474 | "norms = np.asarray([x_max, np.rad2deg(theta_max), x_dot_max, np.rad2deg(theta_dot_max)])\n", 475 | "scaled_discrete_points = [norm * points for norm, points in zip(norms, grid.discrete_points)]\n", 476 | "\n", 477 | "fig = plt.figure(figsize=(12, 12), dpi=OPTIONS.dpi)\n", 478 | "\n", 479 | "for i, p in enumerate(planes):\n", 480 | " ax = fig.add_subplot(221 + i, projection='3d')\n", 481 | " if i == 0:\n", 482 | " ax.set_title(r'$\\theta = {:g}$'.format(pivot_state[1]) + r', $\\dot\\theta = {:g}$'.format(pivot_state[3]) + '\\n')\n", 483 | " ax.set_xlabel(r'$x$ [m]')\n", 484 | " ax.set_ylabel(r'$\\dot{x}$ [m/s]')\n", 485 | " else:\n", 486 | " ax.set_title(r'$x= {:g}$'.format(pivot_state[0]) + r', $\\dot x = {:g}$'.format(pivot_state[2]) + '\\n')\n", 487 | " ax.set_xlabel(r'$\\theta$ [deg]')\n", 488 | " ax.set_ylabel(r'$\\dot{\\theta}$ [deg/s]')\n", 489 | " ax.view_init(None, -45)\n", 490 | "\n", 491 | " xx, yy = np.meshgrid(*[scaled_discrete_points[p[0]], scaled_discrete_points[p[1]]])\n", 492 | "\n", 493 | " for j, (values, color) in enumerate(zip([true_values, est_values, par_values], [(0, 0, 1, 0.6), (0, 1, 0, 0.8), (1, 0, 0, 0.65)])):\n", 494 | " z = - values[i].reshape(grid.num_points[p])\n", 495 | " surf = ax.plot_surface(xx, yy, z, color=color)\n", 496 | " surf._facecolors2d = surf._facecolors3d\n", 497 | " surf._edgecolors2d = surf._edgecolors3d\n", 498 | " proxy = [plt.Rectangle((0,0), 1, 1, fc=c) for c in [(0, 0, 1, 0.6), (0, 1, 0, 0.8), (1, 0, 0, 0.65)]] \n", 499 | " ax.legend(proxy, [r'$-V_{\\pi}({\\bf x})$', r'$-V_{\\pi_{\\bf \\delta}}({\\bf x})$', r'$-V_{\\bf \\theta}({\\bf x})$'])\n", 500 | "\n", 501 | "\n", 502 | "for i, (p, mask) in enumerate(zip(planes, grid_slices)):\n", 503 | " ax = fig.add_subplot(223 + i, projection='3d')\n", 504 | " if i == 0:\n", 505 | " 
ax.set_title(r'$\\theta = {:g}$'.format(pivot_state[1]) + r', $\\dot\\theta = {:g}$'.format(pivot_state[3]) + '\\n')\n", 506 | " ax.set_xlabel(r'$x$ [m]')\n", 507 | " ax.set_ylabel(r'$\\dot{x}$ [m/s]') \n", 508 | " else:\n", 509 | " ax.set_title(r'$x= {:g}$'.format(pivot_state[0]) + r', $\\dot x = {:g}$'.format(pivot_state[2]) + '\\n')\n", 510 | " ax.set_xlabel(r'$\\theta$ [deg]')\n", 511 | " ax.set_ylabel(r'$\\dot{\\theta}$ [deg/s]')\n", 512 | " ax.view_init(None, -45)\n", 513 | " \n", 514 | " xx, yy = np.meshgrid(*[scaled_discrete_points[p[0]], scaled_discrete_points[p[1]]])\n", 515 | " acts = u_max * actions.eval({states: grid.all_points[mask]})\n", 516 | " true_acts = u_max * actions_lqr.eval({states: grid.all_points[mask]})\n", 517 | "\n", 518 | " ax.plot_surface(xx, yy, true_acts.reshape(grid.num_points[p]), color='blue', alpha=0.55)\n", 519 | " ax.plot_surface(xx, yy, acts.reshape(grid.num_points[p]), color='red', alpha=0.75)\n", 520 | "\n", 521 | " z = est_roas[i].reshape(grid.num_points[p])\n", 522 | " ax.contourf(xx, yy, z, cmap=binary_cmap('green', 0.65), zdir='z', offset=-u_max)\n", 523 | "\n", 524 | " proxy = [plt.Rectangle((0,0), 1, 1, fc=c) for c in [(0, 0, 1, 0.6), (1, 0, 0, 0.65), (0., 1., 0., 0.65)]]\n", 525 | " ax.legend(proxy, [r'$\\pi({\\bf x})$ [N]', r'$\\pi_{\\bf \\delta}({\\bf x})$ [N]', r'ROA for $\\pi_{\\bf \\delta}$'])\n", 526 | "\n", 527 | "plt.show()\n" 528 | ] 529 | }, 530 | { 531 | "cell_type": "code", 532 | "execution_count": null, 533 | "metadata": {}, 534 | "outputs": [], 535 | "source": [] 536 | } 537 | ], 538 | "metadata": { 539 | "kernelspec": { 540 | "display_name": "Python 3", 541 | "language": "python", 542 | "name": "python3" 543 | }, 544 | "language_info": { 545 | "codemirror_mode": { 546 | "name": "ipython", 547 | "version": 3 548 | }, 549 | "file_extension": ".py", 550 | "mimetype": "text/x-python", 551 | "name": "python", 552 | "nbconvert_exporter": "python", 553 | "pygments_lexer": "ipython3", 554 | "version": "3.6.4" 555 | } 556 | }, 557 | "nbformat": 4, 558 | "nbformat_minor": 2 559 | } 560 | -------------------------------------------------------------------------------- /safe_learning/tests/test_functions.py: -------------------------------------------------------------------------------- 1 | """Unit tests for the functions file.""" 2 | 3 | from __future__ import division, print_function, absolute_import 4 | 5 | from numpy.testing import assert_equal, assert_allclose 6 | import pytest 7 | import numpy as np 8 | import tensorflow as tf 9 | 10 | from safe_learning.functions import (_Triangulation, Triangulation, 11 | ScipyDelaunay, GridWorld, 12 | PiecewiseConstant, DeterministicFunction, 13 | UncertainFunction, QuadraticFunction, 14 | DimensionError, GPRCached, 15 | GaussianProcess, NeuralNetwork) 16 | from safe_learning.utilities import concatenate_inputs 17 | 18 | try: 19 | import gpflow 20 | except ImportError: 21 | gpflow = None 22 | 23 | 24 | class TestFunction(object): 25 | """Test the function class.""" 26 | 27 | @pytest.fixture(scope='class') 28 | def testing_class(self): 29 | class A(DeterministicFunction): 30 | def __init__(self, value, name='a'): 31 | super(A, self).__init__() 32 | with tf.variable_scope(self.scope_name): 33 | self.variable = tf.Variable(value) 34 | sess = tf.get_default_session() 35 | sess.run(tf.variables_initializer([self.variable])) 36 | 37 | def build_evaluation(self, point): 38 | return self.variable * point 39 | 40 | sess = tf.Session() 41 | return A, sess 42 | 43 | def test_class(self, testing_class): 44 | 
"""Test that the class is working.""" 45 | A, sess = testing_class 46 | with sess.as_default(): 47 | a = A(2.) 48 | input = np.array(1.) 49 | 50 | output = a(input) 51 | assert_allclose(2. * input, output.eval()) 52 | 53 | # Test double output 54 | output2 = a(input) 55 | assert_allclose(2. * input, output2.eval()) 56 | 57 | def test_add(self, testing_class): 58 | """Test adding functions.""" 59 | A, sess = testing_class 60 | with sess.as_default(): 61 | a1 = A(3.) 62 | a2 = A(2.) 63 | 64 | a = a1 + a2 65 | 66 | input = np.array(1.) 67 | output = a(input) 68 | 69 | assert_allclose(5. * input, output.eval()) 70 | 71 | assert a1.parameters[0] in a.parameters 72 | assert a2.parameters[0] in a.parameters 73 | 74 | def test_mult(self, testing_class): 75 | """Test multiplying functions.""" 76 | A, sess = testing_class 77 | with sess.as_default(): 78 | a1 = A(3.) 79 | a2 = A(2.) 80 | 81 | a = a1 * a2 82 | 83 | input = np.array(1.) 84 | output = a(input) 85 | 86 | assert_allclose(6. * input, output.eval()) 87 | 88 | assert a1.parameters[0] in a.parameters 89 | assert a2.parameters[0] in a.parameters 90 | 91 | # Test multiplying with constant 92 | a = a1 * 2. 93 | output = a(input) 94 | assert_allclose(6. * input, output.eval()) 95 | 96 | def test_neg(self, testing_class): 97 | """Test multiplying functions.""" 98 | A, sess = testing_class 99 | with sess.as_default(): 100 | a = A(3.) 101 | b = -a 102 | 103 | input = np.array(2.) 104 | output = b(input) 105 | 106 | assert_allclose(-3. * input, output.eval()) 107 | 108 | assert a.parameters[0] is b.parameters[0] 109 | 110 | def test_copy(self, testing_class): 111 | """Test copying.""" 112 | A, sess = testing_class 113 | with sess.as_default(): 114 | a = A(2.) 115 | b = A(3.) 116 | b.copy_parameters(a) 117 | 118 | p1 = a.parameters[0] 119 | p2 = b.parameters[0] 120 | 121 | assert p1.eval() == p2.eval() 122 | assert p1 is not p2 123 | 124 | 125 | class TestDeterministicFuction(object): 126 | """Test the base class.""" 127 | 128 | def test_errors(self): 129 | """Check notImplemented error.""" 130 | f = DeterministicFunction() 131 | pytest.raises(NotImplementedError, f.build_evaluation, None) 132 | 133 | 134 | class TestUncertainFunction(object): 135 | """Test the base class.""" 136 | 137 | def test_errors(self): 138 | """Check notImplemented error.""" 139 | f = UncertainFunction() 140 | pytest.raises(NotImplementedError, f.build_evaluation, None) 141 | 142 | def test_mean_function(self): 143 | """Test the conversion to a deterministic function.""" 144 | f = UncertainFunction() 145 | f.build_evaluation = lambda x: (1, 2) 146 | fd = f.to_mean_function() 147 | assert(fd(None) == 1) 148 | 149 | 150 | @pytest.mark.skipif(gpflow is None, reason='gpflow module not installed') 151 | class TestGPRCached(object): 152 | """Test the GPR_cached class.""" 153 | 154 | @pytest.fixture(scope="class") 155 | def gps(self): 156 | """Create cached and uncached gpflow models and GPy model.""" 157 | x = np.array([[1, 0], [0, 1]], dtype=float) 158 | y = np.array([[0], [1]], dtype=float) 159 | kernel = gpflow.kernels.RBF(2) 160 | gp = gpflow.gpr.GPR(x, y, kernel) 161 | gp_cached = GPRCached(x, y, kernel) 162 | return gp, gp_cached 163 | 164 | def test_adding_data(self, gps): 165 | """Test that adding data works.""" 166 | test_points = np.array([[0.9, 0.1], [3., 2]]) 167 | 168 | gp, gp_cached = gps 169 | gpfun = GaussianProcess(gp) 170 | gpfun_cached = GaussianProcess(gp_cached) 171 | 172 | x = np.array([[1.2, 2.3]]) 173 | y = np.array([[2.4]]) 174 | 175 | 
gpfun.add_data_point(x, y) 176 | m1, v1 = gpfun(test_points) 177 | 178 | gpfun_cached.add_data_point(x, y) 179 | m2, v2 = gpfun_cached(test_points) 180 | 181 | feed_dict = gpfun.feed_dict.copy() 182 | feed_dict.update(gpfun_cached.feed_dict) 183 | 184 | with tf.Session() as sess: 185 | m1, v1, m2, v2 = sess.run([m1, v1, m2, v2], feed_dict=feed_dict) 186 | 187 | assert_allclose(m1, m2) 188 | assert_allclose(v1, v2) 189 | 190 | def test_predict_f(self, gps): 191 | """Make sure predictions are the same as in the uncached case.""" 192 | # Note that this messes things up terribly due to caching. So this 193 | # must be the last test that we run. 194 | gp, gp_cached = gps 195 | test_points = np.array([[0.9, 0.1], [3., 2]]) 196 | a1, b1 = gp_cached.predict_f(test_points) 197 | a2, b2 = gp.predict_f(test_points) 198 | assert_allclose(a1, a2) 199 | assert_allclose(b1, b2) 200 | 201 | 202 | @pytest.mark.skipif(gpflow is None, reason='gpflow module not installed') 203 | class Testgpflow(object): 204 | """Test the GaussianProcess function class.""" 205 | 206 | @pytest.fixture(scope="class") 207 | def setup(self): 208 | """Create GP model with gpflow and GPy.""" 209 | with tf.Session() as sess: 210 | x = np.array([[1, 0], [0, 1]], dtype=float) 211 | y = np.array([[0], [1]], dtype=float) 212 | kernel = gpflow.kernels.RBF(2) 213 | gp = gpflow.gpr.GPR(x, y, kernel) 214 | yield sess, gp 215 | 216 | def test_evaluation(self, setup): 217 | """Make sure evaluation works.""" 218 | test_points = np.array([[0.9, 0.1], [3., 2]]) 219 | beta = 3.0 220 | sess, gp = setup 221 | 222 | ufun = GaussianProcess(gp, beta=beta) 223 | 224 | # Evaluate GP 225 | mean_1, error_1 = ufun(test_points) 226 | mean_1, error_1 = sess.run([mean_1, error_1], 227 | feed_dict=ufun.feed_dict) 228 | 229 | # Test multiple inputs 230 | mean_2, error_2 = ufun(test_points[:, [0]], 231 | test_points[:, [1]]) 232 | mean_2, error_2 = sess.run([mean_2, error_2], feed_dict=ufun.feed_dict) 233 | 234 | assert_allclose(mean_1, mean_2) 235 | assert_allclose(error_1, error_2) 236 | 237 | def test_new_data(self, setup): 238 | """Test adding data points to the GP.""" 239 | test_points = np.array([[0.9, 0.1], [3., 2]]) 240 | sess, gp = setup 241 | 242 | ufun = GaussianProcess(gp) 243 | 244 | x = np.array([[1.2, 2.3]]) 245 | y = np.array([[2.4]]) 246 | 247 | ufun.add_data_point(x, y) 248 | 249 | assert_allclose(ufun.X, np.array([[1, 0], 250 | [0, 1], 251 | [1.2, 2.3]])) 252 | assert_allclose(ufun.Y, np.array([[0], [1], [2.4]])) 253 | 254 | # Check prediction is correct after adding data (cholesky update) 255 | a1, b1 = ufun(test_points) 256 | a1, b1 = sess.run([a1, b1], feed_dict=ufun.feed_dict) 257 | 258 | a1_true = np.array([[0.16371139], [0.22048311]]) 259 | b1_true = np.array([[1.37678679], [1.98183191]]) 260 | assert_allclose(a1, a1_true) 261 | assert_allclose(b1, b1_true) 262 | 263 | 264 | class TestQuadraticFunction(object): 265 | """Test the quadratic function.""" 266 | 267 | def test_evaluate(self): 268 | """Setup testing environment for quadratic.""" 269 | points = np.array([[0, 0], 270 | [0, 1], 271 | [1, 0], 272 | [1, 1]], dtype=np.float) 273 | P = np.array([[1., 0.1], 274 | [0.2, 2.]]) 275 | quad = QuadraticFunction(P) 276 | true_fval = np.array([[0., 2., 1., 3.3]]).T 277 | 278 | with tf.Session(): 279 | tf_res = quad(points) 280 | res = tf_res.eval() 281 | 282 | assert_allclose(true_fval, res) 283 | 284 | 285 | def test_scipy_delaunay(): 286 | """Test the fake replacement for Scipy.""" 287 | limits = [[-1, 1], [-1, 2]] 288 | num_points = [2, 6] 289 | 
discretization = GridWorld(limits, num_points) 290 | sp_delaunay = ScipyDelaunay(limits, num_points) 291 | delaunay = _Triangulation(discretization) 292 | 293 | assert_equal(delaunay.nsimplex, sp_delaunay.nsimplex) 294 | assert_equal(delaunay.input_dim, sp_delaunay.ndim) 295 | sp_delaunay.find_simplex(np.array([[0, 0]])) 296 | 297 | 298 | class TestGridworld(object): 299 | """Test the general GridWorld definitions.""" 300 | 301 | def test_dimensions_error(self): 302 | """Test dimension errors.""" 303 | limits = [[-1.1, 1.5], [2.2, 2.4]] 304 | num_points = [7, 8] 305 | grid = GridWorld(limits, num_points) 306 | 307 | pytest.raises(DimensionError, grid._check_dimensions, 308 | np.array([[1, 2, 3]])) 309 | 310 | pytest.raises(DimensionError, grid._check_dimensions, 311 | np.array([[1]])) 312 | 313 | def test_index_state_conversion(self): 314 | """Test all index conversions.""" 315 | limits = [[-1.1, 1.5], [2.2, 2.4]] 316 | num_points = [7, 8] 317 | grid = GridWorld(limits, num_points) 318 | 319 | # Convert all indices forward and backward 320 | indices = np.arange(grid.nindex) 321 | states = grid.index_to_state(indices) 322 | indices2 = grid.state_to_index(states) 323 | assert_equal(indices, indices2) 324 | 325 | # test 1D input 326 | grid.state_to_index([0, 2.3]) 327 | grid.index_to_state(1) 328 | 329 | # Test rectangles 330 | rectangles = np.arange(grid.nrectangles) 331 | states = grid.rectangle_to_state(rectangles) 332 | rectangles2 = grid.state_to_rectangle(states + grid.unit_maxes / 2) 333 | assert_equal(rectangles, rectangles2) 334 | 335 | rectangle = grid.state_to_rectangle(100 * np.ones((1, 2))) 336 | assert_equal(rectangle, grid.nrectangles - 1) 337 | 338 | rectangle = grid.state_to_rectangle(-100 * np.ones((1, 2))) 339 | assert_equal(rectangle, 0) 340 | 341 | # Test rectangle corners 342 | corners = grid.rectangle_corner_index(rectangles) 343 | corner_states = grid.rectangle_to_state(rectangles) 344 | corners2 = grid.state_to_index(corner_states) 345 | assert_equal(corners, corners2) 346 | 347 | # Test point outside grid 348 | test_point = np.array([[-1.2, 2.]]) 349 | index = grid.state_to_index(test_point) 350 | assert_equal(index, 0) 351 | 352 | def test_integer_numpoints(self): 353 | """Check integer numpoints argument.""" 354 | grid = GridWorld([[1, 2], [3, 4]], 2) 355 | assert_equal(grid.num_points, np.array([2, 2])) 356 | 357 | def test_0d(self): 358 | """Check that initialization works for 1d-discretization.""" 359 | grid = GridWorld([[0, 1]], 3) 360 | 361 | test = np.array([[0.1, 0.4, 0.9]]).T 362 | res = np.array([0, 1, 2]) 363 | assert_allclose(grid.state_to_index(test), res) 364 | 365 | res = np.array([0, 0, 1]) 366 | assert_allclose(grid.state_to_rectangle(test), res) 367 | assert_allclose(grid.rectangle_to_state(res), res[:, None] * 0.5) 368 | 369 | 370 | class TestConcatenateDecorator(object): 371 | """Test the concatenate_input decorator.""" 372 | 373 | @concatenate_inputs(start=1) 374 | def fun(self, x): 375 | """Test function.""" 376 | return x 377 | 378 | def test_concatenate_numpy(self): 379 | """Test concatenation of inputs for numpy.""" 380 | x = np.arange(4).reshape(2, 2) 381 | y = x + 4 382 | true_res = np.hstack((x, y)) 383 | res = self.fun(x, y) 384 | assert_allclose(res, true_res) 385 | assert_allclose(self.fun(x), x) 386 | 387 | def test_concatenate_tensorflow(self): 388 | """Test concatenation of inputs for tensorflow.""" 389 | x_data = np.arange(4).reshape(2, 2).astype(np.float32) 390 | true_res = np.hstack((x_data, x_data + 4)) 391 | x = 
tf.placeholder(dtype=tf.float32, shape=[2, 2]) 392 | y = x + 4 393 | 394 | fun_x = self.fun(x) 395 | fun_xy = self.fun(x, y) 396 | 397 | assert isinstance(fun_x, tf.Tensor) 398 | assert isinstance(fun_xy, tf.Tensor) 399 | 400 | with tf.Session() as sess: 401 | res_x, res_both = sess.run([fun_x, fun_xy], 402 | {x: x_data}) 403 | 404 | assert_allclose(res_both, true_res) 405 | assert_allclose(res_x, x_data) 406 | 407 | 408 | class TestPiecewiseConstant(object): 409 | """Test a piecewise constant function.""" 410 | 411 | def test_init(self): 412 | """Test initialisation.""" 413 | limits = [[-1, 1], [-1, 1]] 414 | npoints = 4 415 | discretization = GridWorld(limits, npoints) 416 | pwc = PiecewiseConstant(discretization, np.arange(16)) 417 | assert_allclose(pwc.parameters, np.arange(16)[:, None]) 418 | 419 | def test_evaluation(self): 420 | """Evaluation tests for piecewise constant function.""" 421 | limits = [[-1, 1], [-1, 1]] 422 | npoints = 3 423 | discretization = GridWorld(limits, npoints) 424 | pwc = PiecewiseConstant(discretization) 425 | 426 | vertex_points = pwc.discretization.index_to_state( 427 | np.arange(pwc.nindex)) 428 | vertex_values = np.sum(vertex_points, axis=1, keepdims=True) 429 | pwc.parameters = vertex_values 430 | 431 | test = pwc(vertex_points) 432 | assert_allclose(test, vertex_values) 433 | 434 | outside_point = np.array([[-1.5, -1.5]]) 435 | test1 = pwc(outside_point) 436 | assert_allclose(test1, np.array([[-2]])) 437 | 438 | # Test constraint evaluation 439 | test2 = pwc.parameter_derivative(vertex_points) 440 | test2 = test2.toarray().dot(vertex_values) 441 | assert_allclose(test2, vertex_values) 442 | 443 | def test_gradient(self): 444 | """Test the gradient.""" 445 | limits = [[-1, 1], [-1, 1]] 446 | npoints = 3 447 | discretization = GridWorld(limits, npoints) 448 | pwc = PiecewiseConstant(discretization) 449 | test_points = pwc.discretization.index_to_state(np.arange(pwc.nindex)) 450 | gradient = pwc.gradient(test_points) 451 | assert_allclose(gradient, 0) 452 | 453 | 454 | class TestTriangulationNumpy(object): 455 | """Test the generalized Delaunay triangulation in numpy.""" 456 | 457 | def test_find_simplex(self): 458 | """Test the simplices on the grid.""" 459 | limits = [[-1, 1], [-1, 2]] 460 | num_points = [3, 7] 461 | discretization = GridWorld(limits, num_points) 462 | delaunay = _Triangulation(discretization) 463 | 464 | # Test the basic properties 465 | assert_equal(delaunay.discretization.nrectangles, 2 * 6) 466 | assert_equal(delaunay.input_dim, 2) 467 | assert_equal(delaunay.nsimplex, 2 * 2 * 6) 468 | assert_equal(delaunay.discretization.offset, np.array([-1, -1])) 469 | assert_equal(delaunay.discretization.unit_maxes, 470 | np.array([2, 3]) / (np.array(num_points) - 1)) 471 | 472 | # test the simplex indices 473 | lower = delaunay.triangulation.find_simplex(np.array([0, 0])).squeeze() 474 | upper = 1 - lower 475 | 476 | test_points = np.array([[0, 0], 477 | [0.9, 0.45], 478 | [1.1, 0], 479 | [1.9, 2.9]]) 480 | 481 | test_points += np.array(limits)[:, 0] 482 | 483 | true_result = np.array([lower, upper, 6 * 2 + lower, 11 * 2 + upper]) 484 | result = delaunay.find_simplex(test_points) 485 | 486 | assert_allclose(result, true_result) 487 | 488 | # Test the ability to find simplices 489 | simplices = delaunay.simplices(result) 490 | true_simplices = np.array([[0, 1, 7], 491 | [1, 7, 8], 492 | [7, 8, 14], 493 | [13, 19, 20]]) 494 | assert_equal(np.sort(simplices, axis=1), true_simplices) 495 | 496 | # Test point outside domain (should map to bottom left 
and top right) 497 | assert_equal(lower, delaunay.find_simplex(np.array([[-100., -100.]]))) 498 | assert_equal(delaunay.nsimplex - 1 - lower, 499 | delaunay.find_simplex(np.array([[100., 100.]]))) 500 | 501 | def test_values(self): 502 | """Test the evaluation function.""" 503 | eps = 1e-10 504 | 505 | discretization = GridWorld([[0, 1], [0, 1]], [2, 2]) 506 | delaunay = _Triangulation(discretization) 507 | 508 | test_points = np.array([[0, 0], 509 | [1 - eps, 0], 510 | [0, 1 - eps], 511 | [0.5 - eps, 0.5 - eps], 512 | [0, 0.5], 513 | [0.5, 0]]) 514 | nodes = delaunay.discretization.state_to_index(np.array([[0, 0], 515 | [1, 0], 516 | [0, 1]])) 517 | 518 | H = delaunay.parameter_derivative(test_points).toarray() 519 | 520 | true_H = np.zeros((len(test_points), delaunay.nindex), 521 | dtype=np.float) 522 | true_H[0, nodes[0]] = 1 523 | true_H[1, nodes[1]] = 1 524 | true_H[2, nodes[2]] = 1 525 | true_H[3, nodes[[1, 2]]] = 0.5 526 | true_H[4, nodes[[0, 2]]] = 0.5 527 | true_H[5, nodes[[0, 1]]] = 0.5 528 | 529 | assert_allclose(H, true_H, atol=1e-7) 530 | 531 | # Test value property 532 | values = np.random.rand(delaunay.nindex) 533 | delaunay.parameters = values 534 | v1 = H.dot(values)[:, None] 535 | v2 = delaunay(test_points) 536 | assert_allclose(v1, v2) 537 | 538 | # Test the projections 539 | test_point = np.array([[-0.5, -0.5]]) 540 | delaunay.parameters = np.array([0, 1, 1, 1]) 541 | unprojected = delaunay(test_point) 542 | delaunay.project = True 543 | projected = delaunay(test_point) 544 | 545 | assert_allclose(projected, np.array([[0]])) 546 | assert_allclose(unprojected, np.array([[-1]])) 547 | 548 | def test_multiple_dimensions(self): 549 | """Test delaunay in three dimensions.""" 550 | limits = [[0, 1]] * 3 551 | discretization = GridWorld(limits, [2] * 3) 552 | delaunay = _Triangulation(discretization) 553 | assert_equal(delaunay.input_dim, 3) 554 | assert_equal(delaunay.discretization.nrectangles, 1) 555 | assert_equal(delaunay.nsimplex, np.math.factorial(3)) 556 | 557 | corner_points = np.array([[0, 0, 0], 558 | [1, 0, 0], 559 | [0, 1, 0], 560 | [0, 0, 1], 561 | [0, 1, 1], 562 | [1, 1, 0], 563 | [1, 0, 1], 564 | [1, 1, 1]], dtype=np.float) 565 | 566 | values = np.sum(delaunay.discretization.index_to_state(np.arange(8)), 567 | axis=1) / 3 568 | 569 | test_points = np.vstack((corner_points, 570 | np.array([[0, 0, 0.5], 571 | [0.5, 0, 0], 572 | [0, 0.5, 0], 573 | [0.5, 0.5, 0.5]]))) 574 | corner_values = np.sum(corner_points, axis=1) / 3 575 | true_values = np.hstack((corner_values, 576 | np.array([1 / 6, 1 / 6, 1 / 6, 1 / 2]))) 577 | 578 | delaunay.parameters = values 579 | result = delaunay(test_points) 580 | assert_allclose(result, true_values[:, None], atol=1e-5) 581 | 582 | def test_gradient(self): 583 | """Test the gradient_at function.""" 584 | discretization = GridWorld([[0, 1], [0, 1]], [2, 2]) 585 | delaunay = _Triangulation(discretization) 586 | 587 | points = np.array([[0, 0], 588 | [1, 0], 589 | [0, 1], 590 | [1, 1]], dtype=np.int) 591 | nodes = delaunay.discretization.state_to_index(points) 592 | 593 | # Simplex with node values: 594 | # 3 - 1 595 | # | \ | 596 | # 1 - 2 597 | # --> x 598 | 599 | values = np.zeros(delaunay.nindex) 600 | values[nodes] = [1, 2, 3, 1] 601 | 602 | test_points = np.array([[0.01, 0.01], 603 | [0.99, 0.99]]) 604 | 605 | true_grad = np.array([[1, 2], [-2, -1]]) 606 | 607 | # Construct true H (gradient as function of values) 608 | true_H = np.zeros((2 * delaunay.input_dim, delaunay.nindex)) 609 | 610 | true_H[0, nodes[[0, 1]]] = [-1, 1] 611 
| true_H[1, nodes[[0, 2]]] = [-1, 1] 612 | true_H[2, nodes[[2, 3]]] = [-1, 1] 613 | true_H[3, nodes[[1, 3]]] = [-1, 1] 614 | 615 | # Evaluate gradient with and without values 616 | H = delaunay.gradient_parameter_derivative(test_points).toarray() 617 | delaunay.parameters = values 618 | grad = delaunay.gradient(test_points) 619 | 620 | # Compare 621 | assert_allclose(grad, true_grad) 622 | assert_allclose(H, true_H) 623 | assert_allclose(true_grad, 624 | H.dot(values).reshape(-1, delaunay.input_dim)) 625 | 626 | def test_1d(self): 627 | """Test the triangulation for 1D inputs.""" 628 | discretization = GridWorld([[0, 1]], 3) 629 | delaunay = _Triangulation(discretization, vertex_values=[0, 0.5, 0]) 630 | vertex_values = delaunay.parameters 631 | 632 | test_points = np.array([[0, 0.2, 0.5, 0.6, 0.9, 1.]]).T 633 | test_point = test_points[[0], :] 634 | 635 | simplices = delaunay.find_simplex(test_points) 636 | true_simplices = np.array([0, 0, 1, 1, 1, 1]) 637 | assert_allclose(simplices, true_simplices) 638 | assert_allclose(delaunay.find_simplex(test_point), 639 | true_simplices[[0]]) 640 | 641 | values = delaunay(test_points) 642 | true_values = np.array([0, 0.2, 0.5, 0.4, 0.1, 0])[:, None] 643 | assert_allclose(values, true_values) 644 | 645 | value_constraint = delaunay.parameter_derivative(test_points) 646 | values = value_constraint.toarray().dot(vertex_values) 647 | assert_allclose(values, true_values) 648 | 649 | gradient = delaunay.gradient(test_points) 650 | true_gradient = np.array([1, 1, -1, -1, -1, -1])[:, None] 651 | assert_allclose(gradient, true_gradient) 652 | 653 | gradient_deriv = delaunay.gradient_parameter_derivative(test_points) 654 | gradient = gradient_deriv.toarray().dot(vertex_values) 655 | assert_allclose(gradient.reshape(-1, 1), true_gradient) 656 | 657 | 658 | class TestTriangulation(object): 659 | """Test the tensorflow wrapper around the numpy triangulation.""" 660 | 661 | @pytest.fixture(scope="class") 662 | def setup(self): 663 | """Create testing environment.""" 664 | with tf.Session(graph=tf.Graph()) as sess: 665 | npoints = 3 666 | 667 | discretization = GridWorld([[0, 1], [0, 1]], npoints) 668 | parameters = np.sum(discretization.all_points ** 2, 669 | axis=1, keepdims=True) 670 | trinp = _Triangulation(discretization, vertex_values=parameters) 671 | 672 | tri = Triangulation(discretization, vertex_values=parameters) 673 | 674 | test_points = np.array([[-10, -10], 675 | [0.2, 0.7], 676 | [0, 0], 677 | [0, 1], 678 | [1, 1], 679 | [-0.2, 0.5], 680 | [0.43, 0.21]]) 681 | 682 | sess.run(tf.global_variables_initializer()) 683 | yield sess, tri, trinp, test_points 684 | 685 | def test_evaluate(self, setup): 686 | """Test the evaluations.""" 687 | sess, tri, trinp, test_points = setup 688 | # with tf.Session() as sess: 689 | res = sess.run(tri(test_points)) 690 | assert_allclose(res, trinp(test_points)) 691 | 692 | def test_projected_evaluate(self, setup): 693 | """Test evaluations with enabled projection.""" 694 | sess, tri, trinp, test_points = setup 695 | 696 | # Enable project 697 | trinp.project = True 698 | tri.project = True 699 | 700 | res = sess.run(tri(test_points)) 701 | assert_allclose(res, trinp(test_points)) 702 | 703 | def test_gradient_x(self, setup): 704 | """Test the gradients with respect to the inputs.""" 705 | sess, tri, trinp, test_points = setup 706 | 707 | points = tf.placeholder(tf.float64, [None, None]) 708 | feed_dict = {points: test_points} 709 | 710 | # Disable project 711 | trinp.project = False 712 | tri.project = False 713 | 714 | 
# Sanity check: a plain evaluation run 715 | y = tri(points) 716 | res = sess.run(y, feed_dict=feed_dict) 717 | assert_allclose(res, trinp(test_points)) 718 | 719 | # Test gradients 720 | grad = tf.gradients(y, points) 721 | res = sess.run(grad, feed_dict=feed_dict)[0] 722 | assert_allclose(res, trinp.gradient(test_points)) 723 | 724 | # Enable project 725 | trinp.project = True 726 | tri.project = True 727 | 728 | # Results are different outside of the projection. 729 | inside = (np.all(test_points < trinp.limits[:, [1]].T, axis=1) 730 | & np.all(test_points > trinp.limits[:, [0]].T, axis=1)) 731 | 732 | test_points = test_points[inside] 733 | 734 | # Test gradients projected 735 | y = tri(points) 736 | grad = tf.gradients(y, points) 737 | res = sess.run(grad, feed_dict=feed_dict)[0] 738 | assert_allclose(res[inside], trinp.gradient(test_points)) 739 | 740 | def test_gradient_param(self, setup): 741 | """Test the gradients with respect to the parameters.""" 742 | sess, tri, trinp, test_points = setup 743 | 744 | # Enable project 745 | trinp.project = True 746 | tri.project = True 747 | 748 | x = tf.placeholder(tf.float64, [1, 2]) 749 | 750 | true_gradient = trinp.parameter_derivative(test_points) 751 | true_gradient = np.array(true_gradient.todense()) 752 | 753 | y = tri(x) 754 | grad_tf = tf.gradients(y, tri.parameters)[0] 755 | dense_gradient = np.zeros(true_gradient[0].shape, dtype=np.float) 756 | 757 | for i, test in enumerate(test_points): 758 | gradient = sess.run(grad_tf, feed_dict={x: test[None, :]}) 759 | dense_gradient[:] = 0. 760 | dense_gradient[gradient.indices] = gradient.values[:, 0] 761 | assert_allclose(dense_gradient, true_gradient[i]) 762 | 763 | 764 | def test_neural_network(): 765 | """Test the NeuralNetwork class init.""" 766 | relu = tf.nn.relu 767 | 768 | with tf.Session() as sess: 769 | nn = NeuralNetwork(layers=[2, 3, 1], 770 | nonlinearities=[relu, relu, None]) 771 | 772 | # Evaluate the network on random inputs 773 | res = nn(np.random.rand(4, 2)) 774 | sess.run(tf.global_variables_initializer()) 775 | res, lipschitz = sess.run([res, nn.lipschitz()]) 776 | 777 | assert lipschitz > 0. 778 | 779 | 780 | if __name__ == '__main__': 781 | pytest.main() 782 | -------------------------------------------------------------------------------- /examples/adaptive_safety_verification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Adaptive Safety Verification for the Inverted Pendulum\n", 8 | "\n", 9 | "Determine the largest safe set for a GP model of the inverted pendulum with an adaptive discretization."
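,
    "\n",
    "In outline: the dynamics are modelled as a GP prior centred on a (possibly wrong) linearization of the pendulum, a fixed LQR policy and quadratic Lyapunov function $v({\\bf x}) = {\\bf x}^\\top P {\\bf x}$ are fixed, and safety is certified on a (possibly adaptive) state-space grid via the tightened decrease condition $\\Delta v({\\bf x}) < -L_{\\Delta v} \\tau$ introduced below."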
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "from __future__ import division, print_function\n", 19 | "\n", 20 | "import numpy as np\n", 21 | "import tensorflow as tf\n", 22 | "import gpflow\n", 23 | "import safe_learning\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "import time\n", 26 | "import os\n", 27 | "\n", 28 | "from scipy.linalg import block_diag\n", 29 | "from utilities import InvertedPendulum, binary_cmap\n", 30 | "\n", 31 | "# Nice progress bars\n", 32 | "try:\n", 33 | " from tqdm import tqdm\n", 34 | "except ImportError:\n", 35 | " tqdm = lambda x: x\n", 36 | "\n", 37 | "_STORAGE = {}\n", 38 | "\n", 39 | "HEAT_MAP = plt.get_cmap('inferno', lut=None)\n", 40 | "HEAT_MAP.set_over('white')\n", 41 | "HEAT_MAP.set_under('black')\n", 42 | "\n", 43 | "LEVEL_MAP = plt.get_cmap('viridis', lut=21)\n", 44 | "LEVEL_MAP.set_over('gold')\n", 45 | "LEVEL_MAP.set_under('white')\n" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "## User Options" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "class Options(object):\n", 62 | " def __init__(self, **kwargs):\n", 63 | " super(Options, self).__init__()\n", 64 | " self.__dict__.update(kwargs)\n", 65 | "\n", 66 | "OPTIONS = Options(np_dtype = safe_learning.config.np_dtype,\n", 67 | " tf_dtype = safe_learning.config.dtype,\n", 68 | " saturate = True, # apply saturation constraints to the control input\n", 69 | " eps = 1e-8, # numerical tolerance\n", 70 | " use_linear_dynamics = False, # use the linearized form of the dynamics as the true dynamics (for testing)\n", 71 | " use_lipschitz_scaling = True, # use different Lipschitz constants in each state for the Lyapunov function\n", 72 | " use_zero_threshold = False, # assume the discretization is infinitely fine (i.e., tau = 0; for testing)\n", 73 | " use_true_parameters = False, # use the true physical parameters in the GP model (for testing)\n", 74 | " use_linear_kernels = False, # use only linear kernels in the GP model\n", 75 | " use_adaptive_grid = True, # use an adaptive discretization for safety verification\n", 76 | " gp_confidence_scaling = 2., # scaling factor for GP confidence intervals (i.e., beta)\n", 77 | " gp_noise_variance = 0.001 ** 2, # noise variance used in GP model\n", 78 | " gp_num_scaling = 1., # internal scaling factor for better numerical stability in GP prediction\n", 79 | " dpi = 200,\n", 80 | " num_cores = 4,\n", 81 | " num_sockets = 1)\n" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## TensorFlow Session\n", 89 | "\n", 90 | "Customize the TensorFlow session for the current device." 
91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "os.environ[\"KMP_BLOCKTIME\"] = str(0)\n", 100 | "os.environ[\"KMP_SETTINGS\"] = str(1)\n", 101 | "os.environ[\"KMP_AFFINITY\"] = 'granularity=fine,noverbose,compact,1,0'\n", 102 | "os.environ[\"OMP_NUM_THREADS\"] = str(OPTIONS.num_cores)\n", 103 | "\n", 104 | "config = tf.ConfigProto(intra_op_parallelism_threads = OPTIONS.num_cores,\n", 105 | " inter_op_parallelism_threads = OPTIONS.num_sockets,\n", 106 | " allow_soft_placement = False,\n", 107 | " device_count = {'CPU': OPTIONS.num_cores})\n", 108 | "\n", 109 | "try:\n", 110 | " session.close()\n", 111 | "except NameError:\n", 112 | " pass\n", 113 | "session = tf.InteractiveSession(config=config)\n" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Dynamics\n", 121 | "\n", 122 | "Define the nonlinear and linearized forms of the inverted pendulum dynamics." 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "# Constants\n", 132 | "dt = 0.01 # sampling time\n", 133 | "g = 9.81 # gravity\n", 134 | "\n", 135 | "# True system parameters\n", 136 | "m = 0.15 # pendulum mass\n", 137 | "L = 0.5 # pole length\n", 138 | "b = 0.1 # rotational friction\n", 139 | "\n", 140 | "# State and action normalizers\n", 141 | "theta_max = np.deg2rad(30) # angular position [rad]\n", 142 | "omega_max = np.sqrt(g / L) # angular velocity [rad/s]\n", 143 | "u_max = g * m * L * np.sin(theta_max) # torque [N.m], control action\n", 144 | "\n", 145 | "state_norm = (theta_max, omega_max)\n", 146 | "action_norm = (u_max,)\n", 147 | "\n", 148 | "# Dimensions and domains\n", 149 | "state_dim = 2\n", 150 | "action_dim = 1\n", 151 | "state_limits = np.array([[-1., 1.]] * state_dim)\n", 152 | "action_limits = np.array([[-1., 1.]] * action_dim)\n", 153 | "\n", 154 | "# True system\n", 155 | "true_pendulum = InvertedPendulum(m, L, b, dt, [state_norm, action_norm])\n", 156 | "A_true, B_true = true_pendulum.linearize()\n", 157 | "\n", 158 | "if OPTIONS.use_linear_dynamics:\n", 159 | " true_dynamics = safe_learning.functions.LinearSystem((A_true, B_true), name='true_dynamics')\n", 160 | "else:\n", 161 | " true_dynamics = true_pendulum.__call__\n", 162 | "\n", 163 | "if not OPTIONS.use_true_parameters:\n", 164 | " # \"Wrong\" system\n", 165 | " m = 0.1 # pendulum mass\n", 166 | " L = 0.4 # pole length\n", 167 | " b = 0.0 # rotational friction\n", 168 | "pendulum = InvertedPendulum(m, L, b, dt, [state_norm, action_norm])\n", 169 | "A, B = pendulum.linearize()\n" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "## GP Model\n", 177 | "\n", 178 | "Define a GP model with possibly wrong physical parameters." 
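,
    "\n",
    "In the cell below, each state dimension gets its own scalar GP over the stacked state-action input,\n",
    "$$x_{k+1}^{(i)} \\sim \\mathcal{GP}\\big( A_i x_k + B_i u_k, \\; k_i \\big), \\quad i \\in \\{\\theta, \\omega\\},$$\n",
    "with the (possibly wrong) linearization $(A, B)$ as the prior mean and the kernel variances seeded from the squared parameter error $(M_{\\mathrm{true}} - M)^2$, clipped away from zero, so that the GP confidence intervals can cover the model mismatch."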
179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "# Prior variances; make sure at least some non-zero value is maintained\n", 188 | "M_true = np.hstack((A_true, B_true))\n", 189 | "M = np.hstack((A, B))\n", 190 | "prior_variances = (M_true - M) ** 2\n", 191 | "np.clip(prior_variances, 1e-3, None, out=prior_variances)\n", 192 | "\n", 193 | "# Input to GP is of the form (x, u) = (state, action)\n", 194 | "full_dim = state_dim + action_dim\n", 195 | "\n", 196 | "# Kernels\n", 197 | "if OPTIONS.use_linear_kernels:\n", 198 | " kernel_theta = gpflow.kernels.Linear(full_dim, variance=prior_variances[0, :], ARD=True)\n", 199 | " kernel_omega = gpflow.kernels.Linear(full_dim, variance=prior_variances[1, :], ARD=True)\n", 200 | "else:\n", 201 | " kernel_theta = (gpflow.kernels.Linear(full_dim, variance=prior_variances[0, :], ARD=True)\n", 202 | " + gpflow.kernels.Matern32(1, lengthscales=1, active_dims=[0])\n", 203 | " * gpflow.kernels.Linear(1, variance=prior_variances[0, 1]))\n", 204 | " kernel_omega = (gpflow.kernels.Linear(full_dim, variance=prior_variances[1, :], ARD=True)\n", 205 | " + gpflow.kernels.Matern32(1, lengthscales=1, active_dims=[0])\n", 206 | " * gpflow.kernels.Linear(1, variance=prior_variances[1, 1]))\n", 207 | "\n", 208 | "# Use linearized form for the mean dynamics\n", 209 | "mean_function_theta = safe_learning.LinearSystem((A[[0], :], B[[0], :]), name='mean_dynamics_theta')\n", 210 | "mean_function_omega = safe_learning.LinearSystem((A[[1], :], B[[1], :]), name='mean_dynamics_omega')\n", 211 | "\n", 212 | "# TODO Tensorflow may spit out a lot of allocator errors when creating 0-length dataholders in gpflow, e.g., when:\n", 213 | "# - initializing with empty data matrices X and Y, or\n", 214 | "# - using GPRCached (initializes empty dataholders for Cholesky decomposition)\n", 215 | "\n", 216 | "# X_init = np.empty((0, full_dim), dtype=OPTIONS.np_dtype)\n", 217 | "# Y_init = np.empty((0, 1), dtype=OPTIONS.np_dtype)\n", 218 | "# gp_theta = safe_learning.GPRCached(X_init, Y_init, kernel_theta, mean_function_theta, OPTIONS.gp_num_scaling)\n", 219 | "# gp_omega = safe_learning.GPRCached(X_init, Y_init, kernel_omega, mean_function_omega, OPTIONS.gp_num_scaling)\n", 220 | "\n", 221 | "# Define a GP model over the dynamics\n", 222 | "X_init = np.zeros((1, full_dim), dtype=OPTIONS.np_dtype)\n", 223 | "Y_init = np.zeros((1, 1), dtype=OPTIONS.np_dtype)\n", 224 | "\n", 225 | "gp_theta = gpflow.gpr.GPR(X_init, Y_init, kernel_theta, mean_function_theta)\n", 226 | "gp_omega = gpflow.gpr.GPR(X_init, Y_init, kernel_omega, mean_function_omega)\n", 227 | "\n", 228 | "gp_theta.likelihood.variance = OPTIONS.gp_noise_variance\n", 229 | "gp_omega.likelihood.variance = OPTIONS.gp_noise_variance\n", 230 | "\n", 231 | "gp_theta_fun = safe_learning.GaussianProcess(gp_theta, OPTIONS.gp_confidence_scaling)\n", 232 | "gp_omega_fun = safe_learning.GaussianProcess(gp_omega, OPTIONS.gp_confidence_scaling)\n", 233 | "\n", 234 | "# Stack GP functions to get a block-diagonal kernel matrix, which yields more efficient GP prediction\n", 235 | "dynamics = safe_learning.FunctionStack((gp_theta_fun, gp_omega_fun))\n" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "## State Discretization and Initial Safe Set\n", 243 | "\n", 244 | "Define a possibly adaptive discretization, and an initial known safe set as a subset of this discretization." 
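,
    "\n",
    "As a quick sanity check (assuming `GridWorld` spaces its points uniformly and `unit_maxes` holds the per-dimension cell sizes): with limits $[-1, 1]^2$ and $501$ points per dimension, each cell has side length $2/500 = 0.004$, so the cell below yields $\\tau = \\tfrac{1}{2} \\sum_d 0.004 = 0.004$ and a grid of $501^2 = 251001$ states."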
245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "# Number of states along each dimension\n", 254 | "if OPTIONS.use_adaptive_grid:\n", 255 | " num_states = 501\n", 256 | "else:\n", 257 | " num_states = 3001\n", 258 | "\n", 259 | "# State grid\n", 260 | "grid_limits = np.array([[-1., 1.], ] * state_dim)\n", 261 | "grid = safe_learning.GridWorld(grid_limits, num_states)\n", 262 | "\n", 263 | "# Discretization constant\n", 264 | "if OPTIONS.use_zero_threshold:\n", 265 | " tau = 0.0\n", 266 | "else:\n", 267 | " tau = np.sum(grid.unit_maxes) / 2\n", 268 | "\n", 269 | "print('Grid size: {}'.format(grid.nindex))\n", 270 | "print('Discretization constant (tau): {}'.format(tau))\n", 271 | "\n", 272 | "# Set initial safe set as a ball around the origin (in normalized coordinates)\n", 273 | "cutoff_radius = 0.2\n", 274 | "initial_safe_set = np.linalg.norm(grid.all_points, ord=2, axis=1) <= cutoff_radius\n" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "## Fixed Policy\n", 282 | "\n", 283 | "Fix the policy to the LQR solution for the linearized, discretized, true system, possibly with saturation constraints." 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "Q = np.diag([1., 2.]).astype(OPTIONS.np_dtype) # state cost matrix\n", 293 | "R = 1.2 * np.identity(action_dim).astype(OPTIONS.np_dtype) # action cost matrix\n", 294 | "K, P = safe_learning.utilities.dlqr(A_true, B_true, Q, R)\n", 295 | "P /= np.abs(P).max() # normalize cost\n", 296 | "\n", 297 | "policy = safe_learning.LinearSystem(-K, name='policy')\n", 298 | "if OPTIONS.saturate:\n", 299 | " policy = safe_learning.Saturation(policy, -1, 1)\n", 300 | "\n", 301 | "# Visualize policy\n", 302 | "def plot_policy(policy, grid, norms, tol=1e-10):\n", 303 | " fig, ax = plt.subplots(1, 1, figsize=(5, 5), dpi=OPTIONS.dpi)\n", 304 | " ticks = np.linspace(-1., 1., 9)\n", 305 | " cutoff = 1. - tol\n", 306 | " plot_limits = np.asarray(norms).reshape((-1, 1)) * grid.limits\n", 307 | " \n", 308 | " z = policy(grid.all_points).eval().reshape(grid.num_points)\n", 309 | " im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=plot_limits[0, 1] / plot_limits[1, 1], cmap=HEAT_MAP, vmin=-cutoff, vmax=cutoff)\n", 310 | " cbar = fig.colorbar(im, ax=ax, label=r'$u = \\pi(x)$ [normalized]', ticks=ticks)\n", 311 | " ax.set_xlabel(r'$\\theta$ [deg]')\n", 312 | " ax.set_ylabel(r'$\\omega$ [deg/s]')\n", 313 | " plt.show()\n", 314 | "\n", 315 | "\n", 316 | "norms = np.rad2deg(state_norm)\n", 317 | "plot_policy(policy, grid, norms)\n" 318 | ] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "metadata": {}, 323 | "source": [ 324 | "## Closed-Loop Dynamics Lipschitz Constant" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [ 333 | "# Policy (linear)\n", 334 | "L_pol = np.linalg.norm(-K, 1)\n", 335 | "\n", 336 | "# Dynamics (linear approximation)\n", 337 | "L_dyn = np.linalg.norm(A_true, 1) + np.linalg.norm(B_true, 1) * L_pol\n" 338 | ] 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | "metadata": {}, 343 | "source": [ 344 | "## Fixed Lyapunov Function\n", 345 | "\n", 346 | "Fix the Lyapunov function to the LQR solution for the linearized, discretized, true system." 
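,
    "\n",
    "For reference, with the closed-loop matrix $A_{\\mathrm{cl}} = A - BK$, the LQR cost-to-go matrix $P$ satisfies the discrete-time Lyapunov equation\n",
    "$$A_{\\mathrm{cl}}^\\top P A_{\\mathrm{cl}} - P = -(Q + K^\\top R K) \\prec 0,$$\n",
    "so $v({\\bf x}) = {\\bf x}^\\top P {\\bf x}$ decreases along the linearized closed-loop dynamics; the normalization $P / \\max_{ij} |P_{ij}|$ applied above only rescales $v$ and preserves this decrease property."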
347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": null, 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "# Define the Lyapunov function corresponding to the LQR policy\n", 356 | "lyapunov_function = safe_learning.QuadraticFunction(P)\n", 357 | "\n", 358 | "# Approximate local Lipschitz constants with gradients\n", 359 | "grad_lyapunov_function = safe_learning.LinearSystem((2 * P,))\n", 360 | "if OPTIONS.use_lipschitz_scaling:\n", 361 | " L_v = lambda x: tf.abs(grad_lyapunov_function(x))\n", 362 | "else:\n", 363 | " L_v = lambda x: tf.norm(grad_lyapunov_function(x), ord=1, axis=1, keep_dims=True)\n", 364 | "\n", 365 | "# Initialize class (with a possibly adaptive discretization for safety verification)\n", 366 | "lyapunov = safe_learning.Lyapunov(grid, lyapunov_function, dynamics, L_dyn, L_v, tau, policy, initial_safe_set, adaptive=OPTIONS.use_adaptive_grid)\n", 367 | "lyapunov.update_values()\n", 368 | "lyapunov.update_safe_set()\n" 369 | ] 370 | }, 371 | { 372 | "cell_type": "markdown", 373 | "metadata": {}, 374 | "source": [ 375 | "## TensorFlow Graph" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "metadata": {}, 382 | "outputs": [], 383 | "source": [ 384 | "# Current\n", 385 | "states = tf.placeholder(OPTIONS.tf_dtype, shape=[None, grid.ndim], name='states')\n", 386 | "actions = policy(states)\n", 387 | "values = lyapunov.lyapunov_function(states)\n", 388 | "\n", 389 | "# Predicted future\n", 390 | "future_states_mean, future_states_error = lyapunov.dynamics(states, actions)\n", 391 | "future_values_mean = lyapunov.lyapunov_function(future_states_mean)\n", 392 | "lv = lyapunov.lipschitz_lyapunov(future_states_mean)\n", 393 | "future_values_error = tf.reduce_sum(lv * future_states_error, axis=1, keepdims=True)\n", 394 | "dv_mean = future_values_mean - values\n", 395 | "dv_bound = dv_mean + future_values_error\n", 396 | "\n", 397 | "# True future\n", 398 | "future_states = true_dynamics(states, actions)\n", 399 | "future_values = lyapunov.lyapunov_function(future_states)\n", 400 | "dv = future_values - values\n", 401 | "\n", 402 | "# Discretization effects\n", 403 | "tau = tf.placeholder(OPTIONS.tf_dtype, shape=[None, 1], name='discretization_constant')\n", 404 | "threshold = lyapunov.threshold(states, tau)\n", 405 | "negative = tf.less(dv_bound, threshold)\n" 406 | ] 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "metadata": {}, 411 | "source": [ 412 | "## Visualize Discretization Effects\n", 413 | "\n", 414 | "The tightened Lyapunov stability certificate $\\Delta v({\\bf x}) := v(f_\\pi({\\bf x})) - v({\\bf x}) < - L_{\\Delta v}\\tau$ becomes easier to satisfy as the grid is refined (i.e., as the spacing $\\tau$ decreases). However, this creates more states that must be verified within any level set due to the curse of dimensionality. For a given uniform grid with a side length of $M$ cells, $\\Delta v({\\bf x}) < - L_{\\Delta v}\\tau$ may not be satisfied, but \n", 415 | " $$\\Delta v({\\bf x}) < - L_{\\Delta v}\\frac{\\tau}{N({\\bf x})}$$ \n", 416 | "may be, where $N({\\bf x}) \\in \\mathbb{N}_{\\geq 1}$ represents an adaptive refinement of the grid cell centred at $\\bf{x}$. This new condition would need to be checked at the $N({\\bf x})^d$ additional grid points created around $\\bf x$. We visualize the required refinement $N(\\bf{x})$ when beginning with a uniform square (i.e., $M^d$-sized) grid for the true dynamics below." 
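,
    "\n",
    "For example (illustrative numbers only): if $L_{\\Delta v} \\tau = 0.01$ at some ${\\bf x}$ while $\\Delta v({\\bf x}) = -0.004$, the uniform condition fails, but a refinement of $N({\\bf x}) = \\lceil 0.01 / 0.004 \\rceil = 3$ suffices, since $-0.004 < -0.01/3$. This is exactly the ratio $\\lceil \\mathrm{threshold} / \\Delta v \\rceil$ computed in the next cell, where both quantities are negative."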
417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": null, 422 | "metadata": {}, 423 | "outputs": [], 424 | "source": [ 425 | "# Adjust this parameter to see the effect of different uniform discretizations, and the required adaptive refinement. \n", 426 | "cells_per_side = 250\n", 427 | "\n", 428 | "# Initialize a uniform square grid\n", 429 | "grid_limits = np.array([[-1., 1.], ] * state_dim)\n", 430 | "grid = safe_learning.GridWorld(grid_limits, cells_per_side + 1)\n", 431 | "grid_spacing = np.sum(grid.unit_maxes) / 2\n", 432 | "\n", 433 | "# Create a colormap for N(x)\n", 434 | "N_max = 16\n", 435 | "cmap = plt.get_cmap('viridis', lut=N_max)\n", 436 | "cmap.set_over('gold')\n", 437 | "cmap.set_under((1., 1., 1., 0.))\n", 438 | "\n", 439 | "# Compute the required refinement N(x) for the adaptive discretization; if dv >= 0, then no amount of refinement will help, so we set N(x) = -1 (white) for plotting\n", 440 | "feed_dict = {states: grid.all_points, tau: [[np.sum(grid.unit_maxes) / 2]]}\n", 441 | "N = (threshold / dv).eval(feed_dict)\n", 442 | "N[np.isnan(N)] = -1\n", 443 | "N[N < 0] = -1\n", 444 | "N = np.ceil(N)\n", 445 | "\n", 446 | "# Visualize results\n", 447 | "fig, ax = plt.subplots(1, 1, figsize=(5, 5), dpi=OPTIONS.dpi)\n", 448 | "\n", 449 | "z = N.reshape(grid.num_points)\n", 450 | "im = ax.imshow(z.T, origin='lower', extent=grid_limits.ravel(), aspect=grid_limits[0, 1] / grid_limits[1, 1], cmap=cmap, vmin=0, vmax=N_max)\n", 451 | "cbar = fig.colorbar(im, ax=ax, label=r'$N({\\bf x})$', ticks=np.arange(0, N_max + 1, 2))\n", 452 | "ax.set_title(r'$M = {}$'.format(grid.num_points[0] - 1) \n", 453 | " + ', ' + r'$|\\mathcal{X}_\\tau|$ = ' + r'{:.1e}'.format(grid.nindex) \n", 454 | " + ', ' + r'$\\tau$ = ' + r'{:.0e}'.format(grid_spacing), \n", 455 | " )\n", 456 | "ax.set_xlabel(r'$\\theta$ [deg]')\n", 457 | "ax.set_ylabel(r'$\\omega$ [deg/s]')\n", 458 | "\n", 459 | "yticks = cbar.ax.get_yticks()\n", 460 | "tick_labels = ['{:.0f}'.format(y * N_max) for y in yticks]\n", 461 | "tick_labels[-1] = r'$\\geq {}$'.format(N_max)\n", 462 | "cbar.ax.set_yticklabels(tick_labels)\n", 463 | "\n", 464 | "plt.show()\n" 465 | ] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": {}, 470 | "source": [ 471 | "## Safe Online Learning and Exploration\n", 472 | "\n", 473 | "Only visit certified safe states in order to obtain measurements and update the GP model of the dynamics." 
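,
    "\n",
    "Schematically, one learning iteration with the helpers defined below looks as follows. This is a sketch only: the actual cells evaluate the measurement through the TensorFlow graph, and `measure_true_dynamics` is a hypothetical stand-in for the body of `update_gp()`.\n",
    "\n",
    "```python\n",
    "# Most uncertain (state, action) pair that is still certified safe\n",
    "x_u, _ = safe_learning.get_safe_sample(lyapunov, action_variation, action_limits,\n",
    "                                       num_samples=1000)\n",
    "y = measure_true_dynamics(x_u)            # hypothetical helper; see update_gp() below\n",
    "lyapunov.dynamics.add_data_point(x_u, y)  # condition the GP on the new observation\n",
    "lyapunov.update_safe_set()                # re-verify and possibly grow the safe set\n",
    "```"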
474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": null, 479 | "metadata": {}, 480 | "outputs": [], 481 | "source": [ 482 | "# We are not updating the policy, so do not consider perturbations around the current policy\n", 483 | "action_variation = np.array([[0.]], dtype=OPTIONS.np_dtype)\n", 484 | "\n", 485 | "with tf.name_scope('add_new_measurement'):\n", 486 | " full_dim = state_dim + action_dim \n", 487 | " tf_max_state_action = tf.placeholder(OPTIONS.tf_dtype, shape=[1, full_dim])\n", 488 | " tf_measurement = true_dynamics(tf_max_state_action)\n", 489 | " \n", 490 | "def update_gp():\n", 491 | " \"\"\"Update the GP model based on an actively selected data point.\"\"\"\n", 492 | " \n", 493 | " # Get a new sample location\n", 494 | " max_state_action, _ = safe_learning.get_safe_sample(lyapunov, action_variation, action_limits, positive=True, num_samples=1000)\n", 495 | " \n", 496 | " # Obtain a measurement of the true dynamics\n", 497 | " lyapunov.feed_dict[tf_max_state_action] = max_state_action\n", 498 | " measurement = tf_measurement.eval(feed_dict=lyapunov.feed_dict)\n", 499 | " \n", 500 | " # Add the measurement to our GP dynamics\n", 501 | " lyapunov.dynamics.add_data_point(max_state_action, measurement)\n", 502 | "\n", 503 | "\n", 504 | "# Record some metrics during data collection\n", 505 | "safe_level = [] # current level c of the largest verifiable safe set V(c)\n", 506 | "safe_set_fraction = [] # current safe set size approximated as a fraction of the discretization that is considered safe\n", 507 | "num_measurements = [] # number of measurements collected\n", 508 | "update_count = 0 # number of safe set updates so far\n" 509 | ] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": {}, 514 | "source": [ 515 | "### Measurements\n", 516 | "\n", 517 | "This cell can be run repeatedly to collect more measurements." 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "execution_count": null, 523 | "metadata": {}, 524 | "outputs": [], 525 | "source": [ 526 | "data_per_update = 10 # number of measurements to collect before attempting to update the safe set\n", 527 | "safe_set_updates = 12 # number of safe set updates\n", 528 | "can_shrink = False # whether or not to \"re-verify\" known safe states as the GP model is updated, \n", 529 | " # i.e., can the safe set shrink in volume? (use \"False\" for speed, \"True\" for testing)\n", 530 | "safety_factor = 1. 
# scaling factor used to conservatively estimate the required adaptive refinement\n", 531 | "N_max = 16 # the maximum adaptive refinement N(x) to attempt; lower is faster, while higher allows larger safe sets to be verified \n", 532 | "\n", 533 | "for _ in range(safe_set_updates):\n", 534 | " update_count += 1\n", 535 | "# print('Iteration {} with current safe level: {}'.format(update_count, lyapunov.feed_dict[lyapunov.c_max]))\n", 536 | "\n", 537 | " # Collect measurements for the GP model\n", 538 | " start = time.time()\n", 539 | " for _ in range(data_per_update): \n", 540 | " update_gp()\n", 541 | " end = time.time()\n", 542 | " duration_gp = end - start\n", 543 | " \n", 544 | " # Update safe set\n", 545 | " start = time.time()\n", 546 | " lyapunov.update_safe_set(can_shrink, N_max, safety_factor, OPTIONS.num_cores)\n", 547 | " end = time.time()\n", 548 | " duration_lyap = end - start\n", 549 | " \n", 550 | " # Record metrics\n", 551 | " safe_level.append(lyapunov.feed_dict[lyapunov.c_max])\n", 552 | " safe_set_fraction.append(np.sum(lyapunov.safe_set) / lyapunov.discretization.nindex)\n", 553 | " if update_count == 1:\n", 554 | " num_measurements.append(data_per_update)\n", 555 | " else:\n", 556 | " num_measurements.append(num_measurements[-1] + data_per_update)\n", 557 | " \n", 558 | " print('Data points collected so far: {}'.format(num_measurements[-1]))\n", 559 | " print('Safe set size (relative to grid): {:.2f}%'.format(100 * safe_set_fraction[-1]))\n", 560 | " print('Duration of GP update (avg): {}'.format(duration_gp / data_per_update))\n", 561 | " print('Duration of safe set update: {}'.format(duration_lyap))\n", 562 | " print('New safe level: {}'.format(lyapunov.feed_dict[lyapunov.c_max]))\n", 563 | " print('')\n" 564 | ] 565 | }, 566 | { 567 | "cell_type": "markdown", 568 | "metadata": {}, 569 | "source": [ 570 | "## Results\n", 571 | "\n", 572 | "Plot the largest verifiable safe set and the measurement points. If the discretization is adaptive, use a colormap to show how much refinement $N({\\bf x})$ was necessary to satisfy the tightened Lyapunov decrease condition."
573 | ] 574 | }, 575 | { 576 | "cell_type": "code", 577 | "execution_count": null, 578 | "metadata": {}, 579 | "outputs": [], 580 | "source": [ 581 | "grid = lyapunov.discretization\n", 582 | "feed_dict = lyapunov.feed_dict\n", 583 | "feed_dict[states] = grid.all_points\n", 584 | "feed_dict[tau] = [[lyapunov.tau]]\n", 585 | "\n", 586 | "fig, axes = plt.subplots(1, 2, figsize=(10, 5), dpi=OPTIONS.dpi)\n", 587 | "fig.subplots_adjust(wspace=0.25)\n", 588 | "plot_limits = np.rad2deg(state_norm).reshape((-1, 1)) * grid.limits\n", 589 | "axes[0].set_title(r'$M = {}$'.format(grid.num_points[0] - 1)\n", 590 | " + ', ' + r'$|\\mathcal{X}_\\tau| =$ ' + r'{:.1e}'.format(grid.nindex)\n", 591 | " + ', ' + r'$\\tau =$ ' + r'{:.0e}'.format(np.sum(grid.unit_maxes) / 2))\n", 592 | "axes[0].set_xlabel(r'$\\theta$ [deg]')\n", 593 | "axes[0].set_ylabel(r'$\\omega$ [deg/s]')\n", 594 | "\n", 595 | "axes[1].step(num_measurements, 100 * np.asarray(safe_set_fraction), 'o', where='post')\n", 596 | "axes[1].set_xlabel(r'number of measurements')\n", 597 | "axes[1].set_ylabel(r'safe set size [% of grid]')\n", 598 | "\n", 599 | "# Decrease region for the true dynamics\n", 600 | "decrease_region = (dv.eval(feed_dict) < 0).reshape(grid.num_points)\n", 601 | "cmap = binary_cmap('lightgrey')\n", 602 | "im = axes[0].imshow(decrease_region.T, origin='lower', extent=plot_limits.ravel(), aspect=plot_limits[0, 1] / plot_limits[1, 1], cmap=cmap, vmin=0, vmax=None)\n", 603 | "\n", 604 | "# Refinement N(x) used; colorbar shown only if the discretization is adaptive\n", 605 | "N = np.copy(lyapunov._refinement)\n", 606 | "N[N == 0] = -1 # for color only\n", 607 | "\n", 608 | "z = N.reshape(grid.num_points)\n", 609 | "cmap = plt.get_cmap('viridis', lut=N_max)\n", 610 | "cmap.set_over('gold')\n", 611 | "cmap.set_under((1., 1., 1., 0.))\n", 612 | "im = axes[0].imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=plot_limits[0, 1] / plot_limits[1, 1], cmap=cmap, vmin=0, vmax=N_max)\n", 613 | "if OPTIONS.use_adaptive_grid:\n", 614 | " cbar = fig.colorbar(im, ax=axes[0], label=r'$N({\\bf x})$', ticks=np.arange(0, N_max + 1, 2))\n", 615 | "\n", 616 | "# Initial safe set\n", 617 | "initial_safe_set = lyapunov.initial_safe_set.reshape(grid.num_points)\n", 618 | "cmap = binary_cmap('red')\n", 619 | "im = axes[0].imshow(initial_safe_set.T, origin='lower', extent=plot_limits.ravel(), aspect=plot_limits[0, 1] / plot_limits[1, 1], cmap=cmap, vmin=None, vmax=None)\n", 620 | "\n", 621 | "# Measurements\n", 622 | "if isinstance(lyapunov.dynamics, safe_learning.UncertainFunction):\n", 623 | " # Skip origin data point\n", 624 | " X = norms.ravel() * lyapunov.dynamics.functions[0].X[1:, :grid.ndim]\n", 625 | " axes[0].plot(X[:, 0], X[:, 1], 'x', color='pink', mew=1, ms=6)\n", 626 | "\n", 627 | "# Legend\n", 628 | "colors = ['red', 'pink', 'lightgrey']\n", 629 | "proxy = [plt.Rectangle((0,0), 1, 1, fc=c) for c in colors]\n", 630 | "labels = [r'Initial safe set', r'Measurements', r'$\\Delta v({\\bf x}) < 0$']\n", 631 | "axes[0].legend(proxy, labels, loc='lower left')\n", 632 | "\n", 633 | "plt.show()\n" 634 | ] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": null, 639 | "metadata": {}, 640 | "outputs": [], 641 | "source": [] 642 | } 643 | ], 644 | "metadata": { 645 | "kernelspec": { 646 | "display_name": "Python 3", 647 | "language": "python", 648 | "name": "python3" 649 | }, 650 | "language_info": { 651 | "codemirror_mode": { 652 | "name": "ipython", 653 | "version": 3 654 | }, 655 | "file_extension": ".py", 656 | "mimetype": 
"text/x-python", 657 | "name": "python", 658 | "nbconvert_exporter": "python", 659 | "pygments_lexer": "ipython3", 660 | "version": "3.6.4" 661 | } 662 | }, 663 | "nbformat": 4, 664 | "nbformat_minor": 2 665 | } 666 | --------------------------------------------------------------------------------