├── docs
│   ├── introduction.rst
│   ├── requirements.txt
│   ├── api.rst
│   ├── _templates
│   │   └── template.rst
│   ├── index.rst
│   ├── Makefile
│   ├── make.bat
│   └── conf.py
├── requirements_dev.txt
├── .dockerignore
├── requirements.txt
├── .gitignore
├── .travis.yml
├── scripts
│   ├── jupyter_output.py
│   └── test_code.sh
├── Dockerfile.python2
├── Dockerfile.python3
├── Makefile
├── safe_learning
│   ├── configuration.py
│   ├── __init__.py
│   ├── tests
│   │   ├── test_lyapunov.py
│   │   ├── test_utilities.py
│   │   ├── test_rl.py
│   │   └── test_functions.py
│   ├── reinforcement_learning.py
│   └── utilities.py
├── LICENSE
├── Dockerfile.dev
├── examples
│   ├── README.rst
│   ├── plotting.py
│   ├── basic_dynamic_programming.ipynb
│   ├── 1d_region_of_attraction_estimate.ipynb
│   ├── 1d_example.ipynb
│   ├── inverted_pendulum.ipynb
│   ├── reinforcement_learning_cartpole.ipynb
│   └── adaptive_safety_verification.ipynb
├── setup.py
└── README.rst

--------------------------------------------------------------------------------
/docs/introduction.rst:
--------------------------------------------------------------------------------
Introduction
============

TODO

--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
sphinx
numpydoc >= 0.6
sphinx_rtd_theme >= 0.1.8
mock

--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
API Documentation
*****************

.. automodule:: safe_learning

--------------------------------------------------------------------------------
/requirements_dev.txt:
--------------------------------------------------------------------------------
mock
flake8>=3.0,<=3.5.0
pytest==4.6.9
pytest-cov==2.8.1
pydocstyle>=2.0,<2.1

--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
examples
htmlcov
.travis.yml
.gitignore
.git
*.pyc
.ipynb_checkpoints
**/__pycache__
safe_learning.egg-info

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
numpy>=1.0,<1.15
scipy>=1.0.0,<=1.2.1
gpflow==0.4.0
matplotlib<=4.0.0
scs==2.0.2
cvxpy>=1,<=1.0.15
tensorflow>=1.6.0,<=1.12.0
future<=0.18.0

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.pyc
.idea
.ipynb_checkpoints
htmlcov
.coverage
.cache
safe_learning.egg-info
__pycache__
docs/safe_learning.*
docs/_build
*.swp
*.DS_Store
.pytest_cache

--------------------------------------------------------------------------------
/docs/_templates/template.rst:
--------------------------------------------------------------------------------
{{ name }}
{{ underline }}

.. currentmodule:: {{ module }}
.. auto{{ objtype }}:: {{ objname }} {% if objtype == "class" %}
   :members:
   :inherited-members:
{% endif %}

--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
Welcome to the Safe Learning documentation!
2 | =========================================== 3 | 4 | .. include:: introduction.rst 5 | 6 | .. toctree:: 7 | :caption: Contents 8 | :maxdepth: 3 9 | 10 | api 11 | 12 | Indices and tables 13 | ================== 14 | 15 | * :ref:`genindex` 16 | * :ref:`modindex` 17 | * :ref:`search` 18 | 19 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | sudo: required 4 | services: 5 | - docker 6 | 7 | env: 8 | - PYTHON=python2 9 | - PYTHON=python3 10 | 11 | # Setup anaconda 12 | install: 13 | # Disabled since docker pull does not affect cache 14 | # Fixed in Docker 1.13 with --cache-from 15 | # - docker pull befelix/lyapunov-learning-private:${PYTHON} || true 16 | - docker build -f Dockerfile.${PYTHON} -t test-image . 17 | - docker ps -a 18 | 19 | # Run tests 20 | script: 21 | - docker run test-image scripts/test_code.sh 22 | 23 | -------------------------------------------------------------------------------- /scripts/jupyter_output.py: -------------------------------------------------------------------------------- 1 | def scrub_output_pre_save(model, **kwargs): 2 | """scrub output before saving notebooks""" 3 | # only run on notebooks 4 | if model['type'] != 'notebook': 5 | return 6 | # only run on nbformat v4 7 | if model['content']['nbformat'] != 4: 8 | return 9 | 10 | for cell in model['content']['cells']: 11 | if cell['cell_type'] != 'code': 12 | continue 13 | cell['outputs'] = [] 14 | cell['execution_count'] = None 15 | 16 | c.FileContentsManager.pre_save_hook = scrub_output_pre_save 17 | -------------------------------------------------------------------------------- /Dockerfile.python2: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda:4.5.11 2 | 3 | # Install build essentials and clean up 4 | RUN apt-get update --quiet \ 5 | && apt-get install -y --no-install-recommends --quiet build-essential \ 6 | && apt-get clean \ 7 | && rm -rf /var/lib/apt/lists/* 8 | 9 | # Update conda, install packages, and clean up 10 | RUN conda install python=2.7 --yes --quiet \ 11 | && conda clean --yes --all \ 12 | && hash -r 13 | 14 | # Copy the main code 15 | COPY . /code 16 | RUN cd /code \ 17 | && pip install pip==18.1 \ 18 | && pip install numpy==1.14.5 \ 19 | && pip install -e .[test] --process-dependency-links \ 20 | && rm -rf /root/.cache 21 | 22 | WORKDIR /code 23 | -------------------------------------------------------------------------------- /Dockerfile.python3: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3:4.5.11 2 | 3 | # Install build essentials and clean up 4 | RUN apt-get update --quiet \ 5 | && apt-get install -y --no-install-recommends --quiet build-essential \ 6 | && apt-get clean \ 7 | && rm -rf /var/lib/apt/lists/* 8 | 9 | # Update conda, install packages, and clean up 10 | RUN conda install python=3.5 --yes --quiet \ 11 | # && conda clean --yes --all \ 12 | && hash -r 13 | 14 | # Copy the main code 15 | COPY . 
/code 16 | RUN cd /code \ 17 | && pip install pip==18.1 \ 18 | && pip install numpy==1.14.5 \ 19 | && pip install -e .[test] --process-dependency-links \ 20 | && rm -rf /root/.cache 21 | 22 | WORKDIR /code 23 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = SafeLearning 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help 2 | 3 | help: 4 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 5 | 6 | doc: ## Build documentation (docs/_build/html/index.html) 7 | cd docs && $(MAKE) html 8 | 9 | coverage: ## Construct coverage (htmlcov/index.html) 10 | coverage html 11 | 12 | test-local: ## Test the local installation of the code 13 | ./scripts/test_code.sh 14 | 15 | test: docker ## Test the docker images 16 | docker run safe_learning_py2 make test-local 17 | docker run safe_learning_py3 make test-local 18 | 19 | dev: ## Mount current code as volume and run jupyterlab for development 20 | docker build -f Dockerfile.dev -t safe_learning_dev . 21 | docker run -p 8888:8888 -v $(shell pwd):/code safe_learning_dev 22 | 23 | docker: ## Build the docker images 24 | docker build -f Dockerfile.python2 -t safe_learning_py2 . 25 | docker build -f Dockerfile.python3 -t safe_learning_py3 . 26 | 27 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=SafeLearning 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /safe_learning/configuration.py: -------------------------------------------------------------------------------- 1 | """General configuration class for dtypes.""" 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import tensorflow as tf 6 | 7 | 8 | class Configuration(object): 9 | """Configuration class.""" 10 | 11 | def __init__(self): 12 | """Initialization.""" 13 | super(Configuration, self).__init__() 14 | 15 | # Dtype for computations 16 | self.dtype = tf.float64 17 | 18 | # Batch size for stability verification 19 | self.gp_batch_size = 10000 20 | 21 | @property 22 | def np_dtype(self): 23 | """Return the numpy dtype.""" 24 | return self.dtype.as_numpy_dtype 25 | 26 | def __repr__(self): 27 | """Print the parameters.""" 28 | params = ['Configuration parameters:', ''] 29 | for param, value in self.__dict__.items(): 30 | params.append('{}: {}'.format(param, value.__repr__())) 31 | 32 | return '\n'.join(params) 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Felix Berkenkamp 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /scripts/test_code.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | module="safe_learning" 4 | 5 | get_script_dir () { 6 | SOURCE="${BASH_SOURCE[0]}" 7 | # While $SOURCE is a symlink, resolve it 8 | while [ -h "$SOURCE" ]; do 9 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 10 | SOURCE="$( readlink "$SOURCE" )" 11 | # If $SOURCE was a relative symlink (so no "/" as prefix, need to resolve it relative to the symlink base directory 12 | [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" 13 | done 14 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 15 | echo "$DIR" 16 | } 17 | 18 | # Change to script root 19 | cd $(get_script_dir)/.. 
GREEN='\033[0;32m'
NC='\033[0m'

# Run style tests
echo -e "${GREEN}Running style tests.${NC}"
flake8 $module --exclude test*.py,__init__.py --ignore=E402,E731,W503 --show-source || { exit 1; }

# Ignore import errors for __init__ and tests
flake8 $module --filename=__init__.py,test*.py --ignore=F,E402,W503 --show-source || { exit 1; }

# Test docstring conventions
echo -e "${GREEN}Testing docstring conventions.${NC}"
pydocstyle $module --convention=numpy || { exit 1; }

# Run unit tests
echo -e "${GREEN}Running unit tests.${NC}"
pytest --doctest-modules --cov --cov-fail-under=80 $module || { exit 1; }

--------------------------------------------------------------------------------
/Dockerfile.dev:
--------------------------------------------------------------------------------
FROM continuumio/miniconda3

# Install build essentials and clean up
RUN apt-get update --quiet \
    && apt-get install -y --no-install-recommends --quiet build-essential \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Update conda, install packages, and clean up
RUN conda update conda --yes --quiet \
    && conda install python=3.5 pip numpy scipy pandas --yes --quiet \
    && conda clean --yes --all \
    && hash -r

# Get the requirements files (separate from the main body)
COPY requirements.txt requirements_dev.txt /reqs/

# Install requirements and clean up
RUN pip --no-cache-dir install -r /reqs/requirements.txt \
    && pip --no-cache-dir install -r /reqs/requirements_dev.txt \
    && pip install jupyter jupyterlab dumb-init \
    && rm -rf /root/.cache \
    && rm -rf /reqs

# Manually install GPflow and clean up
RUN git clone --depth=1 --branch=0.4.0 https://github.com/GPflow/GPflow.git \
    && cd GPflow \
    && python setup.py install \
    && rm -rf /GPflow

# Output scrubber for jupyter
ADD scripts/jupyter_output.py /

RUN jupyter notebook --generate-config \
    && cat /jupyter_output.py >> /root/.jupyter/jupyter_notebook_config.py \
    && rm /jupyter_output.py

WORKDIR /code

# Make sure Ctrl+C commands can be forwarded
ENTRYPOINT ["dumb-init", "--"]

CMD python setup.py develop \
    && jupyter lab --ip="0.0.0.0" --no-browser --allow-root

--------------------------------------------------------------------------------
/examples/README.rst:
--------------------------------------------------------------------------------
Example notebooks for the library
=================================

Introductions
-------------
- `1d_region_of_attraction_estimate.ipynb <./1d_region_of_attraction_estimate.ipynb>`_ shows how to estimate and learn the region of attraction for a fixed policy.
- `basic_dynamic_programming.ipynb <./basic_dynamic_programming.ipynb>`_ does basic dynamic programming with piecewise linear function approximators for the mountain car example.
- `reinforcement_learning_pendulum.ipynb <./reinforcement_learning_pendulum.ipynb>`_ does approximate policy iteration in an actor-critic framework with neural networks for the inverted pendulum.
- `reinforcement_learning_cartpole.ipynb <./reinforcement_learning_cartpole.ipynb>`_ does the same as above for the cart-pole (i.e., the inverted pendulum on a cart).
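
To run these notebooks locally, one convenient route (assuming Docker is
installed) is the ``dev`` target in the repository Makefile, which builds the
development image, mounts the current checkout under ``/code``, and starts
JupyterLab on port 8888::

    make dev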

Experiments
-----------
- `1d_example.ipynb <./1d_example.ipynb>`_ contains a 1D example including plots of the sets.
- `inverted_pendulum.ipynb <./inverted_pendulum.ipynb>`_ contains a full neural network example with an inverted pendulum.
- `adaptive_safety_verification.ipynb <./adaptive_safety_verification.ipynb>`_ investigates the benefits of an adaptive discretization in identifying safe sets for the inverted pendulum.
- `lyapunov_function_learning.ipynb <./lyapunov_function_learning.ipynb>`_ demonstrates how a parameterized Lyapunov candidate for the inverted pendulum can be trained with the machine learning approach in [1]_.

.. [1] S. M. Richards, F. Berkenkamp, A. Krause,
   `The Lyapunov Neural Network: Adaptive Stability Certification for Safe Learning of Dynamical Systems <https://arxiv.org/abs/1808.00924>`_. Conference on Robot Learning (CoRL), 2018.

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup, find_packages
from setuptools.command.test import test as TestCommand
import sys
import pip

class PyTest(TestCommand):
    user_options = [('pytest-args=', 'a', "Arguments to pass to pytest")]

    def initialize_options(self):
        TestCommand.initialize_options(self)
        self.pytest_args = ''

    def run_tests(self):
        import shlex
        # Import here, because outside the eggs aren't loaded
        import pytest
        errno = pytest.main(shlex.split(self.pytest_args))
        sys.exit(errno)

with open('requirements.txt', 'r') as f:
    requirements = f.read().splitlines()

with open('requirements_dev.txt', 'r') as f:
    test_requirements = f.read().splitlines()

setup(
    name="safe_learning",
    version="0.0.1",
    author="Felix Berkenkamp",
    author_email="fberkenkamp@gmail.com",
    description="Safe reinforcement learning with stability guarantees.",
    license="MIT",
    keywords="safe reinforcement learning Lyapunov",
    url="https://github.com/befelix/lyapunov-learning",
    packages=find_packages(exclude=['docs']),
    setup_requires=['numpy'],
    install_requires=requirements,
    extras_require={'test': list(test_requirements)},
    tests_require=test_requirements,
    dependency_links=['git+https://github.com/GPflow/GPflow.git@0.4.0#egg=gpflow-0.4.0'],
    cmdclass={'test': PyTest},
    classifiers=[
        # How mature is this project? Common values are
        #   3 - Alpha
        #   4 - Beta
        #   5 - Production/Stable
        'Development Status :: 3 - Alpha',

        # Indicate who your project is intended for
        'Intended Audience :: Developers',
        'Topic :: Software Development :: Build Tools',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.5',
    ],
)

--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
=====================================================
Safe Reinforcement Learning with Stability Guarantees
=====================================================

.. image:: https://travis-ci.org/befelix/safe_learning.svg?branch=master
    :target: https://travis-ci.org/befelix/safe_learning
    :alt: Build status
.. image:: https://readthedocs.org/projects/safe-learning/badge/?version=latest
    :target: http://safe-learning.readthedocs.io/en/latest/?badge=latest
    :alt: Documentation Status

This code accompanies the paper [1]_ and implements methods to estimate the region of attraction of a policy and to optimize the policy subject to stability constraints. For the old numpy-based code to estimate the region of attraction in [2]_, see the `lyapunov-learning <https://github.com/befelix/lyapunov-learning>`_ repository. The code for learning Lyapunov functions from [3]_ can be found in the `examples <./examples>`_ folder.

.. [1] F. Berkenkamp, M. Turchetta, A. P. Schoellig, A. Krause,
   `Safe Model-based Reinforcement Learning with Stability Guarantees <https://arxiv.org/abs/1705.08551>`_
   in Proc. of the Conference on Neural Information Processing Systems (NIPS), 2017.

.. [2] F. Berkenkamp, R. Moriconi, A. P. Schoellig, A. Krause,
   `Safe Learning of Regions of Attraction in Uncertain, Nonlinear Systems with Gaussian Processes <https://arxiv.org/abs/1603.04915>`_
   in Proc. of the Conference on Decision and Control (CDC), 2016.

.. [3] S. M. Richards, F. Berkenkamp, A. Krause,
   `The Lyapunov Neural Network: Adaptive Stability Certification for Safe Learning of Dynamical Systems <https://arxiv.org/abs/1808.00924>`_. Conference on Robot Learning (CoRL), 2018.

Getting started
---------------

This library is tested with both Python 2.7 and 3.5. Since ``pip>=19`` does not support ``--process-dependency-links`` (see below), first install the following pinned dependencies::

    pip install pip==18.1
    pip install numpy==1.14.5

You can then install the library by cloning the repository and running

``pip install . --process-dependency-links``

To run the tests with the bash script in ``scripts/test_code.sh``, you need to install additional dependencies with

``pip install ".[test]" --process-dependency-links``

The ``--process-dependency-links`` flag is needed to install ``gpflow==0.4.0``, which is not on pypi. You can skip it if that particular version of the library is already installed.

You can then find example jupyter notebooks and the experiments from the paper in the `examples <./examples>`_ folder.

--------------------------------------------------------------------------------
/safe_learning/__init__.py:
--------------------------------------------------------------------------------
"""
The `safe_learning` package implements tools for safe reinforcement learning.

Stability verification
----------------------

The :class:`Lyapunov` class provides the main point of entry for the stability
analysis. It can be used to compute the region of attraction and together with
:func:`get_safe_sample` sets up the safe sampling scheme.

.. autosummary::

   :template: template.rst
   :toctree:

   Lyapunov
   get_safe_sample
   smallest_boundary_value
   get_lyapunov_region


Approximate Dynamic Programming
-------------------------------

We use approximate dynamic programming to compute value functions.

.. autosummary::

   :template: template.rst
   :toctree:

   PolicyIteration


Functions
---------

These are generic function classes for convenience.
They are all compatible 39 | with :class:`Lyapunov` and :class:`PolicyIteration` and can be added, 40 | multiplied, and stacked as needed. 41 | 42 | .. autosummary:: 43 | 44 | :template: template.rst 45 | :toctree: 46 | 47 | GridWorld 48 | FunctionStack 49 | Triangulation 50 | PiecewiseConstant 51 | LinearSystem 52 | QuadraticFunction 53 | Saturation 54 | NeuralNetwork 55 | GaussianProcess 56 | GPRCached 57 | sample_gp_function 58 | 59 | 60 | Utilities 61 | --------- 62 | 63 | These are utilities to make working with tensorflow more pleasant. 64 | 65 | .. autosummary:: 66 | 67 | :template: template.rst 68 | :toctree: 69 | 70 | utilities.combinations 71 | utilities.linearly_spaced_combinations 72 | utilities.lqr 73 | utilities.dlqr 74 | utilities.ellipse_bounds 75 | utilities.concatenate_inputs 76 | utilities.make_tf_fun 77 | utilities.with_scope 78 | utilities.use_parent_scope 79 | utilities.add_weight_constraint 80 | utilities.batchify 81 | utilities.get_storage 82 | utilities.set_storage 83 | utilities.unique_rows 84 | utilities.gradient_clipping 85 | 86 | """ 87 | 88 | from __future__ import absolute_import 89 | 90 | # Add the configuration settings 91 | from .configuration import Configuration 92 | config = Configuration() 93 | del Configuration 94 | 95 | from .functions import * 96 | from .lyapunov import * 97 | from .reinforcement_learning import * 98 | from . import utilities 99 | 100 | try: 101 | from pytest import main as run_tests 102 | except ImportError: 103 | def run_tests(): 104 | """Run the test package.""" 105 | raise ImportError('Testing requires the pytest package.') 106 | -------------------------------------------------------------------------------- /safe_learning/tests/test_lyapunov.py: -------------------------------------------------------------------------------- 1 | """Unit tests for the Lyapunov functions.""" 2 | 3 | from __future__ import division, print_function, absolute_import 4 | 5 | from numpy.testing import assert_allclose, assert_equal 6 | import pytest 7 | import unittest 8 | import numpy as np 9 | import tensorflow as tf 10 | import sys 11 | 12 | from safe_learning.functions import (LinearSystem, GridWorld) 13 | from safe_learning.lyapunov import (Lyapunov, smallest_boundary_value) 14 | 15 | if sys.version_info.major <= 2: 16 | import mock 17 | else: 18 | from unittest import mock 19 | 20 | 21 | class TestLyapunov(object): 22 | """Test the Lyapunov base class.""" 23 | 24 | def test_safe_set_init(self): 25 | """Test the safe set initialization.""" 26 | with tf.Session(): 27 | discretization = GridWorld([[0, 1], [0, 1]], 3) 28 | lyap_fun = lambda x: tf.reduce_sum(tf.square(x), axis=1) 29 | 30 | dynamics = LinearSystem(np.array([[1, 0.01], 31 | [0., 1.]])) 32 | lf = 0.4 33 | lv = 0.3 34 | eps = 0.5 35 | 36 | policy = lambda x: 0. 
* x 37 | lyap = Lyapunov(discretization, lyap_fun, dynamics, lf, lv, 38 | eps, policy) 39 | 40 | initial_set = [1, 3] 41 | lyap = Lyapunov(discretization, lyap_fun, dynamics, lf, lv, 42 | eps, policy, initial_set=initial_set) 43 | 44 | initial_set = np.array([False, True, False, True, False, 45 | False, False, False, False]) 46 | assert_equal(initial_set, lyap.safe_set) 47 | 48 | def test_update(self): 49 | """Test the update step.""" 50 | with tf.Session(): 51 | discretization = GridWorld([[-1, 1]], 3) 52 | lyap_fun = lambda x: tf.reduce_sum(tf.square(x), 53 | axis=1, 54 | keep_dims=True) 55 | policy = lambda x: -.1 * x 56 | 57 | dynamics = LinearSystem(np.array([[1, 1.]])) 58 | lf = 0.4 59 | lv = 0.3 60 | eps = .5 61 | 62 | initial_set = [1] 63 | 64 | lyap = Lyapunov(discretization, lyap_fun, dynamics, lf, lv, 65 | eps, policy, initial_set=initial_set) 66 | 67 | lyap.update_safe_set() 68 | assert_equal(lyap.safe_set, np.array([False, True, False])) 69 | 70 | eps = 0. 71 | lyap = Lyapunov(discretization, lyap_fun, dynamics, lf, lv, 72 | eps, policy, initial_set=initial_set) 73 | lyap.update_safe_set() 74 | assert_equal(lyap.safe_set, np.ones(3, dtype=np.bool)) 75 | 76 | 77 | def test_smallest_boundary_value(): 78 | """Test the boundary value function.""" 79 | with tf.Session(): 80 | fun = lambda x: 2 * tf.reduce_sum(tf.abs(x), axis=1) 81 | discretization = GridWorld([[-1.5, 1], [-1, 1.5]], [3, 3]) 82 | min_value = smallest_boundary_value(fun, discretization) 83 | assert min_value == 2.5 84 | 85 | 86 | if __name__ == '__main__': 87 | unittest.main() 88 | -------------------------------------------------------------------------------- /safe_learning/tests/test_utilities.py: -------------------------------------------------------------------------------- 1 | """Test the utilities.""" 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import pytest 6 | import numpy as np 7 | import tensorflow as tf 8 | from numpy.testing import assert_allclose 9 | 10 | from safe_learning.utilities import (dlqr, get_storage, set_storage, 11 | get_feed_dict, unique_rows, 12 | compute_trajectory) 13 | 14 | from safe_learning import LinearSystem 15 | 16 | 17 | def test_dlqr(): 18 | """Test the dlqr function.""" 19 | true_k = np.array([[0.61803399]]) 20 | true_p = np.array([[1.61803399]]) 21 | 22 | k, p = dlqr(1, 1, 1, 1) 23 | assert_allclose(k, true_k) 24 | assert_allclose(p, true_p) 25 | 26 | k, p = dlqr([[1]], [[1]], [[1]], [[1]]) 27 | assert_allclose(k, true_k) 28 | assert_allclose(p, true_p) 29 | 30 | 31 | class TestStorage(object): 32 | """Test the class storage.""" 33 | 34 | @pytest.fixture 35 | def sample_class(self): 36 | """Sample class for testing.""" 37 | class A(object): 38 | """Some class.""" 39 | 40 | def __init__(self): 41 | """Initialize.""" 42 | super(A, self).__init__() 43 | self.storage = {} 44 | 45 | def method(self, value, index=None): 46 | storage = get_storage(self.storage, index=index) 47 | set_storage(self.storage, [('value', value)], index=index) 48 | return storage 49 | 50 | return A() 51 | 52 | def test_storage(self, sample_class): 53 | """Test the storage.""" 54 | storage = sample_class.method(5) 55 | assert storage is None 56 | storage = sample_class.method(4) 57 | assert storage['value'] == 5 58 | storage = sample_class.method(None) 59 | assert storage['value'] == 4 60 | 61 | # Test index 62 | storage = sample_class.method(3, index='test') 63 | assert storage is None 64 | storage = sample_class.method(4, index='test') 65 | assert storage['value'] == 3 
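        # A fresh index starts out empty; an existing index returns what
        # the previous call stored under that index.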
66 | storage = sample_class.method(3, index='test2') 67 | assert storage is None 68 | storage = sample_class.method(3, index='test') 69 | assert storage['value'] is 4 70 | 71 | 72 | def test_get_feed_dict(): 73 | """Test the global get_feed_dict method.""" 74 | graph = tf.Graph() 75 | feed_dict = get_feed_dict(graph) 76 | # Initialized new dictionary 77 | assert feed_dict == {} 78 | 79 | # Test assignment 80 | feed_dict['test'] = 5 81 | 82 | # Make sure we keep getting the same object 83 | assert feed_dict is get_feed_dict(graph) 84 | 85 | 86 | def test_unique_rows(): 87 | """Test the unique_rows function.""" 88 | a = np.array([[1, 1], [1, 2], [1, 3], [1, 2], [1, 3], [1, 4], [2, 3]]) 89 | uniques = np.array([[1, 1], [1, 2], [1, 3], [1, 4], [2, 3]]) 90 | 91 | assert_allclose(unique_rows(a), uniques) 92 | 93 | 94 | def test_compute_trajectory(): 95 | """Test the compute_trajectory function.""" 96 | A = np.array([[1., 0.1], 97 | [0., 1.]]) 98 | B = np.array([[0.01], 99 | [0.1]]) 100 | 101 | dynamics = LinearSystem((A, B)) 102 | Q = np.diag([1., 0.01]) 103 | R = np.array([[0.01]]) 104 | K, _ = dlqr(A, B, Q, R) 105 | policy = LinearSystem([-K]) 106 | 107 | x0 = np.array([[0.1, 0.]]) 108 | with tf.Session() as sess: 109 | res = compute_trajectory(dynamics, policy, x0, num_steps=20) 110 | 111 | states, actions = res 112 | assert_allclose(states[[0], :], x0) 113 | assert_allclose(states[-1, :], np.array([0., 0.]), atol=0.01) 114 | assert_allclose(actions, states[:-1].dot(-K.T)) -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Safe Learning documentation build configuration file, created by 5 | # sphinx-quickstart on Tue May 23 07:02:08 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | from __future__ import absolute_import 21 | 22 | import sys 23 | import os 24 | import shlex 25 | import mock 26 | 27 | MOCK_MODULES = ['tensorflow', 28 | 'gpflow', 29 | 'future', 30 | 'future.builtins', 31 | 'future.backports', 32 | 'mpl_toolkits', 33 | 'mpl_toolkits.mplot3d', 34 | 'matplotlib', 35 | 'matplotlib.pyplot', 36 | 'numpy', 37 | 'scipy', 38 | 'scipy.interpolate', 39 | 'scipy.spatial', 40 | 'scipy.linalg', 41 | 'scipy.spatial.distance', 42 | 'scipy.special', 43 | 'scipy.stats', 44 | ] 45 | 46 | for mod_name in MOCK_MODULES: 47 | sys.modules[mod_name] = mock.Mock() 48 | 49 | sys.path.insert(0, os.path.abspath('../')) 50 | # -- General configuration ------------------------------------------------ 51 | 52 | # If your documentation needs a minimal Sphinx version, state it here. 53 | # 54 | # needs_sphinx = '1.0' 55 | 56 | # Add any Sphinx extension module names here, as strings. They can be 57 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 58 | # ones. 
59 | extensions = ['sphinx.ext.autodoc', 60 | 'numpydoc', 61 | 'sphinx.ext.autosummary'] 62 | 63 | # Add any paths that contain templates here, relative to this directory. 64 | templates_path = ['_templates'] 65 | 66 | # Generate an autosummary with one file per function. 67 | autosummary_generate = True 68 | autodoc_default_flags = [] 69 | 70 | # The suffix(es) of source filenames. 71 | # You can specify multiple suffix as a list of string: 72 | # 73 | # source_suffix = ['.rst', '.md'] 74 | source_suffix = '.rst' 75 | 76 | # The master toctree document. 77 | master_doc = 'index' 78 | 79 | # General information about the project. 80 | project = 'Safe Learning' 81 | copyright = '2017, Felix Berkenkamp, Matteo Turchetta, Angela P. Schoellig, Andreas Krause' 82 | author = 'Felix Berkenkamp, Matteo Turchetta, Angela P. Schoellig, Andreas Krause' 83 | 84 | # The version info for the project you're documenting, acts as replacement for 85 | # |version| and |release|, also used in various other places throughout the 86 | # built documents. 87 | # 88 | # The short X.Y version. 89 | version = '0.1' 90 | # The full version, including alpha/beta/rc tags. 91 | release = '0.1' 92 | 93 | # The language for content autogenerated by Sphinx. Refer to documentation 94 | # for a list of supported languages. 95 | # 96 | # This is also used if you do content translation via gettext catalogs. 97 | # Usually you set "language" from the command line for these cases. 98 | language = None 99 | 100 | # List of patterns, relative to source directory, that match files and 101 | # directories to ignore when looking for source files. 102 | # This patterns also effect to html_static_path and html_extra_path 103 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 104 | 105 | # The name of the Pygments (syntax highlighting) style to use. 106 | pygments_style = 'sphinx' 107 | 108 | # If true, `todo` and `todoList` produce output, else they produce nothing. 109 | todo_include_todos = False 110 | 111 | 112 | # -- Options for HTML output ---------------------------------------------- 113 | 114 | # The theme to use for HTML and HTML Help pages. See the documentation for 115 | # a list of builtin themes. 116 | # 117 | html_theme = 'sphinx_rtd_theme' 118 | 119 | # Theme options are theme-specific and customize the look and feel of a theme 120 | # further. For a list of options available for each theme, see the 121 | # documentation. 122 | # 123 | # html_theme_options = {} 124 | 125 | # Add any paths that contain custom static files (such as style sheets) here, 126 | # relative to this directory. They are copied after the builtin static files, 127 | # so a file named "default.css" will overwrite the builtin "default.css". 128 | html_static_path = ['_static'] 129 | 130 | 131 | # -- Options for HTMLHelp output ------------------------------------------ 132 | 133 | # Output file base name for HTML help builder. 134 | htmlhelp_basename = 'SafeLearningdoc' 135 | 136 | 137 | # -- Options for LaTeX output --------------------------------------------- 138 | 139 | latex_elements = { 140 | # The paper size ('letterpaper' or 'a4paper'). 141 | # 142 | # 'papersize': 'letterpaper', 143 | 144 | # The font size ('10pt', '11pt' or '12pt'). 145 | # 146 | # 'pointsize': '10pt', 147 | 148 | # Additional stuff for the LaTeX preamble. 149 | # 150 | # 'preamble': '', 151 | 152 | # Latex figure (float) alignment 153 | # 154 | # 'figure_align': 'htbp', 155 | } 156 | 157 | # Grouping the document tree into LaTeX files. 
List of tuples 158 | # (source start file, target name, title, 159 | # author, documentclass [howto, manual, or own class]). 160 | latex_documents = [ 161 | (master_doc, 'SafeLearning.tex', 'Safe Learning Documentation', 162 | 'Felix Berkenkamp, Matteo Turchetta, Angela P. Schoellig, Andreas Krause', 'manual'), 163 | ] 164 | 165 | 166 | # -- Options for manual page output --------------------------------------- 167 | 168 | # One entry per manual page. List of tuples 169 | # (source start file, name, description, authors, manual section). 170 | man_pages = [ 171 | (master_doc, 'safelearning', 'Safe Learning Documentation', 172 | [author], 1) 173 | ] 174 | 175 | 176 | # -- Options for Texinfo output ------------------------------------------- 177 | 178 | # Grouping the document tree into Texinfo files. List of tuples 179 | # (source start file, target name, title, author, 180 | # dir menu entry, description, category) 181 | texinfo_documents = [ 182 | (master_doc, 'SafeLearning', 'Safe Learning Documentation', 183 | author, 'SafeLearning', 'One line description of project.', 184 | 'Miscellaneous'), 185 | ] 186 | 187 | 188 | 189 | -------------------------------------------------------------------------------- /safe_learning/tests/test_rl.py: -------------------------------------------------------------------------------- 1 | """Unit tests for treinforcement learning.""" 2 | 3 | from __future__ import division, print_function, absolute_import 4 | 5 | from numpy.testing import assert_allclose 6 | import sys 7 | import pytest 8 | import tensorflow as tf 9 | import numpy as np 10 | import scipy.linalg 11 | from safe_learning.utilities import dlqr 12 | 13 | from safe_learning import (PolicyIteration, Triangulation, GridWorld, 14 | QuadraticFunction, LinearSystem) 15 | 16 | if sys.version_info.major <= 2: 17 | import mock 18 | else: 19 | from unittest import mock 20 | 21 | try: 22 | import cvxpy 23 | except ImportError: 24 | cvxpy = None 25 | 26 | 27 | class TestPolicyIteration(object): 28 | """Test the policy iteration.""" 29 | def test_integration(self): 30 | """Test the values.""" 31 | with tf.Session(graph=tf.Graph()) as sess: 32 | a = np.array([[1.2]]) 33 | b = np.array([[0.9]]) 34 | q = np.array([[1]]) 35 | r = np.array([[0.1]]) 36 | 37 | k, p = dlqr(a, b, q, r) 38 | true_value = QuadraticFunction(-p) 39 | 40 | discretization = GridWorld([[-1, 1]], 19) 41 | value_function = Triangulation(discretization, 42 | 0. 
* discretization.all_points, 43 | project=True) 44 | 45 | dynamics = LinearSystem((a, b)) 46 | 47 | policy_discretization = GridWorld([-1, 1], 5) 48 | policy = Triangulation(policy_discretization, 49 | -k / 2 * policy_discretization.all_points) 50 | reward_function = QuadraticFunction(-scipy.linalg.block_diag(q, r)) 51 | 52 | rl = PolicyIteration(policy, 53 | dynamics, 54 | reward_function, 55 | value_function) 56 | 57 | value_iter = rl.value_iteration() 58 | 59 | loss = -tf.reduce_sum(rl.future_values(rl.state_space)) 60 | optimizer = tf.train.GradientDescentOptimizer(0.01) 61 | adapt_policy = optimizer.minimize(loss, 62 | var_list=rl.policy.parameters) 63 | 64 | sess.run(tf.global_variables_initializer()) 65 | 66 | for _ in range(10): 67 | sess.run(value_iter) 68 | for _ in range(5): 69 | sess.run(adapt_policy) 70 | 71 | values = rl.value_function.parameters[0].eval() 72 | true_values = true_value(rl.state_space).eval() 73 | policy_values = rl.policy.parameters[0].eval() 74 | 75 | assert_allclose(values, true_values, atol=0.1) 76 | assert_allclose(policy_values, -k * policy_discretization.all_points, 77 | atol=0.1) 78 | # 79 | # assert(max_error < disc_error) 80 | # assert_allclose(rl.values, value_function.parameters[:, 0]) 81 | 82 | @pytest.mark.skipif(cvxpy is None, reason='Cvxpy is not installed.') 83 | def test_optimization(self): 84 | """Test the value function optimization.""" 85 | dynamics = mock.Mock() 86 | dynamics.return_value = np.arange(4, dtype=np.float)[:, None] 87 | 88 | rewards = mock.Mock() 89 | rewards.return_value = np.arange(4, dtype=np.float)[:, None] 90 | 91 | # transition probabilities 92 | trans_probs = np.array([[0, .5, .5, 0], 93 | [.2, .1, .3, .5], 94 | [.3, .2, .4, .1], 95 | [0, 0, 0, 1]], 96 | dtype=np.float) 97 | 98 | value_function = mock.Mock() 99 | value_function.tri.parameter_derivative.return_value = trans_probs 100 | value_function.nindex = 4 101 | value_function.parameters = [tf.Variable(np.zeros((4, 1), 102 | dtype=np.float))] 103 | 104 | states = np.arange(4, dtype=np.float)[:, None] 105 | value_function.discretization.all_points = states 106 | 107 | policy = mock.Mock() 108 | policy.return_value = 'actions' 109 | 110 | rl = PolicyIteration(policy, 111 | dynamics, 112 | rewards, 113 | value_function) 114 | 115 | true_values = np.linalg.solve(np.eye(4) - rl.gamma * trans_probs, 116 | rewards.return_value.ravel())[:, None] 117 | 118 | with tf.Session() as sess: 119 | sess.run(tf.variables_initializer(value_function.parameters)) 120 | sess.run(rl.optimize_value_function()) 121 | values = rl.value_function.parameters[0].eval() 122 | 123 | # Confirm result 124 | assert_allclose(values, true_values) 125 | 126 | dynamics.assert_called_with(rl.state_space, 'actions') 127 | rewards.assert_called_with(rl.state_space, 'actions') 128 | 129 | # rl.terminal_states = np.array([0, 0, 0, 1], dtype=np.bool) 130 | # rl.optimize_value_function() 131 | # 132 | # trans_probs2 = np.array([[0, .5, .5, 0, 0], 133 | # [.2, .1, .3, .5, 0], 134 | # [.3, .2, .4, .1, 0], 135 | # [0, 0, 0, 0, 1], 136 | # [0, 0, 0, 0, 1]], 137 | # dtype=np.float) 138 | # rewards2 = np.zeros(5) 139 | # rewards2[:4] = rewards() 140 | # true_values = np.linalg.solve(np.eye(5) - rl.gamma * trans_probs2, 141 | # rewards2) 142 | # 143 | # assert_allclose(rl.values, true_values[:4]) 144 | 145 | def test_future_values(self): 146 | """Test future values.""" 147 | dynamics = mock.Mock() 148 | dynamics.return_value = 'next_states' 149 | 150 | rewards = mock.Mock() 151 | rewards.return_value = np.arange(4, 
dtype=np.float)[:, None] 152 | 153 | value_function = mock.Mock() 154 | value_function.return_value = np.arange(4, dtype=np.float)[:, None] 155 | value_function.discretization.all_points = \ 156 | np.arange(4, dtype=np.float)[:, None] 157 | 158 | policy = mock.Mock() 159 | policy.return_value = 'actions' 160 | 161 | rl = PolicyIteration(policy, 162 | dynamics, 163 | rewards, 164 | value_function) 165 | 166 | true_values = np.arange(4, dtype=np.float)[:, None] * (1 + rl.gamma) 167 | 168 | future_values = rl.future_values('states') 169 | 170 | dynamics.assert_called_with('states', 'actions') 171 | rewards.assert_called_with('states', 'actions') 172 | assert_allclose(future_values, true_values) 173 | 174 | # rl.terminal_states = np.array([0, 0, 0, 1], dtype=np.bool) 175 | # future_values = rl.get_future_values(rl.policy) 176 | # true_values[rl.terminal_states] = rewards()[rl.terminal_states] 177 | # 178 | # assert_allclose(future_values, true_values) 179 | 180 | 181 | if __name__ == '__main__': 182 | pytest.main() 183 | -------------------------------------------------------------------------------- /examples/plotting.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import tensorflow as tf 4 | from IPython.display import display, HTML 5 | from mpl_toolkits.mplot3d import Axes3D 6 | 7 | from safe_learning.utilities import (with_scope, get_storage, set_storage, 8 | get_feed_dict) 9 | 10 | 11 | __all__ = ['plot_lyapunov_1d', 'plot_triangulation', 'show_graph'] 12 | 13 | 14 | # An object to store graph elements 15 | _STORAGE = {} 16 | 17 | 18 | @with_scope('plot_lyapunov_1d') 19 | def plot_lyapunov_1d(lyapunov, true_dynamics, legend=False): 20 | """Plot the lyapunov function of a 1D system 21 | 22 | Parameters 23 | ---------- 24 | lyapunov : instance of `Lyapunov` 25 | true_dynamics : callable 26 | legend : bool, optional 27 | """ 28 | sess = tf.get_default_session() 29 | feed_dict = get_feed_dict(sess.graph) 30 | 31 | # Get the storage (specific to the lyapunov function) 32 | storage = get_storage(_STORAGE, index=lyapunov) 33 | 34 | if storage is None: 35 | # Lyapunov function 36 | states = lyapunov.discretization.all_points 37 | actions = lyapunov.policy(states) 38 | next_states = lyapunov.dynamics(states, actions) 39 | v_bounds = lyapunov.v_decrease_confidence(states, next_states) 40 | true_next_states = true_dynamics(states, actions, noise=False) 41 | delta_v_true, _ = lyapunov.v_decrease_confidence(states, 42 | true_next_states) 43 | 44 | storage = [('states', states), 45 | ('next_states', next_states), 46 | ('v_bounds', v_bounds), 47 | ('true_next_states', true_next_states), 48 | ('delta_v_true', delta_v_true)] 49 | set_storage(_STORAGE, storage, index=lyapunov) 50 | else: 51 | (states, next_states, v_bounds, 52 | true_next_states, delta_v_true) = storage.values() 53 | 54 | extent = [np.min(states), np.max(states)] 55 | safe_set = lyapunov.safe_set 56 | threshold = lyapunov.threshold(states) 57 | 58 | # Create figure axes 59 | fig, axes = plt.subplots(2, 1, figsize=(10, 12)) 60 | 61 | # Format axes 62 | axes[0].set_title('GP model of the dynamics') 63 | axes[0].set_xlim(extent) 64 | axes[1].set_xlim(extent) 65 | axes[1].set_xlabel('$x$') 66 | axes[1].set_ylabel(r'Upper bound of $\Delta V(x)$') 67 | axes[1].set_title(r'Determining stability with $\Delta V(x)$') 68 | 69 | # Plot dynamics 70 | axes[0].plot(states, 71 | true_next_states.eval(feed_dict=feed_dict), 72 | color='black', alpha=0.8) 73 
| 74 | mean, bound = sess.run(next_states, feed_dict=feed_dict) 75 | axes[0].fill_between(states[:, 0], 76 | mean[:, 0] - bound[:, 0], 77 | mean[:, 0] + bound[:, 0], 78 | color=(0.8, 0.8, 1)) 79 | 80 | if hasattr(lyapunov.dynamics, 'X'): 81 | axes[0].plot(lyapunov.dynamics.X[:, 0], 82 | lyapunov.dynamics.Y[:, 0], 83 | 'x', ms=8, mew=2) 84 | 85 | v_dot_mean, v_dot_bound = sess.run(v_bounds, feed_dict=feed_dict) 86 | # # Plot V_dot 87 | print(v_dot_mean.shape) 88 | print(v_dot_bound.shape) 89 | plt.fill_between(states[:, 0], 90 | v_dot_mean[:, 0] - v_dot_bound[:, 0], 91 | v_dot_mean[:, 0] + v_dot_bound[:, 0], 92 | color=(0.8, 0.8, 1)) 93 | 94 | threshold_plot = plt.plot(extent, [threshold, threshold], 95 | 'k-.', label=r'Safety threshold ($L \tau$ )') 96 | 97 | # # Plot the true V_dot or Delta_V 98 | delta_v = delta_v_true.eval(feed_dict=feed_dict) 99 | v_dot_true_plot = axes[1].plot(states[:, 0], 100 | delta_v, 101 | color='k', 102 | label=r'True $\Delta V(x)$') 103 | 104 | # # Create twin axis 105 | ax2 = axes[1].twinx() 106 | ax2.set_ylabel(r'$V(x)$') 107 | ax2.set_xlim(extent) 108 | 109 | # # Plot Lyapunov function 110 | V_unsafe = np.ma.masked_where(safe_set, lyapunov.values) 111 | V_safe = np.ma.masked_where(~safe_set, lyapunov.values) 112 | unsafe_plot = ax2.plot(states, V_unsafe, 113 | color='b', 114 | label=r'$V(x)$ (unsafe, $\Delta V(x) > L \tau$)') 115 | safe_plot = ax2.plot(states, V_safe, 116 | color='r', 117 | label=r'$V(x)$ (safe, $\Delta V(x) \leq L \tau$)') 118 | 119 | if legend: 120 | lns = unsafe_plot + safe_plot + threshold_plot + v_dot_true_plot 121 | labels = [x.get_label() for x in lns] 122 | plt.legend(lns, labels, loc=4, fancybox=True, framealpha=0.75) 123 | 124 | # Create helper lines 125 | if np.any(safe_set): 126 | max_id = np.argmax(lyapunov.values[safe_set]) 127 | x_safe = states[safe_set][max_id] 128 | y_range = axes[1].get_ylim() 129 | axes[1].plot([x_safe, x_safe], y_range, 'k-.') 130 | axes[1].plot([-x_safe, -x_safe], y_range, 'k-.') 131 | 132 | # Show plot 133 | plt.show() 134 | 135 | 136 | def plot_triangulation(triangulation, axis=None, three_dimensional=False, 137 | xlabel=None, ylabel=None, zlabel=None, **kwargs): 138 | """Plot a triangulation. 139 | 140 | Parameters 141 | ---------- 142 | values: ndarray 143 | axis: optional 144 | three_dimensional: bool, optional 145 | Whether to plot 3D 146 | 147 | Returns 148 | ------- 149 | axis: 150 | The axis on which we plotted. 
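
    Examples
    --------
    A usage sketch mirroring the example notebooks (assumes an active
    TensorFlow session and a trained piecewise-linear value function)::

        plot_triangulation(rl.value_function, xlabel='position',
                           ylabel='velocity')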
151 | """ 152 | values = triangulation.parameters[0].eval() 153 | 154 | if three_dimensional: 155 | if axis is None: 156 | axis = Axes3D(plt.figure()) 157 | 158 | # Get the simplices and plot 159 | delaunay = triangulation.tri 160 | state_space = triangulation.discretization.all_points 161 | 162 | simplices = delaunay.simplices(np.arange(delaunay.nsimplex)) 163 | c = axis.plot_trisurf(state_space[:, 0], state_space[:, 1], values[:, 0], 164 | triangles=simplices.copy(), 165 | cmap='viridis', lw=0.1, **kwargs) 166 | cbar = plt.colorbar(c) 167 | else: 168 | if axis is None: 169 | axis = plt.figure().gca() 170 | 171 | domain = triangulation.discretization.limits.tolist() 172 | num_points = triangulation.discretization.num_points 173 | 174 | # Some magic reshaping to go to physical coordinates 175 | vals = values.reshape(num_points[0], num_points[1]).T[::-1] 176 | axis = plt.imshow(vals, origin='upper', 177 | extent=domain[0] + domain[1], 178 | aspect='auto', cmap='viridis', interpolation='bilinear', **kwargs) 179 | cbar = plt.colorbar(axis) 180 | axis = axis.axes 181 | 182 | if xlabel is not None: 183 | axis.set_xlabel(xlabel) 184 | if ylabel is not None: 185 | axis.set_ylabel(ylabel) 186 | if zlabel is not None: 187 | cbar.set_label(zlabel) 188 | 189 | return axis 190 | 191 | 192 | def strip_consts(graph_def, max_const_size=32): 193 | """Strip large constant values from graph_def. 194 | 195 | Taken from 196 | http://stackoverflow.com/questions/38189119/simple-way-to-visualize-a- 197 | tensorflow-graph-in-jupyter 198 | """ 199 | strip_def = tf.GraphDef() 200 | for n0 in graph_def.node: 201 | n = strip_def.node.add() 202 | n.MergeFrom(n0) 203 | if n.op == 'Const': 204 | tensor = n.attr['value'].tensor 205 | size = len(tensor.tensor_content) 206 | if size > max_const_size: 207 | tensor.tensor_content = str.encode("" % size) 208 | return strip_def 209 | 210 | 211 | def show_graph(graph_def, max_const_size=32): 212 | """Visualize TensorFlow graph. 213 | 214 | Taken from 215 | http://stackoverflow.com/questions/38189119/simple-way-to-visualize-a- 216 | tensorflow-graph-in-jupyter 217 | """ 218 | if hasattr(graph_def, 'as_graph_def'): 219 | graph_def = graph_def.as_graph_def() 220 | strip_def = strip_consts(graph_def, max_const_size=max_const_size) 221 | code = """ 222 | 223 | 228 | 229 |
230 | 231 |
232 | """.format(data=repr(str(strip_def)), 233 | id='graph'+str(np.random.rand())) 234 | 235 | iframe = """ 236 | 237 | """.format(code.replace('"', '"')) 238 | display(HTML(iframe)) 239 | 240 | -------------------------------------------------------------------------------- /examples/basic_dynamic_programming.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "import tensorflow as tf\n", 12 | "%matplotlib inline\n", 13 | "\n", 14 | "import safe_learning\n", 15 | "import plotting" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Problem definition\n", 23 | "\n", 24 | "We define a reinforcement learning problem with piecewise linear function approximators. These rely on a regular discretization of the grid into cells." 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "domain = [[-1.2, 0.7], [-.07, .07]]\n", 34 | "n_points = [20, 20]\n", 35 | "\n", 36 | "# Define a discretization on the domain\n", 37 | "discretization = safe_learning.GridWorld(domain, n_points)\n", 38 | "\n", 39 | "# Value function is piecewise linear\n", 40 | "value_function = safe_learning.Triangulation(discretization, np.zeros(discretization.nindex), project=True,\n", 41 | " name='tri_value_function')\n", 42 | "\n", 43 | "# Policy is piecewise linear and saturated\n", 44 | "policy = safe_learning.Triangulation(discretization, np.zeros(discretization.nindex), project=True,\n", 45 | " name='tri_policy')\n", 46 | "policy = safe_learning.Saturation(policy, -1., 1.)\n", 47 | "\n", 48 | "# Discount factor\n", 49 | "gamma = .99\n", 50 | "terminal_reward = 1 - gamma\n", 51 | "\n", 52 | "@safe_learning.utilities.with_scope('true_dynamics')\n", 53 | "def dynamics(states, actions):\n", 54 | " \"\"\"Return future states of the car\"\"\" \n", 55 | " x0 = states[:, 0] + states[:, 1]\n", 56 | " x1 = states[:, 1] + 0.001 * actions[:, 0] - 0.0025 * tf.cos(3 * states[:, 0])\n", 57 | " \n", 58 | " return tf.stack((x0, x1), axis=1)\n", 59 | "\n", 60 | "\n", 61 | "@safe_learning.utilities.with_scope('reward_function')\n", 62 | "def reward_function(states, actions):\n", 63 | " \"\"\"Reward function for the mountain car\"\"\"\n", 64 | " zeros = tf.zeros((states.shape[0], 1), tf.float64)\n", 65 | " ones = tf.ones_like(zeros)\n", 66 | " # Reward is zero except at terminal states\n", 67 | " return tf.where(tf.greater(states[:, 0], 0.6), terminal_reward * ones, zeros)\n" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "## Define the policy optimization problems\n", 75 | "\n", 76 | "Based on the dynamics we define the tensorflow operations to optimize the policy." 
77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "# Define the reinforcement learning setup\n", 86 | "rl = safe_learning.PolicyIteration(\n", 87 | " policy,\n", 88 | " dynamics,\n", 89 | " reward_function,\n", 90 | " value_function,\n", 91 | " gamma=gamma)\n", 92 | "\n", 93 | "# Create a tensorflow session\n", 94 | "session = tf.InteractiveSession()\n", 95 | "session.run(tf.global_variables_initializer())\n", 96 | "\n", 97 | "# Optimize over policy variables\n", 98 | "with tf.name_scope('dynamics_programming'):\n", 99 | " # For triangulations we can solve a linear programm to determine the value function\n", 100 | " # value_opt = rl.value_iteration()\n", 101 | " value_opt = rl.optimize_value_function()\n", 102 | " \n", 103 | " # The policy is optimized using gradient descent\n", 104 | " policy_loss = -1 / (1-gamma) * tf.reduce_mean(rl.future_values(rl.state_space))\n", 105 | "\n", 106 | " optimizer = tf.train.GradientDescentOptimizer(1.)\n", 107 | " adapt_policy = optimizer.minimize(policy_loss,\n", 108 | " var_list=[rl.policy.parameters])\n" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "\n", 118 | "old_values = np.zeros_like(rl.value_function.parameters[0].eval())\n", 119 | "old_actions = np.zeros_like(rl.policy.parameters[0].eval())\n", 120 | "converged = False\n", 121 | "action_space = np.array([[-1.], [1.]])\n", 122 | "\n", 123 | "\n", 124 | "for i in range(30):\n", 125 | " # Optimize value function\n", 126 | " value_opt.eval()\n", 127 | "\n", 128 | " # Optimize policy (discrete over grid or gradient descent)\n", 129 | " # rl.discrete_policy_optimization(action_space)\n", 130 | " for _ in range(200):\n", 131 | " session.run(adapt_policy)\n", 132 | "\n", 133 | " # Get new parameters\n", 134 | " values, actions = session.run([rl.value_function.parameters[0],\n", 135 | " rl.policy.parameters[0]])\n", 136 | "\n", 137 | " # Compute errors\n", 138 | " value_change = np.max(np.abs(old_values - values))\n", 139 | " actions_change = np.max(np.abs(old_actions - actions))\n", 140 | "\n", 141 | " # Break if converged\n", 142 | " if value_change <= 1e-1 and actions_change <= 1e-1:\n", 143 | " converged = True\n", 144 | " break\n", 145 | " else:\n", 146 | " old_values = values\n", 147 | " old_actions = actions\n", 148 | "\n", 149 | "\n", 150 | "if converged:\n", 151 | " print('converged after {} iterations. 
\\nerror: {}, \\npolicy: {}'\n", 152 | " .format(i + 1, value_change, actions_change))\n", 153 | "else:\n", 154 | " print('didnt converge, error: {} and policy: {}'\n", 155 | " .format(value_change, actions_change))" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "# Plot the resulting value function and policy" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "plotting.plot_triangulation(rl.value_function, xlabel='position', ylabel='velocity')\n", 172 | "plt.show()\n", 173 | "\n", 174 | "plotting.plot_triangulation(rl.value_function, three_dimensional=True,\n", 175 | " xlabel='position', ylabel='velocity', zlabel='values')\n", 176 | "plt.show()" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "plotting.plot_triangulation(rl.policy, zlabel='policy', xlabel='position', ylabel='velocity')\n", 186 | "plt.show()" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "# Visualize the trajectory" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "with tf.name_scope('compute_trajectory'):\n", 203 | " states = np.zeros((1000, 2), dtype=np.float)\n", 204 | " states[0, 0] = -0.5\n", 205 | "\n", 206 | " state = tf.placeholder(tf.float64, [1, 2])\n", 207 | " next_states = rl.dynamics(state, rl.policy(state))" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "# Compute the trajectories.\n", 217 | "for i in range(len(states) - 1):\n", 218 | " states[i+1, :] = next_states.eval(feed_dict={state: states[[i], :]})\n", 219 | "\n", 220 | " # break if terminal\n", 221 | " if states[i+1, 0] >= 0.6:\n", 222 | " states[i+1:] = states[i+1]\n", 223 | " break" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [ 232 | "ax = plotting.plot_triangulation(rl.value_function, xlabel='position', ylabel='velocity')\n", 233 | "ax.plot(states[:,0], states[:, 1], lw=3, color='k')\n", 234 | "ax.plot(np.ones(2) * 0.6, ax.get_ylim(), lw=2, color='r')\n", 235 | "\n", 236 | "plt.show()" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": {}, 242 | "source": [ 243 | "# Visualize the computation graph" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "plotting.show_graph(tf.get_default_graph())" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [] 261 | } 262 | ], 263 | "metadata": { 264 | "anaconda-cloud": {}, 265 | "kernelspec": { 266 | "display_name": "Python 3", 267 | "language": "python", 268 | "name": "python3" 269 | }, 270 | "language_info": { 271 | "codemirror_mode": { 272 | "name": "ipython", 273 | "version": 3 274 | }, 275 | "file_extension": ".py", 276 | "mimetype": "text/x-python", 277 | "name": "python", 278 | "nbconvert_exporter": "python", 279 | "pygments_lexer": "ipython3", 280 | "version": "3.6.4" 281 | } 282 | }, 283 | "nbformat": 4, 284 | "nbformat_minor": 2 285 | } 286 | 
-------------------------------------------------------------------------------- /safe_learning/reinforcement_learning.py: -------------------------------------------------------------------------------- 1 | """Classes for reinforcement learning.""" 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | from types import ModuleType 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | try: 10 |     import cvxpy 11 | except ImportError as exception: 12 |     cvxpy = exception 13 | 14 | from .utilities import (make_tf_fun, with_scope, get_storage, set_storage, 15 |                         get_feed_dict) 16 | 17 | from safe_learning import config 18 | 19 | __all__ = ['PolicyIteration'] 20 | 21 | 22 | class OptimizationError(Exception): 23 |     """Exception raised when the value function optimization fails.""" 24 | 25 | 26 | class PolicyIteration(object): 27 |     """A class for policy iteration. 28 | 29 |     Parameters 30 |     ---------- 31 |     policy : callable 32 |         The policy that maps states to actions. 33 |     dynamics : callable 34 |         A function that can be called with states and actions as inputs and 35 |         returns future states. 36 |     reward_function : callable 37 |         A function that takes the state, action, and next state as input and 38 |         returns the reward corresponding to this transition. 39 |     value_function : instance of `DeterministicFunction` 40 |         The function approximator for the value function. It is used to 41 |         evaluate the value function at states. 42 |     gamma : float 43 |         The discount factor for reinforcement learning. 44 |     """ 45 | 46 |     def __init__(self, policy, dynamics, reward_function, value_function, 47 |                  gamma=0.98): 48 |         """Initialization. 49 | 50 |         See `PolicyIteration` for details. 51 |         """ 52 |         super(PolicyIteration, self).__init__() 53 |         self.dynamics = dynamics 54 |         self.reward_function = reward_function 55 |         self.value_function = value_function 56 |         self.gamma = gamma 57 | 58 |         state_space = self.value_function.discretization.all_points 59 |         self.state_space = tf.stack(state_space, name='state_space') 60 | 61 |         self.policy = policy 62 |         self.feed_dict = get_feed_dict(tf.get_default_graph()) 63 |         self._storage = {} 64 | 65 |     @with_scope('future_values') 66 |     def future_values(self, states, policy=None, actions=None, lyapunov=None, 67 |                       lagrange_multiplier=1.): 68 |         """Return the value at the current states. 69 | 70 |         Parameters 71 |         ---------- 72 |         states : ndarray 73 |             The states at which to compute future values. 74 |         policy : callable, optional 75 |             The policy to evaluate. Defaults to `self.policy`. This 76 |             argument is ignored if actions is not None. 77 |         actions : array or tensor, optional 78 |             The actions to be taken for the states. 79 |         lyapunov : instance of `Lyapunov` 80 |             A Lyapunov function that acts as a constraint for the optimization. 81 |         lagrange_multiplier : float 82 |             A scaling factor for the `slack` of the optimization problem. 83 | 84 |         Returns 85 |         ------- 86 |         The expected long-term reward when taking an action according to the 87 |         policy and then receiving the value of self.value_function.
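Examples -------- A minimal usage sketch (assuming `rl` is an already-constructed `PolicyIteration` and a default tensorflow session is active): >>> values = rl.future_values(rl.state_space)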
88 | """ 89 | if actions is None: 90 | if policy is None: 91 | policy = self.policy 92 | actions = policy(states) 93 | 94 | next_states = self.dynamics(states, actions) 95 | rewards = self.reward_function(states, actions) 96 | 97 | # Only use the mean dynamics 98 | if isinstance(next_states, tuple): 99 | next_states, var = next_states 100 | 101 | expected_values = self.value_function(next_states) 102 | 103 | # Perform value update 104 | updated_values = rewards + self.gamma * expected_values 105 | 106 | # Adjust the cost for the Lyapunov decrease 107 | if lyapunov is not None: 108 | decrease = lyapunov.v_decrease_bound(states, (next_states, var)) 109 | 110 | # Want to enfore `constraint <= 0` 111 | constraint = decrease - lyapunov.threshold(states) 112 | updated_values -= lagrange_multiplier * constraint 113 | 114 | return updated_values 115 | 116 | @with_scope('bellmann_error') 117 | def bellmann_error(self, states): 118 | """Compute the squared bellmann error. 119 | 120 | Parameters 121 | ---------- 122 | states : array 123 | 124 | Returns 125 | ------- 126 | error : float 127 | """ 128 | # Make sure we do not compute the gradient with respect to the 129 | # training target. 130 | target = tf.stop_gradient(self.future_values(states)) 131 | # Squared bellmann error 132 | return tf.reduce_sum(tf.square(target - self.value_function(states)), 133 | name='bellmann_error') 134 | 135 | @with_scope('value_iteration') 136 | def value_iteration(self): 137 | """Perform one step of value iteration.""" 138 | future_values = self.future_values(self.state_space) 139 | return tf.assign(self.value_function.parameters[0], future_values, 140 | name='value_iteration_update') 141 | 142 | @make_tf_fun(tf.float64) 143 | def _run_cvx_optimization(self, next_states, rewards, **solver_options): 144 | """Tensorflow wrapper around a cvxpy value function optimization. 145 | 146 | Parameters 147 | ---------- 148 | next_states : ndarray 149 | rewards : ndarray 150 | 151 | Returns 152 | ------- 153 | values : ndarray 154 | The optimal values at the states. 155 | """ 156 | # Define random variables; convert index from np.int64 to regular 157 | # python int to avoid strange cvxpy error; see: 158 | # https://github.com/cvxgrp/cvxpy/issues/380 159 | values = cvxpy.Variable(rewards.shape) 160 | 161 | value_matrix = self.value_function.tri.parameter_derivative( 162 | next_states) 163 | # Make cvxpy work with sparse matrices 164 | value_matrix = cvxpy.Constant(value_matrix) 165 | 166 | objective = cvxpy.Maximize(cvxpy.sum(values)) 167 | constraints = [values <= rewards + self.gamma * value_matrix * values] 168 | prob = cvxpy.Problem(objective, constraints) 169 | 170 | # Solve optimization problem 171 | prob.solve(**solver_options) 172 | 173 | # Some error checking 174 | if not prob.status == cvxpy.OPTIMAL: 175 | raise OptimizationError('Optimization problem is {}' 176 | .format(prob.status)) 177 | 178 | return np.array(values.value) 179 | 180 | @with_scope('optimize_value_function') 181 | def optimize_value_function(self, **solver_options): 182 | """Optimize the value function using cvx. 183 | 184 | Parameters 185 | ---------- 186 | solver_options : kwargs, optional 187 | Additional solver options passes to cvxpy.Problem.solve. 188 | 189 | Returns 190 | ------- 191 | assign_op : tf.Tensor 192 | An assign operation that updates the value function. 
193 | """ 194 | if not isinstance(cvxpy, ModuleType): 195 | raise cvxpy 196 | 197 | actions = self.policy(self.state_space) 198 | next_states = self.dynamics(self.state_space, actions) 199 | 200 | # Only use the mean dynamics 201 | if isinstance(next_states, tuple): 202 | next_states, var = next_states 203 | 204 | rewards = self.reward_function(self.state_space, 205 | actions) 206 | 207 | values = self._run_cvx_optimization(next_states, 208 | rewards, 209 | **solver_options) 210 | 211 | return tf.assign(self.value_function.parameters[0], values) 212 | 213 | @with_scope('discrete_policy_optimization') 214 | def discrete_policy_optimization(self, action_space, constraint=None): 215 | """Optimize the policy for a given value function. 216 | 217 | Parameters 218 | ---------- 219 | action_space : ndarray 220 | The parameter value to evaluate (for each parameter). This is 221 | geared towards piecewise linear functions. 222 | constraint : callable 223 | A function that can be called with a policy. Returns the slack of 224 | the safety constraint for each state. A policy is safe if the slack 225 | is >=0 for all constraints. 226 | """ 227 | states = self.policy.discretization.all_points 228 | n_states = states.shape[0] 229 | n_options, n_actions = action_space.shape 230 | 231 | # Initialize 232 | values = np.empty((n_states, n_options), dtype=config.np_dtype) 233 | action_array = np.broadcast_to(np.zeros(n_actions, 234 | dtype=config.np_dtype), 235 | (n_states, n_actions)) 236 | 237 | # Create tensorflow operations, but reuse previous graph elements 238 | storage = get_storage(self._storage) 239 | 240 | if storage is None: 241 | # Computation of future values 242 | actions = tf.placeholder(config.dtype, 243 | shape=action_array.shape, 244 | name='actions') 245 | future_values = self.future_values(states, 246 | actions=actions) 247 | 248 | # Assigning new parameters 249 | parameters = tf.placeholder(config.dtype, action_array.shape) 250 | assign_op = tf.assign(self.policy.parameters[0], parameters) 251 | 252 | # Put things into storage 253 | storage = [('actions', actions), 254 | ('future_values', future_values), 255 | ('parameters', parameters), 256 | ('assign_op', assign_op)] 257 | set_storage(self._storage, storage) 258 | else: 259 | # Get items out of storage 260 | actions, future_values, parameters, assign_op = storage.values() 261 | 262 | feed_dict = self.feed_dict 263 | feed_dict[actions] = action_array 264 | 265 | # Compute values for each action 266 | for i, action in enumerate(action_space): 267 | # Update feed dict 268 | action_array.base[:] = action 269 | # Compute values 270 | values[:, i] = future_values.eval(feed_dict=feed_dict)[:, 0] 271 | 272 | if constraint is not None: 273 | # TODO: optimize safety if unsafe 274 | unsafe = constraint(action_array) < 0 275 | values[unsafe, i] = -np.inf 276 | 277 | # Select best action for policy 278 | best_actions = action_space[np.argmax(values, axis=1)] 279 | assign_op.eval({parameters: best_actions}) 280 | -------------------------------------------------------------------------------- /examples/1d_region_of_attraction_estimate.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Stability verification of a fixed uncertain system (without dynamic programming)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from 
__future__ import division, print_function\n", 17 | "\n", 18 | "import tensorflow as tf\n", 19 | "import gpflow\n", 20 | "import numpy as np\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "from future.builtins import *\n", 23 | "from functools import partial\n", 24 | "%matplotlib inline\n", 25 | "\n", 26 | "import plotting\n", 27 | "import safe_learning\n", 28 | "\n", 29 | "try:\n", 30 | "    session.close()\n", 31 | "except NameError:\n", 32 | "    pass\n", 33 | "\n", 34 | "graph = tf.Graph()\n", 35 | "session = tf.InteractiveSession(graph=graph)\n", 36 | "session.run(tf.global_variables_initializer())" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "We start by defining a discretization of the space $[-1, 1]$ with discretization constant $\\tau$" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "discretization = safe_learning.GridWorld([-1, 1], 1001)\n", 53 | "tau = 1 / discretization.nindex\n", 54 | "\n", 55 | "print('Grid size: {0}'.format(discretization.nindex))" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "We define a GP model of the dynamics that consists of a stable, closed-loop, linear prior model plus an a priori unknown function $g_\\pi$,\n", 63 | "$$x_{k+1} = 0.25 x_k + g_\\pi(x_k).$$\n", 64 | "\n", 65 | "One particular sample of this GP serves as the true dynamics. The prior dynamics are locally asymptotically stable. Moreover, in the one-dimensional case, the dynamics are stable as long as $|x_{k+1}| \\leq |x_{k}|$." 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "# Observation noise\n", 75 | "noise_var = 0.01 ** 2\n", 76 | "\n", 77 | "with tf.variable_scope('gp'):\n", 78 | "    # Mean dynamics\n", 79 | "    mean_function = safe_learning.LinearSystem((0.25, 0.), name='mean_dynamics')\n", 80 | "\n", 81 | "    kernel = (gpflow.kernels.Matern32(1, lengthscales=1, variance=0.4**2, active_dims=[0])\n", 82 | "              * gpflow.kernels.Linear(1, active_dims=[0]))\n", 83 | "\n", 84 | "    gp = safe_learning.GPRCached(np.empty((0, 2), dtype=safe_learning.config.np_dtype),\n", 85 | "                                 np.empty((0, 1), dtype=safe_learning.config.np_dtype),\n", 86 | "                                 kernel,\n", 87 | "                                 mean_function=mean_function)\n", 88 | "    gp.likelihood.variance = noise_var\n", 89 | "\n", 90 | "    gpfun = safe_learning.GaussianProcess(gp, name='gp_dynamics')" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "# Define one sample as the true dynamics\n", 100 | "np.random.seed(5)\n", 101 | "\n", 102 | "# Set up a discretization\n", 103 | "sample_disc = np.hstack((np.linspace(-1, 1, 50)[:, None],\n", 104 | "                         np.zeros((50, 1))))\n", 105 | "\n", 106 | "# Draw samples\n", 107 | "fs = safe_learning.sample_gp_function(sample_disc, gpfun, number=10, return_function=False)\n", 108 | "plt.plot(sample_disc[:, 0], fs.T)\n", 109 | "\n", 110 | "plt.ylabel('$g(x)$')\n", 111 | "plt.xlabel('x')\n", 112 | "plt.title('Samples drawn from the GP model of the dynamics')\n", 113 | "plt.show()\n", 114 | "\n", 115 | "\n", 116 | "true_dynamics = safe_learning.sample_gp_function(\n", 117 | "    sample_disc,\n", 118 | "    gpfun)[0]\n", 119 | "\n", 120 | "# Plot the basic model\n", 121 | "with tf.variable_scope('plot_true_dynamics'):\n", 122 | "    true_y = true_dynamics(sample_disc, noise=False).eval(feed_dict=true_dynamics.feed_dict)\n", 123 | "plt.plot(sample_disc[:, 0], true_y,
color='black', alpha=0.8)\n", 124 | "plt.title('GP model of the dynamics')\n", 125 | "plt.show()" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "# lyapunov_function = safe_learning.QuadraticFunction(np.array([[1]]))\n", 135 | "lyapunov_disc = safe_learning.GridWorld([-1., 1.], 3)\n", 136 | "lyapunov_function = safe_learning.Triangulation(lyapunov_disc, [1, 0, 1], name='lyapunov_function')\n", 137 | "\n", 138 | "dynamics = gpfun\n", 139 | "policy = safe_learning.LinearSystem(np.array([0.]), name='policy')\n", 140 | "\n", 141 | "# Lipschitz constant\n", 142 | "# L_dyn = 0.25 + dynamics.beta(0) * np.sqrt(gp.kern.Mat32.variance) / gp.kern.Mat32.lengthscale * np.max(np.abs(extent))\n", 143 | "# L_V = np.max(lyapunov_function.gradient(grid))\n", 144 | "\n", 145 | "L_dyn = 0.25\n", 146 | "L_V = 1.\n", 147 | "\n", 148 | "lyapunov = safe_learning.Lyapunov(discretization, lyapunov_function, dynamics, L_dyn, L_V, tau, policy)\n", 149 | "\n", 150 | "# Specify the desired accuracy\n", 151 | "# accuracy = np.max(lyapunov.V) / 1e10" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "## Safety based on GP model\n", 159 | "\n", 160 | "Let's start by plotting the prior over the dynamics and the associated prior over $\\dot{V}(x)$." 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "lyapunov.update_safe_set()\n", 170 | "plotting.plot_lyapunov_1d(lyapunov, true_dynamics, legend=True)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "Clearly the model does not allow us to classify any states as safe ($\\dot{V} < -L \\tau$). However, as a starting point, we assume that we know that the system is asymptotically stable within some initial set, $\\mathcal{S}_0$:\n", 178 | "\n", 179 | "$$\\mathcal{S}_0 = \\{ x \\in \\mathbb{R} \\,|\\, |x| < 0.2 \\}$$" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "lyapunov.initial_safe_set = np.abs(lyapunov.discretization.all_points.squeeze()) < 0.2" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "## Online learning\n", 196 | "As we sample within this initial safe set, we gain more knowledge about the system. In particular, we iteratively select the state within the safe set, $\\mathcal{S}_n$, where the dynamics are the most uncertain (highest variance).\n",
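"We then take a measurement of the true dynamics at this state, add it to the GP model, and recompute the safe set."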
197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "grid = lyapunov.discretization.all_points\n", 206 | "lyapunov.update_safe_set()\n", 207 | "\n", 208 | "with tf.variable_scope('sample_new_safe_point'):\n", 209 | " safe_set = tf.placeholder(safe_learning.config.dtype, [None, None])\n", 210 | " _, dynamics_std_tf = lyapunov.dynamics(safe_set, lyapunov.policy(safe_set))\n", 211 | " \n", 212 | " \n", 213 | " tf_max_state = tf.placeholder(safe_learning.config.dtype, [1, None])\n", 214 | " tf_max_action = lyapunov.policy(tf_max_state)\n", 215 | " tf_measurement = true_dynamics(tf_max_state, tf_max_action)\n", 216 | " \n", 217 | "feed_dict = lyapunov.dynamics.feed_dict\n", 218 | " \n", 219 | "def update_gp():\n", 220 | " \"\"\"Update the GP model based on an actively selected data point.\"\"\"\n", 221 | " # Maximum uncertainty in safe set\n", 222 | " safe_grid = grid[lyapunov.safe_set]\n", 223 | " \n", 224 | " feed_dict[safe_set] = safe_grid\n", 225 | " dynamics_std = dynamics_std_tf.eval(feed_dict=feed_dict)\n", 226 | " \n", 227 | " max_id = np.argmax(dynamics_std)\n", 228 | " max_state = safe_grid[[max_id], :].copy()\n", 229 | " \n", 230 | " feed_dict[tf_max_state] = max_state\n", 231 | " max_action, measurement = session.run([tf_max_action, tf_measurement],\n", 232 | " feed_dict=feed_dict)\n", 233 | " \n", 234 | " arg = np.hstack((max_state, max_action))\n", 235 | " lyapunov.dynamics.add_data_point(arg, measurement)\n", 236 | " lyapunov.update_safe_set()" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "# Update the GP model a couple of times\n", 246 | "for i in range(4):\n", 247 | " update_gp()" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "# Plot the new safe set\n", 257 | "plotting.plot_lyapunov_1d(lyapunov, true_dynamics, legend=True)" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": {}, 263 | "source": [ 264 | "We continue to sample like this, until we find the maximum safe set" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [ 273 | "for i in range(20):\n", 274 | " update_gp()\n", 275 | "\n", 276 | "lyapunov.update_safe_set()\n", 277 | "plotting.plot_lyapunov_1d(lyapunov, true_dynamics, legend=False)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "plotting.show_graph(tf.get_default_graph())" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [] 295 | } 296 | ], 297 | "metadata": { 298 | "anaconda-cloud": {}, 299 | "kernelspec": { 300 | "display_name": "Python 3", 301 | "language": "python", 302 | "name": "python3" 303 | }, 304 | "language_info": { 305 | "codemirror_mode": { 306 | "name": "ipython", 307 | "version": 3 308 | }, 309 | "file_extension": ".py", 310 | "mimetype": "text/x-python", 311 | "name": "python", 312 | "nbconvert_exporter": "python", 313 | "pygments_lexer": "ipython3", 314 | "version": "3.6.4" 315 | } 316 | }, 317 | "nbformat": 4, 318 | "nbformat_minor": 2 319 | } 320 | -------------------------------------------------------------------------------- 
/examples/1d_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from __future__ import division, print_function\n", 10 | "\n", 11 | "import tensorflow as tf\n", 12 | "import gpflow\n", 13 | "import numpy as np\n", 14 | "from scipy import linalg\n", 15 | "import matplotlib.pyplot as plt\n", 16 | "from matplotlib import colors\n", 17 | "%matplotlib inline\n", 18 | "\n", 19 | "import safe_learning\n", 20 | "import plotting\n", 21 | "np.random.seed(0)\n", 22 | "\n", 23 | "try:\n", 24 | " session.close()\n", 25 | "except NameError:\n", 26 | " pass\n", 27 | "\n", 28 | "session = tf.InteractiveSession()\n", 29 | "session.run(tf.global_variables_initializer())" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "### Goal:\n", 37 | "\n", 38 | "Optimize over the policy such that the safe set does not shrink" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "We start by defining a discretization of the space $[-1, 1]$ with discretization constant $\\tau$" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# x_min, x_max, discretization\\\n", 55 | "state_limits = np.array([[-1., 1.]])\n", 56 | "action_limits = np.array([[-.5, .5]])\n", 57 | "num_states = 1000\n", 58 | "num_actions = 101\n", 59 | "\n", 60 | "safety_disc = safe_learning.GridWorld(state_limits, num_states)\n", 61 | "\n", 62 | "# Discretization for optimizing the policy (discrete action space)\n", 63 | "# This is not necessary if one uses gradients to optimize the policy\n", 64 | "action_disc = safe_learning.GridWorld(action_limits, num_actions)\n", 65 | "\n", 66 | "# Discretization constant\n", 67 | "tau = np.max(safety_disc.unit_maxes)\n", 68 | "\n", 69 | "# Initial policy: All zeros\n", 70 | "policy_disc = safe_learning.GridWorld(state_limits, 51)\n", 71 | "policy = safe_learning.Triangulation(policy_disc, np.zeros(len(policy_disc)), name='policy')\n", 72 | "\n", 73 | "print('Grid size: {0}'.format(len(safety_disc)))" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "### Define GP dynamics model" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "kernel = (gpflow.kernels.Matern32(2, lengthscales=1, active_dims=[0, 1]) *\n", 90 | " gpflow.kernels.Linear(2, variance=[0.2, 1], ARD=True))\n", 91 | "\n", 92 | "noise_var = 0.01 ** 2\n", 93 | "\n", 94 | "# Mean dynamics\n", 95 | "mean_function = safe_learning.LinearSystem(([1, 0.1]), name='prior_dynamics')\n", 96 | "\n", 97 | "mean_lipschitz = 0.8\n", 98 | "gp_lipschitz = 0.5 # beta * np.sqrt(kernel.Mat32.variance) / kernel.Mat32.lengthscale * np.max(np.abs(state_limits))\n", 99 | "lipschitz_dynamics = mean_lipschitz + gp_lipschitz\n", 100 | "\n", 101 | "a = 1.2\n", 102 | "b = 1.\n", 103 | "q = 1.\n", 104 | "r = 1.\n", 105 | "\n", 106 | "true_dynamics = safe_learning.LinearSystem((a, b), name='true_dynamics')\n", 107 | "\n", 108 | "# Define a GP model over the dynamics\n", 109 | "gp = gpflow.gpr.GPR(np.empty((0, 2), dtype=safe_learning.config.np_dtype),\n", 110 | " np.empty((0, 1), dtype=safe_learning.config.np_dtype),\n", 111 | " kernel,\n", 112 | " mean_function=mean_function)\n", 113 | 
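"# Set the observation noise of the GP likelihood\n",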
"gp.likelihood.variance = noise_var\n", 114 | "\n", 115 | "dynamics = safe_learning.GaussianProcess(gp, name='gp_dynamics')\n", 116 | "\n", 117 | "k_opt, s_opt = safe_learning.utilities.dlqr(a, b, q, r)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "### Define Lyapunov function" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "lyapunov_function = safe_learning.Triangulation(safe_learning.GridWorld(state_limits, 3),\n", 134 | " vertex_values=[1, 0, 1],\n", 135 | " name='lyapunov_function')\n", 136 | "lipschitz_lyapunov = 1.\n", 137 | "\n", 138 | "lyapunov = safe_learning.Lyapunov(safety_disc,\n", 139 | " lyapunov_function,\n", 140 | " dynamics,\n", 141 | " lipschitz_dynamics,\n", 142 | " lipschitz_lyapunov,\n", 143 | " tau,\n", 144 | " policy)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "### Initial safe set " 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "lyapunov.initial_safe_set = np.abs(lyapunov.discretization.all_points.squeeze()) < 0.05\n", 161 | "\n", 162 | "lyapunov.update_safe_set()\n", 163 | "noisy_dynamics = lambda x, u, noise: true_dynamics(x, u)\n", 164 | "plotting.plot_lyapunov_1d(lyapunov, noisy_dynamics, legend=True)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "### Reinforcement learning for the mean dynamics" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "# mean_dynamics = dynamics.to_mean_function()\n", 181 | "\n", 182 | "reward = safe_learning.QuadraticFunction(linalg.block_diag(-q, -r), name='reward_function')\n", 183 | "\n", 184 | "value_function = safe_learning.Triangulation(policy_disc,\n", 185 | " np.zeros(len(policy_disc)),\n", 186 | " project=True,\n", 187 | " name='value_function')\n", 188 | "\n", 189 | "rl = safe_learning.PolicyIteration(policy, dynamics, reward, value_function)" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "### Plot the dynamics\n", 197 | "\n", 198 | "Note that the initial policy is just all zeros!!!" 
199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "_STORAGE = {}\n", 208 | "\n", 209 | "plotting_discretization = safe_learning.GridWorld(np.vstack((state_limits, action_limits)),\n", 210 | " [num_states, num_actions])\n", 211 | "\n", 212 | "@safe_learning.utilities.with_scope('get_safe_set')\n", 213 | "def get_safe_sets(lyapunov, positive=True):\n", 214 | " \n", 215 | " state_disc = lyapunov.discretization\n", 216 | " \n", 217 | " safe_states = state_disc.index_to_state(np.where(lyapunov.safe_set))\n", 218 | " safe_actions = action_disc.all_points\n", 219 | " feed_dict = lyapunov.feed_dict\n", 220 | "\n", 221 | " state_actions = np.column_stack([arr.ravel() for arr in\n", 222 | " np.meshgrid(safe_states, safe_actions, indexing='ij')])\n", 223 | " safe_set = lyapunov.safe_set.reshape(state_disc.num_points)\n", 224 | " \n", 225 | " storage = safe_learning.utilities.get_storage(_STORAGE, index=lyapunov)\n", 226 | " \n", 227 | " if storage is None:\n", 228 | " tf_state_actions = tf.placeholder(safe_learning.config.dtype,\n", 229 | " shape=[None, state_actions.shape[1]])\n", 230 | " \n", 231 | " next_states = lyapunov.dynamics(tf_state_actions)\n", 232 | " \n", 233 | " mean, bound = next_states\n", 234 | " bound = tf.reduce_sum(bound, axis=1)\n", 235 | " lv = lyapunov.lipschitz_lyapunov(mean)\n", 236 | " values = tf.squeeze(lyapunov.lyapunov_function(mean), 1) + lv * bound\n", 237 | " maps_inside = tf.less(values, lyapunov.c_max, name='maps_inside_levelset')\n", 238 | " \n", 239 | " state, actions = tf.split(tf_state_actions, [1, 1], axis=1)\n", 240 | " \n", 241 | " dec = lyapunov.v_decrease_bound(state, next_states)\n", 242 | " \n", 243 | " decreases = tf.less(dec, lyapunov.threshold(state))\n", 244 | " \n", 245 | " storage = [('tf_state_actions', tf_state_actions),\n", 246 | " ('maps_inside', maps_inside),\n", 247 | " ('mean', mean),\n", 248 | " ('decreases', decreases)]\n", 249 | " safe_learning.utilities.set_storage(_STORAGE, storage, index=lyapunov)\n", 250 | " else:\n", 251 | " tf_state_actions, maps_inside, mean, decreases = storage.values()\n", 252 | "\n", 253 | " # Put placeholder values inside feed_dict and evaluate\n", 254 | " feed_dict[tf_state_actions] = state_actions\n", 255 | " maps_inside, mean, decreases = session.run([maps_inside, mean, decreases],\n", 256 | " feed_dict=feed_dict)\n", 257 | " \n", 258 | " # Add the mean safe set on top\n", 259 | " if not positive:\n", 260 | " next_state_index = lyapunov.discretization.state_to_index(mean)\n", 261 | " safe_in_expectation = lyapunov.safe_set[next_state_index]\n", 262 | " maps_inside &= safe_in_expectation\n", 263 | " \n", 264 | " maps_inside_total = np.zeros(plotting_discretization.nindex, dtype=np.bool)\n", 265 | " maps_inside_total = maps_inside_total.reshape(plotting_discretization.num_points)\n", 266 | " decreases_total = np.zeros_like(maps_inside_total)\n", 267 | " \n", 268 | " maps_inside_total[safe_set, :] = maps_inside.reshape(len(safe_states), len(safe_actions))\n", 269 | " decreases_total[safe_set, :] = decreases.reshape(len(safe_states), len(safe_actions))\n", 270 | "\n", 271 | " return maps_inside_total, decreases_total\n", 272 | "\n", 273 | "\n", 274 | "@safe_learning.utilities.with_scope('plot_lyapunov_2d')\n", 275 | "def plot_things():\n", 276 | " fig, axes = plt.subplots(2, 2, figsize=(10, 10), gridspec_kw={'width_ratios': [30, 1]})\n", 277 | "\n", 278 | " # Hide fake cax\n", 279 | " cax, cax1 = axes[:, 1]\n", 280 
| " cax1.set_visible(False)\n", 281 | " cax.set_ylabel('Standard deviation $\\sigma$')\n", 282 | "\n", 283 | " ax0, ax1 = axes[:, 0]\n", 284 | " ax0.set_ylabel('action')\n", 285 | " ax1.set_xlabel('state')\n", 286 | " ax1.set_ylabel('$v(\\mathbf{x})$')\n", 287 | "\n", 288 | " ax1.set_ylim(0, np.max(lyapunov.values))\n", 289 | " ax1.set_xlim(state_limits.squeeze())\n", 290 | " ax0.set_xlim(state_limits.squeeze())\n", 291 | " ax0.set_ylim(action_limits.squeeze())\n", 292 | " ax0.set_xticks([])\n", 293 | "\n", 294 | " # Hide x-ticks of ax0\n", 295 | " plt.setp(ax0.get_xticklabels(), visible=False)\n", 296 | "\n", 297 | " # width between cax and main axis\n", 298 | " plt.subplots_adjust(wspace=.05)\n", 299 | " feed_dict = lyapunov.feed_dict\n", 300 | " \n", 301 | " # Plot the dynamics\n", 302 | " states = lyapunov.discretization.all_points\n", 303 | " state_actions = plotting_discretization.all_points\n", 304 | " \n", 305 | " storage = safe_learning.utilities.get_storage(_STORAGE, index=lyapunov)\n", 306 | " if storage is None:\n", 307 | " actions = lyapunov.policy(states)\n", 308 | " next_states = lyapunov.dynamics(state_actions)\n", 309 | " \n", 310 | " storage = [('actions', actions),\n", 311 | " ('next_states', next_states)]\n", 312 | " \n", 313 | " safe_learning.utilities.set_storage(_STORAGE, storage, index=lyapunov)\n", 314 | " else:\n", 315 | " actions, next_states = storage.values()\n", 316 | " \n", 317 | " mean, bound = session.run(next_states, feed_dict=feed_dict)\n", 318 | " \n", 319 | " # Show the GP variance\n", 320 | " img = ax0.imshow(bound.reshape(plotting_discretization.num_points).T,\n", 321 | " origin='lower',\n", 322 | " extent=plotting_discretization.limits.ravel(),\n", 323 | " aspect='auto')\n", 324 | " \n", 325 | " # Plot the dynamics\n", 326 | " ax0.plot(lyapunov.dynamics.X[:, 0],\n", 327 | " lyapunov.dynamics.X[:, 1], 'x')\n", 328 | " cbar = plt.colorbar(img, cax=cax)\n", 329 | "\n", 330 | " safe, safe_expanders = get_safe_sets(lyapunov) \n", 331 | " safe = safe.reshape(plotting_discretization.num_points)\n", 332 | " v_dec = safe_expanders.reshape(plotting_discretization.num_points)\n", 333 | " \n", 334 | " safe_mask = np.ma.masked_where(~safe, safe)\n", 335 | " \n", 336 | "\n", 337 | " # Overlay the safety feature\n", 338 | " img = ax0.imshow(safe_mask.T,\n", 339 | " origin='lower',\n", 340 | " extent=plotting_discretization.limits.ravel(),\n", 341 | " alpha=0.2,\n", 342 | " cmap=colors.ListedColormap(['white']),\n", 343 | " aspect='auto',\n", 344 | " vmin=0,\n", 345 | " vmax=1) \n", 346 | " \n", 347 | " # Overlay the safety feature\n", 348 | " if np.any(v_dec):\n", 349 | " v_dec_mask = np.ma.masked_where(~v_dec, v_dec)\n", 350 | " img = ax0.imshow(v_dec_mask.T,\n", 351 | " origin='lower',\n", 352 | " extent=plotting_discretization.limits.ravel(),\n", 353 | " alpha=0.5,\n", 354 | " cmap=colors.ListedColormap(['red']),\n", 355 | " aspect='auto',\n", 356 | " vmin=0,\n", 357 | " vmax=1)\n", 358 | " \n", 359 | " is_safe = lyapunov.safe_set\n", 360 | " # Plot the Lyapunov function\n", 361 | " lyap_safe = np.ma.masked_where(~is_safe, lyapunov.values)\n", 362 | " lyap_unsafe = np.ma.masked_where(is_safe, lyapunov.values)\n", 363 | "\n", 364 | " # Plot lines for the boundary of the safety feature\n", 365 | " x_min_safe = np.min(states[is_safe])\n", 366 | " x_max_safe = np.max(states[is_safe])\n", 367 | "\n", 368 | " ax1.plot(states, lyap_safe, 'r')\n", 369 | " ax1.plot(states, lyap_unsafe, 'b')\n", 370 | "\n", 371 | " kw_axv = {'color': 'red',\n", 372 | " 'alpha': 0.5}\n", 
373 | " ax0.axvline(x=x_min_safe, ymin=-0.2, ymax=1, clip_on=False, **kw_axv)\n", 374 | " ax1.axvline(x=x_min_safe, ymin=0, ymax=1, clip_on=False, **kw_axv)\n", 375 | "\n", 376 | " ax0.axvline(x=x_max_safe, ymin=-0.2, ymax=1, clip_on=False, **kw_axv)\n", 377 | " ax1.axvline(x=x_max_safe, ymin=0, ymax=1, clip_on=False, **kw_axv)\n", 378 | " \n", 379 | " # Plot the current policy\n", 380 | " actions = actions.eval(feed_dict=feed_dict)\n", 381 | " ax0.step(states, actions, label='safe policy', alpha=0.5)\n", 382 | "\n", 383 | " ax0.legend()\n", 384 | " plt.show()\n", 385 | "\n", 386 | "# optimize_safe_policy(lyapunov)\n", 387 | "lyapunov.update_safe_set()\n", 388 | "plot_things()" 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "metadata": {}, 394 | "source": [ 395 | "## Online learning\n", 396 | "As we sample within this initial safe set, we gain more knowledge about the system. In particular, we iteratively select the state withing the safe set, $\\mathcal{S}_n$, where the dynamics are the most uncertain (highest variance)." 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": {}, 403 | "outputs": [], 404 | "source": [ 405 | "action_space = action_disc.all_points\n", 406 | "action_variation = safe_learning.GridWorld(np.array(action_limits) / 20, 11).all_points\n", 407 | "\n", 408 | "rl_opt_value_function = rl.optimize_value_function()\n", 409 | "for i in range(3):\n", 410 | " rl_opt_value_function.eval(feed_dict=lyapunov.feed_dict)\n", 411 | " rl.discrete_policy_optimization(action_space)\n", 412 | "\n", 413 | "\n", 414 | "with tf.variable_scope('add_new_measurement'):\n", 415 | " action_dim = rl.policy.output_dim\n", 416 | " tf_max_state_action = tf.placeholder(safe_learning.config.dtype,\n", 417 | " shape=[1, safety_disc.ndim + action_dim])\n", 418 | " tf_measurement = true_dynamics(tf_max_state_action)\n", 419 | " \n", 420 | "def update_gp():\n", 421 | " \"\"\"Update the GP model based on an actively selected data point.\"\"\"\n", 422 | " # Optimize the value/function and policy\n", 423 | " rl_opt_value_function.eval(feed_dict=lyapunov.feed_dict)\n", 424 | " rl.discrete_policy_optimization(action_space)\n", 425 | " \n", 426 | " # Get a new sample location\n", 427 | " lyapunov.update_safe_set()\n", 428 | " max_state_action, _ = safe_learning.get_safe_sample(lyapunov,\n", 429 | " action_variation,\n", 430 | " action_limits)\n", 431 | "\n", 432 | " # Obtain a measurement of the true dynamics\n", 433 | " lyapunov.feed_dict[tf_max_state_action] = max_state_action\n", 434 | " measurement = tf_measurement.eval(feed_dict=lyapunov.feed_dict)\n", 435 | "\n", 436 | " # Add the measurement to our GP dynamics\n", 437 | " lyapunov.dynamics.add_data_point(max_state_action, measurement)\n", 438 | " \n", 439 | "\n", 440 | "update_gp()\n", 441 | "plot_things()" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": null, 447 | "metadata": {}, 448 | "outputs": [], 449 | "source": [ 450 | "for i in range(20):\n", 451 | " update_gp()\n", 452 | " \n", 453 | "lyapunov.update_safe_set()\n", 454 | "plot_things()" 455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [ 463 | "plotting.show_graph(tf.get_default_graph())" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": null, 469 | "metadata": {}, 470 | "outputs": [], 471 | "source": [] 472 | } 473 | ], 474 | "metadata": { 475 | "anaconda-cloud": {}, 476 | 
"kernelspec": { 477 | "display_name": "Python 3", 478 | "language": "python", 479 | "name": "python3" 480 | }, 481 | "language_info": { 482 | "codemirror_mode": { 483 | "name": "ipython", 484 | "version": 3 485 | }, 486 | "file_extension": ".py", 487 | "mimetype": "text/x-python", 488 | "name": "python", 489 | "nbconvert_exporter": "python", 490 | "pygments_lexer": "ipython3", 491 | "version": "3.6.4" 492 | } 493 | }, 494 | "nbformat": 4, 495 | "nbformat_minor": 2 496 | } 497 | -------------------------------------------------------------------------------- /safe_learning/utilities.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities for plotting, function definitions, and GPs. 3 | 4 | This file defines utilities needed for the experiments, such as creating 5 | parameter grids, computing LQR controllers, Lyapunov functions, sample 6 | functions of Gaussian processes, and plotting ellipses. 7 | 8 | Author: Felix Berkenkamp, Learning & Adaptive Systems Group, ETH Zurich 9 | (GitHub: befelix) 10 | """ 11 | 12 | from __future__ import absolute_import, division, print_function 13 | 14 | import itertools 15 | import inspect 16 | from functools import wraps, partial 17 | 18 | import numpy as np 19 | import scipy.interpolate 20 | import scipy.linalg 21 | import tensorflow as tf 22 | from future.builtins import zip, range 23 | from future.backports import OrderedDict 24 | 25 | from safe_learning import config 26 | 27 | __all__ = ['combinations', 'linearly_spaced_combinations', 'lqr', 'dlqr', 28 | 'ellipse_bounds', 'concatenate_inputs', 'make_tf_fun', 29 | 'with_scope', 'use_parent_scope', 'add_weight_constraint', 30 | 'batchify', 'get_storage', 'set_storage', 'unique_rows', 31 | 'gradient_clipping'] 32 | 33 | 34 | _STORAGE = {} 35 | 36 | 37 | def make_tf_fun(return_type, gradient=None, stateful=True): 38 | """Convert a python function to a tensorflow function. 39 | 40 | Parameters 41 | ---------- 42 | return_type : list 43 | A list of tensorflow return types. Needs to match with the gradient. 44 | gradient : callable, optional 45 | A function that provides the gradient. It takes `op` and one gradient 46 | per output of the function as inputs and returns one gradient for each 47 | input of the function. If stateful is `False` then tensorflow does not 48 | seem to compute gradients at all. 49 | 50 | Returns 51 | ------- 52 | A tensorflow function with gradients registered. 
53 | """ 54 | def wrap(function): 55 | """Create a new function.""" 56 | # Function name with stipped underscore (not allowed by tensorflow) 57 | name = function.__name__.lstrip('_') 58 | 59 | # Without gradients we can take the short route here 60 | if gradient is None: 61 | @wraps(function) 62 | def wrapped_function(self, *args, **kwargs): 63 | method = partial(function, self, **kwargs) 64 | return tf.py_func(method, args, return_type, 65 | stateful=stateful, name=name) 66 | 67 | return wrapped_function 68 | 69 | # Name for the gradient operation 70 | grad_name = name + '_gradient' 71 | 72 | @wraps(function) 73 | def wrapped_function(self, *args): 74 | # Overwrite the gradient 75 | graph = tf.get_default_graph() 76 | 77 | # Make sure the name we specify is unique 78 | unique_grad_name = graph.unique_name(grad_name) 79 | 80 | # Register the new gradient method with tensorflow 81 | tf.RegisterGradient(unique_grad_name)(gradient) 82 | 83 | # Remove self: Tensorflow does not allow for non-tensor inputs 84 | method = partial(function, self) 85 | 86 | with graph.gradient_override_map({"PyFunc": unique_grad_name}): 87 | return tf.py_func(method, args, return_type, 88 | stateful=stateful, name=name) 89 | 90 | return wrapped_function 91 | return wrap 92 | 93 | 94 | def with_scope(name): 95 | """Set the tensorflow scope for the function. 96 | 97 | Parameters 98 | ---------- 99 | name : string, optional 100 | 101 | Returns 102 | ------- 103 | The tensorflow function with scope name. 104 | """ 105 | def wrap(function): 106 | @wraps(function) 107 | def wrapped_function(*args, **kwargs): 108 | with tf.name_scope(name): 109 | return function(*args, **kwargs) 110 | return wrapped_function 111 | return wrap 112 | 113 | 114 | def use_parent_scope(function): 115 | """Use the parent scope for tensorflow.""" 116 | @wraps(function) 117 | def wrapped_function(self, *args, **kwargs): 118 | with tf.variable_scope(self.scope_name): 119 | return function(self, *args, **kwargs) 120 | return wrapped_function 121 | 122 | 123 | def concatenate_inputs(start=0): 124 | """Concatenate the numpy array inputs to the functions. 125 | 126 | Parameters 127 | ---------- 128 | start : int, optional 129 | The attribute number at which to start concatenating. 130 | """ 131 | def wrap(function): 132 | @wraps(function) 133 | def wrapped_function(*args, **kwargs): 134 | """Concatenate the input arguments.""" 135 | nargs = len(args) - start 136 | # Check for tensorflow objects 137 | tf_objects = (tf.Tensor, tf.Variable) 138 | if any(isinstance(arg, tf_objects) for arg in args[start:]): 139 | # reduce number of function calls in graph 140 | if nargs == 1: 141 | return function(*args, **kwargs) 142 | # concatenate extra arguments 143 | args = args[:start] + (tf.concat(args[start:], axis=1),) 144 | return function(*args, **kwargs) 145 | else: 146 | # Map to 2D objects 147 | to_concatenate = map(np.atleast_2d, args[start:]) 148 | 149 | if nargs == 1: 150 | concatenated = tuple(to_concatenate) 151 | else: 152 | concatenated = (np.hstack(to_concatenate),) 153 | 154 | args = args[:start] + concatenated 155 | return function(*args, **kwargs) 156 | 157 | return wrapped_function 158 | 159 | return wrap 160 | 161 | 162 | def add_weight_constraint(optimization, var_list, bound_list): 163 | """Add weight constraints to an optimization step. 164 | 165 | Parameters 166 | ---------- 167 | optimization : tf.Tensor 168 | The optimization routine that updates the parameters. 169 | var_list : list 170 | A list of variables that should be bounded. 
171 |     bound_list : list 172 |         A list of bounds (lower, upper) for each variable in var_list. 173 | 174 |     Returns 175 |     ------- 176 |     assign_operations : list 177 |         A list of assign operations that correspond to one step of the 178 |         constrained optimization. 179 |     """ 180 |     with tf.control_dependencies([optimization]): 181 |         new_list = [] 182 |         for var, bound in zip(var_list, bound_list): 183 |             clipped_var = tf.clip_by_value(var, bound[0], bound[1]) 184 |             assign = tf.assign(var, clipped_var) 185 |             new_list.append(assign) 186 |         return new_list 187 | 188 | 189 | def gradient_clipping(optimizer, loss, var_list, limits): 190 |     """Clip the gradients for the optimization problem. 191 | 192 |     Parameters 193 |     ---------- 194 |     optimizer : instance of tensorflow optimizer 195 |     loss : tf.Tensor 196 |         The loss that we want to optimize. 197 |     var_list : tuple 198 |         A list of variables for which we want to compute gradients. 199 |     limits : tuple 200 |         A list of tuples with lower/upper bounds for each variable. 201 | 202 |     Returns 203 |     ------- 204 |     opt : tf.Tensor 205 |         One optimization step with clipped gradients. 206 | 207 |     Examples 208 |     -------- 209 |     >>> from safe_learning.utilities import gradient_clipping 210 |     >>> var = tf.Variable(1.) 211 |     >>> loss = tf.square(var - 1.) 212 |     >>> optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01) 213 |     >>> opt_loss = gradient_clipping(optimizer, loss, [var], [(-1, 1)]) 214 |     """ 215 |     gradients = optimizer.compute_gradients(loss, var_list=var_list) 216 | 217 |     clipped_gradients = [(tf.clip_by_value(grad, low, up), var) 218 |                          for (grad, var), (low, up) in zip(gradients, limits)] 219 | 220 |     # Return optimization step 221 |     return optimizer.apply_gradients(clipped_gradients) 222 | 223 | 224 | def batchify(arrays, batch_size): 225 |     """Yield the arrays in batches and in order. 226 | 227 |     The last batch might be smaller than batch_size. 228 | 229 |     Parameters 230 |     ---------- 231 |     arrays : list of ndarray 232 |         The arrays that we want to convert to batches. 233 |     batch_size : int 234 |         The size of each individual batch. 235 |     """ 236 |     if not isinstance(arrays, (list, tuple)): 237 |         arrays = (arrays,) 238 | 239 |     # Iterate over array in batches 240 |     for i, i_next in zip(itertools.count(start=0, step=batch_size), 241 |                          itertools.count(start=batch_size, step=batch_size)): 242 | 243 |         batches = [array[i:i_next] for array in arrays] 244 | 245 |         # Break if there are no points left 246 |         if batches[0].size: 247 |             yield i, batches 248 |         else: 249 |             break 250 | 251 | 252 | def combinations(arrays): 253 |     """Return a single array with combinations of parameters. 254 | 255 |     Parameters 256 |     ---------- 257 |     arrays : list of np.array 258 | 259 |     Returns 260 |     ------- 261 |     array : np.array 262 |         An array that contains all combinations of the input arrays 263 |     """ 264 |     return np.array(np.meshgrid(*arrays)).T.reshape(-1, len(arrays)) 265 | 266 | 267 | def linearly_spaced_combinations(bounds, num_samples): 268 |     """ 269 |     Return 2-D array with all linearly spaced combinations with the bounds. 270 | 271 |     Parameters 272 |     ---------- 273 |     bounds : sequence of tuples 274 |         The bounds for the variables, [(x1_min, x1_max), (x2_min, x2_max), ...] 275 |     num_samples : integer or array_like 276 |         Number of samples to use for every dimension. Can be a constant if 277 |         the same number should be used for all, or an array to fine-tune 278 |         precision. Total number of data points is num_samples ** len(bounds).
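For example, bounds=[(0, 1), (0, 1)] with num_samples=3 yields 3 ** 2 = 9 points.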
279 | 280 |     Returns 281 |     ------- 282 |     combinations : 2-d array 283 |         A 2-d array. If d = len(bounds) and l = prod(num_samples) then it 284 |         is of size l x d, that is, every row contains one combination of 285 |         inputs. 286 |     """ 287 |     bounds = np.atleast_2d(bounds) 288 |     num_vars = len(bounds) 289 |     num_samples = np.broadcast_to(num_samples, num_vars) 290 | 291 |     # Create linearly spaced test inputs 292 |     inputs = [np.linspace(b[0], b[1], n) for b, n in zip(bounds, 293 |                                                          num_samples)] 294 | 295 |     # Convert to 2-D array 296 |     return combinations(inputs) 297 | 298 | 299 | def lqr(a, b, q, r): 300 |     """Compute the continuous-time LQR controller. 301 | 302 |     The optimal control input is `u = -k.dot(x)`. 303 | 304 |     Parameters 305 |     ---------- 306 |     a : np.array 307 |     b : np.array 308 |     q : np.array 309 |     r : np.array 310 | 311 |     Returns 312 |     ------- 313 |     k : np.array 314 |         Controller matrix 315 |     p : np.array 316 |         Cost to go matrix 317 |     """ 318 |     a, b, q, r = map(np.atleast_2d, (a, b, q, r)) 319 |     p = scipy.linalg.solve_continuous_are(a, b, q, r) 320 | 321 |     # LQR gain 322 |     k = np.linalg.solve(r, b.T.dot(p)) 323 | 324 |     return k, p 325 | 326 | 327 | def dlqr(a, b, q, r): 328 |     """Compute the discrete-time LQR controller. 329 | 330 |     The optimal control input is `u = -k.dot(x)`. 331 | 332 |     Parameters 333 |     ---------- 334 |     a : np.array 335 |     b : np.array 336 |     q : np.array 337 |     r : np.array 338 | 339 |     Returns 340 |     ------- 341 |     k : np.array 342 |         Controller matrix 343 |     p : np.array 344 |         Cost to go matrix 345 |     """ 346 |     a, b, q, r = map(np.atleast_2d, (a, b, q, r)) 347 |     p = scipy.linalg.solve_discrete_are(a, b, q, r) 348 | 349 |     # LQR gain 350 |     # k = (b.T * p * b + r)^-1 * (b.T * p * a) 351 |     bp = b.T.dot(p) 352 |     tmp1 = bp.dot(b) 353 |     tmp1 += r 354 |     tmp2 = bp.dot(a) 355 |     k = np.linalg.solve(tmp1, tmp2) 356 | 357 |     return k, p 358 | 359 | 360 | def ellipse_bounds(P, level, n=100): 361 |     """Compute the bounds of a 2D ellipse. 362 | 363 |     The levelset of the ellipsoid is given by 364 |     level = x' P x. Given the coordinates of the first 365 |     dimension, this function computes the corresponding 366 |     lower and upper values of the second dimension and 367 |     removes any values of x0 that are outside of the ellipse. 368 | 369 |     Parameters 370 |     ---------- 371 |     P : np.array 372 |         The matrix of the ellipsoid 373 |     level : float 374 |         The value of the levelset 375 |     n : int 376 |         Number of data points 377 | 378 |     Returns 379 |     ------- 380 |     x : np.array 381 |         1D array of x positions of the ellipse 382 |     yu : np.array 383 |         The upper bound of the ellipse 384 |     yl : np.array 385 |         The lower bound of the ellipse 386 | 387 |     Notes 388 |     ----- 389 |     This can be used as 390 |     ```plt.fill_between(*ellipse_bounds(P, level))``` 391 |     """ 392 |     # Round up to multiple of 2 393 |     n += n % 2 394 | 395 |     # Principal axes of ellipsoid 396 |     eigval, eigvec = np.linalg.eig(P) 397 |     eigvec *= np.sqrt(level / eigval) 398 | 399 |     # set zero angle at maximum x 400 |     angle = np.linspace(0, 2 * np.pi, n)[:, None] 401 |     angle += np.arctan(eigvec[0, 1] / eigvec[0, 0]) 402 | 403 |     # Compute positions 404 |     pos = np.cos(angle) * eigvec[:, 0] + np.sin(angle) * eigvec[:, 1] 405 |     n //= 2 406 | 407 |     # Return x-position (symmetric) and upper/lower bounds 408 |     return pos[:n, 0], pos[:n, 1], pos[:n - 1:-1, 1] 409 | 410 | 411 | def get_storage(dictionary, index=None): 412 |     """Get a unique storage point within a class method.
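The storage is indexed by the name of the calling function, so each method that uses this helper gets its own entry in `dictionary`.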
413 | 414 |     Parameters 415 |     ---------- 416 |     dictionary : dict 417 |         A dictionary used for storage. 418 |     index : hashable 419 |         An index under which to store the element. Needs to be hashable. 420 |         This is useful for functions which might be accessed with multiple 421 |         different arguments. 422 | 423 |     Returns 424 |     ------- 425 |     storage : OrderedDict 426 |         The storage object, or None if no storage exists. Otherwise it 427 |         is the OrderedDict that was previously put in storage. 428 |     """ 429 |     # Use function name as storage name 430 |     frame = inspect.currentframe() 431 |     storage_name = inspect.getframeinfo(frame.f_back).function 432 | 433 |     storage = dictionary.get(storage_name) 434 | 435 |     if index is None: 436 |         return storage 437 |     elif storage is not None: 438 |         # Return directly the indexed object 439 |         try: 440 |             return storage[index] 441 |         except KeyError: 442 |             pass 443 | 444 | 445 | def set_storage(dictionary, name_value, index=None): 446 |     """Set the storage point within a class method. 447 | 448 |     Parameters 449 |     ---------- 450 |     dictionary : dict 451 |     name_value : list of tuples 452 |         A list of tuples, where each tuple contains a string with the name 453 |         of the storage object and the corresponding value that is to be put 454 |         in storage. These are stored as OrderedDicts. 455 |     index : hashable 456 |         An index under which to store the element. Needs to be hashable. 457 |         This is useful for functions which might be accessed with multiple 458 |         different arguments. 459 |     """ 460 |     # Use function name as storage name 461 |     frame = inspect.currentframe() 462 |     storage_name = inspect.getframeinfo(frame.f_back).function 463 | 464 |     storage = OrderedDict(name_value) 465 |     if index is None: 466 |         dictionary[storage_name] = storage 467 |     else: 468 |         # Make sure the storage is initialized 469 |         if storage_name not in dictionary: 470 |             dictionary[storage_name] = {} 471 |         # Set the indexed storage 472 |         dictionary[storage_name][index] = storage 473 | 474 | 475 | def get_feed_dict(graph): 476 |     """Return the global feed_dict used for this graph. 477 | 478 |     Parameters 479 |     ---------- 480 |     graph : tf.Graph 481 | 482 |     Returns 483 |     ------- 484 |     feed_dict : dict 485 |         The feed_dict for this graph. 486 |     """ 487 |     try: 488 |         # Just return the feed_dict 489 |         return graph.feed_dict_sl 490 |     except AttributeError: 491 |         # Create a new feed_dict for this graph 492 |         graph.feed_dict_sl = {} 493 |         return graph.feed_dict_sl 494 | 495 | 496 | def unique_rows(array): 497 |     """Return the unique rows of the array. 498 | 499 |     Parameters 500 |     ---------- 501 |     array : ndarray 502 |         A 2D numpy array. 503 | 504 |     Returns 505 |     ------- 506 |     unique_array : ndarray 507 |         A 2D numpy array that contains all the unique rows of array. 508 |     """ 509 |     array = np.ascontiguousarray(array) 510 |     # Combine all the rows into a single element of the flexible void datatype 511 |     dtype = np.dtype((np.void, array.dtype.itemsize * array.shape[1])) 512 |     combined_array = array.view(dtype=dtype) 513 |     # Get all the unique rows of the combined array 514 |     _, idx = np.unique(combined_array, return_index=True) 515 | 516 |     return array[idx] 517 | 518 | 519 | def compute_trajectory(dynamics, policy, initial_state, num_steps): 520 |     """Compute a state trajectory given dynamics and a policy. 521 | 522 |     Parameters 523 |     ---------- 524 |     dynamics : callable 525 |         A function that takes the current state and action as input and returns 526 |         the next state.
527 |     policy : callable 528 |         A function that takes the current state as input and returns the 529 |         action. 530 |     initial_state : Tensor or ndarray 531 |         The initial state at which to start simulating. 532 |     num_steps : int 533 |         The number of steps for which to simulate the system. 534 | 535 |     Returns 536 |     ------- 537 |     states : ndarray 538 |         A (num_steps x state_dim) array with one state on each row. 539 |     actions : ndarray 540 |         A ((num_steps - 1) x action_dim) array with the corresponding action 541 |         on each row. 542 |     """ 543 |     initial_state = np.atleast_2d(initial_state) 544 |     state_dim = initial_state.shape[1] 545 | 546 |     # Get storage (indexed by dynamics and policy) 547 |     index = (dynamics, policy) 548 |     storage = get_storage(_STORAGE, index=index) 549 | 550 |     if storage is None: 551 |         # Compute next state under the policy 552 |         tf_state = tf.placeholder(config.dtype, [1, state_dim]) 553 |         tf_action = policy(tf_state) 554 |         tf_next_state = dynamics(tf_state, tf_action) 555 | 556 |         storage = [('tf_state', tf_state), 557 |                    ('tf_action', tf_action), 558 |                    ('tf_next_state', tf_next_state)] 559 | 560 |         set_storage(_STORAGE, storage, index=index) 561 |     else: 562 |         tf_state, tf_action, tf_next_state = storage.values() 563 | 564 |     # Initialize 565 |     dtype = config.np_dtype 566 |     states = np.empty((num_steps, state_dim), dtype=dtype) 567 |     actions = np.empty((num_steps - 1, policy.output_dim), dtype=dtype) 568 | 569 |     states[0, :] = initial_state 570 | 571 |     # Get the feed dict 572 |     session = tf.get_default_session() 573 |     feed_dict = get_feed_dict(session.graph) 574 | 575 |     next_data = [tf_next_state, tf_action] 576 | 577 |     # Run simulation 578 |     for i in range(num_steps - 1): 579 |         feed_dict[tf_state] = states[[i], :] 580 |         states[i + 1, :], actions[i, :] = session.run(next_data, 581 |                                                       feed_dict=feed_dict) 582 | 583 |     return states, actions 584 | -------------------------------------------------------------------------------- /examples/inverted_pendulum.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from __future__ import division, print_function\n", 10 | "\n", 11 | "from functools import partial\n", 12 | "\n", 13 | "import gpflow\n", 14 | "import tensorflow as tf\n", 15 | "import numpy as np\n", 16 | "\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "from scipy import signal, linalg\n", 19 | "\n", 20 | "# Nice progress bars\n", 21 | "try:\n", 22 | "    from tqdm import tqdm\n", 23 | "except ImportError:\n", 24 | "    tqdm = lambda x: x\n", 25 | "\n", 26 | "import safe_learning\n", 27 | "import plotting\n", 28 | "from utilities import InvertedPendulum\n", 29 | "\n", 30 | "%matplotlib inline\n", 31 | "\n", 32 | "# Open a new session (close old one if exists)\n", 33 | "try:\n", 34 | "    session.close()\n", 35 | "except NameError:\n", 36 | "    pass\n", 37 | "\n", 38 | "session = tf.InteractiveSession()\n", 39 | "session.run(tf.global_variables_initializer())" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "# Define underlying dynamic system and costs/rewards\n", 47 | "Define the dynamics of the true system and of the (wrong) model used for control\n" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "n = 2\n", 57 | "m = 1\n", 58 | "\n", 59 | "# 'Wrong' model parameters\n", 60 | "mass = 0.1\n",
61 | "friction = 0.\n", 62 | "length = 0.5\n", 63 | "gravity = 9.81\n", 64 | "inertia = mass * length ** 2\n", 65 | "\n", 66 | "# True model parameters\n", 67 | "true_mass = 0.15\n", 68 | "true_friction = 0.1\n", 69 | "true_length = length\n", 70 | "true_inertia = true_mass * true_length ** 2\n", 71 | "\n", 72 | "# Input saturation\n", 73 | "x_max = np.deg2rad(30)\n", 74 | "u_max = gravity * true_mass * true_length * np.sin(x_max)\n", 75 | "\n", 76 | "# Normalization\n", 77 | "norm_state = np.array([x_max, np.sqrt(gravity / length)])\n", 78 | "norm_action = np.array([u_max])\n", 79 | "\n", 80 | "# Corresponding dynamic systems\n", 81 | "true_dynamics = InvertedPendulum(mass=true_mass, length=true_length, friction=true_friction,\n", 82 | " normalization=(norm_state, norm_action))\n", 83 | "\n", 84 | "wrong_pendulum = InvertedPendulum(mass=mass, length=length, friction=friction,\n", 85 | " normalization=(norm_state, norm_action))\n", 86 | "\n", 87 | "# LQR cost matrices\n", 88 | "q = 1 * np.diag([1., 2.])\n", 89 | "r = 1.2 * np.array([[1]], dtype=safe_learning.config.np_dtype)\n", 90 | "\n", 91 | "# Quadratic (LQR) reward function\n", 92 | "gamma = 0.98\n", 93 | "reward_function = safe_learning.QuadraticFunction(linalg.block_diag(-q, -r))" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "# Set up a discretization for safety verification" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "# x_min, x_max, discretization\\\n", 110 | "state_limits = np.array([[-2., 2.], [-1.5, 1.5]])\n", 111 | "action_limits = np.array([[-1, 1]])\n", 112 | "num_states = [2001, 1501]\n", 113 | "\n", 114 | "safety_disc = safe_learning.GridWorld(state_limits, num_states)\n", 115 | "policy_disc = safe_learning.GridWorld(state_limits, [55, 55])\n", 116 | "\n", 117 | "# Discretization constant\n", 118 | "tau = np.min(safety_disc.unit_maxes)\n", 119 | "\n", 120 | "print('Grid size: {0}'.format(safety_disc.nindex))" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "# Define the GP dynamics model\n", 128 | "\n", 129 | "We use a combination of kernels to model the errors in the dynamics" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "A, B = wrong_pendulum.linearize()\n", 139 | "lipschitz_dynamics = 1\n", 140 | "\n", 141 | "noise_var = 0.001 ** 2\n", 142 | "\n", 143 | "m_true = np.hstack((true_dynamics.linearize()))\n", 144 | "m = np.hstack((A, B))\n", 145 | "\n", 146 | "variances = (m_true - m) ** 2\n", 147 | "\n", 148 | "# Make sure things remain \n", 149 | "np.clip(variances, 1e-5, None, out=variances)\n", 150 | "\n", 151 | "# Kernels\n", 152 | "kernel1 = (gpflow.kernels.Linear(3, variance=variances[0, :], ARD=True)\n", 153 | " + gpflow.kernels.Matern32(1, lengthscales=1, active_dims=[0])\n", 154 | " * gpflow.kernels.Linear(1, variance=variances[0, 1]))\n", 155 | "\n", 156 | "kernel2 = (gpflow.kernels.Linear(3, variance=variances[1, :], ARD=True)\n", 157 | " + gpflow.kernels.Matern32(1, lengthscales=1, active_dims=[0])\n", 158 | " * gpflow.kernels.Linear(1, variance=variances[1, 1]))\n", 159 | "\n", 160 | "# Mean dynamics\n", 161 | "\n", 162 | "mean_dynamics = safe_learning.LinearSystem((A, B), name='mean_dynamics')\n", 163 | "mean_function1 = safe_learning.LinearSystem((A[[0], :], B[[0], :]), name='mean_dynamics_1')\n", 
164 | "mean_function2 = safe_learning.LinearSystem((A[[1], :], B[[1], :]), name='mean_dynamics_2')\n", 165 | "\n", 166 | "# Define a GP model over the dynamics\n", 167 | "gp1 = gpflow.gpr.GPR(np.empty((0, 3), dtype=safe_learning.config.np_dtype),\n", 168 | " np.empty((0, 1), dtype=safe_learning.config.np_dtype),\n", 169 | " kernel1,\n", 170 | " mean_function=mean_function1)\n", 171 | "gp1.likelihood.variance = noise_var\n", 172 | "\n", 173 | "gp2 = gpflow.gpr.GPR(np.empty((0, 3), dtype=safe_learning.config.np_dtype),\n", 174 | " np.empty((0, 1), dtype=safe_learning.config.np_dtype),\n", 175 | " kernel2,\n", 176 | " mean_function=mean_function2)\n", 177 | "gp2.likelihood.variance = noise_var\n", 178 | "\n", 179 | "gp1_fun = safe_learning.GaussianProcess(gp1)\n", 180 | "gp2_fun = safe_learning.GaussianProcess(gp2)\n", 181 | "\n", 182 | "dynamics = safe_learning.FunctionStack((gp1_fun, gp2_fun))" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "# Compute the optimal policy for the linear (and wrong) mean dynamics\n", 192 | "k, s = safe_learning.utilities.dlqr(A, B, q, r)\n", 193 | "init_policy = safe_learning.LinearSystem((-k), name='initial_policy')\n", 194 | "init_policy = safe_learning.Saturation(init_policy, -1, 1)\n", 195 | "\n", 196 | "# Define the Lyapunov function corresponding to the initial policy\n", 197 | "init_lyapunov = safe_learning.QuadraticFunction(s)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "# Set up the dynamic programming problem" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [ 213 | "# Define a neural network policy\n", 214 | "relu = tf.nn.relu\n", 215 | "policy = safe_learning.NeuralNetwork(layers=[32, 32, 1],\n", 216 | " nonlinearities=[relu, relu, tf.nn.tanh],\n", 217 | " output_scale=action_limits[0, 1])\n", 218 | "\n", 219 | "# Define value function approximation\n", 220 | "value_function = safe_learning.Triangulation(policy_disc,\n", 221 | " -init_lyapunov(policy_disc.all_points).eval(),\n", 222 | " project=True)\n", 223 | "\n", 224 | "# Define policy optimization problem\n", 225 | "rl = safe_learning.PolicyIteration(\n", 226 | " policy,\n", 227 | " dynamics,\n", 228 | " reward_function,\n", 229 | " value_function,\n", 230 | " gamma=gamma)\n", 231 | " \n", 232 | "\n", 233 | "with tf.name_scope('rl_mean_optimization'):\n", 234 | " rl_opt_value_function = rl.optimize_value_function()\n", 235 | " \n", 236 | " # Placeholder for states\n", 237 | " tf_states_mean = tf.placeholder(safe_learning.config.dtype, [None, 2])\n", 238 | " \n", 239 | " # Optimize for expected gain\n", 240 | " values = rl.future_values(tf_states_mean)\n", 241 | " policy_loss = -tf.reduce_mean(values)\n", 242 | " \n", 243 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)\n", 244 | " adapt_policy_mean = optimizer.minimize(policy_loss, var_list=rl.policy.parameters)\n" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "# Start the session\n", 254 | "session.run(tf.global_variables_initializer())" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "### Run initial dynamic programming for the mean dynamics" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 
null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "for i in tqdm(range(3000)):\n", 271 | " \n", 272 | " # select random training batches\n", 273 | " rl.feed_dict[tf_states_mean] = policy_disc.sample_continuous(1000)\n", 274 | "\n", 275 | " session.run(adapt_policy_mean, feed_dict=rl.feed_dict)" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "# Define the Lyapunov function\n", 283 | "\n", 284 | "Here we use the fact that the optimal value function is a Lyapunov function for the optimal policy if the dynamics are deterministic. As uncertainty about the dynamics decreases, the value function for the mean dynamics will thus converge to a Lyapunov function." 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "lyapunov_function = -rl.value_function\n", 294 | "lipschitz_lyapunov = lambda x: tf.reduce_max(tf.abs(rl.value_function.gradient(x)),\n", 295 | " axis=1, keepdims=True)\n", 296 | "\n", 297 | "lipschitz_policy = lambda x: policy.lipschitz() \n", 298 | "\n", 299 | "a_true, b_true = true_dynamics.linearize()\n", 300 | "lipschitz_dynamics = lambda x: np.max(np.abs(a_true)) + np.max(np.abs(b_true)) * lipschitz_policy(x)\n", 301 | "\n", 302 | "# Lyapunov function definition\n", 303 | "lyapunov = safe_learning.Lyapunov(safety_disc,\n", 304 | " lyapunov_function,\n", 305 | " dynamics,\n", 306 | " lipschitz_dynamics,\n", 307 | " lipschitz_lyapunov,\n", 308 | " tau,\n", 309 | " policy=rl.policy,\n", 310 | " initial_set=None)\n", 311 | "\n", 312 | "# Set initial safe set (level set) based on initial Lyapunov candidate\n", 313 | "values = init_lyapunov(safety_disc.all_points).eval()\n", 314 | "cutoff = np.max(values) * 0.005\n", 315 | "\n", 316 | "lyapunov.initial_safe_set = np.squeeze(values, axis=1) <= cutoff" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "def plot_safe_set(lyapunov, show=True):\n", 326 | " \"\"\"Plot the safe set for a given Lyapunov function.\"\"\"\n", 327 | " plt.imshow(lyapunov.safe_set.reshape(num_states).T,\n", 328 | " origin='lower',\n", 329 | " extent=lyapunov.discretization.limits.ravel(),\n", 330 | " vmin=0,\n", 331 | " vmax=1)\n", 332 | " \n", 333 | " if isinstance(lyapunov.dynamics, safe_learning.UncertainFunction):\n", 334 | " X = lyapunov.dynamics.functions[0].X\n", 335 | " plt.plot(X[:, 0], X[:, 1], 'rx')\n", 336 | " \n", 337 | " plt.title('safe set')\n", 338 | " plt.colorbar()\n", 339 | " if show:\n", 340 | " plt.show()\n", 341 | " \n", 342 | "lyapunov.update_safe_set()\n", 343 | "plot_safe_set(lyapunov)" 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "## Safe policy update\n", 351 | "\n", 352 | "We do dynamic programming, but enforce the decrease condition on the Lyapunov function using a Lagrange multiplier." 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "with tf.name_scope('policy_optimization'):\n", 362 | " \n", 363 | " # Placeholder for states\n", 364 | " tf_states = tf.placeholder(safe_learning.config.dtype, [None, 2])\n", 365 | " \n", 366 | " # Add Lyapunov uncertainty (but only if safety-relevant)\n", 367 | " values = rl.future_values(tf_states, lyapunov=lyapunov)\n", 368 | " \n", 369 | " policy_loss = -tf.reduce_mean(values)\n",
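" # Passing lyapunov= above makes future_values enforce the Lyapunov decrease\n", " # condition through a Lagrange multiplier (see the markdown cell above), so\n", " # minimizing policy_loss trades off expected return against safety.\n",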
370 | " \n", 371 | "\n", 372 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)\n", 373 | " adapt_policy = optimizer.minimize(policy_loss, var_list=rl.policy.parameters)\n", 374 | " \n", 375 | " \n", 376 | "def rl_optimize_policy(num_iter):\n", 377 | " # Optimize value function\n", 378 | " session.run(rl_opt_value_function, feed_dict=rl.feed_dict)\n", 379 | "\n", 380 | " # select random training batches\n", 381 | " for i in tqdm(range(num_iter)):\n", 382 | " rl.feed_dict[tf_states] = lyapunov.discretization.sample_continuous(1000)\n", 383 | "\n", 384 | " session.run(adapt_policy, feed_dict=rl.feed_dict)" 385 | ] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "metadata": {}, 390 | "source": [ 391 | "# Exploration\n", 392 | "\n", 393 | "We explore close to the current policy by sampling the most uncertain state that does not leave the current level set." 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": null, 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [ 402 | "action_variation = np.array([[-0.02], [0.], [0.02]], dtype=safe_learning.config.np_dtype)\n", 403 | "\n", 404 | "\n", 405 | "with tf.name_scope('add_new_measurement'):\n", 406 | " action_dim = lyapunov.policy.output_dim\n", 407 | " tf_max_state_action = tf.placeholder(safe_learning.config.dtype,\n", 408 | " shape=[1, safety_disc.ndim + action_dim])\n", 409 | " tf_measurement = true_dynamics(tf_max_state_action)\n", 410 | " \n", 411 | "def update_gp():\n", 412 | " \"\"\"Update the GP model based on an actively selected data point.\"\"\"\n", 413 | " # Get a new sample location\n", 414 | " max_state_action, _ = safe_learning.get_safe_sample(lyapunov,\n", 415 | " action_variation,\n", 416 | " action_limits,\n", 417 | " num_samples=1000)\n", 418 | "\n", 419 | " # Obtain a measurement of the true dynamics\n", 420 | " lyapunov.feed_dict[tf_max_state_action] = max_state_action\n", 421 | " measurement = tf_measurement.eval(feed_dict=lyapunov.feed_dict)\n", 422 | "\n", 423 | " # Add the measurement to our GP dynamics\n", 424 | " lyapunov.dynamics.add_data_point(max_state_action, measurement)\n", 425 | " " 426 | ] 427 | }, 428 | { 429 | "cell_type": "markdown", 430 | "metadata": {}, 431 | "source": [ 432 | "# Run the optimization" 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": null, 438 | "metadata": {}, 439 | "outputs": [], 440 | "source": [ 441 | "# lyapunov.update_safe_set()\n", 442 | "rl_optimize_policy(num_iter=200)\n", 443 | "rl_optimize_policy(num_iter=200)\n", 444 | "\n", 445 | "lyapunov.update_safe_set()\n", 446 | "plot_safe_set(lyapunov)" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": null, 452 | "metadata": {}, 453 | "outputs": [], 454 | "source": [ 455 | "for i in range(5):\n", 456 | " print('iteration {} with c_max: {}'.format(i, lyapunov.feed_dict[lyapunov.c_max]))\n", 457 | " for _ in tqdm(range(10)):\n", 458 | " update_gp()\n", 459 | " \n", 460 | " rl_optimize_policy(num_iter=200)\n", 461 | " lyapunov.update_values()\n", 462 | " \n", 463 | " # Update safe set and plot\n", 464 | " lyapunov.update_safe_set()\n", 465 | " plot_safe_set(lyapunov) " 466 | ] 467 | }, 468 | { 469 | "cell_type": "markdown", 470 | "metadata": {}, 471 | "source": [ 472 | "# Plot trajectories and analyse improvement" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": [ 481 | "x0 = np.array([[1., -.5]])\n", 482 | "\n", 483 | "states_new, 
actions_new = safe_learning.utilities.compute_trajectory(true_dynamics, rl.policy, x0, 100)\n", 484 | "states_old, actions_old = safe_learning.utilities.compute_trajectory(true_dynamics, init_policy, x0, 100)\n", 485 | "\n", 486 | "t = np.arange(len(states_new)) * true_dynamics.dt" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": null, 492 | "metadata": {}, 493 | "outputs": [], 494 | "source": [ 495 | "plt.plot(t, states_new[:, 0], label='new')\n", 496 | "plt.plot(t, states_old[:, 0], label='old')\n", 497 | "plt.xlabel('time [s]')\n", 498 | "plt.ylabel('angle [rad]')\n", 499 | "plt.legend()\n", 500 | "plt.show()\n", 501 | "\n", 502 | "plt.plot(t, states_new[:, 1], label='new')\n", 503 | "plt.plot(t, states_old[:, 1], label='old')\n", 504 | "plt.xlabel('time [s]')\n", 505 | "plt.ylabel('angular velocity [rad/s]')\n", 506 | "plt.legend()\n", 507 | "plt.show()" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": null, 513 | "metadata": {}, 514 | "outputs": [], 515 | "source": [ 516 | "plt.plot(t[:-1], actions_new, label='new')\n", 517 | "plt.plot(t[:-1], actions_old, label='old')\n", 518 | "plt.xlabel('time [s]')\n", 519 | "plt.ylabel('actions')\n", 520 | "plt.legend()" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": null, 526 | "metadata": {}, 527 | "outputs": [], 528 | "source": [ 529 | "print('reward old:', tf.reduce_sum(rl.reward_function(states_old[:-1], actions_old)).eval(feed_dict=rl.feed_dict))\n", 530 | "print('reward new:', tf.reduce_sum(rl.reward_function(states_new[:-1], actions_new)).eval(feed_dict=rl.feed_dict))" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": null, 536 | "metadata": {}, 537 | "outputs": [], 538 | "source": [] 539 | } 540 | ], 541 | "metadata": { 542 | "anaconda-cloud": {}, 543 | "kernelspec": { 544 | "display_name": "Python 3", 545 | "language": "python", 546 | "name": "python3" 547 | }, 548 | "language_info": { 549 | "codemirror_mode": { 550 | "name": "ipython", 551 | "version": 3 552 | }, 553 | "file_extension": ".py", 554 | "mimetype": "text/x-python", 555 | "name": "python", 556 | "nbconvert_exporter": "python", 557 | "pygments_lexer": "ipython3", 558 | "version": "3.6.4" 559 | } 560 | }, 561 | "nbformat": 4, 562 | "nbformat_minor": 2 563 | } 564 | -------------------------------------------------------------------------------- /examples/reinforcement_learning_cartpole.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Reinforcement Learning for the Cart-Pole\n", 8 | "\n", 9 | "Perform approximate policy iteration in an actor-critic framework for the cart-pole (i.e., inverted pendulum on a cart)." 
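, "\n", "\n", "Concretely (see the TensorFlow graph below), the policy-evaluation step minimizes the temporal-difference loss $\\mathbb{E}\\left[\\,\\left|V_{\\bf \\theta}({\\bf x}) - \\left(r({\\bf x}, \\pi_{\\bf \\delta}({\\bf x})) + \\gamma V_{\\bf \\theta}({\\bf x}')\\right)\\right|\\,\\right]$ with the target held fixed, while the policy-improvement step maximizes $\\mathbb{E}\\left[r({\\bf x}, \\pi_{\\bf \\delta}({\\bf x})) + \\gamma V_{\\bf \\theta}({\\bf x}')\\right]$ over the policy parameters ${\\bf \\delta}$, where ${\\bf x}'$ denotes the next state under the current policy."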
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "from __future__ import division, print_function\n", 19 | "\n", 20 | "import numpy as np\n", 21 | "import tensorflow as tf\n", 22 | "import gpflow\n", 23 | "import safe_learning\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "import time\n", 26 | "import os\n", 27 | "\n", 28 | "from matplotlib.colors import ListedColormap\n", 29 | "from mpl_toolkits.mplot3d import Axes3D\n", 30 | "from scipy.linalg import block_diag\n", 31 | "from utilities import CartPole, compute_closedloop_response, get_parameter_change, find_nearest, reward_rollout, compute_roa, binary_cmap\n", 32 | "\n", 33 | "# Nice progress bars\n", 34 | "try:\n", 35 | " from tqdm import tqdm\n", 36 | "except ImportError:\n", 37 | " tqdm = lambda x: x\n" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "## User Options" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "class Options(object):\n", 54 | " def __init__(self, **kwargs):\n", 55 | " super(Options, self).__init__()\n", 56 | " self.__dict__.update(kwargs)\n", 57 | "\n", 58 | "OPTIONS = Options(np_dtype = safe_learning.config.np_dtype,\n", 59 | " tf_dtype = safe_learning.config.dtype,\n", 60 | " saturate = True, # apply saturation constraints to the control input\n", 61 | " eps = 1e-8, # numerical tolerance\n", 62 | " use_linear_dynamics = False, # use the linearized form of the dynamics as the true dynamics (for testing)\n", 63 | " dpi = 200,\n", 64 | " num_cores = 4,\n", 65 | " num_sockets = 1)\n" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "## TensorFlow Session\n", 73 | "\n", 74 | "Customize the TensorFlow session for the current device." 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "os.environ[\"KMP_BLOCKTIME\"] = str(0)\n", 84 | "os.environ[\"KMP_SETTINGS\"] = str(1)\n", 85 | "os.environ[\"KMP_AFFINITY\"] = 'granularity=fine,noverbose,compact,1,0'\n", 86 | "os.environ[\"OMP_NUM_THREADS\"] = str(OPTIONS.num_cores)\n", 87 | "\n", 88 | "config = tf.ConfigProto(intra_op_parallelism_threads = OPTIONS.num_cores,\n", 89 | " inter_op_parallelism_threads = OPTIONS.num_sockets,\n", 90 | " allow_soft_placement = False,\n", 91 | " device_count = {'CPU': OPTIONS.num_cores})\n", 92 | "\n", 93 | "try:\n", 94 | " session.close()\n", 95 | "except NameError:\n", 96 | " pass\n", 97 | "session = tf.InteractiveSession(config=config)\n" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "## Dynamics\n", 105 | "\n", 106 | "Define the nonlinear and linearized forms of the cart-pole dynamics."
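, "\n", "\n", "The state ${\\bf x} = (x, \\theta, \\dot{x}, \\dot{\\theta})$ and the control input $u$ are normalized by the constants defined below, so the learning problem is posed on the unit domains $[-1, 1]^4$ and $[-1, 1]$."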
107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "# Constants\n", 116 | "dt = 0.01 # sampling time\n", 117 | "g = 9.81 # gravity\n", 118 | "\n", 119 | "# System parameters\n", 120 | "m = 0.175 # pendulum mass\n", 121 | "M = 1.732 # cart mass\n", 122 | "L = 0.28 # pole length\n", 123 | "b = 0.01 # rotational friction\n", 124 | "\n", 125 | "# State and action normalizers\n", 126 | "x_max = 0.5 # linear position [m]\n", 127 | "theta_max = np.deg2rad(30) # angular position [rad]\n", 128 | "x_dot_max = 2 # linear velocity [m/s]\n", 129 | "theta_dot_max = np.deg2rad(30) # angular velocity [rad/s]\n", 130 | "u_max = (m + M) * (x_dot_max ** 2) / x_max # linear force [N], control action\n", 131 | "\n", 132 | "state_norm = (x_max, theta_max, x_dot_max, theta_dot_max)\n", 133 | "action_norm = (u_max,)\n", 134 | "\n", 135 | "# Dimensions and domains\n", 136 | "state_dim = 4\n", 137 | "action_dim = 1\n", 138 | "state_limits = np.array([[-1., 1.]] * state_dim)\n", 139 | "action_limits = np.array([[-1., 1.]] * action_dim)\n", 140 | "\n", 141 | "# Initialize system class and its linearization\n", 142 | "cartpole = CartPole(m, M, L, b, dt, [state_norm, action_norm])\n", 143 | "A, B = cartpole.linearize()\n", 144 | "\n", 145 | "if OPTIONS.use_linear_dynamics:\n", 146 | " dynamics = safe_learning.functions.LinearSystem((A, B), name='dynamics')\n", 147 | "else:\n", 148 | " dynamics = cartpole.__call__\n", 149 | "\n" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "## Reward Function\n", 157 | "\n", 158 | "Define a positive-definite reward function over the state-action space $\\mathcal{X} \\times \\mathcal{U}$." 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "Q = 0.1 * np.identity(state_dim).astype(OPTIONS.np_dtype) # state cost matrix\n", 168 | "R = 0.1 * np.identity(action_dim).astype(OPTIONS.np_dtype) # action cost matrix\n", 169 | "\n", 170 | "# Quadratic reward (- cost) function\n", 171 | "reward_function = safe_learning.QuadraticFunction(block_diag(- Q, - R), name='reward_function')\n" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "## Parametric Policy and Value Function\n", 179 | "\n", 180 | "Define a parametric value function $V_{\\bf \\theta} : \\mathcal{X} \\to \\mathbb{R}$ and policy $\\pi_{\\bf \\delta} : \\mathcal{X} \\to \\mathcal{U}$ as neural networks." 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "# Policy\n", 190 | "layer_dims = [64, 64, action_dim]\n", 191 | "activations = [tf.nn.relu, tf.nn.relu, None]\n", 192 | "if OPTIONS.saturate:\n", 193 | " activations[-1] = tf.nn.tanh\n", 194 | "policy = safe_learning.functions.NeuralNetwork(layer_dims, activations, name='policy', use_bias=False)\n", 195 | "\n", 196 | "# Value function\n", 197 | "layer_dims = [64, 64, 1]\n", 198 | "activations = [tf.nn.relu, tf.nn.relu, None]\n", 199 | "value_function = safe_learning.functions.NeuralNetwork(layer_dims, activations, name='value_function', use_bias=False)\n" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "## LQR Policy\n", 207 | "\n", 208 | "We compare our results to the LQR solution for the linearized system later." 
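, "\n", "\n", "Here `dlqr` solves the standard discrete-time LQR problem: $K = (R + B^\\top P B)^{-1} B^\\top P A$ is the gain of the controller $u_k = -K {\\bf x}_k$, and $P$ is the cost-to-go matrix that satisfies the discrete algebraic Riccati equation $P = A^\\top P A - A^\\top P B (R + B^\\top P B)^{-1} B^\\top P A + Q$."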
209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "K, P = safe_learning.utilities.dlqr(A, B, Q, R)\n", 218 | "policy_lqr = safe_learning.functions.LinearSystem((-K, ), name='policy_lqr')\n", 219 | "if OPTIONS.saturate:\n", 220 | " policy_lqr = safe_learning.Saturation(policy_lqr, -1, 1)\n" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "## TensorFlow Graph" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "# Use parametric policy and value function\n", 237 | "states = tf.placeholder(OPTIONS.tf_dtype, shape=[None, state_dim], name='states')\n", 238 | "actions = policy(states)\n", 239 | "rewards = reward_function(states, actions)\n", 240 | "values = value_function(states)\n", 241 | "future_states = dynamics(states, actions)\n", 242 | "future_values = value_function(future_states)\n", 243 | "\n", 244 | "# Compare with LQR solution, possibly with saturation constraints\n", 245 | "actions_lqr = policy_lqr(states)\n", 246 | "rewards_lqr = reward_function(states, actions_lqr)\n", 247 | "future_states_lqr = dynamics(states, actions_lqr)\n", 248 | "\n", 249 | "# Discount factor and scaling\n", 250 | "max_state = np.ones((1, state_dim))\n", 251 | "max_action = np.ones((1, action_dim))\n", 252 | "r_max = np.linalg.multi_dot((max_state, Q, max_state.T)) + np.linalg.multi_dot((max_action, R, max_action.T))\n", 253 | "gamma = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='discount_factor')\n", 254 | "\n", 255 | "val_scaling = 1 / r_max.ravel()\n", 256 | "pol_scaling = (1 - gamma) / r_max.ravel()\n", 257 | "\n", 258 | "# Policy evaluation\n", 259 | "with tf.name_scope('value_optimization'):\n", 260 | " value_learning_rate = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='learning_rate')\n", 261 | " target = tf.stop_gradient(rewards + gamma * future_values, name='target')\n", 262 | " value_objective = pol_scaling * tf.reduce_mean(tf.abs(values - target), name='objective')\n", 263 | " optimizer = tf.train.GradientDescentOptimizer(value_learning_rate)\n", 264 | " value_update = optimizer.minimize(value_objective, var_list=value_function.parameters)\n", 265 | "\n", 266 | "# Policy improvement\n", 267 | "with tf.name_scope('policy_optimization'):\n", 268 | " policy_learning_rate = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='learning_rate')\n", 269 | " policy_objective = - pol_scaling * tf.reduce_mean(rewards + gamma * future_values, name='objective')\n", 270 | " optimizer = tf.train.GradientDescentOptimizer(policy_learning_rate)\n", 271 | " policy_update = optimizer.minimize(policy_objective, var_list=policy.parameters)\n", 272 | " \n", 273 | "# Sampling \n", 274 | "with tf.name_scope('state_sampler'):\n", 275 | " batch_size = tf.placeholder(tf.int32, shape=[], name='batch_size')\n", 276 | " batch = tf.random_uniform([batch_size, state_dim], -1, 1, dtype=OPTIONS.tf_dtype, name='batch')\n" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "## Approximate Policy Iteration\n", 284 | "\n", 285 | "Train the policy $\\pi_{\\bf \\delta}$ and value function $V_{\\bf \\theta}$ in tandem with approximate policy iteration. 
Changing the discount factor strongly affects the results; a low discount factor encourages a well-behaved value function, while a high discount factor encourages the policy to yield a larger ROA. We compare $\\pi_{\\bf \\delta}$ to the LQR policy $\\pi$ with saturation constraints, and $V_{\\bf \\theta}$ to the LQR value function $V_\\pi$ and the value function $V_{\\pi_{\\bf \\delta}}$ induced by the parametric policy $\\pi_{\\bf \\delta}$. We compute $V_{\\pi_{\\bf \\delta}}$ as a rollout sum of discounted rewards at states in a state space discretization." 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "### Initialization" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [ 301 | "session.run(tf.global_variables_initializer())\n", 302 | "\n", 303 | "# Uniformly sampled test set\n", 304 | "test_size = 1e3\n", 305 | "test_set = batch.eval({batch_size: test_size})\n", 306 | "\n", 307 | "# Keep track of the test set loss and parameter changes during training\n", 308 | "value_test_loss = []\n", 309 | "value_param_changes = []\n", 310 | "policy_test_loss = []\n", 311 | "policy_param_changes = []\n" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "### Training" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "# Training hyperparameters\n", 328 | "max_iters = 200\n", 329 | "value_iters = 100\n", 330 | "policy_iters = 10\n", 331 | "feed_dict = {\n", 332 | " states: test_set,\n", 333 | " gamma: 0.99,\n", 334 | " value_learning_rate: 0.2,\n", 335 | " policy_learning_rate: 0.5,\n", 336 | " batch_size: 1e2,\n", 337 | "}\n", 338 | "\n", 339 | "old_value_params = session.run(value_function.parameters)\n", 340 | "old_policy_params = session.run(policy.parameters)\n", 341 | "\n", 342 | "for i in tqdm(range(max_iters)):\n", 343 | " # Policy evaluation (value update)\n", 344 | " for _ in range(value_iters):\n", 345 | " feed_dict[states] = batch.eval(feed_dict)\n", 346 | " session.run(value_update, feed_dict)\n", 347 | " new_value_params = session.run(value_function.parameters)\n", 348 | " value_param_changes.append(get_parameter_change(old_value_params, new_value_params))\n", 349 | " old_value_params = new_value_params\n", 350 | "\n", 351 | " # Policy improvement (policy update)\n", 352 | " for _ in range(policy_iters):\n", 353 | " feed_dict[states] = batch.eval(feed_dict)\n", 354 | " session.run(policy_update, feed_dict)\n", 355 | " new_policy_params = session.run(policy.parameters)\n", 356 | " policy_param_changes.append(get_parameter_change(old_policy_params, new_policy_params))\n", 357 | " old_policy_params = new_policy_params\n", 358 | " \n", 359 | " # Record objectives\n", 360 | " feed_dict[states] = test_set\n", 361 | " value_test_loss.append(value_objective.eval(feed_dict))\n", 362 | " policy_test_loss.append(policy_objective.eval(feed_dict))\n" 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": [ 369 | "### Training Results" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [ 378 | "fig, axes = plt.subplots(2, 2, figsize=(12, 5), dpi=OPTIONS.dpi)\n", 379 | "fig.subplots_adjust(wspace=0.3, hspace=0.4)\n", 380 | "\n", 381 | "ax = axes[0,0]\n", 382 | 
"ax.plot(value_test_loss, '.-r')\n", 383 | "ax.set_xlabel(r'Policy iteration $k$')\n", 384 | "ax.set_ylabel(r'test loss (policy evaluation)')\n", 385 | "\n", 386 | "ax = axes[0,1]\n", 387 | "ax.plot(value_param_changes, '.-r')\n", 388 | "ax.set_xlabel(r'Policy iteration $k$')\n", 389 | "ax.set_ylabel(r'$||{\\bf \\theta}_k - {\\bf \\theta}_{k-1}||_\\infty$')\n", 390 | "\n", 391 | "ax = axes[1,0]\n", 392 | "ax.plot(policy_test_loss, '.-b')\n", 393 | "ax.set_xlabel(r'Policy iteration $k$')\n", 394 | "ax.set_ylabel(r'test loss (policy improvement)')\n", 395 | "\n", 396 | "ax = axes[1,1]\n", 397 | "ax.plot(policy_param_changes, '.-b')\n", 398 | "ax.set_xlabel(r'Policy iteration $k$')\n", 399 | "ax.set_ylabel(r'$||{\\bf \\delta}_k - {\\bf \\delta}_{k-1}||_\\infty$')\n", 400 | "\n", 401 | "plt.show()\n" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "metadata": {}, 407 | "source": [ 408 | "## Estimated Value Functions and ROAs" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": null, 414 | "metadata": {}, 415 | "outputs": [], 416 | "source": [ 417 | "# Number of states along each dimension\n", 418 | "num_states = 51\n", 419 | "\n", 420 | "# State grid\n", 421 | "grid_limits = np.array([[-1., 1.], ] * state_dim)\n", 422 | "grid = safe_learning.GridWorld(grid_limits, num_states)\n", 423 | "\n", 424 | "# Estimate value functions and ROAs with rollout\n", 425 | "roa_horizon = 2000\n", 426 | "rollout_horizon = 500\n", 427 | "roa_tol = 0.1\n", 428 | "rollout_tol = 0.01\n", 429 | "discount = feed_dict[gamma] # use the same discount factor from training!\n", 430 | "pivot_state = np.asarray([0., 0., 0., 0.], dtype=OPTIONS.np_dtype)\n", 431 | "\n", 432 | "# Snap pivot_state to the closest grid point\n", 433 | "pivot_index = np.zeros_like(pivot_state, dtype=int)\n", 434 | "for d in range(grid.ndim):\n", 435 | " pivot_index[d], pivot_state[d] = find_nearest(grid.discrete_points[d], pivot_state[d])\n", 436 | "\n", 437 | "# Get 2d-planes of the discretization (x vs. v, theta vs. 
omega) according to pivot_state\n", 438 | "planes = [[1, 3], [0, 2]]\n", 439 | "grid_slices = []\n", 440 | "for p in planes:\n", 441 | " grid_slices.append(np.logical_and(grid.all_points[:, p[0]] == pivot_state[p[0]], \n", 442 | " grid.all_points[:, p[1]] == pivot_state[p[1]]).ravel())\n", 443 | "\n", 444 | "# LQR solution (\\pi and V_\\pi)\n", 445 | "closed_loop_dynamics = lambda x: future_states_lqr.eval({states: x})\n", 446 | "reward_eval = lambda x: rewards_lqr.eval({states: x})\n", 447 | "true_values = [reward_rollout(grid.all_points[mask], closed_loop_dynamics, reward_eval, discount, rollout_horizon, rollout_tol) for mask in grid_slices]\n", 448 | "true_roas = [compute_roa(grid.all_points[mask], closed_loop_dynamics, roa_horizon, roa_tol) for mask in grid_slices]\n", 449 | "\n", 450 | "# Parametric policy's value function V_{\\pi_\\delta}\n", 451 | "closed_loop_dynamics = lambda x: future_states.eval({states: x})\n", 452 | "reward_eval = lambda x: rewards.eval({states: x})\n", 453 | "est_values = [reward_rollout(grid.all_points[mask], closed_loop_dynamics, reward_eval, discount, rollout_horizon, rollout_tol) for mask in grid_slices]\n", 454 | "est_roas = [compute_roa(grid.all_points[mask], closed_loop_dynamics, roa_horizon, roa_tol) for mask in grid_slices]\n", 455 | "\n", 456 | "# Parametric value function V_\\theta\n", 457 | "par_values = [values.eval({states: grid.all_points[mask]}) for mask in grid_slices]\n" 458 | ] 459 | }, 460 | { 461 | "cell_type": "markdown", 462 | "metadata": {}, 463 | "source": [ 464 | "### Plotting" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": null, 470 | "metadata": {}, 471 | "outputs": [], 472 | "source": [ 473 | "planes = [[0, 2], [1, 3]]\n", 474 | "norms = np.asarray([x_max, np.rad2deg(theta_max), x_dot_max, np.rad2deg(theta_dot_max)])\n", 475 | "scaled_discrete_points = [norm * points for norm, points in zip(norms, grid.discrete_points)]\n", 476 | "\n", 477 | "fig = plt.figure(figsize=(12, 12), dpi=OPTIONS.dpi)\n", 478 | "\n", 479 | "for i, p in enumerate(planes):\n", 480 | " ax = fig.add_subplot(221 + i, projection='3d')\n", 481 | " if i == 0:\n", 482 | " ax.set_title(r'$\\theta = {:g}$'.format(pivot_state[1]) + r', $\\dot\\theta = {:g}$'.format(pivot_state[3]) + '\\n')\n", 483 | " ax.set_xlabel(r'$x$ [m]')\n", 484 | " ax.set_ylabel(r'$\\dot{x}$ [m/s]')\n", 485 | " else:\n", 486 | " ax.set_title(r'$x= {:g}$'.format(pivot_state[0]) + r', $\\dot x = {:g}$'.format(pivot_state[2]) + '\\n')\n", 487 | " ax.set_xlabel(r'$\\theta$ [deg]')\n", 488 | " ax.set_ylabel(r'$\\dot{\\theta}$ [deg/s]')\n", 489 | " ax.view_init(None, -45)\n", 490 | "\n", 491 | " xx, yy = np.meshgrid(*[scaled_discrete_points[p[0]], scaled_discrete_points[p[1]]])\n", 492 | "\n", 493 | " for j, (values, color) in enumerate(zip([true_values, est_values, par_values], [(0, 0, 1, 0.6), (0, 1, 0, 0.8), (1, 0, 0, 0.65)])):\n", 494 | " z = - values[i].reshape(grid.num_points[p])\n", 495 | " surf = ax.plot_surface(xx, yy, z, color=color)\n", 496 | " surf._facecolors2d = surf._facecolors3d\n", 497 | " surf._edgecolors2d = surf._edgecolors3d\n", 498 | " proxy = [plt.Rectangle((0,0), 1, 1, fc=c) for c in [(0, 0, 1, 0.6), (0, 1, 0, 0.8), (1, 0, 0, 0.65)]] \n", 499 | " ax.legend(proxy, [r'$-V_{\\pi}({\\bf x})$', r'$-V_{\\pi_{\\bf \\delta}}({\\bf x})$', r'$-V_{\\bf \\theta}({\\bf x})$'])\n", 500 | "\n", 501 | "\n", 502 | "for i, (p, mask) in enumerate(zip(planes, grid_slices)):\n", 503 | " ax = fig.add_subplot(223 + i, projection='3d')\n", 504 | " if i == 0:\n", 505 | " 
ax.set_title(r'$\\theta = {:g}$'.format(pivot_state[1]) + r', $\\dot\\theta = {:g}$'.format(pivot_state[3]) + '\\n')\n", 506 | " ax.set_xlabel(r'$x$ [m]')\n", 507 | " ax.set_ylabel(r'$\\dot{x}$ [m/s]') \n", 508 | " else:\n", 509 | " ax.set_title(r'$x= {:g}$'.format(pivot_state[0]) + r', $\\dot x = {:g}$'.format(pivot_state[2]) + '\\n')\n", 510 | " ax.set_xlabel(r'$\\theta$ [deg]')\n", 511 | " ax.set_ylabel(r'$\\dot{\\theta}$ [deg/s]')\n", 512 | " ax.view_init(None, -45)\n", 513 | " \n", 514 | " xx, yy = np.meshgrid(*[scaled_discrete_points[p[0]], scaled_discrete_points[p[1]]])\n", 515 | " acts = u_max * actions.eval({states: grid.all_points[mask]})\n", 516 | " true_acts = u_max * actions_lqr.eval({states: grid.all_points[mask]})\n", 517 | "\n", 518 | " ax.plot_surface(xx, yy, true_acts.reshape(grid.num_points[p]), color='blue', alpha=0.55)\n", 519 | " ax.plot_surface(xx, yy, acts.reshape(grid.num_points[p]), color='red', alpha=0.75)\n", 520 | "\n", 521 | " z = est_roas[i].reshape(grid.num_points[p])\n", 522 | " ax.contourf(xx, yy, z, cmap=binary_cmap('green', 0.65), zdir='z', offset=-u_max)\n", 523 | "\n", 524 | " proxy = [plt.Rectangle((0,0), 1, 1, fc=c) for c in [(0, 0, 1, 0.6), (1, 0, 0, 0.65), (0., 1., 0., 0.65)]]\n", 525 | " ax.legend(proxy, [r'$\\pi({\\bf x})$ [N]', r'$\\pi_{\\bf \\delta}({\\bf x})$ [N]', r'ROA for $\\pi_{\\bf \\delta}$'])\n", 526 | "\n", 527 | "plt.show()\n" 528 | ] 529 | }, 530 | { 531 | "cell_type": "code", 532 | "execution_count": null, 533 | "metadata": {}, 534 | "outputs": [], 535 | "source": [] 536 | } 537 | ], 538 | "metadata": { 539 | "kernelspec": { 540 | "display_name": "Python 3", 541 | "language": "python", 542 | "name": "python3" 543 | }, 544 | "language_info": { 545 | "codemirror_mode": { 546 | "name": "ipython", 547 | "version": 3 548 | }, 549 | "file_extension": ".py", 550 | "mimetype": "text/x-python", 551 | "name": "python", 552 | "nbconvert_exporter": "python", 553 | "pygments_lexer": "ipython3", 554 | "version": "3.6.4" 555 | } 556 | }, 557 | "nbformat": 4, 558 | "nbformat_minor": 2 559 | } 560 | -------------------------------------------------------------------------------- /safe_learning/tests/test_functions.py: -------------------------------------------------------------------------------- 1 | """Unit tests for the functions file.""" 2 | 3 | from __future__ import division, print_function, absolute_import 4 | 5 | from numpy.testing import assert_equal, assert_allclose 6 | import pytest 7 | import numpy as np 8 | import tensorflow as tf 9 | 10 | from safe_learning.functions import (_Triangulation, Triangulation, 11 | ScipyDelaunay, GridWorld, 12 | PiecewiseConstant, DeterministicFunction, 13 | UncertainFunction, QuadraticFunction, 14 | DimensionError, GPRCached, 15 | GaussianProcess, NeuralNetwork) 16 | from safe_learning.utilities import concatenate_inputs 17 | 18 | try: 19 | import gpflow 20 | except ImportError: 21 | gpflow = None 22 | 23 | 24 | class TestFunction(object): 25 | """Test the function class.""" 26 | 27 | @pytest.fixture(scope='class') 28 | def testing_class(self): 29 | class A(DeterministicFunction): 30 | def __init__(self, value, name='a'): 31 | super(A, self).__init__() 32 | with tf.variable_scope(self.scope_name): 33 | self.variable = tf.Variable(value) 34 | sess = tf.get_default_session() 35 | sess.run(tf.variables_initializer([self.variable])) 36 | 37 | def build_evaluation(self, point): 38 | return self.variable * point 39 | 40 | sess = tf.Session() 41 | return A, sess 42 | 43 | def test_class(self, testing_class): 44 | 
"""Test that the class is working.""" 45 | A, sess = testing_class 46 | with sess.as_default(): 47 | a = A(2.) 48 | input = np.array(1.) 49 | 50 | output = a(input) 51 | assert_allclose(2. * input, output.eval()) 52 | 53 | # Test double output 54 | output2 = a(input) 55 | assert_allclose(2. * input, output2.eval()) 56 | 57 | def test_add(self, testing_class): 58 | """Test adding functions.""" 59 | A, sess = testing_class 60 | with sess.as_default(): 61 | a1 = A(3.) 62 | a2 = A(2.) 63 | 64 | a = a1 + a2 65 | 66 | input = np.array(1.) 67 | output = a(input) 68 | 69 | assert_allclose(5. * input, output.eval()) 70 | 71 | assert a1.parameters[0] in a.parameters 72 | assert a2.parameters[0] in a.parameters 73 | 74 | def test_mult(self, testing_class): 75 | """Test multiplying functions.""" 76 | A, sess = testing_class 77 | with sess.as_default(): 78 | a1 = A(3.) 79 | a2 = A(2.) 80 | 81 | a = a1 * a2 82 | 83 | input = np.array(1.) 84 | output = a(input) 85 | 86 | assert_allclose(6. * input, output.eval()) 87 | 88 | assert a1.parameters[0] in a.parameters 89 | assert a2.parameters[0] in a.parameters 90 | 91 | # Test multiplying with constant 92 | a = a1 * 2. 93 | output = a(input) 94 | assert_allclose(6. * input, output.eval()) 95 | 96 | def test_neg(self, testing_class): 97 | """Test multiplying functions.""" 98 | A, sess = testing_class 99 | with sess.as_default(): 100 | a = A(3.) 101 | b = -a 102 | 103 | input = np.array(2.) 104 | output = b(input) 105 | 106 | assert_allclose(-3. * input, output.eval()) 107 | 108 | assert a.parameters[0] is b.parameters[0] 109 | 110 | def test_copy(self, testing_class): 111 | """Test copying.""" 112 | A, sess = testing_class 113 | with sess.as_default(): 114 | a = A(2.) 115 | b = A(3.) 116 | b.copy_parameters(a) 117 | 118 | p1 = a.parameters[0] 119 | p2 = b.parameters[0] 120 | 121 | assert p1.eval() == p2.eval() 122 | assert p1 is not p2 123 | 124 | 125 | class TestDeterministicFuction(object): 126 | """Test the base class.""" 127 | 128 | def test_errors(self): 129 | """Check notImplemented error.""" 130 | f = DeterministicFunction() 131 | pytest.raises(NotImplementedError, f.build_evaluation, None) 132 | 133 | 134 | class TestUncertainFunction(object): 135 | """Test the base class.""" 136 | 137 | def test_errors(self): 138 | """Check notImplemented error.""" 139 | f = UncertainFunction() 140 | pytest.raises(NotImplementedError, f.build_evaluation, None) 141 | 142 | def test_mean_function(self): 143 | """Test the conversion to a deterministic function.""" 144 | f = UncertainFunction() 145 | f.build_evaluation = lambda x: (1, 2) 146 | fd = f.to_mean_function() 147 | assert(fd(None) == 1) 148 | 149 | 150 | @pytest.mark.skipif(gpflow is None, reason='gpflow module not installed') 151 | class TestGPRCached(object): 152 | """Test the GPR_cached class.""" 153 | 154 | @pytest.fixture(scope="class") 155 | def gps(self): 156 | """Create cached and uncached gpflow models and GPy model.""" 157 | x = np.array([[1, 0], [0, 1]], dtype=float) 158 | y = np.array([[0], [1]], dtype=float) 159 | kernel = gpflow.kernels.RBF(2) 160 | gp = gpflow.gpr.GPR(x, y, kernel) 161 | gp_cached = GPRCached(x, y, kernel) 162 | return gp, gp_cached 163 | 164 | def test_adding_data(self, gps): 165 | """Test that adding data works.""" 166 | test_points = np.array([[0.9, 0.1], [3., 2]]) 167 | 168 | gp, gp_cached = gps 169 | gpfun = GaussianProcess(gp) 170 | gpfun_cached = GaussianProcess(gp_cached) 171 | 172 | x = np.array([[1.2, 2.3]]) 173 | y = np.array([[2.4]]) 174 | 175 | 
gpfun.add_data_point(x, y) 176 | m1, v1 = gpfun(test_points) 177 | 178 | gpfun_cached.add_data_point(x, y) 179 | m2, v2 = gpfun_cached(test_points) 180 | 181 | feed_dict = gpfun.feed_dict.copy() 182 | feed_dict.update(gpfun_cached.feed_dict) 183 | 184 | with tf.Session() as sess: 185 | m1, v1, m2, v2 = sess.run([m1, v1, m2, v2], feed_dict=feed_dict) 186 | 187 | assert_allclose(m1, m2) 188 | assert_allclose(v1, v2) 189 | 190 | def test_predict_f(self, gps): 191 | """Make sure predictions are the same as in the uncached case.""" 192 | # Note that this messes things up terribly due to caching. So this 193 | # must be the last test that we run. 194 | gp, gp_cached = gps 195 | test_points = np.array([[0.9, 0.1], [3., 2]]) 196 | a1, b1 = gp_cached.predict_f(test_points) 197 | a2, b2 = gp.predict_f(test_points) 198 | assert_allclose(a1, a2) 199 | assert_allclose(b1, b2) 200 | 201 | 202 | @pytest.mark.skipif(gpflow is None, reason='gpflow module not installed') 203 | class Testgpflow(object): 204 | """Test the GaussianProcess function class.""" 205 | 206 | @pytest.fixture(scope="class") 207 | def setup(self): 208 | """Create GP model with gpflow and GPy.""" 209 | with tf.Session() as sess: 210 | x = np.array([[1, 0], [0, 1]], dtype=float) 211 | y = np.array([[0], [1]], dtype=float) 212 | kernel = gpflow.kernels.RBF(2) 213 | gp = gpflow.gpr.GPR(x, y, kernel) 214 | yield sess, gp 215 | 216 | def test_evaluation(self, setup): 217 | """Make sure evaluation works.""" 218 | test_points = np.array([[0.9, 0.1], [3., 2]]) 219 | beta = 3.0 220 | sess, gp = setup 221 | 222 | ufun = GaussianProcess(gp, beta=beta) 223 | 224 | # Evaluate GP 225 | mean_1, error_1 = ufun(test_points) 226 | mean_1, error_1 = sess.run([mean_1, error_1], 227 | feed_dict=ufun.feed_dict) 228 | 229 | # Test multiple inputs 230 | mean_2, error_2 = ufun(test_points[:, [0]], 231 | test_points[:, [1]]) 232 | mean_2, error_2 = sess.run([mean_2, error_2], feed_dict=ufun.feed_dict) 233 | 234 | assert_allclose(mean_1, mean_2) 235 | assert_allclose(error_1, error_2) 236 | 237 | def test_new_data(self, setup): 238 | """Test adding data points to the GP.""" 239 | test_points = np.array([[0.9, 0.1], [3., 2]]) 240 | sess, gp = setup 241 | 242 | ufun = GaussianProcess(gp) 243 | 244 | x = np.array([[1.2, 2.3]]) 245 | y = np.array([[2.4]]) 246 | 247 | ufun.add_data_point(x, y) 248 | 249 | assert_allclose(ufun.X, np.array([[1, 0], 250 | [0, 1], 251 | [1.2, 2.3]])) 252 | assert_allclose(ufun.Y, np.array([[0], [1], [2.4]])) 253 | 254 | # Check prediction is correct after adding data (cholesky update) 255 | a1, b1 = ufun(test_points) 256 | a1, b1 = sess.run([a1, b1], feed_dict=ufun.feed_dict) 257 | 258 | a1_true = np.array([[0.16371139], [0.22048311]]) 259 | b1_true = np.array([[1.37678679], [1.98183191]]) 260 | assert_allclose(a1, a1_true) 261 | assert_allclose(b1, b1_true) 262 | 263 | 264 | class TestQuadraticFunction(object): 265 | """Test the quadratic function.""" 266 | 267 | def test_evaluate(self): 268 | """Setup testing environment for quadratic.""" 269 | points = np.array([[0, 0], 270 | [0, 1], 271 | [1, 0], 272 | [1, 1]], dtype=np.float) 273 | P = np.array([[1., 0.1], 274 | [0.2, 2.]]) 275 | quad = QuadraticFunction(P) 276 | true_fval = np.array([[0., 2., 1., 3.3]]).T 277 | 278 | with tf.Session(): 279 | tf_res = quad(points) 280 | res = tf_res.eval() 281 | 282 | assert_allclose(true_fval, res) 283 | 284 | 285 | def test_scipy_delaunay(): 286 | """Test the fake replacement for Scipy.""" 287 | limits = [[-1, 1], [-1, 2]] 288 | num_points = [2, 6] 289 | 
discretization = GridWorld(limits, num_points) 290 | sp_delaunay = ScipyDelaunay(limits, num_points) 291 | delaunay = _Triangulation(discretization) 292 | 293 | assert_equal(delaunay.nsimplex, sp_delaunay.nsimplex) 294 | assert_equal(delaunay.input_dim, sp_delaunay.ndim) 295 | sp_delaunay.find_simplex(np.array([[0, 0]])) 296 | 297 | 298 | class TestGridworld(object): 299 | """Test the general GridWorld definitions.""" 300 | 301 | def test_dimensions_error(self): 302 | """Test dimension errors.""" 303 | limits = [[-1.1, 1.5], [2.2, 2.4]] 304 | num_points = [7, 8] 305 | grid = GridWorld(limits, num_points) 306 | 307 | pytest.raises(DimensionError, grid._check_dimensions, 308 | np.array([[1, 2, 3]])) 309 | 310 | pytest.raises(DimensionError, grid._check_dimensions, 311 | np.array([[1]])) 312 | 313 | def test_index_state_conversion(self): 314 | """Test all index conversions.""" 315 | limits = [[-1.1, 1.5], [2.2, 2.4]] 316 | num_points = [7, 8] 317 | grid = GridWorld(limits, num_points) 318 | 319 | # Convert all indices forward and backward 320 | indices = np.arange(grid.nindex) 321 | states = grid.index_to_state(indices) 322 | indices2 = grid.state_to_index(states) 323 | assert_equal(indices, indices2) 324 | 325 | # test 1D input 326 | grid.state_to_index([0, 2.3]) 327 | grid.index_to_state(1) 328 | 329 | # Test rectangles 330 | rectangles = np.arange(grid.nrectangles) 331 | states = grid.rectangle_to_state(rectangles) 332 | rectangles2 = grid.state_to_rectangle(states + grid.unit_maxes / 2) 333 | assert_equal(rectangles, rectangles2) 334 | 335 | rectangle = grid.state_to_rectangle(100 * np.ones((1, 2))) 336 | assert_equal(rectangle, grid.nrectangles - 1) 337 | 338 | rectangle = grid.state_to_rectangle(-100 * np.ones((1, 2))) 339 | assert_equal(rectangle, 0) 340 | 341 | # Test rectangle corners 342 | corners = grid.rectangle_corner_index(rectangles) 343 | corner_states = grid.rectangle_to_state(rectangles) 344 | corners2 = grid.state_to_index(corner_states) 345 | assert_equal(corners, corners2) 346 | 347 | # Test point outside grid 348 | test_point = np.array([[-1.2, 2.]]) 349 | index = grid.state_to_index(test_point) 350 | assert_equal(index, 0) 351 | 352 | def test_integer_numpoints(self): 353 | """Check integer numpoints argument.""" 354 | grid = GridWorld([[1, 2], [3, 4]], 2) 355 | assert_equal(grid.num_points, np.array([2, 2])) 356 | 357 | def test_0d(self): 358 | """Check that initialization works for 1d-discretization.""" 359 | grid = GridWorld([[0, 1]], 3) 360 | 361 | test = np.array([[0.1, 0.4, 0.9]]).T 362 | res = np.array([0, 1, 2]) 363 | assert_allclose(grid.state_to_index(test), res) 364 | 365 | res = np.array([0, 0, 1]) 366 | assert_allclose(grid.state_to_rectangle(test), res) 367 | assert_allclose(grid.rectangle_to_state(res), res[:, None] * 0.5) 368 | 369 | 370 | class TestConcatenateDecorator(object): 371 | """Test the concatenate_input decorator.""" 372 | 373 | @concatenate_inputs(start=1) 374 | def fun(self, x): 375 | """Test function.""" 376 | return x 377 | 378 | def test_concatenate_numpy(self): 379 | """Test concatenation of inputs for numpy.""" 380 | x = np.arange(4).reshape(2, 2) 381 | y = x + 4 382 | true_res = np.hstack((x, y)) 383 | res = self.fun(x, y) 384 | assert_allclose(res, true_res) 385 | assert_allclose(self.fun(x), x) 386 | 387 | def test_concatenate_tensorflow(self): 388 | """Test concatenation of inputs for tensorflow.""" 389 | x_data = np.arange(4).reshape(2, 2).astype(np.float32) 390 | true_res = np.hstack((x_data, x_data + 4)) 391 | x = 
tf.placeholder(dtype=tf.float32, shape=[2, 2]) 392 | y = x + 4 393 | 394 | fun_x = self.fun(x) 395 | fun_xy = self.fun(x, y) 396 | 397 | assert isinstance(fun_x, tf.Tensor) 398 | assert isinstance(fun_xy, tf.Tensor) 399 | 400 | with tf.Session() as sess: 401 | res_x, res_both = sess.run([fun_x, fun_xy], 402 | {x: x_data}) 403 | 404 | assert_allclose(res_both, true_res) 405 | assert_allclose(res_x, x_data) 406 | 407 | 408 | class TestPiecewiseConstant(object): 409 | """Test a piecewise constant function.""" 410 | 411 | def test_init(self): 412 | """Test initialisation.""" 413 | limits = [[-1, 1], [-1, 1]] 414 | npoints = 4 415 | discretization = GridWorld(limits, npoints) 416 | pwc = PiecewiseConstant(discretization, np.arange(16)) 417 | assert_allclose(pwc.parameters, np.arange(16)[:, None]) 418 | 419 | def test_evaluation(self): 420 | """Evaluation tests for piecewise constant function.""" 421 | limits = [[-1, 1], [-1, 1]] 422 | npoints = 3 423 | discretization = GridWorld(limits, npoints) 424 | pwc = PiecewiseConstant(discretization) 425 | 426 | vertex_points = pwc.discretization.index_to_state( 427 | np.arange(pwc.nindex)) 428 | vertex_values = np.sum(vertex_points, axis=1, keepdims=True) 429 | pwc.parameters = vertex_values 430 | 431 | test = pwc(vertex_points) 432 | assert_allclose(test, vertex_values) 433 | 434 | outside_point = np.array([[-1.5, -1.5]]) 435 | test1 = pwc(outside_point) 436 | assert_allclose(test1, np.array([[-2]])) 437 | 438 | # Test constraint evaluation 439 | test2 = pwc.parameter_derivative(vertex_points) 440 | test2 = test2.toarray().dot(vertex_values) 441 | assert_allclose(test2, vertex_values) 442 | 443 | def test_gradient(self): 444 | """Test the gradient.""" 445 | limits = [[-1, 1], [-1, 1]] 446 | npoints = 3 447 | discretization = GridWorld(limits, npoints) 448 | pwc = PiecewiseConstant(discretization) 449 | test_points = pwc.discretization.index_to_state(np.arange(pwc.nindex)) 450 | gradient = pwc.gradient(test_points) 451 | assert_allclose(gradient, 0) 452 | 453 | 454 | class TestTriangulationNumpy(object): 455 | """Test the generalized Delaunay triangulation in numpy.""" 456 | 457 | def test_find_simplex(self): 458 | """Test the simplices on the grid.""" 459 | limits = [[-1, 1], [-1, 2]] 460 | num_points = [3, 7] 461 | discretization = GridWorld(limits, num_points) 462 | delaunay = _Triangulation(discretization) 463 | 464 | # Test the basic properties 465 | assert_equal(delaunay.discretization.nrectangles, 2 * 6) 466 | assert_equal(delaunay.input_dim, 2) 467 | assert_equal(delaunay.nsimplex, 2 * 2 * 6) 468 | assert_equal(delaunay.discretization.offset, np.array([-1, -1])) 469 | assert_equal(delaunay.discretization.unit_maxes, 470 | np.array([2, 3]) / (np.array(num_points) - 1)) 471 | 472 | # test the simplex indices 473 | lower = delaunay.triangulation.find_simplex(np.array([0, 0])).squeeze() 474 | upper = 1 - lower 475 | 476 | test_points = np.array([[0, 0], 477 | [0.9, 0.45], 478 | [1.1, 0], 479 | [1.9, 2.9]]) 480 | 481 | test_points += np.array(limits)[:, 0] 482 | 483 | true_result = np.array([lower, upper, 6 * 2 + lower, 11 * 2 + upper]) 484 | result = delaunay.find_simplex(test_points) 485 | 486 | assert_allclose(result, true_result) 487 | 488 | # Test the ability to find simplices 489 | simplices = delaunay.simplices(result) 490 | true_simplices = np.array([[0, 1, 7], 491 | [1, 7, 8], 492 | [7, 8, 14], 493 | [13, 19, 20]]) 494 | assert_equal(np.sort(simplices, axis=1), true_simplices) 495 | 496 | # Test point outside domain (should map to bottom left 
and top right) 497 | assert_equal(lower, delaunay.find_simplex(np.array([[-100., -100.]]))) 498 | assert_equal(delaunay.nsimplex - 1 - lower, 499 | delaunay.find_simplex(np.array([[100., 100.]]))) 500 | 501 | def test_values(self): 502 | """Test the evaluation function.""" 503 | eps = 1e-10 504 | 505 | discretization = GridWorld([[0, 1], [0, 1]], [2, 2]) 506 | delaunay = _Triangulation(discretization) 507 | 508 | test_points = np.array([[0, 0], 509 | [1 - eps, 0], 510 | [0, 1 - eps], 511 | [0.5 - eps, 0.5 - eps], 512 | [0, 0.5], 513 | [0.5, 0]]) 514 | nodes = delaunay.discretization.state_to_index(np.array([[0, 0], 515 | [1, 0], 516 | [0, 1]])) 517 | 518 | H = delaunay.parameter_derivative(test_points).toarray() 519 | 520 | true_H = np.zeros((len(test_points), delaunay.nindex), 521 | dtype=np.float) 522 | true_H[0, nodes[0]] = 1 523 | true_H[1, nodes[1]] = 1 524 | true_H[2, nodes[2]] = 1 525 | true_H[3, nodes[[1, 2]]] = 0.5 526 | true_H[4, nodes[[0, 2]]] = 0.5 527 | true_H[5, nodes[[0, 1]]] = 0.5 528 | 529 | assert_allclose(H, true_H, atol=1e-7) 530 | 531 | # Test value property 532 | values = np.random.rand(delaunay.nindex) 533 | delaunay.parameters = values 534 | v1 = H.dot(values)[:, None] 535 | v2 = delaunay(test_points) 536 | assert_allclose(v1, v2) 537 | 538 | # Test the projections 539 | test_point = np.array([[-0.5, -0.5]]) 540 | delaunay.parameters = np.array([0, 1, 1, 1]) 541 | unprojected = delaunay(test_point) 542 | delaunay.project = True 543 | projected = delaunay(test_point) 544 | 545 | assert_allclose(projected, np.array([[0]])) 546 | assert_allclose(unprojected, np.array([[-1]])) 547 | 548 | def test_multiple_dimensions(self): 549 | """Test delaunay in three dimensions.""" 550 | limits = [[0, 1]] * 3 551 | discretization = GridWorld(limits, [2] * 3) 552 | delaunay = _Triangulation(discretization) 553 | assert_equal(delaunay.input_dim, 3) 554 | assert_equal(delaunay.discretization.nrectangles, 1) 555 | assert_equal(delaunay.nsimplex, np.math.factorial(3)) 556 | 557 | corner_points = np.array([[0, 0, 0], 558 | [1, 0, 0], 559 | [0, 1, 0], 560 | [0, 0, 1], 561 | [0, 1, 1], 562 | [1, 1, 0], 563 | [1, 0, 1], 564 | [1, 1, 1]], dtype=np.float) 565 | 566 | values = np.sum(delaunay.discretization.index_to_state(np.arange(8)), 567 | axis=1) / 3 568 | 569 | test_points = np.vstack((corner_points, 570 | np.array([[0, 0, 0.5], 571 | [0.5, 0, 0], 572 | [0, 0.5, 0], 573 | [0.5, 0.5, 0.5]]))) 574 | corner_values = np.sum(corner_points, axis=1) / 3 575 | true_values = np.hstack((corner_values, 576 | np.array([1 / 6, 1 / 6, 1 / 6, 1 / 2]))) 577 | 578 | delaunay.parameters = values 579 | result = delaunay(test_points) 580 | assert_allclose(result, true_values[:, None], atol=1e-5) 581 | 582 | def test_gradient(self): 583 | """Test the gradient_at function.""" 584 | discretization = GridWorld([[0, 1], [0, 1]], [2, 2]) 585 | delaunay = _Triangulation(discretization) 586 | 587 | points = np.array([[0, 0], 588 | [1, 0], 589 | [0, 1], 590 | [1, 1]], dtype=np.int) 591 | nodes = delaunay.discretization.state_to_index(points) 592 | 593 | # Simplex with node values: 594 | # 3 - 1 595 | # | \ | 596 | # 1 - 2 597 | # --> x 598 | 599 | values = np.zeros(delaunay.nindex) 600 | values[nodes] = [1, 2, 3, 1] 601 | 602 | test_points = np.array([[0.01, 0.01], 603 | [0.99, 0.99]]) 604 | 605 | true_grad = np.array([[1, 2], [-2, -1]]) 606 | 607 | # Construct true H (gradient as function of values) 608 | true_H = np.zeros((2 * delaunay.input_dim, delaunay.nindex)) 609 | 610 | true_H[0, nodes[[0, 1]]] = [-1, 1] 611 
| true_H[1, nodes[[0, 2]]] = [-1, 1] 612 | true_H[2, nodes[[2, 3]]] = [-1, 1] 613 | true_H[3, nodes[[1, 3]]] = [-1, 1] 614 | 615 | # Evaluate gradient with and without values 616 | H = delaunay.gradient_parameter_derivative(test_points).toarray() 617 | delaunay.parameters = values 618 | grad = delaunay.gradient(test_points) 619 | 620 | # Compare 621 | assert_allclose(grad, true_grad) 622 | assert_allclose(H, true_H) 623 | assert_allclose(true_grad, 624 | H.dot(values).reshape(-1, delaunay.input_dim)) 625 | 626 | def test_1d(self): 627 | """Test the triangulation for 1D inputs.""" 628 | discretization = GridWorld([[0, 1]], 3) 629 | delaunay = _Triangulation(discretization, vertex_values=[0, 0.5, 0]) 630 | vertex_values = delaunay.parameters 631 | 632 | test_points = np.array([[0, 0.2, 0.5, 0.6, 0.9, 1.]]).T 633 | test_point = test_points[[0], :] 634 | 635 | simplices = delaunay.find_simplex(test_points) 636 | true_simplices = np.array([0, 0, 1, 1, 1, 1]) 637 | assert_allclose(simplices, true_simplices) 638 | assert_allclose(delaunay.find_simplex(test_point), 639 | true_simplices[[0]]) 640 | 641 | values = delaunay(test_points) 642 | true_values = np.array([0, 0.2, 0.5, 0.4, 0.1, 0])[:, None] 643 | assert_allclose(values, true_values) 644 | 645 | value_constraint = delaunay.parameter_derivative(test_points) 646 | values = value_constraint.toarray().dot(vertex_values) 647 | assert_allclose(values, true_values) 648 | 649 | gradient = delaunay.gradient(test_points) 650 | true_gradient = np.array([1, 1, -1, -1, -1, -1])[:, None] 651 | assert_allclose(gradient, true_gradient) 652 | 653 | gradient_deriv = delaunay.gradient_parameter_derivative(test_points) 654 | gradient = gradient_deriv.toarray().dot(vertex_values) 655 | assert_allclose(gradient.reshape(-1, 1), true_gradient) 656 | 657 | 658 | class TestTriangulation(object): 659 | """Test the tensorflow wrapper around the numpy triangulation.""" 660 | 661 | @pytest.fixture(scope="class") 662 | def setup(self): 663 | """Create testing environment.""" 664 | with tf.Session(graph=tf.Graph()) as sess: 665 | npoints = 3 666 | 667 | discretization = GridWorld([[0, 1], [0, 1]], npoints) 668 | parameters = np.sum(discretization.all_points ** 2, 669 | axis=1, keepdims=True) 670 | trinp = _Triangulation(discretization, vertex_values=parameters) 671 | 672 | tri = Triangulation(discretization, vertex_values=parameters) 673 | 674 | test_points = np.array([[-10, -10], 675 | [0.2, 0.7], 676 | [0, 0], 677 | [0, 1], 678 | [1, 1], 679 | [-0.2, 0.5], 680 | [0.43, 0.21]]) 681 | 682 | sess.run(tf.global_variables_initializer()) 683 | yield sess, tri, trinp, test_points 684 | 685 | def test_evaluate(self, setup): 686 | """Test the evaluations.""" 687 | sess, tri, trinp, test_points = setup 688 | # with tf.Session() as sess: 689 | res = sess.run(tri(test_points)) 690 | assert_allclose(res, trinp(test_points)) 691 | 692 | def test_projected_evaluate(self, setup): 693 | """Test evaluations with enabled projection.""" 694 | sess, tri, trinp, test_points = setup 695 | 696 | # Enable project 697 | trinp.project = True 698 | tri.project = True 699 | 700 | res = sess.run(tri(test_points)) 701 | assert_allclose(res, trinp(test_points)) 702 | 703 | def test_gradient_x(self, setup): 704 | """Test the gradients with respect to the inputs.""" 705 | sess, tri, trinp, test_points = setup 706 | 707 | points = tf.placeholder(tf.float64, [None, None]) 708 | feed_dict = {points: test_points} 709 | 710 | # Disable project 711 | trinp.project = False 712 | tri.project = False 713 | 714 | 
# Sanity check: a plain evaluation run 715 | y = tri(points) 716 | res = sess.run(y, feed_dict=feed_dict) 717 | assert_allclose(res, trinp(test_points)) 718 | 719 | # Test gradients 720 | grad = tf.gradients(y, points) 721 | res = sess.run(grad, feed_dict=feed_dict)[0] 722 | assert_allclose(res, trinp.gradient(test_points)) 723 | 724 | # Enable project 725 | trinp.project = True 726 | tri.project = True 727 | 728 | # Results are different outside of the projection. 729 | inside = (np.all(test_points < trinp.limits[:, [1]].T, axis=1) 730 | & np.all(test_points > trinp.limits[:, [0]].T, axis=1)) 731 | 732 | test_points = test_points[inside] 733 | 734 | # Test gradients projected 735 | y = tri(points) 736 | grad = tf.gradients(y, points) 737 | res = sess.run(grad, feed_dict=feed_dict)[0] 738 | assert_allclose(res[inside], trinp.gradient(test_points)) 739 | 740 | def test_gradient_param(self, setup): 741 | """Test the gradients with respect to the parameters.""" 742 | sess, tri, trinp, test_points = setup 743 | 744 | # Enable project 745 | trinp.project = True 746 | tri.project = True 747 | 748 | x = tf.placeholder(tf.float64, [1, 2]) 749 | 750 | true_gradient = trinp.parameter_derivative(test_points) 751 | true_gradient = np.array(true_gradient.todense()) 752 | 753 | y = tri(x) 754 | grad_tf = tf.gradients(y, tri.parameters)[0] 755 | dense_gradient = np.zeros(true_gradient[0].shape, dtype=np.float) 756 | 757 | for i, test in enumerate(test_points): 758 | gradient = sess.run(grad_tf, feed_dict={x: test[None, :]}) 759 | dense_gradient[:] = 0. 760 | dense_gradient[gradient.indices] = gradient.values[:, 0] 761 | assert_allclose(dense_gradient, true_gradient[i]) 762 | 763 | 764 | def test_neural_network(): 765 | """Test the NeuralNetwork class init.""" 766 | relu = tf.nn.relu 767 | 768 | with tf.Session() as sess: 769 | nn = NeuralNetwork(layers=[2, 3, 1], 770 | nonlinearities=[relu, relu, None]) 771 | 772 | # Evaluate the network on random inputs 773 | res = nn(np.random.rand(4, 2)) 774 | sess.run(tf.global_variables_initializer()) 775 | res, lipschitz = sess.run([res, nn.lipschitz()]) 776 | 777 | assert lipschitz > 0. 778 | 779 | 780 | if __name__ == '__main__': 781 | pytest.main() 782 | -------------------------------------------------------------------------------- /examples/adaptive_safety_verification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Adaptive Safety Verification for the Inverted Pendulum\n", 8 | "\n", 9 | "Determine the largest safe set for a GP model of the inverted pendulum with an adaptive discretization."
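,
    "\n",
    "In outline: the dynamics are modelled as a GP prior centred on a (possibly wrong) linearization of the pendulum, a fixed LQR policy and quadratic Lyapunov function $v({\\bf x}) = {\\bf x}^\\top P {\\bf x}$ are fixed, and safety is certified on a (possibly adaptive) state-space grid via the tightened decrease condition $\\Delta v({\\bf x}) < -L_{\\Delta v} \\tau$ introduced below."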
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "from __future__ import division, print_function\n", 19 | "\n", 20 | "import numpy as np\n", 21 | "import tensorflow as tf\n", 22 | "import gpflow\n", 23 | "import safe_learning\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "import time\n", 26 | "import os\n", 27 | "\n", 28 | "from scipy.linalg import block_diag\n", 29 | "from utilities import InvertedPendulum, binary_cmap\n", 30 | "\n", 31 | "# Nice progress bars\n", 32 | "try:\n", 33 | " from tqdm import tqdm\n", 34 | "except ImportError:\n", 35 | " tqdm = lambda x: x\n", 36 | "\n", 37 | "_STORAGE = {}\n", 38 | "\n", 39 | "HEAT_MAP = plt.get_cmap('inferno', lut=None)\n", 40 | "HEAT_MAP.set_over('white')\n", 41 | "HEAT_MAP.set_under('black')\n", 42 | "\n", 43 | "LEVEL_MAP = plt.get_cmap('viridis', lut=21)\n", 44 | "LEVEL_MAP.set_over('gold')\n", 45 | "LEVEL_MAP.set_under('white')\n" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "## User Options" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "class Options(object):\n", 62 | " def __init__(self, **kwargs):\n", 63 | " super(Options, self).__init__()\n", 64 | " self.__dict__.update(kwargs)\n", 65 | "\n", 66 | "OPTIONS = Options(np_dtype = safe_learning.config.np_dtype,\n", 67 | " tf_dtype = safe_learning.config.dtype,\n", 68 | " saturate = True, # apply saturation constraints to the control input\n", 69 | " eps = 1e-8, # numerical tolerance\n", 70 | " use_linear_dynamics = False, # use the linearized form of the dynamics as the true dynamics (for testing)\n", 71 | " use_lipschitz_scaling = True, # use different Lipschitz constants in each state for the Lyapunov function\n", 72 | " use_zero_threshold = False, # assume the discretization is infinitely fine (i.e., tau = 0; for testing)\n", 73 | " use_true_parameters = False, # use the true physical parameters in the GP model (for testing)\n", 74 | " use_linear_kernels = False, # use only linear kernels in the GP model\n", 75 | " use_adaptive_grid = True, # use an adaptive discretization for safety verification\n", 76 | " gp_confidence_scaling = 2., # scaling factor for GP confidence intervals (i.e., beta)\n", 77 | " gp_noise_variance = 0.001 ** 2, # noise variance used in GP model\n", 78 | " gp_num_scaling = 1., # internal scaling factor for better numerical stability in GP prediction\n", 79 | " dpi = 200,\n", 80 | " num_cores = 4,\n", 81 | " num_sockets = 1)\n" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## TensorFlow Session\n", 89 | "\n", 90 | "Customize the TensorFlow session for the current device." 
91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "os.environ[\"KMP_BLOCKTIME\"] = str(0)\n", 100 | "os.environ[\"KMP_SETTINGS\"] = str(1)\n", 101 | "os.environ[\"KMP_AFFINITY\"] = 'granularity=fine,noverbose,compact,1,0'\n", 102 | "os.environ[\"OMP_NUM_THREADS\"] = str(OPTIONS.num_cores)\n", 103 | "\n", 104 | "config = tf.ConfigProto(intra_op_parallelism_threads = OPTIONS.num_cores,\n", 105 | " inter_op_parallelism_threads = OPTIONS.num_sockets,\n", 106 | " allow_soft_placement = False,\n", 107 | " device_count = {'CPU': OPTIONS.num_cores})\n", 108 | "\n", 109 | "try:\n", 110 | " session.close()\n", 111 | "except NameError:\n", 112 | " pass\n", 113 | "session = tf.InteractiveSession(config=config)\n" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Dynamics\n", 121 | "\n", 122 | "Define the nonlinear and linearized forms of the inverted pendulum dynamics." 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "# Constants\n", 132 | "dt = 0.01 # sampling time\n", 133 | "g = 9.81 # gravity\n", 134 | "\n", 135 | "# True system parameters\n", 136 | "m = 0.15 # pendulum mass\n", 137 | "L = 0.5 # pole length\n", 138 | "b = 0.1 # rotational friction\n", 139 | "\n", 140 | "# State and action normalizers\n", 141 | "theta_max = np.deg2rad(30) # angular position [rad]\n", 142 | "omega_max = np.sqrt(g / L) # angular velocity [rad/s]\n", 143 | "u_max = g * m * L * np.sin(theta_max) # torque [N.m], control action\n", 144 | "\n", 145 | "state_norm = (theta_max, omega_max)\n", 146 | "action_norm = (u_max,)\n", 147 | "\n", 148 | "# Dimensions and domains\n", 149 | "state_dim = 2\n", 150 | "action_dim = 1\n", 151 | "state_limits = np.array([[-1., 1.]] * state_dim)\n", 152 | "action_limits = np.array([[-1., 1.]] * action_dim)\n", 153 | "\n", 154 | "# True system\n", 155 | "true_pendulum = InvertedPendulum(m, L, b, dt, [state_norm, action_norm])\n", 156 | "A_true, B_true = true_pendulum.linearize()\n", 157 | "\n", 158 | "if OPTIONS.use_linear_dynamics:\n", 159 | " true_dynamics = safe_learning.functions.LinearSystem((A_true, B_true), name='true_dynamics')\n", 160 | "else:\n", 161 | " true_dynamics = true_pendulum.__call__\n", 162 | "\n", 163 | "if not OPTIONS.use_true_parameters:\n", 164 | " # \"Wrong\" system\n", 165 | " m = 0.1 # pendulum mass\n", 166 | " L = 0.4 # pole length\n", 167 | " b = 0.0 # rotational friction\n", 168 | "pendulum = InvertedPendulum(m, L, b, dt, [state_norm, action_norm])\n", 169 | "A, B = pendulum.linearize()\n" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "## GP Model\n", 177 | "\n", 178 | "Define a GP model with possibly wrong physical parameters." 
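,
    "\n",
    "In the cell below, each state dimension gets its own scalar GP over the stacked state-action input,\n",
    "$$x_{k+1}^{(i)} \\sim \\mathcal{GP}\\big( A_i x_k + B_i u_k, \\; k_i \\big), \\quad i \\in \\{\\theta, \\omega\\},$$\n",
    "with the (possibly wrong) linearization $(A, B)$ as the prior mean and the kernel variances seeded from the squared parameter error $(M_{\\mathrm{true}} - M)^2$, clipped away from zero, so that the GP confidence intervals can cover the model mismatch."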
179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "# Prior variances; make sure at least some non-zero value is maintained\n", 188 | "M_true = np.hstack((A_true, B_true))\n", 189 | "M = np.hstack((A, B))\n", 190 | "prior_variances = (M_true - M) ** 2\n", 191 | "np.clip(prior_variances, 1e-3, None, out=prior_variances)\n", 192 | "\n", 193 | "# Input to GP is of the form (x, u) = (state, action)\n", 194 | "full_dim = state_dim + action_dim\n", 195 | "\n", 196 | "# Kernels\n", 197 | "if OPTIONS.use_linear_kernels:\n", 198 | " kernel_theta = gpflow.kernels.Linear(full_dim, variance=prior_variances[0, :], ARD=True)\n", 199 | " kernel_omega = gpflow.kernels.Linear(full_dim, variance=prior_variances[1, :], ARD=True)\n", 200 | "else:\n", 201 | " kernel_theta = (gpflow.kernels.Linear(full_dim, variance=prior_variances[0, :], ARD=True)\n", 202 | " + gpflow.kernels.Matern32(1, lengthscales=1, active_dims=[0])\n", 203 | " * gpflow.kernels.Linear(1, variance=prior_variances[0, 1]))\n", 204 | " kernel_omega = (gpflow.kernels.Linear(full_dim, variance=prior_variances[1, :], ARD=True)\n", 205 | " + gpflow.kernels.Matern32(1, lengthscales=1, active_dims=[0])\n", 206 | " * gpflow.kernels.Linear(1, variance=prior_variances[1, 1]))\n", 207 | "\n", 208 | "# Use linearized form for the mean dynamics\n", 209 | "mean_function_theta = safe_learning.LinearSystem((A[[0], :], B[[0], :]), name='mean_dynamics_theta')\n", 210 | "mean_function_omega = safe_learning.LinearSystem((A[[1], :], B[[1], :]), name='mean_dynamics_omega')\n", 211 | "\n", 212 | "# TODO Tensorflow may spit out a lot of allocator errors when creating 0-length dataholders in gpflow, e.g., when:\n", 213 | "# - initializing with empty data matrices X and Y, or\n", 214 | "# - using GPRCached (initializes empty dataholders for Cholesky decomposition)\n", 215 | "\n", 216 | "# X_init = np.empty((0, full_dim), dtype=OPTIONS.np_dtype)\n", 217 | "# Y_init = np.empty((0, 1), dtype=OPTIONS.np_dtype)\n", 218 | "# gp_theta = safe_learning.GPRCached(X_init, Y_init, kernel_theta, mean_function_theta, OPTIONS.gp_num_scaling)\n", 219 | "# gp_omega = safe_learning.GPRCached(X_init, Y_init, kernel_omega, mean_function_omega, OPTIONS.gp_num_scaling)\n", 220 | "\n", 221 | "# Define a GP model over the dynamics\n", 222 | "X_init = np.zeros((1, full_dim), dtype=OPTIONS.np_dtype)\n", 223 | "Y_init = np.zeros((1, 1), dtype=OPTIONS.np_dtype)\n", 224 | "\n", 225 | "gp_theta = gpflow.gpr.GPR(X_init, Y_init, kernel_theta, mean_function_theta)\n", 226 | "gp_omega = gpflow.gpr.GPR(X_init, Y_init, kernel_omega, mean_function_omega)\n", 227 | "\n", 228 | "gp_theta.likelihood.variance = OPTIONS.gp_noise_variance\n", 229 | "gp_omega.likelihood.variance = OPTIONS.gp_noise_variance\n", 230 | "\n", 231 | "gp_theta_fun = safe_learning.GaussianProcess(gp_theta, OPTIONS.gp_confidence_scaling)\n", 232 | "gp_omega_fun = safe_learning.GaussianProcess(gp_omega, OPTIONS.gp_confidence_scaling)\n", 233 | "\n", 234 | "# Stack GP functions to get a block-diagonal kernel matrix, which yields more efficient GP prediction\n", 235 | "dynamics = safe_learning.FunctionStack((gp_theta_fun, gp_omega_fun))\n" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "## State Discretization and Initial Safe Set\n", 243 | "\n", 244 | "Define a possibly adaptive discretization, and an initial known safe set as a subset of this discretization." 
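,
    "\n",
    "As a quick sanity check (assuming `GridWorld` spaces its points uniformly and `unit_maxes` holds the per-dimension cell sizes): with limits $[-1, 1]^2$ and $501$ points per dimension, each cell has side length $2/500 = 0.004$, so the cell below yields $\\tau = \\tfrac{1}{2} \\sum_d 0.004 = 0.004$ and a grid of $501^2 = 251001$ states."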
245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "# Number of states along each dimension\n", 254 | "if OPTIONS.use_adaptive_grid:\n", 255 | " num_states = 501\n", 256 | "else:\n", 257 | " num_states = 3001\n", 258 | "\n", 259 | "# State grid\n", 260 | "grid_limits = np.array([[-1., 1.], ] * state_dim)\n", 261 | "grid = safe_learning.GridWorld(grid_limits, num_states)\n", 262 | "\n", 263 | "# Discretization constant\n", 264 | "if OPTIONS.use_zero_threshold:\n", 265 | " tau = 0.0\n", 266 | "else:\n", 267 | " tau = np.sum(grid.unit_maxes) / 2\n", 268 | "\n", 269 | "print('Grid size: {}'.format(grid.nindex))\n", 270 | "print('Discretization constant (tau): {}'.format(tau))\n", 271 | "\n", 272 | "# Set initial safe set as a ball around the origin (in normalized coordinates)\n", 273 | "cutoff_radius = 0.2\n", 274 | "initial_safe_set = np.linalg.norm(grid.all_points, ord=2, axis=1) <= cutoff_radius\n" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "## Fixed Policy\n", 282 | "\n", 283 | "Fix the policy to the LQR solution for the linearized, discretized, true system, possibly with saturation constraints." 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "Q = np.diag([1., 2.]).astype(OPTIONS.np_dtype) # state cost matrix\n", 293 | "R = 1.2 * np.identity(action_dim).astype(OPTIONS.np_dtype) # action cost matrix\n", 294 | "K, P = safe_learning.utilities.dlqr(A_true, B_true, Q, R)\n", 295 | "P /= np.abs(P).max() # normalize cost\n", 296 | "\n", 297 | "policy = safe_learning.LinearSystem(-K, name='policy')\n", 298 | "if OPTIONS.saturate:\n", 299 | " policy = safe_learning.Saturation(policy, -1, 1)\n", 300 | "\n", 301 | "# Visualize policy\n", 302 | "def plot_policy(policy, grid, norms, tol=1e-10):\n", 303 | " fig, ax = plt.subplots(1, 1, figsize=(5, 5), dpi=OPTIONS.dpi)\n", 304 | " ticks = np.linspace(-1., 1., 9)\n", 305 | " cutoff = 1. - tol\n", 306 | " plot_limits = np.asarray(norms).reshape((-1, 1)) * grid.limits\n", 307 | " \n", 308 | " z = policy(grid.all_points).eval().reshape(grid.num_points)\n", 309 | " im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=plot_limits[0, 1] / plot_limits[1, 1], cmap=HEAT_MAP, vmin=-cutoff, vmax=cutoff)\n", 310 | " cbar = fig.colorbar(im, ax=ax, label=r'$u = \\pi(x)$ [normalized]', ticks=ticks)\n", 311 | " ax.set_xlabel(r'$\\theta$ [deg]')\n", 312 | " ax.set_ylabel(r'$\\omega$ [deg/s]')\n", 313 | " plt.show()\n", 314 | "\n", 315 | "\n", 316 | "norms = np.rad2deg(state_norm)\n", 317 | "plot_policy(policy, grid, norms)\n" 318 | ] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "metadata": {}, 323 | "source": [ 324 | "## Closed-Loop Dynamics Lipschitz Constant" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [ 333 | "# Policy (linear)\n", 334 | "L_pol = np.linalg.norm(-K, 1)\n", 335 | "\n", 336 | "# Dynamics (linear approximation)\n", 337 | "L_dyn = np.linalg.norm(A_true, 1) + np.linalg.norm(B_true, 1) * L_pol\n" 338 | ] 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | "metadata": {}, 343 | "source": [ 344 | "## Fixed Lyapunov Function\n", 345 | "\n", 346 | "Fix the Lyapunov function to the LQR solution for the linearized, discretized, true system." 
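,
    "\n",
    "For reference, with the closed-loop matrix $A_{\\mathrm{cl}} = A - BK$, the LQR cost-to-go matrix $P$ satisfies the discrete-time Lyapunov equation\n",
    "$$A_{\\mathrm{cl}}^\\top P A_{\\mathrm{cl}} - P = -(Q + K^\\top R K) \\prec 0,$$\n",
    "so $v({\\bf x}) = {\\bf x}^\\top P {\\bf x}$ decreases along the linearized closed-loop dynamics; the normalization $P / \\max_{ij} |P_{ij}|$ applied above only rescales $v$ and preserves this decrease property."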
347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": null, 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "# Define the Lyapunov function corresponding to the LQR policy\n", 356 | "lyapunov_function = safe_learning.QuadraticFunction(P)\n", 357 | "\n", 358 | "# Approximate local Lipschitz constants with gradients\n", 359 | "grad_lyapunov_function = safe_learning.LinearSystem((2 * P,))\n", 360 | "if OPTIONS.use_lipschitz_scaling:\n", 361 | " L_v = lambda x: tf.abs(grad_lyapunov_function(x))\n", 362 | "else:\n", 363 | " L_v = lambda x: tf.norm(grad_lyapunov_function(x), ord=1, axis=1, keep_dims=True)\n", 364 | "\n", 365 | "# Initialize class (with a possibly adaptive discretization for safety verification)\n", 366 | "lyapunov = safe_learning.Lyapunov(grid, lyapunov_function, dynamics, L_dyn, L_v, tau, policy, initial_safe_set, adaptive=OPTIONS.use_adaptive_grid)\n", 367 | "lyapunov.update_values()\n", 368 | "lyapunov.update_safe_set()\n" 369 | ] 370 | }, 371 | { 372 | "cell_type": "markdown", 373 | "metadata": {}, 374 | "source": [ 375 | "## TensorFlow Graph" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "metadata": {}, 382 | "outputs": [], 383 | "source": [ 384 | "# Current\n", 385 | "states = tf.placeholder(OPTIONS.tf_dtype, shape=[None, grid.ndim], name='states')\n", 386 | "actions = policy(states)\n", 387 | "values = lyapunov.lyapunov_function(states)\n", 388 | "\n", 389 | "# Predicted future\n", 390 | "future_states_mean, future_states_error = lyapunov.dynamics(states, actions)\n", 391 | "future_values_mean = lyapunov.lyapunov_function(future_states_mean)\n", 392 | "lv = lyapunov.lipschitz_lyapunov(future_states_mean)\n", 393 | "future_values_error = tf.reduce_sum(lv * future_states_error, axis=1, keepdims=True)\n", 394 | "dv_mean = future_values_mean - values\n", 395 | "dv_bound = dv_mean + future_values_error\n", 396 | "\n", 397 | "# True future\n", 398 | "future_states = true_dynamics(states, actions)\n", 399 | "future_values = lyapunov.lyapunov_function(future_states)\n", 400 | "dv = future_values - values\n", 401 | "\n", 402 | "# Discretization effects\n", 403 | "tau = tf.placeholder(OPTIONS.tf_dtype, shape=[None, 1], name='discretization_constant')\n", 404 | "threshold = lyapunov.threshold(states, tau)\n", 405 | "negative = tf.less(dv_bound, threshold)\n" 406 | ] 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "metadata": {}, 411 | "source": [ 412 | "## Visualize Discretization Effects\n", 413 | "\n", 414 | "The tightened Lyapunov stability certificate $\\Delta v({\\bf x}) := v(f_\\pi({\\bf x})) - v({\\bf x}) < - L_{\\Delta v}\\tau$ becomes easier to satisfy as the grid is refined (i.e., as the spacing $\\tau$ decreases). However, this creates more states that must be verified within any level set due to the curse of dimensionality. For a given uniform grid with a side length of $M$ cells, $\\Delta v({\\bf x}) < - L_{\\Delta v}\\tau$ may not be satisfied, but \n", 415 | " $$\\Delta v({\\bf x}) < - L_{\\Delta v}\\frac{\\tau}{N({\\bf x})}$$ \n", 416 | "may be, where $N({\\bf x}) \\in \\mathbb{N}_{\\geq 1}$ represents an adaptive refinement of the grid cell centred at $\\bf{x}$. This new condition would need to be checked at the $N({\\bf x})^d$ additional grid points created around $\\bf x$. We visualize the required refinement $N(\\bf{x})$ when beginning with a uniform square (i.e., $M^d$-sized) grid for the true dynamics below." 
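,
    "\n",
    "For example (illustrative numbers only): if $L_{\\Delta v} \\tau = 0.01$ at some ${\\bf x}$ while $\\Delta v({\\bf x}) = -0.004$, the uniform condition fails, but a refinement of $N({\\bf x}) = \\lceil 0.01 / 0.004 \\rceil = 3$ suffices, since $-0.004 < -0.01/3$. This is exactly the ratio $\\lceil \\mathrm{threshold} / \\Delta v \\rceil$ computed in the next cell, where both quantities are negative."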
417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": null, 422 | "metadata": {}, 423 | "outputs": [], 424 | "source": [ 425 | "# Adjust this parameter to see the effect of different uniform discretizations, and the required adaptive refinement. \n", 426 | "cells_per_side = 250\n", 427 | "\n", 428 | "# Initialize a uniform square grid\n", 429 | "grid_limits = np.array([[-1., 1.], ] * state_dim)\n", 430 | "grid = safe_learning.GridWorld(grid_limits, cells_per_side + 1)\n", 431 | "grid_spacing = np.sum(grid.unit_maxes) / 2\n", 432 | "\n", 433 | "# Create a colormap for N(x)\n", 434 | "N_max = 16\n", 435 | "cmap = plt.get_cmap('viridis', lut=N_max)\n", 436 | "cmap.set_over('gold')\n", 437 | "cmap.set_under((1., 1., 1., 0.))\n", 438 | "\n", 439 | "# Compute the required refinement N(x) for the adaptive discretization; if dv >= 0, then no amount of refinement will help, so we set N(x) = -1 (white) for plotting\n", 440 | "feed_dict = {states: grid.all_points, tau: [[np.sum(grid.unit_maxes) / 2]]}\n", 441 | "N = (threshold / dv).eval(feed_dict)\n", 442 | "N[np.isnan(N)] = -1\n", 443 | "N[N < 0] = -1\n", 444 | "N = np.ceil(N)\n", 445 | "\n", 446 | "# Visualize results\n", 447 | "fig, ax = plt.subplots(1, 1, figsize=(5, 5), dpi=OPTIONS.dpi)\n", 448 | "\n", 449 | "z = N.reshape(grid.num_points)\n", 450 | "im = ax.imshow(z.T, origin='lower', extent=grid_limits.ravel(), aspect=grid_limits[0, 1] / grid_limits[1, 1], cmap=cmap, vmin=0, vmax=N_max)\n", 451 | "cbar = fig.colorbar(im, ax=ax, label=r'$N({\\bf x})$', ticks=np.arange(0, N_max + 1, 2))\n", 452 | "ax.set_title(r'$M = {}$'.format(grid.num_points[0] - 1) \n", 453 | " + ', ' + r'$|\\mathcal{X}_\\tau|$ = ' + r'{:.1e}'.format(grid.nindex) \n", 454 | " + ', ' + r'$\\tau$ = ' + r'{:.0e}'.format(grid_spacing), \n", 455 | " )\n", 456 | "ax.set_xlabel(r'$\\theta$ [deg]')\n", 457 | "ax.set_ylabel(r'$\\omega$ [deg/s]')\n", 458 | "\n", 459 | "yticks = cbar.ax.get_yticks()\n", 460 | "tick_labels = ['{:.0f}'.format(y * N_max) for y in yticks]\n", 461 | "tick_labels[-1] = r'$\\geq {}$'.format(N_max)\n", 462 | "cbar.ax.set_yticklabels(tick_labels)\n", 463 | "\n", 464 | "plt.show()\n" 465 | ] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": {}, 470 | "source": [ 471 | "## Safe Online Learning and Exploration\n", 472 | "\n", 473 | "Only visit certified safe states in order to obtain measurements and update the GP model of the dynamics." 
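,
    "\n",
    "Schematically, one learning iteration with the helpers defined below looks as follows. This is a sketch only: the actual cells evaluate the measurement through the TensorFlow graph, and `measure_true_dynamics` is a hypothetical stand-in for the body of `update_gp()`.\n",
    "\n",
    "```python\n",
    "# Most uncertain (state, action) pair that is still certified safe\n",
    "x_u, _ = safe_learning.get_safe_sample(lyapunov, action_variation, action_limits,\n",
    "                                       num_samples=1000)\n",
    "y = measure_true_dynamics(x_u)            # hypothetical helper; see update_gp() below\n",
    "lyapunov.dynamics.add_data_point(x_u, y)  # condition the GP on the new observation\n",
    "lyapunov.update_safe_set()                # re-verify and possibly grow the safe set\n",
    "```"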
474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": null, 479 | "metadata": {}, 480 | "outputs": [], 481 | "source": [ 482 | "# We are not updating the policy, so do not consider perturbations around the current policy\n", 483 | "action_variation = np.array([[0.]], dtype=OPTIONS.np_dtype)\n", 484 | "\n", 485 | "with tf.name_scope('add_new_measurement'):\n", 486 | " full_dim = state_dim + action_dim \n", 487 | " tf_max_state_action = tf.placeholder(OPTIONS.tf_dtype, shape=[1, full_dim])\n", 488 | " tf_measurement = true_dynamics(tf_max_state_action)\n", 489 | " \n", 490 | "def update_gp():\n", 491 | " \"\"\"Update the GP model based on an actively selected data point.\"\"\"\n", 492 | " \n", 493 | " # Get a new sample location\n", 494 | " max_state_action, _ = safe_learning.get_safe_sample(lyapunov, action_variation, action_limits, positive=True, num_samples=1000)\n", 495 | " \n", 496 | " # Obtain a measurement of the true dynamics\n", 497 | " lyapunov.feed_dict[tf_max_state_action] = max_state_action\n", 498 | " measurement = tf_measurement.eval(feed_dict=lyapunov.feed_dict)\n", 499 | " \n", 500 | " # Add the measurement to our GP dynamics\n", 501 | " lyapunov.dynamics.add_data_point(max_state_action, measurement)\n", 502 | "\n", 503 | "\n", 504 | "# Record some metrics during data collection\n", 505 | "safe_level = [] # current level c of the largest verifiable safe set V(c)\n", 506 | "safe_set_fraction = [] # current safe set size approximated as a fraction of the discretization that is considered safe\n", 507 | "num_measurements = [] # number of measurements collected\n", 508 | "update_count = 0 # number of safe set updates so far\n" 509 | ] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": {}, 514 | "source": [ 515 | "### Measurements\n", 516 | "\n", 517 | "This cell can be run repeatedly to collect more measurements." 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "execution_count": null, 523 | "metadata": {}, 524 | "outputs": [], 525 | "source": [ 526 | "data_per_update = 10 # number of measurements to collect before attempting to update the safe set\n", 527 | "safe_set_updates = 12 # number of safe set updates\n", 528 | "can_shrink = False # whether or not to \"re-verify\" known safe states as the GP model is updated, \n", 529 | " # i.e., can the safe set shrink in volume? (use \"False\" for speed, \"True\" for testing)\n", 530 | "safety_factor = 1. 
# scaling factor used to conservatively estimate the required adaptive refinement\n", 531 | "N_max = 16 # the maximum adaptive refinement N(x) to attempt; lower is faster, while higher allows larger safe sets to be verified \n", 532 | "\n", 533 | "for _ in range(safe_set_updates):\n", 534 | " update_count += 1\n", 535 | "# print('Iteration {} with current safe level: {}'.format(update_count, lyapunov.feed_dict[lyapunov.c_max]))\n", 536 | "\n", 537 | " # Collect measurements for the GP model\n", 538 | " start = time.time()\n", 539 | " for _ in range(data_per_update): \n", 540 | " update_gp()\n", 541 | " end = time.time()\n", 542 | " duration_gp = end - start\n", 543 | " \n", 544 | " # Update safe set\n", 545 | " start = time.time()\n", 546 | " lyapunov.update_safe_set(can_shrink, N_max, safety_factor, OPTIONS.num_cores)\n", 547 | " end = time.time()\n", 548 | " duration_lyap = end - start\n", 549 | " \n", 550 | " # Record metrics\n", 551 | " safe_level.append(lyapunov.feed_dict[lyapunov.c_max])\n", 552 | " safe_set_fraction.append(np.sum(lyapunov.safe_set) / lyapunov.discretization.nindex)\n", 553 | " if update_count == 1:\n", 554 | " num_measurements.append(data_per_update)\n", 555 | " else:\n", 556 | " num_measurements.append(num_measurements[-1] + data_per_update)\n", 557 | " \n", 558 | " print('Data points collected so far: {}'.format(num_measurements[-1]))\n", 559 | " print('Safe set size (relative to grid): {:.2f}%'.format(100 * safe_set_fraction[-1]))\n", 560 | " print('Duration of GP update (avg): {}'.format(duration_gp / data_per_update))\n", 561 | " print('Duration of safe set update: {}'.format(duration_lyap))\n", 562 | " print('New safe level: {}'.format(lyapunov.feed_dict[lyapunov.c_max]))\n", 563 | " print('')\n" 564 | ] 565 | }, 566 | { 567 | "cell_type": "markdown", 568 | "metadata": {}, 569 | "source": [ 570 | "## Results\n", 571 | "\n", 572 | "Plot the largest verifiable safe set and the measurement points. If the discretization is adaptive, use a colormap to show how much refinement $N({\\bf x})$ was necessary to satisfy the tightened Lyapunov decrease condition."
573 | ] 574 | }, 575 | { 576 | "cell_type": "code", 577 | "execution_count": null, 578 | "metadata": {}, 579 | "outputs": [], 580 | "source": [ 581 | "grid = lyapunov.discretization\n", 582 | "feed_dict = lyapunov.feed_dict\n", 583 | "feed_dict[states] = grid.all_points\n", 584 | "feed_dict[tau] = [[lyapunov.tau]]\n", 585 | "\n", 586 | "fig, axes = plt.subplots(1, 2, figsize=(10, 5), dpi=OPTIONS.dpi)\n", 587 | "fig.subplots_adjust(wspace=0.25)\n", 588 | "plot_limits = np.rad2deg(state_norm).reshape((-1, 1)) * grid.limits\n", 589 | "axes[0].set_title(r'$M = {}$'.format(grid.num_points[0] - 1)\n", 590 | " + ', ' + r'$|\\mathcal{X}_\\tau| =$ ' + r'{:.1e}'.format(grid.nindex)\n", 591 | " + ', ' + r'$\\tau =$ ' + r'{:.0e}'.format(np.sum(grid.unit_maxes) / 2))\n", 592 | "axes[0].set_xlabel(r'$\\theta$ [deg]')\n", 593 | "axes[0].set_ylabel(r'$\\omega$ [deg/s]')\n", 594 | "\n", 595 | "axes[1].step(num_measurements, 100 * np.asarray(safe_set_fraction), 'o', where='post')\n", 596 | "axes[1].set_xlabel(r'number of measurements')\n", 597 | "axes[1].set_ylabel(r'safe set size [% of grid]')\n", 598 | "\n", 599 | "# Decrease region for the true dynamics\n", 600 | "decrease_region = (dv.eval(feed_dict) < 0).reshape(grid.num_points)\n", 601 | "cmap = binary_cmap('lightgrey')\n", 602 | "im = axes[0].imshow(decrease_region.T, origin='lower', extent=plot_limits.ravel(), aspect=plot_limits[0, 1] / plot_limits[1, 1], cmap=cmap, vmin=0, vmax=None)\n", 603 | "\n", 604 | "# Refinement N(x) used; colorbar shown only if the discretization is adaptive\n", 605 | "N = np.copy(lyapunov._refinement)\n", 606 | "N[N == 0] = -1 # for color only\n", 607 | "\n", 608 | "z = N.reshape(grid.num_points)\n", 609 | "cmap = plt.get_cmap('viridis', lut=N_max)\n", 610 | "cmap.set_over('gold')\n", 611 | "cmap.set_under((1., 1., 1., 0.))\n", 612 | "im = axes[0].imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=plot_limits[0, 1] / plot_limits[1, 1], cmap=cmap, vmin=0, vmax=N_max)\n", 613 | "if OPTIONS.use_adaptive_grid:\n", 614 | " cbar = fig.colorbar(im, ax=axes[0], label=r'$N({\\bf x})$', ticks=np.arange(0, N_max + 1, 2))\n", 615 | "\n", 616 | "# Initial safe set\n", 617 | "initial_safe_set = lyapunov.initial_safe_set.reshape(grid.num_points)\n", 618 | "cmap = binary_cmap('red')\n", 619 | "im = axes[0].imshow(initial_safe_set.T, origin='lower', extent=plot_limits.ravel(), aspect=plot_limits[0, 1] / plot_limits[1, 1], cmap=cmap, vmin=None, vmax=None)\n", 620 | "\n", 621 | "# Measurements\n", 622 | "if isinstance(lyapunov.dynamics, safe_learning.UncertainFunction):\n", 623 | " # Skip origin data point\n", 624 | " X = norms.ravel() * lyapunov.dynamics.functions[0].X[1:, :grid.ndim]\n", 625 | " axes[0].plot(X[:, 0], X[:, 1], 'x', color='pink', mew=1, ms=6)\n", 626 | "\n", 627 | "# Legend\n", 628 | "colors = ['red', 'pink', 'lightgrey']\n", 629 | "proxy = [plt.Rectangle((0,0), 1, 1, fc=c) for c in colors]\n", 630 | "labels = [r'Initial safe set', r'Measurements', r'$\\Delta v({\\bf x}) < 0$']\n", 631 | "axes[0].legend(proxy, labels, loc='lower left')\n", 632 | "\n", 633 | "plt.show()\n" 634 | ] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": null, 639 | "metadata": {}, 640 | "outputs": [], 641 | "source": [] 642 | } 643 | ], 644 | "metadata": { 645 | "kernelspec": { 646 | "display_name": "Python 3", 647 | "language": "python", 648 | "name": "python3" 649 | }, 650 | "language_info": { 651 | "codemirror_mode": { 652 | "name": "ipython", 653 | "version": 3 654 | }, 655 | "file_extension": ".py", 656 | "mimetype": 
"text/x-python", 657 | "name": "python", 658 | "nbconvert_exporter": "python", 659 | "pygments_lexer": "ipython3", 660 | "version": "3.6.4" 661 | } 662 | }, 663 | "nbformat": 4, 664 | "nbformat_minor": 2 665 | } 666 | --------------------------------------------------------------------------------