├── README ├── HDPy ├── epuck │ ├── plant │ │ ├── __init__.py │ │ └── plants.py │ ├── policy │ │ ├── __init__.py │ │ └── policies.py │ ├── __init__.py │ ├── epuck_arena.py │ ├── analysis_epuck.py │ └── epuck.py ├── puppy │ ├── policy │ │ └── __init__.py │ ├── plant │ │ ├── __init__.py │ │ ├── AccelerationReward.py │ │ └── plants.py │ ├── __init__.py │ ├── puppy.py │ └── analysis_puppy.py ├── __init__.py ├── inout.py └── rl.py ├── doc ├── source │ ├── _downloads │ │ └── latest.tar.gz │ ├── todopg.rst │ ├── license.rst │ ├── analysis.rst │ ├── utils.rst │ ├── epuck.rst │ ├── puppy.rst │ ├── rc.rst │ ├── references.rst │ ├── rl.rst │ ├── download.rst │ ├── puppy_online.rst │ ├── index.rst │ ├── pp.rst │ ├── conf.py │ └── puppy_offline.rst ├── make.bat └── Makefile ├── test ├── esn_acd.hdf5 ├── puppy_example_trajectory_supervisor.py ├── puppy_actor_supervisor.py ├── puppy_offline_sampling_supervisor.py ├── puppy_online_supervisor.py ├── analysis.py ├── puppy_example_trajectory_sequence.py ├── puppy_example_trajectory_eval.py ├── puppy_offline_replay.py ├── puppy_offline_sampling_robot.py ├── epuck_online.py ├── puppy_example_trajectory_robot.py ├── acd.py ├── puppy_online_robot.py ├── rc_example.py └── puppy_offline_analysis.py ├── data ├── doc │ ├── rc_example.pdf │ ├── rc_example.png │ ├── epuck_data.hdf5 │ ├── epuck_result.pdf │ ├── epuck_result.png │ └── puppy_offline_result.png ├── puppy_unit.json └── puppy_stat.json ├── .gitignore ├── setup.py └── LICENSE.txt /README: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /HDPy/epuck/plant/__init__.py: -------------------------------------------------------------------------------- 1 | from plants import * 2 | -------------------------------------------------------------------------------- /doc/source/_downloads/latest.tar.gz: -------------------------------------------------------------------------------- 1 | HDPy-1.0.tar.gz -------------------------------------------------------------------------------- /HDPy/epuck/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from policies import * 2 | -------------------------------------------------------------------------------- /HDPy/puppy/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from policies import * 2 | -------------------------------------------------------------------------------- /test/esn_acd.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igsor/HDPy/HEAD/test/esn_acd.hdf5 -------------------------------------------------------------------------------- /data/doc/rc_example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igsor/HDPy/HEAD/data/doc/rc_example.pdf -------------------------------------------------------------------------------- /data/doc/rc_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igsor/HDPy/HEAD/data/doc/rc_example.png -------------------------------------------------------------------------------- /data/doc/epuck_data.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igsor/HDPy/HEAD/data/doc/epuck_data.hdf5 
-------------------------------------------------------------------------------- /data/doc/epuck_result.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igsor/HDPy/HEAD/data/doc/epuck_result.pdf -------------------------------------------------------------------------------- /data/doc/epuck_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igsor/HDPy/HEAD/data/doc/epuck_result.png -------------------------------------------------------------------------------- /doc/source/todopg.rst: -------------------------------------------------------------------------------- 1 | 2 | Hic sunt dracones 3 | ================= 4 | 5 | .. todolist:: 6 | 7 | -------------------------------------------------------------------------------- /HDPy/puppy/plant/__init__.py: -------------------------------------------------------------------------------- 1 | from plants import * 2 | from AccelerationReward import AccelerationReward 3 | -------------------------------------------------------------------------------- /data/doc/puppy_offline_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igsor/HDPy/HEAD/data/doc/puppy_offline_result.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | *.pyo 4 | .*project 5 | doc/build/ 6 | dist/ 7 | MANIFEST 8 | HDPy.pth 9 | 10 | # compiled downloads 11 | doc/source/_downloads/* 12 | doc/source/_downloads/ 13 | -------------------------------------------------------------------------------- /doc/source/license.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _license: 3 | 4 | License 5 | ======= 6 | 7 | This project is released under the terms of the 3-clause BSD License. 8 | 9 | .. literalinclude:: ../../LICENSE.txt 10 | -------------------------------------------------------------------------------- /HDPy/puppy/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | .. automodule:: HDPy.puppy.puppy 4 | 5 | .. automodule:: HDPy.puppy.analysis_puppy 6 | 7 | """ 8 | from puppy import * 9 | from analysis_puppy import * 10 | import policy 11 | import plant 12 | -------------------------------------------------------------------------------- /HDPy/epuck/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | .. automodule:: HDPy.epuck.epuck 4 | 5 | .. .. automodule:: HDPy.epuck.epuck_arena 6 | 7 | .. 
automodule:: HDPy.epuck.analysis_epuck 8 | 9 | """ 10 | from epuck import * 11 | from analysis_epuck import * 12 | import plant 13 | import policy 14 | import epuck_arena as env 15 | -------------------------------------------------------------------------------- /test/puppy_example_trajectory_supervisor.py: -------------------------------------------------------------------------------- 1 | 2 | from controller import Supervisor 3 | import PuPy 4 | 5 | # checks 6 | checks = [ 7 | PuPy.QuitOnDemand(), 8 | PuPy.RevertOnDemand() 9 | ] 10 | 11 | # set up supervisor 12 | s = PuPy.supervisorBuilder(Supervisor, 20, [PuPy.ReceiverCheck(checks)]) 13 | 14 | # run 15 | s.run() 16 | -------------------------------------------------------------------------------- /test/puppy_actor_supervisor.py: -------------------------------------------------------------------------------- 1 | from controller import Supervisor 2 | import PuPy 3 | 4 | # checks 5 | checks = [] 6 | checks.append(PuPy.RevertTumbled(grace_time_ms=(3 * 3000))) 7 | checks.append(PuPy.RevertMaxIter(3000 * 300)) 8 | 9 | # set up supervisor 10 | s = PuPy.supervisorBuilder(Supervisor, 20, checks) 11 | 12 | # run 13 | s.run() 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | setup( 4 | name='HDPy', 5 | url='http://www.igsor.net/research/HDPy/', 6 | author='Matthias Baumgartner', 7 | author_email='research@igsor.net', 8 | version='1.0', 9 | packages=['HDPy'], 10 | license='Free for use', 11 | long_description=open('README').read(), 12 | requires=("scipy","numpy","mdp","Oger") 13 | ) 14 | -------------------------------------------------------------------------------- /doc/source/analysis.rst: -------------------------------------------------------------------------------- 1 | 2 | Analysis 3 | ======== 4 | 5 | .. module:: HDPy 6 | 7 | Introduction 8 | ------------ 9 | 10 | .. automodule:: HDPy.analysis 11 | 12 | Example 13 | ------- 14 | 15 | 16 | Reference 17 | --------- 18 | 19 | .. autoclass:: Analysis 20 | :members: 21 | 22 | .. autofunction:: overview 23 | 24 | .. autofunction:: node_inspection 25 | 26 | .. autofunction:: critic 27 | 28 | -------------------------------------------------------------------------------- /test/puppy_offline_sampling_supervisor.py: -------------------------------------------------------------------------------- 1 | from controller import Supervisor 2 | import PuPy 3 | 4 | # checks 5 | checks = [] 6 | checks.append(PuPy.RevertOutOfArena(arena_size=(-10, 10, -10, 10), distance=0, grace_time_ms=(3 * 3000))) 7 | # respawn the robot at a random location in a bounded area 8 | checks.append(PuPy.RevertTumbled(grace_time_ms=(3 * 3000))) 9 | 10 | # set up supervisor 11 | s = PuPy.supervisorBuilder(Supervisor, 20, checks) 12 | 13 | # run 14 | s.run() 15 | -------------------------------------------------------------------------------- /test/puppy_online_supervisor.py: -------------------------------------------------------------------------------- 1 | from controller import Supervisor 2 | import PuPy 3 | 4 | checks = [] 5 | # Revert the simulation if the robot tumbled ... 6 | checks.append(PuPy.RevertTumbled(grace_time_ms=(3 * 3000))) 7 | # ... 
or went out of a predefined space 8 | checks.append(PuPy.RevertOutOfArena(arena_size=(-10, 10, -10, 10), distance=0, grace_time_ms=(3 * 3000))) 9 | 10 | # set up supervisor 11 | s = PuPy.supervisorBuilder(Supervisor, 20, checks) 12 | 13 | # run 14 | s.run() 15 | -------------------------------------------------------------------------------- /doc/source/utils.rst: -------------------------------------------------------------------------------- 1 | 2 | Utility functions 3 | ================= 4 | 5 | .. contents:: 6 | 7 | .. module:: HDPy 8 | 9 | Introduction 10 | ------------ 11 | 12 | .. automodule:: HDPy.inout 13 | 14 | Reference 15 | --------- 16 | 17 | 18 | .. autofunction:: remove_init_only_groups 19 | 20 | .. autofunction:: h5_reorder 21 | 22 | .. autofunction:: h5_merge_experiments 23 | 24 | .. autofunction:: remove_boundary_groups 25 | 26 | .. autoclass:: H5CombinedFile 27 | 28 | .. autoclass:: H5CombinedGroup 29 | -------------------------------------------------------------------------------- /doc/source/epuck.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _epuck: 3 | 4 | ePuck 5 | ===== 6 | 7 | .. contents:: 8 | 9 | Introduction 10 | ------------ 11 | 12 | .. automodule:: HDPy.epuck 13 | 14 | Example 15 | ------- 16 | 17 | .. literalinclude:: ../../test/epuck_online.py 18 | 19 | .. image:: ../../data/doc/epuck_result.png 20 | 21 | Reference 22 | --------- 23 | 24 | .. module:: HDPy 25 | 26 | .. autoclass:: HDPy.epuck.Robot 27 | :members: read_sensors, take_action, reset, reset_random, plot_trajectory 28 | 29 | .. autoclass:: HDPy.epuck.AbsoluteRobot 30 | :show-inheritance: 31 | 32 | .. autofunction:: HDPy.epuck.simulation_loop 33 | 34 | .. autofunction:: HDPy.epuck.epuck_plot_snapshot 35 | 36 | .. autofunction:: HDPy.epuck.epuck_plot_value_over_action 37 | 38 | .. autofunction:: HDPy.epuck.epuck_plot_all_trajectories 39 | -------------------------------------------------------------------------------- /test/analysis.py: -------------------------------------------------------------------------------- 1 | 2 | import HDPy,pylab 3 | a = HDPy.Analysis('esn_acd.hdf5') 4 | 5 | # Prediction plot 6 | fig = pylab.figure(1) 7 | ax = fig.add_axes([0.1, 0.1, 0.8, 0.8]) 8 | a.plot_path_return_prediction('18', ax) 9 | fig.suptitle('Predictor evaluation') 10 | # If all works well, you should see a figure with 5 lines in it, 11 | # according to the Analysis.plot_path_return_prediction documentation 12 | 13 | # Simple plot functions 14 | fig = pylab.figure(2) 15 | a.plot_readout_sum(fig.add_subplot(321)) 16 | a.plot_reward(fig.add_subplot(322)) 17 | a.plot_derivative(fig.add_subplot(323)) 18 | a.plot_actions(fig.add_subplot(324)) 19 | a.plot_error(fig.add_subplot(325)) 20 | a.plot_accumulated_reward(fig.add_subplot(326)) 21 | fig.suptitle('Some characteristics') 22 | # If all works well, you should see 6 subplots with the respective 23 | # curves displayed. 24 | 25 | # Show the plot 26 | pylab.show(block=False) 27 | 28 | print "Check the graphs visually. If they correspond to your expectations, the test was successful." 29 | -------------------------------------------------------------------------------- /doc/source/puppy.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _puppy: 3 | 4 | Puppy 5 | ===== 6 | 7 | .. contents:: 8 | 9 | .. module:: HDPy 10 | 11 | Introduction 12 | ------------ 13 | 14 | .. automodule:: HDPy.puppy 15 | 16 | Example 17 | ------- 18 | 19 | .. 
toctree:: 20 | :maxdepth: 1 21 | 22 | puppy_offline 23 | puppy_online 24 | 25 | Reference 26 | --------- 27 | 28 | .. autoclass:: HDPy.puppy.PuppyHDP 29 | :members: new_episode, init_episode, _step, event_handler 30 | :show-inheritance: 31 | 32 | .. autoclass:: HDPy.puppy.OfflineCollector 33 | :members: new_episode, __call__, _next_action_hook, event_handler 34 | :show-inheritance: 35 | 36 | .. autofunction:: HDPy.puppy.offline_playback 37 | 38 | 39 | .. autofunction:: HDPy.puppy.plot_trajectory 40 | 41 | .. autofunction:: HDPy.puppy.plot_all_trajectories 42 | 43 | .. autofunction:: HDPy.puppy.plot_linetarget 44 | 45 | .. autofunction:: HDPy.puppy.plot_locationtarget 46 | 47 | .. autofunction:: HDPy.puppy.plot_landmarks 48 | 49 | .. autofunction:: HDPy.puppy.plot_action 50 | 51 | .. autofunction:: HDPy.puppy.plot_inspected_trajectory 52 | 53 | .. autoclass:: HDPy.puppy.ActionVideo 54 | :members: 55 | -------------------------------------------------------------------------------- /doc/source/rc.rst: -------------------------------------------------------------------------------- 1 | 2 | Reservoir Computing 3 | =================== 4 | 5 | .. contents:: 6 | 7 | Introduction 8 | ------------ 9 | 10 | .. automodule:: HDPy.rc 11 | 12 | Example 13 | ------- 14 | 15 | .. literalinclude:: ../../test/rc_example.py 16 | 17 | >>> Sparse 0.004785 0.744121 18 | >>> Orthogonal 0.004858 0.749770 19 | >>> Ring of Neurons 0.004827 0.747397 20 | 21 | .. image:: ../../data/doc/rc_example.png 22 | 23 | Reference 24 | --------- 25 | 26 | .. module:: HDPy 27 | 28 | .. autoclass:: ReservoirNode 29 | :members: execute, copy, input_dim, output_dim, reset, save, _post_update_hook, __call__ 30 | 31 | .. autoclass:: PlainRLS 32 | :members: train, __call__, save, stop_training, copy 33 | 34 | .. autoclass:: StabilizedRLS 35 | :members: 36 | :show-inheritance: 37 | 38 | 39 | .. autofunction:: sparse_reservoir 40 | .. autofunction:: dense_w_in 41 | .. autofunction:: sparse_w_in 42 | .. autofunction:: dense_w_bias 43 | .. autofunction:: orthogonal_reservoir 44 | .. autofunction:: chain_of_neurons 45 | .. autofunction:: ring_of_neurons 46 | 47 | .. autofunction:: reservoir_memory 48 | .. autofunction:: find_radius_for_mc 49 | 50 | -------------------------------------------------------------------------------- /doc/source/references.rst: -------------------------------------------------------------------------------- 1 | 2 | References 3 | ========== 4 | 5 | .. [FB98] B. Farhang-Boroujeny; Adaptive filters: theory and applications 6 | 7 | .. [TS12] T. Strauss et al; Design strategies for weight matrices of echo state networks. 8 | 9 | .. [ESN-ACD] P. Koprinkova-Hristova et al; Adaptive Critic Design with Echo State Network, 2010 10 | M. Oubbati et al.; Anticipating rewards in continuous time and space with echo state networks and actor-critic design, 2011 11 | M. Oubbati et al; Adaptive Learning in Continuous Environment Using Actor-Critic Design and Echo-State Networks, 2012 12 | P. Koprinkova-Hristova; Heuristic dynamic programming using echo state network as online trainable adaptive critic, 2012 13 | 14 | .. [RL] Sutton, Barto; Reinforcement Learning: An Introduction, 1998 15 | 16 | .. [Oger] http://organic.elis.ugent.be/organic/engine 17 | 18 | .. [MDP] http://mdp-toolkit.sourceforge.net/ 19 | 20 | .. [Webots] http://www.cyberbotics.com/ 21 | 22 | .. [HDF5] http://www.hdfgroup.org/HDF5/ 23 | 24 | .. [matplotlib] http://matplotlib.org/ 25 | 26 | .. [scipy] http://scipy.org/ 27 | 28 | .. 
[numpy] http://www.numpy.org/ 29 | -------------------------------------------------------------------------------- /doc/source/rl.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _reinforcement-learning: 3 | 4 | Reinforcement Learning 5 | ====================== 6 | 7 | .. contents:: 8 | 9 | Introduction 10 | ------------ 11 | 12 | .. automodule:: HDPy.rl 13 | 14 | .. automodule:: HDPy.hdp 15 | 16 | Reference 17 | --------- 18 | 19 | .. module:: HDPy 20 | 21 | .. autoclass:: Plant 22 | :members: 23 | :noindex: 24 | 25 | .. autoclass:: Policy 26 | :members: 27 | :noindex: 28 | 29 | .. autoclass:: ActorCritic 30 | :members: new_episode, __call__, init_episode, _step, _pre_increment_hook, _next_action_hook, save, load, set_normalization, set_alpha, set_gamma, set_momentum 31 | 32 | .. autoclass:: Momentum 33 | :members: __call__ 34 | 35 | .. autoclass:: ConstMomentum 36 | :show-inheritance: 37 | 38 | .. autoclass:: RadialMomentum 39 | :show-inheritance: 40 | 41 | .. autoclass:: ADHDP 42 | :show-inheritance: 43 | :members: _critic_eval, _critic_deriv, init_episode, _step 44 | 45 | .. autoclass:: ActionGradient 46 | :show-inheritance: 47 | 48 | .. autoclass:: ActionRecomputation 49 | :show-inheritance: 50 | 51 | .. autoclass:: ActionBruteForce 52 | :show-inheritance: 53 | -------------------------------------------------------------------------------- /doc/source/download.rst: -------------------------------------------------------------------------------- 1 | 2 | Download 3 | ======== 4 | 5 | .. contents:: 6 | 7 | Installation 8 | ------------ 9 | 10 | Using `Pip Installs Python (Pip) `_, 11 | simply type:: 12 | 13 | pip install http://www.igsor.net/research/HDPy/_downloads/latest.tar.gz 14 | 15 | if you want to use the package from the webpage. If you have downloaded it yourself, use:: 16 | 17 | pip install path/to/HDPy.tar.gz 18 | 19 | If you're using `distutils `_, type:: 20 | 21 | tar -xzf path/to/HDPy.tgz # extract files. 22 | cd HDPy* # change into HDPy directory. 23 | sudo python setup.py install # install using distutils (as root). 24 | #rm -R . # remove source. If desired, uncomment this line. 25 | #cd .. && rmdir HDPy* # remove working directory. If desired, uncomment this line. 26 | 27 | The project is also available on git, with the package and all supplementary data:: 28 | 29 | git clone https://github.com/igsor/PuPy 30 | 31 | Make sure, [numpy]_ and [scipy]_ are 32 | installed on your system. For plotting, [matplotlib]_ is required. 33 | 34 | - :download:`HDPy-1.0 <_downloads/HDPy-1.0.tar.gz>` (latest) 35 | 36 | - :download:`This documentation (pdf) <_downloads/HDPy-1.0-doc.pdf>` 37 | 38 | License 39 | ------- 40 | 41 | This project is released under the terms of the 3-clause BSD License. See the section 42 | :ref:`license` for details. 43 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012, Matthias Baumgartner 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 
8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the HDPy nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL MATTHIAS BAUMGARTNER BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /test/puppy_example_trajectory_sequence.py: -------------------------------------------------------------------------------- 1 | 2 | import h5py 3 | import numpy as np 4 | 5 | # Number of steps that will always be executed 6 | init_steps = 4 7 | # Sample action execution step increment 8 | step_size = 3 9 | # Target location 10 | sequence_file = '/tmp/example_sequence.hdf5' 11 | 12 | # main trajectory 13 | main_trajectory = [[0.8, 0.8]] * 3 + [[0.8, 0.78]] * 2 + [[0.82, 0.9]] * 10 + [[0.8, 0.9]] * 5 + [[0.82, 0.9]] * 10 14 | main_trajectory = np.array(main_trajectory) 15 | 16 | # example actions, to be executed at some steps of the main trajectory 17 | action_samples = np.array([ 18 | [ 0.4, 0.8], 19 | [ 0.8, 0.4], 20 | [ 0.6, 0.8], 21 | [ 0.8, 0.6], 22 | [ 0.8, 1. ], 23 | [ 1. , 0.8], 24 | [ 0.8, 0.8] 25 | ]) 26 | 27 | # Create the trajectories to be executed. This is the main trajectory 28 | # up to step i, then each example action for three steps 29 | ex_trajectory = [main_trajectory] 30 | for i in range(init_steps, main_trajectory.shape[0] + 1, step_size): 31 | ex_trajectory += [np.vstack((main_trajectory[:i], sample, sample, sample)) for sample in action_samples] 32 | 33 | # Store the example trajectories in a HDF5 file 34 | # The trajectories are stored in seperate datasets (traj_000) and an 35 | # index (idx) is initialized for progress bookkeeping. 
36 | f = h5py.File(sequence_file, 'w') 37 | f.create_dataset('idx', data=0) 38 | f.create_dataset('main', data=main_trajectory) 39 | for idx, traj in enumerate(ex_trajectory): 40 | name='traj_%03i' % idx 41 | f.create_dataset(name, data=traj) 42 | 43 | f.close() 44 | 45 | print "Stored", len(ex_trajectory), "sequences" 46 | -------------------------------------------------------------------------------- /data/puppy_unit.json: -------------------------------------------------------------------------------- 1 | {"touch1": [318.79300866155143, 318.79300866155143], "touch0": [416.51093258988385, 416.51093258988385], "touch3": [93.367137677804635, 93.367137677804635], "touch2": [91.339993788601305, 91.339993788601305], "accelerometer_y": [125.82104299169822, 484.73654626435808], "accelerometer_x": [16.481284764750427, 240.26805028986931], "accelerometer_z": [8.6216357004890369, 763.59255751903424], "trg3": [-0.36999999999995326, 1.7939550005402431], "trg2": [-0.36999999999995326, 1.8010496963983], "trg1": [-0.23000000000004661, 1.7939550005403366], "trg0": [-0.22999999999995313, 1.8010496963983], "puppyGPS_y": [3.9754356744413162, 14.010466560779145], "puppyGPS_x": [0.19506757643533312, 10.248893013477851], "puppyGPS_z": [1.2872867042949137, 1.2540506437910039], "hip1": [0.020197128948414633, 1.5946117265067943], "hip0": [1.2896001857700594, 3.2298778091748535], "hip3": [-1.2155965132847109, 2.1905430233360428], "hip2": [-0.97646833307138503, 1.9532006250465155], "knee2": [-0.34111541743978924, 1.57988888529936], "knee3": [0.1725124163642322, 2.1038787759286786], "knee0": [-0.17982543233382797, 1.6488687140115976], "knee1": [0.25999539867561805, 2.1109918545054964], "compass_x": [1.5113113871478845e-08, 0.99999998100110943], "compass_y": [-2.0735495542822946e-09, 0.99999999717288612], "gyro_z": [-1.3809654710283041, 93.3398601954394], "compass_z": [-2.412434418896936e-08, 0.99999992550827077], "gyro_x": [-62.471712345347527, 161.38280248187169], "gyro_y": [-1.5297053400941252, 86.861033891238151], "a_curr" : [1.0023086296125743, 0.80230205374438834], "a_next": [1.0023086296125743, 0.80230205374438834], "landmark_dist": [10.739577828006809, 4.452661157861807]} 2 | -------------------------------------------------------------------------------- /data/puppy_stat.json: -------------------------------------------------------------------------------- 1 | {"touch1": [1.8205343822841078, 6.2456455370456458], "touch0": [1.8077355030158033, 6.2163911930927789], "touch3": [0.78094450507051227, 3.5567929404623104], "touch2": [0.77865152797154391, 3.5566699652357108], "accelerometer_y": [0.0052592965293057339, 4.0331391763913951], "accelerometer_x": [0.31926554829406006, 6.4376879740242048], "accelerometer_z": [7.7485002758765118, 6.4018921917828049], "trg3": [-0.36999999999935246, 0.53618385189057927], "trg2": [-0.36999999999931188, 0.53663473970933218], "trg1": [-0.23000000000761395, 0.53618385189056794], "trg0": [-0.2300000000077049, 0.53663473970932629], "puppyGPS_y": [-0.48713510136415211, 3.4430779971106529], "puppyGPS_x": [-0.51553505401105415, 3.452080563583682], "puppyGPS_z": [0.082007755878077127, 0.015098264420749501], "hip1": [-0.23262800153461199, 0.45348593855348529], "hip0": [-0.2325851976123004, 0.45389519948856594], "hip3": [-0.37030397094335737, 0.45578243275684055], "hip2": [-0.37036884850694912, 0.45629441360468687], "knee2": [0.01585728894698487, 0.31866762188472231], "knee3": [0.016154538793381523, 0.31827911763809236], "knee0": [0.12376281856748228, 0.18315571166606334], "knee1": 
[0.12348114248564791, 0.18283449297610352], "compass_x": [-0.0032868095280248217, 0.68986740172661298], "compass_y": [-0.0091208433194725647, 0.66571483441303869], "gyro_z": [-0.0019240353085375921, 0.50516156782102606], "compass_z": [6.7525306390130052e-05, 0.28427579886077453], "gyro_x": [-0.0011709593429102395, 0.98989036887649229], "gyro_y": [-0.020663182790115593, 1.9425298200825714], "a_curr": [0.72683929129987579, 0.25596165799046389], "a_next": [0.72683929129987579, 0.25596165799046389], "landmark_dist": [16.963004359285215, 16.961933883080803]} 2 | -------------------------------------------------------------------------------- /test/puppy_example_trajectory_eval.py: -------------------------------------------------------------------------------- 1 | import HDPy 2 | import pickle 3 | import os 4 | import sys 5 | import PuPy 6 | import itertools 7 | 8 | # Load reservoir 9 | f = open('/tmp/puppy_reservoir.pic', 'r') 10 | reservoir = pickle.load(f) 11 | reservoir.reset() 12 | f.close() 13 | 14 | # Load readout 15 | f = open('/tmp/puppy_readout.pic', 'r') 16 | readout = pickle.load(f) 17 | f.close() 18 | 19 | # Critic is evaluated, thus don't train it anymore 20 | readout.stop_training() 21 | 22 | # Create a policy 23 | bound_gait = { 24 | 'amplitude' : ( 0.8, 1.0, 0.8, 1.0), 25 | 'frequency' : (1.0, 1.0, 1.0, 1.0), 26 | 'offset' : ( -0.23, -0.23, -0.37, -0.37), 27 | 'phase' : (0.0, 0.0, 0.5, 0.5) 28 | } 29 | 30 | policy = HDPy.puppy.policy.LRA(PuPy.Gait(bound_gait)) 31 | 32 | # Create a plant 33 | landmarks = [i for i in itertools.product((-10.0, -3.3, 3.3, 10.0), (-10.0, -3.3, 3.3, 10.0))] 34 | target_loc = (6.0, 4.0) 35 | plant = HDPy.puppy.plant.TargetLocationLandmarks( 36 | target_loc, 37 | landmarks, 38 | reward_noise = 0.0 39 | ) 40 | 41 | # Load the normalization 42 | nrm = PuPy.Normalization('../data/puppy_unit.json') 43 | 44 | # Initialize the collector 45 | collector = PuPy.RobotCollector( 46 | child = policy, 47 | expfile = '/tmp/example_eval.hdf5' 48 | ) 49 | 50 | # Create HDP instance 51 | actor = HDPy.PuppyHDP( 52 | tumbled_reward =0.0, 53 | reservoir = reservoir, 54 | readout = readout, 55 | plant = plant, 56 | policy = collector, 57 | gamma = 0.0, 58 | alpha = 1.0, 59 | init_steps = 10, 60 | norm = nrm 61 | ) 62 | 63 | HDPy.puppy.offline_playback( 64 | '/tmp/example_data.hdf5', 65 | actor, 66 | 150, 67 | 20 68 | ) 69 | 70 | -------------------------------------------------------------------------------- /test/puppy_offline_replay.py: -------------------------------------------------------------------------------- 1 | import HDPy 2 | import PuPy 3 | import numpy as np 4 | import itertools 5 | 6 | # Create a policy 7 | bound_gait = { 8 | 'amplitude' : ( 0.8, 1.0, 0.8, 1.0), 9 | 'frequency' : (1.0, 1.0, 1.0, 1.0), 10 | 'offset' : ( -0.23, -0.23, -0.37, -0.37), 11 | 'phase' : (0.0, 0.0, 0.5, 0.5) 12 | } 13 | 14 | policy = HDPy.puppy.policy.LRA(PuPy.Gait(bound_gait)) 15 | 16 | # Create a plant 17 | landmarks = [i for i in itertools.product((-10.0, -3.3, 3.3, 10.0), (-10.0, -3.3, 3.3, 10.0))] 18 | target_loc = (6.0, 4.0) 19 | plant = HDPy.puppy.plant.TargetLocationLandmarks( 20 | target_loc, 21 | landmarks, 22 | reward_noise = 0.0 23 | ) 24 | 25 | # Load the normalization 26 | nrm = PuPy.Normalization('../data/puppy_unit.json') 27 | 28 | # Create a reservoir 29 | reservoir = HDPy.ReservoirNode( 30 | output_dim = 10, 31 | input_dim = policy.action_space_dim() + plant.state_space_dim(), 32 | spectral_radius = 0.98, 33 | w = HDPy.sparse_reservoir(20), 34 | ) 35 | 36 | 
reservoir.initialize() 37 | reservoir.save('/tmp/puppy_reservoir.pic') 38 | 39 | # Create a readout 40 | readout = HDPy.StabilizedRLS( 41 | with_bias = True, 42 | input_dim = reservoir.get_output_dim() + reservoir.get_input_dim(), 43 | output_dim = 1, 44 | lambda_ = 1.0 45 | ) 46 | 47 | # Initialize the collector 48 | collector = PuPy.RobotCollector( 49 | child = policy, 50 | expfile = '/tmp/puppy_critic.hdf5' 51 | ) 52 | 53 | # Initialize the Critic 54 | critic = HDPy.PuppyHDP( 55 | tumbled_reward = 0.0, 56 | reservoir = reservoir, 57 | readout = readout, 58 | plant = plant, 59 | policy = collector, 60 | gamma = 0.5, 61 | alpha = 1.0, 62 | init_steps = 10, 63 | norm = nrm 64 | ) 65 | 66 | # Train the critic on offline data 67 | HDPy.puppy.offline_playback( 68 | '/tmp/puppy_offline_data.hdf5', 69 | critic, 70 | samples_per_action = 150, 71 | ms_per_step = 20, 72 | episode_start = 0, 73 | episode_end = 1000, 74 | min_episode_len = 30 75 | ) 76 | 77 | # Store the readout for later use 78 | readout.save('/tmp/puppy_readout.pic') 79 | -------------------------------------------------------------------------------- /HDPy/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module combines Reinforcement Learning and Reservoir Computing by 3 | means of an Actor-Critic design. In Reinforcement Learning, the learning 4 | subject is represented by the agent, while the teacher is denoted as the 5 | environment or plant. At each time step, the agent chooses an action 6 | :math:`a_t`, which leads it from state :math:`s_t` to state 7 | :math:`s_{t+1}`. The state information is provided to the agent by the 8 | environment, together with a reward :math:`r_{t+1}` which announces how 9 | good or bad the state is considered. Note that the reward cannot be used 10 | as a learning target, as it is not an error but merely a hint whether the 11 | agent is heading in the right direction. Instead, the agent's goal is to 12 | collect as much reward as possible over time. The Return expresses this by 13 | taking future rewards into account: 14 | 15 | .. math:: 16 | R_t = \sum\limits_{k=0}^T \gamma^k r_{t+k+1} 17 | 18 | As it may not be meaningful to consider the whole future, the influence 19 | of rewards is decreased the farther they lie in the future. This is controlled 20 | through the discount rate :math:`\gamma`. Further, experiments are often 21 | episodic (meaning that they terminate at some point). This is accounted for 22 | by summing up to the episode length :math:`T` [RL]_. 23 | 24 | An Actor-Critic design splits the agent into two parts: the Actor 25 | decides on the action, for which it is in turn criticised by the Critic. 26 | This means that the Critic learns long-term behaviour, i.e. it approximates 27 | the Return, while the Actor uses the Critic's approximation to select 28 | the action which maximizes the Return in a single step. This module 29 | incorporates Reservoir Computing as the Critic's function approximator 30 | [ESN-ACD]_. 
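As a toy illustration of this definition (not part of the package API),
the return of a short, finite reward sequence can be computed directly:

>>> rewards = [1.0, 0.0, 2.0]   # r_1, r_2, r_3
>>> gamma = 0.5
>>> sum(gamma**k * r for k, r in enumerate(rewards))
1.5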
31 | 32 | """ 33 | from rc import * 34 | from rl import * 35 | from analysis import * 36 | from puppy import * 37 | from inout import * 38 | from hdp import * 39 | 40 | import puppy 41 | import epuck 42 | 43 | from epuck.analysis_epuck import * 44 | from puppy.analysis_puppy import * 45 | from puppy.puppy import * 46 | from puppy.policy import FRA, LRA, LRP 47 | 48 | #from plants import * 49 | #from analysis_epuck import * 50 | #from analysis_puppy import * 51 | #import policies as policy 52 | #from policies_puppy import FRA, LRA, LRP # Deprecated, don't use like this but through policy.puppy.{FRA,LRA,LRP} 53 | 54 | 55 | #__all__ = [''] 56 | -------------------------------------------------------------------------------- /doc/source/puppy_online.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _puppy_online: 3 | 4 | Puppy online workflow 5 | ===================== 6 | 7 | .. note:: 8 | The data recorded this way cannot be used to train another robot 9 | offline on the same dataset. This is because vital metadata for 10 | replay is not stored by the normal HDP implementation. 11 | 12 | In the online setup, all comutations are done within a running [Webots]_ 13 | instance. For [Webots]_, a supervisor and robot controller script is 14 | required, as documented in :py:mod:`PuPy`. In this example, the 15 | simulation is to be reverted whenever the robot falls or leaves a 16 | predefined arena. In this case, reverting the simulation is preferred 17 | over respawning the robot, since this guarantees that the robot is 18 | started in the same state in every episode. 19 | 20 | .. literalinclude:: ../../test/puppy_online_supervisor.py 21 | 22 | The robot controller is structured similar to the ones in the offline 23 | case. First, the preliminaries for :py:class:`ADHDP` are to be prepared. 24 | Hence, in the initialization the policy, plant and echo-state network 25 | is created. Furthermore, an :math:`\epsilon`-greedy acting schema is 26 | set up, by subtyping :py:class:`PuppyHDP` and specifying 27 | :py:meth`ActorCritic._next_action_hook`. Once, the actor-critic instance 28 | is ready in *acd*, the simulation is set up and finally run. In contrast 29 | to the offline case, the Actor-Critic instance is combined with Webots, 30 | as documented in :py:mod:`PuPy`. 31 | 32 | .. literalinclude:: ../../test/puppy_online_robot.py 33 | 34 | These two controllers can be loaded into webots and the simulation 35 | executed. All observations will be stored in the file 36 | ``/tmp/puppy_online.hdf5``, the reservoir and readout are saved at 37 | ``/tmp/puppy_reservoir.pic`` and ``/tmp/puppy_readout.pic``. 38 | 39 | The simulation is reverted once in a while, hence the controller script 40 | will be terminated and reloaded several times. For the 41 | controller to work, it must load the reservoir and readout if they 42 | already exist. Note that the approach shown below saves the readout 43 | before exiting. This will fail for large reservoirs, as the teardown 44 | timeframe is limited by webots. In such a case, the readout weights 45 | may be written into a file (and regained from it upon startup) at every 46 | iteration (this can efficiently be done by means of a seperate HDF5 47 | file). 
48 | -------------------------------------------------------------------------------- /HDPy/epuck/policy/policies.py: -------------------------------------------------------------------------------- 1 | from ...rl import Policy 2 | import warnings 3 | import numpy as np 4 | 5 | class Heading(Policy): 6 | """ePuck policy with the heading as action. 7 | 8 | For historical reasons, it is up to the implementation of the 9 | robot to interpret the action (i.e. whether it is considered relative 10 | or absolute). 11 | 12 | Note that since Webots is not used for ePuck simulation, the action 13 | sequence is reduced to a single item and hence not returned as a list. 14 | This behaviour works fine with the :py:class:`Robot` class. 15 | 16 | """ 17 | def __init__(self): 18 | super(Heading, self).__init__(action_space_dim=1) 19 | self.action = self.initial_action() 20 | 21 | def initial_action(self): 22 | """Return the initial action (0.0).""" 23 | return np.atleast_2d([0.0]).T 24 | 25 | def update(self, action_upd): 26 | """Update the action.""" 27 | self.action = action_upd 28 | 29 | def get_iterator(self, time_start_ms, time_end_ms, step_size_ms): 30 | """Return the heading.""" 31 | return self.action 32 | 33 | def reset(self): 34 | """Reset the action to the initial one (0.0).""" 35 | self.action = self.initial_action() 36 | 37 | class HeadingRandInit(Heading): 38 | """ePuck policy with the heading as action and random 39 | initialization. 40 | 41 | The only difference to :py:class:`Heading` is that the initial 42 | action is not 0.0 but randomly sampled in [0, 2*pi]. 43 | 44 | """ 45 | def initial_action(self): 46 | """Sample a random action and return it as the initial one.""" 47 | rnd = np.random.uniform(0.0, 2*np.pi) 48 | return np.atleast_2d([rnd]).T 49 | 50 | class Trivial(Heading): 51 | """ePuck policy with the heading as action. 52 | 53 | .. deprecated:: 1.0 54 | Use :py:class:`Heading` instead 55 | 56 | """ 57 | def __init__(self): 58 | warnings.warn("This class is deprecated. Use 'Heading' instead") 59 | super(Trivial, self).__init__() 60 | 61 | class RandInit(HeadingRandInit): 62 | """ePuck policy with the heading as action and random 63 | initialization. 64 | 65 | .. deprecated:: 1.0 66 | Use :py:class:`HeadingRandInit` instead 67 | 68 | """ 69 | def __init__(self): 70 | warnings.warn("This class is deprecated. Use 'HeadingRandInit' instead") 71 | super(RandInit, self).__init__() 72 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. Reinforcement Reservoir Learning documentation master file, created by 2 | sphinx-quickstart on Wed May 22 19:50:46 2013. 3 | 4 | Heuristic Dynamic Programming in Python 5 | ======================================= 6 | 7 | .. automodule:: HDPy 8 | 9 | 10 | This documentation gives an overview of the module's functionality, 11 | provides a usage example and lists the interfaces. This order is kept 12 | constant across all (i.e. most) pages. The first four pages 13 | (:ref:`idx_basics`) list the basic interfaces and describe the methods 14 | which implement Reservoir Computing and Reinforcement Learning. These 15 | structures are independent of the experimental platform. 16 | 17 | This package was originally implemented for two platforms, the Puppy 18 | and ePuck robots. The corresponding (and hence platform-dependent) code 19 | is documented in the second section (:ref:`idx_platforms`). 
20 | 21 | The third section (:ref:`idx_resources`) provides further information 22 | and download and installation resources. 23 | 24 | Note that some of the examples write files. In this case, the paths are 25 | usually hardcoded and valid for a unix-like file tree. As the data is 26 | temporary, it is stored in ``/tmp``. When working on other 27 | systems, the paths have to be adapted. 28 | 29 | Furthermore, due to Python's magnificent online help, the interface 30 | documentation is also available from within the interactive interpreter 31 | (e.g. IPython): 32 | 33 | >>> import HDPy 34 | >>> help(HDPy) 35 | 36 | .. note:: 37 | The examples have been written for Linux. As most of them include 38 | paths, they are also specified for a unix-like filesystem. On other 39 | systems, they have to be adapted. Also note that some of the paths 40 | may require adaptations, even on a Linux machine (e.g. normalization 41 | data files). 42 | 43 | Contents 44 | -------- 45 | 46 | .. _idx_basics: 47 | 48 | Basics 49 | ^^^^^^ 50 | 51 | .. toctree:: 52 | :maxdepth: 1 53 | 54 | rc 55 | rl 56 | utils 57 | analysis 58 | 59 | .. _idx_platforms: 60 | 61 | Platforms 62 | ^^^^^^^^^ 63 | 64 | .. toctree:: 65 | :maxdepth: 1 66 | 67 | pp 68 | epuck 69 | puppy 70 | 71 | .. _idx_resources: 72 | 73 | Resources 74 | ^^^^^^^^^ 75 | 76 | .. toctree:: 77 | :maxdepth: 1 78 | 79 | todopg 80 | download 81 | license 82 | references 83 | 84 | 85 | Indices and tables 86 | ------------------ 87 | 88 | * :ref:`genindex` 89 | * :ref:`modindex` 90 | * :ref:`search` 91 | 92 | -------------------------------------------------------------------------------- /test/puppy_offline_sampling_robot.py: -------------------------------------------------------------------------------- 1 | from controller import Robot 2 | import HDPy 3 | import PuPy 4 | import numpy as np 5 | 6 | # setup: 7 | sampling_period_ms = 20 8 | ctrl_period_ms = 3000 9 | 10 | # Policy setup 11 | bound_gait = { 12 | 'amplitude' : ( 0.8, 1.0, 0.8, 1.0), 13 | 'frequency' : (1.0, 1.0, 1.0, 1.0), 14 | 'offset' : ( -0.23, -0.23, -0.37, -0.37), 15 | 'phase' : (0.0, 0.0, 0.5, 0.5) 16 | } 17 | policy = HDPy.puppy.policy.LRA(PuPy.Gait(bound_gait)) 18 | 19 | def random_initial_action(): 20 | """Select a random initial action instead of the fixed one 21 | specified by the gait definition. 22 | """ 23 | N = policy.action_space_dim() 24 | action = np.atleast_2d([-1.0] * N) 25 | while (action < 0.4).any() or (action > 2.0).any() or action.ptp() > 0.5: 26 | action = np.random.normal(0.9, 0.3, size=action.shape) 27 | return action.T 28 | 29 | policy.initial_action = random_initial_action 30 | 31 | # Offline data collector setup 32 | class OfflinePuppy(HDPy.puppy.OfflineCollector): 33 | def _next_action_hook(self, a_next): 34 | """Define the schema according to which actions will be selected. 35 | Hence, this function defines the action and state space sampling 36 | schema. Note that this choice strongly influences training. 
37 | 38 | """ 39 | a_next = np.zeros(self.a_curr.shape) 40 | # Prohibit too small or large amplitudes 41 | while (a_next < 0.2).any() or (a_next > 2.0).any() or ((a_next > 1.0).any() and a_next.ptp() > 0.4): 42 | a_next = self.a_curr + np.random.normal(0.0, 0.15, size=self.a_curr.shape) 43 | 44 | return a_next 45 | 46 | # Initialize the collector 47 | data_collector = PuPy.RobotCollector( 48 | child = policy, 49 | expfile = '/tmp/puppy_offline_data.hdf5' 50 | ) 51 | tumble_collector = PuPy.TumbleCollector( 52 | child = data_collector, 53 | sampling_period_ms = sampling_period_ms, 54 | ctrl_period_ms = ctrl_period_ms 55 | ) 56 | collector = PuPy.ResetCollector( 57 | child = tumble_collector, 58 | sampling_period_ms = sampling_period_ms, 59 | ctrl_period_ms = ctrl_period_ms 60 | ) 61 | 62 | 63 | # actor instantiation 64 | actor = OfflinePuppy( 65 | # policy = collector, 66 | policy = data_collector, 67 | init_steps = 10, 68 | ) 69 | 70 | # robot instantiation 71 | r = PuPy.robotBuilder( 72 | Robot, 73 | actor, 74 | sampling_period_ms = sampling_period_ms, 75 | ctrl_period_ms = ctrl_period_ms, 76 | # event_handlers = [actor.event_handler, tumble_collector.event_handler, collector.event_handler] 77 | ) 78 | 79 | # invoke the main loop, starts the simulation 80 | r.run() 81 | -------------------------------------------------------------------------------- /test/epuck_online.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import HDPy 3 | import PuPy 4 | import os 5 | import pylab 6 | 7 | ## INITIALIZATION ## 8 | 9 | # Robot 10 | obstacles = [ 11 | HDPy.epuck.env.train_lower, 12 | HDPy.epuck.env.train_middle, 13 | HDPy.epuck.env.train_left, 14 | HDPy.epuck.env.train_upper 15 | ] 16 | 17 | robot = HDPy.epuck.AbsoluteRobot( 18 | walls = HDPy.epuck.env.obstacles_box, 19 | obstacles = obstacles, 20 | tol = 0.0, 21 | speed = 0.5, 22 | step_time = 0.5, 23 | ) 24 | 25 | # Plant and Policy 26 | policy = HDPy.epuck.policy.HeadingRandInit() 27 | plant = HDPy.epuck.plant.CollisionAvoidanceFrontal( 28 | theta = 1.0, 29 | obs_noise = 0.05 30 | ) 31 | 32 | # Set up reservoir 33 | reservoir = HDPy.ReservoirNode( 34 | output_dim = 50, 35 | input_dim = policy.action_space_dim() + plant.state_space_dim(), 36 | spectral_radius = 0.95, 37 | input_scaling = 1.0/3.0, 38 | bias_scaling = -3.0, 39 | fan_in_w = 20 40 | ) 41 | 42 | reservoir.initialize() 43 | 44 | # Set up readout 45 | readout = HDPy.StabilizedRLS( 46 | with_bias = True, 47 | input_dim = reservoir.get_output_dim() + policy.action_space_dim() + plant.state_space_dim(), 48 | output_dim = 1, 49 | lambda_ = 1.0 50 | ) 51 | 52 | # Custom ADHDP 53 | class ExperimentingHDP(HDPy.ADHDP): 54 | def _next_action_hook(self, a_next): 55 | """Project action into the interval [0,2pi].""" 56 | return a_next % (2*np.pi) 57 | 58 | # Remove old data file 59 | if os.path.exists('/tmp/epuck_data.hdf5'): 60 | os.unlink('/tmp/epuck_data.hdf5') 61 | 62 | collector = PuPy.RobotCollector( 63 | child = policy, 64 | expfile = '/tmp/epuck_data.hdf5') 65 | 66 | # Create ADHDP instance 67 | acd = ExperimentingHDP( 68 | # Demanded by ADHDP 69 | reservoir = reservoir, 70 | readout = readout, 71 | # Demanded by ActorCritic 72 | plant = plant, 73 | policy = collector, 74 | gamma = 0.5, 75 | alpha = 1.0, 76 | init_steps = 5, 77 | ) 78 | 79 | ## SIMULATION LOOP ## 80 | 81 | # Execute the simulation for 10 episodes, with 100 steps tops each 82 | HDPy.epuck.simulation_loop( 83 | acd, 84 | robot, 85 | max_step = 100, 86 | 
max_episodes = 10, 87 | max_total_iter = -1 88 | ) 89 | 90 | ## EVALUATION ## 91 | 92 | # Load the data file 93 | analysis = HDPy.Analysis('/tmp/epuck_data.hdf5') 94 | 95 | # Plot the trajectories and obstacles 96 | axis = pylab.figure().add_subplot(111) 97 | robot._plot_obstacles(axis=axis) 98 | HDPy.epuck.plot_all_trajectories(analysis, axis) 99 | 100 | # Show the figure 101 | pylab.show(block=False) 102 | -------------------------------------------------------------------------------- /test/puppy_example_trajectory_robot.py: -------------------------------------------------------------------------------- 1 | from controller import Robot 2 | import PuPy 3 | import HDPy 4 | import numpy as np 5 | import h5py 6 | 7 | # Initialize a policy 8 | bound_gait = { 9 | 'amplitude' : ( 0.8, 1.0, 0.8, 1.0), 10 | 'frequency' : (1.0, 1.0, 1.0, 1.0), 11 | 'offset' : ( -0.23, -0.23, -0.37, -0.37), 12 | 'phase' : (0.0, 0.0, 0.5, 0.5) 13 | } 14 | 15 | policy = HDPy.puppy.policy.LRA(PuPy.Gait(bound_gait, 'bounding')) 16 | 17 | # OfflineCollector which follows a predefined sequence of actions 18 | # after the initial behaviour (policy with default params for 25 steps). 19 | class TrajectoryFollower(HDPy.puppy.OfflineCollector): 20 | def __init__(self, trajectory, *args, **kwargs): 21 | super(TrajectoryFollower, self).__init__(*args, **kwargs) 22 | self.trajectory = trajectory 23 | self._traj_idx = 0 24 | 25 | def _next_action_hook(self, a_next): 26 | if self._traj_idx >= self.trajectory.shape[0]: 27 | # If all actions have been executed, signal the supervisor 28 | # to revert the simulation 29 | self.robot.send_msg('revert_on_demand') 30 | return self.a_curr 31 | 32 | # If there's a next action, execute it 33 | a_next = np.atleast_2d(self.trajectory[self._traj_idx]).T 34 | self._traj_idx += 1 35 | 36 | return a_next 37 | 38 | # Load the sequence file 39 | f = h5py.File('/tmp/example_sequence.hdf5','a') 40 | # Get the index of the trajectory to be executed 41 | idx = f['idx'][()] 42 | grp_name = 'traj_%03i' % idx 43 | if grp_name in f: 44 | # Not yet finished, increment the index such that the next 45 | # trajectory is executed in the next experiment. 
46 | trajectory = f[grp_name][:] 47 | do_quit = False 48 | f['idx'][()] += 1 49 | else: 50 | # Simulation is finished, execute any trajectory and prepare for 51 | # termination 52 | while grp_name not in f and idx >= 0: 53 | idx -= 1 54 | grp_name = 'traj_%03i' % (idx) 55 | 56 | if idx < 0: 57 | raise Exception('Could not find last trajectory') 58 | 59 | trajectory = f[grp_name][:] 60 | do_quit = True 61 | 62 | f.close() 63 | 64 | # Initialize the collector 65 | collector = PuPy.RobotCollector( 66 | child = policy, 67 | expfile = '/tmp/example_data.hdf5' 68 | ) 69 | 70 | # Initialize the actor 71 | actor = TrajectoryFollower( 72 | trajectory = trajectory, 73 | policy = collector, 74 | init_steps = 10, 75 | ) 76 | 77 | # Initialize the robot, bind it to webots 78 | r = PuPy.robotBuilder( 79 | Robot, 80 | actor, 81 | sampling_period_ms = 20, 82 | ctrl_period_ms = 3000, 83 | ) 84 | 85 | # Register robot in actor for signalling 86 | actor.robot = r 87 | 88 | if do_quit: 89 | # Quit the simulation when all trajectories are handled 90 | r.send_msg('quit_on_demand') 91 | 92 | # Run the simulation 93 | r.run() 94 | -------------------------------------------------------------------------------- /test/acd.py: -------------------------------------------------------------------------------- 1 | 2 | import HDPy 3 | import PuPy 4 | import pylab 5 | import numpy as np 6 | 7 | # Create and initialize Policy 8 | gait = PuPy.Gait(params={ 9 | 'frequency' : (1.0, 1.0, 1.0, 1.0), 10 | 'offset' : ( -0.23, -0.23, -0.37, -0.37), 11 | 'amplitude' : ( 0.56, 0.56, 0.65, 0.65), 12 | 'phase' : (0.0, 0.0, 0.5, 0.5) 13 | }) 14 | 15 | policy = HDPy.FRA(gait) 16 | 17 | # Plot action 18 | it = policy.get_iterator(0, 100, 20) 19 | pylab.subplot(311) 20 | data = [it.next() for i in range(100)] 21 | pylab.title('Motor action, untampered for 100 steps, 20ms each') 22 | pylab.xlabel('time') 23 | pylab.plot(data) 24 | pylab.show(block=False) 25 | 26 | 27 | # Create and initialize Plant 28 | plant = HDPy.puppy.plant.SpeedReward() 29 | 30 | # Create and initialize ACD 31 | reservoir = HDPy.SparseReservoirNode( 32 | output_dim=10, 33 | input_dim=policy.action_space_dim() + plant.state_space_dim(), 34 | reset_states=False, 35 | spectral_radius=0.9, 36 | fan_in_i=100, 37 | fan_in_w=20 38 | ) 39 | 40 | readout = HDPy.StabilizedRLS( 41 | with_bias=True, 42 | input_dim=reservoir.get_output_dim() + reservoir.get_input_dim(), 43 | output_dim=1, 44 | lambda_=1.0 45 | ) 46 | expfile = '/tmp/acd.hdf5' 47 | collector = PuPy.RobotCollector(child=policy, expfile=expfile) 48 | acd = HDPy.ADHDP( 49 | reservoir, 50 | readout, 51 | plant, 52 | collector 53 | ) 54 | 55 | acd.set_alpha(0.5) 56 | 57 | 58 | N = 100 59 | ep0 = { 60 | 'accelerometer_z' : np.ones(N) * 2.0 + np.random.randn(N)+0.2, 61 | 'puppyGPS_x' : np.ones([N,2]) * [0.0, 1.0] + np.random.randn(N,2)*0.2, 62 | 'puppyGPS_y' : np.ones([N,2]) * [0.0, 10.0] + np.random.randn(N,2)*0.2 63 | } 64 | 65 | ep1 = { 66 | 'accelerometer_z' : np.ones(N) * 2.0 + np.random.randn(N)+0.2, 67 | 'puppyGPS_x' : np.ones([N,2]) * [1.0, 3.0] + np.random.randn(N,2)*0.5, 68 | 'puppyGPS_y' : np.ones([N,2]) * [10.0, 18.0] + np.random.randn(N,2)*0.5 69 | } 70 | 71 | # Initialize for some epochs 72 | it = acd(ep0, time_start_ms= 0, time_end_ms=100, step_size_ms=1) 73 | it = acd(ep0, time_start_ms=100, time_end_ms=200, step_size_ms=1) 74 | it = acd(ep0, time_start_ms=200, time_end_ms=300, step_size_ms=1) 75 | 76 | # First epoch 77 | it = acd(ep0, time_start_ms=300, time_end_ms=400, step_size_ms=1) 78 | data = 
[it.next() for i in range(100)] 79 | pylab.subplot(312) 80 | pylab.title('') 81 | pylab.xlabel('time') 82 | pylab.plot(data) 83 | pylab.show(block=False) 84 | 85 | # Second epoch 86 | it = acd(ep1, time_start_ms=400, time_end_ms=500, step_size_ms=1) 87 | data = [it.next() for i in range(100)] 88 | pylab.subplot(313) 89 | pylab.title('') 90 | pylab.xlabel('time') 91 | pylab.plot(data) 92 | pylab.show(block=False) 93 | 94 | # Test load/save 95 | import tempfile, os 96 | fh, pth = tempfile.mkstemp() 97 | acd.save(pth) 98 | acd2 = HDPy.ADHDP.load(pth) 99 | os.unlink(pth) 100 | 101 | -------------------------------------------------------------------------------- /test/puppy_online_robot.py: -------------------------------------------------------------------------------- 1 | from controller import Robot 2 | import PuPy 3 | import HDPy 4 | import numpy as np 5 | import os 6 | import itertools 7 | import pickle 8 | 9 | 10 | ## INITIALIZATION ## 11 | 12 | # Create a policy 13 | bound_gait = { 14 | 'amplitude' : ( 0.8, 1.0, 0.8, 1.0), 15 | 'frequency' : (1.0, 1.0, 1.0, 1.0), 16 | 'offset' : ( -0.23, -0.23, -0.37, -0.37), 17 | 'phase' : (0.0, 0.0, 0.5, 0.5) 18 | } 19 | 20 | policy = HDPy.puppy.policy.LRA(PuPy.Gait(bound_gait)) 21 | 22 | # Create a plant 23 | landmarks = [i for i in itertools.product((-10.0, -3.3, 3.3, 10.0), (-10.0, -3.3, 3.3, 10.0))] 24 | target_loc = (6.0, 4.0) 25 | plant = HDPy.puppy.plant.TargetLocationLandmarks( 26 | target_loc, 27 | landmarks, 28 | reward_noise = 0.0 29 | ) 30 | # Load the normalization 31 | nrm = PuPy.Normalization(os.path.split(HDPy.__file__)[0]+'/../data/puppy_unit.json') 32 | 33 | # Reservoir 34 | if os.path.exists('/tmp/puppy_reservoir.pic'): 35 | reservoir = pickle.load(open('/tmp/puppy_reservoir.pic','r')) 36 | else: 37 | reservoir = HDPy.ReservoirNode( 38 | output_dim = 100, 39 | input_dim = policy.action_space_dim() + plant.state_space_dim(), 40 | reset_states = False, 41 | spectral_radius = 0.7, 42 | w = HDPy.sparse_reservoir(20), 43 | ) 44 | reservoir.initialize() 45 | reservoir.save('/tmp/puppy_reservoir.pic') 46 | 47 | # Readout 48 | if os.path.exists('/tmp/puppy_readout.pic'): 49 | readout = pickle.load(open('/tmp/puppy_readout.pic','r')) 50 | else: 51 | readout = HDPy.StabilizedRLS( 52 | input_dim = reservoir.get_output_dim() + reservoir.get_input_dim(), 53 | output_dim = 1, 54 | with_bias = True, 55 | lambda_ = 1.0 56 | ) 57 | 58 | # Acting schema 59 | class OnlinePuppy(HDPy.PuppyHDP): 60 | def _next_action_hook(self, a_next): 61 | """Choose the action in an eps-greedy 62 | fashion, meaning that a random action 63 | is preferred over the suggested one with 64 | probability eps. 
65 | """ 66 | if np.random.rand() < 0.2: 67 | a_next = np.random.uniform(low=0.2, high=1.0, size=a_next.shape) 68 | # clip the action to a bounded range 69 | a_next[a_next < 0.2] = 0.2 70 | a_next[a_next > 1.0] = 1.0 71 | return a_next 72 | 73 | # Initialize the collector 74 | collector = PuPy.RobotCollector( 75 | child = policy, 76 | expfile = '/tmp/puppy_online.hdf5' 77 | ) 78 | 79 | # actor 80 | actor = OnlinePuppy( 81 | # HDPy.puppy.PuppyHDP 82 | tumbled_reward = 0.0, 83 | # HDPy.ADHDP 84 | reservoir = reservoir, 85 | readout = readout, 86 | # HDPy.ActorCritic 87 | plant = plant, 88 | policy = collector, 89 | gamma = 0.5, 90 | alpha = 1.0, 91 | init_steps = 10, 92 | norm = nrm 93 | ) 94 | 95 | # robot 96 | r = PuPy.robotBuilder( 97 | Robot, 98 | actor, 99 | sampling_period_ms = 20, 100 | ctrl_period_ms = 3000, 101 | # event_handlers = actor.event_handler 102 | ) 103 | 104 | ## SIMULATION LOOP ## 105 | 106 | # run the simulation 107 | r.run() 108 | 109 | # teardown 110 | readout.save('/tmp/puppy_readout.pic') 111 | -------------------------------------------------------------------------------- /test/rc_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | import HDPy 4 | import numpy as np 5 | import pylab 6 | 7 | ## PARAMS ## 8 | 9 | washout = 200 10 | num_train = 5000 11 | num_test = 1000 12 | reservoir_size = 100 13 | 14 | ## INITIALIZATION ## 15 | 16 | # Reservoir 17 | reservoir_sparse = HDPy.ReservoirNode( 18 | input_dim = 1, 19 | output_dim = reservoir_size, 20 | spectral_radius = 0.9, 21 | w_bias = None, 22 | w = HDPy.sparse_reservoir(20), 23 | w_in = HDPy.sparse_w_in(1.5, 50, rnd_fu=np.random.normal), 24 | ) 25 | 26 | reservoir_orthogonal = HDPy.ReservoirNode( 27 | input_dim = 1, 28 | output_dim = reservoir_size, 29 | spectral_radius = 0.9, 30 | w_bias = None, 31 | w = HDPy.orthogonal_reservoir(20.0), 32 | w_in = HDPy.sparse_w_in(1.5, 50, rnd_fu=np.random.normal), 33 | ) 34 | 35 | reservoir_ring = HDPy.ReservoirNode( 36 | input_dim = 1, 37 | output_dim = reservoir_size, 38 | spectral_radius = 0.9, 39 | w_bias = None, 40 | w = HDPy.ring_of_neurons, 41 | w_in = HDPy.sparse_w_in(1.5, 50, rnd_fu=np.random.normal), 42 | ) 43 | 44 | 45 | # Readout 46 | readout = HDPy.StabilizedRLS( 47 | input_dim = reservoir_size, 48 | output_dim = 1, 49 | with_bias = True, 50 | lambda_ = 1.0, 51 | ) 52 | 53 | readout_orthogonal = readout.copy() 54 | readout_ring = readout.copy() 55 | readout_sparse = readout.copy() 56 | 57 | # Data 58 | def narma30(num_samples=1000): 59 | """30th order NARMA dataset. 
Copied from [Oger]_.""" 60 | system_order = 30 61 | inputs = np.random.rand(num_samples, 1) * 0.5 62 | outputs = np.zeros((num_samples, 1)) 63 | 64 | for k in range(system_order-1, num_samples-1): 65 | outputs[k + 1] = 0.2 * outputs[k] + 0.04 * \ 66 | outputs[k] * np.sum(outputs[k - (system_order-1):k+1]) + \ 67 | 1.5 * inputs[k - 29] * inputs[k] + 0.001 68 | return inputs, outputs 69 | 70 | src, trg = narma30(washout + num_train + num_test) 71 | 72 | ## TRAINING ## 73 | 74 | setups = ('Sparse', 'Orthogonal', 'Ring of Neurons') 75 | reservoirs = (reservoir_sparse, reservoir_orthogonal, reservoir_ring) 76 | readouts = (readout_sparse, readout_orthogonal, readout_ring) 77 | 78 | # Initialize the reservoirs 79 | # Propagate data through the reservoirs, no training 80 | for res in reservoirs: 81 | res(src[:washout]) 82 | 83 | # Train the readout 84 | # Propagate data through reservoir, train the readout online 85 | for res, out in zip(reservoirs, readouts): 86 | r_state = res(src[washout:num_train]) 87 | out.train(r_state, trg[washout:num_train]) 88 | 89 | # Test the networks 90 | signals = [] 91 | for res, out in zip(reservoirs, readouts): 92 | r_state = res(src[washout+num_train:]) 93 | pred = out(r_state) 94 | signals.append(pred) 95 | 96 | ## PLOTTING ## 97 | 98 | # Error measurement 99 | mse = lambda sig_pred, sig_trg: ((sig_pred - sig_trg)**2).mean() 100 | rmse = lambda sig_pred, sig_trg: np.sqrt(mse(sig_pred, sig_trg)) 101 | nrmse = lambda sig_pred, sig_trg: rmse(sig_pred, sig_trg) / sig_trg.std() 102 | 103 | # Output and reservoir output plotting 104 | pretty_str = "{0:<" + str(max(map(len, setups))) + "}\t{1:0.6f}\t{2:0.6f}" 105 | print "Reservoir type\tMSE\t\tNRMSE" 106 | for sig, lbl in zip(signals, setups): 107 | pylab.plot(sig, label=lbl) 108 | err_mse = mse(sig, trg[washout + num_train:]) 109 | err_nrmse = nrmse(sig, trg[washout + num_train:]) 110 | print pretty_str.format(lbl, err_mse, err_nrmse) 111 | 112 | # Target plotting 113 | pylab.plot(trg[washout+num_train:], 'c', label='Target') 114 | 115 | # Show the plot 116 | pylab.axis((0.0, 70.0, 0.0, 0.45)) 117 | pylab.legend(loc=0) 118 | pylab.show(block=False) 119 | -------------------------------------------------------------------------------- /HDPy/epuck/epuck_arena.py: -------------------------------------------------------------------------------- 1 | """ 2 | The environment of the ePuck robot consists of several walls and 3 | obstacles. The difference between those two is that walls are isolated 4 | lines while obstacles are polygons, hence closed shapes. Some obstacles 5 | and arena arrangements have been prepared in :py:mod:`HDPy.epuck.env`. 
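Walls are specified as ``(x0, y0, x1, y1)`` line segments and obstacles as lists of ``(x, y)`` corner points. As a small sketch (how an arrangement is ultimately handed to the simulation depends on the experiment script), the box helpers defined at the bottom of this module can be combined into custom arenas::

    # a 2x2 box centred at (1.0, -2.0), plus one isolated wall segment
    my_arena = box_gen_lines((1.0, -2.0), (1.0, 1.0)) + [(4.0, -5.0, 4.0, 5.0)]
    # the same box as a closed polygon (corner list)
    my_obstacle = box_gen_corners((1.0, -2.0), (1.0, 1.0))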
6 | 7 | """ 8 | # open rectangle 9 | _left, _right = -1.0, 5.0 10 | _bottom, _top = -4.0, 20.0 11 | obstacles_open = [ 12 | (_left, _bottom, _right, _bottom), # bottom line 13 | (_left, _bottom, _left, _top), # left line 14 | (_right, _bottom, _right, _top) # right line 15 | ] 16 | 17 | # rhomboid 18 | _rad_x, _rad_y = 10.0, 10.0 19 | obstacles_rhomb = [ 20 | (0.0, _rad_y, _rad_x, 0.0), # top to right 21 | (0.0, -_rad_y, _rad_x, 0.0), # bottom to right 22 | (-_rad_x, 0.0, 0.0, _rad_y), # left to top 23 | (-_rad_x, 0.0, 0.0, -_rad_y) # left to bottom 24 | ] 25 | 26 | # box 27 | _rad_x, _rad_y = 10.0, 10.0 28 | obstacles_box = [ 29 | ( _rad_x, -_rad_y, _rad_x, _rad_y), # right 30 | (-_rad_x, -_rad_y, -_rad_x, _rad_y), # left 31 | (-_rad_x, _rad_y, _rad_x, _rad_y), # top 32 | (-_rad_x, -_rad_y, _rad_x, -_rad_y) # bottom 33 | ] 34 | 35 | # wall 36 | _dist = 5.0 37 | obstacles_wall = [ 38 | ( _dist, -10, _dist, 10 ) # wall at dist 39 | ] 40 | 41 | # lower box 42 | train_lower = [ 43 | (6.0, -6.0), 44 | (10.0, -6.0), 45 | (10.0, -10.0), 46 | (6.0, -10.0) 47 | ] 48 | 49 | # middle lower box 50 | train_middle = [ 51 | (0.0, -3.0), 52 | (2.0, -5.0), 53 | (0.0, -7.0), 54 | (-2.0, -5.0) 55 | ] 56 | 57 | # left rectangle 58 | train_left = [ 59 | (-6.0, 4.0), 60 | (-4.0, 4.0), 61 | (-4.0, -2.0), 62 | (-6.0, -2.0) 63 | ] 64 | 65 | # upper right box 66 | train_upper = [ 67 | (5.0, 6.0), 68 | (6.0, 2.0), 69 | (5.0, -1.0), 70 | (0.0, 4.0) 71 | ] 72 | 73 | # right triangle 74 | test_right = [ 75 | (2.0, -7.0), 76 | (4.0, 1.0), 77 | (6.0, -2.0) 78 | ] 79 | 80 | # upper rectangle 81 | test_upper = [ 82 | (-6.0, 4.0), 83 | (-6.0, 6.0), 84 | (6.0, 6.0), 85 | (6.0, 4.0) 86 | ] 87 | 88 | # left lower rectangle 89 | test_left = [ 90 | (-4.0, 0.0), 91 | (-1.0, -4.0), 92 | (-4.0, -7.0), 93 | (-7.0, -4.0) 94 | ] 95 | 96 | def box_gen_lines((cx, cy), (sx, sy)): 97 | """Create a rectangle using a center ``(cx,cy)`` and side length 98 | ``(sx, sy)``.""" 99 | return [ 100 | (cx - sx, cy - sy, cx - sx, cy + sy), 101 | (cx + sx, cy - sy, cx + sx, cy + sy), 102 | (cx - sx, cy + sy, cx + sx, cy + sy), 103 | (cx - sx, cy - sy, cx + sx, cy - sy) 104 | ] 105 | 106 | def box_gen_corners((cx, cy), (sx, sy)): 107 | """Create a rectangle using two corners ``(cx,cy)`` and ``(sx,sy)``.""" 108 | return [ 109 | (cx - sx, cy - sy), 110 | (cx + sx, cy - sy), 111 | (cx + sx, cy + sy), 112 | (cx - sx, cy + sy) 113 | ] 114 | 115 | obstacles_boxes = box_gen_lines((5.0, 5.0), (1.0, 1.0)) \ 116 | + box_gen_lines((0.0, 3.0), (1.0, 1.0)) \ 117 | + box_gen_lines((-2.0, -3.0), (1.0, 1.0)) \ 118 | + box_gen_lines((4.0, -2.0), (1.0, 1.0)) \ 119 | + box_gen_lines((-6.0, 5.0), (1.0, 1.0)) 120 | 121 | obstacles_maze = box_gen_lines((3.0, 3.0), (2.5, 1.5)) + box_gen_lines((3.0, 3.0), (5.0, 4.5)) 122 | 123 | obstacles_pipe = [ 124 | (-1.0, 1.5, -1.0, -1.5), # behind 125 | (-1.0, 1.5, 5.0, 1.5), # top 126 | (-1.0, -1.5, 8.0, -1.5), # bottom 127 | (5.0, 1.5, 5.0, 8.0), # ascent, left 128 | (8.0, -1.5, 8.0, 5.0), # ascent, right 129 | (5.0, 8.0, 15.0, 8.0), # opening, left 130 | (8.0, 5.0, 15.0, 5.0) # opening, right 131 | ] 132 | 133 | 134 | # Inverse crown 135 | obstacle_crown = [ 136 | (0.0, 0.0), 137 | (1.0, 1.0), 138 | (2.0, -1.0), 139 | (3.0, 1.0), 140 | (4.0, 0.0), 141 | (4.0, 2.0), 142 | (0.0, 2.0) 143 | ] 144 | -------------------------------------------------------------------------------- /test/puppy_offline_analysis.py: -------------------------------------------------------------------------------- 1 | import HDPy 2 | import pylab 3 | import 
numpy as np 4 | import h5py 5 | import sys 6 | 7 | # global config var 8 | step_width = 150 9 | step_width_plotting = 50 10 | 11 | # the observations file lists the initial epoch, while the critic datafile doesn't 12 | # thus, the sensor data must be shifted by one step_width 13 | obs_offset = step_width 14 | # The analysis experiments are always reverted, hence there's only one initial sample 15 | # (check out puppy.offline_playback:"if 'init_step' in data_grp: [...]", ActorCritic.__call__ and PuppyHDP.init_episode) 16 | # For this initial sample, nothing is written into the analysis_critic_pth file. Hence, there's an offset 17 | # of one epoch for data in analysis_data_pth and analysis_critic_pth. 18 | # Note that if the experiments are restarted instead of reverted, this offset would be =2 19 | 20 | robot_radius = 0.2 21 | 22 | 23 | # Open files 24 | a = HDPy.Analysis(HDPy.H5CombinedFile('/tmp/example_eval.hdf5', '/tmp/example_data.hdf5')) 25 | 26 | # Create figure 27 | fig = pylab.figure() 28 | axis = fig.add_subplot(111) 29 | 30 | # Plot target 31 | target_loc = (6.0, 4.0) 32 | HDPy.puppy.plot_locationtarget(axis, target=target_loc, distance=0.5) 33 | axis.invert_xaxis() # positive x-axis in webots goes to the left! 34 | pylab.show(block=False) 35 | 36 | # Retrieve and plot the initial trajectory 37 | grp = a['0'] # this is assumed to be the main trajectory 38 | main_pth = grp['a_curr'][:] 39 | main_len = main_pth.shape[0] * step_width 40 | HDPy.puppy.plot_trajectory(a, axis, '0', step_width, offset=step_width*25, label='Initial trajectory') 41 | pylab.show(block=False) 42 | 43 | def find_offset(a0, a1): 44 | """Return the number of steps for which the sequences ``a0`` and 45 | ``a1`` are identical. 46 | """ 47 | offset = min(a0.shape[0], a1.shape[0]) 48 | while not (a0[:offset] == a1[:offset]).all(): 49 | offset -= 1 50 | if offset < 0: 51 | raise IndexError() 52 | 53 | return offset 54 | 55 | # group experiments with respect to the main trajectory cutoff and also 56 | # get normalization data 57 | pth_data = {} 58 | for expno in a.experiments: 59 | if expno == '0': 60 | # '0' is the vanilla trajectory, don't consider it 61 | continue 62 | 63 | grp = a[expno] 64 | data_offset = find_offset(main_pth, grp['a_curr'][:]) 65 | 66 | if data_offset not in pth_data: 67 | pth_data[data_offset] = [] 68 | 69 | pth_data[data_offset].append((expno, grp['j_curr'][-3])) 70 | 71 | # Compute normalization params over the whole experiment 72 | returns_total = np.vstack([map(lambda i: i[1], lst) for lst in pth_data.values()]) 73 | nrm_total_min = returns_total.min() 74 | nrm_total_ptp = returns_total.ptp() 75 | 76 | # Go through data, plot the actions/returns 77 | for data_offset in pth_data: 78 | 79 | # get data 80 | experiments, nrm_data = zip(*pth_data[data_offset]) 81 | 82 | # Compute the normalization params over the current state 83 | p_returns = np.hstack(nrm_data) 84 | p_min = p_returns.min() 85 | p_ptp = p_returns.ptp() 86 | 87 | # Plot the robot disc 88 | if len(pth_data[data_offset]) > 1: 89 | loc_robot = (a['0']['puppyGPS_x'][(data_offset+1)*step_width-1], a['0']['puppyGPS_y'][(data_offset+1)*step_width-1]) 90 | robot_color = (np.median(p_returns) - nrm_total_min) / (nrm_total_ptp) 91 | robot_color = 1.0 - robot_color 92 | rob = pylab.Circle(loc_robot, robot_radius, fill=True, facecolor=str(robot_color)) 93 | axis.add_artist(rob) 94 | 95 | # Plot the rays 96 | for expno, return_ in pth_data[data_offset]: 97 | grp = a[expno] 98 | lbl = expno 99 | 100 | sensor_offset = obs_offset + 
data_offset * step_width 101 | data_x_plot = grp['puppyGPS_x'][sensor_offset-1::step_width_plotting] 102 | data_y_plot = grp['puppyGPS_y'][sensor_offset-1::step_width_plotting] 103 | 104 | col = 0.25 + (return_ - p_min) / (2.0 * p_ptp+1e-7) 105 | col = 1.0 - col 106 | col = col[0] 107 | 108 | axis.plot(data_x_plot, data_y_plot, linewidth=1, label=lbl, color=str(col)) 109 | pylab.draw() 110 | -------------------------------------------------------------------------------- /doc/source/pp.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _plants-and-policies: 3 | 4 | Plants and Policies 5 | =================== 6 | 7 | .. contents:: 8 | 9 | 10 | Introduction 11 | ------------ 12 | 13 | .. module:: HDPy 14 | 15 | As described in :ref:`Reinforcement Learning `, 16 | the learning problem formulation is achieved by specifying a 17 | :py:class:`Plant` and a :py:class:`Policy`. For *Puppy* and *ePuck*, 18 | some examples have already been implemented. 19 | 20 | .. plant 21 | 22 | To create a custom plant, the :py:class:`Plant` class has to 23 | be subtyped. As the plant models the environment, it has to compute 24 | a reward and a state from sensor measurements. Together, they encode the 25 | abstract learning target the problem designer has in mind. The 26 | implementation of a plant is straightforward. The two functions 27 | :py:meth:`Plant.state_input` and :py:meth:`Plant.reward` are called 28 | whenever the state or reward is requested. They are expected to return 29 | a vector (:math:`N \times 1`) and a scalar, respectively. The state space 30 | dimension :math:`N` may be announced through the plant's constructor and 31 | later queried by calling :py:meth:`Plant.state_space_dim`. If the 32 | plant depends on the episode, the :py:meth:`Plant.reset` method can be 33 | implemented as well to reset the instance's internal state. Note that 34 | the sensor values are not preprocessed, specifically not normalized. 35 | For this purpose, a normalization instance (:py:class:`PuPy.Normalization`) is 36 | automatically registered at :py:attr:`Plant.normalization`. Note that 37 | normalization is mandatory in :py:meth:`Plant.state_input`. 38 | 39 | .. policy 40 | 41 | The implementation of a custom policy is analogous to the creation 42 | of a new :py:class:`Plant`. Here, the class :py:class:`Policy` is to be 43 | subtyped and some of its methods are to be implemented. As with 44 | :py:class:`Plant`, the normalization and action space dimensions are 45 | automatically registered, in the latter case through the default 46 | constructor. Furthermore, the policy is reset at the beginning of a new 47 | episode through :py:meth:`Policy.reset`. 48 | 49 | The action itself is completely defined through the methods 50 | :py:meth:`Policy.initial_action`, :py:meth:`Policy.update` and 51 | :py:meth:`Policy.get_iterator`. The first returns a valid action, used 52 | for the initial behaviour (i.e. before the actor is in operation). The 53 | other two define the behaviour during the experiment. After an action 54 | has been selected, the :py:meth:`Policy.update` method is called, 55 | which should note the new action and update internal structures. As with 56 | the state, the action is passed as an :math:`M \times 1` vector. This 57 | is followed by a call to :py:meth:`Policy.get_iterator`, which 58 | in turn produces the sequence of motor targets, as requested by 59 | :py:class:`WebotsRobotMixin`. 60 | 61 | 62 | Reference 63 | --------- 64 | 65 | ..
autoclass:: Plant 66 | :members: 67 | 68 | .. autoclass:: Policy 69 | :members: 70 | 71 | 72 | .. _plants_puppy: 73 | 74 | Puppy Plants 75 | ^^^^^^^^^^^^ 76 | 77 | .. autoclass:: HDPy.puppy.plant.SpeedReward 78 | 79 | .. autoclass:: HDPy.puppy.plant.LineFollower 80 | 81 | .. autoclass:: HDPy.puppy.plant.TargetLocation 82 | 83 | .. autoclass:: HDPy.puppy.plant.TargetLocationLandmarks 84 | 85 | .. autoclass:: HDPy.puppy.plant.DiffTargetLocationLandmarks 86 | 87 | 88 | 89 | .. _policies_puppy: 90 | 91 | Puppy Policies 92 | ^^^^^^^^^^^^^^ 93 | 94 | .. GaitPolicy 95 | 96 | .. automodule:: HDPy.puppy.policy.policies 97 | 98 | 99 | Examples: 100 | 101 | .. autoclass:: HDPy.puppy.policy.FRA 102 | 103 | .. autoclass:: HDPy.puppy.policy.LRA 104 | 105 | .. autoclass:: HDPy.puppy.policy.LRP 106 | 107 | .. autoclass:: HDPy.puppy.policy.IIAPFO 108 | 109 | 110 | .. _plants_epuck: 111 | 112 | ePuck Plants 113 | ^^^^^^^^^^^^ 114 | 115 | .. autoclass:: HDPy.epuck.plant.CollisionAvoidanceFrontal 116 | :show-inheritance: 117 | 118 | .. autoclass:: HDPy.epuck.plant.CollisionAvoidanceSideways 119 | :show-inheritance: 120 | 121 | .. autoclass:: HDPy.epuck.plant.CollisionAvoidanceFull 122 | :show-inheritance: 123 | 124 | .. autoclass:: HDPy.epuck.plant.Attractor 125 | :show-inheritance: 126 | 127 | 128 | 129 | .. _policies_epuck: 130 | 131 | ePuck Policies 132 | ^^^^^^^^^^^^^^ 133 | 134 | .. autoclass:: HDPy.epuck.policy.Heading 135 | :show-inheritance: 136 | 137 | .. autoclass:: HDPy.epuck.policy.HeadingRandInit 138 | :show-inheritance: 139 | -------------------------------------------------------------------------------- /HDPy/puppy/plant/AccelerationReward.py: -------------------------------------------------------------------------------- 1 | """ 2 | ACD plants 3 | 4 | 5 | """ 6 | from HDPy import Plant 7 | import numpy as np 8 | import scipy.constants 9 | import scipy.signal 10 | 11 | class AccelerationReward(Plant): 12 | """A :py:class:`Plant` with focus on the speed and acceleration of the robot. 
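The state is the full 24-dimensional, normalized sensor vector (GPS, accelerometer, compass and gyro axes plus the four hip, knee and touch channels), as assembled in :py:meth:`state_input`; the reward trades covered distance against filtered acceleration (see :py:meth:`reward`).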
13 | """ 14 | 15 | def __init__(self): 16 | super(AccelerationReward, self).__init__(state_space_dim=24) 17 | self.x = [] 18 | self.y = [] 19 | self.ax = [] 20 | self.ay = [] 21 | self.az = [] 22 | 23 | 24 | def state_input(self, state): 25 | """Full state 26 | """ 27 | sio = np.atleast_2d([ 28 | self.normalization.normalize_value('puppyGPS_x', state['puppyGPS_x'][-1]), 29 | self.normalization.normalize_value('puppyGPS_y', state['puppyGPS_y'][-1]), 30 | self.normalization.normalize_value('puppyGPS_z', state['puppyGPS_z'][-1]), 31 | self.normalization.normalize_value('accelerometer_x', state['accelerometer_x'][-1]), 32 | self.normalization.normalize_value('accelerometer_y', state['accelerometer_y'][-1]), 33 | self.normalization.normalize_value('accelerometer_z', state['accelerometer_z'][-1]), 34 | self.normalization.normalize_value('compass_x', state['compass_x'][-1]), 35 | self.normalization.normalize_value('compass_y', state['compass_y'][-1]), 36 | self.normalization.normalize_value('compass_z', state['compass_z'][-1]), 37 | self.normalization.normalize_value('gyro_x', state['gyro_x'][-1]), 38 | self.normalization.normalize_value('gyro_y', state['gyro_y'][-1]), 39 | self.normalization.normalize_value('gyro_z', state['gyro_z'][-1]), 40 | self.normalization.normalize_value('hip0', state['hip0'][-1]), 41 | self.normalization.normalize_value('hip1', state['hip1'][-1]), 42 | self.normalization.normalize_value('hip2', state['hip2'][-1]), 43 | self.normalization.normalize_value('hip3', state['hip3'][-1]), 44 | self.normalization.normalize_value('knee0', state['knee0'][-1]), 45 | self.normalization.normalize_value('knee1', state['knee1'][-1]), 46 | self.normalization.normalize_value('knee2', state['knee2'][-1]), 47 | self.normalization.normalize_value('knee3', state['knee3'][-1]), 48 | # state['touch0'][-1], 49 | # state['touch1'][-1], 50 | # state['touch2'][-1], 51 | # state['touch3'][-1], 52 | self.normalization.normalize_value('touch0', state['touch0'][-1]), 53 | self.normalization.normalize_value('touch1', state['touch1'][-1]), 54 | self.normalization.normalize_value('touch2', state['touch2'][-1]), 55 | self.normalization.normalize_value('touch3', state['touch3'][-1]) 56 | ]).T 57 | return sio 58 | 59 | 60 | def reward(self, epoch): 61 | """Return -100.0 if the robot tumbled. 62 | Maximizes speed while minimizing total acceleration 63 | The speed measurement is the average/covered distance since last epoch 64 | and sum of the acceleration minus gravity is used as negative reinforcement. 
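Concretely, as implemented below, the returned value is r = spd - acc, where spd is roughly (3000/n) * ||(x_n, y_n) - (x_0, y_0)|| over the epoch of n samples (left at zero if the displacement looks inconsistent), and acc is the mean of |a_x + a_y + a_z - g| computed on low-pass filtered accelerometer traces.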
65 | """ 66 | 67 | # if (epoch['accelerometer_z'] < 1.0).mean() > 0.8: 68 | # return -100.0 69 | 70 | n = epoch['puppyGPS_x'].size 71 | 72 | #keep last position 73 | self.x = np.concatenate([self.x[-1:], epoch['puppyGPS_x']]) 74 | self.y = np.concatenate([self.y[-1:], epoch['puppyGPS_y']]) 75 | 76 | #store last 2 epochs plus current one 77 | self.ax = np.concatenate([self.ax[-2*n:], epoch['accelerometer_x']]) 78 | self.ay = np.concatenate([self.ay[-2*n:], epoch['accelerometer_y']]) 79 | self.az = np.concatenate([self.az[-2*n:], epoch['accelerometer_z']]) 80 | 81 | spd = 0 82 | if self.x.size > 1: 83 | mov = np.linalg.norm(np.array([self.x[-1] - self.x[0], self.y[-1] - self.y[0]])) 84 | #check consistency 85 | if mov < 0.1*n: 86 | # calculate displacement in a reasonable scale 87 | spd = (3000.0/n) * mov; 88 | 89 | 90 | 91 | s = np.ceil(self.ax.size/3.0) 92 | fr = 0.3 93 | sr = 2*fr + (s/10.0) #should be smaller than s 94 | 95 | #filtered to remove noise; borders of the result always tend to zero and have to be trimmed 96 | end = -np.ceil(sr) 97 | beg = -s+end 98 | fax = firfilt(self.ax, fr, sr)[beg:end] 99 | fay = firfilt(self.ay, fr, sr)[beg:end] 100 | faz = firfilt(self.az, fr, sr)[beg:end] 101 | 102 | if fax.size > 0: 103 | acc = abs(fax + fay + faz - scipy.constants.g).mean() 104 | else: 105 | acc = scipy.constants.g; 106 | 107 | #acc = abs(epoch['accelerometer_x'] + epoch['accelerometer_y'] + epoch['accelerometer_z'] - scipy.constants.g).mean() 108 | return spd - acc; 109 | 110 | 111 | def firfilt(interval, freq, sampling_rate): 112 | """ Second Order LowPass Filter 113 | """ 114 | nfreq = freq/(0.5*sampling_rate) 115 | taps = sampling_rate + 1 116 | a = 1 117 | b = scipy.signal.firwin(taps, cutoff=nfreq) 118 | firstpass = scipy.signal.lfilter(b, a, interval) 119 | secondpass = scipy.signal.lfilter(b, a, firstpass[::-1])[::-1] 120 | return secondpass 121 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. 
doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\ReinforcementReservoirLearning.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\ReinforcementReservoirLearning.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 
153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 
53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/ReinforcementReservoirLearning.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/ReinforcementReservoirLearning.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/ReinforcementReservoirLearning" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/ReinforcementReservoirLearning" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 
143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /HDPy/epuck/plant/plants.py: -------------------------------------------------------------------------------- 1 | from ...rl import Plant 2 | import warnings 3 | import numpy as np 4 | 5 | class CollisionAvoidanceFrontal(Plant): 6 | """Plant for ePuck to realize collision avoidance. The state 7 | consists of the three frontal infrared sensors. The reward is 8 | negative, if one of the three frontal sensors reads a proximity 9 | lower than ``theta``. Gaussian noise is added to the reward if 10 | ``obs_noise`` is positive. 11 | 12 | """ 13 | def __init__(self, theta, obs_noise=0.0): 14 | super(CollisionAvoidanceFrontal, self).__init__(state_space_dim=3) 15 | self.theta = float(theta) 16 | self.obs_sigma = abs(float(obs_noise)) 17 | 18 | def state_input(self, state): 19 | """Return the state from observations ``state``""" 20 | input_ = np.hstack((state['ir'][0, :2], [state['ir'][0, -1]])) 21 | input_ = self.normalization.normalize_value('ir', input_) 22 | #input_ += np.random.normal(scale=0.001, size=input_.shape) # Additive noise 23 | return np.atleast_2d(input_).T 24 | 25 | def reward(self, epoch): 26 | """Return the reward produced by ``epoch``.""" 27 | ir_front = np.hstack((epoch['ir'][:2], [epoch['ir'][-1]])) 28 | ret = float(sum([min(ir - self.theta, 0) for ir in ir_front.T])) 29 | #ret += np.random.normal(scale=0.00001) 30 | if self.obs_sigma > 0.0: 31 | ret += np.random.normal(scale=self.obs_sigma) 32 | return ret 33 | 34 | class CollisionAvoidanceSideways(Plant): 35 | """Plant for ePuck to realize collision avoidance. The state 36 | consists of the frontal and two sideways infrared sensors. The 37 | reward is negative, if one of those sensors reads a proximity 38 | lower than ``theta``. Gaussian noise is added to the reward if 39 | ``obs_noise`` is positive. 40 | 41 | """ 42 | def __init__(self, theta, obs_noise=0.0): 43 | super(CollisionAvoidanceSideways, self).__init__(state_space_dim=3) 44 | self.theta = float(theta) 45 | self.obs_sigma = abs(float(obs_noise)) 46 | 47 | def state_input(self, state): 48 | """Return the state from observations ``state``""" 49 | input_ = np.array((state['ir'][0, 0], state['ir'][0, 2], state['ir'][0, 6])) 50 | input_ = self.normalization.normalize_value('ir', input_) 51 | #input_ += np.random.normal(scale=0.001, size=input_.shape) # Additive noise 52 | return np.atleast_2d(input_).T 53 | 54 | def reward(self, epoch): 55 | """Return the reward produced by ``epoch``.""" 56 | sensors = np.array((epoch['ir'][0, 0], epoch['ir'][0, 2], epoch['ir'][0, 6])) 57 | ret = float(sum([min(ir - self.theta, 0) for ir in sensors])) 58 | if self.obs_sigma > 0.0: 59 | ret += np.random.normal(scale=self.obs_sigma) 60 | return ret 61 | 62 | class CollisionAvoidanceFull(Plant): 63 | """Plant for ePuck to realize collision avoidance. The state 64 | consists of all eight infrared sensors. The reward is 65 | negative, if one of the sensors reads a proximity lower than 66 | ``theta``. 
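For example, with ``theta = 0.2`` and normalized readings of 0.15 and 0.18 on two sensors while all others stay above the threshold, the reward is (0.15 - 0.2) + (0.18 - 0.2) = -0.07.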
Gaussian noise is added to the reward if 67 | ``obs_noise`` is positive. 68 | 69 | """ 70 | def __init__(self, theta, obs_noise=0.0): 71 | super(CollisionAvoidanceFull, self).__init__(state_space_dim=8) 72 | self.theta = float(theta) 73 | self.obs_sigma = abs(float(obs_noise)) 74 | 75 | def state_input(self, state): 76 | """Return the state from observations ``state``""" 77 | input_ = state['ir'].T 78 | input_ = self.normalization.normalize_value('ir', input_) 79 | return input_ 80 | 81 | def reward(self, epoch): 82 | """Return the reward produced by ``epoch``.""" 83 | ret = float(sum([min(ir - self.theta, 0) for ir in epoch['ir'].T])) 84 | #ret += np.random.normal(scale=0.00001) 85 | if self.obs_sigma > 0.0: 86 | ret += np.random.normal(scale=self.obs_sigma) 87 | return ret 88 | 89 | class Attractor(Plant): 90 | """Plant for ePuck to guide it to an ``attractor`` and away from a 91 | ``repeller``. Both points are to be passed as tuples. The state 92 | consists of the robot's location. The reward is inversely 93 | proportional with factor ``scale`` to the distances to the 94 | attractor and repeller, i.e. 95 | 96 | .. math:: 97 | r = \\frac{s}{\Delta_a} - \\frac{s}{\Delta_r} 98 | 99 | """ 100 | def __init__(self, attractor, repeller, scale): 101 | self.attractor = map(float, attractor) 102 | self.repeller = map(float, repeller) 103 | self.scale = scale 104 | super(Attractor, self).__init__(state_space_dim=2) 105 | 106 | def state_input(self, state): 107 | """Return the state from observations ``state``""" 108 | input_ = np.atleast_2d(state['loc']).T 109 | return input_ 110 | 111 | def _idist(self, pt0, pt1): 112 | """Compute the inverse distance between two points ``pt0`` and 113 | ``pt1``. The points are expected to be coordinate tuples. 114 | """ 115 | x_0, y_0 = pt0 116 | x_1, y_1 = pt1 117 | return 1.0 / np.sqrt((x_0 - x_1)**2 + (y_0 - y_1)**2) 118 | 119 | def reward(self, epoch): 120 | """Return the reward produced by ``epoch``.""" 121 | reward = 0.0 122 | reward += self.scale * self._idist(epoch['loc'][0], self.attractor) 123 | reward -= self.scale * self._idist(epoch['loc'][0], self.repeller) 124 | return reward 125 | 126 | 127 | class Trivial(CollisionAvoidanceFrontal): 128 | """Plant for ePuck to realize collision avoidance, using the three 129 | frontal sensors. 130 | 131 | .. deprecated:: 1.0 132 | Use :py:class:`CollisionAvoidanceFrontal` instead 133 | 134 | """ 135 | def __init__(self, *args, **kwargs): 136 | warnings.warn("This class is deprecated. Use 'CollisionAvoidance' instead") 137 | super(Trivial, self).__init__(*args, **kwargs) 138 | 139 | class SidewaysTrivial(CollisionAvoidanceSideways): 140 | """Plant for ePuck to realize collision avoidance, using the frontal 141 | and two sideways sensors. 142 | 143 | .. deprecated:: 1.0 144 | Use :py:class:`CollisionAvoidanceFrontal` instead 145 | 146 | """ 147 | def __init__(self, *args, **kwargs): 148 | warnings.warn("This class is deprecated. Use 'CollisionAvoidanceSideways' instead") 149 | super(SidewaysTrivial, self).__init__(*args, **kwargs) 150 | 151 | class FullTrivial(CollisionAvoidanceFull): 152 | """Plant for ePuck to realize collision avoidance, using the all 153 | eight infrared sensors. 154 | 155 | .. deprecated:: 1.0 156 | Use :py:class:`CollisionAvoidanceFrontal` instead 157 | 158 | """ 159 | def __init__(self, *args, **kwargs): 160 | warnings.warn("This class is deprecated. 
Use 'CollisionAvoidanceFull' instead") 161 | super(FullTrivial, self).__init__(*args, **kwargs) 162 | -------------------------------------------------------------------------------- /HDPy/inout.py: -------------------------------------------------------------------------------- 1 | """ 2 | When storing experimental data in HDF5 files, some extra operations 3 | may be useful to process them on a low level. The operations provided 4 | by this module mangle HDF5 files directly (through h5py), without 5 | relying on higher-level functionality. In turn, some of the 6 | functionality may be useful for more advanced stuff. 7 | 8 | Note that all functions rely on a specific file format, specifically on 9 | the format which is written by :py:class:`PuPy.RobotCollector`, with 10 | experiments in groups and sensor data in seperate datasets within the 11 | experiment group. On this ground, too short experiments can be removed 12 | (:py:func:`remove_init_only_groups`) or files merged together 13 | (:py:func:`h5_merge_experiments`). When data is split up between two 14 | files, they can easily be put together by :py:class:`H5CombinedFile`. 15 | 16 | """ 17 | import h5py 18 | import warnings 19 | 20 | def remove_init_only_groups(pth, init_steps): 21 | """Remove groups from HDF5 data files, which 22 | 23 | a) Are empty (0 members) 24 | b) Have collected less than ``init_steps`` epochs 25 | 26 | """ 27 | if isinstance(pth, str): 28 | f = h5py.File(pth, 'a') 29 | else: 30 | f = pth 31 | 32 | all_keys = f.keys() 33 | remove_zero = [k for k in all_keys if len(f[k]) == 0] 34 | remove_short = [k for k in all_keys if len(f[k]) > 0 and f[k]['a_curr'].shape[0] < init_steps] 35 | 36 | for k in remove_zero + remove_short: 37 | print "Removing", k 38 | del f[k] 39 | 40 | print "Removed", (len(remove_zero) + len(remove_short)), "groups" 41 | 42 | return f 43 | 44 | def h5_reorder(pth): 45 | """Rearrange the experiments in ``pth`` such that the experiment 46 | indices are in the range [0,N], without missing ones. 47 | No order of the experiments is guaranteed. 48 | 49 | """ 50 | if isinstance(pth, str): 51 | f = h5py.File(pth, 'a') 52 | else: 53 | f = pth 54 | 55 | # keys must be ascending 56 | old_keys = map(str, sorted(map(int, f.keys()))) 57 | for new_key, old_key in enumerate(old_keys): 58 | new_key = str(new_key) 59 | if new_key != old_key: 60 | if new_key not in f.keys(): 61 | print old_key, "->", new_key 62 | f[new_key] = f[old_key] 63 | del f[old_key] 64 | else: 65 | print "Cannot move", old_key, "to", new_key, "(new key exists)" 66 | 67 | return f 68 | 69 | def h5_merge_experiments(pth0, pth1, trg=None): 70 | """Merge groups of the HDF5 files ``pth0`` and ``pth1``. If ``trg`` 71 | is given, a new file will be created. Otherwise the data is merged 72 | into ``pth0``. 73 | 74 | """ 75 | fh1 = h5py.File(pth1, 'r') 76 | 77 | if trg is None: 78 | f_trg = fh0 = h5py.File(pth0, 'a') 79 | else: 80 | f_trg = h5py.File(trg, 'w') 81 | fh0 = h5py.File(pth0, 'r') 82 | # Copy groups of file0 to trg 83 | for k in fh0.keys(): 84 | fh0.copy(k, f_trg) 85 | 86 | groups_0 = map(int, fh0.keys()) 87 | groups_1 = map(int, fh1.keys()) 88 | 89 | # Copy groups of file1 to trg 90 | offset = 1 + max(groups_0) - min(groups_1) 91 | for k in groups_1: 92 | src = str(k) 93 | dst = str(k + offset) 94 | fh1.copy(src, f_trg, name=dst) 95 | 96 | return f_trg 97 | 98 | def remove_boundary_groups(pth): 99 | """Remove the first and last experiment with respect to webots 100 | restart/revert in ``pth``. 
The boundaries are determined through 101 | the *init_step* group. This method is to save possibly corrupted 102 | experimental data files, due to webots' memory issues. To work 103 | properly, the groups must not be altered before this method, e.g. 104 | by :py:func:`remove_init_only_groups`. 105 | 106 | """ 107 | if isinstance(pth, str): 108 | f = h5py.File(pth, 'a') 109 | else: 110 | f = pth 111 | 112 | keys = sorted(map(int, f.keys())) 113 | restarts = [k for k in keys if 'init_step' in f[str(k)]] 114 | restarts += [k-1 for k in restarts if k > 0] 115 | restarts += [keys[-1]] 116 | restarts = set(sorted(restarts)) 117 | for k in restarts: 118 | del f[str(k)] 119 | 120 | return f 121 | 122 | class H5CombinedFile(object): 123 | """Combine two HDF5 files which have the same groups on the root 124 | level but different datasets within these groups. The files are 125 | packed together such that they can be handled as if a single file 126 | was present. 127 | 128 | ``pth_main`` 129 | Path to the first HDF5 file. If a dataset is available in 130 | both files, the one from this file will be used. 131 | 132 | 133 | ``pth_additional`` 134 | Path to the second HDF5 file. 135 | 136 | """ 137 | def __init__(self, pth_main, pth_additional): 138 | self.pth0 = pth_main 139 | self.pth1 = pth_additional 140 | self.fh0 = h5py.File(pth_main, 'r') 141 | self.fh1 = h5py.File(pth_additional, 'r') 142 | self.keys0 = [k for k in self.fh0 if len(self.fh0[k]) > 0] 143 | self.keys1 = [k for k in self.fh1 if len(self.fh1[k]) > 0] 144 | self.keys_common = [k for k in self.keys0 if k in self.keys1] 145 | 146 | def __getitem__(self, key): 147 | """Return a :py:class:`H5CombinedGroup` instance, binding the 148 | groups ``key`` of the two files together. 149 | """ 150 | if key not in self.keys_common: 151 | raise KeyError() 152 | 153 | return H5CombinedGroup(self.fh0[key], self.fh1[key]) 154 | 155 | def __len__(self): 156 | """Return the length of all (shared) groups.""" 157 | return len(self.keys_common) 158 | 159 | def __contains__(self, item): 160 | """True iff ``item`` is a group known in both files.""" 161 | return item in self.keys_common 162 | 163 | def keys(self): 164 | """Return all group names which are present in both files.""" 165 | return self.keys_common[:] 166 | 167 | def close(self): 168 | """Close all filehandlers.""" 169 | self.fh0.close() 170 | self.fh1.close() 171 | 172 | def attributes(self, key): 173 | """Return two attribute manager instances, one pointing to group 174 | ``key`` in each file. 175 | """ 176 | assert key in self.keys_common 177 | attrs0 = h5py.AttributeManager(self.fh0[key]) 178 | attrs1 = h5py.AttributeManager(self.fh1[key]) 179 | return attrs0, attrs1 180 | 181 | class H5CombinedGroup(object): 182 | """Combine two related HDF5 groups which store different datasets 183 | and present them as a single group. Instances to this class are 184 | typically exclusively created through :py:class:`H5CombinedFile`. 185 | 186 | ``grp0`` 187 | Group of the first file. If a dataset is present in both groups, 188 | the one from this group will be used. 189 | 190 | ``grp1`` 191 | Group of the second file. 192 | 193 | """ 194 | def __init__(self, grp0, grp1): 195 | self.grp0 = grp0 196 | self.grp1 = grp1 197 | 198 | def __getitem__(self, key): 199 | """Return dataset ``key`` or raise an exception if neither of 200 | the groups contains this key. 
201 | """ 202 | if key in self.grp0: 203 | return self.grp0[key] 204 | elif key in self.grp1: 205 | return self.grp1[key] 206 | else: 207 | raise KeyError() 208 | 209 | def __len__(self): 210 | """Return the number of keys in both groups.""" 211 | return len(self.grp0) + len(self.grp1) 212 | 213 | def __contains__(self, item): 214 | """True iff ``item`` is a key of one of the groups.""" 215 | return item in self.grp0 or item in self.grp1 216 | 217 | def keys(self): 218 | """Return a list of datasets names found in any of the groups.""" 219 | return self.grp0.keys() + self.grp1.keys() 220 | 221 | def attributes(self): 222 | """Return two attribute manager instances, one pointing to each 223 | group.""" 224 | attrs0 = h5py.AttributeManager(self.grp0) 225 | attrs1 = h5py.AttributeManager(self.grp1) 226 | return attrs0, attrs1 227 | 228 | 229 | class DataMerge(H5CombinedFile): 230 | """Identical to :py:class:`H5Combine` 231 | 232 | .. deprecated:: 1.0 233 | Use :py:class:`H5Combine`. 234 | 235 | """ 236 | def __init__(self, *args, **kwargs): 237 | warnings.warn('This class is depcreated. Use H5CombinedFile instead') 238 | super(DataMerge, self).__init__(*args, **kwargs) 239 | 240 | class DataMergeGroup(H5CombinedGroup): 241 | """Identical to :py:class:`H5CombinedGroup` 242 | 243 | .. deprecated:: 1.0 244 | Use :py:class:`H5CombinedGroup` 245 | 246 | """ 247 | def __init__(self, *args, **kwargs): 248 | warnings.warn('This class is depcreated. Use H5CombinedGroup instead') 249 | super(DataMergeGroup, self).__init__(*args, **kwargs) 250 | 251 | -------------------------------------------------------------------------------- /HDPy/epuck/analysis_epuck.py: -------------------------------------------------------------------------------- 1 | """ 2 | The analysis of ePuck experiments is conducted by looking at isolated 3 | time steps of a testing or training situation. Three tools have been 4 | implemented to support this procedure: 5 | :py:func:`epuck_plot_all_trajectories` plots all training trajectories, 6 | :py:func:`epuck_plot_value_over_action` creates a graph of the expected 7 | return in a state as a function of the action. 8 | :py:func:`epuck_plot_snapshot` plots the predicted return over an 9 | example trajectory for several actions. 10 | 11 | """ 12 | import pylab 13 | import warnings 14 | import numpy as np 15 | 16 | def plot_all_trajectories(analysis, axis=None, key='loc'): 17 | """Plot trajectories of all episodes in ``analysis`` in the same 18 | plot ``axis``. The later an episode, the darker its trajectory is 19 | displayed. The trajectory data must be stored as ``key`` (default 20 | *loc*), a two-dimensional array. This function is intended to be 21 | used for analysis of **ePuck** experiments. 22 | """ 23 | if axis is None: 24 | axis = pylab.figure().add_subplot(111) 25 | 26 | data = analysis.get_data(key) 27 | N = len(data)-1.0 28 | if N == 0.0: 29 | for idx, episode in enumerate(data): 30 | col = 0.0 31 | axis.plot(episode[:, 0], episode[:, 1], color=str(col), label=str(idx)) 32 | else: 33 | for idx, episode in enumerate(data): 34 | #col = 0.75 - (0.75 * (idx - 1))/N 35 | col = 0.75 * (1.0 - float(idx) / N) 36 | axis.plot(episode[:, 0], episode[:, 1], color=str(col), label=str(idx)) 37 | 38 | return axis 39 | 40 | def _plot_line(axis, origin, angle, size_hi, size_lo=0.0, **kwargs): 41 | """Plot a straight line into ``axis``. The line is described through 42 | the ``origin`` and the ``angle``. 
It is drawn from ``size_lo`` to 43 | ``size_hi``, where both parameters are passed as fractions of said 44 | line. ``kwargs`` are passed to :py:meth:`pylab.plot`. 45 | """ 46 | src = (origin[0] + np.cos(angle) * size_lo, origin[1] + np.sin(angle) * size_lo) 47 | trg = (origin[0] + np.cos(angle) * size_hi, origin[1] + np.sin(angle) * size_hi) 48 | axis.plot((src[0], trg[0]), (src[1], trg[1]), **kwargs) 49 | 50 | def plot_value_over_action(critic, state, axis, a_range=None): 51 | """Given a trained ``critic``, plot the expected return as function 52 | of the action, given a ``state`` into ``axis``. Assuming 1-d action 53 | (otherwise, it becomes messy to plot). The default sampled actions 54 | range ``a_range`` is :math:`[0, 2\pi]` with step size 0.01. 55 | """ 56 | if a_range is None: 57 | a_range = np.arange(0.0, 2*np.pi, 0.01) 58 | exp_return = np.vstack([critic(state, action%(2*np.pi), simulate=True) for action in a_range]) 59 | axis.plot(a_range, exp_return, label='J(a|s)') 60 | axis.set_xlabel('action') 61 | axis.set_ylabel('Expected return') 62 | return axis 63 | 64 | def plot_snapshot(axis, robot, critic, trajectory, sample_actions, init_steps=1, traj_chosen=None, inspected_steps=None): 65 | """Plot a snapshot of an *ePuck* experiment. The plot shows an 66 | example trajectory of the ``robot``, together with the expected 67 | return - i.e. evaluation of the ``critic`` at each state for some 68 | ``sample_actions``. Obviously, the ``critic`` needs to be 69 | pre-trained for this to make sense. 70 | 71 | .. note:: 72 | The action is assumed to represent the absolute heading. 73 | 74 | ``axis`` 75 | A :py:class:`pylab.Axis` to draw into. 76 | 77 | ``robot`` 78 | The ePuck robot. 79 | 80 | ``critic`` 81 | The pre-trained critic. It's supposed to be generated by 82 | :py:meth:`critic` (or implement the :py:meth:`critic_fu` 83 | interface). 84 | 85 | ``trajectory`` 86 | Example trajectory the robot is moved along. 87 | 88 | ``sample_actions`` 89 | List of actions to be sampled and displayed at each step. 90 | 91 | ``init_steps`` 92 | Number of steps the robot is initialized. During these steps, 93 | the robot is moved with action=0 but the ``critic`` not updated. 94 | 95 | ``traj_chosen`` 96 | Represents the sequence of actions which was chosen by the 97 | algorithm at each step of the trajectory. If it is 98 | :py:const:`None`, it will be ignored. If not, it must be a 99 | list at least as long as ``trajectory``. 100 | 101 | ``inspected_steps`` 102 | List of step numbers, for which the expected return is plotted 103 | over the action, given the state at the respective step. 
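A minimal usage sketch (``robot`` and ``critic`` are placeholders; constructing and pre-training them is application specific and not shown here)::

    axis = pylab.figure().add_subplot(111)
    headings = [0.0] * 20                                  # move along a fixed absolute heading
    actions = list(np.arange(0.0, 2 * np.pi, np.pi / 4))   # 8 sampled headings per state
    plot_snapshot(axis, robot, critic, headings, actions, inspected_steps=[10])
    pylab.show()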
104 | 105 | """ 106 | if traj_chosen is not None: 107 | assert len(traj_chosen) >= len(trajectory) 108 | else: 109 | traj_chosen = [None] * len(trajectory) 110 | 111 | if inspected_steps is None: 112 | inspected_steps = [] 113 | 114 | robot_radius = 0.1 115 | robot_color = (0.0, 0.0, 0.0, 0.0) # white 116 | ray_len = robot_radius + 0.05 117 | 118 | for i in range(init_steps): # initialize 119 | robot.take_action(robot.pose) 120 | 121 | rays = [] 122 | for num_step, (action_ex, action_chosen) in enumerate(zip(trajectory, traj_chosen)): 123 | 124 | # execute action, get the robot into the next state 125 | collided = robot.take_action(action_ex) 126 | s_curr = robot.read_sensors() 127 | 128 | # plot the robot 129 | loc_robot = s_curr['loc'][0] 130 | pose = s_curr['pose'][0, 0] 131 | if num_step % 2 == 0: 132 | rob = pylab.Circle(loc_robot, robot_radius, fill=True, facecolor=robot_color) 133 | axis.add_artist(rob) 134 | # plot the robot orientation 135 | _plot_line(axis, loc_robot, pose, robot_radius, color='k') 136 | 137 | if num_step % 2 == 0: 138 | # evaluate the critic on the actions 139 | p_returns = [] 140 | #print "" 141 | for action_eval in sample_actions: 142 | predicted_return = critic(s_curr, action_eval, simulate=True) 143 | predicted_return = predicted_return[0, 0] 144 | p_returns.append((action_eval, predicted_return)) 145 | #print action_eval, predicted_return 146 | 147 | # normalize returns 148 | r_offset = min([return_ for (action, return_) in p_returns]) 149 | r_scale = max([return_ for (action, return_) in p_returns]) - r_offset 150 | 151 | for action_eval, predicted_return in p_returns: 152 | length = ray_len + 0.1 * (predicted_return - r_offset) / r_scale 153 | #rays.append((loc_robot, (pose+action_eval) % (2*np.pi), length, predicted_return)) # relative heading 154 | rays.append((loc_robot, (action_eval) % (2*np.pi), length, predicted_return)) # absolute heading 155 | 156 | if num_step in inspected_steps: 157 | fig_inspected = pylab.figure() 158 | epuck_plot_value_over_action(critic, s_curr, fig_inspected.add_subplot(111), a_range=np.arange(-2.0*np.pi, 2.0*np.pi, 0.01)) 159 | fig_inspected.suptitle('Expected return in after %i steps (%s)' % (num_step, str(loc_robot))) 160 | 161 | # advance critic 162 | critic(s_curr, action_ex, simulate=False) 163 | 164 | if collided: 165 | break 166 | 167 | # normalize returns 168 | r_offset = min([predicted_return for (loc, ori, length, predicted_return) in rays]) 169 | r_scale = max([predicted_return for (loc, ori, length, predicted_return) in rays]) - r_offset 170 | 171 | # plot rays 172 | for (loc, ori, length, predicted_return) in rays: 173 | nrm_return = (predicted_return - r_offset) / r_scale 174 | col = pylab.cm.hot(0.7 * nrm_return) # for the report 175 | #col = pylab.cm.spectral(nrm_return*0.25) # for the presentation 176 | 177 | # plot ray 178 | _plot_line(axis, loc, ori, size_hi=length+0.03, size_lo=robot_radius+0.03, color=col, linewidth=4) 179 | 180 | return axis 181 | 182 | 183 | ## DEPRECATED ## 184 | 185 | def epuck_plot_all_trajectories(*args, **kwargs): 186 | """Alias of :py:func:`plot_all_trajectories` 187 | 188 | .. deprecated:: 1.0 189 | Use :py:func:`plot_all_trajectories` instead 190 | 191 | """ 192 | warnings.warn("Deprecated. Use 'plot_all_trajectories' instead") 193 | return plot_all_trajectories(*args, **kwargs) 194 | 195 | def epuck_plot_value_over_action(*args, **kwargs): 196 | """Alias of :py:func:`plot_value_over_action` 197 | 198 | .. 
deprecated:: 1.0 199 | Use :py:func:`plot_value_over_action` instead 200 | 201 | """ 202 | warnings.warn("Deprecated. Use 'plot_value_over_action' instead") 203 | return plot_value_over_action(*args, **kwargs) 204 | 205 | def epuck_plot_snapshot(*args, **kwargs): 206 | """Alias of :py:func:`plot_snapshot` 207 | 208 | .. deprecated:: 1.0 209 | Use :py:func:`plot_snapshot` instead 210 | 211 | """ 212 | warnings.warn("Deprecated. Use 'plot_snapshot' instead") 213 | return plot_snapshot(*args, **kwargs) 214 | -------------------------------------------------------------------------------- /HDPy/puppy/plant/plants.py: -------------------------------------------------------------------------------- 1 | from ...rl import Plant 2 | import numpy as np 3 | import warnings 4 | 5 | class SpeedReward(Plant): 6 | """A :py:class:`Plant` with focus on the speed of the robot. 7 | """ 8 | def __init__(self): 9 | super(SpeedReward, self).__init__(state_space_dim=2) 10 | 11 | def state_input(self, state): 12 | """Return the location, sampled from the *GPS* (x,y) values. 13 | The sample is an average over the last 10 GPS coordinates. 14 | """ 15 | sio = np.atleast_2d([ 16 | self.normalization.normalize_value('puppyGPS_x', state['puppyGPS_x'][-10:]).mean(), 17 | self.normalization.normalize_value('puppyGPS_y', state['puppyGPS_y'][-10:]).mean() 18 | ]).T 19 | return sio 20 | 21 | def reward(self, epoch): 22 | """Return the covered distance and -1.0 if the robot tumbled. 23 | The speed measurement is taken from the 100th to the last sample. 24 | """ 25 | if (epoch['accelerometer_z'][-100:] < 1.0).sum() > 80: 26 | return -1.0 27 | 28 | x = epoch['puppyGPS_x'] 29 | y = epoch['puppyGPS_y'] 30 | return np.linalg.norm(np.array([x[-1] - x[-100], y[-1] - y[-100]])) 31 | 32 | class LineFollower(Plant): 33 | """A :py:class:`Plant` which gives negative reward proportional to 34 | the distance to a line in the xy plane. The line is described by 35 | its ``origin`` and the ``direction``. 36 | """ 37 | def __init__(self, origin, direction, reward_noise=0.01): 38 | super(LineFollower, self).__init__(state_space_dim=2) 39 | self.origin = np.atleast_2d(origin) 40 | self.direction = np.atleast_2d(direction) 41 | self.reward_noise = reward_noise 42 | 43 | if self.origin.shape[0] < self.origin.shape[1]: 44 | self.origin = self.origin.T 45 | 46 | if self.direction.shape[0] < self.direction.shape[1]: 47 | self.direction = self.direction.T 48 | 49 | self.direction /= np.linalg.norm(self.direction) 50 | 51 | assert self.direction.shape == (2, 1) 52 | assert self.origin.shape == (2, 1) 53 | 54 | def state_input(self, state): 55 | """Return the latest *GPS* (x,y) values. 56 | """ 57 | sio = np.atleast_2d([ 58 | self.normalization.normalize_value('puppyGPS_x', state['puppyGPS_x'][-10:]).mean(), 59 | self.normalization.normalize_value('puppyGPS_y', state['puppyGPS_y'][-10:]).mean() 60 | ]).T 61 | return sio 62 | 63 | def reward(self, epoch): 64 | """Return the distance between the current robot location and 65 | the line. 
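More precisely, the value returned is the *negative* of that distance (minus the norm of the component of ``origin - point`` orthogonal to ``direction``), perturbed by Gaussian noise of scale ``reward_noise``.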
66 | """ 67 | x = epoch['puppyGPS_x'][-1] 68 | y = epoch['puppyGPS_y'][-1] 69 | point = np.atleast_2d([x, y]).T 70 | 71 | #(origin - point) - () * dir 72 | diff = self.origin - point 73 | proj = diff - self.direction.T.dot(diff).dot(self.direction.T).T 74 | #return np.tanh(1.0/np.linalg.norm(proj)) 75 | 76 | reward = -np.linalg.norm(proj) 77 | reward += np.random.normal(scale=self.reward_noise, size=reward.shape) 78 | return reward 79 | 80 | class TargetLocation(Plant): 81 | """A :py:class:`Plant` which gives negative reward proportional to 82 | the distance to point ``target`` in the xy plane. If the robot is 83 | closer than ``radius`` to the target, the reward will be 0.0. 84 | 85 | """ 86 | def __init__(self, target, radius=0.0, reward_noise=0.01): 87 | super(TargetLocation, self).__init__(state_space_dim=2) 88 | self.target = np.atleast_2d(target) 89 | self.radius = radius 90 | self.reward_noise = reward_noise 91 | 92 | if self.target.shape[0] < self.target.shape[1]: 93 | self.target = self.target.T 94 | 95 | assert self.target.shape == (2, 1) 96 | 97 | def state_input(self, state): 98 | """Return the latest *GPS* (x,y) values.""" 99 | sio = np.atleast_2d([ 100 | self.normalization.normalize_value('puppyGPS_x', state['puppyGPS_x'][-10:]).mean(), 101 | self.normalization.normalize_value('puppyGPS_y', state['puppyGPS_y'][-10:]).mean() 102 | ]).T 103 | return sio 104 | 105 | def reward(self, epoch): 106 | """Return the distance between the current robot location and 107 | the target point. 108 | 109 | """ 110 | x = epoch['puppyGPS_x'][-1] 111 | y = epoch['puppyGPS_y'][-1] 112 | point = np.atleast_2d([x, y]).T 113 | 114 | 115 | #(target - point) 116 | diff = self.target - point 117 | dist = np.linalg.norm(diff) 118 | 119 | if dist < self.radius: 120 | dist = 0.0 121 | 122 | reward = np.exp(-0.25 * (dist - 9.0)) + 1.0 123 | 124 | if self.reward_noise > 0.0: 125 | reward += np.random.normal(scale=self.reward_noise) 126 | 127 | return reward 128 | 129 | class TargetLocationLandmarks(TargetLocation): 130 | """A :py:class:`Plant` which gives negative reward proportional to 131 | the distance to point ``target`` in the xy plane. If the robot is 132 | closer than ``radius`` to the target, the reward will be 0.0. 133 | The state is composed of the distance to predefined ``landmarks``, 134 | specified with their coordinates in the xy plane. Gaussian noise 135 | will be added to the reward, if ``reward_noise`` is positive. 136 | 137 | """ 138 | def __init__(self, target, landmarks, radius=0.0, reward_noise=0.01): 139 | super(TargetLocationLandmarks, self).__init__(target, radius, reward_noise) 140 | self._state_space_dim = len(landmarks) 141 | 142 | # add landmarks 143 | self.landmarks = [] 144 | for mark in landmarks: 145 | mark = np.atleast_2d(mark) 146 | if mark.shape[0] < mark.shape[1]: 147 | mark = mark.T 148 | self.landmarks.append(mark) 149 | 150 | def state_input(self, state): 151 | """Return the distance to the landmarks.""" 152 | sio = np.atleast_2d([ 153 | state['puppyGPS_x'][-10:].mean(), 154 | state['puppyGPS_y'][-10:].mean() 155 | ]).T 156 | 157 | dist = [np.linalg.norm(sio - mark) for mark in self.landmarks] 158 | dist = np.atleast_2d(dist).T 159 | dist = self.normalization.normalize_value('landmark_dist', dist) 160 | return dist 161 | 162 | class DiffTargetLocationLandmarks(TargetLocationLandmarks): 163 | """A :py:class:`Plant` which gives positive reward proportional to 164 | the absolute difference (between two episodes) in distance to 165 | point ``target`` in the xy plane. 
The state is composed of the 166 | distance to predefined ``landmarks``, 167 | specified with their coordinates in the xy plane. Gaussian noise 168 | will be added to the reward, if ``reward_noise`` is positive. 169 | 170 | Before the first call, the distance is set to ``init_distance``. 171 | 172 | """ 173 | def __init__(self, target, landmarks, reward_noise=0.01, init_distance=100): 174 | super(DiffTargetLocationLandmarks, self).__init__(target, landmarks, 0.0, reward_noise) 175 | self.init_distance = init_distance 176 | self._last_target_distance = self.init_distance # TODO: what is good init value? 177 | 178 | def reward(self, epoch): 179 | """Return the reward of ``epoch``.""" 180 | x = epoch['puppyGPS_x'][-1] 181 | y = epoch['puppyGPS_y'][-1] 182 | point = np.atleast_2d([x, y]).T 183 | 184 | 185 | #(target - point) 186 | diff = self.target - point 187 | dist = np.linalg.norm(diff) 188 | 189 | # reward is difference of distance between current and previous episode 190 | reward = dist - self._last_target_distance 191 | self._last_target_distance = dist 192 | reward += np.random.normal(scale=self.reward_noise, size=reward.shape) 193 | return reward 194 | 195 | def reset(self): 196 | """Reset the last distance to the initial one.""" 197 | self._last_target_distance = self.init_distance 198 | 199 | class LandmarksTarLoc(TargetLocationLandmarks): 200 | """A :py:class:`Plant` which gives negative reward proportional to 201 | the distance to point ``target`` in the xy plane. 202 | 203 | .. deprecated:: 1.0 204 | Use :py:class:`TargetLocationLandmarks` instead. 205 | 206 | """ 207 | def __init__(self, *args, **kwargs): 208 | warnings.warn('This class is depcreated. Use TargetLocationLandmarks instead') 209 | super(LandmarksTarLoc, self).__init__(*args, **kwargs) 210 | 211 | class LandmarksTarLocDiff(DiffTargetLocationLandmarks): 212 | """A :py:class:`Plant` which gives positive reward proportional to 213 | the absolute difference (between two episodes) in distance to 214 | point ``target`` in the xy plane. 215 | 216 | .. deprecated:: 1.0 217 | Use :py:class:`DiffTargetLocationLandmarks` instead. 218 | 219 | """ 220 | def __init__(self, *args, **kwargs): 221 | warnings.warn('This class is depcreated. Use DiffTargetLocationLandmarks instead') 222 | super(LandmarksTarLocDiff, self).__init__(*args, **kwargs) 223 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Heuristic Dynamic Programming documentation build configuration file, created by 4 | # sphinx-quickstart on Wed May 22 19:50:46 2013. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
19 | #sys.path.insert(0, os.path.abspath('.')) 20 | #sys.path.insert(0, os.path.abspath('../../HDPy/puppy/')) 21 | #sys.path.insert(0, os.path.abspath('../../HDPy/epuck/')) 22 | 23 | # -- General configuration ----------------------------------------------------- 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be extensions 29 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 30 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode', 'sphinx.ext.inheritance_diagram'] 31 | 32 | # Add any paths that contain templates here, relative to this directory. 33 | templates_path = ['_templates'] 34 | 35 | # The suffix of source filenames. 36 | source_suffix = '.rst' 37 | 38 | # The encoding of source files. 39 | #source_encoding = 'utf-8-sig' 40 | 41 | # The master toctree document. 42 | master_doc = 'index' 43 | 44 | # General information about the project. 45 | project = u'Heuristic Dynamic Programming with Python' 46 | copyright = u'2013, Matthias Baumgartner' 47 | 48 | # The version info for the project you're documenting, acts as replacement for 49 | # |version| and |release|, also used in various other places throughout the 50 | # built documents. 51 | # 52 | # The short X.Y version. 53 | version = '1.0' 54 | # The full version, including alpha/beta/rc tags. 55 | release = '1.0' 56 | 57 | # The language for content autogenerated by Sphinx. Refer to documentation 58 | # for a list of supported languages. 59 | #language = None 60 | 61 | # There are two options for replacing |today|: either, you set today to some 62 | # non-false value, then it is used: 63 | #today = '' 64 | # Else, today_fmt is used as the format for a strftime call. 65 | #today_fmt = '%B %d, %Y' 66 | 67 | # List of patterns, relative to source directory, that match files and 68 | # directories to ignore when looking for source files. 69 | exclude_patterns = [] 70 | 71 | # The reST default role (used for this markup: `text`) to use for all documents. 72 | #default_role = None 73 | 74 | # If true, '()' will be appended to :func: etc. cross-reference text. 75 | #add_function_parentheses = True 76 | 77 | # If true, the current module name will be prepended to all description 78 | # unit titles (such as .. function::). 79 | #add_module_names = True 80 | 81 | # If true, sectionauthor and moduleauthor directives will be shown in the 82 | # output. They are ignored by default. 83 | #show_authors = False 84 | 85 | # The name of the Pygments (syntax highlighting) style to use. 86 | pygments_style = 'sphinx' 87 | 88 | # A list of ignored prefixes for module index sorting. 89 | #modindex_common_prefix = [] 90 | 91 | 92 | # -- Options for HTML output --------------------------------------------------- 93 | 94 | # The theme to use for HTML and HTML Help pages. See the documentation for 95 | # a list of builtin themes. 96 | html_theme = 'default' 97 | 98 | # Theme options are theme-specific and customize the look and feel of a theme 99 | # further. For a list of options available for each theme, see the 100 | # documentation. 101 | #html_theme_options = {} 102 | 103 | # Add any paths that contain custom themes here, relative to this directory. 104 | #html_theme_path = [] 105 | 106 | # The name for this set of Sphinx documents. If None, it defaults to 107 | # " v documentation". 
108 | #html_title = None 109 | 110 | # A shorter title for the navigation bar. Default is the same as html_title. 111 | #html_short_title = None 112 | 113 | # The name of an image file (relative to this directory) to place at the top 114 | # of the sidebar. 115 | #html_logo = None 116 | 117 | # The name of an image file (within the static path) to use as favicon of the 118 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 119 | # pixels large. 120 | #html_favicon = None 121 | 122 | # Add any paths that contain custom static files (such as style sheets) here, 123 | # relative to this directory. They are copied after the builtin static files, 124 | # so a file named "default.css" will overwrite the builtin "default.css". 125 | html_static_path = ['_static'] 126 | 127 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 128 | # using the given strftime format. 129 | #html_last_updated_fmt = '%b %d, %Y' 130 | 131 | # If true, SmartyPants will be used to convert quotes and dashes to 132 | # typographically correct entities. 133 | #html_use_smartypants = True 134 | 135 | # Custom sidebar templates, maps document names to template names. 136 | #html_sidebars = {} 137 | 138 | # Additional templates that should be rendered to pages, maps page names to 139 | # template names. 140 | #html_additional_pages = {} 141 | 142 | # If false, no module index is generated. 143 | #html_domain_indices = True 144 | 145 | # If false, no index is generated. 146 | #html_use_index = True 147 | 148 | # If true, the index is split into individual pages for each letter. 149 | #html_split_index = False 150 | 151 | # If true, links to the reST sources are added to the pages. 152 | #html_show_sourcelink = True 153 | 154 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 155 | #html_show_sphinx = True 156 | 157 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 158 | #html_show_copyright = True 159 | 160 | # If true, an OpenSearch description file will be output, and all pages will 161 | # contain a tag referring to it. The value of this option must be the 162 | # base URL from which the finished HTML is served. 163 | #html_use_opensearch = '' 164 | 165 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 166 | #html_file_suffix = None 167 | 168 | # Output file base name for HTML help builder. 169 | htmlhelp_basename = 'HeuristicDynamicProgrammingdoc' 170 | 171 | 172 | # -- Options for LaTeX output -------------------------------------------------- 173 | 174 | latex_elements = { 175 | # The paper size ('letterpaper' or 'a4paper'). 176 | #'papersize': 'letterpaper', 177 | 178 | # The font size ('10pt', '11pt' or '12pt'). 179 | #'pointsize': '10pt', 180 | 181 | # Additional stuff for the LaTeX preamble. 182 | #'preamble': '', 183 | } 184 | 185 | # Grouping the document tree into LaTeX files. List of tuples 186 | # (source start file, target name, title, author, documentclass [howto/manual]). 187 | latex_documents = [ 188 | ('index', 'HeuristicDynamicProgramming.tex', u'Heuristic Dynamic Programming Documentation', 189 | u'Matthias Baumgartner', 'manual'), 190 | ] 191 | 192 | # The name of an image file (relative to this directory) to place at the top of 193 | # the title page. 194 | #latex_logo = None 195 | 196 | # For "manual" documents, if this is true, then toplevel headings are parts, 197 | # not chapters. 
198 | #latex_use_parts = False 199 | 200 | # If true, show page references after internal links. 201 | #latex_show_pagerefs = False 202 | 203 | # If true, show URL addresses after external links. 204 | #latex_show_urls = False 205 | 206 | # Documents to append as an appendix to all manuals. 207 | #latex_appendices = [] 208 | 209 | # If false, no module index is generated. 210 | #latex_domain_indices = True 211 | 212 | 213 | # -- Options for manual page output -------------------------------------------- 214 | 215 | # One entry per manual page. List of tuples 216 | # (source start file, name, description, authors, manual section). 217 | man_pages = [ 218 | ('index', 'heuristicdynamicprogramming', u'Heuristic Dynamic Programming Documentation', 219 | [u'Matthias Baumgartner'], 1) 220 | ] 221 | 222 | # If true, show URL addresses after external links. 223 | #man_show_urls = False 224 | 225 | 226 | # -- Options for Texinfo output ------------------------------------------------ 227 | 228 | # Grouping the document tree into Texinfo files. List of tuples 229 | # (source start file, target name, title, author, 230 | # dir menu entry, description, category) 231 | texinfo_documents = [ 232 | ('index', 'HeuristicDynamicProgramming', u'Heuristic Dynamic Programming Documentation', 233 | u'Matthias Baumgartner', 'HeuristicDynamicProgramming', 'One line description of project.', 234 | 'Miscellaneous'), 235 | ] 236 | 237 | # Documents to append as an appendix to all manuals. 238 | #texinfo_appendices = [] 239 | 240 | # If false, no module index is generated. 241 | #texinfo_domain_indices = True 242 | 243 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 244 | #texinfo_show_urls = 'footnote' 245 | 246 | 247 | # -- Options for Epub output --------------------------------------------------- 248 | 249 | # Bibliographic Dublin Core info. 250 | epub_title = u'Heuristic Dynamic Programming' 251 | epub_author = u'Matthias Baumgartner' 252 | epub_publisher = u'Matthias Baumgartner' 253 | epub_copyright = u'2013, Matthias Baumgartner' 254 | 255 | # The language of the text. It defaults to the language option 256 | # or en if the language is not set. 257 | #epub_language = '' 258 | 259 | # The scheme of the identifier. Typical schemes are ISBN or URL. 260 | #epub_scheme = '' 261 | 262 | # The unique identifier of the text. This can be a ISBN number 263 | # or the project homepage. 264 | #epub_identifier = '' 265 | 266 | # A unique identification for the text. 267 | #epub_uid = '' 268 | 269 | # A tuple containing the cover image and cover page html template filenames. 270 | #epub_cover = () 271 | 272 | # HTML files that should be inserted before the pages created by sphinx. 273 | # The format is a list of tuples containing the path and title. 274 | #epub_pre_files = [] 275 | 276 | # HTML files shat should be inserted after the pages created by sphinx. 277 | # The format is a list of tuples containing the path and title. 278 | #epub_post_files = [] 279 | 280 | # A list of files that should not be packed into the epub file. 281 | #epub_exclude_files = [] 282 | 283 | # The depth of the table of contents in toc.ncx. 284 | #epub_tocdepth = 3 285 | 286 | # Allow duplicate toc entries. 287 | #epub_tocdup = True 288 | 289 | 290 | # Example configuration for intersphinx: refer to the Python standard library. 
291 | intersphinx_mapping = {'http://docs.python.org/': None} 292 | # -- Custom config, added by hand -------------------------------------------- 293 | 294 | todo_include_todos=True 295 | 296 | -------------------------------------------------------------------------------- /doc/source/puppy_offline.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _puppy_offline: 3 | 4 | Puppy offline workflow 5 | ====================== 6 | 7 | In summary, the workflow of offline puppy experiments is: 8 | 9 | 1. :ref:`Capture offline training data ` 10 | 2. :ref:`Train a Critic on offline data ` 11 | 3. :ref:`Create example trajectories ` 12 | 4. :ref:`Simulate the Critic on an example trajectory ` 13 | 5. :ref:`Evaluate the Critic on the example trajectory ` 14 | 15 | In this document, these steps will be discussed in detail. 16 | 17 | Working offline has the advantage over working online that the 18 | relatively slow part - the data acquisition - is executed once, then 19 | several Critics can be trained on the same dataset. Since the simulation 20 | is only invoked once for all Critics, this approach is much faster. 21 | Also, since the dataset is the same for all Critics, a comparison of the 22 | results is possible. 23 | 24 | .. note:: 25 | Some parameters are global over all scripts, for example the 26 | sampling period or file paths. It must be ensured that the exact 27 | same values are used throughout the whole process. 28 | 29 | 30 | 31 | .. _offline-data: 32 | 33 | Gathering offline data 34 | ^^^^^^^^^^^^^^^^^^^^^^ 35 | 36 | When working with [Webots]_, two scripts are required: a robot and a 37 | supervisor. Note that this setup is fully described in the :py:mod:`PuPy` 38 | documentation. For offline data acquisition, a supervisor is created 39 | which resets the simulation whenever Puppy tumbles or leaves a 40 | predefined arena. 41 | 42 | .. literalinclude:: ../../test/puppy_offline_sampling_supervisor.py 43 | 44 | The robot script is a bit more complex. The controller has to select 45 | actions according to a predefined schema and store all data in a HDF5 46 | file for later processing. To have the file in the correct format, the 47 | class :py:class:`OfflineCollector` has to be used. It records all data 48 | such that the simulation behaviour can be reproduced. 49 | 50 | For the action selection mechanism, first a :py:class:`Policy` is 51 | created. It defines the action and links it 52 | to a motor target sequence, as explained in :ref:`plants-and-policies`. 53 | In this case, the action is based on a gait and controlls the 54 | amplitudes of the left and right legs. The procedure to create an 55 | initial action is overwritten such that the initial action is randomly 56 | chosen. The same is achieved by subtyping :py:class:`OfflineCollector` 57 | and overwriting the :py:meth:`OfflineCollector._next_action_hook` for 58 | action selection during the experiment. Hence, actions are chosen 59 | randomly at all times, according to the respective schema. Note that 60 | the action selection schema may have a huge influence on Critic training 61 | later on. 62 | 63 | .. literalinclude:: ../../test/puppy_offline_sampling_robot.py 64 | 65 | With these two These two scripts, [Webots]_ can be executed and run for 66 | some time. All sensor readouts and simulation metadata will be stored 67 | in the file ``/tmp/puppy_offline_data.hdf5``. On this basis, a Critic 68 | should be trained next. 
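Before training the Critic, a quick sanity check of the collected file
can be worthwhile. A minimal sketch, assuming ``h5py`` is available; the
group layout is whatever :py:class:`OfflineCollector` produced, so the
snippet below only lists it::

    import h5py

    # Group/dataset names depend on the OfflineCollector setup
    # used during sampling; this only lists what is there.
    with h5py.File('/tmp/puppy_offline_data.hdf5', 'r') as f:
        print len(f.keys()), 'episodes collected'
        for name in sorted(f.keys())[:3]:
            print name, f[name].keys()

The simulation run itself is started from the command line: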
69 | 70 | $ webots_builder -c -s -t styrofoam -m fast /tmp/webots 71 | 72 | 73 | .. _offline-critic-training: 74 | 75 | Critic training 76 | ^^^^^^^^^^^^^^^ 77 | 78 | For training, the :py:class:`Plant` must be specified and in case of 79 | Puppy its ADHDP implementation in :py:class:`PuppyHDP`. Note that 80 | although the :py:class:`Policy` is not in effect (as the selected 81 | actions are fixed due to the offline setup), a valid instance must 82 | be provided to the Critic. Here, the same one as for offline training is 83 | initialized. 84 | 85 | For Critic training, now also a reservoir and readout must be available, 86 | as initialized in the example. Furthermore, the 87 | :py:class:`PuPy.Normalization` is provided to the Critic, as during 88 | offline data gathering the sensor data is not processed at all. 89 | 90 | After the required objects have been created, they are bound together 91 | in :py:class:`PuppyHDP`. It is also directed to store critic output 92 | in the file ``/tmp/puppy_critic.hdf5``. Note that in this configuration, 93 | sensor data is not copied, i.e. they are not included in the Critic's 94 | data file, which is very convenient to save disk space. 95 | 96 | Finally, the function :py:func:`puppy.offline_playback` is invoked. This 97 | function replays the offline data such that the Critic sees it as if it 98 | was run online in [Webots]_. Hence, the Critic is trained as in the 99 | simulator. Only the data file has to be specified and optionally the 100 | training set can be limited (in this case to 1000 episodes). 101 | 102 | .. literalinclude:: ../../test/puppy_offline_replay.py 103 | 104 | After the script was successfully executed, the trained critic is 105 | available in three files: 106 | 107 | - ``/tmp/puppy_critic.hdf5`` 108 | - ``/tmp/puppy_readout.pic`` 109 | - ``/tmp/puppy_reservoir.pic`` 110 | 111 | All data that is saved by the Critic is in the first file. The latter 112 | two contain the reservoir and readout, as they cannot be stored in the 113 | datafile. For further processing, the readout and reservoir files will 114 | be required. The datafile mainly serves static training analysis. 115 | 116 | 117 | 118 | .. _offline-examples: 119 | 120 | Creating example trajectories and Critic evaluation 121 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 122 | 123 | When a Critic was trained, it is usually evaluated on a different 124 | dataset. One possibility is to train the Critic on a part of the 125 | training set and use the rest for testing. Then, it will be evaluated 126 | on action sequences, sampled as in the training dataset. If this is not 127 | desired, another set of offline data has to be acquired. In the 128 | following code, such a set is created by a predefined action sequence. 129 | 130 | Specifically, a main trajectory is defined, with some example 131 | actions executed at every k'th steps. For this, the robot's state at the 132 | k'th step must be identical for all example actions. As the robot cannot 133 | easily be reset in [Webots]_, an easier approach is to revert the 134 | simulation and keep the robot movement identical up to the k'th step. 135 | 136 | Three scripts are given to achieve this task. The first script creates 137 | a file which includes the action sequences and a reference to the one to 138 | be executed next. The other two scripts are a robot and supervisor 139 | controller for webots. 
Basically, an action sequence is loaded and 140 | executed, the measurements stored in a file in the same fashion as in 141 | the last section (:ref:`offline-data`). Hence, the file structure and 142 | called functions are the same as before. 143 | 144 | 145 | First, a number of action sequences is stored in a file at 146 | ``/tmp/example_sequence.hdf5``. 147 | 148 | .. literalinclude:: ../../test/puppy_example_trajectory_sequence.py 149 | 150 | To collect the simulation data, again a supervisor and robot controller 151 | have to be created. As noted before, the simulation is to be reverted 152 | (not restarted!) after an action sequence has finished. In this example, 153 | this is implemented by two guards which react accordingly to a signal 154 | from the robot. 155 | 156 | .. literalinclude:: ../../test/puppy_example_trajectory_supervisor.py 157 | 158 | Hence, the main logic is implemented in the robot controller. A special 159 | case of an :py:class:`OfflineCollector` is defined, enforcing the action 160 | to follow a specified sequence. If the sequence has ended, a signal is 161 | sent to the supervisor. The action sequence is loaded from the HDF5 162 | file, which was written before and the file updated such that all the 163 | sequences will be executed. The initialization of the robot is then 164 | analoguous to the previous section. 165 | 166 | .. literalinclude:: ../../test/puppy_example_trajectory_robot.py 167 | 168 | With the scripts set up, [Webots]_ can be executed. It automatically 169 | quits after all trajectories have been handled. Note that the setup 170 | of the policy, the number of initial steps and robot timings have been 171 | set to the same values as in the training data collection process. 172 | 173 | $ webots_builder -c -s -t styrofoam -m fast /tmp/webots 174 | 175 | As with offline data acquisition, the robot data is written into a HDF5, 176 | in this example at ``/tmp/example_data.hdf5``. Note that once this data 177 | is available, it can be used for testing of several Critics (as for now, 178 | all data is offline). Hence, the same process can be repeated for 179 | several example trajectories to have a more representative testing 180 | dataset. 181 | 182 | 183 | 184 | .. _offline-analysis: 185 | 186 | Critic analysis 187 | ^^^^^^^^^^^^^^^ 188 | 189 | If the example was followed until here, several files should be 190 | available: 191 | 192 | - ``/tmp/puppy_readout.pic``, the trained Critic's readout weights 193 | - ``/tmp/puppy_reservoir.pic``, the Critic's reservoir 194 | - ``/tmp/example_data.hdf5``, the testing dataset 195 | 196 | With those, the Critic can finally be analyzed. To do so, the Critic 197 | is executed on the testing dataset and then the result is plotted. The 198 | first part works similar to the Critic's training. The testing data is 199 | replayed, but this time the Critic is loaded instead of trained. The 200 | following script achieves this, storing the evaluation result in 201 | ``/tmp/example_eval.hdf5``. As before, plant and policy are initialized, 202 | then the reservoir and readout is loaded. Note that the readout training 203 | is disabled. After creation of the :py:class:`PuppyHDP`, it is executed 204 | on the testing data. 205 | 206 | .. literalinclude:: ../../test/puppy_example_trajectory_eval.py 207 | 208 | Now, the predicted return along the testing trajectory is stored in 209 | ``/tmp/example_eval.hdf5``. Based on this file, the Critic behaviour 210 | can be analysed. 
It does not include the data collected during 211 | simulation, hence the experiment is only completely described by also 212 | considering ``/tmp/example_data.hdf5``. This is exactly what 213 | :py:class:`H5CombineFile` is for. 214 | 215 | Due to the initial behaviour of :py:class:`PuppyHDP` and 216 | :py:class:`OfflinePuppy`, the datasets in the two files have a different 217 | offset (indicated by ``obs_offset`` in the script). For the first epoch, 218 | sensor data is available but no actions or reward. They are only stored 219 | after the second step, hence are offset by one epoch (150 sensor samples 220 | in this case). The predicted return is delayed even more, as it is not 221 | stored during the whole initial phase (25 steps). The dataset can also 222 | be thought of being aligned backwards. 223 | 224 | The analysis script goes through all executions of the example 225 | trajectory (one for each sample action) and orders them according to the 226 | state in which the sample action execution started. For each of those 227 | states, the sample actions are plotted as lines, colored with respect 228 | to the respective predicted return. States itself are related by 229 | plotting a circle, colored according to the median return of actions 230 | executed from it. 231 | 232 | .. literalinclude:: ../../test/puppy_offline_analysis.py 233 | 234 | If it worked correctly, a plot should be generated which shows the 235 | example trajectory, the sampled actions and states with the color 236 | corresponding to the predicted return (darker is better). 237 | 238 | 239 | .. image:: ../../data/doc/puppy_offline_result.png 240 | -------------------------------------------------------------------------------- /HDPy/epuck/epuck.py: -------------------------------------------------------------------------------- 1 | """ 2 | For the ePuck robot, a small simulator is provided. It allows to place 3 | ePuck in an arena, with unpassable walls and obstacles at (almost) 4 | arbitrary locations. Some environment objects are predefined in 5 | :py:mod:`HDPy.epuck.env`. The class :py:class:`Robot` provides the 6 | implementation of the simulated ePuck. Obstacles are directly inserted 7 | into this instance, hence it combines the robot with the environment. 8 | 9 | As for other problems, a :py:class:`ADHDP` instance can be used on top 10 | of this to control the robot motion. A plant and a policy have to be 11 | provided (see :ref:`plants-and-policies`). Due to historical reasons, 12 | the interpretation of the policy (i.e. action) is done in the robot. 13 | In :py:class:`Robot`, the action is a relative heading, 14 | :py:class:`AbsoluteRobot` implements an absolute one. 15 | 16 | The robot and adhdp instances are combined in the 17 | :py:func:`simulation_loop` function to run the simulation for a 18 | fixed amount of time. 19 | 20 | """ 21 | import numpy as np 22 | import pylab 23 | import warnings 24 | 25 | 26 | def _intersect((o1x, o1y), (d1x, d1y), (o2x, o2y), (d2x, d2y)): 27 | """Intersection of two bounded lines. The lines are given 28 | with the origin and direction. Returned is the step length for 29 | both lines, in the same order as the input. 
30 | 31 | o1x + t1 * d1x = o2x + t2 * d2x 32 | o1y + t1 * d1y = o2y + t2 * d2y 33 | => t1 = (o2x + t2 * d2x - o1x)/d1x 34 | => o1y + ((o2x + t2 * d2x - o1x)/d1x) * d1y = o2y + t2 * d2y 35 | => o1y + (o2x + t2 * d2x - o1x) * d1y/d1x = o2y + t2 * d2y 36 | => o1y + (t2 * d2x + o2x - o1x) * d1y/d1x = o2y + t2 * d2y 37 | => o1y + t2*d2x*d1y/d1x + (o2x - o1x) * d1y/d1x = o2y + t2 * d2y 38 | => o1y - o2y + (o2x - o1x) * d1y/d1x = t2 * d2y - t2*d2x*d1y/d1x 39 | => o1y - o2y + (o2x - o1x) * d1y/d1x = t2 * (d2y - d2x*d1y/d1x) 40 | => t2 = (o1y - o2y + (o2x - o1x) * d1y/d1x) / (d2y - d2x*d1y/d1x) 41 | 42 | """ 43 | tol = 1e-14 44 | if abs(d1y - 0.0) < tol : 45 | # o_dir = (!0.0, 0.0) 46 | if abs(d2y - d2x*d1y/d1x) < tol: # parallel 47 | t0, t1 = float('inf'), float('inf') 48 | else: 49 | nom = o2y - o1y - d1y * (o2x - o1x)/d1x 50 | denom = (d1y*d2x)/d1x - d2y 51 | t0 = nom/denom 52 | t1 = (o2x - o1x + t0 * d2x)/d1x 53 | else: 54 | # o_dir = (0.0, !0.0) 55 | if abs(d2x - d2y*d1x/d1y) < tol: # parallel 56 | t0, t1 = float('inf'), float('inf') 57 | else: 58 | nom = o2x - o1x - d1x * (o2y - o1y)/d1y 59 | denom = (d1x*d2y)/d1y - d2x 60 | t0 = nom/denom 61 | t1 = (o2y - o1y + t0 * d2y) / d1y 62 | 63 | return t1, t0 64 | 65 | def _in_obstacle(loc, obstacle): 66 | """Check if a location is within an obstacle. 67 | 68 | Assuming the obstacle edges are given in the right order (meaning 69 | that the polygon is defined through lines between successive 70 | points). 71 | 72 | As reference, the origin is picked. This implies that the obstacle 73 | must not include the origin. 74 | 75 | Edges and corners count as within the obstacle 76 | 77 | """ 78 | if any([loc == obs for obs in obstacle]): 79 | return True 80 | 81 | faces = [(p0, p1) for p0, p1 in zip(obstacle[:-1], obstacle[1:])] 82 | faces.append((obstacle[-1], obstacle[0])) 83 | 84 | num_intersect = sum([_obs_intersect((loc, (0.0, 0.0)), line) for line in faces]) 85 | if num_intersect % 2 == 0: 86 | return False 87 | else: 88 | return True 89 | 90 | def _obs_intersect(((x0, y0), (x1, y1)), ((x2, y2), (x3, y3))): 91 | """Check if two lines intersect. The boundaries don't count as 92 | intersection.""" 93 | base1 = (x0, y0) 94 | base2 = (x2, y2) 95 | dir1 = (x1-x0, y1-y0) 96 | dir2 = (x3-x2, y3-y2) 97 | t1, t2 = _intersect(base1, dir1, base2, dir2) 98 | 99 | eps = 0.00001 100 | if -eps < t1 and t1 < 1.0 + eps and -eps < t2 and t2 < 1.0 + eps: 101 | return True 102 | else: 103 | return False 104 | 105 | class Robot(object): 106 | """Simulated ePuck robot. 107 | 108 | The robot may be steered by means of change in its orientation (i.e. 109 | the heading relative to the robot). Every time an action is 110 | executed, the robot turns to the target orientation, then moves 111 | forward. How much it moves is proportional to the ``speed`` and 112 | ``step_time``. In between, infrared sensor readouts can be taken. 113 | The robot is placed in an arena, with some obstacles and walls it 114 | can collide with but not pass. Upon collision, the robot stops 115 | moving. 116 | 117 | ``walls`` 118 | List of wall lines which cannot be passed. The lines are to be 119 | given by their endpoints. 120 | 121 | ``obstacles`` 122 | List of obstacles which cannot be passed. In contrast to walls, 123 | the obstacles are closed polygons. They have to be given 124 | as list of corner points. Obstacles may not include the origin 125 | (0, 0). 126 | 127 | ``speed`` 128 | Speed of the robot. 129 | 130 | ``step_time`` 131 | Time quantum for movement, i.e. 
for how long the robot drives 132 | forward. 133 | 134 | ``tol`` 135 | Minimal distance from any obstacle or wall which counts as 136 | collision. 137 | 138 | .. note:: 139 | Obstacles may not include the origin (0, 0). 140 | 141 | .. todo:: 142 | wall tolerance does not operate correctly. 143 | 144 | """ 145 | def __init__(self, walls=None, obstacles=None, speed=0.5, step_time=1.0, tol=0.0): 146 | 147 | if obstacles is None: 148 | obstacles = [] 149 | 150 | if walls is None: 151 | walls = [] 152 | 153 | walls = walls[:] 154 | for obs in obstacles: 155 | walls.extend([(x0, y0, x1, y1) for (x0, y0), (x1, y1) in zip(obs[:-1], obs[1:])]) 156 | walls.append((obs[-1][0], obs[-1][1], obs[0][0], obs[0][1])) 157 | 158 | if tol > 0.0: 159 | warnings.warn("tolerance > 0 doesn't work properly; It only works if the robot faces the wall (not when parallel or away from the wall).") 160 | 161 | self.sensors = [2*np.pi*i/8.0 for i in range(8)] 162 | #self.obstacles = [ (x0,y0,x1,y1) ] 163 | self.obstacle_line = walls 164 | self._ir_max, self.tol = 15.0, tol 165 | self.obstacles = self._cmp_obstacles(self.obstacle_line) 166 | self.polygons = obstacles[:] 167 | self.speed, self.step_time = speed, step_time 168 | self.loc = (0.0, 0.0) 169 | self.pose = 0.0 170 | self.trajectory = [] 171 | self.reset() 172 | 173 | def _cmp_obstacles(self, lines): 174 | """Convert lines given by their endpoints to their corresponding 175 | vector representation""" 176 | obstacles = [] 177 | for x0, y0, x1, y1 in lines: 178 | o_vec = (x1-x0, y1-y0) 179 | if o_vec[0] == 0.0 and o_vec[1] == 0.0: 180 | raise Exception('Obstacle line must have a direction') 181 | o_base = (x0, y0) 182 | o_limit = 1.0 183 | obstacles.append((o_vec, o_base, o_limit)) 184 | return obstacles 185 | 186 | def _cmp_obstacle_lines(self, obstacles): 187 | """Convert lines given by as vector to their corresponding 188 | endpoint representation.""" 189 | lines = [] 190 | for o_vec, o_base, o_limit in obstacles: 191 | x0, y0 = o_base 192 | if o_limit == float('inf'): 193 | raise Exception('Infinite lines not supported') 194 | x1 = o_base[0] + o_limit * o_vec[0] 195 | y1 = o_base[1] + o_limit * o_vec[1] 196 | lines.append((x0, y0, x1, y1)) 197 | return lines 198 | 199 | def reset(self): 200 | """Reset the robot to the origin.""" 201 | self.loc = (0.0, 0.0) 202 | self.pose = 0.0 203 | self.trajectory = [self.loc] 204 | 205 | def reset_random(self, loc_lo=-10.0, loc_hi=10.0): 206 | """Reset the robot to a random location, outside the obstacles.""" 207 | for i in xrange(1000): 208 | loc = self.loc = (np.random.uniform(loc_lo, loc_hi), np.random.uniform(loc_lo, loc_hi)) 209 | pose = self.pose = np.random.uniform(0, 2*np.pi) 210 | 211 | if not any([_in_obstacle(self.loc, obs) for obs in self.polygons]) and not self.take_action(0.0): 212 | break 213 | 214 | if i == 1000: 215 | warnings.warn('Random reset iterations maximum exceeded') 216 | 217 | self.loc = loc 218 | self.pose = pose 219 | self.trajectory = [self.loc] 220 | 221 | def read_ir(self): 222 | """Compute the proximities to obstacles in all infrared sensor 223 | directions.""" 224 | # view-direction 225 | readout = [] 226 | for sensor in self.sensors: 227 | s_dist = self._ir_max 228 | s_ori = self.pose + sensor 229 | s_dir = (np.cos(s_ori), np.sin(s_ori)) 230 | s_base = self.loc 231 | 232 | for o_dir, o_base, o_limit in self.obstacles: 233 | # obstacles intersection 234 | t0, t1 = _intersect(o_base, o_dir, s_base, s_dir) 235 | 236 | eps = 0.00001 237 | if t1 >= 0 and (o_limit == float('inf') or (-eps <= t0 
and t0 <= o_limit + eps)): 238 | #if t0 >= 0 and t1 >= 0 and t1 <= 1.0: 239 | # intersection at distance (t0 * s_dir) 240 | dist = np.linalg.norm((t1 * s_dir[0], t1 * s_dir[1])) 241 | else: 242 | # no intersection 243 | dist = self._ir_max 244 | 245 | if dist < s_dist: 246 | s_dist = dist 247 | 248 | readout.append(s_dist) 249 | 250 | return readout 251 | 252 | def read_sensors(self): 253 | """Read all sensors. A :py:keyword:`dict` is returned.""" 254 | ir = self.read_ir() 255 | #noise = np.random.normal(scale=0.01, size=(len(ir))) 256 | #ir = map(operator.add, ir, noise) 257 | 258 | return {'loc': np.atleast_2d(self.loc), 'pose': np.atleast_2d(self.pose), 'ir': np.atleast_2d(ir)} 259 | 260 | def take_action(self, action): 261 | """Execute an ``action`` and move forward 262 | (speed * step_time units or until collision). Return 263 | :py:const:`True` if the robot collided. 264 | 265 | """ 266 | # turn 267 | if isinstance(action, np.ndarray): 268 | action = action.flatten()[0] 269 | self.pose = (self.pose + action) % (2*np.pi) 270 | #self.pose = action % (2*np.pi) 271 | 272 | # move forward 273 | t = self.speed * self.step_time # distance per step 274 | 275 | # Collision detection 276 | eps = 0.00001 277 | r_vec = (np.cos(self.pose), np.sin(self.pose)) 278 | wall_dists = [(idx, _intersect(self.loc, r_vec, o_base, o_vec), o_limit) for idx, (o_vec, o_base, o_limit) in enumerate(self.obstacles)] 279 | wall_dists = [(idx, r_dist) for idx, (r_dist, o_dist), o_limit in wall_dists if r_dist >= 0.0 and r_dist < float('inf') and -eps <= o_dist and o_dist <= o_limit + eps] 280 | if len(wall_dists) > 0: 281 | # Distance to the wall 282 | wall_idx, min_wall_dist = min(wall_dists, key=lambda (idx, dist): dist) 283 | dist = np.linalg.norm((min_wall_dist * r_vec[0], min_wall_dist * r_vec[1])) 284 | 285 | # angle between wall and robot trajectory 286 | o_vec = self.obstacles[wall_idx][0] 287 | a = np.arccos( (o_vec[0] * r_vec[0] + o_vec[1] * r_vec[1]) / (np.linalg.norm(o_vec) * np.linalg.norm(r_vec)) ) 288 | if a > np.pi/2.0: 289 | a = np.pi - a 290 | 291 | # maximum driving distance 292 | k = self.tol / np.sin(a) 293 | t_max = dist - k 294 | 295 | else: 296 | # no wall ahead 297 | t_max = float('inf') 298 | 299 | collide = t >= t_max 300 | t = min(t, t_max) 301 | 302 | # next location 303 | self.loc = (self.loc[0] + np.cos(self.pose) * t, self.loc[1] + np.sin(self.pose) * t) # t doesn't denote the distance in moving direction! 304 | self.trajectory.append(self.loc) 305 | return collide 306 | 307 | def plot_trajectory(self, wait=False, with_tol=True, tol=None, full_view=True, axis=None): 308 | """Plot the robot trajectory in a :py:mod:`pylab` figure. 309 | 310 | ``wait`` 311 | True for blocking until the figure is closed. 312 | 313 | ``with_tol`` 314 | Plot obstacle tolerance lines. 315 | 316 | ``tol`` 317 | Overwrite the obstacle tolerance. 318 | 319 | ``full_view`` 320 | Keep the original clipping of the window. If false, the 321 | clipping will be adjusted to the data. 322 | 323 | ``axis`` 324 | A :py:mod:`pylab` axis, which should be used for plotting. 325 | If not provided, the first axis of the first figure is used. 
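A minimal usage sketch (the wall coordinates and actions below are
        arbitrary examples)::

            import pylab
            robot = Robot(walls=[(-5.0, -5.0, 5.0, -5.0)])
            robot.take_action(0.5)
            robot.take_action(-0.2)
            # pass an explicit axis to avoid relying on an existing figure
            axis = pylab.figure(1).add_subplot(111)
            robot.plot_trajectory(wait=True, axis=axis)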
326 | 327 | """ 328 | if axis is None: 329 | axis = pylab.figure(1).axes[0] 330 | 331 | axis.clear() 332 | self._plot_obstacles(axis, with_tol, tol) 333 | x, y = zip(*self.trajectory) 334 | axis.plot(x, y, 'b-') 335 | axis.plot(x, y, 'b*') 336 | if full_view: 337 | x0, x1, y0, y1 = axis.axis() 338 | else: 339 | x0, x1, y0, y1 = min(x), max(x), min(y), max(y) 340 | axis.axis(( 341 | x0 + x0*0.1, 342 | x1 + x1*0.1, 343 | y0 + y0*0.1, 344 | y1 + y1*0.1 345 | )) 346 | 347 | pylab.show(block=wait) 348 | 349 | def _plot_obstacles(self, axis, with_tol=True, tol=None): 350 | """Plot all obstacles and walls into a :py:mod:`pylab` figure. 351 | 352 | ``axis`` 353 | The axis where stuff is plotted into. 354 | 355 | ``with_tol`` 356 | Plot obstacle tolerance lines. 357 | 358 | ``tol`` 359 | Overwrite the obstacle tolerance. 360 | 361 | """ 362 | if tol is None: 363 | tol = self.tol 364 | 365 | for vec, base, limit in self.obstacles: 366 | # obstacle line 367 | axis.plot((base[0], base[0]+limit*vec[0]), (base[1], base[1]+limit*vec[1]), 'k') 368 | 369 | if with_tol and tol > 0: 370 | if vec[1] == 0.0: 371 | y = (-vec[1]/vec[0], 1.0) 372 | else: 373 | y = (1.0, -vec[0]/vec[1]) 374 | 375 | y = (y[0] * tol / np.linalg.norm(y), y[1] * tol / np.linalg.norm(y)) 376 | base_tn = (base[0] - y[0], base[1] - y[1]) 377 | base_tp = (base[0] + y[0], base[1] + y[1]) 378 | 379 | # obstacle tolerance 380 | axis.plot((base_tn[0], base_tn[0]+limit*vec[0]), (base_tn[1], base_tn[1]+limit*vec[1]), 'k:') 381 | axis.plot((base_tp[0], base_tp[0]+limit*vec[0]), (base_tp[1], base_tp[1]+limit*vec[1]), 'k:') 382 | 383 | class AbsoluteRobot(Robot): 384 | """Simulated ePuck robot. 385 | 386 | In contrast to :py:class:`Robot`, the heading is with respect to 387 | the arena instead of the robot - i.e. it is absolute, not relative 388 | to the robot. 389 | 390 | """ 391 | def take_action(self, action): 392 | """Execute an ``action`` and move forward 393 | (speed * step_time units or until collision). Return 394 | :py:const:`True` if the robot collided. 395 | 396 | """ 397 | if isinstance(action, np.ndarray): 398 | action = action.flatten()[0] 399 | self.pose = action % (2*np.pi) 400 | return super(AbsoluteRobot, self).take_action(0.0) 401 | 402 | def simulation_loop(acd, robot, max_step=-1, max_episodes=-1, max_total_iter=-1): 403 | """Simulate some episodes of the ePuck robot. 404 | 405 | This method handles data passing between the ``acd`` and ``robot`` 406 | instances in two loops, one for the episode and one for the whole 407 | experiment. 408 | 409 | ``acd`` 410 | Actor-Critic instance (:py:class:`ADHDP`). 411 | 412 | ``robot`` 413 | Robot instance (:py:class:`Robot`). 414 | 415 | ``max_step`` 416 | Maximum number of steps in an episode. Negative means no limit. 417 | 418 | ``max_episodes`` 419 | Maximum number of episodes. Negative means no limit. 420 | 421 | ``max_total_iter`` 422 | Maximum number of steps in total. Negative means no limit. 
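If none of the limits is set, an exception is raised. A minimal
    sketch, assuming ``acd`` is an already configured :py:class:`ADHDP`
    instance and the wall coordinates are arbitrary::

        robot = AbsoluteRobot(walls=[(-5.0, -5.0, 5.0, -5.0)], speed=0.5, step_time=1.0)
        # run at most 50 episodes of at most 100 steps each
        acd = simulation_loop(acd, robot, max_step=100, max_episodes=50)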
423 | 424 | """ 425 | if max_step < 0 and max_episodes < 0 and max_total_iter < 0: 426 | raise Exception('The simulation cannot run forever.') 427 | 428 | num_episode = 0 429 | num_total_iter = 0 430 | while True: 431 | 432 | # init episode 433 | acd.new_episode() 434 | acd.signal('new_episode') # collectors will create new group 435 | robot.reset() 436 | acd.child.reset() 437 | a_curr = np.atleast_2d([acd.child.action]) 438 | 439 | num_step = 0 # k 440 | while True: 441 | 442 | # Apply current action 443 | collided = robot.take_action(a_curr) 444 | 445 | # Observe sensors 446 | s_next = robot.read_sensors() 447 | 448 | # Execute ACD 449 | a_next = acd(s_next, num_step, num_step+1, 1) 450 | 451 | # Iterate 452 | num_step += 1 453 | num_total_iter += 1 454 | if collided: 455 | break 456 | if max_step > 0 and num_step >= max_step: 457 | break 458 | acd.a_curr = a_curr = a_next 459 | 460 | if num_step <= 3: 461 | print "Warning: episode ended prematurely" 462 | 463 | num_episode += 1 464 | if max_episodes > 0 and num_episode >= max_episodes: 465 | break 466 | if max_total_iter > 0 and num_total_iter >= max_total_iter: 467 | break 468 | 469 | return acd 470 | -------------------------------------------------------------------------------- /HDPy/rl.py: -------------------------------------------------------------------------------- 1 | """ 2 | The Reinforcement Learning Problem is approached by means of an 3 | Actor-Critic design. This method splits the agent into a 4 | return-estimator (Critic) and an action-selection mechanism (Actor). 5 | Information about state and reward is provided by the plant to the 6 | agent. As the agent is still viewed as one unit, both of its parts are 7 | embedded in the same class, the :py:class:`ActorCritic`. It does not 8 | itself implement a method to solve the learning problem but only 9 | provides preliminaries for an algorithm doing so. Meaning, that it 10 | defines common members and method interfaces. Furthermore, it binds 11 | the Actor-Critic approach to a :py:class:`PuPy.RobotActor`, such that 12 | any of its descendants can be used within :py:mod:`PuPy`. 13 | 14 | The Actor-Critic implementation is kept general, meaning that it is not 15 | limited to a specific learning problem. For this, the template classes 16 | :py:class:`Plant` and :py:class:`Policy` are defined. Using the former, 17 | a concrete environment can be implemented by specifying state and reward. 18 | The latter class is required to hide the representation of the action 19 | from the :py:class:`ActorCritic`. Due to the integration in 20 | :py:mod:`PuPy`, motor targets (a low-level representation of an 21 | action) have to be generated, but the action representation for the 22 | Reinforcement Learning problem may be more abstract. For example, gait 23 | parameters could be used as action. From these, a motor target sequence 24 | has to be generated to actually steer the robot. 25 | 26 | """ 27 | import PuPy 28 | import numpy as np 29 | import cPickle as pickle 30 | 31 | class Plant(object): 32 | """A template for Actor-Critic *plants*. The *Plant* describes the 33 | interaction of the Actor-Critic with the environment. Given a robot 34 | which follows a certain *Policy*, the environment generates rewards 35 | and robot states. 36 | 37 | An additional instance to :py:class:`PuPy.Normalization` may be 38 | supplied in ``norm`` for normalizing sensor values. 
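A minimal subclass only needs to define the state input and the
    reward; the sketch below uses a hypothetical one-dimensional sensor
    called ``distance``::

        class DistancePlant(Plant):
            def __init__(self):
                super(DistancePlant, self).__init__(state_space_dim=1)

            def state_input(self, state):
                # Nx1 column vector, as expected by the ActorCritic
                return np.atleast_2d([state['distance'][-1]]).T

            def reward(self, epoch):
                # favour small readings of the (hypothetical) sensor
                return -abs(epoch['distance'][-1])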
39 | """ 40 | def __init__(self, state_space_dim=None, norm=None): 41 | self._state_space_dim = state_space_dim 42 | self.normalization = None 43 | self.set_normalization(norm) 44 | 45 | def state_input(self, state): 46 | """Return the state-part of the critic input 47 | (i.e. the reservoir input). 48 | 49 | The state-part is derived from the current robot ``state`` and 50 | possibly also its ``action``. As return format, a Nx1 numpy 51 | vector is expected, where 2 dimensions should exist (e.g. 52 | :py:meth:`numpy.atleast_2d`). 53 | 54 | Although the reservoir input will consist of both, the *state* 55 | and *action*, this method must only return the *state* part of 56 | it. 57 | 58 | """ 59 | raise NotImplementedError() 60 | 61 | def reward(self, epoch): 62 | """A reward generated by the *Plant* based on the current 63 | sensor readings in ``epoch``. The reward is single-dimensional. 64 | 65 | The reward is evaluated in every step. It builds the foundation 66 | of the approximated return. 67 | """ 68 | raise NotImplementedError() 69 | 70 | def state_space_dim(self): 71 | """Return the dimension of the state space. 72 | This value is equal to the size of the vector returned by 73 | :py:meth:`state_input`. 74 | """ 75 | if self._state_space_dim is None: 76 | raise NotImplementedError() 77 | return self._state_space_dim 78 | 79 | def set_normalization(self, norm): 80 | """Set the normalization instance to ``norm``.""" 81 | if norm is None: 82 | norm = PuPy.Normalization() 83 | self.normalization = norm 84 | 85 | def reset(self): 86 | """Reset plant to initial state.""" 87 | pass 88 | 89 | class Policy(PuPy.RobotActor): 90 | """A template for Actor-Critic *policies*. The *Policy* defines how 91 | an action is translated into a control (motor) signal. It 92 | continously receives action updates from the *Critic* which it has 93 | to digest. 94 | 95 | An additional instance to :py:class:`PuPy.Normalization` may be 96 | supplied in ``norm`` for normalizing sensor values. 97 | """ 98 | def __init__(self, action_space_dim=None, norm=None): 99 | super(Policy, self).__init__() 100 | self._action_space_dim = action_space_dim 101 | self.normalization = None 102 | self.set_normalization(norm) 103 | 104 | def initial_action(self): 105 | """Return the initial action. A valid action must be returned 106 | since the :py:class:`ActorCritic` relies on the format. 107 | 108 | The action has to be a 2-dimensional numpy vector, with both 109 | dimensions available. 110 | """ 111 | raise NotImplementedError() 112 | 113 | def update(self, action_upd): 114 | """Update the *Policy* according to the current action update 115 | ``action_upd``, which was in turn computed by the 116 | :py:class:`ActorCritic`. 117 | """ 118 | raise NotImplementedError() 119 | 120 | def get_iterator(self, time_start_ms, time_end_ms, step_size_ms): 121 | """Return an iterator for the *motor_target* sequence, according 122 | to the current action configuration. 123 | 124 | The *motor_targets* glue the *Policy* and *Plant* together. 125 | Since they are applied in the robot and effect the sensor 126 | readouts, they are an "input" to the environment. As the targets 127 | are generated as effect of the action update, they are an output 128 | of the policy. 129 | 130 | """ 131 | raise NotImplementedError() 132 | 133 | def action_space_dim(self): 134 | """Return the dimension of the action space. 135 | This value is equal to the size of the vector returned by 136 | :py:meth:`initial_action`. 
137 | """ 138 | if self._action_space_dim is None: 139 | raise NotImplementedError() 140 | return self._action_space_dim 141 | 142 | def reset(self): 143 | """Undo any policy updates.""" 144 | raise NotImplementedError() 145 | 146 | def set_normalization(self, norm): 147 | """Set the normalization instance to ``norm``.""" 148 | if norm is None: 149 | norm = PuPy.Normalization() 150 | self.normalization = norm 151 | 152 | 153 | def __call__(self, epoch, time_start_ms, time_end_ms, step_size_ms): 154 | if epoch.has_key('a_next'): 155 | self.update(np.atleast_2d(epoch['a_next']).T) 156 | return self.get_iterator(time_start_ms, time_end_ms, step_size_ms) 157 | 158 | def _get_initial_targets(self, time_start_ms, time_end_ms, step_size_ms): 159 | return self.__call__({}, time_start_ms, time_end_ms, step_size_ms) 160 | 161 | 162 | class _ConstParam(object): 163 | """Stub for wrapping constant values into an executable function.""" 164 | def __init__(self, value): 165 | self._value = value 166 | def __call__(self, time0=None, time1=None): 167 | """Return the constant value.""" 168 | return self._value 169 | 170 | class Momentum(object): 171 | """Template class for an action momentum. 172 | 173 | With a momentum, the next action is computed from the lastest one 174 | and the proposed action :math:`a^*`. The momentum controls how much 175 | each of the two influences the next action. Generally, a momentum 176 | of zero implies following strictly the proposal, while a momentum 177 | of one does the opposite. Usually, the (linear) momentum is 178 | formulated as 179 | 180 | .. math:: 181 | a_{t+1} = m a_t + (1-m) a^* 182 | 183 | The momentum may be time dependent with 184 | - time0: Episode counter 185 | - time1: Episode's step counter 186 | 187 | """ 188 | def __call__(self, a_curr, a_prop, time0=None, time1=None): 189 | """Return the next action from a current action ``a_curr``, 190 | a proposal ``a_prop`` at episode ``time0`` in step ``time1``.""" 191 | raise NotImplementedError() 192 | 193 | class ConstMomentum(Momentum): 194 | """Linear momentum equation, as specified in :py:class:`Momentum` 195 | with time-constant momentum value (m). 196 | 197 | ``value`` 198 | Momentum value, [0,1]. 199 | 200 | """ 201 | def __init__(self, value): 202 | super(ConstMomentum, self).__init__() 203 | self._value = value 204 | assert 0 <= self._value and self._value <= 1 205 | 206 | def __call__(self, a_curr, a_prop, time0=None, time1=None): 207 | """Return the next action from a current action ``a_curr``, 208 | a proposal ``a_prop`` at episode ``time0`` in step ``time1``.""" 209 | return self._value * a_curr + (1.0 - self._value) * a_prop 210 | 211 | class RadialMomentum(Momentum): 212 | """Momentum with respect to angular action. The resulting action 213 | is the (smaller) intermediate angle of the latest action 214 | and proposal (with respect to the momentum). The actions are 215 | supposed to be in radians, hence the output is in the range 216 | :math:`[0,2\pi]`. The momentum is a time-constant value (m). 217 | 218 | ``value`` 219 | Momentum value, [0,1]. 
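For example, with a momentum of 0.5 the angles 0.1 and
    :math:`2\pi - 0.1` are blended to 0.0 rather than to :math:`\pi`,
    i.e. the wrap-around at :math:`2\pi` is respected::

        momentum = RadialMomentum(0.5)
        momentum(0.1, 2*np.pi - 0.1)    # ~0.0 (ConstMomentum would give ~pi)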
220 | 221 | """ 222 | def __init__(self, value): 223 | super(RadialMomentum, self).__init__() 224 | self._value = value 225 | assert 0 <= self._value and self._value <= 1 226 | 227 | def __call__(self, a_curr, a_prop, time0=None, time1=None): 228 | """Return the next action from a current action ``a_curr``, 229 | a proposal ``a_prop`` at episode ``time0`` in step ``time1``.""" 230 | phi_0 = a_curr % (2*np.pi) 231 | phi_1 = a_prop % (2*np.pi) 232 | imag_0 = np.exp(phi_0*1j) 233 | imag_1 = np.exp(phi_1*1j) 234 | imag_r = self._value * imag_0 + (1.0 - self._value) * imag_1 235 | return np.angle(imag_r) % (2*np.pi) 236 | 237 | class ActorCritic(PuPy.RobotActor): 238 | """Actor-critic design. 239 | 240 | The Actor-Critic estimates the return function 241 | 242 | .. math:: 243 | J_t = \sum\limits_{k=t}^{T} \gamma^k r_{t+k+1} 244 | 245 | while the return is optimized at the same time. This is done by 246 | incrementally updating the estimate for :math:`J_t` and choosing 247 | the next action by optimizing the return in a single step. See 248 | [ESN-ACD]_ for details. 249 | 250 | ``plant`` 251 | An instance of :py:class:`Plant`. The plant defines the 252 | interaction with the environment. 253 | 254 | ``child`` 255 | An instance of :py:class:`RobotActor` which should be a 256 | :py:class:`Policy` or have one as child. The policy defines the 257 | interaction with the robot's actuators. 258 | 259 | ``gamma`` 260 | Choice of *gamma* in the return function. May be a constant or 261 | a function of the time (relative to the episode start). 262 | 263 | ``alpha`` 264 | Choice of *alpha* in the action update. May be a constant or a 265 | function of the time (relative to the episode start). 266 | 267 | The corresponding formula is 268 | 269 | .. math:: 270 | a_{t+1} = a_{t} + \\alpha \\frac{\partial J_t}{\partial a_t} 271 | 272 | See [ESN-ACD]_ for details. 273 | 274 | ``norm`` 275 | A :py:class:`PuPy.Normalization` for normalization purposes. 276 | Note that the parameters for *a_curr* and *a_next* should be 277 | exchangable, since it's really the same kind of 'sensor'. 278 | 279 | """ 280 | def __init__(self, plant, policy, gamma=1.0, alpha=1.0, init_steps=1, norm=None, momentum=0.0): 281 | super(ActorCritic, self).__init__(child=policy) 282 | 283 | # Initial members 284 | self.plant = plant 285 | self.normalizer = None 286 | self.num_episode = 0 287 | self._init_steps = init_steps 288 | self.a_curr = None 289 | self._motor_action_dim = None 290 | self.s_curr = dict() 291 | self.alpha = None 292 | self.momentum = None 293 | self.gamma = None 294 | self.num_step = 0 295 | 296 | # Init members through dedicated routines 297 | self.set_normalization(norm) 298 | self.set_alpha(alpha) 299 | self.set_gamma(gamma) 300 | self.set_momentum(momentum) 301 | 302 | # Check assumptions 303 | assert self.child.initial_action().shape[0] >= 1 304 | assert self.child.initial_action().shape[1] == 1 305 | 306 | # Start a new episode 307 | self.new_episode() 308 | 309 | def new_episode(self): 310 | """Start a new episode of the same experiment. This method can 311 | also be used to initialize the *ActorCritic*, for example when 312 | it is loaded from a file. 313 | """ 314 | self.num_episode += 1 315 | self.a_curr = self.child.initial_action() 316 | self._motor_action_dim = self.child.action_space_dim() 317 | self.s_curr = dict() 318 | self.num_step = 0 319 | 320 | def init_episode(self, epoch, time_start_ms, time_end_ms, step_size_ms): 321 | """Define the behaviour during the initial phase, i.e. 
as long 322 | as 323 | 324 | num_step <= init_steps 325 | 326 | with ``num_step`` the episode's step iterator and ``init_steps`` 327 | given at construction (default 1). The default is to store the 328 | ``epoch`` but do nothing else. 329 | 330 | .. note:: 331 | The step iterator ``num_step`` is incremented before this 332 | method is called. 333 | 334 | """ 335 | self.s_curr = epoch 336 | self._pre_increment_hook(epoch) 337 | return self.child(epoch, time_start_ms, time_end_ms, step_size_ms) 338 | 339 | def __call__(self, epoch, time_start_ms, time_end_ms, step_size_ms): 340 | """One round in the actor-critic cycle. The current observations 341 | are given in ``epoch`` and the timing information in the rest of 342 | the parameters. For a detailed description of the parameters, 343 | see :py:class:`PuPy.PuppyActor`. 344 | 345 | This routine computes the reward from the *epoch* and manages 346 | consecutive epochs, then lets :py:meth:`_step` compute the next 347 | action. 348 | 349 | """ 350 | if self.num_step <= self._init_steps: 351 | self.num_step += 1 352 | return self.init_episode(epoch, time_start_ms, time_end_ms, step_size_ms) 353 | 354 | # extern through the robot: 355 | # take action (a_curr = a_next in the previous run) 356 | # observe sensors values produced by the action (a_curr = previous a_next) 357 | 358 | # Generate reinforcement signal U(k), given in(k) 359 | reward = self.plant.reward(epoch) 360 | #reward = self.plant.reward(self.s_curr) 361 | # It's not clear, which reward should be the input to the critic: 362 | # While the ACD papers imply the reward of time step n, the book 363 | # by Sutton/Barto indicate the reward as being from the next 364 | # state, n+1. Experiments indicate that it doesn't really matter. 365 | # To be consistent with other work, I go with time n. 366 | # Nico: I changed it to be epoch. This is just a notation thing with 367 | # the n and n+1, but it should be the reward of the newest state. 368 | 369 | # do the actual work 370 | epoch = self._step(self.s_curr, epoch, self.a_curr, reward) 371 | 372 | # increment 373 | self.a_curr = np.atleast_2d(epoch['a_next']).T 374 | self.s_curr = epoch 375 | self.num_step += 1 376 | 377 | # return next action 378 | return self.child(epoch, time_start_ms, time_end_ms, step_size_ms) 379 | 380 | def _step(self, s_curr, s_next, a_curr, reward): 381 | """Execute one step of the actor and return the next action. 382 | 383 | When overloading this method, it must be ensured that 384 | :py:meth:`_next_action_hook` is executed as soon as the next 385 | action is determined and also :py:meth:`_pre_increment_hook` 386 | should be called before the method returns (passing relevant 387 | intermediate results). 388 | 389 | ``s_curr`` 390 | Previous observed state. :py:keyword:`dict`, same as ``epoch`` 391 | of the :py:meth:`__call__`. 392 | 393 | ``s_next`` 394 | Latest observed state. :py:keyword:`dict`, same as ``epoch`` 395 | of the :py:meth:`__call__`. 396 | 397 | ``a_curr`` 398 | Previously executed action. This is the action which lead 399 | from ``s_curr`` into ``s_next``. Type specified through 400 | the :py:class:`Policy`. 401 | 402 | ``reward`` 403 | Reward of ``s_next`` 404 | 405 | """ 406 | raise NotImplementedError() 407 | 408 | def _pre_increment_hook(self, epoch, **kwargs): 409 | """Template method for subclasses. 410 | 411 | Before the actor-critic cycle increments, this method is invoked 412 | with all relevant locals of the :py:meth:`ADHDP.__call__` 413 | method. 
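        Subclasses or data collectors typically overload it to record these
        intermediate values. A minimal sketch (whether a given key, e.g.
        ``err``, is actually present depends on the calling subclass)::

            def _pre_increment_hook(self, epoch, **kwargs):
                if 'err' in kwargs:
                    print kwargs['err']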
414 | """ 415 | pass 416 | 417 | def _next_action_hook(self, a_next): 418 | """Postprocessing hook, after the next action ``a_next`` was 419 | proposed by the algorithm. Must return the possibly altered 420 | next action in the same format.""" 421 | return a_next 422 | 423 | def save(self, pth): 424 | """Store the current instance in a file at ``pth``. 425 | 426 | .. note:: 427 | If ``alpha`` or ``gamma`` was set to a user-defined 428 | function, make sure it's pickable. Especially, anonymous 429 | functions (:keyword:`lambda`) can't be pickled. 430 | 431 | """ 432 | child = self.child 433 | self.child = None 434 | 435 | f = open(pth, 'w') 436 | pickle.dump(self, f) 437 | f.close() 438 | 439 | self.child = child 440 | 441 | @staticmethod 442 | def load(pth): 443 | """Load an instance from a file ``pth``. 444 | """ 445 | f = open(pth, 'r') 446 | cls = pickle.load(f) 447 | cls.new_episode() 448 | return cls 449 | 450 | def set_alpha(self, alpha): 451 | """Define a value for ``alpha``. May be either a constant or 452 | a function of the time. 453 | """ 454 | if callable(alpha): 455 | self.alpha = alpha 456 | else: 457 | self.alpha = _ConstParam(alpha) 458 | 459 | def set_gamma(self, gamma): 460 | """Define a value for ``gamma``. May be either a constant or 461 | a function of the time. 462 | """ 463 | if callable(gamma): 464 | self.gamma = gamma 465 | else: 466 | self.gamma = _ConstParam(gamma) 467 | 468 | def set_momentum(self, momentum): 469 | """Define a value for ``momentum``. May be either a constant or 470 | a function of the time. 471 | """ 472 | if callable(momentum): 473 | self.momentum = momentum 474 | else: 475 | self.momentum = ConstMomentum(momentum) 476 | 477 | def set_normalization(self, norm): 478 | """Set the normalization instance to ``norm``. The normalization 479 | is propagated to the plant and policy.""" 480 | if norm is None: 481 | norm = PuPy.Normalization() 482 | self.normalizer = norm 483 | self.plant.set_normalization(norm) 484 | self.child.set_normalization(norm) # for the policy. 485 | 486 | -------------------------------------------------------------------------------- /HDPy/puppy/puppy.py: -------------------------------------------------------------------------------- 1 | """ 2 | Puppy experiments are executed within the [Webots]_ simulator. Since 3 | this module is linked to :py:mod:`PuPy` through the class 4 | :py:class:`ActorCritic`, this is the native approach. For the purpose of 5 | Puppy, an adapted Actor-Critic is implemented in :py:class:`PuppyHDP`, 6 | handling Puppy specifics. It inherits from :py:class:`ADHDP`, 7 | hence can be used in the same fashion. 8 | 9 | Simulation with [Webots]_ is often time consuming. Therefore, a method 10 | is provided to collect data in the simulation and replay it later. This 11 | is implemented through :py:class:`OfflineCollector` and 12 | :py:func:`puppy.offline_playback`. An example of how to approach this 13 | is documented in :ref:`puppy_offline`. 
14 | 15 | """ 16 | from ..hdp import ADHDP 17 | from ..rl import Plant 18 | import numpy as np 19 | import warnings 20 | import h5py 21 | import HDPy 22 | 23 | SENSOR_NAMES = ['trg0', 'trg1', 'trg2', 'trg3', 'accelerometer_x', 'accelerometer_y', 'accelerometer_z', 'compass_x', 'compass_y', 'compass_z', 'gyro_x', 'gyro_y', 'gyro_z', 'hip0', 'hip1', 'hip2', 'hip3', 'knee0', 'knee1', 'knee2', 'knee3', 'puppyGPS_x', 'puppyGPS_y', 'puppyGPS_z', 'touch0', 'touch0', 'touch1', 'touch2', 'touch3'] 24 | 25 | class PuppyHDP(ADHDP): 26 | """ADHDP subtype for simulations using Puppy in webots. 27 | 28 | This class adds some code considering restarts of Puppy. It adds 29 | an optional argument ``tumbled_reward``. The reward will be forced 30 | to this value after the supervisor detected tumbling. If 31 | :py:const:`None` (the default) is used, the reward remains 32 | unchanged. 33 | 34 | """ 35 | def __init__(self, *args, **kwargs): 36 | self._tumbled_reward = kwargs.pop('tumbled_reward', None) 37 | self.has_tumbled = False 38 | self.supervisor_tumbled_notice = 0 39 | super(PuppyHDP, self).__init__(*args, **kwargs) 40 | 41 | def _signal(self, msg, **kwargs): 42 | """Handle messages from the supervisor. Messages are expected 43 | when the robot has tumbled and thus the robot has to be reset. 44 | """ 45 | super(PuppyHDP, self)._signal(msg, **kwargs) 46 | # msg is 'reset', 'out_of_arena', 'tumbled_grace_start' or 'tumbled' 47 | # for msg==reset, the robot is reset immediately 48 | # msg==tumbled_grace_start marks the start of the grace period of the tumbled robot 49 | if msg == 'tumbled': 50 | #print "Tumbling received", self.num_step 51 | self.supervisor_tumbled_notice = 1 52 | 53 | if msg == 'reset': 54 | #print "Reset received", self.num_step 55 | self.child.reset() 56 | self.new_episode() 57 | 58 | # DEPRICATED: use of event_handler replaced by RobotActor's signal chain. 59 | #def event_handler(self, robot, epoch, current_time, msg): 60 | # """Handle messages from the supervisor. Messages are expected 61 | # when the robot has tumbled and thus the robot has to be reset. 62 | # """ 63 | # # msg is 'reset', 'out_of_arena', 'tumbled_grace_start' or 'tumbled' 64 | # # for msg==reset, the robot is reset immediately 65 | # # msg==tumbled_grace_start marks the start of the grace period of the tumbled robot 66 | # if msg == 'tumbled_grace_start': 67 | # #print "Tumbling received", self.num_step 68 | # self.supervisor_tumbled_notice = 1 69 | # 70 | # if msg == 'reset': 71 | # #print "Reset received", self.num_step 72 | # self.child.reset() 73 | # self.new_episode() 74 | # self.signal('new_episode') 75 | 76 | def new_episode(self): 77 | """After restarting, reset the tumbled values and start the 78 | new episode. 79 | """ 80 | super(PuppyHDP, self).new_episode() 81 | self.has_tumbled = False 82 | self.supervisor_tumbled_notice = 0 83 | 84 | def _step(self, s_curr, epoch, a_curr, reward): 85 | """Ensure the tumbled reward and initiate behaviour between 86 | restarts. The step of the parent is then invoked. 
87 | """ 88 | if self.has_tumbled: 89 | epoch['a_next'] = np.zeros(shape=a_curr.shape[::-1]) 90 | return epoch 91 | 92 | if self.supervisor_tumbled_notice > 0: 93 | if self.supervisor_tumbled_notice > 1: 94 | if self._tumbled_reward is not None: 95 | reward = np.atleast_2d([self._tumbled_reward]) 96 | 97 | #reward /= (1.0 - self.gamma(self.num_episode, self.num_step)) 98 | # geometric series to incorporate future rewards 99 | # note that with this, its err = r/(1-gamma) - J * (1-gamma) 100 | # but should be err = r/(1-gamma) - J 101 | # thus, there's an difference of J*gamma 102 | # is solved this by temporarily set gamma = 0.0 103 | self.has_tumbled = True 104 | #old_gamma = self.gamma 105 | #self.set_gamma(0.0) 106 | self.supervisor_tumbled_notice += 1 107 | 108 | #reward += np.random.normal(scale=0.001) 109 | epoch = super(PuppyHDP, self)._step(s_curr, epoch, a_curr, reward) 110 | 111 | #if self.supervisor_tumbled_notice > 2: 112 | # self.gamma = old_gamma 113 | 114 | #print self.num_step, reward, a_curr.T, a_next.T, epoch['puppyGPS_x'][-1] 115 | return epoch 116 | 117 | def init_episode(self, epoch, time_start_ms, time_end_ms, step_size_ms): 118 | """Initial behaviour (after reset) 119 | 120 | .. note:: 121 | Assuming identical initial trajectories, the initial state 122 | is the same - and thus doesn't matter. 123 | Non-identical initial trajectories will result in 124 | non-identical behaviour, therefore the initial state should 125 | be different (initial state w.r.t. start of learning). 126 | Due to this, the critic is already updated in the initial 127 | trajectory. 128 | """ 129 | if self.num_step > 2: 130 | # in_state = self.plant.state_input(self.s_curr) 131 | # a_curr_nrm = self.normalizer.normalize_value('a_curr', self.a_curr) 132 | # i_curr = np.vstack((in_state, a_curr_nrm)).T 133 | # x_curr = self.reservoir(i_curr, simulate=False) 134 | # x_curr = np.hstack((x_curr, i_curr)) # FIXME: Input/Output ESN Model 135 | i_curr, x_curr, _ = self._critic_eval(self.s_curr, self.a_curr, False, 'a_curr') 136 | epoch['x_curr'] = x_curr 137 | epoch['i_curr'] = i_curr 138 | epoch['a_next'] = self.a_curr.T 139 | epoch['a_curr'] = self.a_curr.T 140 | 141 | self.s_curr = epoch 142 | return self.child(epoch, time_start_ms, time_end_ms, step_size_ms) 143 | 144 | class OfflineCollector(ADHDP): 145 | """Collect sensor data for Puppy in webots, such that it can be 146 | reused later to train a critic offline. 147 | 148 | Note that in contrast to :py:class:`ADHDP`, some 149 | structures are not required (reservoir, plant). They will be set 150 | to stubs, hence don't need to be passed. 151 | 152 | Some extra metadata is stored in the datafile, which allows 153 | processing of the experiment in an offline fashion through the 154 | function :py:func:`puppy.offline_playback`. 
155 | 156 | """ 157 | def __init__(self, *args, **kwargs): 158 | # look for policy's member 'action_space_dim' (policy is hidden in child or sub-child) 159 | policy = kwargs['policy'] 160 | if hasattr(policy, 'action_space_dim'): 161 | action_space_dim = policy.action_space_dim 162 | else: 163 | from ..hdp import return_none 164 | action_space_dim = return_none 165 | 166 | class Phony: 167 | """Stub for a reservoir.""" 168 | reset_states = False 169 | def get_input_dim(self): 170 | """Return input dimension (action space dim.)""" 171 | return action_space_dim() 172 | def reset(self): 173 | """Reset to the initial state (no effect)""" 174 | pass 175 | 176 | kwargs['plant'] = Plant(state_space_dim=0) 177 | kwargs['reservoir'] = Phony() 178 | kwargs['readout'] = None 179 | self.supervisor_tumbled_notice = 0 180 | super(OfflineCollector, self).__init__(*args, **kwargs) 181 | 182 | def new_episode(self): 183 | """After restarting, reset the tumbled values and start the 184 | new episode. 185 | """ 186 | super(OfflineCollector, self).new_episode() 187 | self.supervisor_tumbled_notice = 0 188 | 189 | def __call__(self, epoch, time_start_ms, time_end_ms, step_size_ms): 190 | """Store the sensor measurements of an epoch in the datafile 191 | as well as relevant metadata. The robot detects if the 192 | simulation was reverted and if it has tumbled (through the 193 | supervisor message). Other guards are not considered, as none 194 | are covered by :py:class:`PuppyHDP`. 195 | 196 | """ 197 | #print "(call)", time_start_ms, self.a_curr.T, ('puppyGPS_x' in epoch and epoch['puppyGPS_x'][-1] or 'NOX') 198 | if len(epoch) == 0: 199 | # TODO: epoch length will never be 0 I think(?) Use _get_initial_target() for this purpose. 200 | # the very first initial epoch of the first episode 201 | # this case occurs when the simulation starts or after it is reverted 202 | self.num_step += 1 203 | #self._pre_increment_hook(dict(), empty_initial_step=np.array([1])) 204 | self._pre_increment_hook(dict(), init_step=np.array([self.num_step])) 205 | return self.child.get_iterator(time_start_ms, time_end_ms, step_size_ms) 206 | 207 | # Determine next action 208 | if self.num_step <= self._init_steps: 209 | # Init 210 | a_next = self.a_curr 211 | elif self.supervisor_tumbled_notice > 2: 212 | # Tumbled, prepare for reset 213 | a_next = np.zeros(shape=self.a_curr.shape) 214 | self.supervisor_tumbled_notice += 1 215 | elif self.supervisor_tumbled_notice > 0: 216 | # Tumbled, still walking 217 | a_next = self._next_action_hook(self.a_curr) 218 | self.supervisor_tumbled_notice += 1 219 | else: 220 | # Normal walking 221 | a_next = self._next_action_hook(self.a_curr) 222 | 223 | # if self.num_step <= self._init_steps: 224 | # print "(init)", a_next.T 225 | # elif self.supervisor_tumbled_notice > 2: 226 | # print time_start_ms, self.a_curr.T, self.num_step 227 | # else: 228 | # print time_start_ms, self.a_curr.T, epoch['puppyGPS_x'][-1] 229 | 230 | epoch['a_curr'] = self.a_curr.T 231 | epoch['a_next'] = a_next.T 232 | 233 | self.a_curr = a_next 234 | self.num_step += 1 235 | 236 | #print "(call-end)", self.num_step, a_next.T, a_next.shape, self.a_curr.shape 237 | return self.child(epoch, time_start_ms, time_end_ms, step_size_ms) 238 | 239 | def _signal(self, msg, **kwargs): 240 | """Handle messages from the supervisor. Messages are expected 241 | when the robot has tumbled and thus the robot has to be reset. 
242 | """ 243 | super(OfflineCollector, self)._signal(msg, **kwargs) 244 | # msg is 'reset', 'out_of_arena', 'tumbled_grace_start' or 'tumbled' 245 | # for msg==reset, the robot is reset immediately 246 | # msg==tumbled_grace_start marks the start of the grace period of the tumbled robot 247 | if msg == 'tumbled_grace_start': 248 | #print "Tumbling received", self.num_step 249 | self.supervisor_tumbled_notice = 1 250 | self._pre_increment_hook(dict(), tumbled=np.array([self.num_step])) 251 | 252 | if msg == 'reset': 253 | #print "Reset received", self.num_step 254 | self.child.reset() 255 | self.new_episode() 256 | 257 | # DEPRICATED: use of event_handler replaced by RobotActor's signal chain. 258 | #def event_handler(self, robot, epoch, current_time, msg): 259 | # """Handle messages from the supervisor. Messages are expected 260 | # when the robot has tumbled and thus the robot has to be reset. 261 | # """ 262 | # # msg is 'reset', 'out_of_arena', 'tumbled_grace_start' or 'tumbled' 263 | # # for msg==reset, the robot is reset immediately 264 | # # msg==tumbled_grace_start marks the start of the grace period of the tumbled robot 265 | # if msg == 'tumbled_grace_start': 266 | # #print "Tumbling received", self.num_step 267 | # self.supervisor_tumbled_notice = 1 268 | # self._pre_increment_hook(dict(), tumbled=np.array([self.num_step])) 269 | # 270 | # if msg == 'reset': 271 | # #print "Reset received", self.num_step 272 | # self.child.reset() 273 | # self.new_episode() 274 | # self.signal('new_episode') 275 | 276 | def _next_action_hook(self, a_next): 277 | """Defines the action sampling policy of the offline data 278 | gathering. Note that this policy is very relevant to later 279 | experiments, hence this methods should be overloaded (although 280 | a default policy is provided). 281 | """ 282 | warnings.warn('Default sampling policy is used.') 283 | a_next = np.zeros(self.a_curr.shape) 284 | # Prohibit too small or large amplitudes 285 | while (a_next < 0.2).any() or (a_next > 2.0).any() or ((a_next > 1.0).any() and a_next.ptp() > 0.4): 286 | a_next = self.a_curr + np.random.normal(0.0, 0.15, size=self.a_curr.shape) 287 | 288 | return a_next 289 | 290 | def offline_playback(pth_data, critic, samples_per_action, ms_per_step, episode_start=None, episode_end=None, min_episode_len=0, err_coefficient=0.01, episode_start_test=None): 291 | """Simulate an experiment run for the critic by using offline data. 292 | The data has to be collected in webots, using the respective 293 | robot and supervisor. Note that the behaviour of the simulation 294 | should match what's expected by the critic. The critic is fed the 295 | sensor data, in order. Of course, it can't react to it since 296 | the next action is predefined. 297 | 298 | Additional to the sensor fields, the 'tumbling' dataset is expected 299 | which indicates, if and when the robot has tumbled. It is used such 300 | that the respective signals can be sent to the critic. 301 | 302 | The critic won't store any sensory data again. 303 | 304 | ``pth_data`` 305 | Path to the datafile with the sensory information (HDF5). 306 | 307 | ``critic`` 308 | PuppyHDP instance. 309 | 310 | ``samples_per_action`` 311 | Number of samples per control step. Must correspond to the data. 312 | 313 | ``ms_per_step`` 314 | Sensor sampling period. 315 | 316 | ``episode_start`` 317 | Defines a lower limit on the episode number. Passed as int, 318 | is with respect to the episode index, not its identifier. 
319 | 320 | ``episode_stop`` 321 | Defines an upper limit on the episode number. Passed as int, 322 | is with respect to the episode index, not its identifier. 323 | 324 | ``min_episode_len`` 325 | Only pick episodes longer than this threshold. 326 | 327 | ``err_coefficient`` 328 | coefficient for the TD-error exponential moving average (EMA) 329 | 330 | ``episode_start_test`` 331 | starting point for the test, i.e. when we start accounting the TD-error. 332 | 333 | :returns: accumulated TD-error average 334 | 335 | """ 336 | # Open data file, get valid experiments 337 | f = h5py.File(pth_data,'r') 338 | storages = map(str, sorted(map(int, f.keys()))) 339 | storages = filter(lambda s: len(f[s]) > 0, storages) 340 | if min_episode_len > 0: 341 | storages = filter(lambda s: f[s]['a_curr'].shape[0] > min_episode_len, storages) 342 | 343 | if episode_end is not None: 344 | storages = storages[:episode_end] 345 | 346 | if episode_start is not None: 347 | storages = storages[episode_start:] 348 | 349 | assert len(storages) > 0 350 | 351 | if episode_start_test is None: 352 | episode_start_test = len(storages)/2 - 1; #use last half for testing 353 | 354 | # Prepare critic; redirect hooks to avoid storing epoch data twice 355 | # and feed the actions 356 | global accError 357 | accError = 0 # accumulated error 358 | next_action = None 359 | episode = None 360 | critic._pre_increment_hook_orig = critic._pre_increment_hook 361 | critic._next_action_hook_orig = critic._next_action_hook 362 | 363 | def pre_increment_hook(epoch, **kwargs): 364 | kwargs['offline_episode'] = np.array([episode]) 365 | critic._pre_increment_hook_orig(dict(), **kwargs) 366 | if int(episode) > episode_start_test and kwargs.has_key('err'): 367 | global accError 368 | accError = accError*(1-err_coefficient) + (kwargs['err'][0][0]**2)*err_coefficient # accumulated squared error 369 | #accError = accError*(1-err_coefficient) + np.abs(kwargs['err'][0][0])*err_coefficient # accumulated absolute error 370 | 371 | def next_action_hook(a_next): 372 | #print "(next)", a_next.T, next_action.T 373 | return next_action 374 | 375 | critic._next_action_hook = next_action_hook 376 | critic._pre_increment_hook = pre_increment_hook 377 | 378 | # Main loop, feed data to the critic 379 | time_step_ms = ms_per_step * samples_per_action 380 | time_start_ms = 0 381 | for episode_idx, episode in enumerate(storages): 382 | print episode_idx 383 | 384 | data_grp = f[episode] 385 | N = len(data_grp['trg0']) 386 | 387 | # get the stored ratio 388 | #db_samples_per_action = N / len(data_grp['a_next']) 389 | #assert N % db_samples_per_action == 0 390 | assert N % samples_per_action == 0 391 | 392 | # get tumbled infos 393 | if 'tumble' in data_grp: 394 | from pylab import find 395 | time_tumbled = find(data_grp['tumble'])[0] / samples_per_action * samples_per_action 396 | #time_tumbled = data_grp['tumbled'][0] * db_samples_per_action 397 | #time_tumbled = data_grp['tumble'][0] * samples_per_action 398 | else: 399 | time_tumbled = -1 400 | 401 | # initial, empty call 402 | if 'init_step' in data_grp: 403 | print "Simulation was started/reverted" 404 | time_start_ms = 0 405 | critic(dict(), time_start_ms, time_start_ms + samples_per_action, ms_per_step) 406 | time_tumbled -= samples_per_action 407 | 408 | # initial action 409 | critic.a_curr = np.atleast_2d(data_grp['a_curr'][0]).T 410 | 411 | # loop through data, incrementally feed the critic 412 | for num_iter in np.arange(0, N, samples_per_action): 413 | # next action 414 | next_action = 
np.atleast_2d(data_grp['a_next'][num_iter/samples_per_action]).T 415 | 416 | # get data 417 | time_start_ms += time_step_ms 418 | time_end_ms = time_start_ms + time_step_ms 419 | chunk = dict([(k, data_grp[k][num_iter:(num_iter+samples_per_action)]) for k in SENSOR_NAMES]) 420 | 421 | # send tumbled message 422 | if num_iter == time_tumbled: 423 | #critic.event_handler(None, dict(), time_tumbled, 'tumbled_grace_start') 424 | critic.signal('tumbled_grace_start') 425 | 426 | # update critic 427 | critic(chunk, time_start_ms, time_end_ms, time_step_ms) 428 | 429 | # send reset after episode has finished 430 | if episode_idx < len(storages) - 1: 431 | #critic.event_handler(None, dict(), ms_per_step * N, 'reset') 432 | critic.signal('reset') 433 | critic.signal('new_episode') # collectors will create new group 434 | 435 | # cleanup 436 | critic._pre_increment_hook = critic._pre_increment_hook_orig 437 | critic._next_action_hook = critic._next_action_hook_orig 438 | del critic._pre_increment_hook_orig 439 | del critic._next_action_hook_orig 440 | 441 | return accError 442 | 443 | 444 | ## DEPRECATED ## 445 | 446 | def puppy_offline_playback(*args, **kwargs): 447 | """Alias of offline_playback. 448 | 449 | .. deprecated:: 1.0 450 | Use :py:func:`offline_playback` instead 451 | 452 | """ 453 | warnings.warn("This function is deprecated. Use 'offline_playback' instead") 454 | return offline_playback(*args, **kwargs) 455 | -------------------------------------------------------------------------------- /HDPy/puppy/analysis_puppy.py: -------------------------------------------------------------------------------- 1 | """ 2 | For puppy experiment analysis, snapshot functions are implemented in a 3 | similar fashion as for the :ref:`ePuck robot `. However, for 4 | Puppy, the action is assumed to be two dimensional. The action snapshot 5 | is hence an image (2d plot). Through :py:func:`puppy_plot_action`, the 6 | figure is plotted at a specific state (identified by the epoch index 7 | of a recorded episode). Furthermore, the overall trajectory and the 8 | location of the inspected states can be plotted through 9 | :py:func:`puppy_plot_inspected_trajectory`. 10 | This method can either be used at some isolated states (with the 11 | mentioned methods) or in a video-like fashion. For the latter case, 12 | :py:class:`PuppyActionVideo` implements the necessary routines. 13 | 14 | The environment plotting can be managed through the functions 15 | :py:func:`puppy_plot_linetarget`, :py:func:`puppy_plot_locationtarget` 16 | and :py:func:`puppy_plot_landmarks`, dependent on the training target 17 | (as defined in :ref:`plants_puppy`). For plotting the robot's trajectory 18 | the functions :py:func:`puppy_plot_trajectory` and 19 | :py:func:`puppy_plot_all_trajectories` can be used.
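A minimal sketch of a single trajectory plot (the analysis object, episode
identifier and ``step_width`` are placeholders)::

    import pylab
    axis = pylab.figure().add_subplot(111)
    plot_trajectory(my_analysis, axis, episode=0, step_width=150)
    pylab.show()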
20 | 21 | """ 22 | import pylab 23 | import numpy as np 24 | import itertools 25 | import warnings 26 | from puppy import SENSOR_NAMES 27 | 28 | def plot_trajectory(analysis, axis, episode, step_width=1, offset=0, legend=True, **kwargs): 29 | """Plot the trajectory of an episode 30 | """ 31 | gps_x = analysis[episode]['puppyGPS_x'][offset+step_width-1::step_width] 32 | gps_y = analysis[episode]['puppyGPS_y'][offset+step_width-1::step_width] 33 | if step_width > 1: 34 | gps_x = np.concatenate(([analysis[episode]['puppyGPS_x'][offset]], gps_x)) 35 | gps_y = np.concatenate(([analysis[episode]['puppyGPS_y'][offset]], gps_y)) 36 | 37 | col = kwargs.pop('color', 'k') 38 | label = kwargs.pop('label', 'Trajectory') 39 | axis.plot(gps_x, gps_y, color=col, label=label, linewidth=3, **kwargs) 40 | axis.axis('equal') 41 | if legend: 42 | axis.plot(gps_x[0], gps_y[0], 'ks', label='Start') 43 | axis.plot(gps_x[-1], gps_y[-1], 'kv', label='End') 44 | 45 | return axis 46 | 47 | def plot_all_trajectories(analysis, axis, step_width=1, **kwargs): 48 | """Plot all trajectories in ``analysis`` into ``axis``. 49 | """ 50 | gps_x = analysis.get_data('puppyGPS_x') 51 | gps_y = analysis.get_data('puppyGPS_y') 52 | 53 | N = len(gps_x)-1 54 | kwargs.pop('color', None) # remove color argument 55 | for idx, (x, y) in enumerate(zip(gps_x, gps_y)): 56 | col = 0.75 - (0.75 * (idx - 1))/N 57 | 58 | x_plot = np.concatenate(([x[0]], x[step_width-1::step_width])) 59 | y_plot = np.concatenate(([y[0]], y[step_width-1::step_width])) 60 | 61 | axis.plot(x_plot, y_plot, color=str(col), **kwargs) 62 | 63 | return axis 64 | 65 | def plot_linetarget(axis, origin=(2.0, 0.0), direction=(1.0, 1.0), range_=(-5.0, 5.0)): 66 | """Plot a line given by ``origin`` and ``direction``. The ``range_`` 67 | may be supplid, which corresponds to the length of the line (from 68 | the origin). 69 | """ 70 | origin = np.array(origin) 71 | dir_ = np.array(direction) 72 | dir_ /= np.linalg.norm(dir_) 73 | line = [origin + t * dir_ for t in range_] 74 | line_x, line_y = zip(*line) 75 | axis.plot(line_x, line_y, 'k', label='Target') 76 | return axis 77 | 78 | def plot_locationtarget(axis, target=(4.0, 4.0), distance=0.5, **kwargs): 79 | """Plot the ``target`` location with a sphere of radius ``distance`` 80 | into ``axis`` to mark the target location. ``kwargs`` will be passed 81 | to all :py:mod:`pylab` calls.""" 82 | linewidth = kwargs.pop('linewidth', 2) 83 | color = kwargs.pop('facecolor', 'k') 84 | fill = kwargs.pop('fill', False) 85 | lbl = kwargs.pop('label', 'Target') 86 | axis.plot([target[0]], [target[1]], 'kD', label=lbl, **kwargs) 87 | if distance > 0.0: 88 | trg_field = pylab.Circle(target, distance, fill=fill, facecolor=color, linewidth=linewidth, label=lbl, **kwargs) 89 | axis.add_artist(trg_field) 90 | 91 | return axis 92 | 93 | def plot_landmarks(axis, landmarks, **kwargs): 94 | """Plot markers at ``landmark`` locations in ``axis``.""" 95 | color = kwargs.pop('color', 'k') 96 | lbl = kwargs.pop('label', '') 97 | marker = kwargs.pop('marker','^') 98 | for x, y in landmarks: 99 | axis.plot([x], [y], marker=marker, color=color, label=lbl, **kwargs) 100 | return axis 101 | 102 | def _action_eval(grp, reservoir, critic, trg_epoch, obs_offset, step_width, actions_range_x, actions_range_y): 103 | """Evaluate a set of two-dimensional actions [``action_range_x``, 104 | ``actions_range_y``] at a specific state ``trg_epoch`` and return 105 | the matrix of predicted returns. 106 | 107 | ``grp`` 108 | Observed data of the underlying experiment. 
Usually a 109 | :py:class:`H5CombinedGroup` or [HDF5]_ group (e.g. through 110 | :py:class:`Analysis`). 111 | 112 | ``critic`` 113 | :py:func:`critic` instance to be used for evaluation for a 114 | certain critic input (action and state). 115 | 116 | ``reservoir`` 117 | Reservoir to be used. Note that this must be the same instance 118 | as used in ``critic``. 119 | 120 | ``obs_offset`` 121 | Offset between robot observations (e.g. GPS) and reinforcement 122 | learning data (i.e. actions). For offline data, the offset is 123 | one epoch (i.e. ``step_width``), for online data, it is zero. 124 | 125 | ``step_width`` 126 | Number of observations per epoch. In terms of :py:mod:`PuPy`, 127 | this is the control period over the sensor polling period. 128 | 129 | 130 | """ 131 | reservoir.reset() 132 | reservoir.states = np.atleast_2d(grp['x_curr'][trg_epoch-1, :reservoir.get_output_dim()]) 133 | 134 | # evaluate actions 135 | # Note: epoch is one step ahead (of a_curr, same time as a_next)! 136 | # Note: sensor values are shifted w.r.t a_curr by obs_offset 137 | s_curr = dict([(sensor, grp[sensor][obs_offset+step_width*(trg_epoch-1):obs_offset+trg_epoch*step_width]) for sensor in SENSOR_NAMES]) 138 | a_ret = np.zeros((len(actions_range_x), len(actions_range_y))) 139 | 140 | actions_iter = itertools.product(range(len(actions_range_x)), range(len(actions_range_y))) 141 | for idx_x, idx_y in actions_iter: 142 | action_candidate = np.atleast_2d((actions_range_x[idx_x], actions_range_y[idx_y])).T 143 | j_curr = critic(s_curr, action_candidate, simulate=True) 144 | a_ret[idx_x, idx_y] = j_curr[0, 0] 145 | #print actions_range_x[idx_x], actions_range_y[idx_y], j_curr[0, 0] 146 | 147 | return a_ret 148 | 149 | def plot_action(analysis, episode, critic, reservoir, inspect_epochs, actions_range_x, actions_range_y, step_width, obs_offset, epoch_actions=None): 150 | """Along a trajectory ``episode`` of a conducted experiment given 151 | by ``analysis``, plot the predicted return over a 2D-action at some 152 | fixed states. For each of the states (given by ``inspect_epochs``), 153 | a figure is created including the return prediction as an image 154 | (i.e. 2D). 155 | 156 | ``analysis`` 157 | :py:class:`Analysis` instance containing the experimental data. 158 | 159 | ``episode`` 160 | Episode which is analysed. 161 | 162 | ``critic`` 163 | :py:func:`critic` instance to be used for evaluation for a 164 | certain critic input (action and state). 165 | 166 | ``reservoir`` 167 | Reservoir to be used. Note that this must be the same instance 168 | as used in ``critic``. 169 | 170 | ``inspect_epochs`` 171 | Epochs numbers for which the predicted actions should be 172 | plotted. 173 | 174 | ``actions_range_x`` 175 | Action range in the first dimension. The return is predicted 176 | for any combination of ``actions_range_x`` and 177 | ``actions_range_y``. 178 | 179 | ``actions_range_y`` 180 | Action range in the second dimension. The return is predicted 181 | for any combination of ``actions_range_x`` and 182 | ``actions_range_y``. 183 | 184 | ``step_width`` 185 | Number of observations per epoch. In terms of :py:mod:`PuPy`, 186 | this is the control period over the sensor polling period. 187 | 188 | ``obs_offset`` 189 | Offset between robot observations (e.g. GPS) and reinforcement 190 | learning data (i.e. actions). For offline data, the offset is 191 | one epoch (i.e. ``step_width``), for online data, it is zero. 
192 | 193 | ``epoch_actions`` 194 | A list of actually executed actions (as tuple), for each 195 | inspected epoch. The action is indicated in the plot by a 196 | marker. The argument or list items may be :py:const:`None`, 197 | in which case nothing is plotted. 198 | 199 | """ 200 | grp = analysis[episode] 201 | if epoch_actions is None: 202 | epoch_actions = [None] * len(inspect_epochs) 203 | 204 | for trg_epoch, actions in zip(inspect_epochs, epoch_actions): 205 | 206 | # simulate the actions 207 | a_ret = _action_eval(grp, reservoir, critic, trg_epoch, obs_offset, step_width, actions_range_x, actions_range_y) 208 | 209 | # plot results 210 | fig = pylab.figure() 211 | # In the image, the y-axis is the rows, the x-axis the columns of the matrix 212 | # Having index (0,0) in the left/bottom corner: origin='lower' 213 | pylab.plot((0, len(actions_range_x)-1), (0, len(actions_range_y)-1), 'b') 214 | pylab.imshow(a_ret, origin='lower', cmap=pylab.cm.gray) 215 | pylab.colorbar() 216 | pylab.xticks(range(len(actions_range_y)), actions_range_y) 217 | pylab.yticks(range(len(actions_range_x)), actions_range_x) 218 | pylab.title('Expected Return per action at epoch ' + str(trg_epoch)) 219 | pylab.xlabel('Amplitude right legs') # cols are idx_y, right legs 220 | pylab.ylabel('Amplitude left legs') # rows are idx_x, left legs 221 | 222 | if actions is not None: 223 | a_left, a_right = zip(*actions) 224 | pylab.plot(a_left, a_right, 'r') 225 | pylab.plot([a_left[0]], [a_right[0]], 'rs') 226 | 227 | return fig 228 | 229 | def plot_inspected_trajectory(analysis, episode_idx, step_width, axis, inspect_epochs, obs_offset): 230 | """Plot the robot trajectory of the experiment ``episode_idx`` 231 | found in ``analysis`` and a marker at all ``inspect_epochs``. This 232 | function was created to support :py:func:`puppy_plot_action` by 233 | giving an overview over the whole path. 234 | 235 | ``axis`` 236 | plotting canvas. 237 | 238 | ``step_width`` 239 | Number of observations per epoch. In terms of :py:mod:`PuPy`, 240 | this is the control period over the sensor polling period. 241 | 242 | ``obs_offset`` 243 | Offset between robot observations (e.g. GPS) and reinforcement 244 | learning data (i.e. actions). For offline data, the offset is 245 | one epoch (i.e. ``step_width``), for online data, it is zero. 246 | 247 | """ 248 | puppy_plot_trajectory(analysis, axis, episode_idx, step_width, color='b', offset=obs_offset) 249 | trg_x = [analysis[episode_idx]['puppyGPS_x'][obs_offset + step_width*trg_epoch+step_width-1] for trg_epoch in inspect_epochs] 250 | trg_y = [analysis[episode_idx]['puppyGPS_y'][obs_offset + step_width*trg_epoch+step_width-1] for trg_epoch in inspect_epochs] 251 | axis.plot(trg_x, trg_y, 'k*', label='Inspected states') 252 | return axis 253 | 254 | class ActionVideo: 255 | """Set up a structure such that the predicted return over 2D 256 | actions can be successively plotted in the same figure. 257 | 258 | .. todo:: 259 | The selected action isn't displayed correctly (offset?) 260 | 261 | ``data`` 262 | Observed data of the underlying experiment. Usually a 263 | :py:class:`H5CombinedGroup` or [HDF5]_ group (e.g. through 264 | :py:class:`Analysis`). 265 | 266 | ``critic`` 267 | :py:func:`critic` instance to be used for evaluation for a 268 | certain critic input (action and state). 269 | 270 | ``reservoir`` 271 | Reservoir to be used. Note that this must be the same instance 272 | as used in ``critic``. 273 | 274 | ``actions_range_x`` 275 | Action range in the first dimension. 
The return is predicted 276 | for any combination of ``actions_range_x`` and 277 | ``actions_range_y``. 278 | 279 | ``actions_range_y`` 280 | Action range in the second dimension. The return is predicted 281 | for any combination of ``actions_range_x`` and 282 | ``actions_range_y``. 283 | 284 | ``step_width`` 285 | Number of observations per epoch. In terms of :py:mod:`PuPy`, 286 | this is the control period over the sensor polling period. 287 | 288 | ``obs_offset`` 289 | Offset between robot observations (e.g. GPS) and reinforcement 290 | learning data (i.e. actions). For offline data, the offset is 291 | one epoch (i.e. ``step_width``), for online data, it is zero. 292 | 293 | ``with_actions`` 294 | Plot markers and lines between them which represent the 295 | actually selected action. 296 | 297 | """ 298 | def __init__(self, data, critic, reservoir, actions_range_x, actions_range_y, step_width, obs_offset, with_actions=True): 299 | 300 | # Basic figure 301 | self.fig = None 302 | self.title = None 303 | self.axis = None 304 | self.axis_image = None 305 | 306 | # Actions 307 | self.with_actions = with_actions 308 | self.actions_nrm = ((None, None), (None, None)) 309 | self.actions_line = None 310 | self.actions_marker = None 311 | 312 | # Experiment data 313 | self.data = data 314 | self.critic = critic 315 | self.reservoir = reservoir 316 | self.actions_range_x, self.actions_range_y = actions_range_x, actions_range_y 317 | self.step_width, self.obs_offset = step_width, obs_offset 318 | 319 | def draw_init(self, fig=None): 320 | """Set up the initial video figure. A new figure is created 321 | unless one is provided in ``fig``. 322 | """ 323 | if fig is None: 324 | fig = pylab.figure() 325 | 326 | # Create the figure 327 | self.fig = fig 328 | self.title = self.fig.suptitle('Expected Return per action') 329 | 330 | # Configure the axis 331 | self.axis = self.fig.add_subplot(111) 332 | self.axis.set_xticks(range(len(self.actions_range_y))) 333 | self.axis.set_xticklabels(self.actions_range_y) 334 | self.axis.set_yticks(range(len(self.actions_range_x))) 335 | self.axis.set_yticklabels(self.actions_range_x) 336 | self.axis.set_xlabel('Amplitude right legs') # cols are idx_y, right legs 337 | self.axis.set_ylabel('Amplitude left legs') # rows are idx_x, left legs 338 | 339 | # Plot the diagonal 340 | self.axis.plot((0, len(self.actions_range_x)-1), (0, len(self.actions_range_y)-1), 'b') 341 | 342 | # Prepare the image 343 | img_data = np.zeros((len(self.actions_range_x), len(self.actions_range_y))) 344 | self.axis_image = self.axis.imshow(img_data, origin='lower', cmap=pylab.cm.Greys) 345 | self.fig.colorbar(self.axis_image) 346 | 347 | # action line 348 | if self.with_actions: 349 | ox, sx = self.actions_range_x[0], len(self.actions_range_x)-1 350 | oy, sy = self.actions_range_y[0], len(self.actions_range_y)-1 351 | self.actions_nrm = ((ox, sx), (oy, sy)) 352 | self.actions_line = self.axis.plot([sx*(0.5-ox), sx*(0.5-ox)], [sy*(0.5-oy), sy*(1.0-oy)], 'r')[0] 353 | self.actions_marker = self.axis.plot([sx*(0.5-ox)], [sy*(0.5-oy)], 'rs')[0] 354 | 355 | return self 356 | 357 | def draw_step(self, epoch, actions=None): 358 | """Update the figure by showing the action plot for ``epoch``. 359 | If `with_actions` is set, a list of actions to be plotted should 360 | be present in ``actions``.
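        The method is normally called repeatedly after :py:meth:`draw_init`,
        roughly as in this sketch (data source, critic, reservoir, action
        ranges and epoch indices are placeholders)::

            vid = ActionVideo(grp, critic, reservoir, x_range, y_range, step_width=150, obs_offset=150, with_actions=False)
            vid.draw_init()
            for trg_epoch in range(5, 50):
                vid.draw_step(trg_epoch)
                pylab.draw()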
361 | """ 362 | # evaluate the actions 363 | a_ret = _action_eval( 364 | self.data, 365 | self.reservoir, 366 | self.critic, 367 | epoch, 368 | self.obs_offset, 369 | self.step_width, 370 | self.actions_range_x, 371 | self.actions_range_y 372 | ) 373 | 374 | # update plot 375 | self.axis_image.set_data(a_ret) 376 | self.axis_image.set_clim(vmin=a_ret.min(), vmax=a_ret.max()) 377 | self.axis_image.changed() 378 | 379 | # update action line and marker 380 | if self.with_actions: 381 | if actions is None: 382 | warnings.warn('with_actions set but no actions provided') 383 | else: 384 | ox, sx = self.actions_nrm[0] 385 | oy, sy = self.actiosn_nrm[1] 386 | actions[:, 0] = (actions[:, 0] - oy) * sy 387 | actions[:, 1] = (actions[:, 1] - ox) * sx 388 | self.actions_line.set_data((actions[:, 1], actions[:, 0])) 389 | self.actions_marker.set_data(([actions[-1, 1]], [actions[-1, 0]])) 390 | 391 | return self 392 | 393 | def draw_trajectory(self, loc_marker, epoch_idx): 394 | """Update the marker of the current state in a trajectory plot. 395 | The current state is read from *data* at ``epoch_idx``, the 396 | marker plot given in ``loc_marker``. 397 | """ 398 | loc_x = self.data['puppyGPS_x'][self.obs_offset+self.step_width*epoch_idx+self.step_width-1] 399 | loc_y = self.data['puppyGPS_y'][self.obs_offset+self.step_width*epoch_idx+self.step_width-1] 400 | loc_marker.set_data([loc_x], [loc_y]) 401 | return self 402 | 403 | 404 | ## DEPRECATED ## 405 | 406 | def puppy_plot_trajectory(*args, **kwargs): 407 | """Alias of plot_trajectory. 408 | 409 | .. deprecated:: 1.0 410 | Use :py:func:`plot_trajectory` instead 411 | 412 | """ 413 | warnings.warn("This function is deprecated. Use 'plot_trajectory' instead") 414 | return plot_trajectory(*args, **kwargs) 415 | 416 | def puppy_plot_all_trajectories(*args, **kwargs): 417 | """Alias of plot_all_trajectories. 418 | 419 | .. deprecated:: 1.0 420 | Use :py:func:`plot_all_trajectories` instead 421 | 422 | """ 423 | warnings.warn("This function is deprecated. Use 'plot_all_trajectories' instead") 424 | return plot_all_trajectories(*args, **kwargs) 425 | 426 | def puppy_plot_linetarget(*args, **kwargs): 427 | """Alias of plot_linetarget. 428 | 429 | .. deprecated:: 1.0 430 | Use :py:func:`plot_linetarget` instead 431 | 432 | """ 433 | warnings.warn("This function is deprecated. Use 'plot_linetarget' instead") 434 | return plot_linetarget(*args, **kwargs) 435 | 436 | def puppy_plot_locationtarget(*args, **kwargs): 437 | """Alias of plot_locationtarget. 438 | 439 | .. deprecated:: 1.0 440 | Use :py:func:`plot_locationtarget` instead 441 | 442 | """ 443 | warnings.warn("This function is deprecated. Use 'plot_locationtarget' instead") 444 | return plot_locationtarget(*args, **kwargs) 445 | 446 | def puppy_plot_landmarks(*args, **kwargs): 447 | """Alias of plot_landmarks. 448 | 449 | .. deprecated:: 1.0 450 | Use :py:func:`plot_landmarks` instead 451 | 452 | """ 453 | warnings.warn("This function is deprecated. Use 'plot_landmarks' instead") 454 | return plot_landmarks(*args, **kwargs) 455 | 456 | def puppy_plot_action(*args, **kwargs): 457 | """Alias of plot_action. 458 | 459 | .. deprecated:: 1.0 460 | Use :py:func:`plot_action` instead 461 | 462 | """ 463 | warnings.warn("This function is deprecated. Use 'plot_action' instead") 464 | return plot_action(*args, **kwargs) 465 | 466 | def puppy_plot_inspected_trajectory(*args, **kwargs): 467 | """Alias of plot_inspected_trajectory. 468 | 469 | .. 
deprecated:: 1.0 470 | Use :py:func:`plot_inspected_trajectory` instead 471 | 472 | """ 473 | warnings.warn("This function is deprecated. Use 'plot_inspected_trajectory' instead") 474 | return plot_inspected_trajectory(*args, **kwargs) 475 | 476 | def puppy_vid_init(actions_range_x, actions_range_y, with_actions=True): 477 | """ 478 | 479 | .. deprecated:: 1.0 480 | Use :py:class:`PuppyActionVideo` instead 481 | 482 | """ 483 | warnings.warn('deprecated, use PuppyActionVideo instead') 484 | fig = pylab.figure() 485 | axis = fig.add_subplot(111) 486 | axis.set_xticks(range(len(actions_range_y))) 487 | axis.set_xticklabels(actions_range_y) 488 | axis.set_yticks(range(len(actions_range_x))) 489 | axis.set_yticklabels(actions_range_x) 490 | title = fig.suptitle('Expected Return per action') 491 | axis.set_xlabel('Amplitude right legs') # cols are idx_y, right legs 492 | axis.set_ylabel('Amplitude left legs') # rows are idx_x, left legs 493 | axis.plot((0, len(actions_range_x)-1), (0, len(actions_range_y)-1), 'b') 494 | img_data = np.zeros((len(actions_range_x), len(actions_range_y))) 495 | axim = axis.imshow(img_data, origin='lower', cmap=pylab.cm.Greys) 496 | fig.colorbar(axim) 497 | 498 | # action line 499 | if with_actions: 500 | ox, sx = actions_range_x[0], len(actions_range_x)-1 501 | oy, sy = actions_range_y[0], len(actions_range_y)-1 502 | a_line = axis.plot([sx*(0.5-ox), sx*(0.5-ox)], [sy*(0.5-oy), sy*(1.0-oy)], 'r')[0] 503 | a_marker = axis.plot([sx*(0.5-ox)], [sy*(0.5-oy)], 'rs')[0] 504 | else: 505 | ox = sx = oy = sy = a_line = a_marker = None 506 | 507 | return fig, axis, axim, title, (a_line, a_marker, (ox, sx), (oy, sy)) 508 | 509 | def puppy_vid_action(image, (a_line, a_marker, px, py), grp, critic, reservoir, epoch, actions_range_x, actions_range_y, step_width, obs_offset, actions=None): 510 | """ 511 | 512 | .. deprecated:: 1.0 513 | Use :py:class:`PuppyActionVideo` instead 514 | 515 | """ 516 | warnings.warn('deprecated, use PuppyActionVideo instead') 517 | a_ret = _action_eval(grp, reservoir, critic, epoch, obs_offset, step_width, actions_range_x, actions_range_y) 518 | 519 | # update plot 520 | image.set_data(a_ret) 521 | image.set_clim(vmin=a_ret.min(), vmax=a_ret.max()) 522 | image.changed() 523 | 524 | if actions is not None: 525 | actions[:, 0] = (actions[:, 0] - py[0]) * py[1] 526 | actions[:, 1] = (actions[:, 1] - px[0]) * px[1] 527 | a_line.set_data((actions[:, 1], actions[:, 0])) 528 | a_marker.set_data(([actions[-1, 1]], [actions[-1, 0]])) 529 | 530 | return image 531 | 532 | def puppy_vid_inspected_trajectory(grp, step_width, loc_marker, epoch_idx, obs_offset): 533 | """ 534 | 535 | .. deprecated:: 1.0 536 | Use :py:class:`PuppyActionVideo` instead 537 | 538 | """ 539 | warnings.warn('deprecated, use PuppyActionVideo instead') 540 | loc_x = grp['puppyGPS_x'][obs_offset+step_width*epoch_idx+step_width-1] 541 | loc_y = grp['puppyGPS_y'][obs_offset+step_width*epoch_idx+step_width-1] 542 | loc_marker.set_data([loc_x], [loc_y]) 543 | return loc_marker 544 | 545 | --------------------------------------------------------------------------------