├── README ├── HDPy ├── epuck │ ├── plant │ │ ├── __init__.py │ │ └── plants.py │ ├── policy │ │ ├── __init__.py │ │ └── policies.py │ ├── __init__.py │ ├── epuck_arena.py │ ├── analysis_epuck.py │ └── epuck.py ├── puppy │ ├── policy │ │ └── __init__.py │ ├── plant │ │ ├── __init__.py │ │ ├── AccelerationReward.py │ │ └── plants.py │ ├── __init__.py │ ├── puppy.py │ └── analysis_puppy.py ├── __init__.py ├── inout.py └── rl.py ├── doc ├── source │ ├── _downloads │ │ └── latest.tar.gz │ ├── todopg.rst │ ├── license.rst │ ├── analysis.rst │ ├── utils.rst │ ├── epuck.rst │ ├── puppy.rst │ ├── rc.rst │ ├── references.rst │ ├── rl.rst │ ├── download.rst │ ├── puppy_online.rst │ ├── index.rst │ ├── pp.rst │ ├── conf.py │ └── puppy_offline.rst ├── make.bat └── Makefile ├── test ├── esn_acd.hdf5 ├── puppy_example_trajectory_supervisor.py ├── puppy_actor_supervisor.py ├── puppy_offline_sampling_supervisor.py ├── puppy_online_supervisor.py ├── analysis.py ├── puppy_example_trajectory_sequence.py ├── puppy_example_trajectory_eval.py ├── puppy_offline_replay.py ├── puppy_offline_sampling_robot.py ├── epuck_online.py ├── puppy_example_trajectory_robot.py ├── acd.py ├── puppy_online_robot.py ├── rc_example.py └── puppy_offline_analysis.py ├── data ├── doc │ ├── rc_example.pdf │ ├── rc_example.png │ ├── epuck_data.hdf5 │ ├── epuck_result.pdf │ ├── epuck_result.png │ └── puppy_offline_result.png ├── puppy_unit.json └── puppy_stat.json ├── .gitignore ├── setup.py └── LICENSE.txt /README: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /HDPy/epuck/plant/__init__.py: -------------------------------------------------------------------------------- 1 | from plants import * 2 | -------------------------------------------------------------------------------- /doc/source/_downloads/latest.tar.gz: -------------------------------------------------------------------------------- 1 | HDPy-1.0.tar.gz -------------------------------------------------------------------------------- /HDPy/epuck/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from policies import * 2 | -------------------------------------------------------------------------------- /HDPy/puppy/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from policies import * 2 | -------------------------------------------------------------------------------- /test/esn_acd.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igsor/HDPy/HEAD/test/esn_acd.hdf5 -------------------------------------------------------------------------------- /data/doc/rc_example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igsor/HDPy/HEAD/data/doc/rc_example.pdf -------------------------------------------------------------------------------- /data/doc/rc_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igsor/HDPy/HEAD/data/doc/rc_example.png -------------------------------------------------------------------------------- /data/doc/epuck_data.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igsor/HDPy/HEAD/data/doc/epuck_data.hdf5 
-------------------------------------------------------------------------------- /data/doc/epuck_result.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igsor/HDPy/HEAD/data/doc/epuck_result.pdf -------------------------------------------------------------------------------- /data/doc/epuck_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igsor/HDPy/HEAD/data/doc/epuck_result.png -------------------------------------------------------------------------------- /doc/source/todopg.rst: -------------------------------------------------------------------------------- 1 | 2 | Hic sunt dracones 3 | ================= 4 | 5 | .. todolist:: 6 | 7 | -------------------------------------------------------------------------------- /HDPy/puppy/plant/__init__.py: -------------------------------------------------------------------------------- 1 | from plants import * 2 | from AccelerationReward import AccelerationReward 3 | -------------------------------------------------------------------------------- /data/doc/puppy_offline_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igsor/HDPy/HEAD/data/doc/puppy_offline_result.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | *.pyo 4 | .*project 5 | doc/build/ 6 | dist/ 7 | MANIFEST 8 | HDPy.pth 9 | 10 | # compiled downloads 11 | doc/source/_downloads/* 12 | doc/source/_downloads/ 13 | -------------------------------------------------------------------------------- /doc/source/license.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _license: 3 | 4 | License 5 | ======= 6 | 7 | This project is released under the terms of the 3-clause BSD License. 8 | 9 | .. literalinclude:: ../../LICENSE.txt 10 | -------------------------------------------------------------------------------- /HDPy/puppy/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | .. automodule:: HDPy.puppy.puppy 4 | 5 | .. automodule:: HDPy.puppy.analysis_puppy 6 | 7 | """ 8 | from puppy import * 9 | from analysis_puppy import * 10 | import policy 11 | import plant 12 | -------------------------------------------------------------------------------- /HDPy/epuck/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | .. automodule:: HDPy.epuck.epuck 4 | 5 | .. .. automodule:: HDPy.epuck.epuck_arena 6 | 7 | .. 
automodule:: HDPy.epuck.analysis_epuck 8 | 9 | """ 10 | from epuck import * 11 | from analysis_epuck import * 12 | import plant 13 | import policy 14 | import epuck_arena as env 15 | -------------------------------------------------------------------------------- /test/puppy_example_trajectory_supervisor.py: -------------------------------------------------------------------------------- 1 | 2 | from controller import Supervisor 3 | import PuPy 4 | 5 | # checks 6 | checks = [ 7 | PuPy.QuitOnDemand(), 8 | PuPy.RevertOnDemand() 9 | ] 10 | 11 | # set up supervisor 12 | s = PuPy.supervisorBuilder(Supervisor, 20, [PuPy.ReceiverCheck(checks)]) 13 | 14 | # run 15 | s.run() 16 | -------------------------------------------------------------------------------- /test/puppy_actor_supervisor.py: -------------------------------------------------------------------------------- 1 | from controller import Supervisor 2 | import PuPy 3 | 4 | # checks 5 | checks = [] 6 | checks.append(PuPy.RevertTumbled(grace_time_ms=(3 * 3000))) 7 | checks.append(PuPy.RevertMaxIter(3000 * 300)) 8 | 9 | # set up supervisor 10 | s = PuPy.supervisorBuilder(Supervisor, 20, checks) 11 | 12 | # run 13 | s.run() 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | setup( 4 | name='HDPy', 5 | url='http://www.igsor.net/research/HDPy/', 6 | author='Matthias Baumgartner', 7 | author_email='research@igsor.net', 8 | version='1.0', 9 | packages=['HDPy'], 10 | license='Free for use', 11 | long_description=open('README').read(), 12 | requires=("scipy","numpy","mdp","Oger") 13 | ) 14 | -------------------------------------------------------------------------------- /doc/source/analysis.rst: -------------------------------------------------------------------------------- 1 | 2 | Analysis 3 | ======== 4 | 5 | .. module:: HDPy 6 | 7 | Introduction 8 | ------------ 9 | 10 | .. automodule:: HDPy.analysis 11 | 12 | Example 13 | ------- 14 | 15 | 16 | Reference 17 | --------- 18 | 19 | .. autoclass:: Analysis 20 | :members: 21 | 22 | .. autofunction:: overview 23 | 24 | .. autofunction:: node_inspection 25 | 26 | .. autofunction:: critic 27 | 28 | -------------------------------------------------------------------------------- /test/puppy_offline_sampling_supervisor.py: -------------------------------------------------------------------------------- 1 | from controller import Supervisor 2 | import PuPy 3 | 4 | # checks 5 | checks = [] 6 | checks.append(PuPy.RevertOutOfArena(arena_size=(-10, 10, -10, 10), distance=0, grace_time_ms=(3 * 3000))) 7 | # respawn the robot at a random location in a bounded area 8 | checks.append(PuPy.RevertTumbled(grace_time_ms=(3 * 3000))) 9 | 10 | # set up supervisor 11 | s = PuPy.supervisorBuilder(Supervisor, 20, checks) 12 | 13 | # run 14 | s.run() 15 | -------------------------------------------------------------------------------- /test/puppy_online_supervisor.py: -------------------------------------------------------------------------------- 1 | from controller import Supervisor 2 | import PuPy 3 | 4 | checks = [] 5 | # Revert the simulation if the robot tumbled ... 6 | checks.append(PuPy.RevertTumbled(grace_time_ms=(3 * 3000))) 7 | # ... 
or went out of a predefined space 8 | checks.append(PuPy.RevertOutOfArena(arena_size=(-10, 10, -10, 10), distance=0, grace_time_ms=(3 * 3000))) 9 | 10 | # set up supervisor 11 | s = PuPy.supervisorBuilder(Supervisor, 20, checks) 12 | 13 | # run 14 | s.run() 15 | -------------------------------------------------------------------------------- /doc/source/utils.rst: -------------------------------------------------------------------------------- 1 | 2 | Utility functions 3 | ================= 4 | 5 | .. contents:: 6 | 7 | .. module:: HDPy 8 | 9 | Introduction 10 | ------------ 11 | 12 | .. automodule:: HDPy.inout 13 | 14 | Reference 15 | --------- 16 | 17 | 18 | .. autofunction:: remove_init_only_groups 19 | 20 | .. autofunction:: h5_reorder 21 | 22 | .. autofunction:: h5_merge_experiments 23 | 24 | .. autofunction:: remove_boundary_groups 25 | 26 | .. autoclass:: H5CombinedFile 27 | 28 | .. autoclass:: H5CombinedGroup 29 | -------------------------------------------------------------------------------- /doc/source/epuck.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _epuck: 3 | 4 | ePuck 5 | ===== 6 | 7 | .. contents:: 8 | 9 | Introduction 10 | ------------ 11 | 12 | .. automodule:: HDPy.epuck 13 | 14 | Example 15 | ------- 16 | 17 | .. literalinclude:: ../../test/epuck_online.py 18 | 19 | .. image:: ../../data/doc/epuck_result.png 20 | 21 | Reference 22 | --------- 23 | 24 | .. module:: HDPy 25 | 26 | .. autoclass:: HDPy.epuck.Robot 27 | :members: read_sensors, take_action, reset, reset_random, plot_trajectory 28 | 29 | .. autoclass:: HDPy.epuck.AbsoluteRobot 30 | :show-inheritance: 31 | 32 | .. autofunction:: HDPy.epuck.simulation_loop 33 | 34 | .. autofunction:: HDPy.epuck.epuck_plot_snapshot 35 | 36 | .. autofunction:: HDPy.epuck.epuck_plot_value_over_action 37 | 38 | .. autofunction:: HDPy.epuck.epuck_plot_all_trajectories 39 | -------------------------------------------------------------------------------- /test/analysis.py: -------------------------------------------------------------------------------- 1 | 2 | import HDPy,pylab 3 | a = HDPy.Analysis('esn_acd.hdf5') 4 | 5 | # Prediction plot 6 | fig = pylab.figure(1) 7 | ax = fig.add_axes([0.1, 0.1, 0.8, 0.8]) 8 | a.plot_path_return_prediction('18', ax) 9 | fig.suptitle('Predictor evaluation') 10 | # If all works well, you should see a figure with 5 lines in it, 11 | # according to the Analysis.plot_path_return_prediction documentation 12 | 13 | # Simple plot functions 14 | fig = pylab.figure(2) 15 | a.plot_readout_sum(fig.add_subplot(321)) 16 | a.plot_reward(fig.add_subplot(322)) 17 | a.plot_derivative(fig.add_subplot(323)) 18 | a.plot_actions(fig.add_subplot(324)) 19 | a.plot_error(fig.add_subplot(325)) 20 | a.plot_accumulated_reward(fig.add_subplot(326)) 21 | fig.suptitle('Some characteristics') 22 | # If all works well, you should see 6 subplots with the respective 23 | # curves displayed. 24 | 25 | # Show the plot 26 | pylab.show(block=False) 27 | 28 | print "Check the graphs visually. If they correspond to your expectations, the test was successful." 29 | -------------------------------------------------------------------------------- /doc/source/puppy.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _puppy: 3 | 4 | Puppy 5 | ===== 6 | 7 | .. contents:: 8 | 9 | .. module:: HDPy 10 | 11 | Introduction 12 | ------------ 13 | 14 | .. automodule:: HDPy.puppy 15 | 16 | Example 17 | ------- 18 | 19 | .. 
toctree:: 20 | :maxdepth: 1 21 | 22 | puppy_offline 23 | puppy_online 24 | 25 | Reference 26 | --------- 27 | 28 | .. autoclass:: HDPy.puppy.PuppyHDP 29 | :members: new_episode, init_episode, _step, event_handler 30 | :show-inheritance: 31 | 32 | .. autoclass:: HDPy.puppy.OfflineCollector 33 | :members: new_episode, __call__, _next_action_hook, event_handler 34 | :show-inheritance: 35 | 36 | .. autofunction:: HDPy.puppy.offline_playback 37 | 38 | 39 | .. autofunction:: HDPy.puppy.plot_trajectory 40 | 41 | .. autofunction:: HDPy.puppy.plot_all_trajectories 42 | 43 | .. autofunction:: HDPy.puppy.plot_linetarget 44 | 45 | .. autofunction:: HDPy.puppy.plot_locationtarget 46 | 47 | .. autofunction:: HDPy.puppy.plot_landmarks 48 | 49 | .. autofunction:: HDPy.puppy.plot_action 50 | 51 | .. autofunction:: HDPy.puppy.plot_inspected_trajectory 52 | 53 | .. autoclass:: HDPy.puppy.ActionVideo 54 | :members: 55 | -------------------------------------------------------------------------------- /doc/source/rc.rst: -------------------------------------------------------------------------------- 1 | 2 | Reservoir Computing 3 | =================== 4 | 5 | .. contents:: 6 | 7 | Introduction 8 | ------------ 9 | 10 | .. automodule:: HDPy.rc 11 | 12 | Example 13 | ------- 14 | 15 | .. literalinclude:: ../../test/rc_example.py 16 | 17 | >>> Sparse 0.004785 0.744121 18 | >>> Orthogonal 0.004858 0.749770 19 | >>> Ring of Neurons 0.004827 0.747397 20 | 21 | .. image:: ../../data/doc/rc_example.png 22 | 23 | Reference 24 | --------- 25 | 26 | .. module:: HDPy 27 | 28 | .. autoclass:: ReservoirNode 29 | :members: execute, copy, input_dim, output_dim, reset, save, _post_update_hook, __call__ 30 | 31 | .. autoclass:: PlainRLS 32 | :members: train, __call__, save, stop_training, copy 33 | 34 | .. autoclass:: StabilizedRLS 35 | :members: 36 | :show-inheritance: 37 | 38 | 39 | .. autofunction:: sparse_reservoir 40 | .. autofunction:: dense_w_in 41 | .. autofunction:: sparse_w_in 42 | .. autofunction:: dense_w_bias 43 | .. autofunction:: orthogonal_reservoir 44 | .. autofunction:: chain_of_neurons 45 | .. autofunction:: ring_of_neurons 46 | 47 | .. autofunction:: reservoir_memory 48 | .. autofunction:: find_radius_for_mc 49 | 50 | -------------------------------------------------------------------------------- /doc/source/references.rst: -------------------------------------------------------------------------------- 1 | 2 | References 3 | ========== 4 | 5 | .. [FB98] B. Farhang-Boroujeny; Adaptive filters: theory and applications 6 | 7 | .. [TS12] T. Strauss et al; Design strategies for weight matrices of echo state networks. 8 | 9 | .. [ESN-ACD] P. Koprinkova-Hristova et al; Adaptive Critic Design with Echo State Network, 2010 10 | M. Oubbati et al.; Anticipating rewards in continuous time and space with echo state networks and actor-critic design, 2011 11 | M. Oubbati et al; Adaptive Learning in Continuous Environment Using Actor-Critic Design and Echo-State Networks, 2012 12 | P. Koprinkova-Hristova; Heuristic dynamic programming using echo state network as online trainable adaptive critic, 2012 13 | 14 | .. [RL] Sutton, Barto; Reinforcement Learning: An Introduction, 1998 15 | 16 | .. [Oger] http://organic.elis.ugent.be/organic/engine 17 | 18 | .. [MDP] http://mdp-toolkit.sourceforge.net/ 19 | 20 | .. [Webots] http://www.cyberbotics.com/ 21 | 22 | .. [HDF5] http://www.hdfgroup.org/HDF5/ 23 | 24 | .. [matplotlib] http://matplotlib.org/ 25 | 26 | .. [scipy] http://scipy.org/ 27 | 28 | .. 
[numpy] http://www.numpy.org/ 29 | -------------------------------------------------------------------------------- /doc/source/rl.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _reinforcement-learning: 3 | 4 | Reinforcement Learning 5 | ====================== 6 | 7 | .. contents:: 8 | 9 | Introduction 10 | ------------ 11 | 12 | .. automodule:: HDPy.rl 13 | 14 | .. automodule:: HDPy.hdp 15 | 16 | Reference 17 | --------- 18 | 19 | .. module:: HDPy 20 | 21 | .. autoclass:: Plant 22 | :members: 23 | :noindex: 24 | 25 | .. autoclass:: Policy 26 | :members: 27 | :noindex: 28 | 29 | .. autoclass:: ActorCritic 30 | :members: new_episode, __call__, init_episode, _step, _pre_increment_hook, _next_action_hook, save, load, set_normalization, set_alpha, set_gamma, set_momentum 31 | 32 | .. autoclass:: Momentum 33 | :members: __call__ 34 | 35 | .. autoclass:: ConstMomentum 36 | :show-inheritance: 37 | 38 | .. autoclass:: RadialMomentum 39 | :show-inheritance: 40 | 41 | .. autoclass:: ADHDP 42 | :show-inheritance: 43 | :members: _critic_eval, _critic_deriv, init_episode, _step 44 | 45 | .. autoclass:: ActionGradient 46 | :show-inheritance: 47 | 48 | .. autoclass:: ActionRecomputation 49 | :show-inheritance: 50 | 51 | .. autoclass:: ActionBruteForce 52 | :show-inheritance: 53 | -------------------------------------------------------------------------------- /doc/source/download.rst: -------------------------------------------------------------------------------- 1 | 2 | Download 3 | ======== 4 | 5 | .. contents:: 6 | 7 | Installation 8 | ------------ 9 | 10 | Using `Pip Installs Python (Pip) `_, 11 | simply type:: 12 | 13 | pip install http://www.igsor.net/research/HDPy/_downloads/latest.tar.gz 14 | 15 | if you want to use the package from the webpage. If you have downloaded it yourself, use:: 16 | 17 | pip install path/to/HDPy.tar.gz 18 | 19 | If you're using `distutils `_, type:: 20 | 21 | tar -xzf path/to/HDPy.tgz # extract files. 22 | cd HDPy* # change into HDPy directory. 23 | sudo python setup.py install # install using distutils (as root). 24 | #rm -R . # remove source. If desired, uncomment this line. 25 | #cd .. && rmdir HDPy* # remove working directory. If desired, uncomment this line. 26 | 27 | The project is also available on git, with the package and all supplementary data:: 28 | 29 | git clone https://github.com/igsor/PuPy 30 | 31 | Make sure, [numpy]_ and [scipy]_ are 32 | installed on your system. For plotting, [matplotlib]_ is required. 33 | 34 | - :download:`HDPy-1.0 <_downloads/HDPy-1.0.tar.gz>` (latest) 35 | 36 | - :download:`This documentation (pdf) <_downloads/HDPy-1.0-doc.pdf>` 37 | 38 | License 39 | ------- 40 | 41 | This project is released under the terms of the 3-clause BSD License. See the section 42 | :ref:`license` for details. 43 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012, Matthias Baumgartner 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 
8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the HDPy nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL MATTHIAS BAUMGARTNER BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /test/puppy_example_trajectory_sequence.py: -------------------------------------------------------------------------------- 1 | 2 | import h5py 3 | import numpy as np 4 | 5 | # Number of steps that will always be executed 6 | init_steps = 4 7 | # Sample action execution step increment 8 | step_size = 3 9 | # Target location 10 | sequence_file = '/tmp/example_sequence.hdf5' 11 | 12 | # main trajectory 13 | main_trajectory = [[0.8, 0.8]] * 3 + [[0.8, 0.78]] * 2 + [[0.82, 0.9]] * 10 + [[0.8, 0.9]] * 5 + [[0.82, 0.9]] * 10 14 | main_trajectory = np.array(main_trajectory) 15 | 16 | # example actions, to be executed at some steps of the main trajectory 17 | action_samples = np.array([ 18 | [ 0.4, 0.8], 19 | [ 0.8, 0.4], 20 | [ 0.6, 0.8], 21 | [ 0.8, 0.6], 22 | [ 0.8, 1. ], 23 | [ 1. , 0.8], 24 | [ 0.8, 0.8] 25 | ]) 26 | 27 | # Create the trajectories to be executed. This is the main trajectory 28 | # up to step i, then each example action for three steps 29 | ex_trajectory = [main_trajectory] 30 | for i in range(init_steps, main_trajectory.shape[0] + 1, step_size): 31 | ex_trajectory += [np.vstack((main_trajectory[:i], sample, sample, sample)) for sample in action_samples] 32 | 33 | # Store the example trajectories in a HDF5 file 34 | # The trajectories are stored in seperate datasets (traj_000) and an 35 | # index (idx) is initialized for progress bookkeeping. 
36 | f = h5py.File(sequence_file, 'w') 37 | f.create_dataset('idx', data=0) 38 | f.create_dataset('main', data=main_trajectory) 39 | for idx, traj in enumerate(ex_trajectory): 40 | name='traj_%03i' % idx 41 | f.create_dataset(name, data=traj) 42 | 43 | f.close() 44 | 45 | print "Stored", len(ex_trajectory), "sequences" 46 | -------------------------------------------------------------------------------- /data/puppy_unit.json: -------------------------------------------------------------------------------- 1 | {"touch1": [318.79300866155143, 318.79300866155143], "touch0": [416.51093258988385, 416.51093258988385], "touch3": [93.367137677804635, 93.367137677804635], "touch2": [91.339993788601305, 91.339993788601305], "accelerometer_y": [125.82104299169822, 484.73654626435808], "accelerometer_x": [16.481284764750427, 240.26805028986931], "accelerometer_z": [8.6216357004890369, 763.59255751903424], "trg3": [-0.36999999999995326, 1.7939550005402431], "trg2": [-0.36999999999995326, 1.8010496963983], "trg1": [-0.23000000000004661, 1.7939550005403366], "trg0": [-0.22999999999995313, 1.8010496963983], "puppyGPS_y": [3.9754356744413162, 14.010466560779145], "puppyGPS_x": [0.19506757643533312, 10.248893013477851], "puppyGPS_z": [1.2872867042949137, 1.2540506437910039], "hip1": [0.020197128948414633, 1.5946117265067943], "hip0": [1.2896001857700594, 3.2298778091748535], "hip3": [-1.2155965132847109, 2.1905430233360428], "hip2": [-0.97646833307138503, 1.9532006250465155], "knee2": [-0.34111541743978924, 1.57988888529936], "knee3": [0.1725124163642322, 2.1038787759286786], "knee0": [-0.17982543233382797, 1.6488687140115976], "knee1": [0.25999539867561805, 2.1109918545054964], "compass_x": [1.5113113871478845e-08, 0.99999998100110943], "compass_y": [-2.0735495542822946e-09, 0.99999999717288612], "gyro_z": [-1.3809654710283041, 93.3398601954394], "compass_z": [-2.412434418896936e-08, 0.99999992550827077], "gyro_x": [-62.471712345347527, 161.38280248187169], "gyro_y": [-1.5297053400941252, 86.861033891238151], "a_curr" : [1.0023086296125743, 0.80230205374438834], "a_next": [1.0023086296125743, 0.80230205374438834], "landmark_dist": [10.739577828006809, 4.452661157861807]} 2 | -------------------------------------------------------------------------------- /data/puppy_stat.json: -------------------------------------------------------------------------------- 1 | {"touch1": [1.8205343822841078, 6.2456455370456458], "touch0": [1.8077355030158033, 6.2163911930927789], "touch3": [0.78094450507051227, 3.5567929404623104], "touch2": [0.77865152797154391, 3.5566699652357108], "accelerometer_y": [0.0052592965293057339, 4.0331391763913951], "accelerometer_x": [0.31926554829406006, 6.4376879740242048], "accelerometer_z": [7.7485002758765118, 6.4018921917828049], "trg3": [-0.36999999999935246, 0.53618385189057927], "trg2": [-0.36999999999931188, 0.53663473970933218], "trg1": [-0.23000000000761395, 0.53618385189056794], "trg0": [-0.2300000000077049, 0.53663473970932629], "puppyGPS_y": [-0.48713510136415211, 3.4430779971106529], "puppyGPS_x": [-0.51553505401105415, 3.452080563583682], "puppyGPS_z": [0.082007755878077127, 0.015098264420749501], "hip1": [-0.23262800153461199, 0.45348593855348529], "hip0": [-0.2325851976123004, 0.45389519948856594], "hip3": [-0.37030397094335737, 0.45578243275684055], "hip2": [-0.37036884850694912, 0.45629441360468687], "knee2": [0.01585728894698487, 0.31866762188472231], "knee3": [0.016154538793381523, 0.31827911763809236], "knee0": [0.12376281856748228, 0.18315571166606334], "knee1": 
[0.12348114248564791, 0.18283449297610352], "compass_x": [-0.0032868095280248217, 0.68986740172661298], "compass_y": [-0.0091208433194725647, 0.66571483441303869], "gyro_z": [-0.0019240353085375921, 0.50516156782102606], "compass_z": [6.7525306390130052e-05, 0.28427579886077453], "gyro_x": [-0.0011709593429102395, 0.98989036887649229], "gyro_y": [-0.020663182790115593, 1.9425298200825714], "a_curr": [0.72683929129987579, 0.25596165799046389], "a_next": [0.72683929129987579, 0.25596165799046389], "landmark_dist": [16.963004359285215, 16.961933883080803]} 2 | -------------------------------------------------------------------------------- /test/puppy_example_trajectory_eval.py: -------------------------------------------------------------------------------- 1 | import HDPy 2 | import pickle 3 | import os 4 | import sys 5 | import PuPy 6 | import itertools 7 | 8 | # Load reservoir 9 | f = open('/tmp/puppy_reservoir.pic', 'r') 10 | reservoir = pickle.load(f) 11 | reservoir.reset() 12 | f.close() 13 | 14 | # Load readout 15 | f = open('/tmp/puppy_readout.pic', 'r') 16 | readout = pickle.load(f) 17 | f.close() 18 | 19 | # Critic is evaluated, thus don't train it anymore 20 | readout.stop_training() 21 | 22 | # Create a policy 23 | bound_gait = { 24 | 'amplitude' : ( 0.8, 1.0, 0.8, 1.0), 25 | 'frequency' : (1.0, 1.0, 1.0, 1.0), 26 | 'offset' : ( -0.23, -0.23, -0.37, -0.37), 27 | 'phase' : (0.0, 0.0, 0.5, 0.5) 28 | } 29 | 30 | policy = HDPy.puppy.policy.LRA(PuPy.Gait(bound_gait)) 31 | 32 | # Create a plant 33 | landmarks = [i for i in itertools.product((-10.0, -3.3, 3.3, 10.0), (-10.0, -3.3, 3.3, 10.0))] 34 | target_loc = (6.0, 4.0) 35 | plant = HDPy.puppy.plant.TargetLocationLandmarks( 36 | target_loc, 37 | landmarks, 38 | reward_noise = 0.0 39 | ) 40 | 41 | # Load the normalization 42 | nrm = PuPy.Normalization('../data/puppy_unit.json') 43 | 44 | # Initialize the collector 45 | collector = PuPy.RobotCollector( 46 | child = policy, 47 | expfile = '/tmp/example_eval.hdf5' 48 | ) 49 | 50 | # Create HDP instance 51 | actor = HDPy.PuppyHDP( 52 | tumbled_reward =0.0, 53 | reservoir = reservoir, 54 | readout = readout, 55 | plant = plant, 56 | policy = collector, 57 | gamma = 0.0, 58 | alpha = 1.0, 59 | init_steps = 10, 60 | norm = nrm 61 | ) 62 | 63 | HDPy.puppy.offline_playback( 64 | '/tmp/example_data.hdf5', 65 | actor, 66 | 150, 67 | 20 68 | ) 69 | 70 | -------------------------------------------------------------------------------- /test/puppy_offline_replay.py: -------------------------------------------------------------------------------- 1 | import HDPy 2 | import PuPy 3 | import numpy as np 4 | import itertools 5 | 6 | # Create a policy 7 | bound_gait = { 8 | 'amplitude' : ( 0.8, 1.0, 0.8, 1.0), 9 | 'frequency' : (1.0, 1.0, 1.0, 1.0), 10 | 'offset' : ( -0.23, -0.23, -0.37, -0.37), 11 | 'phase' : (0.0, 0.0, 0.5, 0.5) 12 | } 13 | 14 | policy = HDPy.puppy.policy.LRA(PuPy.Gait(bound_gait)) 15 | 16 | # Create a plant 17 | landmarks = [i for i in itertools.product((-10.0, -3.3, 3.3, 10.0), (-10.0, -3.3, 3.3, 10.0))] 18 | target_loc = (6.0, 4.0) 19 | plant = HDPy.puppy.plant.TargetLocationLandmarks( 20 | target_loc, 21 | landmarks, 22 | reward_noise = 0.0 23 | ) 24 | 25 | # Load the normalization 26 | nrm = PuPy.Normalization('../data/puppy_unit.json') 27 | 28 | # Create a reservoir 29 | reservoir = HDPy.ReservoirNode( 30 | output_dim = 10, 31 | input_dim = policy.action_space_dim() + plant.state_space_dim(), 32 | spectral_radius = 0.98, 33 | w = HDPy.sparse_reservoir(20), 34 | ) 35 | 36 | 
reservoir.initialize() 37 | reservoir.save('/tmp/puppy_reservoir.pic') 38 | 39 | # Create a readout 40 | readout = HDPy.StabilizedRLS( 41 | with_bias = True, 42 | input_dim = reservoir.get_output_dim() + reservoir.get_input_dim(), 43 | output_dim = 1, 44 | lambda_ = 1.0 45 | ) 46 | 47 | # Initialize the collector 48 | collector = PuPy.RobotCollector( 49 | child = policy, 50 | expfile = '/tmp/puppy_critic.hdf5' 51 | ) 52 | 53 | # Initialize the Critic 54 | critic = HDPy.PuppyHDP( 55 | tumbled_reward = 0.0, 56 | reservoir = reservoir, 57 | readout = readout, 58 | plant = plant, 59 | policy = collector, 60 | gamma = 0.5, 61 | alpha = 1.0, 62 | init_steps = 10, 63 | norm = nrm 64 | ) 65 | 66 | # Train the critic on offline data 67 | HDPy.puppy.offline_playback( 68 | '/tmp/puppy_offline_data.hdf5', 69 | critic, 70 | samples_per_action = 150, 71 | ms_per_step = 20, 72 | episode_start = 0, 73 | episode_end = 1000, 74 | min_episode_len = 30 75 | ) 76 | 77 | # Store the readout for later use 78 | readout.save('/tmp/puppy_readout.pic') 79 | -------------------------------------------------------------------------------- /HDPy/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module combines Reinforcement Learning and Reservoir Computing by 3 | means of an Actor-Critic design. In Reinforcement Learning, the learning 4 | subject is represented by the agent, while the teacher is denoted as the 5 | environment or plant. At each time step, the agent chooses an action 6 | :math:`a_t`, which leads it from state :math:`s_t` to state 7 | :math:`s_{t+1}`. The state information is provided to the agent by the 8 | environment, together with a reward :math:`r_{t+1}` which announces how 9 | good or bad the state is considered. Note that the reward cannot be used 10 | as a learning target, as it is not an error but merely a hint whether the 11 | agent is heading in the right direction. Instead, the agent's goal is to 12 | collect as much reward as possible over time. The Return expresses this by 13 | taking future rewards into account: 14 | 15 | .. math:: 16 | R_t = \sum\limits_{k=0}^T \gamma^k r_{t+k+1} 17 | 18 | As it may not be meaningful to consider the whole future, the influence 19 | of rewards is decreased the farther they lie in the future. This is controlled 20 | through the discount rate :math:`\gamma`. Further, experiments are often 21 | episodic (meaning that they terminate at some point). This is accounted for 22 | by summing up to the episode length :math:`T` [RL]_. 23 | 24 | An Actor-Critic design splits the agent into two parts: the Actor 25 | decides on the action, for which it is in turn criticised by the Critic. 26 | This means that the Critic learns long-term behaviour, i.e. it approximates 27 | the Return, while the Actor uses the Critic's approximation to select 28 | the action which maximizes the Return in a single step. This module 29 | incorporates Reservoir Computing as the Critic's function approximator 30 | [ESN-ACD]_. 
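As a toy illustration of this definition (not part of the package API),
the return of a short, finite reward sequence can be computed directly:

>>> rewards = [1.0, 0.0, 2.0]   # r_1, r_2, r_3
>>> gamma = 0.5
>>> sum(gamma**k * r for k, r in enumerate(rewards))
1.5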
31 | 32 | """ 33 | from rc import * 34 | from rl import * 35 | from analysis import * 36 | from puppy import * 37 | from inout import * 38 | from hdp import * 39 | 40 | import puppy 41 | import epuck 42 | 43 | from epuck.analysis_epuck import * 44 | from puppy.analysis_puppy import * 45 | from puppy.puppy import * 46 | from puppy.policy import FRA, LRA, LRP 47 | 48 | #from plants import * 49 | #from analysis_epuck import * 50 | #from analysis_puppy import * 51 | #import policies as policy 52 | #from policies_puppy import FRA, LRA, LRP # Deprecated, don't use like this but through policy.puppy.{FRA,LRA,LRP} 53 | 54 | 55 | #__all__ = [''] 56 | -------------------------------------------------------------------------------- /doc/source/puppy_online.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _puppy_online: 3 | 4 | Puppy online workflow 5 | ===================== 6 | 7 | .. note:: 8 | The data recorded this way cannot be used to train another robot 9 | offline on the same dataset. This is because vital metadata for 10 | replay is not stored by the normal HDP implementation. 11 | 12 | In the online setup, all comutations are done within a running [Webots]_ 13 | instance. For [Webots]_, a supervisor and robot controller script is 14 | required, as documented in :py:mod:`PuPy`. In this example, the 15 | simulation is to be reverted whenever the robot falls or leaves a 16 | predefined arena. In this case, reverting the simulation is preferred 17 | over respawning the robot, since this guarantees that the robot is 18 | started in the same state in every episode. 19 | 20 | .. literalinclude:: ../../test/puppy_online_supervisor.py 21 | 22 | The robot controller is structured similar to the ones in the offline 23 | case. First, the preliminaries for :py:class:`ADHDP` are to be prepared. 24 | Hence, in the initialization the policy, plant and echo-state network 25 | is created. Furthermore, an :math:`\epsilon`-greedy acting schema is 26 | set up, by subtyping :py:class:`PuppyHDP` and specifying 27 | :py:meth`ActorCritic._next_action_hook`. Once, the actor-critic instance 28 | is ready in *acd*, the simulation is set up and finally run. In contrast 29 | to the offline case, the Actor-Critic instance is combined with Webots, 30 | as documented in :py:mod:`PuPy`. 31 | 32 | .. literalinclude:: ../../test/puppy_online_robot.py 33 | 34 | These two controllers can be loaded into webots and the simulation 35 | executed. All observations will be stored in the file 36 | ``/tmp/puppy_online.hdf5``, the reservoir and readout are saved at 37 | ``/tmp/puppy_reservoir.pic`` and ``/tmp/puppy_readout.pic``. 38 | 39 | The simulation is reverted once in a while, hence the controller script 40 | will be terminated and reloaded several times. For the 41 | controller to work, it must load the reservoir and readout if they 42 | already exist. Note that the approach shown below saves the readout 43 | before exiting. This will fail for large reservoirs, as the teardown 44 | timeframe is limited by webots. In such a case, the readout weights 45 | may be written into a file (and regained from it upon startup) at every 46 | iteration (this can efficiently be done by means of a seperate HDF5 47 | file). 
48 | -------------------------------------------------------------------------------- /HDPy/epuck/policy/policies.py: -------------------------------------------------------------------------------- 1 | from ...rl import Policy 2 | import warnings 3 | import numpy as np 4 | 5 | class Heading(Policy): 6 | """ePuck policy with the heading as action. 7 | 8 | For historical reasons, it is up to the implementation of the 9 | robot to interpret the action (i.e. whether it is considered relative 10 | or absolute). 11 | 12 | Note that since Webots is not used for ePuck simulation, the action 13 | sequence is reduced to a single item and hence not returned as a list. 14 | This behaviour works fine with the :py:class:`Robot` class. 15 | 16 | """ 17 | def __init__(self): 18 | super(Heading, self).__init__(action_space_dim=1) 19 | self.action = self.initial_action() 20 | 21 | def initial_action(self): 22 | """Return the initial action (0.0).""" 23 | return np.atleast_2d([0.0]).T 24 | 25 | def update(self, action_upd): 26 | """Update the action.""" 27 | self.action = action_upd 28 | 29 | def get_iterator(self, time_start_ms, time_end_ms, step_size_ms): 30 | """Return the heading.""" 31 | return self.action 32 | 33 | def reset(self): 34 | """Reset the action to the initial one (0.0).""" 35 | self.action = self.initial_action() 36 | 37 | class HeadingRandInit(Heading): 38 | """ePuck policy with the heading as action and random 39 | initialization. 40 | 41 | The only difference to :py:class:`Heading` is that the initial 42 | action is not 0.0 but randomly sampled in [0, 2*pi]. 43 | 44 | """ 45 | def initial_action(self): 46 | """Sample a random action and return it as the initial one.""" 47 | rnd = np.random.uniform(0.0, 2*np.pi) 48 | return np.atleast_2d([rnd]).T 49 | 50 | class Trivial(Heading): 51 | """ePuck policy with the heading as action. 52 | 53 | .. deprecated:: 1.0 54 | Use :py:class:`Heading` instead 55 | 56 | """ 57 | def __init__(self): 58 | warnings.warn("This class is deprecated. Use 'Heading' instead") 59 | super(Trivial, self).__init__() 60 | 61 | class RandInit(HeadingRandInit): 62 | """ePuck policy with the heading as action and random 63 | initialization. 64 | 65 | .. deprecated:: 1.0 66 | Use :py:class:`HeadingRandInit` instead 67 | 68 | """ 69 | def __init__(self): 70 | warnings.warn("This class is deprecated. Use 'HeadingRandInit' instead") 71 | super(RandInit, self).__init__() 72 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. Reinforcement Reservoir Learning documentation master file, created by 2 | sphinx-quickstart on Wed May 22 19:50:46 2013. 3 | 4 | Heuristic Dynamic Programming in Python 5 | ======================================= 6 | 7 | .. automodule:: HDPy 8 | 9 | 10 | This documentation gives an overview of the module's functionality, 11 | provides a usage example and lists the interfaces. This order is kept 12 | constant across all (i.e. most) pages. The first four pages 13 | (:ref:`idx_basics`) list the basic interfaces and describe the methods 14 | which implement Reservoir Computing and Reinforcement Learning. These 15 | structures are independent of the experimental platform. 16 | 17 | This package was originally implemented for two platforms, the Puppy 18 | and ePuck robots. The corresponding (and hence platform-dependent) code 19 | is documented in the second section (:ref:`idx_platforms`). 
20 | 21 | The third section (:ref:`idx_resources`) provides further information 22 | and download and installation resources. 23 | 24 | Note that some of the examples write files. In this case, the paths are 25 | usually hardcoded and valid for a unix-like file tree. As the data is 26 | temporary, it is stored in ``/tmp``. When working on other 27 | systems, the paths have to be adapted. 28 | 29 | Furthermore, due to Python's magnificent online help, the interface 30 | documentation is also available from within the interactive interpreter 31 | (e.g. IPython): 32 | 33 | >>> import HDPy 34 | >>> help(HDPy) 35 | 36 | .. note:: 37 | The examples have been written for Linux. As most of them include 38 | paths, they are also specified for a unix-like filesystem. On other 39 | systems, they have to be adapted. Also note that some of the paths 40 | may require adaptations, even on a Linux machine (e.g. normalization 41 | data files). 42 | 43 | Contents 44 | -------- 45 | 46 | .. _idx_basics: 47 | 48 | Basics 49 | ^^^^^^ 50 | 51 | .. toctree:: 52 | :maxdepth: 1 53 | 54 | rc 55 | rl 56 | utils 57 | analysis 58 | 59 | .. _idx_platforms: 60 | 61 | Platforms 62 | ^^^^^^^^^ 63 | 64 | .. toctree:: 65 | :maxdepth: 1 66 | 67 | pp 68 | epuck 69 | puppy 70 | 71 | .. _idx_resources: 72 | 73 | Resources 74 | ^^^^^^^^^ 75 | 76 | .. toctree:: 77 | :maxdepth: 1 78 | 79 | todopg 80 | download 81 | license 82 | references 83 | 84 | 85 | Indices and tables 86 | ------------------ 87 | 88 | * :ref:`genindex` 89 | * :ref:`modindex` 90 | * :ref:`search` 91 | 92 | -------------------------------------------------------------------------------- /test/puppy_offline_sampling_robot.py: -------------------------------------------------------------------------------- 1 | from controller import Robot 2 | import HDPy 3 | import PuPy 4 | import numpy as np 5 | 6 | # setup: 7 | sampling_period_ms = 20 8 | ctrl_period_ms = 3000 9 | 10 | # Policy setup 11 | bound_gait = { 12 | 'amplitude' : ( 0.8, 1.0, 0.8, 1.0), 13 | 'frequency' : (1.0, 1.0, 1.0, 1.0), 14 | 'offset' : ( -0.23, -0.23, -0.37, -0.37), 15 | 'phase' : (0.0, 0.0, 0.5, 0.5) 16 | } 17 | policy = HDPy.puppy.policy.LRA(PuPy.Gait(bound_gait)) 18 | 19 | def random_initial_action(): 20 | """Select a random initial action instead of the fixed one 21 | specified by the gait definition. 22 | """ 23 | N = policy.action_space_dim() 24 | action = np.atleast_2d([-1.0] * N) 25 | while (action < 0.4).any() or (action > 2.0).any() or action.ptp() > 0.5: 26 | action = np.random.normal(0.9, 0.3, size=action.shape) 27 | return action.T 28 | 29 | policy.initial_action = random_initial_action 30 | 31 | # Offline data collector setup 32 | class OfflinePuppy(HDPy.puppy.OfflineCollector): 33 | def _next_action_hook(self, a_next): 34 | """Define the schema according to which actions will be selected. 35 | Hence, this function defines the action and state space sampling 36 | schema. Note that this choice strongly influences training. 
37 | 38 | """ 39 | a_next = np.zeros(self.a_curr.shape) 40 | # Prohibit too small or large amplitudes 41 | while (a_next < 0.2).any() or (a_next > 2.0).any() or ((a_next > 1.0).any() and a_next.ptp() > 0.4): 42 | a_next = self.a_curr + np.random.normal(0.0, 0.15, size=self.a_curr.shape) 43 | 44 | return a_next 45 | 46 | # Initialize the collector 47 | data_collector = PuPy.RobotCollector( 48 | child = policy, 49 | expfile = '/tmp/puppy_offline_data.hdf5' 50 | ) 51 | tumble_collector = PuPy.TumbleCollector( 52 | child = data_collector, 53 | sampling_period_ms = sampling_period_ms, 54 | ctrl_period_ms = ctrl_period_ms 55 | ) 56 | collector = PuPy.ResetCollector( 57 | child = tumble_collector, 58 | sampling_period_ms = sampling_period_ms, 59 | ctrl_period_ms = ctrl_period_ms 60 | ) 61 | 62 | 63 | # actor instantiation 64 | actor = OfflinePuppy( 65 | # policy = collector, 66 | policy = data_collector, 67 | init_steps = 10, 68 | ) 69 | 70 | # robot instantiation 71 | r = PuPy.robotBuilder( 72 | Robot, 73 | actor, 74 | sampling_period_ms = sampling_period_ms, 75 | ctrl_period_ms = ctrl_period_ms, 76 | # event_handlers = [actor.event_handler, tumble_collector.event_handler, collector.event_handler] 77 | ) 78 | 79 | # invoke the main loop, starts the simulation 80 | r.run() 81 | -------------------------------------------------------------------------------- /test/epuck_online.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import HDPy 3 | import PuPy 4 | import os 5 | import pylab 6 | 7 | ## INITIALIZATION ## 8 | 9 | # Robot 10 | obstacles = [ 11 | HDPy.epuck.env.train_lower, 12 | HDPy.epuck.env.train_middle, 13 | HDPy.epuck.env.train_left, 14 | HDPy.epuck.env.train_upper 15 | ] 16 | 17 | robot = HDPy.epuck.AbsoluteRobot( 18 | walls = HDPy.epuck.env.obstacles_box, 19 | obstacles = obstacles, 20 | tol = 0.0, 21 | speed = 0.5, 22 | step_time = 0.5, 23 | ) 24 | 25 | # Plant and Policy 26 | policy = HDPy.epuck.policy.HeadingRandInit() 27 | plant = HDPy.epuck.plant.CollisionAvoidanceFrontal( 28 | theta = 1.0, 29 | obs_noise = 0.05 30 | ) 31 | 32 | # Set up reservoir 33 | reservoir = HDPy.ReservoirNode( 34 | output_dim = 50, 35 | input_dim = policy.action_space_dim() + plant.state_space_dim(), 36 | spectral_radius = 0.95, 37 | input_scaling = 1.0/3.0, 38 | bias_scaling = -3.0, 39 | fan_in_w = 20 40 | ) 41 | 42 | reservoir.initialize() 43 | 44 | # Set up readout 45 | readout = HDPy.StabilizedRLS( 46 | with_bias = True, 47 | input_dim = reservoir.get_output_dim() + policy.action_space_dim() + plant.state_space_dim(), 48 | output_dim = 1, 49 | lambda_ = 1.0 50 | ) 51 | 52 | # Custom ADHDP 53 | class ExperimentingHDP(HDPy.ADHDP): 54 | def _next_action_hook(self, a_next): 55 | """Project action into the interval [0,2pi].""" 56 | return a_next % (2*np.pi) 57 | 58 | # Remove old data file 59 | if os.path.exists('/tmp/epuck_data.hdf5'): 60 | os.unlink('/tmp/epuck_data.hdf5') 61 | 62 | collector = PuPy.RobotCollector( 63 | child = policy, 64 | expfile = '/tmp/epuck_data.hdf5') 65 | 66 | # Create ADHDP instance 67 | acd = ExperimentingHDP( 68 | # Demanded by ADHDP 69 | reservoir = reservoir, 70 | readout = readout, 71 | # Demanded by ActorCritic 72 | plant = plant, 73 | policy = collector, 74 | gamma = 0.5, 75 | alpha = 1.0, 76 | init_steps = 5, 77 | ) 78 | 79 | ## SIMULATION LOOP ## 80 | 81 | # Execute the simulation for 10 episodes, with 100 steps tops each 82 | HDPy.epuck.simulation_loop( 83 | acd, 84 | robot, 85 | max_step = 100, 86 | 
max_episodes = 10, 87 | max_total_iter = -1 88 | ) 89 | 90 | ## EVALUATION ## 91 | 92 | # Load the data file 93 | analysis = HDPy.Analysis('/tmp/epuck_data.hdf5') 94 | 95 | # Plot the trajectories and obstacles 96 | axis = pylab.figure().add_subplot(111) 97 | robot._plot_obstacles(axis=axis) 98 | HDPy.epuck.plot_all_trajectories(analysis, axis) 99 | 100 | # Show the figure 101 | pylab.show(block=False) 102 | -------------------------------------------------------------------------------- /test/puppy_example_trajectory_robot.py: -------------------------------------------------------------------------------- 1 | from controller import Robot 2 | import PuPy 3 | import HDPy 4 | import numpy as np 5 | import h5py 6 | 7 | # Initialize a policy 8 | bound_gait = { 9 | 'amplitude' : ( 0.8, 1.0, 0.8, 1.0), 10 | 'frequency' : (1.0, 1.0, 1.0, 1.0), 11 | 'offset' : ( -0.23, -0.23, -0.37, -0.37), 12 | 'phase' : (0.0, 0.0, 0.5, 0.5) 13 | } 14 | 15 | policy = HDPy.puppy.policy.LRA(PuPy.Gait(bound_gait, 'bounding')) 16 | 17 | # OfflineCollector which follows a predefined sequence of actions 18 | # after the initial behaviour (policy with default params for 25 steps). 19 | class TrajectoryFollower(HDPy.puppy.OfflineCollector): 20 | def __init__(self, trajectory, *args, **kwargs): 21 | super(TrajectoryFollower, self).__init__(*args, **kwargs) 22 | self.trajectory = trajectory 23 | self._traj_idx = 0 24 | 25 | def _next_action_hook(self, a_next): 26 | if self._traj_idx >= self.trajectory.shape[0]: 27 | # If all actions have been executed, signal the supervisor 28 | # to revert the simulation 29 | self.robot.send_msg('revert_on_demand') 30 | return self.a_curr 31 | 32 | # If there's a next action, execute it 33 | a_next = np.atleast_2d(self.trajectory[self._traj_idx]).T 34 | self._traj_idx += 1 35 | 36 | return a_next 37 | 38 | # Load the sequence file 39 | f = h5py.File('/tmp/example_sequence.hdf5','a') 40 | # Get the index of the trajectory to be executed 41 | idx = f['idx'][()] 42 | grp_name = 'traj_%03i' % idx 43 | if grp_name in f: 44 | # Not yet finished, increment the index such that the next 45 | # trajectory is executed in the next experiment. 
46 | trajectory = f[grp_name][:] 47 | do_quit = False 48 | f['idx'][()] += 1 49 | else: 50 | # Simulation is finished, execute any trajectory and prepare for 51 | # termination 52 | while grp_name not in f and idx >= 0: 53 | idx -= 1 54 | grp_name = 'traj_%03i' % (idx) 55 | 56 | if idx < 0: 57 | raise Exception('Could not find last trajectory') 58 | 59 | trajectory = f[grp_name][:] 60 | do_quit = True 61 | 62 | f.close() 63 | 64 | # Initialize the collector 65 | collector = PuPy.RobotCollector( 66 | child = policy, 67 | expfile = '/tmp/example_data.hdf5' 68 | ) 69 | 70 | # Initialize the actor 71 | actor = TrajectoryFollower( 72 | trajectory = trajectory, 73 | policy = collector, 74 | init_steps = 10, 75 | ) 76 | 77 | # Initialize the robot, bind it to webots 78 | r = PuPy.robotBuilder( 79 | Robot, 80 | actor, 81 | sampling_period_ms = 20, 82 | ctrl_period_ms = 3000, 83 | ) 84 | 85 | # Register robot in actor for signalling 86 | actor.robot = r 87 | 88 | if do_quit: 89 | # Quit the simulation when all trajectories are handled 90 | r.send_msg('quit_on_demand') 91 | 92 | # Run the simulation 93 | r.run() 94 | -------------------------------------------------------------------------------- /test/acd.py: -------------------------------------------------------------------------------- 1 | 2 | import HDPy 3 | import PuPy 4 | import pylab 5 | import numpy as np 6 | 7 | # Create and initialize Policy 8 | gait = PuPy.Gait(params={ 9 | 'frequency' : (1.0, 1.0, 1.0, 1.0), 10 | 'offset' : ( -0.23, -0.23, -0.37, -0.37), 11 | 'amplitude' : ( 0.56, 0.56, 0.65, 0.65), 12 | 'phase' : (0.0, 0.0, 0.5, 0.5) 13 | }) 14 | 15 | policy = HDPy.FRA(gait) 16 | 17 | # Plot action 18 | it = policy.get_iterator(0, 100, 20) 19 | pylab.subplot(311) 20 | data = [it.next() for i in range(100)] 21 | pylab.title('Motor action, untampered for 100 steps, 20ms each') 22 | pylab.xlabel('time') 23 | pylab.plot(data) 24 | pylab.show(block=False) 25 | 26 | 27 | # Create and initialize Plant 28 | plant = HDPy.puppy.plant.SpeedReward() 29 | 30 | # Create and initialize ACD 31 | reservoir = HDPy.SparseReservoirNode( 32 | output_dim=10, 33 | input_dim=policy.action_space_dim() + plant.state_space_dim(), 34 | reset_states=False, 35 | spectral_radius=0.9, 36 | fan_in_i=100, 37 | fan_in_w=20 38 | ) 39 | 40 | readout = HDPy.StabilizedRLS( 41 | with_bias=True, 42 | input_dim=reservoir.get_output_dim() + reservoir.get_input_dim(), 43 | output_dim=1, 44 | lambda_=1.0 45 | ) 46 | expfile = '/tmp/acd.hdf5' 47 | collector = PuPy.RobotCollector(child=policy, expfile=expfile) 48 | acd = HDPy.ADHDP( 49 | reservoir, 50 | readout, 51 | plant, 52 | collector 53 | ) 54 | 55 | acd.set_alpha(0.5) 56 | 57 | 58 | N = 100 59 | ep0 = { 60 | 'accelerometer_z' : np.ones(N) * 2.0 + np.random.randn(N)+0.2, 61 | 'puppyGPS_x' : np.ones([N,2]) * [0.0, 1.0] + np.random.randn(N,2)*0.2, 62 | 'puppyGPS_y' : np.ones([N,2]) * [0.0, 10.0] + np.random.randn(N,2)*0.2 63 | } 64 | 65 | ep1 = { 66 | 'accelerometer_z' : np.ones(N) * 2.0 + np.random.randn(N)+0.2, 67 | 'puppyGPS_x' : np.ones([N,2]) * [1.0, 3.0] + np.random.randn(N,2)*0.5, 68 | 'puppyGPS_y' : np.ones([N,2]) * [10.0, 18.0] + np.random.randn(N,2)*0.5 69 | } 70 | 71 | # Initialize for some epochs 72 | it = acd(ep0, time_start_ms= 0, time_end_ms=100, step_size_ms=1) 73 | it = acd(ep0, time_start_ms=100, time_end_ms=200, step_size_ms=1) 74 | it = acd(ep0, time_start_ms=200, time_end_ms=300, step_size_ms=1) 75 | 76 | # First epoch 77 | it = acd(ep0, time_start_ms=300, time_end_ms=400, step_size_ms=1) 78 | data = 
[it.next() for i in range(100)] 79 | pylab.subplot(312) 80 | pylab.title('') 81 | pylab.xlabel('time') 82 | pylab.plot(data) 83 | pylab.show(block=False) 84 | 85 | # Second epoch 86 | it = acd(ep1, time_start_ms=400, time_end_ms=500, step_size_ms=1) 87 | data = [it.next() for i in range(100)] 88 | pylab.subplot(313) 89 | pylab.title('') 90 | pylab.xlabel('time') 91 | pylab.plot(data) 92 | pylab.show(block=False) 93 | 94 | # Test load/save 95 | import tempfile, os 96 | fh, pth = tempfile.mkstemp() 97 | acd.save(pth) 98 | acd2 = HDPy.ADHDP.load(pth) 99 | os.unlink(pth) 100 | 101 | -------------------------------------------------------------------------------- /test/puppy_online_robot.py: -------------------------------------------------------------------------------- 1 | from controller import Robot 2 | import PuPy 3 | import HDPy 4 | import numpy as np 5 | import os 6 | import itertools 7 | import pickle 8 | 9 | 10 | ## INITIALIZATION ## 11 | 12 | # Create a policy 13 | bound_gait = { 14 | 'amplitude' : ( 0.8, 1.0, 0.8, 1.0), 15 | 'frequency' : (1.0, 1.0, 1.0, 1.0), 16 | 'offset' : ( -0.23, -0.23, -0.37, -0.37), 17 | 'phase' : (0.0, 0.0, 0.5, 0.5) 18 | } 19 | 20 | policy = HDPy.puppy.policy.LRA(PuPy.Gait(bound_gait)) 21 | 22 | # Create a plant 23 | landmarks = [i for i in itertools.product((-10.0, -3.3, 3.3, 10.0), (-10.0, -3.3, 3.3, 10.0))] 24 | target_loc = (6.0, 4.0) 25 | plant = HDPy.puppy.plant.TargetLocationLandmarks( 26 | target_loc, 27 | landmarks, 28 | reward_noise = 0.0 29 | ) 30 | # Load the normalization 31 | nrm = PuPy.Normalization(os.path.split(HDPy.__file__)[0]+'/../data/puppy_unit.json') 32 | 33 | # Reservoir 34 | if os.path.exists('/tmp/puppy_reservoir.pic'): 35 | reservoir = pickle.load(open('/tmp/puppy_reservoir.pic','r')) 36 | else: 37 | reservoir = HDPy.ReservoirNode( 38 | output_dim = 100, 39 | input_dim = policy.action_space_dim() + plant.state_space_dim(), 40 | reset_states = False, 41 | spectral_radius = 0.7, 42 | w = HDPy.sparse_reservoir(20), 43 | ) 44 | reservoir.initialize() 45 | reservoir.save('/tmp/puppy_reservoir.pic') 46 | 47 | # Readout 48 | if os.path.exists('/tmp/puppy_readout.pic'): 49 | readout = pickle.load(open('/tmp/puppy_readout.pic','r')) 50 | else: 51 | readout = HDPy.StabilizedRLS( 52 | input_dim = reservoir.get_output_dim() + reservoir.get_input_dim(), 53 | output_dim = 1, 54 | with_bias = True, 55 | lambda_ = 1.0 56 | ) 57 | 58 | # Acting schema 59 | class OnlinePuppy(HDPy.PuppyHDP): 60 | def _next_action_hook(self, a_next): 61 | """Choose the action in an eps-greedy 62 | fashion, meaning that a random action 63 | is preferred over the suggested one with 64 | probability eps. 
65 | """ 66 | if np.random.rand() < 0.2: 67 | a_next = np.random.uniform(low=0.2, high=1.0, size=a_next.shape) 68 | # clip the action to a bounded range 69 | a_next[a_next < 0.2] = 0.2 70 | a_next[a_next > 1.0] = 1.0 71 | return a_next 72 | 73 | # Initialize the collector 74 | collector = PuPy.RobotCollector( 75 | child = policy, 76 | expfile = '/tmp/puppy_online.hdf5' 77 | ) 78 | 79 | # actor 80 | actor = OnlinePuppy( 81 | # HDPy.puppy.PuppyHDP 82 | tumbled_reward = 0.0, 83 | # HDPy.ADHDP 84 | reservoir = reservoir, 85 | readout = readout, 86 | # HDPy.ActorCritic 87 | plant = plant, 88 | policy = collector, 89 | gamma = 0.5, 90 | alpha = 1.0, 91 | init_steps = 10, 92 | norm = nrm 93 | ) 94 | 95 | # robot 96 | r = PuPy.robotBuilder( 97 | Robot, 98 | actor, 99 | sampling_period_ms = 20, 100 | ctrl_period_ms = 3000, 101 | # event_handlers = actor.event_handler 102 | ) 103 | 104 | ## SIMULATION LOOP ## 105 | 106 | # run the simulation 107 | r.run() 108 | 109 | # teardown 110 | readout.save('/tmp/puppy_readout.pic') 111 | -------------------------------------------------------------------------------- /test/rc_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | import HDPy 4 | import numpy as np 5 | import pylab 6 | 7 | ## PARAMS ## 8 | 9 | washout = 200 10 | num_train = 5000 11 | num_test = 1000 12 | reservoir_size = 100 13 | 14 | ## INITIALIZATION ## 15 | 16 | # Reservoir 17 | reservoir_sparse = HDPy.ReservoirNode( 18 | input_dim = 1, 19 | output_dim = reservoir_size, 20 | spectral_radius = 0.9, 21 | w_bias = None, 22 | w = HDPy.sparse_reservoir(20), 23 | w_in = HDPy.sparse_w_in(1.5, 50, rnd_fu=np.random.normal), 24 | ) 25 | 26 | reservoir_orthogonal = HDPy.ReservoirNode( 27 | input_dim = 1, 28 | output_dim = reservoir_size, 29 | spectral_radius = 0.9, 30 | w_bias = None, 31 | w = HDPy.orthogonal_reservoir(20.0), 32 | w_in = HDPy.sparse_w_in(1.5, 50, rnd_fu=np.random.normal), 33 | ) 34 | 35 | reservoir_ring = HDPy.ReservoirNode( 36 | input_dim = 1, 37 | output_dim = reservoir_size, 38 | spectral_radius = 0.9, 39 | w_bias = None, 40 | w = HDPy.ring_of_neurons, 41 | w_in = HDPy.sparse_w_in(1.5, 50, rnd_fu=np.random.normal), 42 | ) 43 | 44 | 45 | # Readout 46 | readout = HDPy.StabilizedRLS( 47 | input_dim = reservoir_size, 48 | output_dim = 1, 49 | with_bias = True, 50 | lambda_ = 1.0, 51 | ) 52 | 53 | readout_orthogonal = readout.copy() 54 | readout_ring = readout.copy() 55 | readout_sparse = readout.copy() 56 | 57 | # Data 58 | def narma30(num_samples=1000): 59 | """30th order NARMA dataset. 
Copied from [Oger]_.""" 60 | system_order = 30 61 | inputs = np.random.rand(num_samples, 1) * 0.5 62 | outputs = np.zeros((num_samples, 1)) 63 | 64 | for k in range(system_order-1, num_samples-1): 65 | outputs[k + 1] = 0.2 * outputs[k] + 0.04 * \ 66 | outputs[k] * np.sum(outputs[k - (system_order-1):k+1]) + \ 67 | 1.5 * inputs[k - 29] * inputs[k] + 0.001 68 | return inputs, outputs 69 | 70 | src, trg = narma30(washout + num_train + num_test) 71 | 72 | ## TRAINING ## 73 | 74 | setups = ('Sparse', 'Orthogonal', 'Ring of Neurons') 75 | reservoirs = (reservoir_sparse, reservoir_orthogonal, reservoir_ring) 76 | readouts = (readout_sparse, readout_orthogonal, readout_ring) 77 | 78 | # Initialize the reservoirs 79 | # Propagate data through the reservoirs, no training 80 | for res in reservoirs: 81 | res(src[:washout]) 82 | 83 | # Train the readout 84 | # Propagate data through reservoir, train the readout online 85 | for res, out in zip(reservoirs, readouts): 86 | r_state = res(src[washout:num_train]) 87 | out.train(r_state, trg[washout:num_train]) 88 | 89 | # Test the networks 90 | signals = [] 91 | for res, out in zip(reservoirs, readouts): 92 | r_state = res(src[washout+num_train:]) 93 | pred = out(r_state) 94 | signals.append(pred) 95 | 96 | ## PLOTTING ## 97 | 98 | # Error measurement 99 | mse = lambda sig_pred, sig_trg: ((sig_pred - sig_trg)**2).mean() 100 | rmse = lambda sig_pred, sig_trg: np.sqrt(mse(sig_pred, sig_trg)) 101 | nrmse = lambda sig_pred, sig_trg: rmse(sig_pred, sig_trg) / sig_trg.std() 102 | 103 | # Output and reservoir output plotting 104 | pretty_str = "{0:<" + str(max(map(len, setups))) + "}\t{1:0.6f}\t{2:0.6f}" 105 | print "Reservoir type\tMSE\t\tNRMSE" 106 | for sig, lbl in zip(signals, setups): 107 | pylab.plot(sig, label=lbl) 108 | err_mse = mse(sig, trg[washout + num_train:]) 109 | err_nrmse = nrmse(sig, trg[washout + num_train:]) 110 | print pretty_str.format(lbl, err_mse, err_nrmse) 111 | 112 | # Target plotting 113 | pylab.plot(trg[washout+num_train:], 'c', label='Target') 114 | 115 | # Show the plot 116 | pylab.axis((0.0, 70.0, 0.0, 0.45)) 117 | pylab.legend(loc=0) 118 | pylab.show(block=False) 119 | -------------------------------------------------------------------------------- /HDPy/epuck/epuck_arena.py: -------------------------------------------------------------------------------- 1 | """ 2 | The environment of the ePuck robot consists of several walls and 3 | obstacles. The difference between those two is that walls are isolated 4 | lines while obstacles are polygons, hence closed shapes. Some obstacles 5 | and arena arrangements have been prepared in :py:mod:`HDPy.epuck.env`. 
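Walls are specified as ``(x0, y0, x1, y1)`` line segments and obstacles as lists of ``(x, y)`` corner points. As a small sketch (how an arrangement is ultimately handed to the simulation depends on the experiment script), the box helpers defined at the bottom of this module can be combined into custom arenas::

    # a 2x2 box centred at (1.0, -2.0), plus one isolated wall segment
    my_arena = box_gen_lines((1.0, -2.0), (1.0, 1.0)) + [(4.0, -5.0, 4.0, 5.0)]
    # the same box as a closed polygon (corner list)
    my_obstacle = box_gen_corners((1.0, -2.0), (1.0, 1.0))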
6 | 7 | """ 8 | # open rectangle 9 | _left, _right = -1.0, 5.0 10 | _bottom, _top = -4.0, 20.0 11 | obstacles_open = [ 12 | (_left, _bottom, _right, _bottom), # bottom line 13 | (_left, _bottom, _left, _top), # left line 14 | (_right, _bottom, _right, _top) # right line 15 | ] 16 | 17 | # rhomboid 18 | _rad_x, _rad_y = 10.0, 10.0 19 | obstacles_rhomb = [ 20 | (0.0, _rad_y, _rad_x, 0.0), # top to right 21 | (0.0, -_rad_y, _rad_x, 0.0), # bottom to right 22 | (-_rad_x, 0.0, 0.0, _rad_y), # left to top 23 | (-_rad_x, 0.0, 0.0, -_rad_y) # left to bottom 24 | ] 25 | 26 | # box 27 | _rad_x, _rad_y = 10.0, 10.0 28 | obstacles_box = [ 29 | ( _rad_x, -_rad_y, _rad_x, _rad_y), # right 30 | (-_rad_x, -_rad_y, -_rad_x, _rad_y), # left 31 | (-_rad_x, _rad_y, _rad_x, _rad_y), # top 32 | (-_rad_x, -_rad_y, _rad_x, -_rad_y) # bottom 33 | ] 34 | 35 | # wall 36 | _dist = 5.0 37 | obstacles_wall = [ 38 | ( _dist, -10, _dist, 10 ) # wall at dist 39 | ] 40 | 41 | # lower box 42 | train_lower = [ 43 | (6.0, -6.0), 44 | (10.0, -6.0), 45 | (10.0, -10.0), 46 | (6.0, -10.0) 47 | ] 48 | 49 | # middle lower box 50 | train_middle = [ 51 | (0.0, -3.0), 52 | (2.0, -5.0), 53 | (0.0, -7.0), 54 | (-2.0, -5.0) 55 | ] 56 | 57 | # left rectangle 58 | train_left = [ 59 | (-6.0, 4.0), 60 | (-4.0, 4.0), 61 | (-4.0, -2.0), 62 | (-6.0, -2.0) 63 | ] 64 | 65 | # upper right box 66 | train_upper = [ 67 | (5.0, 6.0), 68 | (6.0, 2.0), 69 | (5.0, -1.0), 70 | (0.0, 4.0) 71 | ] 72 | 73 | # right triangle 74 | test_right = [ 75 | (2.0, -7.0), 76 | (4.0, 1.0), 77 | (6.0, -2.0) 78 | ] 79 | 80 | # upper rectangle 81 | test_upper = [ 82 | (-6.0, 4.0), 83 | (-6.0, 6.0), 84 | (6.0, 6.0), 85 | (6.0, 4.0) 86 | ] 87 | 88 | # left lower rectangle 89 | test_left = [ 90 | (-4.0, 0.0), 91 | (-1.0, -4.0), 92 | (-4.0, -7.0), 93 | (-7.0, -4.0) 94 | ] 95 | 96 | def box_gen_lines((cx, cy), (sx, sy)): 97 | """Create a rectangle using a center ``(cx,cy)`` and side length 98 | ``(sx, sy)``.""" 99 | return [ 100 | (cx - sx, cy - sy, cx - sx, cy + sy), 101 | (cx + sx, cy - sy, cx + sx, cy + sy), 102 | (cx - sx, cy + sy, cx + sx, cy + sy), 103 | (cx - sx, cy - sy, cx + sx, cy - sy) 104 | ] 105 | 106 | def box_gen_corners((cx, cy), (sx, sy)): 107 | """Create a rectangle using two corners ``(cx,cy)`` and ``(sx,sy)``.""" 108 | return [ 109 | (cx - sx, cy - sy), 110 | (cx + sx, cy - sy), 111 | (cx + sx, cy + sy), 112 | (cx - sx, cy + sy) 113 | ] 114 | 115 | obstacles_boxes = box_gen_lines((5.0, 5.0), (1.0, 1.0)) \ 116 | + box_gen_lines((0.0, 3.0), (1.0, 1.0)) \ 117 | + box_gen_lines((-2.0, -3.0), (1.0, 1.0)) \ 118 | + box_gen_lines((4.0, -2.0), (1.0, 1.0)) \ 119 | + box_gen_lines((-6.0, 5.0), (1.0, 1.0)) 120 | 121 | obstacles_maze = box_gen_lines((3.0, 3.0), (2.5, 1.5)) + box_gen_lines((3.0, 3.0), (5.0, 4.5)) 122 | 123 | obstacles_pipe = [ 124 | (-1.0, 1.5, -1.0, -1.5), # behind 125 | (-1.0, 1.5, 5.0, 1.5), # top 126 | (-1.0, -1.5, 8.0, -1.5), # bottom 127 | (5.0, 1.5, 5.0, 8.0), # ascent, left 128 | (8.0, -1.5, 8.0, 5.0), # ascent, right 129 | (5.0, 8.0, 15.0, 8.0), # opening, left 130 | (8.0, 5.0, 15.0, 5.0) # opening, right 131 | ] 132 | 133 | 134 | # Inverse crown 135 | obstacle_crown = [ 136 | (0.0, 0.0), 137 | (1.0, 1.0), 138 | (2.0, -1.0), 139 | (3.0, 1.0), 140 | (4.0, 0.0), 141 | (4.0, 2.0), 142 | (0.0, 2.0) 143 | ] 144 | -------------------------------------------------------------------------------- /test/puppy_offline_analysis.py: -------------------------------------------------------------------------------- 1 | import HDPy 2 | import pylab 3 | import 
numpy as np 4 | import h5py 5 | import sys 6 | 7 | # global config var 8 | step_width = 150 9 | step_width_plotting = 50 10 | 11 | # the observations file lists the initial epoch, while the critic datafile doesn't 12 | # thus, the sensor data must be shifted by one step_width 13 | obs_offset = step_width 14 | # The analysis experiments are always reverted, hence there's only one initial sample 15 | # (check out puppy.offline_playback:"if 'init_step' in data_grp: [...]", ActorCritic.__call__ and PuppyHDP.init_episode) 16 | # For this initial sample, nothing is written into the analysis_critic_pth file. Hence, there's an offset 17 | # of one epoch for data in analysis_data_pth and analysis_critic_pth. 18 | # Note that if the experiments are restarted instead of reverted, this offset would be =2 19 | 20 | robot_radius = 0.2 21 | 22 | 23 | # Open files 24 | a = HDPy.Analysis(HDPy.H5CombinedFile('/tmp/example_eval.hdf5', '/tmp/example_data.hdf5')) 25 | 26 | # Create figure 27 | fig = pylab.figure() 28 | axis = fig.add_subplot(111) 29 | 30 | # Plot target 31 | target_loc = (6.0, 4.0) 32 | HDPy.puppy.plot_locationtarget(axis, target=target_loc, distance=0.5) 33 | axis.invert_xaxis() # positive x-axis in webots goes to the left! 34 | pylab.show(block=False) 35 | 36 | # Retrieve and plot the initial trajectory 37 | grp = a['0'] # this is assumed to be the main trajectory 38 | main_pth = grp['a_curr'][:] 39 | main_len = main_pth.shape[0] * step_width 40 | HDPy.puppy.plot_trajectory(a, axis, '0', step_width, offset=step_width*25, label='Initial trajectory') 41 | pylab.show(block=False) 42 | 43 | def find_offset(a0, a1): 44 | """Return the number of steps for which the sequences ``a0`` and 45 | ``a1`` are identical. 46 | """ 47 | offset = min(a0.shape[0], a1.shape[0]) 48 | while not (a0[:offset] == a1[:offset]).all(): 49 | offset -= 1 50 | if offset < 0: 51 | raise IndexError() 52 | 53 | return offset 54 | 55 | # group experiments with respect to the main trajectory cutoff and also 56 | # get normalization data 57 | pth_data = {} 58 | for expno in a.experiments: 59 | if expno == '0': 60 | # '0' is the vanilla trajectory, don't consider it 61 | continue 62 | 63 | grp = a[expno] 64 | data_offset = find_offset(main_pth, grp['a_curr'][:]) 65 | 66 | if data_offset not in pth_data: 67 | pth_data[data_offset] = [] 68 | 69 | pth_data[data_offset].append((expno, grp['j_curr'][-3])) 70 | 71 | # Compute normalization params over the whole experiment 72 | returns_total = np.vstack([map(lambda i: i[1], lst) for lst in pth_data.values()]) 73 | nrm_total_min = returns_total.min() 74 | nrm_total_ptp = returns_total.ptp() 75 | 76 | # Go through data, plot the actions/returns 77 | for data_offset in pth_data: 78 | 79 | # get data 80 | experiments, nrm_data = zip(*pth_data[data_offset]) 81 | 82 | # Compute the normalization params over the current state 83 | p_returns = np.hstack(nrm_data) 84 | p_min = p_returns.min() 85 | p_ptp = p_returns.ptp() 86 | 87 | # Plot the robot disc 88 | if len(pth_data[data_offset]) > 1: 89 | loc_robot = (a['0']['puppyGPS_x'][(data_offset+1)*step_width-1], a['0']['puppyGPS_y'][(data_offset+1)*step_width-1]) 90 | robot_color = (np.median(p_returns) - nrm_total_min) / (nrm_total_ptp) 91 | robot_color = 1.0 - robot_color 92 | rob = pylab.Circle(loc_robot, robot_radius, fill=True, facecolor=str(robot_color)) 93 | axis.add_artist(rob) 94 | 95 | # Plot the rays 96 | for expno, return_ in pth_data[data_offset]: 97 | grp = a[expno] 98 | lbl = expno 99 | 100 | sensor_offset = obs_offset + 
data_offset * step_width 101 | data_x_plot = grp['puppyGPS_x'][sensor_offset-1::step_width_plotting] 102 | data_y_plot = grp['puppyGPS_y'][sensor_offset-1::step_width_plotting] 103 | 104 | col = 0.25 + (return_ - p_min) / (2.0 * p_ptp+1e-7) 105 | col = 1.0 - col 106 | col = col[0] 107 | 108 | axis.plot(data_x_plot, data_y_plot, linewidth=1, label=lbl, color=str(col)) 109 | pylab.draw() 110 | -------------------------------------------------------------------------------- /doc/source/pp.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _plants-and-policies: 3 | 4 | Plants and Policies 5 | =================== 6 | 7 | .. contents:: 8 | 9 | 10 | Introduction 11 | ------------ 12 | 13 | .. module:: HDPy 14 | 15 | As described in :ref:`Reinforcement Learning `, 16 | the learning problem formulation is achieved by specifying a 17 | :py:class:`Plant` and a :py:class:`Policy`. For *Puppy* and *ePuck*, 18 | some examples have already been implemented. 19 | 20 | .. plant 21 | 22 | To create a custom plant, the :py:class:`Plant` class has to 23 | be subtyped. As the plant models the environment, it has to compute 24 | a reward and a state from sensor measurements. Together, they encode the 25 | abstract learning target the problem designer has in mind. The 26 | implementation of a plant is straightforward. The two functions 27 | :py:meth:`Plant.state_input` and :py:meth:`Plant.reward` are called 28 | whenever the state or reward is requested. They are expected to return 29 | a vector (:math:`N \times 1`) and a scalar, respectively. The state space 30 | dimension :math:`N` may be announced through the plant's constructor and 31 | later queried by calling :py:meth:`Plant.state_space_dim`. If the 32 | plant depends on the episode, the :py:meth:`Plant.reset` method can be 33 | implemented as well to reset the instance's internal state. Note that 34 | the sensor values are not preprocessed, specifically not normalized. 35 | For this purpose, a normalization instance (:py:class:`PuPy.Normalization`) is 36 | automatically registered at :py:attr:`Plant.normalization`. Note that 37 | normalization is mandatory in :py:meth:`Plant.state_input`. 38 | 39 | .. policy 40 | 41 | The implementation of a custom policy is analogous to the creation 42 | of a new :py:class:`Plant`. Here, the class :py:class:`Policy` is to be 43 | subtyped and some of its methods are to be implemented. As with 44 | :py:class:`Plant`, the normalization and action space dimensions are 45 | automatically registered, in the latter case through the default 46 | constructor. Furthermore, the policy is reset at the beginning of a new 47 | episode through :py:meth:`Policy.reset`. 48 | 49 | The action itself is completely defined through the methods 50 | :py:meth:`Policy.initial_action`, :py:meth:`Policy.update` and 51 | :py:meth:`Policy.get_iterator`. The first returns a valid action, used 52 | for the initial behaviour (i.e. before the actor is in operation). The 53 | other two define the behaviour during the experiment. After an action 54 | has been selected, the :py:meth:`Policy.update` method is called, 55 | which should note the new action and update internal structures. As with 56 | the state, the action is passed as an :math:`M \times 1` vector. This 57 | is followed by a call to :py:meth:`Policy.get_iterator`, which 58 | in turn produces the sequence of motor targets, as requested by 59 | :py:class:`WebotsRobotMixin`. 60 | 61 | 62 | Reference 63 | --------- 64 | 65 | ..
autoclass:: Plant 66 | :members: 67 | 68 | .. autoclass:: Policy 69 | :members: 70 | 71 | 72 | .. _plants_puppy: 73 | 74 | Puppy Plants 75 | ^^^^^^^^^^^^ 76 | 77 | .. autoclass:: HDPy.puppy.plant.SpeedReward 78 | 79 | .. autoclass:: HDPy.puppy.plant.LineFollower 80 | 81 | .. autoclass:: HDPy.puppy.plant.TargetLocation 82 | 83 | .. autoclass:: HDPy.puppy.plant.TargetLocationLandmarks 84 | 85 | .. autoclass:: HDPy.puppy.plant.DiffTargetLocationLandmarks 86 | 87 | 88 | 89 | .. _policies_puppy: 90 | 91 | Puppy Policies 92 | ^^^^^^^^^^^^^^ 93 | 94 | .. GaitPolicy 95 | 96 | .. automodule:: HDPy.puppy.policy.policies 97 | 98 | 99 | Examples: 100 | 101 | .. autoclass:: HDPy.puppy.policy.FRA 102 | 103 | .. autoclass:: HDPy.puppy.policy.LRA 104 | 105 | .. autoclass:: HDPy.puppy.policy.LRP 106 | 107 | .. autoclass:: HDPy.puppy.policy.IIAPFO 108 | 109 | 110 | .. _plants_epuck: 111 | 112 | ePuck Plants 113 | ^^^^^^^^^^^^ 114 | 115 | .. autoclass:: HDPy.epuck.plant.CollisionAvoidanceFrontal 116 | :show-inheritance: 117 | 118 | .. autoclass:: HDPy.epuck.plant.CollisionAvoidanceSideways 119 | :show-inheritance: 120 | 121 | .. autoclass:: HDPy.epuck.plant.CollisionAvoidanceFull 122 | :show-inheritance: 123 | 124 | .. autoclass:: HDPy.epuck.plant.Attractor 125 | :show-inheritance: 126 | 127 | 128 | 129 | .. _policies_epuck: 130 | 131 | ePuck Policies 132 | ^^^^^^^^^^^^^^ 133 | 134 | .. autoclass:: HDPy.epuck.policy.Heading 135 | :show-inheritance: 136 | 137 | .. autoclass:: HDPy.epuck.policy.HeadingRandInit 138 | :show-inheritance: 139 | -------------------------------------------------------------------------------- /HDPy/puppy/plant/AccelerationReward.py: -------------------------------------------------------------------------------- 1 | """ 2 | ACD plants 3 | 4 | 5 | """ 6 | from HDPy import Plant 7 | import numpy as np 8 | import scipy.constants 9 | import scipy.signal 10 | 11 | class AccelerationReward(Plant): 12 | """A :py:class:`Plant` with focus on the speed and acceleration of the robot. 
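The state is the full 24-dimensional, normalized sensor vector (GPS, accelerometer, compass and gyro axes plus the four hip, knee and touch channels), as assembled in :py:meth:`state_input`; the reward trades covered distance against filtered acceleration (see :py:meth:`reward`).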
13 | """ 14 | 15 | def __init__(self): 16 | super(AccelerationReward, self).__init__(state_space_dim=24) 17 | self.x = [] 18 | self.y = [] 19 | self.ax = [] 20 | self.ay = [] 21 | self.az = [] 22 | 23 | 24 | def state_input(self, state): 25 | """Full state 26 | """ 27 | sio = np.atleast_2d([ 28 | self.normalization.normalize_value('puppyGPS_x', state['puppyGPS_x'][-1]), 29 | self.normalization.normalize_value('puppyGPS_y', state['puppyGPS_y'][-1]), 30 | self.normalization.normalize_value('puppyGPS_z', state['puppyGPS_z'][-1]), 31 | self.normalization.normalize_value('accelerometer_x', state['accelerometer_x'][-1]), 32 | self.normalization.normalize_value('accelerometer_y', state['accelerometer_y'][-1]), 33 | self.normalization.normalize_value('accelerometer_z', state['accelerometer_z'][-1]), 34 | self.normalization.normalize_value('compass_x', state['compass_x'][-1]), 35 | self.normalization.normalize_value('compass_y', state['compass_y'][-1]), 36 | self.normalization.normalize_value('compass_z', state['compass_z'][-1]), 37 | self.normalization.normalize_value('gyro_x', state['gyro_x'][-1]), 38 | self.normalization.normalize_value('gyro_y', state['gyro_y'][-1]), 39 | self.normalization.normalize_value('gyro_z', state['gyro_z'][-1]), 40 | self.normalization.normalize_value('hip0', state['hip0'][-1]), 41 | self.normalization.normalize_value('hip1', state['hip1'][-1]), 42 | self.normalization.normalize_value('hip2', state['hip2'][-1]), 43 | self.normalization.normalize_value('hip3', state['hip3'][-1]), 44 | self.normalization.normalize_value('knee0', state['knee0'][-1]), 45 | self.normalization.normalize_value('knee1', state['knee1'][-1]), 46 | self.normalization.normalize_value('knee2', state['knee2'][-1]), 47 | self.normalization.normalize_value('knee3', state['knee3'][-1]), 48 | # state['touch0'][-1], 49 | # state['touch1'][-1], 50 | # state['touch2'][-1], 51 | # state['touch3'][-1], 52 | self.normalization.normalize_value('touch0', state['touch0'][-1]), 53 | self.normalization.normalize_value('touch1', state['touch1'][-1]), 54 | self.normalization.normalize_value('touch2', state['touch2'][-1]), 55 | self.normalization.normalize_value('touch3', state['touch3'][-1]) 56 | ]).T 57 | return sio 58 | 59 | 60 | def reward(self, epoch): 61 | """Return -100.0 if the robot tumbled. 62 | Maximizes speed while minimizing total acceleration 63 | The speed measurement is the average/covered distance since last epoch 64 | and sum of the acceleration minus gravity is used as negative reinforcement. 
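Concretely, as implemented below, the returned value is r = spd - acc, where spd is roughly (3000/n) * ||(x_n, y_n) - (x_0, y_0)|| over the epoch of n samples (left at zero if the displacement looks inconsistent), and acc is the mean of |a_x + a_y + a_z - g| computed on low-pass filtered accelerometer traces.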
65 | """ 66 | 67 | # if (epoch['accelerometer_z'] < 1.0).mean() > 0.8: 68 | # return -100.0 69 | 70 | n = epoch['puppyGPS_x'].size 71 | 72 | #keep last position 73 | self.x = np.concatenate([self.x[-1:], epoch['puppyGPS_x']]) 74 | self.y = np.concatenate([self.y[-1:], epoch['puppyGPS_y']]) 75 | 76 | #store last 2 epochs plus current one 77 | self.ax = np.concatenate([self.ax[-2*n:], epoch['accelerometer_x']]) 78 | self.ay = np.concatenate([self.ay[-2*n:], epoch['accelerometer_y']]) 79 | self.az = np.concatenate([self.az[-2*n:], epoch['accelerometer_z']]) 80 | 81 | spd = 0 82 | if self.x.size > 1: 83 | mov = np.linalg.norm(np.array([self.x[-1] - self.x[0], self.y[-1] - self.y[0]])) 84 | #check consistency 85 | if mov < 0.1*n: 86 | # calculate displacement in a reasonable scale 87 | spd = (3000.0/n) * mov; 88 | 89 | 90 | 91 | s = np.ceil(self.ax.size/3.0) 92 | fr = 0.3 93 | sr = 2*fr + (s/10.0) #should be smaller than s 94 | 95 | #filtered to remove noise; borders of the result always tend to zero and have to be trimmed 96 | end = -np.ceil(sr) 97 | beg = -s+end 98 | fax = firfilt(self.ax, fr, sr)[beg:end] 99 | fay = firfilt(self.ay, fr, sr)[beg:end] 100 | faz = firfilt(self.az, fr, sr)[beg:end] 101 | 102 | if fax.size > 0: 103 | acc = abs(fax + fay + faz - scipy.constants.g).mean() 104 | else: 105 | acc = scipy.constants.g; 106 | 107 | #acc = abs(epoch['accelerometer_x'] + epoch['accelerometer_y'] + epoch['accelerometer_z'] - scipy.constants.g).mean() 108 | return spd - acc; 109 | 110 | 111 | def firfilt(interval, freq, sampling_rate): 112 | """ Second Order LowPass Filter 113 | """ 114 | nfreq = freq/(0.5*sampling_rate) 115 | taps = sampling_rate + 1 116 | a = 1 117 | b = scipy.signal.firwin(taps, cutoff=nfreq) 118 | firstpass = scipy.signal.lfilter(b, a, interval) 119 | secondpass = scipy.signal.lfilter(b, a, firstpass[::-1])[::-1] 120 | return secondpass 121 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. 
doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\ReinforcementReservoirLearning.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\ReinforcementReservoirLearning.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 
153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 
53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/ReinforcementReservoirLearning.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/ReinforcementReservoirLearning.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/ReinforcementReservoirLearning" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/ReinforcementReservoirLearning" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 
143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /HDPy/epuck/plant/plants.py: -------------------------------------------------------------------------------- 1 | from ...rl import Plant 2 | import warnings 3 | import numpy as np 4 | 5 | class CollisionAvoidanceFrontal(Plant): 6 | """Plant for ePuck to realize collision avoidance. The state 7 | consists of the three frontal infrared sensors. The reward is 8 | negative, if one of the three frontal sensors reads a proximity 9 | lower than ``theta``. Gaussian noise is added to the reward if 10 | ``obs_noise`` is positive. 11 | 12 | """ 13 | def __init__(self, theta, obs_noise=0.0): 14 | super(CollisionAvoidanceFrontal, self).__init__(state_space_dim=3) 15 | self.theta = float(theta) 16 | self.obs_sigma = abs(float(obs_noise)) 17 | 18 | def state_input(self, state): 19 | """Return the state from observations ``state``""" 20 | input_ = np.hstack((state['ir'][0, :2], [state['ir'][0, -1]])) 21 | input_ = self.normalization.normalize_value('ir', input_) 22 | #input_ += np.random.normal(scale=0.001, size=input_.shape) # Additive noise 23 | return np.atleast_2d(input_).T 24 | 25 | def reward(self, epoch): 26 | """Return the reward produced by ``epoch``.""" 27 | ir_front = np.hstack((epoch['ir'][:2], [epoch['ir'][-1]])) 28 | ret = float(sum([min(ir - self.theta, 0) for ir in ir_front.T])) 29 | #ret += np.random.normal(scale=0.00001) 30 | if self.obs_sigma > 0.0: 31 | ret += np.random.normal(scale=self.obs_sigma) 32 | return ret 33 | 34 | class CollisionAvoidanceSideways(Plant): 35 | """Plant for ePuck to realize collision avoidance. The state 36 | consists of the frontal and two sideways infrared sensors. The 37 | reward is negative, if one of those sensors reads a proximity 38 | lower than ``theta``. Gaussian noise is added to the reward if 39 | ``obs_noise`` is positive. 40 | 41 | """ 42 | def __init__(self, theta, obs_noise=0.0): 43 | super(CollisionAvoidanceSideways, self).__init__(state_space_dim=3) 44 | self.theta = float(theta) 45 | self.obs_sigma = abs(float(obs_noise)) 46 | 47 | def state_input(self, state): 48 | """Return the state from observations ``state``""" 49 | input_ = np.array((state['ir'][0, 0], state['ir'][0, 2], state['ir'][0, 6])) 50 | input_ = self.normalization.normalize_value('ir', input_) 51 | #input_ += np.random.normal(scale=0.001, size=input_.shape) # Additive noise 52 | return np.atleast_2d(input_).T 53 | 54 | def reward(self, epoch): 55 | """Return the reward produced by ``epoch``.""" 56 | sensors = np.array((epoch['ir'][0, 0], epoch['ir'][0, 2], epoch['ir'][0, 6])) 57 | ret = float(sum([min(ir - self.theta, 0) for ir in sensors])) 58 | if self.obs_sigma > 0.0: 59 | ret += np.random.normal(scale=self.obs_sigma) 60 | return ret 61 | 62 | class CollisionAvoidanceFull(Plant): 63 | """Plant for ePuck to realize collision avoidance. The state 64 | consists of all eight infrared sensors. The reward is 65 | negative, if one of the sensors reads a proximity lower than 66 | ``theta``. 
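For example, with ``theta = 0.2`` and normalized readings of 0.15 and 0.18 on two sensors while all others stay above the threshold, the reward is (0.15 - 0.2) + (0.18 - 0.2) = -0.07.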
Gaussian noise is added to the reward if 67 | ``obs_noise`` is positive. 68 | 69 | """ 70 | def __init__(self, theta, obs_noise=0.0): 71 | super(CollisionAvoidanceFull, self).__init__(state_space_dim=8) 72 | self.theta = float(theta) 73 | self.obs_sigma = abs(float(obs_noise)) 74 | 75 | def state_input(self, state): 76 | """Return the state from observations ``state``""" 77 | input_ = state['ir'].T 78 | input_ = self.normalization.normalize_value('ir', input_) 79 | return input_ 80 | 81 | def reward(self, epoch): 82 | """Return the reward produced by ``epoch``.""" 83 | ret = float(sum([min(ir - self.theta, 0) for ir in epoch['ir'].T])) 84 | #ret += np.random.normal(scale=0.00001) 85 | if self.obs_sigma > 0.0: 86 | ret += np.random.normal(scale=self.obs_sigma) 87 | return ret 88 | 89 | class Attractor(Plant): 90 | """Plant for ePuck to guide it to an ``attractor`` and away from a 91 | ``repeller``. Both points are to be passed as tuples. The state 92 | consists of the robot's location. The reward is inversely 93 | proportional with factor ``scale`` to the distances to the 94 | attractor and repeller, i.e. 95 | 96 | .. math:: 97 | r = \\frac{s}{\Delta_a} - \\frac{s}{\Delta_r} 98 | 99 | """ 100 | def __init__(self, attractor, repeller, scale): 101 | self.attractor = map(float, attractor) 102 | self.repeller = map(float, repeller) 103 | self.scale = scale 104 | super(Attractor, self).__init__(state_space_dim=2) 105 | 106 | def state_input(self, state): 107 | """Return the state from observations ``state``""" 108 | input_ = np.atleast_2d(state['loc']).T 109 | return input_ 110 | 111 | def _idist(self, pt0, pt1): 112 | """Compute the inverse distance between two points ``pt0`` and 113 | ``pt1``. The points are expected to be coordinate tuples. 114 | """ 115 | x_0, y_0 = pt0 116 | x_1, y_1 = pt1 117 | return 1.0 / np.sqrt((x_0 - x_1)**2 + (y_0 - y_1)**2) 118 | 119 | def reward(self, epoch): 120 | """Return the reward produced by ``epoch``.""" 121 | reward = 0.0 122 | reward += self.scale * self._idist(epoch['loc'][0], self.attractor) 123 | reward -= self.scale * self._idist(epoch['loc'][0], self.repeller) 124 | return reward 125 | 126 | 127 | class Trivial(CollisionAvoidanceFrontal): 128 | """Plant for ePuck to realize collision avoidance, using the three 129 | frontal sensors. 130 | 131 | .. deprecated:: 1.0 132 | Use :py:class:`CollisionAvoidanceFrontal` instead 133 | 134 | """ 135 | def __init__(self, *args, **kwargs): 136 | warnings.warn("This class is deprecated. Use 'CollisionAvoidance' instead") 137 | super(Trivial, self).__init__(*args, **kwargs) 138 | 139 | class SidewaysTrivial(CollisionAvoidanceSideways): 140 | """Plant for ePuck to realize collision avoidance, using the frontal 141 | and two sideways sensors. 142 | 143 | .. deprecated:: 1.0 144 | Use :py:class:`CollisionAvoidanceFrontal` instead 145 | 146 | """ 147 | def __init__(self, *args, **kwargs): 148 | warnings.warn("This class is deprecated. Use 'CollisionAvoidanceSideways' instead") 149 | super(SidewaysTrivial, self).__init__(*args, **kwargs) 150 | 151 | class FullTrivial(CollisionAvoidanceFull): 152 | """Plant for ePuck to realize collision avoidance, using the all 153 | eight infrared sensors. 154 | 155 | .. deprecated:: 1.0 156 | Use :py:class:`CollisionAvoidanceFrontal` instead 157 | 158 | """ 159 | def __init__(self, *args, **kwargs): 160 | warnings.warn("This class is deprecated. 
Use 'CollisionAvoidanceFull' instead") 161 | super(FullTrivial, self).__init__(*args, **kwargs) 162 | -------------------------------------------------------------------------------- /HDPy/inout.py: -------------------------------------------------------------------------------- 1 | """ 2 | When storing experimental data in HDF5 files, some extra operations 3 | may be useful to process them on a low level. The operations provided 4 | by this module mangle HDF5 files directly (through h5py), without 5 | relying on higher-level functionality. In turn, some of the 6 | functionality may be useful for more advanced stuff. 7 | 8 | Note that all functions rely on a specific file format, specifically on 9 | the format which is written by :py:class:`PuPy.RobotCollector`, with 10 | experiments in groups and sensor data in seperate datasets within the 11 | experiment group. On this ground, too short experiments can be removed 12 | (:py:func:`remove_init_only_groups`) or files merged together 13 | (:py:func:`h5_merge_experiments`). When data is split up between two 14 | files, they can easily be put together by :py:class:`H5CombinedFile`. 15 | 16 | """ 17 | import h5py 18 | import warnings 19 | 20 | def remove_init_only_groups(pth, init_steps): 21 | """Remove groups from HDF5 data files, which 22 | 23 | a) Are empty (0 members) 24 | b) Have collected less than ``init_steps`` epochs 25 | 26 | """ 27 | if isinstance(pth, str): 28 | f = h5py.File(pth, 'a') 29 | else: 30 | f = pth 31 | 32 | all_keys = f.keys() 33 | remove_zero = [k for k in all_keys if len(f[k]) == 0] 34 | remove_short = [k for k in all_keys if len(f[k]) > 0 and f[k]['a_curr'].shape[0] < init_steps] 35 | 36 | for k in remove_zero + remove_short: 37 | print "Removing", k 38 | del f[k] 39 | 40 | print "Removed", (len(remove_zero) + len(remove_short)), "groups" 41 | 42 | return f 43 | 44 | def h5_reorder(pth): 45 | """Rearrange the experiments in ``pth`` such that the experiment 46 | indices are in the range [0,N], without missing ones. 47 | No order of the experiments is guaranteed. 48 | 49 | """ 50 | if isinstance(pth, str): 51 | f = h5py.File(pth, 'a') 52 | else: 53 | f = pth 54 | 55 | # keys must be ascending 56 | old_keys = map(str, sorted(map(int, f.keys()))) 57 | for new_key, old_key in enumerate(old_keys): 58 | new_key = str(new_key) 59 | if new_key != old_key: 60 | if new_key not in f.keys(): 61 | print old_key, "->", new_key 62 | f[new_key] = f[old_key] 63 | del f[old_key] 64 | else: 65 | print "Cannot move", old_key, "to", new_key, "(new key exists)" 66 | 67 | return f 68 | 69 | def h5_merge_experiments(pth0, pth1, trg=None): 70 | """Merge groups of the HDF5 files ``pth0`` and ``pth1``. If ``trg`` 71 | is given, a new file will be created. Otherwise the data is merged 72 | into ``pth0``. 73 | 74 | """ 75 | fh1 = h5py.File(pth1, 'r') 76 | 77 | if trg is None: 78 | f_trg = fh0 = h5py.File(pth0, 'a') 79 | else: 80 | f_trg = h5py.File(trg, 'w') 81 | fh0 = h5py.File(pth0, 'r') 82 | # Copy groups of file0 to trg 83 | for k in fh0.keys(): 84 | fh0.copy(k, f_trg) 85 | 86 | groups_0 = map(int, fh0.keys()) 87 | groups_1 = map(int, fh1.keys()) 88 | 89 | # Copy groups of file1 to trg 90 | offset = 1 + max(groups_0) - min(groups_1) 91 | for k in groups_1: 92 | src = str(k) 93 | dst = str(k + offset) 94 | fh1.copy(src, f_trg, name=dst) 95 | 96 | return f_trg 97 | 98 | def remove_boundary_groups(pth): 99 | """Remove the first and last experiment with respect to webots 100 | restart/revert in ``pth``. 
The boundaries are determined through 101 | the *init_step* group. This method is to save possibly corrupted 102 | experimental data files, due to webots' memory issues. To work 103 | properly, the groups must not be altered before this method, e.g. 104 | by :py:func:`remove_init_only_groups`. 105 | 106 | """ 107 | if isinstance(pth, str): 108 | f = h5py.File(pth, 'a') 109 | else: 110 | f = pth 111 | 112 | keys = sorted(map(int, f.keys())) 113 | restarts = [k for k in keys if 'init_step' in f[str(k)]] 114 | restarts += [k-1 for k in restarts if k > 0] 115 | restarts += [keys[-1]] 116 | restarts = set(sorted(restarts)) 117 | for k in restarts: 118 | del f[str(k)] 119 | 120 | return f 121 | 122 | class H5CombinedFile(object): 123 | """Combine two HDF5 files which have the same groups on the root 124 | level but different datasets within these groups. The files are 125 | packed together such that they can be handled as if a single file 126 | was present. 127 | 128 | ``pth_main`` 129 | Path to the first HDF5 file. If a dataset is available in 130 | both files, the one from this file will be used. 131 | 132 | 133 | ``pth_additional`` 134 | Path to the second HDF5 file. 135 | 136 | """ 137 | def __init__(self, pth_main, pth_additional): 138 | self.pth0 = pth_main 139 | self.pth1 = pth_additional 140 | self.fh0 = h5py.File(pth_main, 'r') 141 | self.fh1 = h5py.File(pth_additional, 'r') 142 | self.keys0 = [k for k in self.fh0 if len(self.fh0[k]) > 0] 143 | self.keys1 = [k for k in self.fh1 if len(self.fh1[k]) > 0] 144 | self.keys_common = [k for k in self.keys0 if k in self.keys1] 145 | 146 | def __getitem__(self, key): 147 | """Return a :py:class:`H5CombinedGroup` instance, binding the 148 | groups ``key`` of the two files together. 149 | """ 150 | if key not in self.keys_common: 151 | raise KeyError() 152 | 153 | return H5CombinedGroup(self.fh0[key], self.fh1[key]) 154 | 155 | def __len__(self): 156 | """Return the length of all (shared) groups.""" 157 | return len(self.keys_common) 158 | 159 | def __contains__(self, item): 160 | """True iff ``item`` is a group known in both files.""" 161 | return item in self.keys_common 162 | 163 | def keys(self): 164 | """Return all group names which are present in both files.""" 165 | return self.keys_common[:] 166 | 167 | def close(self): 168 | """Close all filehandlers.""" 169 | self.fh0.close() 170 | self.fh1.close() 171 | 172 | def attributes(self, key): 173 | """Return two attribute manager instances, one pointing to group 174 | ``key`` in each file. 175 | """ 176 | assert key in self.keys_common 177 | attrs0 = h5py.AttributeManager(self.fh0[key]) 178 | attrs1 = h5py.AttributeManager(self.fh1[key]) 179 | return attrs0, attrs1 180 | 181 | class H5CombinedGroup(object): 182 | """Combine two related HDF5 groups which store different datasets 183 | and present them as a single group. Instances to this class are 184 | typically exclusively created through :py:class:`H5CombinedFile`. 185 | 186 | ``grp0`` 187 | Group of the first file. If a dataset is present in both groups, 188 | the one from this group will be used. 189 | 190 | ``grp1`` 191 | Group of the second file. 192 | 193 | """ 194 | def __init__(self, grp0, grp1): 195 | self.grp0 = grp0 196 | self.grp1 = grp1 197 | 198 | def __getitem__(self, key): 199 | """Return dataset ``key`` or raise an exception if neither of 200 | the groups contains this key. 
201 | """ 202 | if key in self.grp0: 203 | return self.grp0[key] 204 | elif key in self.grp1: 205 | return self.grp1[key] 206 | else: 207 | raise KeyError() 208 | 209 | def __len__(self): 210 | """Return the number of keys in both groups.""" 211 | return len(self.grp0) + len(self.grp1) 212 | 213 | def __contains__(self, item): 214 | """True iff ``item`` is a key of one of the groups.""" 215 | return item in self.grp0 or item in self.grp1 216 | 217 | def keys(self): 218 | """Return a list of datasets names found in any of the groups.""" 219 | return self.grp0.keys() + self.grp1.keys() 220 | 221 | def attributes(self): 222 | """Return two attribute manager instances, one pointing to each 223 | group.""" 224 | attrs0 = h5py.AttributeManager(self.grp0) 225 | attrs1 = h5py.AttributeManager(self.grp1) 226 | return attrs0, attrs1 227 | 228 | 229 | class DataMerge(H5CombinedFile): 230 | """Identical to :py:class:`H5Combine` 231 | 232 | .. deprecated:: 1.0 233 | Use :py:class:`H5Combine`. 234 | 235 | """ 236 | def __init__(self, *args, **kwargs): 237 | warnings.warn('This class is depcreated. Use H5CombinedFile instead') 238 | super(DataMerge, self).__init__(*args, **kwargs) 239 | 240 | class DataMergeGroup(H5CombinedGroup): 241 | """Identical to :py:class:`H5CombinedGroup` 242 | 243 | .. deprecated:: 1.0 244 | Use :py:class:`H5CombinedGroup` 245 | 246 | """ 247 | def __init__(self, *args, **kwargs): 248 | warnings.warn('This class is depcreated. Use H5CombinedGroup instead') 249 | super(DataMergeGroup, self).__init__(*args, **kwargs) 250 | 251 | -------------------------------------------------------------------------------- /HDPy/epuck/analysis_epuck.py: -------------------------------------------------------------------------------- 1 | """ 2 | The analysis of ePuck experiments is conducted by looking at isolated 3 | time steps of a testing or training situation. Three tools have been 4 | implemented to support this procedure: 5 | :py:func:`epuck_plot_all_trajectories` plots all training trajectories, 6 | :py:func:`epuck_plot_value_over_action` creates a graph of the expected 7 | return in a state as a function of the action. 8 | :py:func:`epuck_plot_snapshot` plots the predicted return over an 9 | example trajectory for several actions. 10 | 11 | """ 12 | import pylab 13 | import warnings 14 | import numpy as np 15 | 16 | def plot_all_trajectories(analysis, axis=None, key='loc'): 17 | """Plot trajectories of all episodes in ``analysis`` in the same 18 | plot ``axis``. The later an episode, the darker its trajectory is 19 | displayed. The trajectory data must be stored as ``key`` (default 20 | *loc*), a two-dimensional array. This function is intended to be 21 | used for analysis of **ePuck** experiments. 22 | """ 23 | if axis is None: 24 | axis = pylab.figure().add_subplot(111) 25 | 26 | data = analysis.get_data(key) 27 | N = len(data)-1.0 28 | if N == 0.0: 29 | for idx, episode in enumerate(data): 30 | col = 0.0 31 | axis.plot(episode[:, 0], episode[:, 1], color=str(col), label=str(idx)) 32 | else: 33 | for idx, episode in enumerate(data): 34 | #col = 0.75 - (0.75 * (idx - 1))/N 35 | col = 0.75 * (1.0 - float(idx) / N) 36 | axis.plot(episode[:, 0], episode[:, 1], color=str(col), label=str(idx)) 37 | 38 | return axis 39 | 40 | def _plot_line(axis, origin, angle, size_hi, size_lo=0.0, **kwargs): 41 | """Plot a straight line into ``axis``. The line is described through 42 | the ``origin`` and the ``angle``. 
It is drawn from ``size_lo`` to 43 | ``size_hi``, where both parameters are passed as fractions of said 44 | line. ``kwargs`` are passed to :py:meth:`pylab.plot`. 45 | """ 46 | src = (origin[0] + np.cos(angle) * size_lo, origin[1] + np.sin(angle) * size_lo) 47 | trg = (origin[0] + np.cos(angle) * size_hi, origin[1] + np.sin(angle) * size_hi) 48 | axis.plot((src[0], trg[0]), (src[1], trg[1]), **kwargs) 49 | 50 | def plot_value_over_action(critic, state, axis, a_range=None): 51 | """Given a trained ``critic``, plot the expected return as function 52 | of the action, given a ``state`` into ``axis``. Assuming 1-d action 53 | (otherwise, it becomes messy to plot). The default sampled actions 54 | range ``a_range`` is :math:`[0, 2\pi]` with step size 0.01. 55 | """ 56 | if a_range is None: 57 | a_range = np.arange(0.0, 2*np.pi, 0.01) 58 | exp_return = np.vstack([critic(state, action%(2*np.pi), simulate=True) for action in a_range]) 59 | axis.plot(a_range, exp_return, label='J(a|s)') 60 | axis.set_xlabel('action') 61 | axis.set_ylabel('Expected return') 62 | return axis 63 | 64 | def plot_snapshot(axis, robot, critic, trajectory, sample_actions, init_steps=1, traj_chosen=None, inspected_steps=None): 65 | """Plot a snapshot of an *ePuck* experiment. The plot shows an 66 | example trajectory of the ``robot``, together with the expected 67 | return - i.e. evaluation of the ``critic`` at each state for some 68 | ``sample_actions``. Obviously, the ``critic`` needs to be 69 | pre-trained for this to make sense. 70 | 71 | .. note:: 72 | The action is assumed to represent the absolute heading. 73 | 74 | ``axis`` 75 | A :py:class:`pylab.Axis` to draw into. 76 | 77 | ``robot`` 78 | The ePuck robot. 79 | 80 | ``critic`` 81 | The pre-trained critic. It's supposed to be generated by 82 | :py:meth:`critic` (or implement the :py:meth:`critic_fu` 83 | interface). 84 | 85 | ``trajectory`` 86 | Example trajectory the robot is moved along. 87 | 88 | ``sample_actions`` 89 | List of actions to be sampled and displayed at each step. 90 | 91 | ``init_steps`` 92 | Number of steps the robot is initialized. During these steps, 93 | the robot is moved with action=0 but the ``critic`` not updated. 94 | 95 | ``traj_chosen`` 96 | Represents the sequence of actions which was chosen by the 97 | algorithm at each step of the trajectory. If it is 98 | :py:const:`None`, it will be ignored. If not, it must be a 99 | list at least as long as ``trajectory``. 100 | 101 | ``inspected_steps`` 102 | List of step numbers, for which the expected return is plotted 103 | over the action, given the state at the respective step. 
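A minimal usage sketch (``robot`` and ``critic`` are placeholders; constructing and pre-training them is application specific and not shown here)::

    axis = pylab.figure().add_subplot(111)
    headings = [0.0] * 20                                  # move along a fixed absolute heading
    actions = list(np.arange(0.0, 2 * np.pi, np.pi / 4))   # 8 sampled headings per state
    plot_snapshot(axis, robot, critic, headings, actions, inspected_steps=[10])
    pylab.show()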
104 | 105 | """ 106 | if traj_chosen is not None: 107 | assert len(traj_chosen) >= len(trajectory) 108 | else: 109 | traj_chosen = [None] * len(trajectory) 110 | 111 | if inspected_steps is None: 112 | inspected_steps = [] 113 | 114 | robot_radius = 0.1 115 | robot_color = (0.0, 0.0, 0.0, 0.0) # white 116 | ray_len = robot_radius + 0.05 117 | 118 | for i in range(init_steps): # initialize 119 | robot.take_action(robot.pose) 120 | 121 | rays = [] 122 | for num_step, (action_ex, action_chosen) in enumerate(zip(trajectory, traj_chosen)): 123 | 124 | # execute action, get the robot into the next state 125 | collided = robot.take_action(action_ex) 126 | s_curr = robot.read_sensors() 127 | 128 | # plot the robot 129 | loc_robot = s_curr['loc'][0] 130 | pose = s_curr['pose'][0, 0] 131 | if num_step % 2 == 0: 132 | rob = pylab.Circle(loc_robot, robot_radius, fill=True, facecolor=robot_color) 133 | axis.add_artist(rob) 134 | # plot the robot orientation 135 | _plot_line(axis, loc_robot, pose, robot_radius, color='k') 136 | 137 | if num_step % 2 == 0: 138 | # evaluate the critic on the actions 139 | p_returns = [] 140 | #print "" 141 | for action_eval in sample_actions: 142 | predicted_return = critic(s_curr, action_eval, simulate=True) 143 | predicted_return = predicted_return[0, 0] 144 | p_returns.append((action_eval, predicted_return)) 145 | #print action_eval, predicted_return 146 | 147 | # normalize returns 148 | r_offset = min([return_ for (action, return_) in p_returns]) 149 | r_scale = max([return_ for (action, return_) in p_returns]) - r_offset 150 | 151 | for action_eval, predicted_return in p_returns: 152 | length = ray_len + 0.1 * (predicted_return - r_offset) / r_scale 153 | #rays.append((loc_robot, (pose+action_eval) % (2*np.pi), length, predicted_return)) # relative heading 154 | rays.append((loc_robot, (action_eval) % (2*np.pi), length, predicted_return)) # absolute heading 155 | 156 | if num_step in inspected_steps: 157 | fig_inspected = pylab.figure() 158 | epuck_plot_value_over_action(critic, s_curr, fig_inspected.add_subplot(111), a_range=np.arange(-2.0*np.pi, 2.0*np.pi, 0.01)) 159 | fig_inspected.suptitle('Expected return in after %i steps (%s)' % (num_step, str(loc_robot))) 160 | 161 | # advance critic 162 | critic(s_curr, action_ex, simulate=False) 163 | 164 | if collided: 165 | break 166 | 167 | # normalize returns 168 | r_offset = min([predicted_return for (loc, ori, length, predicted_return) in rays]) 169 | r_scale = max([predicted_return for (loc, ori, length, predicted_return) in rays]) - r_offset 170 | 171 | # plot rays 172 | for (loc, ori, length, predicted_return) in rays: 173 | nrm_return = (predicted_return - r_offset) / r_scale 174 | col = pylab.cm.hot(0.7 * nrm_return) # for the report 175 | #col = pylab.cm.spectral(nrm_return*0.25) # for the presentation 176 | 177 | # plot ray 178 | _plot_line(axis, loc, ori, size_hi=length+0.03, size_lo=robot_radius+0.03, color=col, linewidth=4) 179 | 180 | return axis 181 | 182 | 183 | ## DEPRECATED ## 184 | 185 | def epuck_plot_all_trajectories(*args, **kwargs): 186 | """Alias of :py:func:`plot_all_trajectories` 187 | 188 | .. deprecated:: 1.0 189 | Use :py:func:`plot_all_trajectories` instead 190 | 191 | """ 192 | warnings.warn("Deprecated. Use 'plot_all_trajectories' instead") 193 | return plot_all_trajectories(*args, **kwargs) 194 | 195 | def epuck_plot_value_over_action(*args, **kwargs): 196 | """Alias of :py:func:`plot_value_over_action` 197 | 198 | .. 
deprecated:: 1.0 199 | Use :py:func:`plot_value_over_action` instead 200 | 201 | """ 202 | warnings.warn("Deprecated. Use 'plot_value_over_action' instead") 203 | return plot_value_over_action(*args, **kwargs) 204 | 205 | def epuck_plot_snapshot(*args, **kwargs): 206 | """Alias of :py:func:`plot_snapshot` 207 | 208 | .. deprecated:: 1.0 209 | Use :py:func:`plot_snapshot` instead 210 | 211 | """ 212 | warnings.warn("Deprecated. Use 'plot_snapshot' instead") 213 | return plot_snapshot(*args, **kwargs) 214 | -------------------------------------------------------------------------------- /HDPy/puppy/plant/plants.py: -------------------------------------------------------------------------------- 1 | from ...rl import Plant 2 | import numpy as np 3 | import warnings 4 | 5 | class SpeedReward(Plant): 6 | """A :py:class:`Plant` with focus on the speed of the robot. 7 | """ 8 | def __init__(self): 9 | super(SpeedReward, self).__init__(state_space_dim=2) 10 | 11 | def state_input(self, state): 12 | """Return the location, sampled from the *GPS* (x,y) values. 13 | The sample is an average over the last 10 GPS coordinates. 14 | """ 15 | sio = np.atleast_2d([ 16 | self.normalization.normalize_value('puppyGPS_x', state['puppyGPS_x'][-10:]).mean(), 17 | self.normalization.normalize_value('puppyGPS_y', state['puppyGPS_y'][-10:]).mean() 18 | ]).T 19 | return sio 20 | 21 | def reward(self, epoch): 22 | """Return the covered distance and -1.0 if the robot tumbled. 23 | The speed measurement is taken from the 100th to the last sample. 24 | """ 25 | if (epoch['accelerometer_z'][-100:] < 1.0).sum() > 80: 26 | return -1.0 27 | 28 | x = epoch['puppyGPS_x'] 29 | y = epoch['puppyGPS_y'] 30 | return np.linalg.norm(np.array([x[-1] - x[-100], y[-1] - y[-100]])) 31 | 32 | class LineFollower(Plant): 33 | """A :py:class:`Plant` which gives negative reward proportional to 34 | the distance to a line in the xy plane. The line is described by 35 | its ``origin`` and the ``direction``. 36 | """ 37 | def __init__(self, origin, direction, reward_noise=0.01): 38 | super(LineFollower, self).__init__(state_space_dim=2) 39 | self.origin = np.atleast_2d(origin) 40 | self.direction = np.atleast_2d(direction) 41 | self.reward_noise = reward_noise 42 | 43 | if self.origin.shape[0] < self.origin.shape[1]: 44 | self.origin = self.origin.T 45 | 46 | if self.direction.shape[0] < self.direction.shape[1]: 47 | self.direction = self.direction.T 48 | 49 | self.direction /= np.linalg.norm(self.direction) 50 | 51 | assert self.direction.shape == (2, 1) 52 | assert self.origin.shape == (2, 1) 53 | 54 | def state_input(self, state): 55 | """Return the latest *GPS* (x,y) values. 56 | """ 57 | sio = np.atleast_2d([ 58 | self.normalization.normalize_value('puppyGPS_x', state['puppyGPS_x'][-10:]).mean(), 59 | self.normalization.normalize_value('puppyGPS_y', state['puppyGPS_y'][-10:]).mean() 60 | ]).T 61 | return sio 62 | 63 | def reward(self, epoch): 64 | """Return the distance between the current robot location and 65 | the line. 
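More precisely, the value returned is the *negative* of that distance (minus the norm of the component of ``origin - point`` orthogonal to ``direction``), perturbed by Gaussian noise of scale ``reward_noise``.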
66 | """ 67 | x = epoch['puppyGPS_x'][-1] 68 | y = epoch['puppyGPS_y'][-1] 69 | point = np.atleast_2d([x, y]).T 70 | 71 | #(origin - point) - () * dir 72 | diff = self.origin - point 73 | proj = diff - self.direction.T.dot(diff).dot(self.direction.T).T 74 | #return np.tanh(1.0/np.linalg.norm(proj)) 75 | 76 | reward = -np.linalg.norm(proj) 77 | reward += np.random.normal(scale=self.reward_noise, size=reward.shape) 78 | return reward 79 | 80 | class TargetLocation(Plant): 81 | """A :py:class:`Plant` which gives negative reward proportional to 82 | the distance to point ``target`` in the xy plane. If the robot is 83 | closer than ``radius`` to the target, the reward will be 0.0. 84 | 85 | """ 86 | def __init__(self, target, radius=0.0, reward_noise=0.01): 87 | super(TargetLocation, self).__init__(state_space_dim=2) 88 | self.target = np.atleast_2d(target) 89 | self.radius = radius 90 | self.reward_noise = reward_noise 91 | 92 | if self.target.shape[0] < self.target.shape[1]: 93 | self.target = self.target.T 94 | 95 | assert self.target.shape == (2, 1) 96 | 97 | def state_input(self, state): 98 | """Return the latest *GPS* (x,y) values.""" 99 | sio = np.atleast_2d([ 100 | self.normalization.normalize_value('puppyGPS_x', state['puppyGPS_x'][-10:]).mean(), 101 | self.normalization.normalize_value('puppyGPS_y', state['puppyGPS_y'][-10:]).mean() 102 | ]).T 103 | return sio 104 | 105 | def reward(self, epoch): 106 | """Return the distance between the current robot location and 107 | the target point. 108 | 109 | """ 110 | x = epoch['puppyGPS_x'][-1] 111 | y = epoch['puppyGPS_y'][-1] 112 | point = np.atleast_2d([x, y]).T 113 | 114 | 115 | #(target - point) 116 | diff = self.target - point 117 | dist = np.linalg.norm(diff) 118 | 119 | if dist < self.radius: 120 | dist = 0.0 121 | 122 | reward = np.exp(-0.25 * (dist - 9.0)) + 1.0 123 | 124 | if self.reward_noise > 0.0: 125 | reward += np.random.normal(scale=self.reward_noise) 126 | 127 | return reward 128 | 129 | class TargetLocationLandmarks(TargetLocation): 130 | """A :py:class:`Plant` which gives negative reward proportional to 131 | the distance to point ``target`` in the xy plane. If the robot is 132 | closer than ``radius`` to the target, the reward will be 0.0. 133 | The state is composed of the distance to predefined ``landmarks``, 134 | specified with their coordinates in the xy plane. Gaussian noise 135 | will be added to the reward, if ``reward_noise`` is positive. 136 | 137 | """ 138 | def __init__(self, target, landmarks, radius=0.0, reward_noise=0.01): 139 | super(TargetLocationLandmarks, self).__init__(target, radius, reward_noise) 140 | self._state_space_dim = len(landmarks) 141 | 142 | # add landmarks 143 | self.landmarks = [] 144 | for mark in landmarks: 145 | mark = np.atleast_2d(mark) 146 | if mark.shape[0] < mark.shape[1]: 147 | mark = mark.T 148 | self.landmarks.append(mark) 149 | 150 | def state_input(self, state): 151 | """Return the distance to the landmarks.""" 152 | sio = np.atleast_2d([ 153 | state['puppyGPS_x'][-10:].mean(), 154 | state['puppyGPS_y'][-10:].mean() 155 | ]).T 156 | 157 | dist = [np.linalg.norm(sio - mark) for mark in self.landmarks] 158 | dist = np.atleast_2d(dist).T 159 | dist = self.normalization.normalize_value('landmark_dist', dist) 160 | return dist 161 | 162 | class DiffTargetLocationLandmarks(TargetLocationLandmarks): 163 | """A :py:class:`Plant` which gives positive reward proportional to 164 | the absolute difference (between two episodes) in distance to 165 | point ``target`` in the xy plane. 
The state is composed of the 166 | distance to predefined ``landmarks``, 167 | specified with their coordinates in the xy plane. Gaussian noise 168 | will be added to the reward, if ``reward_noise`` is positive. 169 | 170 | Before the first call, the distance is set to ``init_distance``. 171 | 172 | """ 173 | def __init__(self, target, landmarks, reward_noise=0.01, init_distance=100): 174 | super(DiffTargetLocationLandmarks, self).__init__(target, landmarks, 0.0, reward_noise) 175 | self.init_distance = init_distance 176 | self._last_target_distance = self.init_distance # TODO: what is good init value? 177 | 178 | def reward(self, epoch): 179 | """Return the reward of ``epoch``.""" 180 | x = epoch['puppyGPS_x'][-1] 181 | y = epoch['puppyGPS_y'][-1] 182 | point = np.atleast_2d([x, y]).T 183 | 184 | 185 | #(target - point) 186 | diff = self.target - point 187 | dist = np.linalg.norm(diff) 188 | 189 | # reward is difference of distance between current and previous episode 190 | reward = dist - self._last_target_distance 191 | self._last_target_distance = dist 192 | reward += np.random.normal(scale=self.reward_noise, size=reward.shape) 193 | return reward 194 | 195 | def reset(self): 196 | """Reset the last distance to the initial one.""" 197 | self._last_target_distance = self.init_distance 198 | 199 | class LandmarksTarLoc(TargetLocationLandmarks): 200 | """A :py:class:`Plant` which gives negative reward proportional to 201 | the distance to point ``target`` in the xy plane. 202 | 203 | .. deprecated:: 1.0 204 | Use :py:class:`TargetLocationLandmarks` instead. 205 | 206 | """ 207 | def __init__(self, *args, **kwargs): 208 | warnings.warn('This class is depcreated. Use TargetLocationLandmarks instead') 209 | super(LandmarksTarLoc, self).__init__(*args, **kwargs) 210 | 211 | class LandmarksTarLocDiff(DiffTargetLocationLandmarks): 212 | """A :py:class:`Plant` which gives positive reward proportional to 213 | the absolute difference (between two episodes) in distance to 214 | point ``target`` in the xy plane. 215 | 216 | .. deprecated:: 1.0 217 | Use :py:class:`DiffTargetLocationLandmarks` instead. 218 | 219 | """ 220 | def __init__(self, *args, **kwargs): 221 | warnings.warn('This class is depcreated. Use DiffTargetLocationLandmarks instead') 222 | super(LandmarksTarLocDiff, self).__init__(*args, **kwargs) 223 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Heuristic Dynamic Programming documentation build configuration file, created by 4 | # sphinx-quickstart on Wed May 22 19:50:46 2013. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
19 | #sys.path.insert(0, os.path.abspath('.')) 20 | #sys.path.insert(0, os.path.abspath('../../HDPy/puppy/')) 21 | #sys.path.insert(0, os.path.abspath('../../HDPy/epuck/')) 22 | 23 | # -- General configuration ----------------------------------------------------- 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be extensions 29 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 30 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode', 'sphinx.ext.inheritance_diagram'] 31 | 32 | # Add any paths that contain templates here, relative to this directory. 33 | templates_path = ['_templates'] 34 | 35 | # The suffix of source filenames. 36 | source_suffix = '.rst' 37 | 38 | # The encoding of source files. 39 | #source_encoding = 'utf-8-sig' 40 | 41 | # The master toctree document. 42 | master_doc = 'index' 43 | 44 | # General information about the project. 45 | project = u'Heuristic Dynamic Programming with Python' 46 | copyright = u'2013, Matthias Baumgartner' 47 | 48 | # The version info for the project you're documenting, acts as replacement for 49 | # |version| and |release|, also used in various other places throughout the 50 | # built documents. 51 | # 52 | # The short X.Y version. 53 | version = '1.0' 54 | # The full version, including alpha/beta/rc tags. 55 | release = '1.0' 56 | 57 | # The language for content autogenerated by Sphinx. Refer to documentation 58 | # for a list of supported languages. 59 | #language = None 60 | 61 | # There are two options for replacing |today|: either, you set today to some 62 | # non-false value, then it is used: 63 | #today = '' 64 | # Else, today_fmt is used as the format for a strftime call. 65 | #today_fmt = '%B %d, %Y' 66 | 67 | # List of patterns, relative to source directory, that match files and 68 | # directories to ignore when looking for source files. 69 | exclude_patterns = [] 70 | 71 | # The reST default role (used for this markup: `text`) to use for all documents. 72 | #default_role = None 73 | 74 | # If true, '()' will be appended to :func: etc. cross-reference text. 75 | #add_function_parentheses = True 76 | 77 | # If true, the current module name will be prepended to all description 78 | # unit titles (such as .. function::). 79 | #add_module_names = True 80 | 81 | # If true, sectionauthor and moduleauthor directives will be shown in the 82 | # output. They are ignored by default. 83 | #show_authors = False 84 | 85 | # The name of the Pygments (syntax highlighting) style to use. 86 | pygments_style = 'sphinx' 87 | 88 | # A list of ignored prefixes for module index sorting. 89 | #modindex_common_prefix = [] 90 | 91 | 92 | # -- Options for HTML output --------------------------------------------------- 93 | 94 | # The theme to use for HTML and HTML Help pages. See the documentation for 95 | # a list of builtin themes. 96 | html_theme = 'default' 97 | 98 | # Theme options are theme-specific and customize the look and feel of a theme 99 | # further. For a list of options available for each theme, see the 100 | # documentation. 101 | #html_theme_options = {} 102 | 103 | # Add any paths that contain custom themes here, relative to this directory. 104 | #html_theme_path = [] 105 | 106 | # The name for this set of Sphinx documents. If None, it defaults to 107 | # " v documentation". 
108 | #html_title = None 109 | 110 | # A shorter title for the navigation bar. Default is the same as html_title. 111 | #html_short_title = None 112 | 113 | # The name of an image file (relative to this directory) to place at the top 114 | # of the sidebar. 115 | #html_logo = None 116 | 117 | # The name of an image file (within the static path) to use as favicon of the 118 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 119 | # pixels large. 120 | #html_favicon = None 121 | 122 | # Add any paths that contain custom static files (such as style sheets) here, 123 | # relative to this directory. They are copied after the builtin static files, 124 | # so a file named "default.css" will overwrite the builtin "default.css". 125 | html_static_path = ['_static'] 126 | 127 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 128 | # using the given strftime format. 129 | #html_last_updated_fmt = '%b %d, %Y' 130 | 131 | # If true, SmartyPants will be used to convert quotes and dashes to 132 | # typographically correct entities. 133 | #html_use_smartypants = True 134 | 135 | # Custom sidebar templates, maps document names to template names. 136 | #html_sidebars = {} 137 | 138 | # Additional templates that should be rendered to pages, maps page names to 139 | # template names. 140 | #html_additional_pages = {} 141 | 142 | # If false, no module index is generated. 143 | #html_domain_indices = True 144 | 145 | # If false, no index is generated. 146 | #html_use_index = True 147 | 148 | # If true, the index is split into individual pages for each letter. 149 | #html_split_index = False 150 | 151 | # If true, links to the reST sources are added to the pages. 152 | #html_show_sourcelink = True 153 | 154 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 155 | #html_show_sphinx = True 156 | 157 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 158 | #html_show_copyright = True 159 | 160 | # If true, an OpenSearch description file will be output, and all pages will 161 | # contain a tag referring to it. The value of this option must be the 162 | # base URL from which the finished HTML is served. 163 | #html_use_opensearch = '' 164 | 165 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 166 | #html_file_suffix = None 167 | 168 | # Output file base name for HTML help builder. 169 | htmlhelp_basename = 'HeuristicDynamicProgrammingdoc' 170 | 171 | 172 | # -- Options for LaTeX output -------------------------------------------------- 173 | 174 | latex_elements = { 175 | # The paper size ('letterpaper' or 'a4paper'). 176 | #'papersize': 'letterpaper', 177 | 178 | # The font size ('10pt', '11pt' or '12pt'). 179 | #'pointsize': '10pt', 180 | 181 | # Additional stuff for the LaTeX preamble. 182 | #'preamble': '', 183 | } 184 | 185 | # Grouping the document tree into LaTeX files. List of tuples 186 | # (source start file, target name, title, author, documentclass [howto/manual]). 187 | latex_documents = [ 188 | ('index', 'HeuristicDynamicProgramming.tex', u'Heuristic Dynamic Programming Documentation', 189 | u'Matthias Baumgartner', 'manual'), 190 | ] 191 | 192 | # The name of an image file (relative to this directory) to place at the top of 193 | # the title page. 194 | #latex_logo = None 195 | 196 | # For "manual" documents, if this is true, then toplevel headings are parts, 197 | # not chapters. 
198 | #latex_use_parts = False 199 | 200 | # If true, show page references after internal links. 201 | #latex_show_pagerefs = False 202 | 203 | # If true, show URL addresses after external links. 204 | #latex_show_urls = False 205 | 206 | # Documents to append as an appendix to all manuals. 207 | #latex_appendices = [] 208 | 209 | # If false, no module index is generated. 210 | #latex_domain_indices = True 211 | 212 | 213 | # -- Options for manual page output -------------------------------------------- 214 | 215 | # One entry per manual page. List of tuples 216 | # (source start file, name, description, authors, manual section). 217 | man_pages = [ 218 | ('index', 'heuristicdynamicprogramming', u'Heuristic Dynamic Programming Documentation', 219 | [u'Matthias Baumgartner'], 1) 220 | ] 221 | 222 | # If true, show URL addresses after external links. 223 | #man_show_urls = False 224 | 225 | 226 | # -- Options for Texinfo output ------------------------------------------------ 227 | 228 | # Grouping the document tree into Texinfo files. List of tuples 229 | # (source start file, target name, title, author, 230 | # dir menu entry, description, category) 231 | texinfo_documents = [ 232 | ('index', 'HeuristicDynamicProgramming', u'Heuristic Dynamic Programming Documentation', 233 | u'Matthias Baumgartner', 'HeuristicDynamicProgramming', 'One line description of project.', 234 | 'Miscellaneous'), 235 | ] 236 | 237 | # Documents to append as an appendix to all manuals. 238 | #texinfo_appendices = [] 239 | 240 | # If false, no module index is generated. 241 | #texinfo_domain_indices = True 242 | 243 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 244 | #texinfo_show_urls = 'footnote' 245 | 246 | 247 | # -- Options for Epub output --------------------------------------------------- 248 | 249 | # Bibliographic Dublin Core info. 250 | epub_title = u'Heuristic Dynamic Programming' 251 | epub_author = u'Matthias Baumgartner' 252 | epub_publisher = u'Matthias Baumgartner' 253 | epub_copyright = u'2013, Matthias Baumgartner' 254 | 255 | # The language of the text. It defaults to the language option 256 | # or en if the language is not set. 257 | #epub_language = '' 258 | 259 | # The scheme of the identifier. Typical schemes are ISBN or URL. 260 | #epub_scheme = '' 261 | 262 | # The unique identifier of the text. This can be a ISBN number 263 | # or the project homepage. 264 | #epub_identifier = '' 265 | 266 | # A unique identification for the text. 267 | #epub_uid = '' 268 | 269 | # A tuple containing the cover image and cover page html template filenames. 270 | #epub_cover = () 271 | 272 | # HTML files that should be inserted before the pages created by sphinx. 273 | # The format is a list of tuples containing the path and title. 274 | #epub_pre_files = [] 275 | 276 | # HTML files shat should be inserted after the pages created by sphinx. 277 | # The format is a list of tuples containing the path and title. 278 | #epub_post_files = [] 279 | 280 | # A list of files that should not be packed into the epub file. 281 | #epub_exclude_files = [] 282 | 283 | # The depth of the table of contents in toc.ncx. 284 | #epub_tocdepth = 3 285 | 286 | # Allow duplicate toc entries. 287 | #epub_tocdup = True 288 | 289 | 290 | # Example configuration for intersphinx: refer to the Python standard library. 
291 | intersphinx_mapping = {'http://docs.python.org/': None} 292 | # -- Custom config, added by hand -------------------------------------------- 293 | 294 | todo_include_todos=True 295 | 296 | -------------------------------------------------------------------------------- /doc/source/puppy_offline.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _puppy_offline: 3 | 4 | Puppy offline workflow 5 | ====================== 6 | 7 | In summary, the workflow of offline puppy experiments is: 8 | 9 | 1. :ref:`Capture offline training data ` 10 | 2. :ref:`Train a Critic on offline data ` 11 | 3. :ref:`Create example trajectories ` 12 | 4. :ref:`Simulate the Critic on an example trajectory ` 13 | 5. :ref:`Evaluate the Critic on the example trajectory ` 14 | 15 | In this document, these steps will be discussed in detail. 16 | 17 | Working offline has the advantage over working online that the 18 | relatively slow part - the data acquisition - is executed once, then 19 | several Critics can be trained on the same dataset. Since the simulation 20 | is only invoked once for all Critics, this approach is much faster. 21 | Also, since the dataset is the same for all Critics, a comparison of the 22 | results is possible. 23 | 24 | .. note:: 25 | Some parameters are global over all scripts, for example the 26 | sampling period or file paths. It must be ensured that the exact 27 | same values are used throughout the whole process. 28 | 29 | 30 | 31 | .. _offline-data: 32 | 33 | Gathering offline data 34 | ^^^^^^^^^^^^^^^^^^^^^^ 35 | 36 | When working with [Webots]_, two scripts are required: a robot and a 37 | supervisor. Note that this setup is fully described in the :py:mod:`PuPy` 38 | documentation. For offline data acquisition, a supervisor is created 39 | which resets the simulation whenever Puppy tumbles or leaves a 40 | predefined arena. 41 | 42 | .. literalinclude:: ../../test/puppy_offline_sampling_supervisor.py 43 | 44 | The robot script is a bit more complex. The controller has to select 45 | actions according to a predefined schema and store all data in a HDF5 46 | file for later processing. To have the file in the correct format, the 47 | class :py:class:`OfflineCollector` has to be used. It records all data 48 | such that the simulation behaviour can be reproduced. 49 | 50 | For the action selection mechanism, first a :py:class:`Policy` is 51 | created. It defines the action and links it 52 | to a motor target sequence, as explained in :ref:`plants-and-policies`. 53 | In this case, the action is based on a gait and controlls the 54 | amplitudes of the left and right legs. The procedure to create an 55 | initial action is overwritten such that the initial action is randomly 56 | chosen. The same is achieved by subtyping :py:class:`OfflineCollector` 57 | and overwriting the :py:meth:`OfflineCollector._next_action_hook` for 58 | action selection during the experiment. Hence, actions are chosen 59 | randomly at all times, according to the respective schema. Note that 60 | the action selection schema may have a huge influence on Critic training 61 | later on. 62 | 63 | .. literalinclude:: ../../test/puppy_offline_sampling_robot.py 64 | 65 | With these two These two scripts, [Webots]_ can be executed and run for 66 | some time. All sensor readouts and simulation metadata will be stored 67 | in the file ``/tmp/puppy_offline_data.hdf5``. On this basis, a Critic 68 | should be trained next. 
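Before training the Critic, a quick sanity check of the collected file
can be worthwhile. A minimal sketch, assuming ``h5py`` is available; the
group layout is whatever :py:class:`OfflineCollector` produced, so the
snippet below only lists it::

    import h5py

    # Group/dataset names depend on the OfflineCollector setup
    # used during sampling; this only lists what is there.
    with h5py.File('/tmp/puppy_offline_data.hdf5', 'r') as f:
        print len(f.keys()), 'episodes collected'
        for name in sorted(f.keys())[:3]:
            print name, f[name].keys()

The simulation run itself is started from the command line: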
69 | 70 | $ webots_builder -c -s -t styrofoam -m fast /tmp/webots 71 | 72 | 73 | .. _offline-critic-training: 74 | 75 | Critic training 76 | ^^^^^^^^^^^^^^^ 77 | 78 | For training, the :py:class:`Plant` must be specified and in case of 79 | Puppy its ADHDP implementation in :py:class:`PuppyHDP`. Note that 80 | although the :py:class:`Policy` is not in effect (as the selected 81 | actions are fixed due to the offline setup), a valid instance must 82 | be provided to the Critic. Here, the same one as for offline training is 83 | initialized. 84 | 85 | For Critic training, now also a reservoir and readout must be available, 86 | as initialized in the example. Furthermore, the 87 | :py:class:`PuPy.Normalization` is provided to the Critic, as during 88 | offline data gathering the sensor data is not processed at all. 89 | 90 | After the required objects have been created, they are bound together 91 | in :py:class:`PuppyHDP`. It is also directed to store critic output 92 | in the file ``/tmp/puppy_critic.hdf5``. Note that in this configuration, 93 | sensor data is not copied, i.e. they are not included in the Critic's 94 | data file, which is very convenient to save disk space. 95 | 96 | Finally, the function :py:func:`puppy.offline_playback` is invoked. This 97 | function replays the offline data such that the Critic sees it as if it 98 | was run online in [Webots]_. Hence, the Critic is trained as in the 99 | simulator. Only the data file has to be specified and optionally the 100 | training set can be limited (in this case to 1000 episodes). 101 | 102 | .. literalinclude:: ../../test/puppy_offline_replay.py 103 | 104 | After the script was successfully executed, the trained critic is 105 | available in three files: 106 | 107 | - ``/tmp/puppy_critic.hdf5`` 108 | - ``/tmp/puppy_readout.pic`` 109 | - ``/tmp/puppy_reservoir.pic`` 110 | 111 | All data that is saved by the Critic is in the first file. The latter 112 | two contain the reservoir and readout, as they cannot be stored in the 113 | datafile. For further processing, the readout and reservoir files will 114 | be required. The datafile mainly serves static training analysis. 115 | 116 | 117 | 118 | .. _offline-examples: 119 | 120 | Creating example trajectories and Critic evaluation 121 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 122 | 123 | When a Critic was trained, it is usually evaluated on a different 124 | dataset. One possibility is to train the Critic on a part of the 125 | training set and use the rest for testing. Then, it will be evaluated 126 | on action sequences, sampled as in the training dataset. If this is not 127 | desired, another set of offline data has to be acquired. In the 128 | following code, such a set is created by a predefined action sequence. 129 | 130 | Specifically, a main trajectory is defined, with some example 131 | actions executed at every k'th steps. For this, the robot's state at the 132 | k'th step must be identical for all example actions. As the robot cannot 133 | easily be reset in [Webots]_, an easier approach is to revert the 134 | simulation and keep the robot movement identical up to the k'th step. 135 | 136 | Three scripts are given to achieve this task. The first script creates 137 | a file which includes the action sequences and a reference to the one to 138 | be executed next. The other two scripts are a robot and supervisor 139 | controller for webots. 
Basically, an action sequence is loaded and 140 | executed, the measurements stored in a file in the same fashion as in 141 | the last section (:ref:`offline-data`). Hence, the file structure and 142 | called functions are the same as before. 143 | 144 | 145 | First, a number of action sequences is stored in a file at 146 | ``/tmp/example_sequence.hdf5``. 147 | 148 | .. literalinclude:: ../../test/puppy_example_trajectory_sequence.py 149 | 150 | To collect the simulation data, again a supervisor and robot controller 151 | have to be created. As noted before, the simulation is to be reverted 152 | (not restarted!) after an action sequence has finished. In this example, 153 | this is implemented by two guards which react accordingly to a signal 154 | from the robot. 155 | 156 | .. literalinclude:: ../../test/puppy_example_trajectory_supervisor.py 157 | 158 | Hence, the main logic is implemented in the robot controller. A special 159 | case of an :py:class:`OfflineCollector` is defined, enforcing the action 160 | to follow a specified sequence. If the sequence has ended, a signal is 161 | sent to the supervisor. The action sequence is loaded from the HDF5 162 | file, which was written before and the file updated such that all the 163 | sequences will be executed. The initialization of the robot is then 164 | analoguous to the previous section. 165 | 166 | .. literalinclude:: ../../test/puppy_example_trajectory_robot.py 167 | 168 | With the scripts set up, [Webots]_ can be executed. It automatically 169 | quits after all trajectories have been handled. Note that the setup 170 | of the policy, the number of initial steps and robot timings have been 171 | set to the same values as in the training data collection process. 172 | 173 | $ webots_builder -c -s -t styrofoam -m fast /tmp/webots 174 | 175 | As with offline data acquisition, the robot data is written into a HDF5, 176 | in this example at ``/tmp/example_data.hdf5``. Note that once this data 177 | is available, it can be used for testing of several Critics (as for now, 178 | all data is offline). Hence, the same process can be repeated for 179 | several example trajectories to have a more representative testing 180 | dataset. 181 | 182 | 183 | 184 | .. _offline-analysis: 185 | 186 | Critic analysis 187 | ^^^^^^^^^^^^^^^ 188 | 189 | If the example was followed until here, several files should be 190 | available: 191 | 192 | - ``/tmp/puppy_readout.pic``, the trained Critic's readout weights 193 | - ``/tmp/puppy_reservoir.pic``, the Critic's reservoir 194 | - ``/tmp/example_data.hdf5``, the testing dataset 195 | 196 | With those, the Critic can finally be analyzed. To do so, the Critic 197 | is executed on the testing dataset and then the result is plotted. The 198 | first part works similar to the Critic's training. The testing data is 199 | replayed, but this time the Critic is loaded instead of trained. The 200 | following script achieves this, storing the evaluation result in 201 | ``/tmp/example_eval.hdf5``. As before, plant and policy are initialized, 202 | then the reservoir and readout is loaded. Note that the readout training 203 | is disabled. After creation of the :py:class:`PuppyHDP`, it is executed 204 | on the testing data. 205 | 206 | .. literalinclude:: ../../test/puppy_example_trajectory_eval.py 207 | 208 | Now, the predicted return along the testing trajectory is stored in 209 | ``/tmp/example_eval.hdf5``. Based on this file, the Critic behaviour 210 | can be analysed. 
It does not include the data collected during 211 | simulation, hence the experiment is only completely described by also 212 | considering ``/tmp/example_data.hdf5``. This is exactly what 213 | :py:class:`H5CombineFile` is for. 214 | 215 | Due to the initial behaviour of :py:class:`PuppyHDP` and 216 | :py:class:`OfflinePuppy`, the datasets in the two files have a different 217 | offset (indicated by ``obs_offset`` in the script). For the first epoch, 218 | sensor data is available but no actions or reward. They are only stored 219 | after the second step, hence are offset by one epoch (150 sensor samples 220 | in this case). The predicted return is delayed even more, as it is not 221 | stored during the whole initial phase (25 steps). The dataset can also 222 | be thought of being aligned backwards. 223 | 224 | The analysis script goes through all executions of the example 225 | trajectory (one for each sample action) and orders them according to the 226 | state in which the sample action execution started. For each of those 227 | states, the sample actions are plotted as lines, colored with respect 228 | to the respective predicted return. States itself are related by 229 | plotting a circle, colored according to the median return of actions 230 | executed from it. 231 | 232 | .. literalinclude:: ../../test/puppy_offline_analysis.py 233 | 234 | If it worked correctly, a plot should be generated which shows the 235 | example trajectory, the sampled actions and states with the color 236 | corresponding to the predicted return (darker is better). 237 | 238 | 239 | .. image:: ../../data/doc/puppy_offline_result.png 240 | -------------------------------------------------------------------------------- /HDPy/epuck/epuck.py: -------------------------------------------------------------------------------- 1 | """ 2 | For the ePuck robot, a small simulator is provided. It allows to place 3 | ePuck in an arena, with unpassable walls and obstacles at (almost) 4 | arbitrary locations. Some environment objects are predefined in 5 | :py:mod:`HDPy.epuck.env`. The class :py:class:`Robot` provides the 6 | implementation of the simulated ePuck. Obstacles are directly inserted 7 | into this instance, hence it combines the robot with the environment. 8 | 9 | As for other problems, a :py:class:`ADHDP` instance can be used on top 10 | of this to control the robot motion. A plant and a policy have to be 11 | provided (see :ref:`plants-and-policies`). Due to historical reasons, 12 | the interpretation of the policy (i.e. action) is done in the robot. 13 | In :py:class:`Robot`, the action is a relative heading, 14 | :py:class:`AbsoluteRobot` implements an absolute one. 15 | 16 | The robot and adhdp instances are combined in the 17 | :py:func:`simulation_loop` function to run the simulation for a 18 | fixed amount of time. 19 | 20 | """ 21 | import numpy as np 22 | import pylab 23 | import warnings 24 | 25 | 26 | def _intersect((o1x, o1y), (d1x, d1y), (o2x, o2y), (d2x, d2y)): 27 | """Intersection of two bounded lines. The lines are given 28 | with the origin and direction. Returned is the step length for 29 | both lines, in the same order as the input. 
30 | 31 | o1x + t1 * d1x = o2x + t2 * d2x 32 | o1y + t1 * d1y = o2y + t2 * d2y 33 | => t1 = (o2x + t2 * d2x - o1x)/d1x 34 | => o1y + ((o2x + t2 * d2x - o1x)/d1x) * d1y = o2y + t2 * d2y 35 | => o1y + (o2x + t2 * d2x - o1x) * d1y/d1x = o2y + t2 * d2y 36 | => o1y + (t2 * d2x + o2x - o1x) * d1y/d1x = o2y + t2 * d2y 37 | => o1y + t2*d2x*d1y/d1x + (o2x - o1x) * d1y/d1x = o2y + t2 * d2y 38 | => o1y - o2y + (o2x - o1x) * d1y/d1x = t2 * d2y - t2*d2x*d1y/d1x 39 | => o1y - o2y + (o2x - o1x) * d1y/d1x = t2 * (d2y - d2x*d1y/d1x) 40 | => t2 = (o1y - o2y + (o2x - o1x) * d1y/d1x) / (d2y - d2x*d1y/d1x) 41 | 42 | """ 43 | tol = 1e-14 44 | if abs(d1y - 0.0) < tol : 45 | # o_dir = (!0.0, 0.0) 46 | if abs(d2y - d2x*d1y/d1x) < tol: # parallel 47 | t0, t1 = float('inf'), float('inf') 48 | else: 49 | nom = o2y - o1y - d1y * (o2x - o1x)/d1x 50 | denom = (d1y*d2x)/d1x - d2y 51 | t0 = nom/denom 52 | t1 = (o2x - o1x + t0 * d2x)/d1x 53 | else: 54 | # o_dir = (0.0, !0.0) 55 | if abs(d2x - d2y*d1x/d1y) < tol: # parallel 56 | t0, t1 = float('inf'), float('inf') 57 | else: 58 | nom = o2x - o1x - d1x * (o2y - o1y)/d1y 59 | denom = (d1x*d2y)/d1y - d2x 60 | t0 = nom/denom 61 | t1 = (o2y - o1y + t0 * d2y) / d1y 62 | 63 | return t1, t0 64 | 65 | def _in_obstacle(loc, obstacle): 66 | """Check if a location is within an obstacle. 67 | 68 | Assuming the obstacle edges are given in the right order (meaning 69 | that the polygon is defined through lines between successive 70 | points). 71 | 72 | As reference, the origin is picked. This implies that the obstacle 73 | must not include the origin. 74 | 75 | Edges and corners count as within the obstacle 76 | 77 | """ 78 | if any([loc == obs for obs in obstacle]): 79 | return True 80 | 81 | faces = [(p0, p1) for p0, p1 in zip(obstacle[:-1], obstacle[1:])] 82 | faces.append((obstacle[-1], obstacle[0])) 83 | 84 | num_intersect = sum([_obs_intersect((loc, (0.0, 0.0)), line) for line in faces]) 85 | if num_intersect % 2 == 0: 86 | return False 87 | else: 88 | return True 89 | 90 | def _obs_intersect(((x0, y0), (x1, y1)), ((x2, y2), (x3, y3))): 91 | """Check if two lines intersect. The boundaries don't count as 92 | intersection.""" 93 | base1 = (x0, y0) 94 | base2 = (x2, y2) 95 | dir1 = (x1-x0, y1-y0) 96 | dir2 = (x3-x2, y3-y2) 97 | t1, t2 = _intersect(base1, dir1, base2, dir2) 98 | 99 | eps = 0.00001 100 | if -eps < t1 and t1 < 1.0 + eps and -eps < t2 and t2 < 1.0 + eps: 101 | return True 102 | else: 103 | return False 104 | 105 | class Robot(object): 106 | """Simulated ePuck robot. 107 | 108 | The robot may be steered by means of change in its orientation (i.e. 109 | the heading relative to the robot). Every time an action is 110 | executed, the robot turns to the target orientation, then moves 111 | forward. How much it moves is proportional to the ``speed`` and 112 | ``step_time``. In between, infrared sensor readouts can be taken. 113 | The robot is placed in an arena, with some obstacles and walls it 114 | can collide with but not pass. Upon collision, the robot stops 115 | moving. 116 | 117 | ``walls`` 118 | List of wall lines which cannot be passed. The lines are to be 119 | given by their endpoints. 120 | 121 | ``obstacles`` 122 | List of obstacles which cannot be passed. In contrast to walls, 123 | the obstacles are closed polygons. They have to be given 124 | as list of corner points. Obstacles may not include the origin 125 | (0, 0). 126 | 127 | ``speed`` 128 | Speed of the robot. 129 | 130 | ``step_time`` 131 | Time quantum for movement, i.e. 
for how long the robot drives 132 | forward. 133 | 134 | ``tol`` 135 | Minimal distance from any obstacle or wall which counts as 136 | collision. 137 | 138 | .. note:: 139 | Obstacles may not include the origin (0, 0). 140 | 141 | .. todo:: 142 | wall tolerance does not operate correctly. 143 | 144 | """ 145 | def __init__(self, walls=None, obstacles=None, speed=0.5, step_time=1.0, tol=0.0): 146 | 147 | if obstacles is None: 148 | obstacles = [] 149 | 150 | if walls is None: 151 | walls = [] 152 | 153 | walls = walls[:] 154 | for obs in obstacles: 155 | walls.extend([(x0, y0, x1, y1) for (x0, y0), (x1, y1) in zip(obs[:-1], obs[1:])]) 156 | walls.append((obs[-1][0], obs[-1][1], obs[0][0], obs[0][1])) 157 | 158 | if tol > 0.0: 159 | warnings.warn("tolerance > 0 doesn't work properly; It only works if the robot faces the wall (not when parallel or away from the wall).") 160 | 161 | self.sensors = [2*np.pi*i/8.0 for i in range(8)] 162 | #self.obstacles = [ (x0,y0,x1,y1) ] 163 | self.obstacle_line = walls 164 | self._ir_max, self.tol = 15.0, tol 165 | self.obstacles = self._cmp_obstacles(self.obstacle_line) 166 | self.polygons = obstacles[:] 167 | self.speed, self.step_time = speed, step_time 168 | self.loc = (0.0, 0.0) 169 | self.pose = 0.0 170 | self.trajectory = [] 171 | self.reset() 172 | 173 | def _cmp_obstacles(self, lines): 174 | """Convert lines given by their endpoints to their corresponding 175 | vector representation""" 176 | obstacles = [] 177 | for x0, y0, x1, y1 in lines: 178 | o_vec = (x1-x0, y1-y0) 179 | if o_vec[0] == 0.0 and o_vec[1] == 0.0: 180 | raise Exception('Obstacle line must have a direction') 181 | o_base = (x0, y0) 182 | o_limit = 1.0 183 | obstacles.append((o_vec, o_base, o_limit)) 184 | return obstacles 185 | 186 | def _cmp_obstacle_lines(self, obstacles): 187 | """Convert lines given by as vector to their corresponding 188 | endpoint representation.""" 189 | lines = [] 190 | for o_vec, o_base, o_limit in obstacles: 191 | x0, y0 = o_base 192 | if o_limit == float('inf'): 193 | raise Exception('Infinite lines not supported') 194 | x1 = o_base[0] + o_limit * o_vec[0] 195 | y1 = o_base[1] + o_limit * o_vec[1] 196 | lines.append((x0, y0, x1, y1)) 197 | return lines 198 | 199 | def reset(self): 200 | """Reset the robot to the origin.""" 201 | self.loc = (0.0, 0.0) 202 | self.pose = 0.0 203 | self.trajectory = [self.loc] 204 | 205 | def reset_random(self, loc_lo=-10.0, loc_hi=10.0): 206 | """Reset the robot to a random location, outside the obstacles.""" 207 | for i in xrange(1000): 208 | loc = self.loc = (np.random.uniform(loc_lo, loc_hi), np.random.uniform(loc_lo, loc_hi)) 209 | pose = self.pose = np.random.uniform(0, 2*np.pi) 210 | 211 | if not any([_in_obstacle(self.loc, obs) for obs in self.polygons]) and not self.take_action(0.0): 212 | break 213 | 214 | if i == 1000: 215 | warnings.warn('Random reset iterations maximum exceeded') 216 | 217 | self.loc = loc 218 | self.pose = pose 219 | self.trajectory = [self.loc] 220 | 221 | def read_ir(self): 222 | """Compute the proximities to obstacles in all infrared sensor 223 | directions.""" 224 | # view-direction 225 | readout = [] 226 | for sensor in self.sensors: 227 | s_dist = self._ir_max 228 | s_ori = self.pose + sensor 229 | s_dir = (np.cos(s_ori), np.sin(s_ori)) 230 | s_base = self.loc 231 | 232 | for o_dir, o_base, o_limit in self.obstacles: 233 | # obstacles intersection 234 | t0, t1 = _intersect(o_base, o_dir, s_base, s_dir) 235 | 236 | eps = 0.00001 237 | if t1 >= 0 and (o_limit == float('inf') or (-eps <= t0 
and t0 <= o_limit + eps)): 238 | #if t0 >= 0 and t1 >= 0 and t1 <= 1.0: 239 | # intersection at distance (t0 * s_dir) 240 | dist = np.linalg.norm((t1 * s_dir[0], t1 * s_dir[1])) 241 | else: 242 | # no intersection 243 | dist = self._ir_max 244 | 245 | if dist < s_dist: 246 | s_dist = dist 247 | 248 | readout.append(s_dist) 249 | 250 | return readout 251 | 252 | def read_sensors(self): 253 | """Read all sensors. A :py:keyword:`dict` is returned.""" 254 | ir = self.read_ir() 255 | #noise = np.random.normal(scale=0.01, size=(len(ir))) 256 | #ir = map(operator.add, ir, noise) 257 | 258 | return {'loc': np.atleast_2d(self.loc), 'pose': np.atleast_2d(self.pose), 'ir': np.atleast_2d(ir)} 259 | 260 | def take_action(self, action): 261 | """Execute an ``action`` and move forward 262 | (speed * step_time units or until collision). Return 263 | :py:const:`True` if the robot collided. 264 | 265 | """ 266 | # turn 267 | if isinstance(action, np.ndarray): 268 | action = action.flatten()[0] 269 | self.pose = (self.pose + action) % (2*np.pi) 270 | #self.pose = action % (2*np.pi) 271 | 272 | # move forward 273 | t = self.speed * self.step_time # distance per step 274 | 275 | # Collision detection 276 | eps = 0.00001 277 | r_vec = (np.cos(self.pose), np.sin(self.pose)) 278 | wall_dists = [(idx, _intersect(self.loc, r_vec, o_base, o_vec), o_limit) for idx, (o_vec, o_base, o_limit) in enumerate(self.obstacles)] 279 | wall_dists = [(idx, r_dist) for idx, (r_dist, o_dist), o_limit in wall_dists if r_dist >= 0.0 and r_dist < float('inf') and -eps <= o_dist and o_dist <= o_limit + eps] 280 | if len(wall_dists) > 0: 281 | # Distance to the wall 282 | wall_idx, min_wall_dist = min(wall_dists, key=lambda (idx, dist): dist) 283 | dist = np.linalg.norm((min_wall_dist * r_vec[0], min_wall_dist * r_vec[1])) 284 | 285 | # angle between wall and robot trajectory 286 | o_vec = self.obstacles[wall_idx][0] 287 | a = np.arccos( (o_vec[0] * r_vec[0] + o_vec[1] * r_vec[1]) / (np.linalg.norm(o_vec) * np.linalg.norm(r_vec)) ) 288 | if a > np.pi/2.0: 289 | a = np.pi - a 290 | 291 | # maximum driving distance 292 | k = self.tol / np.sin(a) 293 | t_max = dist - k 294 | 295 | else: 296 | # no wall ahead 297 | t_max = float('inf') 298 | 299 | collide = t >= t_max 300 | t = min(t, t_max) 301 | 302 | # next location 303 | self.loc = (self.loc[0] + np.cos(self.pose) * t, self.loc[1] + np.sin(self.pose) * t) # t doesn't denote the distance in moving direction! 304 | self.trajectory.append(self.loc) 305 | return collide 306 | 307 | def plot_trajectory(self, wait=False, with_tol=True, tol=None, full_view=True, axis=None): 308 | """Plot the robot trajectory in a :py:mod:`pylab` figure. 309 | 310 | ``wait`` 311 | True for blocking until the figure is closed. 312 | 313 | ``with_tol`` 314 | Plot obstacle tolerance lines. 315 | 316 | ``tol`` 317 | Overwrite the obstacle tolerance. 318 | 319 | ``full_view`` 320 | Keep the original clipping of the window. If false, the 321 | clipping will be adjusted to the data. 322 | 323 | ``axis`` 324 | A :py:mod:`pylab` axis, which should be used for plotting. 325 | If not provided, the first axis of the first figure is used. 
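A minimal usage sketch (the wall coordinates and actions below are
        arbitrary examples)::

            import pylab
            robot = Robot(walls=[(-5.0, -5.0, 5.0, -5.0)])
            robot.take_action(0.5)
            robot.take_action(-0.2)
            # pass an explicit axis to avoid relying on an existing figure
            axis = pylab.figure(1).add_subplot(111)
            robot.plot_trajectory(wait=True, axis=axis)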
326 | 327 | """ 328 | if axis is None: 329 | axis = pylab.figure(1).axes[0] 330 | 331 | axis.clear() 332 | self._plot_obstacles(axis, with_tol, tol) 333 | x, y = zip(*self.trajectory) 334 | axis.plot(x, y, 'b-') 335 | axis.plot(x, y, 'b*') 336 | if full_view: 337 | x0, x1, y0, y1 = axis.axis() 338 | else: 339 | x0, x1, y0, y1 = min(x), max(x), min(y), max(y) 340 | axis.axis(( 341 | x0 + x0*0.1, 342 | x1 + x1*0.1, 343 | y0 + y0*0.1, 344 | y1 + y1*0.1 345 | )) 346 | 347 | pylab.show(block=wait) 348 | 349 | def _plot_obstacles(self, axis, with_tol=True, tol=None): 350 | """Plot all obstacles and walls into a :py:mod:`pylab` figure. 351 | 352 | ``axis`` 353 | The axis where stuff is plotted into. 354 | 355 | ``with_tol`` 356 | Plot obstacle tolerance lines. 357 | 358 | ``tol`` 359 | Overwrite the obstacle tolerance. 360 | 361 | """ 362 | if tol is None: 363 | tol = self.tol 364 | 365 | for vec, base, limit in self.obstacles: 366 | # obstacle line 367 | axis.plot((base[0], base[0]+limit*vec[0]), (base[1], base[1]+limit*vec[1]), 'k') 368 | 369 | if with_tol and tol > 0: 370 | if vec[1] == 0.0: 371 | y = (-vec[1]/vec[0], 1.0) 372 | else: 373 | y = (1.0, -vec[0]/vec[1]) 374 | 375 | y = (y[0] * tol / np.linalg.norm(y), y[1] * tol / np.linalg.norm(y)) 376 | base_tn = (base[0] - y[0], base[1] - y[1]) 377 | base_tp = (base[0] + y[0], base[1] + y[1]) 378 | 379 | # obstacle tolerance 380 | axis.plot((base_tn[0], base_tn[0]+limit*vec[0]), (base_tn[1], base_tn[1]+limit*vec[1]), 'k:') 381 | axis.plot((base_tp[0], base_tp[0]+limit*vec[0]), (base_tp[1], base_tp[1]+limit*vec[1]), 'k:') 382 | 383 | class AbsoluteRobot(Robot): 384 | """Simulated ePuck robot. 385 | 386 | In contrast to :py:class:`Robot`, the heading is with respect to 387 | the arena instead of the robot - i.e. it is absolute, not relative 388 | to the robot. 389 | 390 | """ 391 | def take_action(self, action): 392 | """Execute an ``action`` and move forward 393 | (speed * step_time units or until collision). Return 394 | :py:const:`True` if the robot collided. 395 | 396 | """ 397 | if isinstance(action, np.ndarray): 398 | action = action.flatten()[0] 399 | self.pose = action % (2*np.pi) 400 | return super(AbsoluteRobot, self).take_action(0.0) 401 | 402 | def simulation_loop(acd, robot, max_step=-1, max_episodes=-1, max_total_iter=-1): 403 | """Simulate some episodes of the ePuck robot. 404 | 405 | This method handles data passing between the ``acd`` and ``robot`` 406 | instances in two loops, one for the episode and one for the whole 407 | experiment. 408 | 409 | ``acd`` 410 | Actor-Critic instance (:py:class:`ADHDP`). 411 | 412 | ``robot`` 413 | Robot instance (:py:class:`Robot`). 414 | 415 | ``max_step`` 416 | Maximum number of steps in an episode. Negative means no limit. 417 | 418 | ``max_episodes`` 419 | Maximum number of episodes. Negative means no limit. 420 | 421 | ``max_total_iter`` 422 | Maximum number of steps in total. Negative means no limit. 
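If none of the limits is set, an exception is raised. A minimal
    sketch, assuming ``acd`` is an already configured :py:class:`ADHDP`
    instance and the wall coordinates are arbitrary::

        robot = AbsoluteRobot(walls=[(-5.0, -5.0, 5.0, -5.0)], speed=0.5, step_time=1.0)
        # run at most 50 episodes of at most 100 steps each
        acd = simulation_loop(acd, robot, max_step=100, max_episodes=50)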
423 | 424 | """ 425 | if max_step < 0 and max_episodes < 0 and max_total_iter < 0: 426 | raise Exception('The simulation cannot run forever.') 427 | 428 | num_episode = 0 429 | num_total_iter = 0 430 | while True: 431 | 432 | # init episode 433 | acd.new_episode() 434 | acd.signal('new_episode') # collectors will create new group 435 | robot.reset() 436 | acd.child.reset() 437 | a_curr = np.atleast_2d([acd.child.action]) 438 | 439 | num_step = 0 # k 440 | while True: 441 | 442 | # Apply current action 443 | collided = robot.take_action(a_curr) 444 | 445 | # Observe sensors 446 | s_next = robot.read_sensors() 447 | 448 | # Execute ACD 449 | a_next = acd(s_next, num_step, num_step+1, 1) 450 | 451 | # Iterate 452 | num_step += 1 453 | num_total_iter += 1 454 | if collided: 455 | break 456 | if max_step > 0 and num_step >= max_step: 457 | break 458 | acd.a_curr = a_curr = a_next 459 | 460 | if num_step <= 3: 461 | print "Warning: episode ended prematurely" 462 | 463 | num_episode += 1 464 | if max_episodes > 0 and num_episode >= max_episodes: 465 | break 466 | if max_total_iter > 0 and num_total_iter >= max_total_iter: 467 | break 468 | 469 | return acd 470 | -------------------------------------------------------------------------------- /HDPy/rl.py: -------------------------------------------------------------------------------- 1 | """ 2 | The Reinforcement Learning Problem is approached by means of an 3 | Actor-Critic design. This method splits the agent into a 4 | return-estimator (Critic) and an action-selection mechanism (Actor). 5 | Information about state and reward is provided by the plant to the 6 | agent. As the agent is still viewed as one unit, both of its parts are 7 | embedded in the same class, the :py:class:`ActorCritic`. It does not 8 | itself implement a method to solve the learning problem but only 9 | provides preliminaries for an algorithm doing so. Meaning, that it 10 | defines common members and method interfaces. Furthermore, it binds 11 | the Actor-Critic approach to a :py:class:`PuPy.RobotActor`, such that 12 | any of its descendants can be used within :py:mod:`PuPy`. 13 | 14 | The Actor-Critic implementation is kept general, meaning that it is not 15 | limited to a specific learning problem. For this, the template classes 16 | :py:class:`Plant` and :py:class:`Policy` are defined. Using the former, 17 | a concrete environment can be implemented by specifying state and reward. 18 | The latter class is required to hide the representation of the action 19 | from the :py:class:`ActorCritic`. Due to the integration in 20 | :py:mod:`PuPy`, motor targets (a low-level representation of an 21 | action) have to be generated, but the action representation for the 22 | Reinforcement Learning problem may be more abstract. For example, gait 23 | parameters could be used as action. From these, a motor target sequence 24 | has to be generated to actually steer the robot. 25 | 26 | """ 27 | import PuPy 28 | import numpy as np 29 | import cPickle as pickle 30 | 31 | class Plant(object): 32 | """A template for Actor-Critic *plants*. The *Plant* describes the 33 | interaction of the Actor-Critic with the environment. Given a robot 34 | which follows a certain *Policy*, the environment generates rewards 35 | and robot states. 36 | 37 | An additional instance to :py:class:`PuPy.Normalization` may be 38 | supplied in ``norm`` for normalizing sensor values. 
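A minimal subclass only needs to define the state input and the
    reward; the sketch below uses a hypothetical one-dimensional sensor
    called ``distance``::

        class DistancePlant(Plant):
            def __init__(self):
                super(DistancePlant, self).__init__(state_space_dim=1)

            def state_input(self, state):
                # Nx1 column vector, as expected by the ActorCritic
                return np.atleast_2d([state['distance'][-1]]).T

            def reward(self, epoch):
                # favour small readings of the (hypothetical) sensor
                return -abs(epoch['distance'][-1])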
39 | """ 40 | def __init__(self, state_space_dim=None, norm=None): 41 | self._state_space_dim = state_space_dim 42 | self.normalization = None 43 | self.set_normalization(norm) 44 | 45 | def state_input(self, state): 46 | """Return the state-part of the critic input 47 | (i.e. the reservoir input). 48 | 49 | The state-part is derived from the current robot ``state`` and 50 | possibly also its ``action``. As return format, a Nx1 numpy 51 | vector is expected, where 2 dimensions should exist (e.g. 52 | :py:meth:`numpy.atleast_2d`). 53 | 54 | Although the reservoir input will consist of both, the *state* 55 | and *action*, this method must only return the *state* part of 56 | it. 57 | 58 | """ 59 | raise NotImplementedError() 60 | 61 | def reward(self, epoch): 62 | """A reward generated by the *Plant* based on the current 63 | sensor readings in ``epoch``. The reward is single-dimensional. 64 | 65 | The reward is evaluated in every step. It builds the foundation 66 | of the approximated return. 67 | """ 68 | raise NotImplementedError() 69 | 70 | def state_space_dim(self): 71 | """Return the dimension of the state space. 72 | This value is equal to the size of the vector returned by 73 | :py:meth:`state_input`. 74 | """ 75 | if self._state_space_dim is None: 76 | raise NotImplementedError() 77 | return self._state_space_dim 78 | 79 | def set_normalization(self, norm): 80 | """Set the normalization instance to ``norm``.""" 81 | if norm is None: 82 | norm = PuPy.Normalization() 83 | self.normalization = norm 84 | 85 | def reset(self): 86 | """Reset plant to initial state.""" 87 | pass 88 | 89 | class Policy(PuPy.RobotActor): 90 | """A template for Actor-Critic *policies*. The *Policy* defines how 91 | an action is translated into a control (motor) signal. It 92 | continously receives action updates from the *Critic* which it has 93 | to digest. 94 | 95 | An additional instance to :py:class:`PuPy.Normalization` may be 96 | supplied in ``norm`` for normalizing sensor values. 97 | """ 98 | def __init__(self, action_space_dim=None, norm=None): 99 | super(Policy, self).__init__() 100 | self._action_space_dim = action_space_dim 101 | self.normalization = None 102 | self.set_normalization(norm) 103 | 104 | def initial_action(self): 105 | """Return the initial action. A valid action must be returned 106 | since the :py:class:`ActorCritic` relies on the format. 107 | 108 | The action has to be a 2-dimensional numpy vector, with both 109 | dimensions available. 110 | """ 111 | raise NotImplementedError() 112 | 113 | def update(self, action_upd): 114 | """Update the *Policy* according to the current action update 115 | ``action_upd``, which was in turn computed by the 116 | :py:class:`ActorCritic`. 117 | """ 118 | raise NotImplementedError() 119 | 120 | def get_iterator(self, time_start_ms, time_end_ms, step_size_ms): 121 | """Return an iterator for the *motor_target* sequence, according 122 | to the current action configuration. 123 | 124 | The *motor_targets* glue the *Policy* and *Plant* together. 125 | Since they are applied in the robot and effect the sensor 126 | readouts, they are an "input" to the environment. As the targets 127 | are generated as effect of the action update, they are an output 128 | of the policy. 129 | 130 | """ 131 | raise NotImplementedError() 132 | 133 | def action_space_dim(self): 134 | """Return the dimension of the action space. 135 | This value is equal to the size of the vector returned by 136 | :py:meth:`initial_action`. 
137 | """ 138 | if self._action_space_dim is None: 139 | raise NotImplementedError() 140 | return self._action_space_dim 141 | 142 | def reset(self): 143 | """Undo any policy updates.""" 144 | raise NotImplementedError() 145 | 146 | def set_normalization(self, norm): 147 | """Set the normalization instance to ``norm``.""" 148 | if norm is None: 149 | norm = PuPy.Normalization() 150 | self.normalization = norm 151 | 152 | 153 | def __call__(self, epoch, time_start_ms, time_end_ms, step_size_ms): 154 | if epoch.has_key('a_next'): 155 | self.update(np.atleast_2d(epoch['a_next']).T) 156 | return self.get_iterator(time_start_ms, time_end_ms, step_size_ms) 157 | 158 | def _get_initial_targets(self, time_start_ms, time_end_ms, step_size_ms): 159 | return self.__call__({}, time_start_ms, time_end_ms, step_size_ms) 160 | 161 | 162 | class _ConstParam(object): 163 | """Stub for wrapping constant values into an executable function.""" 164 | def __init__(self, value): 165 | self._value = value 166 | def __call__(self, time0=None, time1=None): 167 | """Return the constant value.""" 168 | return self._value 169 | 170 | class Momentum(object): 171 | """Template class for an action momentum. 172 | 173 | With a momentum, the next action is computed from the lastest one 174 | and the proposed action :math:`a^*`. The momentum controls how much 175 | each of the two influences the next action. Generally, a momentum 176 | of zero implies following strictly the proposal, while a momentum 177 | of one does the opposite. Usually, the (linear) momentum is 178 | formulated as 179 | 180 | .. math:: 181 | a_{t+1} = m a_t + (1-m) a^* 182 | 183 | The momentum may be time dependent with 184 | - time0: Episode counter 185 | - time1: Episode's step counter 186 | 187 | """ 188 | def __call__(self, a_curr, a_prop, time0=None, time1=None): 189 | """Return the next action from a current action ``a_curr``, 190 | a proposal ``a_prop`` at episode ``time0`` in step ``time1``.""" 191 | raise NotImplementedError() 192 | 193 | class ConstMomentum(Momentum): 194 | """Linear momentum equation, as specified in :py:class:`Momentum` 195 | with time-constant momentum value (m). 196 | 197 | ``value`` 198 | Momentum value, [0,1]. 199 | 200 | """ 201 | def __init__(self, value): 202 | super(ConstMomentum, self).__init__() 203 | self._value = value 204 | assert 0 <= self._value and self._value <= 1 205 | 206 | def __call__(self, a_curr, a_prop, time0=None, time1=None): 207 | """Return the next action from a current action ``a_curr``, 208 | a proposal ``a_prop`` at episode ``time0`` in step ``time1``.""" 209 | return self._value * a_curr + (1.0 - self._value) * a_prop 210 | 211 | class RadialMomentum(Momentum): 212 | """Momentum with respect to angular action. The resulting action 213 | is the (smaller) intermediate angle of the latest action 214 | and proposal (with respect to the momentum). The actions are 215 | supposed to be in radians, hence the output is in the range 216 | :math:`[0,2\pi]`. The momentum is a time-constant value (m). 217 | 218 | ``value`` 219 | Momentum value, [0,1]. 
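For example, with a momentum of 0.5 the angles 0.1 and
    :math:`2\pi - 0.1` are blended to 0.0 rather than to :math:`\pi`,
    i.e. the wrap-around at :math:`2\pi` is respected::

        momentum = RadialMomentum(0.5)
        momentum(0.1, 2*np.pi - 0.1)    # ~0.0 (ConstMomentum would give ~pi)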
220 | 221 | """ 222 | def __init__(self, value): 223 | super(RadialMomentum, self).__init__() 224 | self._value = value 225 | assert 0 <= self._value and self._value <= 1 226 | 227 | def __call__(self, a_curr, a_prop, time0=None, time1=None): 228 | """Return the next action from a current action ``a_curr``, 229 | a proposal ``a_prop`` at episode ``time0`` in step ``time1``.""" 230 | phi_0 = a_curr % (2*np.pi) 231 | phi_1 = a_prop % (2*np.pi) 232 | imag_0 = np.exp(phi_0*1j) 233 | imag_1 = np.exp(phi_1*1j) 234 | imag_r = self._value * imag_0 + (1.0 - self._value) * imag_1 235 | return np.angle(imag_r) % (2*np.pi) 236 | 237 | class ActorCritic(PuPy.RobotActor): 238 | """Actor-critic design. 239 | 240 | The Actor-Critic estimates the return function 241 | 242 | .. math:: 243 | J_t = \sum\limits_{k=t}^{T} \gamma^k r_{t+k+1} 244 | 245 | while the return is optimized at the same time. This is done by 246 | incrementally updating the estimate for :math:`J_t` and choosing 247 | the next action by optimizing the return in a single step. See 248 | [ESN-ACD]_ for details. 249 | 250 | ``plant`` 251 | An instance of :py:class:`Plant`. The plant defines the 252 | interaction with the environment. 253 | 254 | ``child`` 255 | An instance of :py:class:`RobotActor` which should be a 256 | :py:class:`Policy` or have one as child. The policy defines the 257 | interaction with the robot's actuators. 258 | 259 | ``gamma`` 260 | Choice of *gamma* in the return function. May be a constant or 261 | a function of the time (relative to the episode start). 262 | 263 | ``alpha`` 264 | Choice of *alpha* in the action update. May be a constant or a 265 | function of the time (relative to the episode start). 266 | 267 | The corresponding formula is 268 | 269 | .. math:: 270 | a_{t+1} = a_{t} + \\alpha \\frac{\partial J_t}{\partial a_t} 271 | 272 | See [ESN-ACD]_ for details. 273 | 274 | ``norm`` 275 | A :py:class:`PuPy.Normalization` for normalization purposes. 276 | Note that the parameters for *a_curr* and *a_next* should be 277 | exchangable, since it's really the same kind of 'sensor'. 278 | 279 | """ 280 | def __init__(self, plant, policy, gamma=1.0, alpha=1.0, init_steps=1, norm=None, momentum=0.0): 281 | super(ActorCritic, self).__init__(child=policy) 282 | 283 | # Initial members 284 | self.plant = plant 285 | self.normalizer = None 286 | self.num_episode = 0 287 | self._init_steps = init_steps 288 | self.a_curr = None 289 | self._motor_action_dim = None 290 | self.s_curr = dict() 291 | self.alpha = None 292 | self.momentum = None 293 | self.gamma = None 294 | self.num_step = 0 295 | 296 | # Init members through dedicated routines 297 | self.set_normalization(norm) 298 | self.set_alpha(alpha) 299 | self.set_gamma(gamma) 300 | self.set_momentum(momentum) 301 | 302 | # Check assumptions 303 | assert self.child.initial_action().shape[0] >= 1 304 | assert self.child.initial_action().shape[1] == 1 305 | 306 | # Start a new episode 307 | self.new_episode() 308 | 309 | def new_episode(self): 310 | """Start a new episode of the same experiment. This method can 311 | also be used to initialize the *ActorCritic*, for example when 312 | it is loaded from a file. 313 | """ 314 | self.num_episode += 1 315 | self.a_curr = self.child.initial_action() 316 | self._motor_action_dim = self.child.action_space_dim() 317 | self.s_curr = dict() 318 | self.num_step = 0 319 | 320 | def init_episode(self, epoch, time_start_ms, time_end_ms, step_size_ms): 321 | """Define the behaviour during the initial phase, i.e. 
as long 322 | as 323 | 324 | num_step <= init_steps 325 | 326 | with ``num_step`` the episode's step iterator and ``init_steps`` 327 | given at construction (default 1). The default is to store the 328 | ``epoch`` but do nothing else. 329 | 330 | .. note:: 331 | The step iterator ``num_step`` is incremented before this 332 | method is called. 333 | 334 | """ 335 | self.s_curr = epoch 336 | self._pre_increment_hook(epoch) 337 | return self.child(epoch, time_start_ms, time_end_ms, step_size_ms) 338 | 339 | def __call__(self, epoch, time_start_ms, time_end_ms, step_size_ms): 340 | """One round in the actor-critic cycle. The current observations 341 | are given in ``epoch`` and the timing information in the rest of 342 | the parameters. For a detailed description of the parameters, 343 | see :py:class:`PuPy.PuppyActor`. 344 | 345 | This routine computes the reward from the *epoch* and manages 346 | consecutive epochs, then lets :py:meth:`_step` compute the next 347 | action. 348 | 349 | """ 350 | if self.num_step <= self._init_steps: 351 | self.num_step += 1 352 | return self.init_episode(epoch, time_start_ms, time_end_ms, step_size_ms) 353 | 354 | # extern through the robot: 355 | # take action (a_curr = a_next in the previous run) 356 | # observe sensors values produced by the action (a_curr = previous a_next) 357 | 358 | # Generate reinforcement signal U(k), given in(k) 359 | reward = self.plant.reward(epoch) 360 | #reward = self.plant.reward(self.s_curr) 361 | # It's not clear, which reward should be the input to the critic: 362 | # While the ACD papers imply the reward of time step n, the book 363 | # by Sutton/Barto indicate the reward as being from the next 364 | # state, n+1. Experiments indicate that it doesn't really matter. 365 | # To be consistent with other work, I go with time n. 366 | # Nico: I changed it to be epoch. This is just a notation thing with 367 | # the n and n+1, but it should be the reward of the newest state. 368 | 369 | # do the actual work 370 | epoch = self._step(self.s_curr, epoch, self.a_curr, reward) 371 | 372 | # increment 373 | self.a_curr = np.atleast_2d(epoch['a_next']).T 374 | self.s_curr = epoch 375 | self.num_step += 1 376 | 377 | # return next action 378 | return self.child(epoch, time_start_ms, time_end_ms, step_size_ms) 379 | 380 | def _step(self, s_curr, s_next, a_curr, reward): 381 | """Execute one step of the actor and return the next action. 382 | 383 | When overloading this method, it must be ensured that 384 | :py:meth:`_next_action_hook` is executed as soon as the next 385 | action is determined and also :py:meth:`_pre_increment_hook` 386 | should be called before the method returns (passing relevant 387 | intermediate results). 388 | 389 | ``s_curr`` 390 | Previous observed state. :py:keyword:`dict`, same as ``epoch`` 391 | of the :py:meth:`__call__`. 392 | 393 | ``s_next`` 394 | Latest observed state. :py:keyword:`dict`, same as ``epoch`` 395 | of the :py:meth:`__call__`. 396 | 397 | ``a_curr`` 398 | Previously executed action. This is the action which lead 399 | from ``s_curr`` into ``s_next``. Type specified through 400 | the :py:class:`Policy`. 401 | 402 | ``reward`` 403 | Reward of ``s_next`` 404 | 405 | """ 406 | raise NotImplementedError() 407 | 408 | def _pre_increment_hook(self, epoch, **kwargs): 409 | """Template method for subclasses. 410 | 411 | Before the actor-critic cycle increments, this method is invoked 412 | with all relevant locals of the :py:meth:`ADHDP.__call__` 413 | method. 
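        Subclasses or data collectors typically overload it to record these
        intermediate values. A minimal sketch (whether a given key, e.g.
        ``err``, is actually present depends on the calling subclass)::

            def _pre_increment_hook(self, epoch, **kwargs):
                if 'err' in kwargs:
                    print kwargs['err']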
414 | """ 415 | pass 416 | 417 | def _next_action_hook(self, a_next): 418 | """Postprocessing hook, after the next action ``a_next`` was 419 | proposed by the algorithm. Must return the possibly altered 420 | next action in the same format.""" 421 | return a_next 422 | 423 | def save(self, pth): 424 | """Store the current instance in a file at ``pth``. 425 | 426 | .. note:: 427 | If ``alpha`` or ``gamma`` was set to a user-defined 428 | function, make sure it's pickable. Especially, anonymous 429 | functions (:keyword:`lambda`) can't be pickled. 430 | 431 | """ 432 | child = self.child 433 | self.child = None 434 | 435 | f = open(pth, 'w') 436 | pickle.dump(self, f) 437 | f.close() 438 | 439 | self.child = child 440 | 441 | @staticmethod 442 | def load(pth): 443 | """Load an instance from a file ``pth``. 444 | """ 445 | f = open(pth, 'r') 446 | cls = pickle.load(f) 447 | cls.new_episode() 448 | return cls 449 | 450 | def set_alpha(self, alpha): 451 | """Define a value for ``alpha``. May be either a constant or 452 | a function of the time. 453 | """ 454 | if callable(alpha): 455 | self.alpha = alpha 456 | else: 457 | self.alpha = _ConstParam(alpha) 458 | 459 | def set_gamma(self, gamma): 460 | """Define a value for ``gamma``. May be either a constant or 461 | a function of the time. 462 | """ 463 | if callable(gamma): 464 | self.gamma = gamma 465 | else: 466 | self.gamma = _ConstParam(gamma) 467 | 468 | def set_momentum(self, momentum): 469 | """Define a value for ``momentum``. May be either a constant or 470 | a function of the time. 471 | """ 472 | if callable(momentum): 473 | self.momentum = momentum 474 | else: 475 | self.momentum = ConstMomentum(momentum) 476 | 477 | def set_normalization(self, norm): 478 | """Set the normalization instance to ``norm``. The normalization 479 | is propagated to the plant and policy.""" 480 | if norm is None: 481 | norm = PuPy.Normalization() 482 | self.normalizer = norm 483 | self.plant.set_normalization(norm) 484 | self.child.set_normalization(norm) # for the policy. 485 | 486 | -------------------------------------------------------------------------------- /HDPy/puppy/puppy.py: -------------------------------------------------------------------------------- 1 | """ 2 | Puppy experiments are executed within the [Webots]_ simulator. Since 3 | this module is linked to :py:mod:`PuPy` through the class 4 | :py:class:`ActorCritic`, this is the native approach. For the purpose of 5 | Puppy, an adapted Actor-Critic is implemented in :py:class:`PuppyHDP`, 6 | handling Puppy specifics. It inherits from :py:class:`ADHDP`, 7 | hence can be used in the same fashion. 8 | 9 | Simulation with [Webots]_ is often time consuming. Therefore, a method 10 | is provided to collect data in the simulation and replay it later. This 11 | is implemented through :py:class:`OfflineCollector` and 12 | :py:func:`puppy.offline_playback`. An example of how to approach this 13 | is documented in :ref:`puppy_offline`. 
14 | 15 | """ 16 | from ..hdp import ADHDP 17 | from ..rl import Plant 18 | import numpy as np 19 | import warnings 20 | import h5py 21 | import HDPy 22 | 23 | SENSOR_NAMES = ['trg0', 'trg1', 'trg2', 'trg3', 'accelerometer_x', 'accelerometer_y', 'accelerometer_z', 'compass_x', 'compass_y', 'compass_z', 'gyro_x', 'gyro_y', 'gyro_z', 'hip0', 'hip1', 'hip2', 'hip3', 'knee0', 'knee1', 'knee2', 'knee3', 'puppyGPS_x', 'puppyGPS_y', 'puppyGPS_z', 'touch0', 'touch0', 'touch1', 'touch2', 'touch3'] 24 | 25 | class PuppyHDP(ADHDP): 26 | """ADHDP subtype for simulations using Puppy in webots. 27 | 28 | This class adds some code considering restarts of Puppy. It adds 29 | an optional argument ``tumbled_reward``. The reward will be forced 30 | to this value after the supervisor detected tumbling. If 31 | :py:const:`None` (the default) is used, the reward remains 32 | unchanged. 33 | 34 | """ 35 | def __init__(self, *args, **kwargs): 36 | self._tumbled_reward = kwargs.pop('tumbled_reward', None) 37 | self.has_tumbled = False 38 | self.supervisor_tumbled_notice = 0 39 | super(PuppyHDP, self).__init__(*args, **kwargs) 40 | 41 | def _signal(self, msg, **kwargs): 42 | """Handle messages from the supervisor. Messages are expected 43 | when the robot has tumbled and thus the robot has to be reset. 44 | """ 45 | super(PuppyHDP, self)._signal(msg, **kwargs) 46 | # msg is 'reset', 'out_of_arena', 'tumbled_grace_start' or 'tumbled' 47 | # for msg==reset, the robot is reset immediately 48 | # msg==tumbled_grace_start marks the start of the grace period of the tumbled robot 49 | if msg == 'tumbled': 50 | #print "Tumbling received", self.num_step 51 | self.supervisor_tumbled_notice = 1 52 | 53 | if msg == 'reset': 54 | #print "Reset received", self.num_step 55 | self.child.reset() 56 | self.new_episode() 57 | 58 | # DEPRICATED: use of event_handler replaced by RobotActor's signal chain. 59 | #def event_handler(self, robot, epoch, current_time, msg): 60 | # """Handle messages from the supervisor. Messages are expected 61 | # when the robot has tumbled and thus the robot has to be reset. 62 | # """ 63 | # # msg is 'reset', 'out_of_arena', 'tumbled_grace_start' or 'tumbled' 64 | # # for msg==reset, the robot is reset immediately 65 | # # msg==tumbled_grace_start marks the start of the grace period of the tumbled robot 66 | # if msg == 'tumbled_grace_start': 67 | # #print "Tumbling received", self.num_step 68 | # self.supervisor_tumbled_notice = 1 69 | # 70 | # if msg == 'reset': 71 | # #print "Reset received", self.num_step 72 | # self.child.reset() 73 | # self.new_episode() 74 | # self.signal('new_episode') 75 | 76 | def new_episode(self): 77 | """After restarting, reset the tumbled values and start the 78 | new episode. 79 | """ 80 | super(PuppyHDP, self).new_episode() 81 | self.has_tumbled = False 82 | self.supervisor_tumbled_notice = 0 83 | 84 | def _step(self, s_curr, epoch, a_curr, reward): 85 | """Ensure the tumbled reward and initiate behaviour between 86 | restarts. The step of the parent is then invoked. 
87 | """ 88 | if self.has_tumbled: 89 | epoch['a_next'] = np.zeros(shape=a_curr.shape[::-1]) 90 | return epoch 91 | 92 | if self.supervisor_tumbled_notice > 0: 93 | if self.supervisor_tumbled_notice > 1: 94 | if self._tumbled_reward is not None: 95 | reward = np.atleast_2d([self._tumbled_reward]) 96 | 97 | #reward /= (1.0 - self.gamma(self.num_episode, self.num_step)) 98 | # geometric series to incorporate future rewards 99 | # note that with this, its err = r/(1-gamma) - J * (1-gamma) 100 | # but should be err = r/(1-gamma) - J 101 | # thus, there's an difference of J*gamma 102 | # is solved this by temporarily set gamma = 0.0 103 | self.has_tumbled = True 104 | #old_gamma = self.gamma 105 | #self.set_gamma(0.0) 106 | self.supervisor_tumbled_notice += 1 107 | 108 | #reward += np.random.normal(scale=0.001) 109 | epoch = super(PuppyHDP, self)._step(s_curr, epoch, a_curr, reward) 110 | 111 | #if self.supervisor_tumbled_notice > 2: 112 | # self.gamma = old_gamma 113 | 114 | #print self.num_step, reward, a_curr.T, a_next.T, epoch['puppyGPS_x'][-1] 115 | return epoch 116 | 117 | def init_episode(self, epoch, time_start_ms, time_end_ms, step_size_ms): 118 | """Initial behaviour (after reset) 119 | 120 | .. note:: 121 | Assuming identical initial trajectories, the initial state 122 | is the same - and thus doesn't matter. 123 | Non-identical initial trajectories will result in 124 | non-identical behaviour, therefore the initial state should 125 | be different (initial state w.r.t. start of learning). 126 | Due to this, the critic is already updated in the initial 127 | trajectory. 128 | """ 129 | if self.num_step > 2: 130 | # in_state = self.plant.state_input(self.s_curr) 131 | # a_curr_nrm = self.normalizer.normalize_value('a_curr', self.a_curr) 132 | # i_curr = np.vstack((in_state, a_curr_nrm)).T 133 | # x_curr = self.reservoir(i_curr, simulate=False) 134 | # x_curr = np.hstack((x_curr, i_curr)) # FIXME: Input/Output ESN Model 135 | i_curr, x_curr, _ = self._critic_eval(self.s_curr, self.a_curr, False, 'a_curr') 136 | epoch['x_curr'] = x_curr 137 | epoch['i_curr'] = i_curr 138 | epoch['a_next'] = self.a_curr.T 139 | epoch['a_curr'] = self.a_curr.T 140 | 141 | self.s_curr = epoch 142 | return self.child(epoch, time_start_ms, time_end_ms, step_size_ms) 143 | 144 | class OfflineCollector(ADHDP): 145 | """Collect sensor data for Puppy in webots, such that it can be 146 | reused later to train a critic offline. 147 | 148 | Note that in contrast to :py:class:`ADHDP`, some 149 | structures are not required (reservoir, plant). They will be set 150 | to stubs, hence don't need to be passed. 151 | 152 | Some extra metadata is stored in the datafile, which allows 153 | processing of the experiment in an offline fashion through the 154 | function :py:func:`puppy.offline_playback`. 
155 | 156 | """ 157 | def __init__(self, *args, **kwargs): 158 | # look for policy's member 'action_space_dim' (policy is hidden in child or sub-child) 159 | policy = kwargs['policy'] 160 | if hasattr(policy, 'action_space_dim'): 161 | action_space_dim = policy.action_space_dim 162 | else: 163 | from ..hdp import return_none 164 | action_space_dim = return_none 165 | 166 | class Phony: 167 | """Stub for a reservoir.""" 168 | reset_states = False 169 | def get_input_dim(self): 170 | """Return input dimension (action space dim.)""" 171 | return action_space_dim() 172 | def reset(self): 173 | """Reset to the initial state (no effect)""" 174 | pass 175 | 176 | kwargs['plant'] = Plant(state_space_dim=0) 177 | kwargs['reservoir'] = Phony() 178 | kwargs['readout'] = None 179 | self.supervisor_tumbled_notice = 0 180 | super(OfflineCollector, self).__init__(*args, **kwargs) 181 | 182 | def new_episode(self): 183 | """After restarting, reset the tumbled values and start the 184 | new episode. 185 | """ 186 | super(OfflineCollector, self).new_episode() 187 | self.supervisor_tumbled_notice = 0 188 | 189 | def __call__(self, epoch, time_start_ms, time_end_ms, step_size_ms): 190 | """Store the sensor measurements of an epoch in the datafile 191 | as well as relevant metadata. The robot detects if the 192 | simulation was reverted and if it has tumbled (through the 193 | supervisor message). Other guards are not considered, as none 194 | are covered by :py:class:`PuppyHDP`. 195 | 196 | """ 197 | #print "(call)", time_start_ms, self.a_curr.T, ('puppyGPS_x' in epoch and epoch['puppyGPS_x'][-1] or 'NOX') 198 | if len(epoch) == 0: 199 | # TODO: epoch length will never be 0 I think(?) Use _get_initial_target() for this purpose. 200 | # the very first initial epoch of the first episode 201 | # this case occurs when the simulation starts or after it is reverted 202 | self.num_step += 1 203 | #self._pre_increment_hook(dict(), empty_initial_step=np.array([1])) 204 | self._pre_increment_hook(dict(), init_step=np.array([self.num_step])) 205 | return self.child.get_iterator(time_start_ms, time_end_ms, step_size_ms) 206 | 207 | # Determine next action 208 | if self.num_step <= self._init_steps: 209 | # Init 210 | a_next = self.a_curr 211 | elif self.supervisor_tumbled_notice > 2: 212 | # Tumbled, prepare for reset 213 | a_next = np.zeros(shape=self.a_curr.shape) 214 | self.supervisor_tumbled_notice += 1 215 | elif self.supervisor_tumbled_notice > 0: 216 | # Tumbled, still walking 217 | a_next = self._next_action_hook(self.a_curr) 218 | self.supervisor_tumbled_notice += 1 219 | else: 220 | # Normal walking 221 | a_next = self._next_action_hook(self.a_curr) 222 | 223 | # if self.num_step <= self._init_steps: 224 | # print "(init)", a_next.T 225 | # elif self.supervisor_tumbled_notice > 2: 226 | # print time_start_ms, self.a_curr.T, self.num_step 227 | # else: 228 | # print time_start_ms, self.a_curr.T, epoch['puppyGPS_x'][-1] 229 | 230 | epoch['a_curr'] = self.a_curr.T 231 | epoch['a_next'] = a_next.T 232 | 233 | self.a_curr = a_next 234 | self.num_step += 1 235 | 236 | #print "(call-end)", self.num_step, a_next.T, a_next.shape, self.a_curr.shape 237 | return self.child(epoch, time_start_ms, time_end_ms, step_size_ms) 238 | 239 | def _signal(self, msg, **kwargs): 240 | """Handle messages from the supervisor. Messages are expected 241 | when the robot has tumbled and thus the robot has to be reset. 
242 | """ 243 | super(OfflineCollector, self)._signal(msg, **kwargs) 244 | # msg is 'reset', 'out_of_arena', 'tumbled_grace_start' or 'tumbled' 245 | # for msg==reset, the robot is reset immediately 246 | # msg==tumbled_grace_start marks the start of the grace period of the tumbled robot 247 | if msg == 'tumbled_grace_start': 248 | #print "Tumbling received", self.num_step 249 | self.supervisor_tumbled_notice = 1 250 | self._pre_increment_hook(dict(), tumbled=np.array([self.num_step])) 251 | 252 | if msg == 'reset': 253 | #print "Reset received", self.num_step 254 | self.child.reset() 255 | self.new_episode() 256 | 257 | # DEPRICATED: use of event_handler replaced by RobotActor's signal chain. 258 | #def event_handler(self, robot, epoch, current_time, msg): 259 | # """Handle messages from the supervisor. Messages are expected 260 | # when the robot has tumbled and thus the robot has to be reset. 261 | # """ 262 | # # msg is 'reset', 'out_of_arena', 'tumbled_grace_start' or 'tumbled' 263 | # # for msg==reset, the robot is reset immediately 264 | # # msg==tumbled_grace_start marks the start of the grace period of the tumbled robot 265 | # if msg == 'tumbled_grace_start': 266 | # #print "Tumbling received", self.num_step 267 | # self.supervisor_tumbled_notice = 1 268 | # self._pre_increment_hook(dict(), tumbled=np.array([self.num_step])) 269 | # 270 | # if msg == 'reset': 271 | # #print "Reset received", self.num_step 272 | # self.child.reset() 273 | # self.new_episode() 274 | # self.signal('new_episode') 275 | 276 | def _next_action_hook(self, a_next): 277 | """Defines the action sampling policy of the offline data 278 | gathering. Note that this policy is very relevant to later 279 | experiments, hence this methods should be overloaded (although 280 | a default policy is provided). 281 | """ 282 | warnings.warn('Default sampling policy is used.') 283 | a_next = np.zeros(self.a_curr.shape) 284 | # Prohibit too small or large amplitudes 285 | while (a_next < 0.2).any() or (a_next > 2.0).any() or ((a_next > 1.0).any() and a_next.ptp() > 0.4): 286 | a_next = self.a_curr + np.random.normal(0.0, 0.15, size=self.a_curr.shape) 287 | 288 | return a_next 289 | 290 | def offline_playback(pth_data, critic, samples_per_action, ms_per_step, episode_start=None, episode_end=None, min_episode_len=0, err_coefficient=0.01, episode_start_test=None): 291 | """Simulate an experiment run for the critic by using offline data. 292 | The data has to be collected in webots, using the respective 293 | robot and supervisor. Note that the behaviour of the simulation 294 | should match what's expected by the critic. The critic is fed the 295 | sensor data, in order. Of course, it can't react to it since 296 | the next action is predefined. 297 | 298 | Additional to the sensor fields, the 'tumbling' dataset is expected 299 | which indicates, if and when the robot has tumbled. It is used such 300 | that the respective signals can be sent to the critic. 301 | 302 | The critic won't store any sensory data again. 303 | 304 | ``pth_data`` 305 | Path to the datafile with the sensory information (HDF5). 306 | 307 | ``critic`` 308 | PuppyHDP instance. 309 | 310 | ``samples_per_action`` 311 | Number of samples per control step. Must correspond to the data. 312 | 313 | ``ms_per_step`` 314 | Sensor sampling period. 315 | 316 | ``episode_start`` 317 | Defines a lower limit on the episode number. Passed as int, 318 | is with respect to the episode index, not its identifier. 
319 | 320 | ``episode_stop`` 321 | Defines an upper limit on the episode number. Passed as int, 322 | is with respect to the episode index, not its identifier. 323 | 324 | ``min_episode_len`` 325 | Only pick episodes longer than this threshold. 326 | 327 | ``err_coefficient`` 328 | coefficient for the TD-error exponential moving average (EMA) 329 | 330 | ``episode_start_test`` 331 | starting point for the test, i.e. when we start accounting the TD-error. 332 | 333 | :returns: accumulated TD-error average 334 | 335 | """ 336 | # Open data file, get valid experiments 337 | f = h5py.File(pth_data,'r') 338 | storages = map(str, sorted(map(int, f.keys()))) 339 | storages = filter(lambda s: len(f[s]) > 0, storages) 340 | if min_episode_len > 0: 341 | storages = filter(lambda s: f[s]['a_curr'].shape[0] > min_episode_len, storages) 342 | 343 | if episode_end is not None: 344 | storages = storages[:episode_end] 345 | 346 | if episode_start is not None: 347 | storages = storages[episode_start:] 348 | 349 | assert len(storages) > 0 350 | 351 | if episode_start_test is None: 352 | episode_start_test = len(storages)/2 - 1; #use last half for testing 353 | 354 | # Prepare critic; redirect hooks to avoid storing epoch data twice 355 | # and feed the actions 356 | global accError 357 | accError = 0 # accumulated error 358 | next_action = None 359 | episode = None 360 | critic._pre_increment_hook_orig = critic._pre_increment_hook 361 | critic._next_action_hook_orig = critic._next_action_hook 362 | 363 | def pre_increment_hook(epoch, **kwargs): 364 | kwargs['offline_episode'] = np.array([episode]) 365 | critic._pre_increment_hook_orig(dict(), **kwargs) 366 | if int(episode) > episode_start_test and kwargs.has_key('err'): 367 | global accError 368 | accError = accError*(1-err_coefficient) + (kwargs['err'][0][0]**2)*err_coefficient # accumulated squared error 369 | #accError = accError*(1-err_coefficient) + np.abs(kwargs['err'][0][0])*err_coefficient # accumulated absolute error 370 | 371 | def next_action_hook(a_next): 372 | #print "(next)", a_next.T, next_action.T 373 | return next_action 374 | 375 | critic._next_action_hook = next_action_hook 376 | critic._pre_increment_hook = pre_increment_hook 377 | 378 | # Main loop, feed data to the critic 379 | time_step_ms = ms_per_step * samples_per_action 380 | time_start_ms = 0 381 | for episode_idx, episode in enumerate(storages): 382 | print episode_idx 383 | 384 | data_grp = f[episode] 385 | N = len(data_grp['trg0']) 386 | 387 | # get the stored ratio 388 | #db_samples_per_action = N / len(data_grp['a_next']) 389 | #assert N % db_samples_per_action == 0 390 | assert N % samples_per_action == 0 391 | 392 | # get tumbled infos 393 | if 'tumble' in data_grp: 394 | from pylab import find 395 | time_tumbled = find(data_grp['tumble'])[0] / samples_per_action * samples_per_action 396 | #time_tumbled = data_grp['tumbled'][0] * db_samples_per_action 397 | #time_tumbled = data_grp['tumble'][0] * samples_per_action 398 | else: 399 | time_tumbled = -1 400 | 401 | # initial, empty call 402 | if 'init_step' in data_grp: 403 | print "Simulation was started/reverted" 404 | time_start_ms = 0 405 | critic(dict(), time_start_ms, time_start_ms + samples_per_action, ms_per_step) 406 | time_tumbled -= samples_per_action 407 | 408 | # initial action 409 | critic.a_curr = np.atleast_2d(data_grp['a_curr'][0]).T 410 | 411 | # loop through data, incrementally feed the critic 412 | for num_iter in np.arange(0, N, samples_per_action): 413 | # next action 414 | next_action = 
np.atleast_2d(data_grp['a_next'][num_iter/samples_per_action]).T 415 | 416 | # get data 417 | time_start_ms += time_step_ms 418 | time_end_ms = time_start_ms + time_step_ms 419 | chunk = dict([(k, data_grp[k][num_iter:(num_iter+samples_per_action)]) for k in SENSOR_NAMES]) 420 | 421 | # send tumbled message 422 | if num_iter == time_tumbled: 423 | #critic.event_handler(None, dict(), time_tumbled, 'tumbled_grace_start') 424 | critic.signal('tumbled_grace_start') 425 | 426 | # update critic 427 | critic(chunk, time_start_ms, time_end_ms, time_step_ms) 428 | 429 | # send reset after episode has finished 430 | if episode_idx < len(storages) - 1: 431 | #critic.event_handler(None, dict(), ms_per_step * N, 'reset') 432 | critic.signal('reset') 433 | critic.signal('new_episode') # collectors will create new group 434 | 435 | # cleanup 436 | critic._pre_increment_hook = critic._pre_increment_hook_orig 437 | critic._next_action_hook = critic._next_action_hook_orig 438 | del critic._pre_increment_hook_orig 439 | del critic._next_action_hook_orig 440 | 441 | return accError 442 | 443 | 444 | ## DEPRECATED ## 445 | 446 | def puppy_offline_playback(*args, **kwargs): 447 | """Alias of offline_playback. 448 | 449 | .. deprecated:: 1.0 450 | Use :py:func:`offline_playback` instead 451 | 452 | """ 453 | warnings.warn("This function is deprecated. Use 'offline_playback' instead") 454 | return offline_playback(*args, **kwargs) 455 | -------------------------------------------------------------------------------- /HDPy/puppy/analysis_puppy.py: -------------------------------------------------------------------------------- 1 | """ 2 | For puppy experiment analysis, snapshot functions are implemented in a 3 | similar fashion as for the :ref:`ePuck robot `. However, for 4 | Puppy, the action is assumed to be two dimensional. The action snapshot 5 | is hence an image (2d plot). Through :py:func:`puppy_plot_action`, the 6 | figure is plotted at a specific state (identified by the epoch index 7 | of a recorded episode). Furthermore, the overall trajectory and the 8 | location of the inspected states can be plotted through 9 | :py:func:`puppy_plot_inspected_trajectory`. 10 | This method can either be used at some isolated states (with the 11 | mentioned methods) or in a video-like fashion. For the latter case, 12 | :py:class:`PuppyActionVideo` implements the necessary routines. 13 | 14 | The environment plotting can be managed through the functions 15 | :py:func:`puppy_plot_linetarget`, :py:func:`puppy_plot_locationtarget` 16 | and :py:func:`puppy_plot_landmarks`, dependent on the training target 17 | (as defined in :ref:`plants_puppy`). For plotting the robot's trajectory 18 | the functions :py:func:`puppy_plot_trajectory` and 19 | :py:func:`puppy_plot_all_trajectories` can be used.
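A minimal sketch of a single trajectory plot (the analysis object, episode
identifier and ``step_width`` are placeholders)::

    import pylab
    axis = pylab.figure().add_subplot(111)
    plot_trajectory(my_analysis, axis, episode=0, step_width=150)
    pylab.show()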
20 | 21 | """ 22 | import pylab 23 | import numpy as np 24 | import itertools 25 | import warnings 26 | from puppy import SENSOR_NAMES 27 | 28 | def plot_trajectory(analysis, axis, episode, step_width=1, offset=0, legend=True, **kwargs): 29 | """Plot the trajectory of an episode 30 | """ 31 | gps_x = analysis[episode]['puppyGPS_x'][offset+step_width-1::step_width] 32 | gps_y = analysis[episode]['puppyGPS_y'][offset+step_width-1::step_width] 33 | if step_width > 1: 34 | gps_x = np.concatenate(([analysis[episode]['puppyGPS_x'][offset]], gps_x)) 35 | gps_y = np.concatenate(([analysis[episode]['puppyGPS_y'][offset]], gps_y)) 36 | 37 | col = kwargs.pop('color', 'k') 38 | label = kwargs.pop('label', 'Trajectory') 39 | axis.plot(gps_x, gps_y, color=col, label=label, linewidth=3, **kwargs) 40 | axis.axis('equal') 41 | if legend: 42 | axis.plot(gps_x[0], gps_y[0], 'ks', label='Start') 43 | axis.plot(gps_x[-1], gps_y[-1], 'kv', label='End') 44 | 45 | return axis 46 | 47 | def plot_all_trajectories(analysis, axis, step_width=1, **kwargs): 48 | """Plot all trajectories in ``analysis`` into ``axis``. 49 | """ 50 | gps_x = analysis.get_data('puppyGPS_x') 51 | gps_y = analysis.get_data('puppyGPS_y') 52 | 53 | N = len(gps_x)-1 54 | kwargs.pop('color', None) # remove color argument 55 | for idx, (x, y) in enumerate(zip(gps_x, gps_y)): 56 | col = 0.75 - (0.75 * (idx - 1))/N 57 | 58 | x_plot = np.concatenate(([x[0]], x[step_width-1::step_width])) 59 | y_plot = np.concatenate(([y[0]], y[step_width-1::step_width])) 60 | 61 | axis.plot(x_plot, y_plot, color=str(col), **kwargs) 62 | 63 | return axis 64 | 65 | def plot_linetarget(axis, origin=(2.0, 0.0), direction=(1.0, 1.0), range_=(-5.0, 5.0)): 66 | """Plot a line given by ``origin`` and ``direction``. The ``range_`` 67 | may be supplid, which corresponds to the length of the line (from 68 | the origin). 69 | """ 70 | origin = np.array(origin) 71 | dir_ = np.array(direction) 72 | dir_ /= np.linalg.norm(dir_) 73 | line = [origin + t * dir_ for t in range_] 74 | line_x, line_y = zip(*line) 75 | axis.plot(line_x, line_y, 'k', label='Target') 76 | return axis 77 | 78 | def plot_locationtarget(axis, target=(4.0, 4.0), distance=0.5, **kwargs): 79 | """Plot the ``target`` location with a sphere of radius ``distance`` 80 | into ``axis`` to mark the target location. ``kwargs`` will be passed 81 | to all :py:mod:`pylab` calls.""" 82 | linewidth = kwargs.pop('linewidth', 2) 83 | color = kwargs.pop('facecolor', 'k') 84 | fill = kwargs.pop('fill', False) 85 | lbl = kwargs.pop('label', 'Target') 86 | axis.plot([target[0]], [target[1]], 'kD', label=lbl, **kwargs) 87 | if distance > 0.0: 88 | trg_field = pylab.Circle(target, distance, fill=fill, facecolor=color, linewidth=linewidth, label=lbl, **kwargs) 89 | axis.add_artist(trg_field) 90 | 91 | return axis 92 | 93 | def plot_landmarks(axis, landmarks, **kwargs): 94 | """Plot markers at ``landmark`` locations in ``axis``.""" 95 | color = kwargs.pop('color', 'k') 96 | lbl = kwargs.pop('label', '') 97 | marker = kwargs.pop('marker','^') 98 | for x, y in landmarks: 99 | axis.plot([x], [y], marker=marker, color=color, label=lbl, **kwargs) 100 | return axis 101 | 102 | def _action_eval(grp, reservoir, critic, trg_epoch, obs_offset, step_width, actions_range_x, actions_range_y): 103 | """Evaluate a set of two-dimensional actions [``action_range_x``, 104 | ``actions_range_y``] at a specific state ``trg_epoch`` and return 105 | the matrix of predicted returns. 106 | 107 | ``grp`` 108 | Observed data of the underlying experiment. 
Usually a 109 | :py:class:`H5CombinedGroup` or [HDF5]_ group (e.g. through 110 | :py:class:`Analysis`). 111 | 112 | ``critic`` 113 | :py:func:`critic` instance to be used for evaluation for a 114 | certain critic input (action and state). 115 | 116 | ``reservoir`` 117 | Reservoir to be used. Note that this must be the same instance 118 | as used in ``critic``. 119 | 120 | ``obs_offset`` 121 | Offset between robot observations (e.g. GPS) and reinforcement 122 | learning data (i.e. actions). For offline data, the offset is 123 | one epoch (i.e. ``step_width``), for online data, it is zero. 124 | 125 | ``step_width`` 126 | Number of observations per epoch. In terms of :py:mod:`PuPy`, 127 | this is the control period over the sensor polling period. 128 | 129 | 130 | """ 131 | reservoir.reset() 132 | reservoir.states = np.atleast_2d(grp['x_curr'][trg_epoch-1, :reservoir.get_output_dim()]) 133 | 134 | # evaluate actions 135 | # Note: epoch is one step ahead (of a_curr, same time as a_next)! 136 | # Note: sensor values are shifted w.r.t a_curr by obs_offset 137 | s_curr = dict([(sensor, grp[sensor][obs_offset+step_width*(trg_epoch-1):obs_offset+trg_epoch*step_width]) for sensor in SENSOR_NAMES]) 138 | a_ret = np.zeros((len(actions_range_x), len(actions_range_y))) 139 | 140 | actions_iter = itertools.product(range(len(actions_range_x)), range(len(actions_range_y))) 141 | for idx_x, idx_y in actions_iter: 142 | action_candidate = np.atleast_2d((actions_range_x[idx_x], actions_range_y[idx_y])).T 143 | j_curr = critic(s_curr, action_candidate, simulate=True) 144 | a_ret[idx_x, idx_y] = j_curr[0, 0] 145 | #print actions_range_x[idx_x], actions_range_y[idx_y], j_curr[0, 0] 146 | 147 | return a_ret 148 | 149 | def plot_action(analysis, episode, critic, reservoir, inspect_epochs, actions_range_x, actions_range_y, step_width, obs_offset, epoch_actions=None): 150 | """Along a trajectory ``episode`` of a conducted experiment given 151 | by ``analysis``, plot the predicted return over a 2D-action at some 152 | fixed states. For each of the states (given by ``inspect_epochs``), 153 | a figure is created including the return prediction as an image 154 | (i.e. 2D). 155 | 156 | ``analysis`` 157 | :py:class:`Analysis` instance containing the experimental data. 158 | 159 | ``episode`` 160 | Episode which is analysed. 161 | 162 | ``critic`` 163 | :py:func:`critic` instance to be used for evaluation for a 164 | certain critic input (action and state). 165 | 166 | ``reservoir`` 167 | Reservoir to be used. Note that this must be the same instance 168 | as used in ``critic``. 169 | 170 | ``inspect_epochs`` 171 | Epochs numbers for which the predicted actions should be 172 | plotted. 173 | 174 | ``actions_range_x`` 175 | Action range in the first dimension. The return is predicted 176 | for any combination of ``actions_range_x`` and 177 | ``actions_range_y``. 178 | 179 | ``actions_range_y`` 180 | Action range in the second dimension. The return is predicted 181 | for any combination of ``actions_range_x`` and 182 | ``actions_range_y``. 183 | 184 | ``step_width`` 185 | Number of observations per epoch. In terms of :py:mod:`PuPy`, 186 | this is the control period over the sensor polling period. 187 | 188 | ``obs_offset`` 189 | Offset between robot observations (e.g. GPS) and reinforcement 190 | learning data (i.e. actions). For offline data, the offset is 191 | one epoch (i.e. ``step_width``), for online data, it is zero. 
192 | 193 | ``epoch_actions`` 194 | A list of actually executed actions (as tuple), for each 195 | inspected epoch. The action is indicated in the plot by a 196 | marker. The argument or list items may be :py:const:`None`, 197 | in which case nothing is plotted. 198 | 199 | """ 200 | grp = analysis[episode] 201 | if epoch_actions is None: 202 | epoch_actions = [None] * len(inspect_epochs) 203 | 204 | for trg_epoch, actions in zip(inspect_epochs, epoch_actions): 205 | 206 | # simulate the actions 207 | a_ret = _action_eval(grp, reservoir, critic, trg_epoch, obs_offset, step_width, actions_range_x, actions_range_y) 208 | 209 | # plot results 210 | fig = pylab.figure() 211 | # In the image, the y-axis is the rows, the x-axis the columns of the matrix 212 | # Having index (0,0) in the left/bottom corner: origin='lower' 213 | pylab.plot((0, len(actions_range_x)-1), (0, len(actions_range_y)-1), 'b') 214 | pylab.imshow(a_ret, origin='lower', cmap=pylab.cm.gray) 215 | pylab.colorbar() 216 | pylab.xticks(range(len(actions_range_y)), actions_range_y) 217 | pylab.yticks(range(len(actions_range_x)), actions_range_x) 218 | pylab.title('Expected Return per action at epoch ' + str(trg_epoch)) 219 | pylab.xlabel('Amplitude right legs') # cols are idx_y, right legs 220 | pylab.ylabel('Amplitude left legs') # rows are idx_x, left legs 221 | 222 | if actions is not None: 223 | a_left, a_right = zip(*actions) 224 | pylab.plot(a_left, a_right, 'r') 225 | pylab.plot([a_left[0]], [a_right[0]], 'rs') 226 | 227 | return fig 228 | 229 | def plot_inspected_trajectory(analysis, episode_idx, step_width, axis, inspect_epochs, obs_offset): 230 | """Plot the robot trajectory of the experiment ``episode_idx`` 231 | found in ``analysis`` and a marker at all ``inspect_epochs``. This 232 | function was created to support :py:func:`puppy_plot_action` by 233 | giving an overview over the whole path. 234 | 235 | ``axis`` 236 | plotting canvas. 237 | 238 | ``step_width`` 239 | Number of observations per epoch. In terms of :py:mod:`PuPy`, 240 | this is the control period over the sensor polling period. 241 | 242 | ``obs_offset`` 243 | Offset between robot observations (e.g. GPS) and reinforcement 244 | learning data (i.e. actions). For offline data, the offset is 245 | one epoch (i.e. ``step_width``), for online data, it is zero. 246 | 247 | """ 248 | puppy_plot_trajectory(analysis, axis, episode_idx, step_width, color='b', offset=obs_offset) 249 | trg_x = [analysis[episode_idx]['puppyGPS_x'][obs_offset + step_width*trg_epoch+step_width-1] for trg_epoch in inspect_epochs] 250 | trg_y = [analysis[episode_idx]['puppyGPS_y'][obs_offset + step_width*trg_epoch+step_width-1] for trg_epoch in inspect_epochs] 251 | axis.plot(trg_x, trg_y, 'k*', label='Inspected states') 252 | return axis 253 | 254 | class ActionVideo: 255 | """Set up a structure such that the predicted return over 2D 256 | actions can be successively plotted in the same figure. 257 | 258 | .. todo:: 259 | The selected action isn't displayed correctly (offset?) 260 | 261 | ``data`` 262 | Observed data of the underlying experiment. Usually a 263 | :py:class:`H5CombinedGroup` or [HDF5]_ group (e.g. through 264 | :py:class:`Analysis`). 265 | 266 | ``critic`` 267 | :py:func:`critic` instance to be used for evaluation for a 268 | certain critic input (action and state). 269 | 270 | ``reservoir`` 271 | Reservoir to be used. Note that this must be the same instance 272 | as used in ``critic``. 273 | 274 | ``actions_range_x`` 275 | Action range in the first dimension. 
The return is predicted 276 | for any combination of ``actions_range_x`` and 277 | ``actions_range_y``. 278 | 279 | ``actions_range_y`` 280 | Action range in the second dimension. The return is predicted 281 | for any combination of ``actions_range_x`` and 282 | ``actions_range_y``. 283 | 284 | ``step_width`` 285 | Number of observations per epoch. In terms of :py:mod:`PuPy`, 286 | this is the control period over the sensor polling period. 287 | 288 | ``obs_offset`` 289 | Offset between robot observations (e.g. GPS) and reinforcement 290 | learning data (i.e. actions). For offline data, the offset is 291 | one epoch (i.e. ``step_width``), for online data, it is zero. 292 | 293 | ``with_actions`` 294 | Plot markers and lines between them which represent the 295 | actually selected action. 296 | 297 | """ 298 | def __init__(self, data, critic, reservoir, actions_range_x, actions_range_y, step_width, obs_offset, with_actions=True): 299 | 300 | # Basic figure 301 | self.fig = None 302 | self.title = None 303 | self.axis = None 304 | self.axis_image = None 305 | 306 | # Actions 307 | self.with_actions = with_actions 308 | self.actions_nrm = ((None, None), (None, None)) 309 | self.actions_line = None 310 | self.actions_marker = None 311 | 312 | # Experiment data 313 | self.data = data 314 | self.critic = critic 315 | self.reservoir = reservoir 316 | self.actions_range_x, self.actions_range_y = actions_range_x, actions_range_y 317 | self.step_width, self.obs_offset = step_width, obs_offset 318 | 319 | def draw_init(self, fig=None): 320 | """Set up the initial video figure. A new figure is created 321 | unless one is provided in ``fig``. 322 | """ 323 | if fig is None: 324 | fig = pylab.figure() 325 | 326 | # Create the figure 327 | self.fig = fig 328 | self.title = self.fig.suptitle('Expected Return per action') 329 | 330 | # Configure the axis 331 | self.axis = self.fig.add_subplot(111) 332 | self.axis.set_xticks(range(len(self.actions_range_y))) 333 | self.axis.set_xticklabels(self.actions_range_y) 334 | self.axis.set_yticks(range(len(self.actions_range_x))) 335 | self.axis.set_yticklabels(self.actions_range_x) 336 | self.axis.set_xlabel('Amplitude right legs') # cols are idx_y, right legs 337 | self.axis.set_ylabel('Amplitude left legs') # rows are idx_x, left legs 338 | 339 | # Plot the diagonal 340 | self.axis.plot((0, len(self.actions_range_x)-1), (0, len(self.actions_range_y)-1), 'b') 341 | 342 | # Prepare the image 343 | img_data = np.zeros((len(self.actions_range_x), len(self.actions_range_y))) 344 | self.axis_image = self.axis.imshow(img_data, origin='lower', cmap=pylab.cm.Greys) 345 | self.fig.colorbar(self.axis_image) 346 | 347 | # action line 348 | if self.with_actions: 349 | ox, sx = self.actions_range_x[0], len(self.actions_range_x)-1 350 | oy, sy = self.actions_range_y[0], len(self.actions_range_y)-1 351 | self.actions_nrm = ((ox, sx), (oy, sy)) 352 | self.actions_line = self.axis.plot([sx*(0.5-ox), sx*(0.5-ox)], [sy*(0.5-oy), sy*(1.0-oy)], 'r')[0] 353 | self.actions_marker = self.axis.plot([sx*(0.5-ox)], [sy*(0.5-oy)], 'rs')[0] 354 | 355 | return self 356 | 357 | def draw_step(self, epoch, actions=None): 358 | """Update the figure by showing the action plot for ``epoch``. 359 | If `with_actions` is set, a list of actions to be plotted should 360 | be present in ``actions``.
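        The method is normally called repeatedly after :py:meth:`draw_init`,
        roughly as in this sketch (data source, critic, reservoir, action
        ranges and epoch indices are placeholders)::

            vid = ActionVideo(grp, critic, reservoir, x_range, y_range, step_width=150, obs_offset=150, with_actions=False)
            vid.draw_init()
            for trg_epoch in range(5, 50):
                vid.draw_step(trg_epoch)
                pylab.draw()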
361 | """ 362 | # evaluate the actions 363 | a_ret = _action_eval( 364 | self.data, 365 | self.reservoir, 366 | self.critic, 367 | epoch, 368 | self.obs_offset, 369 | self.step_width, 370 | self.actions_range_x, 371 | self.actions_range_y 372 | ) 373 | 374 | # update plot 375 | self.axis_image.set_data(a_ret) 376 | self.axis_image.set_clim(vmin=a_ret.min(), vmax=a_ret.max()) 377 | self.axis_image.changed() 378 | 379 | # update action line and marker 380 | if self.with_actions: 381 | if actions is None: 382 | warnings.warn('with_actions set but no actions provided') 383 | else: 384 | ox, sx = self.actions_nrm[0] 385 | oy, sy = self.actiosn_nrm[1] 386 | actions[:, 0] = (actions[:, 0] - oy) * sy 387 | actions[:, 1] = (actions[:, 1] - ox) * sx 388 | self.actions_line.set_data((actions[:, 1], actions[:, 0])) 389 | self.actions_marker.set_data(([actions[-1, 1]], [actions[-1, 0]])) 390 | 391 | return self 392 | 393 | def draw_trajectory(self, loc_marker, epoch_idx): 394 | """Update the marker of the current state in a trajectory plot. 395 | The current state is read from *data* at ``epoch_idx``, the 396 | marker plot given in ``loc_marker``. 397 | """ 398 | loc_x = self.data['puppyGPS_x'][self.obs_offset+self.step_width*epoch_idx+self.step_width-1] 399 | loc_y = self.data['puppyGPS_y'][self.obs_offset+self.step_width*epoch_idx+self.step_width-1] 400 | loc_marker.set_data([loc_x], [loc_y]) 401 | return self 402 | 403 | 404 | ## DEPRECATED ## 405 | 406 | def puppy_plot_trajectory(*args, **kwargs): 407 | """Alias of plot_trajectory. 408 | 409 | .. deprecated:: 1.0 410 | Use :py:func:`plot_trajectory` instead 411 | 412 | """ 413 | warnings.warn("This function is deprecated. Use 'plot_trajectory' instead") 414 | return plot_trajectory(*args, **kwargs) 415 | 416 | def puppy_plot_all_trajectories(*args, **kwargs): 417 | """Alias of plot_all_trajectories. 418 | 419 | .. deprecated:: 1.0 420 | Use :py:func:`plot_all_trajectories` instead 421 | 422 | """ 423 | warnings.warn("This function is deprecated. Use 'plot_all_trajectories' instead") 424 | return plot_all_trajectories(*args, **kwargs) 425 | 426 | def puppy_plot_linetarget(*args, **kwargs): 427 | """Alias of plot_linetarget. 428 | 429 | .. deprecated:: 1.0 430 | Use :py:func:`plot_linetarget` instead 431 | 432 | """ 433 | warnings.warn("This function is deprecated. Use 'plot_linetarget' instead") 434 | return plot_linetarget(*args, **kwargs) 435 | 436 | def puppy_plot_locationtarget(*args, **kwargs): 437 | """Alias of plot_locationtarget. 438 | 439 | .. deprecated:: 1.0 440 | Use :py:func:`plot_locationtarget` instead 441 | 442 | """ 443 | warnings.warn("This function is deprecated. Use 'plot_locationtarget' instead") 444 | return plot_locationtarget(*args, **kwargs) 445 | 446 | def puppy_plot_landmarks(*args, **kwargs): 447 | """Alias of plot_landmarks. 448 | 449 | .. deprecated:: 1.0 450 | Use :py:func:`plot_landmarks` instead 451 | 452 | """ 453 | warnings.warn("This function is deprecated. Use 'plot_landmarks' instead") 454 | return plot_landmarks(*args, **kwargs) 455 | 456 | def puppy_plot_action(*args, **kwargs): 457 | """Alias of plot_action. 458 | 459 | .. deprecated:: 1.0 460 | Use :py:func:`plot_action` instead 461 | 462 | """ 463 | warnings.warn("This function is deprecated. Use 'plot_action' instead") 464 | return plot_action(*args, **kwargs) 465 | 466 | def puppy_plot_inspected_trajectory(*args, **kwargs): 467 | """Alias of plot_inspected_trajectory. 468 | 469 | .. 
deprecated:: 1.0 470 | Use :py:func:`plot_inspected_trajectory` instead 471 | 472 | """ 473 | warnings.warn("This function is deprecated. Use 'plot_inspected_trajectory' instead") 474 | return plot_inspected_trajectory(*args, **kwargs) 475 | 476 | def puppy_vid_init(actions_range_x, actions_range_y, with_actions=True): 477 | """ 478 | 479 | .. deprecated:: 1.0 480 | Use :py:class:`PuppyActionVideo` instead 481 | 482 | """ 483 | warnings.warn('deprecated, use PuppyActionVideo instead') 484 | fig = pylab.figure() 485 | axis = fig.add_subplot(111) 486 | axis.set_xticks(range(len(actions_range_y))) 487 | axis.set_xticklabels(actions_range_y) 488 | axis.set_yticks(range(len(actions_range_x))) 489 | axis.set_yticklabels(actions_range_x) 490 | title = fig.suptitle('Expected Return per action') 491 | axis.set_xlabel('Amplitude right legs') # cols are idx_y, right legs 492 | axis.set_ylabel('Amplitude left legs') # rows are idx_x, left legs 493 | axis.plot((0, len(actions_range_x)-1), (0, len(actions_range_y)-1), 'b') 494 | img_data = np.zeros((len(actions_range_x), len(actions_range_y))) 495 | axim = axis.imshow(img_data, origin='lower', cmap=pylab.cm.Greys) 496 | fig.colorbar(axim) 497 | 498 | # action line 499 | if with_actions: 500 | ox, sx = actions_range_x[0], len(actions_range_x)-1 501 | oy, sy = actions_range_y[0], len(actions_range_y)-1 502 | a_line = axis.plot([sx*(0.5-ox), sx*(0.5-ox)], [sy*(0.5-oy), sy*(1.0-oy)], 'r')[0] 503 | a_marker = axis.plot([sx*(0.5-ox)], [sy*(0.5-oy)], 'rs')[0] 504 | else: 505 | ox = sx = oy = sy = a_line = a_marker = None 506 | 507 | return fig, axis, axim, title, (a_line, a_marker, (ox, sx), (oy, sy)) 508 | 509 | def puppy_vid_action(image, (a_line, a_marker, px, py), grp, critic, reservoir, epoch, actions_range_x, actions_range_y, step_width, obs_offset, actions=None): 510 | """ 511 | 512 | .. deprecated:: 1.0 513 | Use :py:class:`PuppyActionVideo` instead 514 | 515 | """ 516 | warnings.warn('deprecated, use PuppyActionVideo instead') 517 | a_ret = _action_eval(grp, reservoir, critic, epoch, obs_offset, step_width, actions_range_x, actions_range_y) 518 | 519 | # update plot 520 | image.set_data(a_ret) 521 | image.set_clim(vmin=a_ret.min(), vmax=a_ret.max()) 522 | image.changed() 523 | 524 | if actions is not None: 525 | actions[:, 0] = (actions[:, 0] - py[0]) * py[1] 526 | actions[:, 1] = (actions[:, 1] - px[0]) * px[1] 527 | a_line.set_data((actions[:, 1], actions[:, 0])) 528 | a_marker.set_data(([actions[-1, 1]], [actions[-1, 0]])) 529 | 530 | return image 531 | 532 | def puppy_vid_inspected_trajectory(grp, step_width, loc_marker, epoch_idx, obs_offset): 533 | """ 534 | 535 | .. deprecated:: 1.0 536 | Use :py:class:`PuppyActionVideo` instead 537 | 538 | """ 539 | warnings.warn('deprecated, use PuppyActionVideo instead') 540 | loc_x = grp['puppyGPS_x'][obs_offset+step_width*epoch_idx+step_width-1] 541 | loc_y = grp['puppyGPS_y'][obs_offset+step_width*epoch_idx+step_width-1] 542 | loc_marker.set_data([loc_x], [loc_y]) 543 | return loc_marker 544 | 545 | --------------------------------------------------------------------------------