├── LICENSE
├── README.md
├── base
    ├── applyController.m
    ├── calcCost.m
    ├── learnPolicy.m
    ├── pred.m
    ├── predcost.m
    ├── prop.pdf
    ├── propagate.m
    ├── propagated.m
    ├── rollout.m
    ├── simulate.m
    ├── trainDynModel.m
    └── value.m
├── control
    ├── conCat.m
    ├── conCat.m~
    ├── congp.m
    └── conlin.m
├── doc
    ├── 3dplot.sty
    ├── covFunctions.m
    ├── doc.aux
    ├── doc.log
    ├── doc.out
    ├── doc.pdf
    ├── doc.tex
    ├── doc.toc
    ├── figures
    │   ├── cartPole.pdf
    │   ├── cdp_sketch.pdf
    │   ├── cost_error_bar.pdf
    │   ├── cp.pdf
    │   ├── cp_rollout.pdf
    │   ├── example_trajectories.pdf
    │   ├── example_trajectories2.pdf
    │   ├── fX3.pdf
    │   ├── flow.pdf
    │   ├── learnP.pdf
    │   ├── pendubot.pdf
    │   ├── pendulum.pdf
    │   ├── pendulum.pdf.ipe7
    │   ├── policy_opt.pdf
    │   ├── preliminary_policy.pdf
    │   ├── satLossPlot.pdf
    │   ├── squashed_preliminary_policy.pdf
    │   ├── squashing_fct.pdf
    │   ├── squashing_fct2.pdf
    │   ├── squashing_function.pdf
    │   ├── unicoords_body.tex
    │   ├── unicoords_body.tex~
    │   ├── unicoords_frame.tex
    │   ├── unicoords_frame.tex~
    │   ├── unicoords_frame2.tex
    │   ├── unicoords_frame2.tex~
    │   ├── unicoords_turntable.tex
    │   ├── unicoords_turntable.tex~
    │   ├── unicoords_turntable2.tex
    │   ├── unicoords_turntable2.tex~
    │   ├── unicoords_wheel.tex
    │   ├── unicoords_wheel.tex~
    │   ├── unicoords_wheel2.tex
    │   ├── unicoords_wheel2.tex~
    │   └── unicycle.pdf
    ├── generate_docs.m
    ├── header.tex
    ├── html
    │   ├── DoublePend_learn.html
    │   ├── applyController.html
    │   ├── augment_unicycle.html
    │   ├── augment_unicycle_eq57428.png
    │   ├── calcCost.html
    │   ├── cartDouble_learn.html
    │   ├── cartPole_learn.html
    │   ├── checkgrad.html
    │   ├── conCat.html
    │   ├── conCat_eq10310.png
    │   ├── conCat_eq35435.png
    │   ├── conT.html
    │   ├── conT_eq96160.png
    │   ├── congp.html
    │   ├── congp_eq35435.png
    │   ├── congp_eq43551.png
    │   ├── conlin.html
    │   ├── conlin_eq10310.png
    │   ├── conlin_eq42328.png
    │   ├── conlin_eq81653.png
    │   ├── conlin_eq95465.png
    │   ├── covNoise.html
    │   ├── covSEard.html
    │   ├── covSum.html
    │   ├── draw_cdp.html
    │   ├── draw_cp.html
    │   ├── draw_dp.html
    │   ├── draw_pendubot.html
    │   ├── draw_pendulum.html
    │   ├── draw_rollout_cdp.html
    │   ├── draw_rollout_cp.html
    │   ├── draw_rollout_dp.html
    │   ├── draw_rollout_pendubot.html
    │   ├── draw_rollout_pendulum.html
    │   ├── draw_rollout_unicycle.html
    │   ├── draw_unicycle.html
    │   ├── dynamics_cdp.html
    │   ├── dynamics_cp.html
    │   ├── dynamics_dp.html
    │   ├── dynamics_pendubot.html
    │   ├── dynamics_pendulum.html
    │   ├── dynamics_unicycle.html
    │   ├── error_ellipse.html
    │   ├── fitc.html
    │   ├── gSat.html
    │   ├── gSat_eq24615.png
    │   ├── gSat_eq28128.png
    │   ├── gSat_eq35427.png
    │   ├── gSat_eq53557.png
    │   ├── gSat_eq97420.png
    │   ├── gSin.html
    │   ├── gSinSatT.html
    │   ├── gSin_eq00934.png
    │   ├── gSin_eq24615.png
    │   ├── gSin_eq28128.png
    │   ├── gSin_eq35427.png
    │   ├── gSin_eq53557.png
    │   ├── gTrig.html
    │   ├── gTrigT.html
    │   ├── gTrigT_eq41681.png
    │   ├── gTrig_eq00934.png
    │   ├── gTrig_eq24615.png
    │   ├── gTrig_eq28128.png
    │   ├── gTrig_eq35427.png
    │   ├── gTrig_eq53557.png
    │   ├── gaussian.html
    │   ├── getPlotDistr_cdp.html
    │   ├── getPlotDistr_cdp_eq30108.png
    │   ├── getPlotDistr_cdp_eq43551.png
    │   ├── getPlotDistr_cp.html
    │   ├── getPlotDistr_cp_eq30108.png
    │   ├── getPlotDistr_cp_eq43551.png
    │   ├── getPlotDistr_dp.html
    │   ├── getPlotDistr_dp_eq30108.png
    │   ├── getPlotDistr_dp_eq43551.png
    │   ├── getPlotDistr_pendubot.html
    │   ├── getPlotDistr_pendubot_eq30108.png
    │   ├── getPlotDistr_pendubot_eq43551.png
    │   ├── gp0.html
    │   ├── gp0d.html
    │   ├── gp1.html
    │   ├── gp1d.html
    │   ├── gp2.html
    │   ├── gp2d.html
    │   ├── gpT.html
    │   ├── gpT_eq96160.png
    │   ├── gpr.html
    │   ├── hypCurb.html
    │   ├── lossAdd.html
    │   ├── lossHinge.html
    │   ├── lossLin.html
    │   ├── lossQuad.html
    │   ├── lossQuad_eq15684.png
    │   ├── lossQuad_eq95740.png
    │   ├── lossSat.html
    │   ├── lossSat_eq21635.png
    │   ├── lossSat_eq65484.png
    │   ├── lossT.html
    │   ├── lossT_eq96160.png
    │   ├── loss_cdp.html
    │   ├── loss_cdp_eq26389.png
    │   ├── loss_cdp_eq30075.png
    │   ├── loss_cdp_eq32292.png
    │   ├── loss_cdp_eq39223.png
    │   ├── loss_cdp_eq72999.png
    │   ├── loss_cdp_eq81831.png
    │   ├── loss_cp.html
    │   ├── loss_cp_eq26389.png
    │   ├── loss_cp_eq30075.png
    │   ├── loss_cp_eq32292.png
    │   ├── loss_cp_eq39223.png
    │   ├── loss_cp_eq72999.png
    │   ├── loss_cp_eq81831.png
    │   ├── loss_dp.html
    │   ├── loss_dp_eq26389.png
    │   ├── loss_dp_eq30075.png
    │   ├── loss_dp_eq32292.png
    │   ├── loss_dp_eq39223.png
    │   ├── loss_dp_eq72999.png
    │   ├── loss_dp_eq81831.png
    │   ├── loss_pendubot.html
    │   ├── loss_pendubot_eq26389.png
    │   ├── loss_pendubot_eq30075.png
    │   ├── loss_pendubot_eq32292.png
    │   ├── loss_pendubot_eq39223.png
    │   ├── loss_pendubot_eq72999.png
    │   ├── loss_pendubot_eq81831.png
    │   ├── loss_pendulum.html
    │   ├── loss_pendulum_eq26389.png
    │   ├── loss_pendulum_eq30075.png
    │   ├── loss_pendulum_eq32292.png
    │   ├── loss_pendulum_eq39223.png
    │   ├── loss_pendulum_eq72999.png
    │   ├── loss_pendulum_eq81831.png
    │   ├── loss_unicycle.html
    │   ├── loss_unicycle_eq22825.png
    │   ├── loss_unicycle_eq26389.png
    │   ├── loss_unicycle_eq32292.png
    │   ├── loss_unicycle_eq34276.png
    │   ├── loss_unicycle_eq72999.png
    │   ├── loss_unicycle_eq81831.png
    │   ├── maha.html
    │   ├── minimize.html
    │   ├── pendubot_learn.html
    │   ├── pendulum_learn.html
    │   ├── pred.html
    │   ├── predcost.html
    │   ├── propagate.html
    │   ├── propagateT.html
    │   ├── propagated.html
    │   ├── reward.html
    │   ├── reward_eq37930.png
    │   ├── reward_eq81456.png
    │   ├── rewrap.html
    │   ├── rewrap_eq72999.png
    │   ├── rewrap_eq77996.png
    │   ├── rollout.html
    │   ├── rollout_eq25411.png
    │   ├── rollout_eq35435.png
    │   ├── rollout_eq43551.png
    │   ├── settings_cdp.html
    │   ├── settings_cp.html
    │   ├── settings_dp.html
    │   ├── settings_pendubot.html
    │   ├── settings_pendulum.html
    │   ├── settings_unicycle.html
    │   ├── simulate.html
    │   ├── solve_chol.html
    │   ├── sq_dist.html
    │   ├── train.html
    │   ├── trainDynModel.html
    │   ├── unicycle_learn.html
    │   ├── unwrap.html
    │   ├── unwrap_eq72999.png
    │   ├── unwrap_eq77996.png
    │   ├── value.html
    │   └── valueT.html
    ├── literature.bib
    ├── m2doctex.xsl
    ├── m2latex.xsl
    ├── plots
    │   ├── header.m
    │   ├── plot_policySquash.m
    │   ├── plot_satLoss.m
    │   ├── plot_squash.m
    │   └── print_pdf.m
    └── tex
    │   ├── DoublePend_learn.tex
    │   ├── applyController.tex
    │   ├── augment_unicycle.tex
    │   ├── calcCost.tex
    │   ├── cartDouble_learn.tex
    │   ├── cartPole_learn.tex
    │   ├── checkgrad.tex
    │   ├── conCat.tex
    │   ├── conT.tex
    │   ├── congp.tex
    │   ├── conlin.tex
    │   ├── covNoise.tex
    │   ├── covSEard.tex
    │   ├── covSum.tex
    │   ├── draw_cdp.tex
    │   ├── draw_cp.tex
    │   ├── draw_dp.tex
    │   ├── draw_pendubot.tex
    │   ├── draw_pendulum.tex
    │   ├── draw_rollout_cdp.tex
    │   ├── draw_rollout_cp.tex
    │   ├── draw_rollout_dp.tex
    │   ├── draw_rollout_pendubot.tex
    │   ├── draw_rollout_pendulum.tex
    │   ├── draw_rollout_unicycle.tex
    │   ├── draw_unicycle.tex
    │   ├── dynamics_cdp.tex
    │   ├── dynamics_cp.tex
    │   ├── dynamics_dp.tex
    │   ├── dynamics_pendubot.tex
    │   ├── dynamics_pendulum.tex
    │   ├── dynamics_unicycle.tex
    │   ├── error_ellipse.tex
    │   ├── fitc.tex
    │   ├── gSat.tex
    │   ├── gSin.tex
    │   ├── gSinSatT.tex
    │   ├── gTrig.tex
    │   ├── gTrigT.tex
    │   ├── gaussian.tex
    │   ├── getPlotDistr_cdp.tex
    │   ├── getPlotDistr_cp.tex
    │   ├── getPlotDistr_dp.tex
    │   ├── getPlotDistr_pendubot.tex
    │   ├── gp0.tex
    │   ├── gp0d.tex
    │   ├── gp1.tex
    │   ├── gp1d.tex
    │   ├── gp2.tex
    │   ├── gp2d.tex
    │   ├── gpT.tex
    │   ├── gpr.tex
    │   ├── hypCurb.tex
    │   ├── lossAdd.tex
    │   ├── lossHinge.tex
    │   ├── lossLin.tex
    │   ├── lossQuad.tex
    │   ├── lossSat.tex
    │   ├── lossT.tex
    │   ├── loss_cdp.tex
    │   ├── loss_cp.tex
    │   ├── loss_dp.tex
    │   ├── loss_pendubot.tex
    │   ├── loss_pendulum.tex
    │   ├── loss_unicycle.tex
    │   ├── maha.tex
    │   ├── minimize.tex
    │   ├── pendubot_learn.tex
    │   ├── pendulum_learn.tex
    │   ├── pred.tex
    │   ├── predcost.tex
    │   ├── propagate.tex
    │   ├── propagateT.tex
    │   ├── propagated.tex
    │   ├── reward.tex
    │   ├── rewrap.tex
    │   ├── rollout.tex
    │   ├── settings_cdp.tex
    │   ├── settings_cp.tex
    │   ├── settings_dp.tex
    │   ├── settings_pendubot.tex
    │   ├── settings_pendulum.tex
    │   ├── settings_unicycle.tex
    │   ├── simulate.tex
    │   ├── solve_chol.tex
    │   ├── sq_dist.tex
    │   ├── train.tex
    │   ├── trainDynModel.tex
    │   ├── unicycle_learn.tex
    │   ├── unwrap.tex
    │   ├── value.tex
    │   └── valueT.tex
├── gp
    ├── covNoise.m
    ├── covSEard.m
    ├── covSum.m
    ├── fitc.m
    ├── gp0.m
    ├── gp0d.m
    ├── gp1.m
    ├── gp1d.m
    ├── gp2.m
    ├── gp2d.m
    ├── gpr.m
    ├── hypCurb.m
    └── train.m
├── loss
    ├── lossAdd.m
    ├── lossHinge.m
    ├── lossLin.m
    ├── lossQuad.m
    ├── lossSat.m
    ├── reward.m
    ├── reward.pdf
    └── reward.tex
├── scenarios
    ├── cartDoublePendulum
    │   ├── cartDouble_learn.m
    │   ├── draw_cdp.m
    │   ├── draw_rollout_cdp.m
    │   ├── dynamics_cdp.m
    │   ├── getPlotDistr_cdp.m
    │   ├── loss_cdp.m
    │   └── settings_cdp.m
    ├── cartPole
    │   ├── cartPole_learn.m
    │   ├── draw_cp.m
    │   ├── draw_rollout_cp.m
    │   ├── dynamics_cp.m
    │   ├── getPlotDistr_cp.m
    │   ├── loss_cp.m
    │   └── settings_cp.m
    ├── doublePendulum
    │   ├── DoublePend_learn.m
    │   ├── draw_dp.m
    │   ├── draw_rollout_dp.m
    │   ├── dynamics_dp.m
    │   ├── getPlotDistr_dp.m
    │   ├── loss_dp.m
    │   └── settings_dp.m
    ├── pendubot
    │   ├── draw_pendubot.m
    │   ├── draw_rollout_pendubot.m
    │   ├── dynamics_pendubot.m
    │   ├── getPlotDistr_pendubot.m
    │   ├── loss_pendubot.m
    │   ├── pendubot_learn.m
    │   └── settings_pendubot.m
    ├── pendulum
    │   ├── draw_pendulum.m
    │   ├── draw_rollout_pendulum.m
    │   ├── dynamics_pendulum.m
    │   ├── loss_pendulum.m
    │   ├── pendulum_learn.m
    │   └── settings_pendulum.m
    └── unicycle
    │   ├── augment_unicycle.m
    │   ├── draw_rollout_all.m
    │   ├── draw_rollout_unicycle.m
    │   ├── draw_unicycle.m
    │   ├── dynamics_unicycle.m
    │   ├── loss.pdf
    │   ├── loss.tex
    │   ├── loss_unicycle.m
    │   ├── settings_unicycle.m
    │   └── unicycle_learn.m
├── test
    ├── checkgrad.m
    ├── conT.m
    ├── gSinSatT.m
    ├── gTrigT.m
    ├── gpT.m
    ├── lossT.m
    ├── propagateT.m
    └── valueT.m
└── util
    ├── error_ellipse.m
    ├── gSat.m
    ├── gSin.m
    ├── gTrig.m
    ├── gaussian.m
    ├── maha.m
    ├── minimize.m
    ├── rewrap.m
    ├── solve_chol.c
    ├── solve_chol.m
    ├── sq_dist.c
    ├── sq_dist.m
    ├── squash.tex
    ├── trigSquash_old.m
    └── unwrap.m


/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2016, ICL-SML
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | * Redistributions of source code must retain the above copyright notice, this
 8 |   list of conditions and the following disclaimer.
 9 | 
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 |   this list of conditions and the following disclaimer in the documentation
12 |   and/or other materials provided with the distribution.
13 | 
14 | * Neither the name of pilco-matlab nor the names of its
15 |   contributors may be used to endorse or promote products derived from
16 |   this software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | PILCO Software Package V0.9 (2013-07-04)
 2 | 
 3 | I. Introduction
 4 | This software package implements the PILCO RL policy search framework. The learning framework can be applied to MDPs with continuous states and controls/actions and is based on probabilistic modeling of the dynamics and approximate Bayesian inference for policy evaluation and improvement.
 5 | 
 6 | 
 7 | 
 8 | II. Quick Start
 9 | We have already implemented some scenarios that can be found in
10 | <PILCO-ROOT>/scenarios .
11 | 
12 | If you want to get started immediately, go to 
13 | <PILCO-ROOT>/scenarios/cartPole
14 | 
15 | and execute
16 | 
17 | cartPole_learn
18 | 
19 | 
20 | 
21 | III. Documentation
22 | A detailed documentation can be found in
23 | 
24 | <PILCO-ROOT>/doc/doc.pdf
25 | 
26 | which also includes a description of how to set up your own scenario (there are only a few files that are scenario specific). 
27 | 
28 | 
29 | 
30 | IV. Contact
31 | If you find bugs, have questions, or want to give us feedback, please send an email to
32 | m.deisenroth@imperial.ac.uk
33 | 
34 | 
35 | 
36 | V. References
37 | M.P. Deisenroth and C.E. Rasmussen: PILCO: A Data-Efficient and Model-based Approach to Policy Search (ICML 2011)
38 | M.P. Deisenroth: Efficient Reinforcement Learning Using Gaussian Processes (KIT Scientific Publishing, 2010)
39 | 
40 | 
41 | 
42 | Marc Deisenroth
43 | Andrew McHutchon
44 | Joe Hall
45 | Carl Edward Rasmussen
46 | 
47 | 


--------------------------------------------------------------------------------
/base/applyController.m:
--------------------------------------------------------------------------------
 1 | %% applyController.m
 2 | % *Summary:* Script to apply the learned controller to a (simulated) system
 3 | %
 4 | % Copyright (C) 2008-2013 by
 5 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
 6 | %
 7 | % Last modified: 2013-06-04
 8 | %
 9 | %% High-Level Steps
10 | % # Generate a single trajectory rollout by applying the controller
11 | % # Generate many rollouts for testing the performance of the controller
12 | % # Save the data
13 | 
14 | %% Code
15 | 
16 | % 1. Generate trajectory rollout given the current policy
17 | if isfield(plant,'constraint'), HH = maxH; else HH = H; end
18 | [xx, yy, realCost{j+J}, latent{j}] = ...
19 |   rollout(gaussian(mu0, S0), policy, HH, plant, cost);
20 | disp(xx);                           % display states of observed trajectory
21 | x = [x; xx]; y = [y; yy];                            % augment training set
22 | if plotting.verbosity > 0
23 |   if ~ishandle(3); figure(3); else set(0,'CurrentFigure',3); end
24 |   hold on; plot(1:length(realCost{J+j}),realCost{J+j},'r'); drawnow;
25 | end
26 | 
27 | % 2. Make many rollouts to test the controller quality
28 | if plotting.verbosity > 1
29 |   lat = cell(1,10);
30 |   for i=1:10
31 |     [~,~,~,lat{i}] = rollout(gaussian(mu0, S0), policy, HH, plant, cost);
32 |   end
33 |   
34 |   if ~ishandle(4); figure(4); else set(0,'CurrentFigure',4); end; clf(4);
35 |   
36 |   ldyno = length(dyno);
37 |   for i=1:ldyno       % plot the rollouts on top of predicted error bars
38 |     subplot(ceil(ldyno/sqrt(ldyno)),ceil(sqrt(ldyno)),i); hold on;
39 |     errorbar( 0:length(M{j}(i,:))-1, M{j}(i,:), ...
40 |       2*sqrt(squeeze(Sigma{j}(i,i,:))) );
41 |     for ii=1:10
42 |       plot( 0:size(lat{ii}(:,dyno(i)),1)-1, lat{ii}(:,dyno(i)), 'r' );
43 |     end
44 |     plot( 0:size(latent{j}(:,dyno(i)),1)-1, latent{j}(:,dyno(i)),'g');
45 |     axis tight
46 |   end
47 |   drawnow;
48 | end
49 | 
50 | % 3. Save data
51 | filename = [basename num2str(j) '_H' num2str(H)]; save(filename);
52 | 


--------------------------------------------------------------------------------
/base/calcCost.m:
--------------------------------------------------------------------------------
 1 | %% calcCost.m
 2 | % *Summary:* Function to calculate the incurred cost and its standard deviation, 
 3 | % given a sequence of predicted state distributions and the cost struct
 4 | %
 5 | %   [L sL] = calcCost(cost, M, S)
 6 | %
 7 | % *Input arguments:*
 8 | %
 9 | %   cost               cost structure
10 | %   M                  mean vectors of state trajectory (D-by-H matrix)
11 | %   S                  covariance matrices at each time step (D-by-D-by-H)
12 | %
13 | % *Output arguments:*
14 | %
15 | %   L                  expected incurred cost of state trajectory
16 | %   sL                 standard deviation of incurred cost
17 | %
18 | %
19 | % Copyright (C) 2008-2013 by 
20 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
21 | %
22 | % Last modified: 2013-01-23
23 | %
24 | %% High-Level Steps
25 | % # Augment state distribution with trigonometric functions
26 | % # Compute distribution of the control signal
27 | % # Compute dynamics-GP prediction
28 | % # Compute distribution of the next state
29 | %
30 | 
31 | function [L sL] = calcCost(cost, M, S)
32 | %% Code
33 | H = size(M,2);                                             % horizon length
34 | L = zeros(1,H); SL = zeros(1,H);
35 | 
36 | % for each time step, compute the expected cost and its variance
37 | for h = 1:H
38 |   [L(h),d1,d2,SL(h)]  = cost.fcn(cost, M(:,h), S(:,:,h));
39 | end
40 | 
41 | sL = sqrt(SL);                                         % standard deviation
42 | 
43 | 


--------------------------------------------------------------------------------
/base/learnPolicy.m:
--------------------------------------------------------------------------------
 1 | %% learnPolicy.m
 2 | % *Summary:* Script to perform the policy search
 3 | %
 4 | % Copyright (C) 2008-2013 by
 5 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
 6 | %
 7 | % Last modified: 2013-03-06
 8 | %
 9 | %% High-Level Steps
10 | % # Learn the policy (call optimizer)
11 | % # Predict trajectory from a sampled start state and compute expected cost
12 | 
13 | %% Code
14 | 
15 | % 1. Update the policy
16 | opt.fh = 1;
17 | [policy.p fX3] = minimize(policy.p, 'value', opt, mu0Sim, S0Sim, ...
18 |   dynmodel, policy, plant, cost, H);
19 | 
20 | % (optional) Plot overall optimization progress
21 | if exist('plotting', 'var') && isfield(plotting, 'verbosity') ...
22 |     && plotting.verbosity > 1
23 |   if ~ishandle(2); figure(2); else set(0,'CurrentFigure',2); end
24 |   hold on; plot(fX3); drawnow; 
25 |   xlabel('line search iteration'); ylabel('function value')
26 | end
27 | 
28 | % 2. Predict state trajectory from p(x0) and compute cost trajectory
29 | [M{j} Sigma{j}] = pred(policy, plant, dynmodel, mu0Sim(:,1), S0Sim, H);
30 | [fantasy.mean{j} fantasy.std{j}] = ...
31 |   calcCost(cost, M{j}, Sigma{j}); % predict cost trajectory
32 | 
33 | % (optional) Plot predicted immediate costs (as a function of the time steps)
34 | if exist('plotting', 'var') && isfield(plotting, 'verbosity') ...
35 |     && plotting.verbosity > 0
36 |   if ~ishandle(3); figure(3); else set(0,'CurrentFigure',3); end
37 |   clf(3); errorbar(0:H,fantasy.mean{j},2*fantasy.std{j}); drawnow;
38 |   xlabel('time step'); ylabel('immediate cost');
39 | end


--------------------------------------------------------------------------------
/base/pred.m:
--------------------------------------------------------------------------------
 1 | %% pred.m
 2 | % *Summary:* Compute predictive (marginal) distributions of a trajecory
 3 | %
 4 | %   [M S] = pred(policy, plant, dynmodel, m, s, H)
 5 | %
 6 | % *Input arguments:*
 7 | %
 8 | %   policy             policy structure
 9 | %   plant              plant structure
10 | %   dynmodel           dynamics model structure
11 | %   m                  D-by-1 mean of the initial state distribution
12 | %   s                  D-by-D covariance of the initial state distribution
13 | %   H                  length of prediction horizon
14 | %
15 | % *Output arguments:*
16 | %
17 | %   M                  D-by-(H+1) sequence of predicted mean vectors
18 | %   S                  D-by-D-(H+1) sequence of predicted covariance
19 | %                      matrices
20 | % 
21 | % Copyright (C) 2008-2013 by 
22 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
23 | %
24 | % Last modified: 2013-01-23
25 | %
26 | %% High-Level Steps
27 | % # Predict successor state distribution
28 | 
29 | function [M S] = pred(policy, plant, dynmodel, m, s, H)
30 | %% Code
31 | 
32 | D = length(m); S = zeros(D,D,H+1); M = zeros(D,H+1);
33 | M(:,1) = m; S(:,:,1) = s;
34 | for i = 1:H
35 |   [m s] = plant.prop(m, s, plant, dynmodel, policy);
36 |   M(:,i+1) = m(end-D+1:end); 
37 |   S(:,:,i+1) = s(end-D+1:end,end-D+1:end);
38 | end
39 | 


--------------------------------------------------------------------------------
/base/predcost.m:
--------------------------------------------------------------------------------
 1 | %% predcost.m
 2 | % *Summary:* Compute trajectory of expected costs for a given set of 
 3 | % state distributions
 4 | %
 5 | % inputs:
 6 | % m0          mean of states, D-by-1 or D-by-K for multiple means
 7 | % S           covariance matrix of state distributions
 8 | % dynmodel    (struct) for dynamics model (GP)
 9 | % plant	      (struct) of system parameters
10 | % policy      (struct) for policy to be implemented
11 | % cost        (struct) of cost function parameters
12 | % H           length of optimization horizon
13 | %
14 | % outputs:
15 | % L            expected cumulative (discounted) cost
16 | % s            standard deviation of cost
17 | %
18 | % Copyright (C) 2008-2013 by 
19 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
20 | %
21 | % Last modified: 2012-01-12
22 | %
23 | %% High-Level Steps
24 | % # Predict successor state distribution
25 | % # Predict corresponding cost distribution
26 | 
27 | function [L, s] = predcost(m0, S, dynmodel, plant, policy, cost, H)
28 | %% Code 
29 | 
30 | L = zeros(size(m0,2),H); s = zeros(size(m0,2),H);
31 | for k = 1:size(m0,2);
32 |   m = m0(:,k);
33 |   for t = 1:H
34 |     [m, S] = plant.prop(m, S, plant, dynmodel, policy);	     % get next state
35 |     [L(k,t), d1, d2, v] = cost.fcn(cost, m, S);              % compute cost
36 |     s(k,t) = sqrt(v);
37 |   end
38 | end
39 | L = mean(L,1); s = mean(s,1); 
40 | 


--------------------------------------------------------------------------------
/base/prop.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/base/prop.pdf


--------------------------------------------------------------------------------
/base/trainDynModel.m:
--------------------------------------------------------------------------------
 1 | %% trainDynModel.m
 2 | % *Summary:* Script to learn the dynamics model
 3 | %
 4 | % Copyright (C) 2008-2013 by 
 5 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
 6 | %
 7 | % Last modification: 2013-05-20
 8 | %
 9 | %% High-Level Steps
10 | % # Extract states and controls from x-matrix
11 | % # Define the training inputs and targets of the GP
12 | % # Train the GP 
13 | 
14 | %% Code
15 | 
16 | 
17 | % 1. Train GP dynamics model
18 | Du = length(policy.maxU); Da = length(plant.angi); % no. of ctrl and angles
19 | xaug = [x(:,dyno) x(:,end-Du-2*Da+1:end-Du)];     % x augmented with angles
20 | dynmodel.inputs = [xaug(:,dyni) x(:,end-Du+1:end)];     % use dyni and ctrl
21 | dynmodel.targets = y(:,dyno);
22 | dynmodel.targets(:,difi) = dynmodel.targets(:,difi) - x(:,dyno(difi));
23 | 
24 | dynmodel = dynmodel.train(dynmodel, plant, trainOpt);  %  train dynamics GP
25 | 
26 | % display some hyperparameters
27 | Xh = dynmodel.hyp;     
28 | % noise standard deviations
29 | disp(['Learned noise std: ' num2str(exp(Xh(end,:)))]);
30 | % signal-to-noise ratios (values > 500 can cause numerical problems)
31 | disp(['SNRs             : ' num2str(exp(Xh(end-1,:)-Xh(end,:)))]);


--------------------------------------------------------------------------------
/base/value.m:
--------------------------------------------------------------------------------
 1 | %% value.m
 2 | % *Summary:* Compute expected (discounted) cumulative cost for a given (set of) initial
 3 | % state distributions
 4 | %
 5 | %     function [J, dJdp] = value(p, m0, S0, dynmodel, policy, plant, cost, H)
 6 | %
 7 | % *Input arguments:*
 8 | %
 9 | %   p            policy parameters chosen by minimize
10 | %   policy       policy structure
11 | %     .fcn       function which implements the policy
12 | %     .p         parameters passed to the policy
13 | %   m0           matrix (D by k) of initial state means
14 | %   S0           covariance matrix (D by D) for initial state
15 | %   dynmodel     dynamics model structure
16 | %   plant        plant structure
17 | %   cost         cost function structure
18 | %     .fcn       function handle to the cost
19 | %     .gamma     discount factor
20 | %  H             length of prediction horizon
21 | %
22 | % *Output arguments:*
23 | %
24 | %  J             expected cumulative (discounted) cost
25 | %  dJdp          (optional) derivative of J wrt the policy parameters
26 | %
27 | % Copyright (C) 2008-2013 by 
28 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
29 | %
30 | % Last modification: 2013-03-21
31 | %
32 | %% High-Level Steps
33 | % # Compute distribution of next state
34 | % # Compute corresponding expected immediate cost (discounted)
35 | % # At end of prediction horizon: sum all immediate costs up
36 | 
37 | function [J, dJdp] = value(p, m0, S0, dynmodel, policy, plant, cost, H)
38 | %% Code
39 | 
40 | policy.p = p;            % overwrite policy.p with new parameters from minimize
41 | p = unwrap(policy.p); dp = 0*p;
42 | m = m0; S = S0; L = zeros(1,H);
43 | 
44 | if nargout <= 1                                       % no derivatives required
45 |   
46 |   for t = 1:H                                  % for all time steps in horizon
47 |     [m, S] = plant.prop(m, S, plant, dynmodel, policy);      % get next state
48 |     L(t) = cost.gamma^t.*cost.fcn(cost, m, S);     % expected discounted cost
49 |   end
50 |   
51 | else                                               % otherwise, get derivatives
52 |   
53 |   dmOdp = zeros([size(m0,1), length(p)]);
54 |   dSOdp = zeros([size(m0,1)*size(m0,1), length(p)]);
55 |   
56 |   for t = 1:H                                  % for all time steps in horizon
57 |     [m, S, dmdmO, dSdmO, dmdSO, dSdSO, dmdp, dSdp] = ...
58 |       plant.prop(m, S, plant, dynmodel, policy); % get next state
59 |     
60 |     dmdp = dmdmO*dmOdp + dmdSO*dSOdp + dmdp;
61 |     dSdp = dSdmO*dmOdp + dSdSO*dSOdp + dSdp;
62 |     
63 |     [L(t), dLdm, dLdS] = cost.fcn(cost, m, S);              % predictive cost
64 |     L(t) = cost.gamma^t*L(t);                                      % discount
65 |     dp = dp + cost.gamma^t*( dLdm(:)'*dmdp + dLdS(:)'*dSdp )';
66 |     
67 |     dmOdp = dmdp; dSOdp = dSdp;                                 % bookkeeping
68 |   end
69 |   
70 | end
71 | 
72 | J = sum(L); dJdp = rewrap(policy.p, dp);
73 | 


--------------------------------------------------------------------------------
/doc/doc.out:
--------------------------------------------------------------------------------
 1 | \BOOKMARK [0][-]{chapter.1}{Introduction}{}% 1
 2 | \BOOKMARK [1][-]{section.1.1}{Intended Use}{chapter.1}% 2
 3 | \BOOKMARK [1][-]{section.1.2}{Software Design and Implementation}{chapter.1}% 3
 4 | \BOOKMARK [2][-]{subsection.1.2.1}{Model Learning}{section.1.2}% 4
 5 | \BOOKMARK [2][-]{subsection.1.2.2}{Policy Learning}{section.1.2}% 5
 6 | \BOOKMARK [2][-]{subsection.1.2.3}{Policy Application}{section.1.2}% 6
 7 | \BOOKMARK [1][-]{section.1.3}{User Interface by Example}{chapter.1}% 7
 8 | \BOOKMARK [1][-]{section.1.4}{Quick Start}{chapter.1}% 8
 9 | \BOOKMARK [0][-]{chapter.2}{Software Package Overview}{}% 9
10 | \BOOKMARK [1][-]{section.2.1}{Main Modules}{chapter.2}% 10
11 | \BOOKMARK [2][-]{subsection.2.1.1}{applyController}{section.2.1}% 11
12 | \BOOKMARK [2][-]{subsection.2.1.2}{trainDynModel}{section.2.1}% 12
13 | \BOOKMARK [2][-]{subsection.2.1.3}{learnPolicy}{section.2.1}% 13
14 | \BOOKMARK [1][-]{section.2.2}{Working with a Real Robot}{chapter.2}% 14
15 | \BOOKMARK [0][-]{chapter.3}{Important Function Interfaces}{}% 15
16 | \BOOKMARK [1][-]{section.3.1}{GP Predictions}{chapter.3}% 16
17 | \BOOKMARK [2][-]{subsection.3.1.1}{Input Arguments}{section.3.1}% 17
18 | \BOOKMARK [2][-]{subsection.3.1.2}{Output Arguments}{section.3.1}% 18
19 | \BOOKMARK [1][-]{section.3.2}{Controller}{chapter.3}% 19
20 | \BOOKMARK [2][-]{subsection.3.2.1}{Interface}{section.3.2}% 20
21 | \BOOKMARK [1][-]{section.3.3}{Cost Functions}{chapter.3}% 21
22 | \BOOKMARK [2][-]{subsection.3.3.1}{Interface for Scenario-specific Cost Functions}{section.3.3}% 22
23 | \BOOKMARK [2][-]{subsection.3.3.2}{General Building Blocks}{section.3.3}% 23
24 | \BOOKMARK [0][-]{chapter.4}{How to Create Your Own Scenario}{}% 24
25 | \BOOKMARK [1][-]{section.4.1}{Necessary Files}{chapter.4}% 25
26 | \BOOKMARK [1][-]{section.4.2}{ODE Dynamics}{chapter.4}% 26
27 | \BOOKMARK [1][-]{section.4.3}{Scenario-specific Settings}{chapter.4}% 27
28 | \BOOKMARK [2][-]{subsection.4.3.1}{Adding Paths}{section.4.3}% 28
29 | \BOOKMARK [2][-]{subsection.4.3.2}{Indices}{section.4.3}% 29
30 | \BOOKMARK [2][-]{subsection.4.3.3}{General Settings}{section.4.3}% 30
31 | \BOOKMARK [2][-]{subsection.4.3.4}{Plant Structure}{section.4.3}% 31
32 | \BOOKMARK [2][-]{subsection.4.3.5}{Policy Structure}{section.4.3}% 32
33 | \BOOKMARK [2][-]{subsection.4.3.6}{Cost Function Structure}{section.4.3}% 33
34 | 


--------------------------------------------------------------------------------
/doc/doc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/doc.pdf


--------------------------------------------------------------------------------
/doc/doc.toc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/doc.toc


--------------------------------------------------------------------------------
/doc/figures/cartPole.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/cartPole.pdf


--------------------------------------------------------------------------------
/doc/figures/cdp_sketch.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/cdp_sketch.pdf


--------------------------------------------------------------------------------
/doc/figures/cost_error_bar.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/cost_error_bar.pdf


--------------------------------------------------------------------------------
/doc/figures/cp.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/cp.pdf


--------------------------------------------------------------------------------
/doc/figures/cp_rollout.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/cp_rollout.pdf


--------------------------------------------------------------------------------
/doc/figures/example_trajectories.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/example_trajectories.pdf


--------------------------------------------------------------------------------
/doc/figures/example_trajectories2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/example_trajectories2.pdf


--------------------------------------------------------------------------------
/doc/figures/fX3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/fX3.pdf


--------------------------------------------------------------------------------
/doc/figures/flow.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/flow.pdf


--------------------------------------------------------------------------------
/doc/figures/learnP.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/learnP.pdf


--------------------------------------------------------------------------------
/doc/figures/pendubot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/pendubot.pdf


--------------------------------------------------------------------------------
/doc/figures/pendulum.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/pendulum.pdf


--------------------------------------------------------------------------------
/doc/figures/pendulum.pdf.ipe7:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/pendulum.pdf.ipe7


--------------------------------------------------------------------------------
/doc/figures/policy_opt.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/policy_opt.pdf


--------------------------------------------------------------------------------
/doc/figures/preliminary_policy.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/preliminary_policy.pdf


--------------------------------------------------------------------------------
/doc/figures/satLossPlot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/satLossPlot.pdf


--------------------------------------------------------------------------------
/doc/figures/squashed_preliminary_policy.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/squashed_preliminary_policy.pdf


--------------------------------------------------------------------------------
/doc/figures/squashing_fct.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/squashing_fct.pdf


--------------------------------------------------------------------------------
/doc/figures/squashing_fct2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/squashing_fct2.pdf


--------------------------------------------------------------------------------
/doc/figures/squashing_function.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/squashing_function.pdf


--------------------------------------------------------------------------------
/doc/figures/unicoords_frame2.tex:
--------------------------------------------------------------------------------
 1 | %set the plot display orientation
 2 | %synatax: \tdplotsetdisplay{\theta_d}{\phi_d}
 3 | \tdplotsetmaincoords{65}{100}
 4 | 
 5 | %define polar coordinates for some vector
 6 | %TODO: look into using 3d spherical coordinate system
 7 | \pgfmathsetmacro{\magW}{4}
 8 | \pgfmathsetmacro{\psiW}{-30}
 9 | \pgfmathsetmacro{\theW}{10}
10 | \pgfmathsetmacro{\phiW}{15}
11 | 
12 | %start tikz picture, and use the tdplot_main_coords style to implement the display 
13 | %coordinate transformation provided by 3dplot
14 | \footnotesize
15 | \begin{tikzpicture}[scale=0.85,tdplot_main_coords]
16 | 
17 | %set up some coordinates ... GET FROM MATLAB
18 | %-----------------------
19 | \coordinate (O) at (0,0,0);
20 | 
21 | \coordinate (B) at (3,5.1962,0);
22 | \coordinate (W) at (3.2256,5.0659,1.4772);
23 | \coordinate (Wt) at (3.6767,4.8054,4.4316);
24 | \coordinate (Wtp) at (3.2256,5.0659,4.4316);
25 | \coordinate (Wbp) at (3.2256,5.0659,0);
26 | \coordinate (T) at (3.2969,3.6799,5.7578);
27 | \coordinate (F) at (3.2612,4.3729,3.6175);
28 | 
29 | %draw figure contents
30 | %--------------------
31 | 
32 | 
33 | % draw wheel
34 | \tdplotsetrotatedcoords{\psiW}{100}{0} % add 90 to j angle
35 | \tdplotsetrotatedcoordsorigin{(W)}
36 | \draw[tdplot_rotated_coords,very thick,black!30,fill=black!30,fill opacity=0.2] (0,0,0) circle (1.5);
37 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12);
38 | 
39 | % draw frame
40 | \draw[very thick,black] (W) -- (T);
41 | \tdplotsetrotatedcoordsorigin{(F)}
42 | \tdplotsetrotatedcoords{\psiW}{100}{0}
43 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12);
44 | 
45 | % draw turntable
46 | \tdplotsetrotatedcoords{\psiW}{\theW}{\phiW}
47 | \tdplotsetrotatedcoordsorigin{(T)}
48 | \draw[tdplot_rotated_coords,very thick,black!30,fill=black!30,fill opacity=0.2] (0,0,0) circle (1.5);
49 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12);
50 | 
51 | % draw forces and moments
52 | \tdplotdrawarcarrow[red,thick,dashed]{(W)}{0.5}{50}{360}{}{}
53 | \tdplotsetrotatedcoords{0}{0}{0}
54 | \tdplotsetrotatedcoordsorigin{(W)}
55 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05);
56 | 
57 | \tdplotdrawarcarrow[red,thick,dashed]{(T)}{0.5}{50}{360}{}{}
58 | \tdplotsetrotatedcoords{0}{0}{0}
59 | \tdplotsetrotatedcoordsorigin{(T)}
60 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05);
61 | 
62 | \tdplotsetrotatedcoordsorigin{(F)}
63 | \draw[tdplot_rotated_coords,thick,-latex,red] (0,0,-0.12) -- (0,0,-1.2);
64 | 
65 | \node at (0,3.5,1.5) {\color{red}$W_\te{f}$};
66 | \node at (0,4,4.7) {\color{red}$G_\te{f}$};
67 | \node at (0,2.3,4) {\color{red}$P_\te{f}$};
68 | \node at (0,4.8,0.65) {\color{red}$F_\te{f}$};
69 | \node at (0,3.95,0.25) {\color{red}$Q_\te{f}$};
70 | 
71 | \end{tikzpicture}
72 | 


--------------------------------------------------------------------------------
/doc/figures/unicoords_frame2.tex~:
--------------------------------------------------------------------------------
 1 | %set the plot display orientation
 2 | %synatax: \tdplotsetdisplay{\theta_d}{\phi_d}
 3 | \tdplotsetmaincoords{65}{100}
 4 | 
 5 | %define polar coordinates for some vector
 6 | %TODO: look into using 3d spherical coordinate system
 7 | \pgfmathsetmacro{\magW}{4}
 8 | \pgfmathsetmacro{\psiW}{-30}
 9 | \pgfmathsetmacro{\theW}{10}
10 | \pgfmathsetmacro{\phiW}{15}
11 | 
12 | %start tikz picture, and use the tdplot_main_coords style to implement the display 
13 | %coordinate transformation provided by 3dplot
14 | \footnotesize
15 | \begin{tikzpicture}[scale=0.95,tdplot_main_coords]
16 | 
17 | %set up some coordinates ... GET FROM MATLAB
18 | %-----------------------
19 | \coordinate (O) at (0,0,0);
20 | 
21 | \coordinate (B) at (3,5.1962,0);
22 | \coordinate (W) at (3.2256,5.0659,1.4772);
23 | \coordinate (Wt) at (3.6767,4.8054,4.4316);
24 | \coordinate (Wtp) at (3.2256,5.0659,4.4316);
25 | \coordinate (Wbp) at (3.2256,5.0659,0);
26 | \coordinate (T) at (3.2969,3.6799,5.7578);
27 | \coordinate (F) at (3.2612,4.3729,3.6175);
28 | 
29 | %draw figure contents
30 | %--------------------
31 | 
32 | 
33 | % draw wheel
34 | \tdplotsetrotatedcoords{\psiW}{100}{0} % add 90 to j angle
35 | \tdplotsetrotatedcoordsorigin{(W)}
36 | \draw[tdplot_rotated_coords,very thick,black!30,fill=black!30,fill opacity=0.2] (0,0,0) circle (1.5);
37 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12);
38 | 
39 | % draw frame
40 | \draw[very thick,black] (W) -- (T);
41 | \tdplotsetrotatedcoordsorigin{(F)}
42 | \tdplotsetrotatedcoords{\psiW}{100}{0}
43 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12);
44 | 
45 | % draw turntable
46 | \tdplotsetrotatedcoords{\psiW}{\theW}{\phiW}
47 | \tdplotsetrotatedcoordsorigin{(T)}
48 | \draw[tdplot_rotated_coords,very thick,black!30,fill=black!30,fill opacity=0.2] (0,0,0) circle (1.5);
49 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12);
50 | 
51 | % draw forces and moments
52 | \tdplotdrawarcarrow[red,thick,dashed]{(W)}{0.5}{50}{360}{}{}
53 | \tdplotsetrotatedcoords{0}{0}{0}
54 | \tdplotsetrotatedcoordsorigin{(W)}
55 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05);
56 | 
57 | \tdplotdrawarcarrow[red,thick,dashed]{(T)}{0.5}{50}{360}{}{}
58 | \tdplotsetrotatedcoords{0}{0}{0}
59 | \tdplotsetrotatedcoordsorigin{(T)}
60 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05);
61 | 
62 | \tdplotsetrotatedcoordsorigin{(F)}
63 | \draw[tdplot_rotated_coords,thick,-latex,red] (0,0,-0.12) -- (0,0,-1.2);
64 | 
65 | \node at (0,3.5,1.5) {\color{red}$W_\te{f}$};
66 | \node at (0,4,4.7) {\color{red}$G_\te{f}$};
67 | \node at (0,2.3,4) {\color{red}$P_\te{f}$};
68 | \node at (0,4.8,0.65) {\color{red}$F_\te{f}$};
69 | \node at (0,3.95,0.25) {\color{red}$Q_\te{f}$};
70 | 
71 | \end{tikzpicture}


--------------------------------------------------------------------------------
/doc/figures/unicoords_turntable2.tex:
--------------------------------------------------------------------------------
 1 | %set the plot display orientation
 2 | %synatax: \tdplotsetdisplay{\theta_d}{\phi_d}
 3 | \tdplotsetmaincoords{65}{100}
 4 | 
 5 | %define polar coordinates for some vector
 6 | %TODO: look into using 3d spherical coordinate system
 7 | \pgfmathsetmacro{\magW}{4}
 8 | \pgfmathsetmacro{\psiW}{-30}
 9 | \pgfmathsetmacro{\theW}{10}
10 | \pgfmathsetmacro{\phiW}{15}
11 | 
12 | %start tikz picture, and use the tdplot_main_coords style to implement the display 
13 | %coordinate transformation provided by 3dplot
14 | \footnotesize
15 | \begin{tikzpicture}[scale=0.85,tdplot_main_coords]
16 | 
17 | %set up some coordinates ... GET FROM MATLAB
18 | %-----------------------
19 | \coordinate (O) at (0,0,0);
20 | 
21 | \coordinate (B) at (3,5.1962,0);
22 | \coordinate (W) at (3.2256,5.0659,1.4772);
23 | \coordinate (Wt) at (3.6767,4.8054,4.4316);
24 | \coordinate (Wtp) at (3.2256,5.0659,4.4316);
25 | \coordinate (Wbp) at (3.2256,5.0659,0);
26 | \coordinate (T) at (3.2969,3.6799,5.7578);
27 | \coordinate (F) at (3.2612,4.3729,3.6175);
28 | 
29 | %draw figure contents
30 | %--------------------
31 | 
32 | 
33 | % draw wheel
34 | \tdplotsetrotatedcoords{\psiW}{100}{0} % add 90 to j angle
35 | \tdplotsetrotatedcoordsorigin{(W)}
36 | \draw[tdplot_rotated_coords,very thick,black!30,fill=black!30,fill opacity=0.2] (0,0,0) circle (1.5);
37 | \draw[tdplot_rotated_coords,fill=black!30,black!30] (0,0,0) circle (0.12);
38 | 
39 | % draw frame
40 | \draw[very thick,black!30] (W) -- (T);
41 | \tdplotsetrotatedcoordsorigin{(F)}
42 | \tdplotsetrotatedcoords{\psiW}{100}{0}
43 | \draw[tdplot_rotated_coords,black!30,fill=black!30] (0,0,0) circle (0.12);
44 | 
45 | % draw turntable
46 | \tdplotsetrotatedcoords{\psiW}{\theW}{\phiW}
47 | \tdplotsetrotatedcoordsorigin{(T)}
48 | \draw[tdplot_rotated_coords,very thick,fill=black,fill opacity=0.2] (0,0,0) circle (1.5);
49 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12);
50 | 
51 | % draw forces and moments
52 | \tdplotdrawarcarrow[red,thick,dashed]{(T)}{0.5}{50}{360}{}{}
53 | \tdplotsetrotatedcoords{0}{0}{0}
54 | \tdplotsetrotatedcoordsorigin{(T)}
55 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05);
56 | \draw[tdplot_rotated_coords,thick,-latex,red] (0,0,-0.06) -- (0,0,-1.2);
57 | 
58 | \node at (0,2.9,2.9) {\color{red}$W_\te{t}$};
59 | \node at (0,4,4.7) {\color{red}$G_\te{t}$};
60 | \node at (0,2.3,4) {\color{red}$P_\te{t}$};
61 | 
62 | \end{tikzpicture}
63 | 


--------------------------------------------------------------------------------
/doc/figures/unicoords_turntable2.tex~:
--------------------------------------------------------------------------------
 1 | %set the plot display orientation
 2 | %synatax: \tdplotsetdisplay{\theta_d}{\phi_d}
 3 | \tdplotsetmaincoords{65}{100}
 4 | 
 5 | %define polar coordinates for some vector
 6 | %TODO: look into using 3d spherical coordinate system
 7 | \pgfmathsetmacro{\magW}{4}
 8 | \pgfmathsetmacro{\psiW}{-30}
 9 | \pgfmathsetmacro{\theW}{10}
10 | \pgfmathsetmacro{\phiW}{15}
11 | 
12 | %start tikz picture, and use the tdplot_main_coords style to implement the display 
13 | %coordinate transformation provided by 3dplot
14 | \footnotesize
15 | \begin{tikzpicture}[scale=0.95,tdplot_main_coords]
16 | 
17 | %set up some coordinates ... GET FROM MATLAB
18 | %-----------------------
19 | \coordinate (O) at (0,0,0);
20 | 
21 | \coordinate (B) at (3,5.1962,0);
22 | \coordinate (W) at (3.2256,5.0659,1.4772);
23 | \coordinate (Wt) at (3.6767,4.8054,4.4316);
24 | \coordinate (Wtp) at (3.2256,5.0659,4.4316);
25 | \coordinate (Wbp) at (3.2256,5.0659,0);
26 | \coordinate (T) at (3.2969,3.6799,5.7578);
27 | \coordinate (F) at (3.2612,4.3729,3.6175);
28 | 
29 | %draw figure contents
30 | %--------------------
31 | 
32 | 
33 | % draw wheel
34 | \tdplotsetrotatedcoords{\psiW}{100}{0} % add 90 to j angle
35 | \tdplotsetrotatedcoordsorigin{(W)}
36 | \draw[tdplot_rotated_coords,very thick,black!30,fill=black!30,fill opacity=0.2] (0,0,0) circle (1.5);
37 | \draw[tdplot_rotated_coords,fill=black!30,black!30] (0,0,0) circle (0.12);
38 | 
39 | % draw frame
40 | \draw[very thick,black!30] (W) -- (T);
41 | \tdplotsetrotatedcoordsorigin{(F)}
42 | \tdplotsetrotatedcoords{\psiW}{100}{0}
43 | \draw[tdplot_rotated_coords,black!30,fill=black!30] (0,0,0) circle (0.12);
44 | 
45 | % draw turntable
46 | \tdplotsetrotatedcoords{\psiW}{\theW}{\phiW}
47 | \tdplotsetrotatedcoordsorigin{(T)}
48 | \draw[tdplot_rotated_coords,very thick,fill=black,fill opacity=0.2] (0,0,0) circle (1.5);
49 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12);
50 | 
51 | % draw forces and moments
52 | \tdplotdrawarcarrow[red,thick,dashed]{(T)}{0.5}{50}{360}{}{}
53 | \tdplotsetrotatedcoords{0}{0}{0}
54 | \tdplotsetrotatedcoordsorigin{(T)}
55 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05);
56 | \draw[tdplot_rotated_coords,thick,-latex,red] (0,0,-0.06) -- (0,0,-1.2);
57 | 
58 | \node at (0,2.9,2.9) {\color{red}$W_\te{t}$};
59 | \node at (0,4,4.7) {\color{red}$G_\te{t}$};
60 | \node at (0,2.3,4) {\color{red}$P_\te{t}$};
61 | 
62 | \end{tikzpicture}


--------------------------------------------------------------------------------
/doc/figures/unicoords_wheel2.tex:
--------------------------------------------------------------------------------
 1 | %set the plot display orientation
 2 | %synatax: \tdplotsetdisplay{\theta_d}{\phi_d}
 3 | \tdplotsetmaincoords{65}{100}
 4 | 
 5 | %define polar coordinates for some vector
 6 | %TODO: look into using 3d spherical coordinate system
 7 | \pgfmathsetmacro{\magW}{4}
 8 | \pgfmathsetmacro{\psiW}{-30}
 9 | \pgfmathsetmacro{\theW}{10}
10 | \pgfmathsetmacro{\phiW}{15}
11 | 
12 | %start tikz picture, and use the tdplot_main_coords style to implement the display 
13 | %coordinate transformation provided by 3dplot
14 | \footnotesize
15 | \begin{tikzpicture}[scale=0.85,tdplot_main_coords]
16 | 
17 | %set up some coordinates ... GET FROM MATLAB
18 | %-----------------------
19 | \coordinate (O) at (1.7321,-1,0);
20 | \coordinate (B) at (2.5,4.3301,0);
21 | \coordinate (Bp) at (4.2321,3.3301,0);
22 | \coordinate (W) at (2.7256,4.1999,1.4772);
23 | \coordinate (Wt) at (3.1767,3.9394,4.4316);
24 | \coordinate (Wtp) at (2.7256,4.1999,4.4316);
25 | \coordinate (Wbp) at (2.7256,4.1999,0);
26 | \coordinate (T) at (2.7969,2.8138,5.7578);
27 | \coordinate (F) at (2.7612,3.5069,3.6175);
28 | 
29 | %draw figure contents
30 | %--------------------
31 | 
32 | 
33 | % draw frame
34 | \draw[very thick,black!30] (W) -- (T);
35 | \tdplotsetrotatedcoords{\psiW}{100}{0}
36 | \tdplotsetrotatedcoordsorigin{(F)}
37 | \draw[tdplot_rotated_coords,fill=black!30,black!30] (0,0,0) circle (0.12);
38 | 
39 | % draw turntable
40 | \tdplotsetrotatedcoords{\psiW}{\theW}{\phiW}
41 | \tdplotsetrotatedcoordsorigin{(T)}
42 | \draw[tdplot_rotated_coords,very thick,black!30,fill=black!30,fill opacity=0.2] (0,0,0) circle (1.5);
43 | \draw[tdplot_rotated_coords,fill=black!30,black!30] (0,0,0) circle (0.12);
44 | 
45 | % draw wheel
46 | \tdplotsetrotatedcoords{\psiW}{\theW}{0}
47 | \tdplotsetrotatedcoordsorigin{(W)}
48 | 
49 | \tdplotsetrotatedcoords{\psiW}{100}{0} % add 90 to j angle
50 | \draw[tdplot_rotated_coords,very thick,fill=black,fill opacity=0.2] (0,0,0) circle (1.5);
51 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12);
52 | 
53 | % draw forces and moments
54 | \tdplotdrawarcarrow[red,thick,dashed]{(W)}{0.5}{50}{360}{}{}
55 | \tdplotsetrotatedcoords{0}{0}{0}
56 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05);
57 | \draw[tdplot_rotated_coords,thick,-latex,red] (0,0,-0.12) -- (0,0,-1.1);
58 | 
59 | \tdplotdrawarcarrow[red,thick]{(B)}{0.5}{50}{360}{}{}
60 | \tdplotsetrotatedcoordsorigin{(B)}
61 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05);
62 | 
63 | \node at (0,4.0,0.85) {\color{red}$F_\te{w}$};
64 | \node at (0,3.2,0.55) {\color{red}$Q_\te{w}$};
65 | \node at (0,4.3,-0.4) {\color{red}$R$};
66 | \node at (0,3.2,-1.2) {\color{red}$T$};
67 | \node at (0,3.4,-0.45) {\color{red}$W_\te{w}$};
68 | 
69 | \end{tikzpicture}
70 | 


--------------------------------------------------------------------------------
/doc/figures/unicoords_wheel2.tex~:
--------------------------------------------------------------------------------
 1 | %set the plot display orientation
 2 | %synatax: \tdplotsetdisplay{\theta_d}{\phi_d}
 3 | \tdplotsetmaincoords{65}{100}
 4 | 
 5 | %define polar coordinates for some vector
 6 | %TODO: look into using 3d spherical coordinate system
 7 | \pgfmathsetmacro{\magW}{4}
 8 | \pgfmathsetmacro{\psiW}{-30}
 9 | \pgfmathsetmacro{\theW}{10}
10 | \pgfmathsetmacro{\phiW}{15}
11 | 
12 | %start tikz picture, and use the tdplot_main_coords style to implement the display 
13 | %coordinate transformation provided by 3dplot
14 | \footnotesize
15 | \begin{tikzpicture}[scale=0.95,tdplot_main_coords]
16 | 
17 | %set up some coordinates ... GET FROM MATLAB
18 | %-----------------------
19 | \coordinate (O) at (1.7321,-1,0);
20 | \coordinate (B) at (2.5,4.3301,0);
21 | \coordinate (Bp) at (4.2321,3.3301,0);
22 | \coordinate (W) at (2.7256,4.1999,1.4772);
23 | \coordinate (Wt) at (3.1767,3.9394,4.4316);
24 | \coordinate (Wtp) at (2.7256,4.1999,4.4316);
25 | \coordinate (Wbp) at (2.7256,4.1999,0);
26 | \coordinate (T) at (2.7969,2.8138,5.7578);
27 | \coordinate (F) at (2.7612,3.5069,3.6175);
28 | 
29 | %draw figure contents
30 | %--------------------
31 | 
32 | 
33 | % draw frame
34 | \draw[very thick,black!30] (W) -- (T);
35 | \tdplotsetrotatedcoords{\psiW}{100}{0}
36 | \tdplotsetrotatedcoordsorigin{(F)}
37 | \draw[tdplot_rotated_coords,fill=black!30,black!30] (0,0,0) circle (0.12);
38 | 
39 | % draw turntable
40 | \tdplotsetrotatedcoords{\psiW}{\theW}{\phiW}
41 | \tdplotsetrotatedcoordsorigin{(T)}
42 | \draw[tdplot_rotated_coords,very thick,black!30,fill=black!30,fill opacity=0.2] (0,0,0) circle (1.5);
43 | \draw[tdplot_rotated_coords,fill=black!30,black!30] (0,0,0) circle (0.12);
44 | 
45 | % draw wheel
46 | \tdplotsetrotatedcoords{\psiW}{\theW}{0}
47 | \tdplotsetrotatedcoordsorigin{(W)}
48 | 
49 | \tdplotsetrotatedcoords{\psiW}{100}{0} % add 90 to j angle
50 | \draw[tdplot_rotated_coords,very thick,fill=black,fill opacity=0.2] (0,0,0) circle (1.5);
51 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12);
52 | 
53 | % draw forces and moments
54 | \tdplotdrawarcarrow[red,thick,dashed]{(W)}{0.5}{50}{360}{}{}
55 | \tdplotsetrotatedcoords{0}{0}{0}
56 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05);
57 | \draw[tdplot_rotated_coords,thick,-latex,red] (0,0,-0.12) -- (0,0,-1.1);
58 | 
59 | \tdplotdrawarcarrow[red,thick]{(B)}{0.5}{50}{360}{}{}
60 | \tdplotsetrotatedcoordsorigin{(B)}
61 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05);
62 | 
63 | \node at (0,4.0,0.85) {\color{red}$F_\te{w}$};
64 | \node at (0,3.2,0.55) {\color{red}$Q_\te{w}$};
65 | \node at (0,4.3,-0.4) {\color{red}$R$};
66 | \node at (0,3.2,-1.2) {\color{red}$T$};
67 | \node at (0,3.4,-0.45) {\color{red}$W_\te{w}$};
68 | 
69 | \end{tikzpicture}


--------------------------------------------------------------------------------
/doc/figures/unicycle.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/unicycle.pdf


--------------------------------------------------------------------------------
/doc/html/augment_unicycle_eq57428.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/augment_unicycle_eq57428.png


--------------------------------------------------------------------------------
/doc/html/conCat_eq10310.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/conCat_eq10310.png


--------------------------------------------------------------------------------
/doc/html/conCat_eq35435.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/conCat_eq35435.png


--------------------------------------------------------------------------------
/doc/html/conT_eq96160.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/conT_eq96160.png


--------------------------------------------------------------------------------
/doc/html/congp_eq35435.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/congp_eq35435.png


--------------------------------------------------------------------------------
/doc/html/congp_eq43551.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/congp_eq43551.png


--------------------------------------------------------------------------------
/doc/html/conlin_eq10310.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/conlin_eq10310.png


--------------------------------------------------------------------------------
/doc/html/conlin_eq42328.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/conlin_eq42328.png


--------------------------------------------------------------------------------
/doc/html/conlin_eq81653.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/conlin_eq81653.png


--------------------------------------------------------------------------------
/doc/html/conlin_eq95465.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/conlin_eq95465.png


--------------------------------------------------------------------------------
/doc/html/gSat_eq24615.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSat_eq24615.png


--------------------------------------------------------------------------------
/doc/html/gSat_eq28128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSat_eq28128.png


--------------------------------------------------------------------------------
/doc/html/gSat_eq35427.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSat_eq35427.png


--------------------------------------------------------------------------------
/doc/html/gSat_eq53557.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSat_eq53557.png


--------------------------------------------------------------------------------
/doc/html/gSat_eq97420.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSat_eq97420.png


--------------------------------------------------------------------------------
/doc/html/gSin_eq00934.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSin_eq00934.png


--------------------------------------------------------------------------------
/doc/html/gSin_eq24615.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSin_eq24615.png


--------------------------------------------------------------------------------
/doc/html/gSin_eq28128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSin_eq28128.png


--------------------------------------------------------------------------------
/doc/html/gSin_eq35427.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSin_eq35427.png


--------------------------------------------------------------------------------
/doc/html/gSin_eq53557.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSin_eq53557.png


--------------------------------------------------------------------------------
/doc/html/gTrigT_eq41681.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gTrigT_eq41681.png


--------------------------------------------------------------------------------
/doc/html/gTrig_eq00934.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gTrig_eq00934.png


--------------------------------------------------------------------------------
/doc/html/gTrig_eq24615.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gTrig_eq24615.png


--------------------------------------------------------------------------------
/doc/html/gTrig_eq28128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gTrig_eq28128.png


--------------------------------------------------------------------------------
/doc/html/gTrig_eq35427.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gTrig_eq35427.png


--------------------------------------------------------------------------------
/doc/html/gTrig_eq53557.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gTrig_eq53557.png


--------------------------------------------------------------------------------
/doc/html/getPlotDistr_cdp_eq30108.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/getPlotDistr_cdp_eq30108.png


--------------------------------------------------------------------------------
/doc/html/getPlotDistr_cdp_eq43551.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/getPlotDistr_cdp_eq43551.png


--------------------------------------------------------------------------------
/doc/html/getPlotDistr_cp_eq30108.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/getPlotDistr_cp_eq30108.png


--------------------------------------------------------------------------------
/doc/html/getPlotDistr_cp_eq43551.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/getPlotDistr_cp_eq43551.png


--------------------------------------------------------------------------------
/doc/html/getPlotDistr_dp_eq30108.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/getPlotDistr_dp_eq30108.png


--------------------------------------------------------------------------------
/doc/html/getPlotDistr_dp_eq43551.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/getPlotDistr_dp_eq43551.png


--------------------------------------------------------------------------------
/doc/html/getPlotDistr_pendubot_eq30108.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/getPlotDistr_pendubot_eq30108.png


--------------------------------------------------------------------------------
/doc/html/getPlotDistr_pendubot_eq43551.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/getPlotDistr_pendubot_eq43551.png


--------------------------------------------------------------------------------
/doc/html/gpT_eq96160.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gpT_eq96160.png


--------------------------------------------------------------------------------
/doc/html/lossQuad_eq15684.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/lossQuad_eq15684.png


--------------------------------------------------------------------------------
/doc/html/lossQuad_eq95740.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/lossQuad_eq95740.png


--------------------------------------------------------------------------------
/doc/html/lossSat_eq21635.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/lossSat_eq21635.png


--------------------------------------------------------------------------------
/doc/html/lossSat_eq65484.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/lossSat_eq65484.png


--------------------------------------------------------------------------------
/doc/html/lossT_eq96160.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/lossT_eq96160.png


--------------------------------------------------------------------------------
/doc/html/loss_cdp_eq26389.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cdp_eq26389.png


--------------------------------------------------------------------------------
/doc/html/loss_cdp_eq30075.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cdp_eq30075.png


--------------------------------------------------------------------------------
/doc/html/loss_cdp_eq32292.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cdp_eq32292.png


--------------------------------------------------------------------------------
/doc/html/loss_cdp_eq39223.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cdp_eq39223.png


--------------------------------------------------------------------------------
/doc/html/loss_cdp_eq72999.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cdp_eq72999.png


--------------------------------------------------------------------------------
/doc/html/loss_cdp_eq81831.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cdp_eq81831.png


--------------------------------------------------------------------------------
/doc/html/loss_cp_eq26389.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cp_eq26389.png


--------------------------------------------------------------------------------
/doc/html/loss_cp_eq30075.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cp_eq30075.png


--------------------------------------------------------------------------------
/doc/html/loss_cp_eq32292.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cp_eq32292.png


--------------------------------------------------------------------------------
/doc/html/loss_cp_eq39223.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cp_eq39223.png


--------------------------------------------------------------------------------
/doc/html/loss_cp_eq72999.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cp_eq72999.png


--------------------------------------------------------------------------------
/doc/html/loss_cp_eq81831.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cp_eq81831.png


--------------------------------------------------------------------------------
/doc/html/loss_dp_eq26389.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_dp_eq26389.png


--------------------------------------------------------------------------------
/doc/html/loss_dp_eq30075.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_dp_eq30075.png


--------------------------------------------------------------------------------
/doc/html/loss_dp_eq32292.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_dp_eq32292.png


--------------------------------------------------------------------------------
/doc/html/loss_dp_eq39223.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_dp_eq39223.png


--------------------------------------------------------------------------------
/doc/html/loss_dp_eq72999.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_dp_eq72999.png


--------------------------------------------------------------------------------
/doc/html/loss_dp_eq81831.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_dp_eq81831.png


--------------------------------------------------------------------------------
/doc/html/loss_pendubot_eq26389.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendubot_eq26389.png


--------------------------------------------------------------------------------
/doc/html/loss_pendubot_eq30075.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendubot_eq30075.png


--------------------------------------------------------------------------------
/doc/html/loss_pendubot_eq32292.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendubot_eq32292.png


--------------------------------------------------------------------------------
/doc/html/loss_pendubot_eq39223.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendubot_eq39223.png


--------------------------------------------------------------------------------
/doc/html/loss_pendubot_eq72999.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendubot_eq72999.png


--------------------------------------------------------------------------------
/doc/html/loss_pendubot_eq81831.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendubot_eq81831.png


--------------------------------------------------------------------------------
/doc/html/loss_pendulum_eq26389.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendulum_eq26389.png


--------------------------------------------------------------------------------
/doc/html/loss_pendulum_eq30075.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendulum_eq30075.png


--------------------------------------------------------------------------------
/doc/html/loss_pendulum_eq32292.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendulum_eq32292.png


--------------------------------------------------------------------------------
/doc/html/loss_pendulum_eq39223.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendulum_eq39223.png


--------------------------------------------------------------------------------
/doc/html/loss_pendulum_eq72999.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendulum_eq72999.png


--------------------------------------------------------------------------------
/doc/html/loss_pendulum_eq81831.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendulum_eq81831.png


--------------------------------------------------------------------------------
/doc/html/loss_unicycle_eq22825.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_unicycle_eq22825.png


--------------------------------------------------------------------------------
/doc/html/loss_unicycle_eq26389.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_unicycle_eq26389.png


--------------------------------------------------------------------------------
/doc/html/loss_unicycle_eq32292.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_unicycle_eq32292.png


--------------------------------------------------------------------------------
/doc/html/loss_unicycle_eq34276.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_unicycle_eq34276.png


--------------------------------------------------------------------------------
/doc/html/loss_unicycle_eq72999.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_unicycle_eq72999.png


--------------------------------------------------------------------------------
/doc/html/loss_unicycle_eq81831.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_unicycle_eq81831.png


--------------------------------------------------------------------------------
/doc/html/reward_eq37930.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/reward_eq37930.png


--------------------------------------------------------------------------------
/doc/html/reward_eq81456.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/reward_eq81456.png


--------------------------------------------------------------------------------
/doc/html/rewrap_eq72999.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/rewrap_eq72999.png


--------------------------------------------------------------------------------
/doc/html/rewrap_eq77996.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/rewrap_eq77996.png


--------------------------------------------------------------------------------
/doc/html/rollout_eq25411.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/rollout_eq25411.png


--------------------------------------------------------------------------------
/doc/html/rollout_eq35435.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/rollout_eq35435.png


--------------------------------------------------------------------------------
/doc/html/rollout_eq43551.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/rollout_eq43551.png


--------------------------------------------------------------------------------
/doc/html/unwrap_eq72999.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/unwrap_eq72999.png


--------------------------------------------------------------------------------
/doc/html/unwrap_eq77996.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/unwrap_eq77996.png


--------------------------------------------------------------------------------
/doc/plots/header.m:
--------------------------------------------------------------------------------
1 | set(0,'defaultaxesfontsize',24);
2 | set(0,'defaultaxesfontunits', 'points')
3 | set(0,'defaulttextfontsize',26);
4 | set(0,'defaulttextfontunits','points')
5 | set(0,'defaultaxeslinewidth',1);
6 | set(0,'defaultlinelinewidth',2);
7 | % set(0,'DefaultTextInterpreter','Latex')
8 | set(0,'DefaultAxesLineStyleOrder','-|--|:|-.');
9 | 


--------------------------------------------------------------------------------
/doc/plots/plot_policySquash.m:
--------------------------------------------------------------------------------
 1 | header;
 2 | clear all; close all
 3 | addpath(genpath('../../'));
 4 | randn('state',7); rand('state', 1);
 5 | 
 6 | covfunc = {'covSum', {'covSEard', 'covNoise'}};        % specify ARD covariance
 7 | 
 8 | 
 9 | n = 50;
10 | 
11 | Axis = [-5 5 -1.5 2];
12 | 
13 | dynmodel.inputs = rand(n,1)*10 - 5;
14 | dynmodel.hyp = [0.2, 1, 0.01]';
15 | dynmodel.target = 1.5*randn(n,1);
16 | 
17 | xx = linspace(-5,5,101)';
18 | xx2 = linspace(-pi/2,pi/2, 101);
19 | 
20 | prelPolicy = gpr(dynmodel.hyp, covfunc, dynmodel.inputs, dynmodel.target, xx);
21 | 
22 | figure(1); clf; hold on;
23 | plot(xx, prelPolicy);
24 | plot(xx, ones(size(xx)), 'k--');
25 | plot(xx, -ones(size(xx)), 'k--');
26 | 
27 | xlabel('$x$', 'interpreter', 'latex')
28 | ylabel('$\tilde\pi(x)$', 'interpreter', 'latex')
29 | axis(Axis)
30 | set(gcf,'PaperSize', [12 6]); 
31 | set(gcf,'PaperPosition',[0.1 0.1 12 6]);
32 | print_pdf('../figures/preliminary_policy');
33 | 
34 | for i = 1:size(xx,1)
35 |   squashedPolicy(i) = gSat(prelPolicy(i), 0, 1);
36 |   sqashingFct(i) = gSat(xx2(i), 0, 1);
37 | end
38 | 
39 | figure(2); clf; hold on;
40 | plot(xx, squashedPolicy);
41 | plot(xx, ones(size(xx)), 'k--');
42 | plot(xx, -ones(size(xx)), 'k--');
43 | xlabel('$x$', 'interpreter', 'latex')
44 | ylabel('$\pi(x)$','interpreter', 'latex')
45 | axis(Axis)
46 | set(gcf,'PaperSize', [12 6]); set(gcf,'PaperPosition',[0.1 0.1 12 6]);
47 | print_pdf('../figures/squashed_preliminary_policy');
48 | 


--------------------------------------------------------------------------------
/doc/plots/plot_satLoss.m:
--------------------------------------------------------------------------------
 1 | % plot the saturating cost function
 2 | 
 3 | clear all; close all;
 4 | header;
 5 | 
 6 | addpath ('../../loss/');
 7 | addpath ('../../util/');
 8 | 
 9 | cost.fcn = @lossSat;
10 | cost.z = 0;              % target state
11 | cost.width = 0.5;
12 | 
13 | 
14 | cost.W = 1/(cost.width).^2; % weight matrix
15 | 
16 | x = linspace(0,3,200)';  % array of distances
17 | 
18 | c = zeros(1,length(x));
19 | 
20 | % evaluate cost for all distances x
21 | for i = 1:length(x)
22 |   c(i) = cost.fcn(cost, x(i), 0);
23 | end
24 | 
25 | % evaluate cost at 2-sigma bound
26 | c2 = cost.fcn(cost, 2*cost.width, 0);
27 | 
28 | figure; 
29 | hold on
30 | plot(x,c); % plot cost
31 | plot(2*cost.width, c2, 'ro', 'markersize', 8); % plot 2-sigma cost value
32 | plot(linspace(0,2*cost.width,100), c2*ones(1,100), 'k-'); % plot line
33 | text(cost.width/4, c2+0.05, '$2\texttt{cost.width}$','interpreter', 'latex')
34 | xlabel('Distance to target')
35 | ylabel('Cost')
36 | axis([x(1) x(end) 0 1.01]);
37 | 
38 | set(gcf,'PaperSize', [12 6]);
39 | set(gcf,'PaperPosition',[0.1 0.1 12 6]);
40 | print_pdf('../figures/satLossPlot');
41 | 


--------------------------------------------------------------------------------
/doc/plots/plot_squash.m:
--------------------------------------------------------------------------------
 1 | % plot the squashing function
 2 | 
 3 | clear all; close all;
 4 | header;
 5 | 
 6 | 
 7 | x = linspace(-5/2*pi,5/2*pi,1000);
 8 | 
 9 | squash = @(x) (9*sin(x) + sin(3.*x))./8;
10 | 
11 | 
12 | y = squash(x);
13 | 
14 | % plot a few periods 
15 | figure(1);
16 | clf; hold on;
17 | plot(x,y);
18 | xlabel('$x$', 'interpreter', 'latex');
19 | ylabel('$\sigma(x)$', 'interpreter', 'latex');
20 | axis tight;
21 | 
22 | set(gcf,'PaperSize', [12 6]);
23 | set(gcf,'PaperPosition',[0.1 0.1 12 6]);
24 | print_pdf('../figures/squashing_fct');
25 | 
26 | % plot a single period
27 | axis([-pi/2, pi/2, -1, 1])
28 | print_pdf('../figures/squashing_fct2');


--------------------------------------------------------------------------------
/doc/tex/DoublePend_learn.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{DoublePend\_learn}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Script to learn a controller for the double-pendulum swingup with two actuated joints
13 | \end{par} \vspace{1em}
14 | \begin{par}
15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
16 | \end{par} \vspace{1em}
17 | \begin{par}
18 | Last modified: 2013-03-27
19 | \end{par} \vspace{1em}
20 | 
21 | 
22 | \subsection*{High-Level Steps} 
23 | 
24 | \begin{enumerate}
25 | \setlength{\itemsep}{-1ex}
26 |    \item Load parameters
27 |    \item Create J initial trajectories by applying random controls
28 |    \item Controlled learning (train dynamics model, policy learning, policy application)
29 | \end{enumerate}
30 | 
31 | 
32 | \subsection*{Code} 
33 | 
34 | 
35 | \begin{lstlisting}
36 | % 1. Initialization
37 | clear all; close all;
38 | settings_dp;                  % load scenario-specific settings
39 | basename = 'doublepend_';     % filename used for saving data
40 | 
41 | % 2. Initial J random rollouts
42 | for jj = 1:J
43 |   [xx, yy, realCost{jj}, latent{jj}] = ...
44 |     rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost);
45 |   x = [x; xx]; y = [y; yy];       % augment training sets for dynamics model
46 |   if plotting.verbosity > 0;      % visualization of trajectory
47 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
48 |     draw_rollout_dp;
49 |   end
50 | end
51 | 
52 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0;
53 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno);
54 | 
55 | % 3. Controlled learning (N iterations)
56 | for j = 1:N
57 |   trainDynModel;   % train (GP) dynamics model
58 |   learnPolicy;     % learn policy
59 |   applyController; % apply controller to system
60 |   disp(['controlled trial # ' num2str(j)]);
61 |   if plotting.verbosity > 0;      % visualization of trajectory
62 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
63 |     draw_rollout_dp;
64 |   end
65 | end
66 | \end{lstlisting}
67 | 


--------------------------------------------------------------------------------
/doc/tex/applyController.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{applyController.m}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Script to apply the learned controller to a (simulated) system
13 | \end{par} \vspace{1em}
14 | \begin{par}
15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
16 | \end{par} \vspace{1em}
17 | \begin{par}
18 | Last modified: 2013-06-04
19 | \end{par} \vspace{1em}
20 | 
21 | 
22 | \subsection*{High-Level Steps} 
23 | 
24 | \begin{enumerate}
25 | \setlength{\itemsep}{-1ex}
26 |    \item Generate a single trajectory rollout by applying the controller
27 |    \item Generate many rollouts for testing the performance of the controller
28 |    \item Save the data
29 | \end{enumerate}
30 | 
31 | 
32 | \subsection*{Code} 
33 | 
34 | 
35 | \begin{lstlisting}
36 | % 1. Generate trajectory rollout given the current policy
37 | if isfield(plant,'constraint'), HH = maxH; else HH = H; end
38 | [xx, yy, realCost{j+J}, latent{j}] = ...
39 |   rollout(gaussian(mu0, S0), policy, HH, plant, cost);
40 | disp(xx);                           % display states of observed trajectory
41 | x = [x; xx]; y = [y; yy];                            % augment training set
42 | if plotting.verbosity > 0
43 |   if ~ishandle(3); figure(3); else set(0,'CurrentFigure',3); end
44 |   hold on; plot(1:length(realCost{J+j}),realCost{J+j},'r'); drawnow;
45 | end
46 | 
47 | % 2. Make many rollouts to test the controller quality
48 | if plotting.verbosity > 1
49 |   lat = cell(1,10);
50 |   for i=1:10
51 |     [~,~,~,lat{i}] = rollout(gaussian(mu0, S0), policy, HH, plant, cost);
52 |   end
53 | 
54 |   if ~ishandle(4); figure(4); else set(0,'CurrentFigure',4); end; clf(4);
55 | 
56 |   ldyno = length(dyno);
57 |   for i=1:ldyno       % plot the rollouts on top of predicted error bars
58 |     subplot(ceil(ldyno/sqrt(ldyno)),ceil(sqrt(ldyno)),i); hold on;
59 |     errorbar( 0:length(M{j}(i,:))-1, M{j}(i,:), ...
60 |       2*sqrt(squeeze(Sigma{j}(i,i,:))) );
61 |     for ii=1:10
62 |       plot( 0:size(lat{ii}(:,dyno(i)),1)-1, lat{ii}(:,dyno(i)), 'r' );
63 |     end
64 |     plot( 0:size(latent{j}(:,dyno(i)),1)-1, latent{j}(:,dyno(i)),'g');
65 |     axis tight
66 |   end
67 |   drawnow;
68 | end
69 | 
70 | % 3. Save data
71 | filename = [basename num2str(j) '_H' num2str(H)]; save(filename);
72 | \end{lstlisting}
73 | 


--------------------------------------------------------------------------------
/doc/tex/augment_unicycle.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 | 
10 | \subsection*{augment\_unicycle.m} 
11 | 
12 | \begin{par}
13 | \textbf{Summary:} The function computes the $(x,y)$ velocities of the contact point in both absolute and unicycle coordinates as well as the the unicycle coordinates of the contact point themselves.
14 | \end{par} \vspace{1em}
15 | 
16 | \begin{verbatim}     function r = augment(s)\end{verbatim}
17 |     \begin{par}
18 | \textbf{Input arguments:}
19 | \end{par} \vspace{1em}
20 | 
21 | \begin{lstlisting}
22 | %		s     state of the unicycle (including the torques).             [1 x 18]
23 | %         The state is assumed to be given as follows:
24 | %         dx      empty (to be filled by this function)
25 | %         dy      empty (to be filled by this function)
26 | %         dxc     empty (to be filled by this function)
27 | %         dyc     empty (to be filled by this function)
28 | %         dtheta  roll angular velocity
29 | %         dphi    yaw angular velocity
30 | %         dpsiw   wheel angular velocity
31 | %         dpsif   pitch angular velocity
32 | %         dpsit   turn table angular velocity
33 | %         x       x position
34 | %         y       y position
35 | %         xc      empty (to be filled by this function)
36 | %         yc      empty (to be filled by this function)
37 | %         theta   roll angle
38 | %         phi     yaw angle
39 | %         psiw    wheel angle
40 | %         psif    pitch angle
41 | %         psit    turn table angle
42 | %
43 | % *Output arguments:*
44 | %
45 | %   r     additional variables that are computed based on s:          [1 x 6]
46 | %         dx    x velocity of contact point (global coordinates)
47 | %         dy    y velocity of contact point (global coordinates)
48 | %         dxc   x velocity of contact point (unicycle coordinates)
49 | %         dyc   y velocity of contact point (unicycle coordinates)
50 | %         xc    x position of contact point (unicycle coordinates)
51 | %         yc    y position of contact point (unicycle coordinates)
52 | %
53 | %
54 | % Copyright (C) 2008-2013 by
55 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
56 | %
57 | % Last modified: 2013-03-27
58 | 
59 | 
60 | function r = augment_unicycle(s)
61 | \end{lstlisting}
62 | 
63 | 
64 | \subsection*{Code} 
65 | 
66 | 
67 | \begin{lstlisting}
68 | rw = 0.225;                               % wheel radius in meters
69 | 
70 | % x velocity of contact point (global coordinates)
71 | r(1) = rw*cos(s(15))*s(7);
72 | % y velocity of contact point (global coordinates)
73 | r(2) = rw*sin(s(15))*s(7);
74 | % (x,y) velocities of contact point (unicycle coordinates)
75 | A = -[cos(s(15)) sin(s(15)); -sin(s(15)) cos(s(15))];
76 | dA = -s(6)*[-sin(s(15)) cos(s(15)); -cos(s(15)) -sin(s(15))];
77 | r(3:4) = A*r(1:2)' + dA*s(10:11)';
78 | % (x,y) coordinates of contact point (unicycle coordinates)
79 | r(5:6) = A*s(10:11)';
80 | \end{lstlisting}
81 | 


--------------------------------------------------------------------------------
/doc/tex/calcCost.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{calcCost.m}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Function to calculate the incurred cost and its standard deviation, given a sequence of predicted state distributions and the cost struct
13 | \end{par} \vspace{1em}
14 | \begin{verbatim}[L sL] = calcCost(cost, M, S)\end{verbatim}
15 | \begin{par}
16 | \textbf{Input arguments:}
17 | \end{par} \vspace{1em}
18 | \begin{verbatim}cost               cost structure
19 | M                  mean vectors of state trajectory (D-by-H matrix)
20 | S                  covariance matrices at each time step (D-by-D-by-H)\end{verbatim}
21 | \begin{par}
22 | \textbf{Output arguments:}
23 | \end{par} \vspace{1em}
24 | \begin{verbatim}L                  expected incurred cost of state trajectory
25 | sL                 standard deviation of incurred cost\end{verbatim}
26 | \begin{par}
27 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
28 | \end{par} \vspace{1em}
29 | \begin{par}
30 | Last modified: 2013-01-23
31 | \end{par} \vspace{1em}
32 | 
33 | 
34 | \subsection*{High-Level Steps} 
35 | 
36 | \begin{enumerate}
37 | \setlength{\itemsep}{-1ex}
38 |    \item Augment state distribution with trigonometric functions
39 |    \item Compute distribution of the control signal
40 |    \item Compute dynamics-GP prediction
41 |    \item Compute distribution of the next state
42 | \end{enumerate}
43 | 
44 | \begin{lstlisting}
45 | function [L sL] = calcCost(cost, M, S)
46 | \end{lstlisting}
47 | 
48 | 
49 | \subsection*{Code} 
50 | 
51 | 
52 | \begin{lstlisting}
53 | H = size(M,2);                                             % horizon length
54 | L = zeros(1,H); SL = zeros(1,H);
55 | 
56 | % for each time step, compute the expected cost and its variance
57 | for h = 1:H
58 |   [L(h),d1,d2,SL(h)]  = cost.fcn(cost, M(:,h), S(:,:,h));
59 | end
60 | 
61 | sL = sqrt(SL);                                         % standard deviation
62 | \end{lstlisting}
63 | 


--------------------------------------------------------------------------------
/doc/tex/cartDouble_learn.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{cartDouble\_learn.m}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Script to learn a controller for the cart-doube-pendulum swingup
13 | \end{par} \vspace{1em}
14 | \begin{par}
15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
16 | \end{par} \vspace{1em}
17 | \begin{par}
18 | Last modified: 2013-03-27
19 | \end{par} \vspace{1em}
20 | 
21 | 
22 | \subsection*{High-Level Steps} 
23 | 
24 | \begin{enumerate}
25 | \setlength{\itemsep}{-1ex}
26 |    \item Load parameters
27 |    \item Create J initial trajectories by applying random controls
28 |    \item Controlled learning (train dynamics model, policy learning, policy application)
29 | \end{enumerate}
30 | 
31 | 
32 | \subsection*{Code} 
33 | 
34 | 
35 | \begin{lstlisting}
36 | % 1. Initialization
37 | clear all; close all;
38 | settings_cdp;                 % load scenario-specific settings
39 | basename = 'CartDoublePend_'; % filename used for saving data
40 | 
41 | % 2. Initial J random rollouts
42 | for jj = 1:J
43 |   [xx, yy, realCost{jj}, latent{jj}] = ...
44 |     rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost);
45 |   x = [x; xx]; y = [y; yy];       % augment training sets for dynamics model
46 |   if plotting.verbosity > 0;      % visualization of trajectory
47 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
48 |     draw_rollout_cdp;
49 |   end
50 | 
51 | end
52 | 
53 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0;
54 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno);
55 | 
56 | % 3. Controlled learning (N iterations)
57 | for j = 1:N
58 |   trainDynModel;   % train (GP) dynamics model
59 |   learnPolicy;     % learn policy
60 |   applyController; % apply controller to system
61 |   disp(['controlled trial # ' num2str(j)]);
62 |   if plotting.verbosity > 0;      % visualization of trajectory
63 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
64 |     draw_rollout_cdp;
65 |   end
66 | end
67 | \end{lstlisting}
68 | 


--------------------------------------------------------------------------------
/doc/tex/cartPole_learn.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{cartPole\_learn.m}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Script to learn a controller for the cart-pole swingup
13 | \end{par} \vspace{1em}
14 | \begin{par}
15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
16 | \end{par} \vspace{1em}
17 | \begin{par}
18 | Last modified: 2013-03-27
19 | \end{par} \vspace{1em}
20 | 
21 | 
22 | \subsection*{High-Level Steps} 
23 | 
24 | \begin{enumerate}
25 | \setlength{\itemsep}{-1ex}
26 |    \item Load parameters
27 |    \item Create J initial trajectories by applying random controls
28 |    \item Controlled learning (train dynamics model, policy learning, policy application)
29 | \end{enumerate}
30 | 
31 | 
32 | \subsection*{Code} 
33 | 
34 | 
35 | \begin{lstlisting}
36 | % 1. Initialization
37 | clear all; close all;
38 | settings_cp;                      % load scenario-specific settings
39 | basename = 'cartPole_';           % filename used for saving data
40 | 
41 | % 2. Initial J random rollouts
42 | for jj = 1:J
43 |   [xx, yy, realCost{jj}, latent{jj}] = ...
44 |     rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost);
45 |   x = [x; xx]; y = [y; yy];       % augment training sets for dynamics model
46 |   if plotting.verbosity > 0;      % visualization of trajectory
47 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
48 |     draw_rollout_cp;
49 |   end
50 | 
51 | end
52 | 
53 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0;
54 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno);
55 | 
56 | % 3. Controlled learning (N iterations)
57 | for j = 1:N
58 |   trainDynModel;   % train (GP) dynamics model
59 |   learnPolicy;     % learn policy
60 |   applyController; % apply controller to system
61 |   disp(['controlled trial # ' num2str(j)]);
62 |   if plotting.verbosity > 0;      % visualization of trajectory
63 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
64 |     draw_rollout_cp;
65 |   end
66 | end
67 | \end{lstlisting}
68 | 


--------------------------------------------------------------------------------
/doc/tex/covNoise.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 | 
10 | \subsection*{covNoise.m} 
11 | 
12 | \begin{par}
13 | Independent covariance function, ie "white noise", with specified variance. The covariance function is specified as:
14 | \end{par} \vspace{1em}
15 | \begin{par}
16 | k(x\^{}p,x\^{}q) = s2 * \ensuremath{\backslash}delta(p,q)
17 | \end{par} \vspace{1em}
18 | \begin{par}
19 | where s2 is the noise variance and \ensuremath{\backslash}delta(p,q) is a Kronecker delta function which is 1 iff p=q and zero otherwise. The hyperparameter is
20 | \end{par} \vspace{1em}
21 | \begin{par}
22 | logtheta = [ log(sqrt(s2)) ]
23 | \end{par} \vspace{1em}
24 | \begin{par}
25 | For more help on design of covariance functions, try "help covFunctions".
26 | \end{par} \vspace{1em}
27 | \begin{par}
28 | (C) Copyright 2006 by Carl Edward Rasmussen, 2006-03-24.
29 | \end{par} \vspace{1em}
30 | 
31 | \begin{lstlisting}
32 | function [A, B] = covNoise(logtheta, x, z)
33 | \end{lstlisting}
34 | 
35 | 
36 | \subsection*{Code} 
37 | 
38 | 
39 | \begin{lstlisting}
40 | if nargin == 0, A = '1'; return; end              % report number of parameters
41 | 
42 | s2 = exp(2*logtheta);                                          % noise variance
43 | 
44 | if nargin == 2                                      % compute covariance matrix
45 |   A = s2*eye(size(x,1));
46 | elseif nargout == 2                              % compute test set covariances
47 |   A = s2;
48 |   B = 0;                               % zeros cross covariance by independence
49 | else                                                % compute derivative matrix
50 |   A = 2*s2*eye(size(x,1));
51 | end
52 | \end{lstlisting}
53 | 


--------------------------------------------------------------------------------
/doc/tex/covSEard.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 | 
10 | \subsection*{covSEard.m} 
11 | 
12 | \begin{par}
13 | Squared Exponential covariance function with Automatic Relevance Detemination (ARD) distance measure. The covariance function is parameterized as:
14 | \end{par} \vspace{1em}
15 | \begin{par}
16 | k(x\^{}p,x\^{}q) = sf2 * exp(-(x\^{}p - x\^{}q)'*inv(P)*(x\^{}p - x\^{}q)/2)
17 | \end{par} \vspace{1em}
18 | \begin{par}
19 | where the P matrix is diagonal with ARD parameters ell\_1\^{}2,...,ell\_D\^{}2, where D is the dimension of the input space and sf2 is the signal variance. The hyperparameters are:
20 | \end{par} \vspace{1em}
21 | \begin{par}
22 | loghyper = [ log(ell\_1)              log(ell\_2)               .              log(ell\_D)              log(sqrt(sf2)) ]
23 | \end{par} \vspace{1em}
24 | \begin{par}
25 | For more help on design of covariance functions, try "help covFunctions".
26 | \end{par} \vspace{1em}
27 | \begin{par}
28 | (C) Copyright 2006 by Carl Edward Rasmussen (2006-03-24)
29 | \end{par} \vspace{1em}
30 | 
31 | \begin{lstlisting}
32 | function [A, B] = covSEard(loghyper, x, z)
33 | \end{lstlisting}
34 | 
35 | 
36 | \subsection*{Code} 
37 | 
38 | 
39 | \begin{lstlisting}
40 | if nargin == 0, A = '(D+1)'; return; end          % report number of parameters
41 | 
42 | persistent K;
43 | 
44 | [n D] = size(x);
45 | ell = exp(loghyper(1:D));                         % characteristic length scale
46 | sf2 = exp(2*loghyper(D+1));                                   % signal variance
47 | 
48 | if nargin == 2
49 |   K = sf2*exp(-sq_dist(diag(1./ell)*x')/2);
50 |   A = K;
51 | elseif nargout == 2                              % compute test set covariances
52 |   A = sf2*ones(size(z,1),1);
53 |   B = sf2*exp(-sq_dist(diag(1./ell)*x',diag(1./ell)*z')/2);
54 | else                                                % compute derivative matrix
55 | 
56 |   % check for correct dimension of the previously calculated kernel matrix
57 |   if any(size(K)~=n)
58 |     K = sf2*exp(-sq_dist(diag(1./ell)*x')/2);
59 |   end
60 | 
61 |   if z <= D                                           % length scale parameters
62 |     A = K.*sq_dist(x(:,z)'/ell(z));
63 |   else                                                    % magnitude parameter
64 |     A = 2*K;
65 |     clear K;
66 |   end
67 | end
68 | \end{lstlisting}
69 | 


--------------------------------------------------------------------------------
/doc/tex/covSum.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 | 
10 | \subsection*{covSum.m} 
11 | 
12 | \begin{par}
13 | \textbf{Summary:} Compose a covariance function as the sum of other covariance functions. This function doesn't actually compute very much on its own, it merely does some bookkeeping, and calls other covariance functions to do the actual work.
14 | \end{par} \vspace{1em}
15 | 
16 | \begin{verbatim}   function [A, B] = covSum(covfunc, logtheta, x, z)\end{verbatim}
17 |     \begin{par}
18 | (C) Copyright 2006 by Carl Edward Rasmussen, 2006-03-20.
19 | \end{par} \vspace{1em}
20 | 
21 | \begin{lstlisting}
22 | function [A, B] = covSum(covfunc, logtheta, x, z)
23 | \end{lstlisting}
24 | 
25 | 
26 | \subsection*{Code} 
27 | 
28 | 
29 | \begin{lstlisting}
30 | for i = 1:length(covfunc)                   % iterate over covariance functions
31 |   f = covfunc(i);
32 |   if iscell(f{:}), f = f{:}; end          % dereference cell array if necessary
33 |   j(i) = cellstr(feval(f{:}));
34 | end
35 | 
36 | if nargin == 1,                                   % report number of parameters
37 |   A = char(j(1)); for i=2:length(covfunc), A = [A, '+', char(j(i))]; end
38 |   return
39 | end
40 | 
41 | [n, D] = size(x);
42 | 
43 | v = [];              % v vector indicates to which covariance parameters belong
44 | for i = 1:length(covfunc), v = [v repmat(i, 1, eval(char(j(i))))]; end
45 | 
46 | switch nargin
47 | case 3                                              % compute covariance matrix
48 |   A = zeros(n, n);                       % allocate space for covariance matrix
49 |   for i = 1:length(covfunc)                  % iteration over summand functions
50 |     f = covfunc(i);
51 |     if iscell(f{:}), f = f{:}; end        % dereference cell array if necessary
52 |     A = A + feval(f{:}, logtheta(v==i), x);            % accumulate covariances
53 |   end
54 | 
55 | case 4                      % compute derivative matrix or test set covariances
56 |   if nargout == 2                                % compute test set cavariances
57 |     A = zeros(size(z,1),1); B = zeros(size(x,1),size(z,1));    % allocate space
58 |     for i = 1:length(covfunc)
59 |       f = covfunc(i);
60 |       if iscell(f{:}), f = f{:}; end      % dereference cell array if necessary
61 |       [AA BB] = feval(f{:}, logtheta(v==i), x, z);   % compute test covariances
62 |       A = A + AA; B = B + BB;                                  % and accumulate
63 |     end
64 |   else                                            % compute derivative matrices
65 |     i = v(z);                                       % which covariance function
66 |     j = sum(v(1:z)==i);                    % which parameter in that covariance
67 |     f = covfunc(i);
68 |     if iscell(f{:}), f = f{:}; end        % dereference cell array if necessary
69 |     A = feval(f{:}, logtheta(v==i), x, j);                 % compute derivative
70 |   end
71 | 
72 | end
73 | \end{lstlisting}
74 | 


--------------------------------------------------------------------------------
/doc/tex/draw_cp.tex:
--------------------------------------------------------------------------------
  1 | 
  2 | % This LaTeX was auto-generated from an M-file by MATLAB.
  3 | % To make changes, update the M-file and republish this document.
  4 | 
  5 | 
  6 | 
  7 |     
  8 |     
  9 | 
 10 | \subsection*{draw\_cp.m} 
 11 | 
 12 | \begin{par}
 13 | \textbf{Summary:} Draw the cart-pole system with reward, applied force, and predictive uncertainty of the tip of the pendulum
 14 | \end{par} \vspace{1em}
 15 | 
 16 | \begin{verbatim}  function draw_cp(x, theta, force, cost, text1, text2, M, S)\end{verbatim}
 17 |     \begin{par}
 18 | \textbf{Input arguments:}
 19 | \end{par} \vspace{1em}
 20 | 
 21 | \begin{lstlisting}
 22 | %		x          position of the cart
 23 | %   theta      angle of pendulum
 24 | %   force      force applied to cart
 25 | %   cost       cost structure
 26 | %     .fcn     function handle (it is assumed to use saturating cost)
 27 | %     .<>      other fields that are passed to cost
 28 | %   M          (optional) mean of state
 29 | %   S          (optional) covariance of state
 30 | %   text1      (optional) text field 1
 31 | %   text2      (optional) text field 2
 32 | %
 33 | %
 34 | % Copyright (C) 2008-2013 by
 35 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
 36 | %
 37 | % Last modified: 2013-03-07
 38 | 
 39 | function draw_cp(x, theta, force, cost, text1, text2, M, S)
 40 | \end{lstlisting}
 41 | 
 42 | 
 43 | \subsection*{Code} 
 44 | 
 45 | 
 46 | \begin{lstlisting}
 47 | l = 0.6;
 48 | xmin = -3;
 49 | xmax = 3;
 50 | height = 0.1;
 51 | width  = 0.3;
 52 | maxU = 10;
 53 | 
 54 | % Compute positions
 55 | cart = [ x + width,  height
 56 |          x + width, -height
 57 |          x - width, -height
 58 |          x - width,  height
 59 |          x + width,  height ];
 60 | pendulum = [x, 0; x+2*l*sin(theta), -cos(theta)*2*l];
 61 | 
 62 | 
 63 | clf; hold on
 64 | plot(0,2*l,'k+','MarkerSize',20,'linewidth',2)
 65 | plot([xmin, xmax], [-height-0.03, -height-0.03],'k','linewidth',2)
 66 | 
 67 | % Plot force
 68 | plot([0 force/maxU*xmax],[-0.3, -0.3],'g','linewidth',10)
 69 | 
 70 | % Plot reward
 71 | reward = 1-cost.fcn(cost,[x, 0, 0, theta]', zeros(4));
 72 | plot([0 reward*xmax],[-0.5, -0.5],'y','linewidth',10)
 73 | 
 74 | % Plot the cart-pole
 75 | fill(cart(:,1), cart(:,2),'k','edgecolor','k');
 76 | plot(pendulum(:,1), pendulum(:,2),'r','linewidth',4)
 77 | 
 78 | % Plot the joint and the tip
 79 | plot(x,0,'y.','markersize',24)
 80 | plot(pendulum(2,1),pendulum(2,2),'y.','markersize',24)
 81 | 
 82 | % plot ellipse around tip of pendulum (if M, S exist)
 83 | try
 84 |   [M1 S1] = getPlotDistr_cp(M,S,2*l);
 85 |   error_ellipse(S1,M1,'style','b');
 86 | catch
 87 | end
 88 | 
 89 | % Text
 90 | text(0,-0.3,'applied force')
 91 | text(0,-0.5,'immediate reward')
 92 | if exist('text1','var')
 93 |   text(0,-0.9, text1)
 94 | end
 95 | if exist('text2','var')
 96 |   text(0,-1.1, text2)
 97 | end
 98 | 
 99 | set(gca,'DataAspectRatio',[1 1 1],'XLim',[xmin xmax],'YLim',[-1.4 1.4]);
100 | axis off;
101 | drawnow;
102 | \end{lstlisting}
103 | 


--------------------------------------------------------------------------------
/doc/tex/draw_rollout_cdp.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{draw\_rollout\_cdp.m}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Script to draw a trajectory of the cart-double-pendulum system and  the predicted uncertainties around the tips of the pendulums
13 | \end{par} \vspace{1em}
14 | \begin{par}
15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
16 | \end{par} \vspace{1em}
17 | \begin{par}
18 | Last modified: 2013-03-27
19 | \end{par} \vspace{1em}
20 | 
21 | 
22 | \subsection*{High-Level Steps} 
23 | 
24 | \begin{enumerate}
25 | \setlength{\itemsep}{-1ex}
26 |    \item For each time step, plot the observed trajectory and the predicted means and covariances of the Cartesian coordinates of the tips of both pendulums
27 | \end{enumerate}
28 | 
29 | 
30 | \subsection*{Code} 
31 | 
32 | 
33 | \begin{lstlisting}
34 | for r = 1:size(xx,1)
35 |   if exist('j','var') && ~isempty(M{j})
36 |     draw_cdp(latent{j}(r,1), latent{j}(r,5), latent{j}(r,6), ...
37 |       latent{j}(r,end), cost, M{j}(:,r), Sigma{j}(:,:,r), ...
38 |       ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ...
39 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
40 |       ' sec']);
41 |   else
42 |     draw_cdp(latent{jj}(r,1), latent{jj}(r,5), latent{jj}(r,6), ...
43 |       latent{jj}(r,end), cost, [], [], ...
44 |       ['(random) trial # ' num2str(jj) ', T=' num2str(H*dt) ' sec'], ...
45 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
46 |       ' sec']);
47 |   end
48 |   pause(dt);
49 | end
50 | \end{lstlisting}
51 | 


--------------------------------------------------------------------------------
/doc/tex/draw_rollout_cp.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{draw\_rollout\_cp.m}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Script to draw the most recent trajectory of the cart-pole system together with the predicted uncertainties around the tip of the pendulum
13 | \end{par} \vspace{1em}
14 | \begin{par}
15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
16 | \end{par} \vspace{1em}
17 | \begin{par}
18 | Last modified: 2013-05-20
19 | \end{par} \vspace{1em}
20 | 
21 | 
22 | \subsection*{High-Level Steps} 
23 | 
24 | \begin{enumerate}
25 | \setlength{\itemsep}{-1ex}
26 |    \item For each time step, plot the observed trajectory and the predicted means and covariances of the Cartesian coordinates of the tip of the pendulum
27 | \end{enumerate}
28 | 
29 | 
30 | \subsection*{Code} 
31 | 
32 | 
33 | \begin{lstlisting}
34 | % Loop over states in trajectory (= time steps)
35 | for r = 1:size(xx,1)
36 |   if exist('j','var') && ~isempty(M{j})
37 |     draw_cp(latent{j}(r,1), latent{j}(r,4), latent{j}(r,end), cost,  ...
38 |       ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ...
39 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
40 |       ' sec'], M{j}(:,r), Sigma{j}(:,:,r));
41 |   else
42 |      draw_cp(latent{jj}(r,1), latent{jj}(r,4), latent{jj}(r,end), cost,  ...
43 |       ['(random) trial # ' num2str(jj) ', T=' num2str(H*dt) ' sec'], ...
44 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
45 |       ' sec'])
46 |   end
47 |   pause(dt);
48 | end
49 | \end{lstlisting}
50 | 


--------------------------------------------------------------------------------
/doc/tex/draw_rollout_dp.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{draw\_rollout\_dp.m}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Script to draw a trajectory of theobserved double-pendulum system and the predicted uncertainties around the tips of the pendulums
13 | \end{par} \vspace{1em}
14 | \begin{par}
15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
16 | \end{par} \vspace{1em}
17 | \begin{par}
18 | Last modified: 2013-03-27
19 | \end{par} \vspace{1em}
20 | 
21 | 
22 | \subsection*{High-Level Steps} 
23 | 
24 | \begin{enumerate}
25 | \setlength{\itemsep}{-1ex}
26 |    \item For each time step, plot the observed trajectory and the predicted means and covariances of the Cartesian coordinates of the tips of both pendulums
27 | \end{enumerate}
28 | 
29 | 
30 | \subsection*{Code} 
31 | 
32 | \begin{par}
33 | Loop over states in trajectory
34 | \end{par} \vspace{1em}
35 | 
36 | \begin{lstlisting}
37 | for r = 1:size(xx,1)
38 |   cost.t = r;
39 |   if exist('j','var') && ~isempty(M{j})
40 |     draw_dp(latent{j}(r,3), latent{j}(r,4), latent{j}(r,end-1), ...
41 |       latent{j}(r,end), cost,  ...
42 |       ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ...
43 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
44 |       ' sec'], M{j}(:,r), Sigma{j}(:,:,r));
45 |   else
46 |     draw_dp(latent{jj}(r,3), latent{jj}(r,4), latent{jj}(r,end-1), ...
47 |       latent{jj}(r,end), cost,  ...
48 |       ['(random) trial # ' num2str(1) ', T=' num2str(H*dt) ' sec'], ...
49 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
50 |       ' sec'])
51 |   end
52 |   pause(dt);
53 | end
54 | \end{lstlisting}
55 | 


--------------------------------------------------------------------------------
/doc/tex/draw_rollout_pendubot.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{draw\_rollout\_pendubot}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Script to draw the most recent trajectory of the Pendubot system and the predicted uncertainties around the tips of the pendulums
13 | \end{par} \vspace{1em}
14 | \begin{par}
15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
16 | \end{par} \vspace{1em}
17 | \begin{par}
18 | Last modified: 2013-03-27
19 | \end{par} \vspace{1em}
20 | 
21 | 
22 | \subsection*{High-Level Steps} 
23 | 
24 | \begin{enumerate}
25 | \setlength{\itemsep}{-1ex}
26 |    \item For each time step, plot the observed trajectory and the predicted means and covariances of the Cartesian coordinates of the tips of both pendulums
27 | \end{enumerate}
28 | 
29 | 
30 | \subsection*{Code} 
31 | 
32 | 
33 | \begin{lstlisting}
34 | % Loop over states in trajectory
35 | for r = 1:size(xx,1)
36 |   cost.t = r;
37 |   if exist('j','var') && ~isempty(M{j})
38 |     draw_pendubot(latent{j}(r,3), latent{j}(r,4), latent{j}(r,end), cost,  ...
39 |       ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ...
40 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
41 |       ' sec'], M{j}(:,r), Sigma{j}(:,:,r));
42 |   else
43 |     draw_pendubot(latent{jj}(r,3), latent{jj}(r,4), latent{jj}(r,end), cost, ...
44 |       ['(random) trial # ' num2str(jj) ', T=' num2str(H*dt) ' sec'], ...
45 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
46 |       ' sec'])
47 |   end
48 |   pause(dt);
49 | end
50 | \end{lstlisting}
51 | 


--------------------------------------------------------------------------------
/doc/tex/draw_rollout_pendulum.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{draw\_rollout\_pendulum.m}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Script to draw a trajectory of the most recent pendulum trajectory
13 | \end{par} \vspace{1em}
14 | \begin{par}
15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
16 | \end{par} \vspace{1em}
17 | \begin{par}
18 | Last modified: 2013-03-27
19 | \end{par} \vspace{1em}
20 | 
21 | 
22 | \subsection*{High-Level Steps} 
23 | 
24 | \begin{enumerate}
25 | \setlength{\itemsep}{-1ex}
26 |    \item For each time step, plot the observed trajectory
27 | \end{enumerate}
28 | 
29 | 
30 | \subsection*{Code} 
31 | 
32 | 
33 | \begin{lstlisting}
34 | % Loop over states in trajectory
35 | for r = 1:size(xx,1)
36 |   cost.t = r;
37 |   if exist('j','var') && ~isempty(M{j})
38 |     draw_pendulum(latent{j}(r,2), latent{j}(r,end), cost,  ...
39 |       ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ...
40 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
41 |       ' sec'], M{j}(:,r), Sigma{j}(:,:,r));
42 |   else
43 |      draw_pendulum(latent{jj}(r,2), latent{jj}(r,end), cost,  ...
44 |       ['(random) trial # ' num2str(1) ', T=' num2str(H*dt) ' sec'], ...
45 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
46 |       ' sec'])
47 |   end
48 |   pause(dt);
49 | end
50 | \end{lstlisting}
51 | 


--------------------------------------------------------------------------------
/doc/tex/draw_rollout_unicycle.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{draw\_rollout\_unicycle.m}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Script to draw a rollout of the unicycle
13 | \end{par} \vspace{1em}
14 | \begin{par}
15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
16 | \end{par} \vspace{1em}
17 | \begin{par}
18 | Last modified: 2013-04-04
19 | \end{par} \vspace{1em}
20 | 
21 | 
22 | \subsection*{High-Level Steps} 
23 | 
24 | \begin{enumerate}
25 | \setlength{\itemsep}{-1ex}
26 |    \item For each time step, plot the observed trajectory, the applied torques, and the incurred cost
27 | \end{enumerate}
28 | 
29 | 
30 | \subsection*{Code} 
31 | 
32 | 
33 | \begin{lstlisting}
34 | if j > J
35 | 
36 |   draw_unicycle(xx, plant, plant.dt/10, cost, ...
37 |     ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ...
38 |     ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
39 |     ' sec']);
40 | else
41 |   draw_unicycle(xx, plant, plant.dt/10, cost, ...
42 |     ['(random) trial # ' num2str(jj) ', T=' num2str(H*dt) ' sec'], ...
43 |     ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
44 |     ' sec']);
45 | end
46 | \end{lstlisting}
47 | 


--------------------------------------------------------------------------------
/doc/tex/dynamics_cp.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 | 
10 | \subsection*{dynamics\_cp.m} 
11 | 
12 | \begin{par}
13 | \textbf{Summary:} Implements ths ODE for simulating the cart-pole dynamics.
14 | \end{par} \vspace{1em}
15 | 
16 | \begin{verbatim}  function dz = dynamics_cp(t, z, f)\end{verbatim}
17 |     \begin{par}
18 | \textbf{Input arguments:}
19 | \end{par} \vspace{1em}
20 | 
21 | \begin{lstlisting}
22 | %		t     current time step (called from ODE solver)
23 | %   z     state                                                    [4 x 1]
24 | %   f     (optional): force f(t)
25 | %
26 | % *Output arguments:*
27 | %
28 | %   dz    if 3 input arguments:      state derivative wrt time
29 | %         if only 2 input arguments: total mechanical energy
30 | %
31 | %
32 | % Note: It is assumed that the state variables are of the following order:
33 | %       x:        [m]     position of cart
34 | %       dx:       [m/s]   velocity of cart
35 | %       dtheta:   [rad/s] angular velocity
36 | %       theta:    [rad]   angle
37 | %
38 | %
39 | % A detailed derivation of the dynamics can be found in:
40 | %
41 | % M.P. Deisenroth:
42 | % Efficient Reinforcement Learning Using Gaussian Processes, Appendix C,
43 | % KIT Scientific Publishing, 2010.
44 | %
45 | % Copyright (C) 2008-2013 by
46 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
47 | %
48 | % Last modified: 2013-03-08
49 | 
50 | function dz = dynamics_cp(t,z,f)
51 | \end{lstlisting}
52 | 
53 | 
54 | \subsection*{Code} 
55 | 
56 | 
57 | \begin{lstlisting}
58 | l = 0.5;  % [m]      length of pendulum
59 | m = 0.5;  % [kg]     mass of pendulum
60 | M = 0.5;  % [kg]     mass of cart
61 | b = 0.1;  % [N/m/s]  coefficient of friction between cart and ground
62 | g = 9.82; % [m/s^2]  acceleration of gravity
63 | 
64 | if nargin==3
65 |   dz = zeros(4,1);
66 |   dz(1) = z(2);
67 |   dz(2) = ( 2*m*l*z(3)^2*sin(z(4)) + 3*m*g*sin(z(4))*cos(z(4)) ...
68 |           + 4*f(t) - 4*b*z(2) )/( 4*(M+m)-3*m*cos(z(4))^2 );
69 |   dz(3) = (-3*m*l*z(3)^2*sin(z(4))*cos(z(4)) - 6*(M+m)*g*sin(z(4)) ...
70 |           - 6*(f(t)-b*z(2))*cos(z(4)) )/( 4*l*(m+M)-3*m*l*cos(z(4))^2 );
71 |   dz(4) = z(3);
72 | else
73 |   dz = (M+m)*z(2)^2/2 + 1/6*m*l^2*z(3)^2 + m*l*(z(2)*z(3)-g)*cos(z(4))/2;
74 | end
75 | \end{lstlisting}
76 | 


--------------------------------------------------------------------------------
/doc/tex/dynamics_pendulum.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 | 
10 | \subsection*{dynamics\_pendulum.m} 
11 | 
12 | \begin{par}
13 | \textbf{Summary:} Implements ths ODE for simulating the pendulum dynamics, where an input torque f can be applied
14 | \end{par} \vspace{1em}
15 | 
16 | \begin{verbatim}  function dz = dynamics_pendulum(t,z,u)\end{verbatim}
17 |     \begin{par}
18 | \textbf{Input arguments:}
19 | \end{par} \vspace{1em}
20 | 
21 | \begin{lstlisting}
22 | %		t     current time step (called from ODE solver)
23 | %   z     state                                                    [2 x 1]
24 | %   u     (optional): torque f(t) applied to pendulum
25 | %
26 | % *Output arguments:*
27 | %
28 | %   dz    if 3 input arguments:      state derivative wrt time
29 | %
30 | %   Note: It is assumed that the state variables are of the following order:
31 | %         dtheta:  [rad/s] angular velocity of pendulum
32 | %         theta:   [rad]   angle of pendulum
33 | %
34 | % A detailed derivation of the dynamics can be found in:
35 | %
36 | % M.P. Deisenroth:
37 | % Efficient Reinforcement Learning Using Gaussian Processes, Appendix C,
38 | % KIT Scientific Publishing, 2010.
39 | %
40 | %
41 | % Copyright (C) 2008-2013 by
42 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
43 | %
44 | % Last modified: 2013-03-18
45 | 
46 | function dz = dynamics_pendulum(t,z,u)
47 | \end{lstlisting}
48 | 
49 | 
50 | \subsection*{Code} 
51 | 
52 | 
53 | \begin{lstlisting}
54 | l = 1;    % [m]        length of pendulum
55 | m = 1;    % [kg]       mass of pendulum
56 | g = 9.82; % [m/s^2]    acceleration of gravity
57 | b = 0.01; % [s*Nm/rad] friction coefficient
58 | 
59 | dz = zeros(2,1);
60 | dz(1) = ( u(t) - b*z(1) - m*g*l*sin(z(2))/2 ) / (m*l^2/3);
61 | dz(2) = z(1);
62 | \end{lstlisting}
63 | 


--------------------------------------------------------------------------------
/doc/tex/gaussian.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 | 
10 | \subsection*{gaussian.m} 
11 | 
12 | \begin{par}
13 | \textbf{Summary:} Generate n samples from a Gaussian \$p(x)=\ensuremath{\backslash}mathcal N(m,S). Sampling is based on the Cholesky factorization of the covariance matrix S
14 | \end{par} \vspace{1em}
15 | 
16 | \begin{verbatim}  function x = gaussian(m, S, n)\end{verbatim}
17 |     \begin{par}
18 | \textbf{Input arguments:}
19 | \end{par} \vspace{1em}
20 | \begin{verbatim}m      mean of Gaussian                                         [D x 1]
21 | S      covariance matrix of Gaussian                            [D x D]
22 | n      (optional) number of samples; default: n=1\end{verbatim}
23 | \begin{par}
24 | \textbf{Output arguments:}
25 | \end{par} \vspace{1em}
26 | \begin{verbatim}x      matrix of samples from Gaussian                          [D x n]\end{verbatim}
27 | \begin{par}
28 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
29 | \end{par} \vspace{1em}
30 | \begin{par}
31 | Last modified: 2013-03-21
32 | \end{par} \vspace{1em}
33 | 
34 | \begin{lstlisting}
35 | function x = gaussian(m, S, n)
36 | \end{lstlisting}
37 | 
38 | 
39 | \subsection*{Code} 
40 | 
41 | 
42 | \begin{lstlisting}
43 | if nargin < 3; n = 1; end
44 | 
45 | x = bsxfun(@plus, m(:), chol(S)'*randn(size(S,2),n));
46 | \end{lstlisting}
47 | 


--------------------------------------------------------------------------------
/doc/tex/getPlotDistr_cp.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{getPlotDistr\_cp.m}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Compute means and covariances of the Cartesian coordinates of the tips both the inner and outer pendulum assuming that the joint state $x$ of the cart-double-pendulum system is Gaussian, i.e., $x\sim N(m, s)$
13 | \end{par} \vspace{1em}
14 | 
15 | \begin{verbatim}   function [M, S] = getPlotDistr_cp(m, s, ell)\end{verbatim}
16 |     \begin{par}
17 | \textbf{Input arguments:}
18 | \end{par} \vspace{1em}
19 | \begin{verbatim}m       mean of full state                                    [4 x 1]
20 | s       covariance of full state                              [4 x 4]
21 | ell     length of pendulum\end{verbatim}
22 | \begin{verbatim}Note: this code assumes that the following order of the state:
23 |        1: cart pos.,
24 |        2: cart vel.,
25 |        3: pendulum angular velocity,
26 |        4: pendulum angle\end{verbatim}
27 | \begin{par}
28 | \textbf{Output arguments:}
29 | \end{par} \vspace{1em}
30 | \begin{verbatim}M      mean of tip of pendulum                               [2 x 1]
31 | S      covariance of tip of pendulum                         [2 x 2]\end{verbatim}
32 | \begin{par}
33 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
34 | \end{par} \vspace{1em}
35 | \begin{par}
36 | Last modification: 2013-03-27
37 | \end{par} \vspace{1em}
38 | 
39 | 
40 | \subsection*{High-Level Steps} 
41 | 
42 | \begin{enumerate}
43 | \setlength{\itemsep}{-1ex}
44 |    \item Augment input distribution to complex angle representation
45 |    \item Compute means of tips of pendulums (in Cartesian coordinates)
46 |    \item Compute covariances of tips of pendulums (in Cartesian coordinates)
47 | \end{enumerate}
48 | 
49 | \begin{lstlisting}
50 | function [M, S] = getPlotDistr_cp(m, s, ell)
51 | \end{lstlisting}
52 | 
53 | 
54 | \subsection*{Code} 
55 | 
56 | 
57 | \begin{lstlisting}
58 | % 1. Augment input distribution to complex angle representation
59 | [m1 s1 c1] = gTrig(m,s,4,ell); % map input distribution through sin/cos
60 | m1 = [m; m1];        % mean of joint
61 | c1 = s*c1;           % cross-covariance between input and prediction
62 | s1 = [s c1; c1' s1]; % covariance of joint
63 | 
64 | % 2. Compute means of tips of pendulums (in Cartesian coordinates)
65 | M = [m1(1)+m1(5); -m1(6)];
66 | 
67 | % 3. Compute covariances of tips of pendulums (in Cartesian coordinates)
68 | s11 = s1(1,1) + s1(5,5) + s1(1,5) + s1(5,1); % x+l sin(theta)
69 | s22 = s1(6,6); % -l*cos(theta)
70 | s12 = -(s1(1,6)+s1(5,6)); % cov(x+l*sin(th), -l*cos(th)
71 | 
72 | S = [s11 s12; s12' s22];
73 | try
74 |   chol(S);
75 | catch
76 |   warning('matrix S not pos.def. (getPlotDistr)');
77 |   S = S + (1e-6 - min(eig(S)))*eye(2);
78 | end
79 | \end{lstlisting}
80 | 


--------------------------------------------------------------------------------
/doc/tex/lossLin.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{lossLin.m}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Function to compute the expected loss and its derivatives, given an input distribution, under a linear loss function: L = a\^{}T(x - b). Note, this loss function can return negative loss.
13 | \end{par} \vspace{1em}
14 | \begin{verbatim}[L dLdm dLds S dSdm dSds C dCdm dCds] = lossLin(cost,m,s)\end{verbatim}
15 | \begin{par}
16 | \textbf{Input arguments:}
17 | \end{par} \vspace{1em}
18 | 
19 | \begin{verbatim}  cost
20 |     .a      gradient of linear loss function,                       [D x 1]
21 |     .b      targets, the value of x for which there is zero loss    [D x 1]
22 |   m         mean of input distribution                              [D x 1]
23 |   s         covariance matrix of input distribution                 [D x D]\end{verbatim}
24 |     \begin{par}
25 | \textbf{Output arguments:}
26 | \end{par} \vspace{1em}
27 | \begin{verbatim}L               expected loss                                  [1   x    1 ]
28 | dLdm            derivative of L wrt input mean                 [1   x    D ]
29 | dLds            derivative of L wrt input covariance           [1   x   D^2]
30 | S               variance of loss                               [1   x    1 ]
31 | dSdm            derivative of S wrt input mean                 [1   x    D ]
32 | dSds            derivative of S wrt input covariance           [1   x   D^2]
33 | C               inv(S) times input-output covariance           [D   x    1 ]
34 | dCdm            derivative of C wrt input mean                 [D   x    D ]
35 | dCds            derivative of C wrt input covariance           [D   x   D^2]\end{verbatim}
36 | \begin{par}
37 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
38 | \end{par} \vspace{1em}
39 | \begin{par}
40 | Last modified: 2013-03-05
41 | \end{par} \vspace{1em}
42 | 
43 | 
44 | \subsection*{High-Level Steps} 
45 | 
46 | \begin{enumerate}
47 | \setlength{\itemsep}{-1ex}
48 |    \item Expected cost
49 |    \item Variance of cost
50 |    \item inv(s)* cov(x,L)
51 | \end{enumerate}
52 | 
53 | \begin{lstlisting}
54 | function [L dLdm dLds S dSdm dSds C dCdm dCds] = lossLin(cost,m,s)
55 | \end{lstlisting}
56 | 
57 | 
58 | \subsection*{Code} 
59 | 
60 | 
61 | \begin{lstlisting}
62 | a = cost.a(:); b = cost.b(:); D = length(m);
63 | if length(a) ~= D || length(b) ~= D;
64 |     error('a or b not the same length as m'); end
65 | 
66 | % 1) Mean
67 | L = a'*(m - b);
68 | dLdm = a';
69 | dLds = zeros(D);
70 | 
71 | % 2) Variance
72 | S = a'*s*a;
73 | dSdm = zeros(1,D);
74 | dSds = a*a';
75 | 
76 | % 3) inv(s) * input-output covariance cov(x,L)
77 | C = a;
78 | dCdm = zeros(D); dCds = zeros(D,D^2);
79 | \end{lstlisting}
80 | 


--------------------------------------------------------------------------------
/doc/tex/maha.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 | 
10 | \subsection*{maha.m} 
11 | 
12 | \begin{par}
13 | \textbf{Summary:} Point-wise squared Mahalanobis distance (a-b)*Q*(a-b)'. Vectors are row-vectors
14 | \end{par} \vspace{1em}
15 | 
16 | \begin{verbatim}  function K = maha(a, b, Q)\end{verbatim}
17 |     \begin{par}
18 | \textbf{Input arguments:}
19 | \end{par} \vspace{1em}
20 | \begin{verbatim}a   matrix containing n row vectors                                 [n x D]
21 | b   matrix containing n row vectors                                 [n x D]
22 | Q   weight matrix. Default: eye(D)                                  [D x D]\end{verbatim}
23 | \begin{par}
24 | \textbf{Output arguments:}  K    point-wise squared distances                                    [n x n]
25 | \end{par} \vspace{1em}
26 | \begin{par}
27 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
28 | \end{par} \vspace{1em}
29 | \begin{par}
30 | Last modified: 2013-03-21
31 | \end{par} \vspace{1em}
32 | 
33 | \begin{lstlisting}
34 | function K = maha(a, b, Q)
35 | \end{lstlisting}
36 | 
37 | 
38 | \subsection*{Code} 
39 | 
40 | 
41 | \begin{lstlisting}
42 | if nargin == 2                                                  % assume unit Q
43 |   K = bsxfun(@plus,sum(a.*a,2),sum(b.*b,2)')-2*a*b';
44 | else
45 |   aQ = a*Q; K = bsxfun(@plus,sum(aQ.*a,2),sum(b*Q.*b,2)')-2*aQ*b';
46 | end
47 | \end{lstlisting}
48 | 


--------------------------------------------------------------------------------
/doc/tex/pendubot_learn.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{pendubot\_learn.m}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Script to learn a controller for the pendubot swingup (a double pendulum with the inner joint actuated)
13 | \end{par} \vspace{1em}
14 | \begin{par}
15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
16 | \end{par} \vspace{1em}
17 | \begin{par}
18 | Last modified: 2013-03-27
19 | \end{par} \vspace{1em}
20 | 
21 | 
22 | \subsection*{High-Level Steps} 
23 | 
24 | \begin{enumerate}
25 | \setlength{\itemsep}{-1ex}
26 |    \item Load parameters
27 |    \item Create J initial trajectories by applying random controls
28 |    \item Controlled learning (train dynamics model, policy learning, policy application)
29 | \end{enumerate}
30 | 
31 | 
32 | \subsection*{Code} 
33 | 
34 | 
35 | \begin{lstlisting}
36 | % 1. Initialization
37 | clear all; close all;
38 | settings_pendubot;            % load scenario-specific settings
39 | basename = 'pendubot_';       % filename used for saving data
40 | 
41 | % 2. Initial J random rollouts
42 | for jj = 1:J
43 |   [xx, yy, realCost{jj}, latent{jj}] = ...
44 |     rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost);
45 |   x = [x; xx]; y = [y; yy];       % augment training sets for dynamics model
46 |   if plotting.verbosity > 0;      % visualization of trajectory
47 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
48 |     draw_rollout_pendubot;
49 |   end
50 | end
51 | 
52 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0;
53 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno);
54 | 
55 | % 3. Controlled learning (N iterations)
56 | for j = 1:N
57 |   trainDynModel;   % train (GP) dynamics model
58 |   learnPolicy;     % learn policy
59 |   applyController; % apply controller to system
60 |   disp(['controlled trial # ' num2str(j)]);
61 |   if plotting.verbosity > 0;      % visualization of trajectory
62 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
63 |     draw_rollout_pendubot;
64 |   end
65 | end
66 | \end{lstlisting}
67 | 


--------------------------------------------------------------------------------
/doc/tex/pendulum_learn.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{pendulum\_learn.m}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Script to learn a controller for the pendulum swingup
13 | \end{par} \vspace{1em}
14 | \begin{par}
15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
16 | \end{par} \vspace{1em}
17 | \begin{par}
18 | Last modified: 2013-03-27
19 | \end{par} \vspace{1em}
20 | 
21 | 
22 | \subsection*{High-Level Steps} 
23 | 
24 | \begin{enumerate}
25 | \setlength{\itemsep}{-1ex}
26 |    \item Load parameters
27 |    \item Create J initial trajectories by applying random controls
28 |    \item Controlled learning (train dynamics model, policy learning, policy application)
29 | \end{enumerate}
30 | 
31 | 
32 | \subsection*{Code} 
33 | 
34 | 
35 | \begin{lstlisting}
36 | % 1. Initialization
37 | clear all; close all;
38 | settings_pendulum;            % load scenario-specific settings
39 | basename = 'pendulum_';       % filename used for saving data
40 | 
41 | % 2. Initial J random rollouts
42 | for jj = 1:J
43 |   [xx, yy, realCost{jj}, latent{jj}] = ...
44 |     rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost);
45 |   x = [x; xx]; y = [y; yy];       % augment training sets for dynamics model
46 |   if plotting.verbosity > 0;      % visualization of trajectory
47 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
48 |     draw_rollout_pendulum;
49 |   end
50 | end
51 | 
52 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0;
53 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno);
54 | 
55 | % 3. Controlled learning (N iterations)
56 | for j = 1:N
57 |   trainDynModel;   % train (GP) dynamics model
58 |   learnPolicy;     % learn policy
59 |   applyController; % apply controller to system
60 |   disp(['controlled trial # ' num2str(j)]);
61 |   if plotting.verbosity > 0;      % visualization of trajectory
62 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
63 |     draw_rollout_pendulum;
64 |   end
65 | end
66 | \end{lstlisting}
67 | 


--------------------------------------------------------------------------------
/doc/tex/pred.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{pred.m}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Compute predictive (marginal) distributions of a trajecory
13 | \end{par} \vspace{1em}
14 | \begin{verbatim}[M S] = pred(policy, plant, dynmodel, m, s, H)\end{verbatim}
15 | \begin{par}
16 | \textbf{Input arguments:}
17 | \end{par} \vspace{1em}
18 | \begin{verbatim}policy             policy structure
19 | plant              plant structure
20 | dynmodel           dynamics model structure
21 | m                  D-by-1 mean of the initial state distribution
22 | s                  D-by-D covariance of the initial state distribution
23 | H                  length of prediction horizon\end{verbatim}
24 | \begin{par}
25 | \textbf{Output arguments:}
26 | \end{par} \vspace{1em}
27 | \begin{verbatim}M                  D-by-(H+1) sequence of predicted mean vectors
28 | S                  D-by-D-(H+1) sequence of predicted covariance
29 |                    matrices\end{verbatim}
30 | \begin{par}
31 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
32 | \end{par} \vspace{1em}
33 | \begin{par}
34 | Last modified: 2013-01-23
35 | \end{par} \vspace{1em}
36 | 
37 | 
38 | \subsection*{High-Level Steps} 
39 | 
40 | \begin{enumerate}
41 | \setlength{\itemsep}{-1ex}
42 |    \item Predict successor state distribution
43 | \end{enumerate}
44 | 
45 | \begin{lstlisting}
46 | function [M S] = pred(policy, plant, dynmodel, m, s, H)
47 | \end{lstlisting}
48 | 
49 | 
50 | \subsection*{Code} 
51 | 
52 | 
53 | \begin{lstlisting}
54 | D = length(m); S = zeros(D,D,H+1); M = zeros(D,H+1);
55 | M(:,1) = m; S(:,:,1) = s;
56 | for i = 1:H
57 |   [m s] = plant.prop(m, s, plant, dynmodel, policy);
58 |   M(:,i+1) = m(end-D+1:end);
59 |   S(:,:,i+1) = s(end-D+1:end,end-D+1:end);
60 | end
61 | \end{lstlisting}
62 | 


--------------------------------------------------------------------------------
/doc/tex/predcost.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{predcost.m}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Compute trajectory of expected costs for a given set of state distributions
13 | \end{par} \vspace{1em}
14 | \begin{par}
15 | inputs: m0          mean of states, D-by-1 or D-by-K for multiple means S           covariance matrix of state distributions dynmodel    (struct) for dynamics model (GP) plant	      (struct) of system parameters policy      (struct) for policy to be implemented cost        (struct) of cost function parameters H           length of optimization horizon
16 | \end{par} \vspace{1em}
17 | \begin{par}
18 | outputs: L            expected cumulative (discounted) cost s            standard deviation of cost
19 | \end{par} \vspace{1em}
20 | \begin{par}
21 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
22 | \end{par} \vspace{1em}
23 | \begin{par}
24 | Last modified: 2012-01-12
25 | \end{par} \vspace{1em}
26 | 
27 | 
28 | \subsection*{High-Level Steps} 
29 | 
30 | \begin{enumerate}
31 | \setlength{\itemsep}{-1ex}
32 |    \item Predict successor state distribution
33 |    \item Predict corresponding cost distribution
34 | \end{enumerate}
35 | 
36 | \begin{lstlisting}
37 | function [L, s] = predcost(m0, S, dynmodel, plant, policy, cost, H)
38 | \end{lstlisting}
39 | 
40 | 
41 | \subsection*{Code} 
42 | 
43 | 
44 | \begin{lstlisting}
45 | L = zeros(size(m0,2),H); s = zeros(size(m0,2),H);
46 | for k = 1:size(m0,2);
47 |   m = m0(:,k);
48 |   for t = 1:H
49 |     [m, S] = plant.prop(m, S, plant, dynmodel, policy);	     % get next state
50 |     [L(k,t), d1, d2, v] = cost.fcn(cost, m, S);              % compute cost
51 |     s(k,t) = sqrt(v);
52 |   end
53 | end
54 | L = mean(L,1); s = mean(s,1);
55 | \end{lstlisting}
56 | 


--------------------------------------------------------------------------------
/doc/tex/reward.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{reward.m}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Compute expectation, variance, and their derivatives of an exponentiated negative quadratic cost $\exp(-(x-z)'W(x-z)/2)$, where $x\sim\mathcal N(m,S)$
13 | \end{par} \vspace{1em}
14 | \begin{par}
15 | \textbf{Input arguments:}
16 | \end{par} \vspace{1em}
17 | \begin{verbatim}m:          D-by-1 mean of the state distribution
18 | S:          D-by-D covariance matrix of the state distribution
19 | z:          D-by-1 target state
20 | W:          D-by-D weight matrix\end{verbatim}
21 | \begin{par}
22 | \textbf{Output arguments:}
23 | \end{par} \vspace{1em}
24 | \begin{verbatim}muR:        1-by-1 expected reward
25 | dmuRdm:     1-by-D derivative of expected reward wrt input mean
26 | dmuRdS:     D-by-D derivative of expected reward wrt input covariance matrix
27 | sR:         1-by-1 variance of reward
28 | dsRdm:      1-by-D derivative of variance of reward wrt input mean
29 | dsRdS:      D-by-D derivative reward variance wrt input covariance matrix\end{verbatim}
30 | \begin{par}
31 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
32 | \end{par} \vspace{1em}
33 | \begin{par}
34 | Last modification: 2013-01-20
35 | \end{par} \vspace{1em}
36 | 
37 | 
38 | \subsection*{High-Level Steps} 
39 | 
40 | \begin{enumerate}
41 | \setlength{\itemsep}{-1ex}
42 |    \item Compute expected reward
43 |    \item Compute the derivatives of the expected reward with respect to the input   distribution (optional)
44 |    \item Compute variance of reward
45 |    \item Compute the derivatives of the variance of the reward with respect to the input distribution (optional)
46 | \end{enumerate}
47 | 
48 | \begin{lstlisting}
49 | function [muR, dmuRdm, dmuRdS, sR, dsRdm, dsRdS] = reward(m, S, z, W)
50 | \end{lstlisting}
51 | 
52 | 
53 | \subsection*{Code} 
54 | 
55 | 
56 | \begin{lstlisting}
57 | % some precomputations
58 | D = length(m); % get state dimension
59 | SW = S*W;
60 | iSpW = W/(eye(D)+SW);
61 | 
62 | % 1. expected reward
63 | muR = exp(-(m-z)'*iSpW*(m-z)/2)/sqrt(det(eye(D)+SW));
64 | 
65 | % 2. derivatives of expected reward
66 | if nargout > 1
67 |   dmuRdm = -muR*(m-z)'*iSpW;  % wrt input mean
68 |   dmuRdS = muR*(iSpW*(m-z)*(m-z)'-eye(D))*iSpW/2;  % wrt input covariance matrix
69 | end
70 | 
71 | % 3. reward variance
72 | if nargout > 3
73 |   i2SpW = W/(eye(D)+2*SW);
74 |   r2 = exp(-(m-z)'*i2SpW*(m-z))/sqrt(det(eye(D)+2*SW));
75 |   sR = r2 - muR^2;
76 |   if sR < 1e-12; sR=0; end % for numerical reasons
77 | end
78 | 
79 | % 4. derivatives of reward variance
80 | if nargout > 4
81 |   % wrt input mean
82 |   dsRdm = -2*r2*(m-z)'*i2SpW-2*muR*dmuRdm;
83 |   % wrt input covariance matrix
84 |   dsRdS = r2*(2*i2SpW*(m-z)*(m-z)'-eye(D))*i2SpW-2*muR*dmuRdS;
85 | end
86 | \end{lstlisting}
87 | 


--------------------------------------------------------------------------------
/doc/tex/rewrap.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 | 
10 | \subsection*{rewrap.m} 
11 | 
12 | \begin{par}
13 | \textbf{Summary:} Map the numerical elements in the vector $v$ onto the variables $s$, which can be of any type. The number of numerical elements must match; on exit, $v$ should be empty. Non-numerical entries are just copied. See also the reverse unwrap.m.
14 | \end{par} \vspace{1em}
15 | 
16 | \begin{verbatim}  [s v] = rewrap(s, v)\end{verbatim}
17 |     \begin{par}
18 | \textbf{Input arguments:}
19 | \end{par} \vspace{1em}
20 | \begin{verbatim}s     structure, cell, or numeric values
21 | v     structure, cell, or numeric values\end{verbatim}
22 | \begin{par}
23 | \textbf{Output arguments:}
24 | \end{par} \vspace{1em}
25 | \begin{verbatim}s     structure, cell, or numeric values
26 | v     [empty]\end{verbatim}
27 | \begin{par}
28 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
29 | \end{par} \vspace{1em}
30 | \begin{par}
31 | Last modified: 2013-03-25
32 | \end{par} \vspace{1em}
33 | 
34 | \begin{lstlisting}
35 | function [s v] = rewrap(s, v)
36 | \end{lstlisting}
37 | 
38 | 
39 | \subsection*{Code} 
40 | 
41 | 
42 | \begin{lstlisting}
43 | if isnumeric(s)
44 |   if numel(v) < numel(s)
45 |     error('The vector for conversion contains too few elements')
46 |   end
47 |   s = reshape(v(1:numel(s)), size(s));            % numeric values are reshaped
48 |   v = v(numel(s)+1:end);                        % remaining arguments passed on
49 | elseif isstruct(s)
50 |   [s p] = orderfields(s); p(p) = 1:numel(p);      % alphabetize, store ordering
51 |   [t v] = rewrap(struct2cell(s), v);                 % convert to cell, recurse
52 |   s = orderfields(cell2struct(t,fieldnames(s),1),p);  % conv to struct, reorder
53 | elseif iscell(s)
54 |   for i = 1:numel(s)             % cell array elements are handled sequentially
55 |     [s{i} v] = rewrap(s{i}, v);
56 |   end
57 | end
58 | \end{lstlisting}
59 | 


--------------------------------------------------------------------------------
/doc/tex/solve_chol.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{solve\_chol.m}
10 | 
11 | \begin{par}
12 | solve\_chol - solve linear equations from the Cholesky factorization. Solve A*X = B for X, where A is square, symmetric, positive definite. The input to the function is R the Cholesky decomposition of A and the matrix B. Example: X = solve\_chol(chol(A),B);
13 | \end{par} \vspace{1em}
14 | \begin{par}
15 | NOTE: The program code is written in the C language for efficiency and is contained in the file solve\_chol.c, and should be compiled using matlabs mex facility. However, this file also contains a (less efficient) matlab implementation, supplied only as a help to people unfamiliar with mex. If the C code has been properly compiled and is avaiable, it automatically takes precendence over the matlab code in this file.
16 | \end{par} \vspace{1em}
17 | \begin{par}
18 | Copyright (c) 2004, 2005, 2006 by Carl Edward Rasmussen. 2006-02-08.
19 | \end{par} \vspace{1em}
20 | 
21 | 
22 | \subsection*{Code} 
23 | 
24 | 
25 | \begin{lstlisting}
26 | function x = solve_chol(A, B)
27 | 
28 | if nargin ~= 2 | nargout > 1
29 |   error('Wrong number of arguments.');
30 | end
31 | 
32 | if size(A,1) ~= size(A,2) | size(A,1) ~= size(B,1)
33 |   error('Wrong sizes of matrix arguments.');
34 | end
35 | 
36 | x = A\(A'\B);
37 | \end{lstlisting}
38 | 


--------------------------------------------------------------------------------
/doc/tex/sq_dist.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 | 
10 | \subsection*{sq\_dist.m} 
11 | 
12 | \begin{par}
13 | sq\_dist - a function to compute a matrix of all pairwise squared distances between two sets of vectors, stored in the columns of the two matrices, a (of size D by n) and b (of size D by m). If only a single argument is given or the second matrix is empty, the missing matrix is taken to be identical to the first.
14 | \end{par} \vspace{1em}
15 | \begin{par}
16 | Special functionality: If an optional third matrix argument Q is given, it must be of size n by m, and in this case a vector of the traces of the product of Q' and the coordinatewise squared distances is returned.
17 | \end{par} \vspace{1em}
18 | \begin{par}
19 | NOTE: The program code is written in the C language for efficiency and is contained in the file sq\_dist.c, and should be compiled using matlabs mex facility. However, this file also contains a (less efficient) matlab implementation, supplied only as a help to people unfamiliar with mex. If the C code has been properly compiled and is avaiable, it automatically takes precendence over the matlab code in this file.
20 | \end{par} \vspace{1em}
21 | \begin{par}
22 | Usage: C = sq\_dist(a, b)    or: C = sq\_dist(a)  or equiv.: C = sq\_dist(a, [])    or: c = sq\_dist(a, b, Q) where the b matrix may be empty.
23 | \end{par} \vspace{1em}
24 | \begin{par}
25 | where a is of size D by n, b is of size D by m (or empty), C and Q are of size n by m and c is of size D by 1.
26 | \end{par} \vspace{1em}
27 | \begin{par}
28 | Copyright (c) 2003, 2004, 2005 and 2006 Carl Edward Rasmussen. 2006-03-09.
29 | \end{par} \vspace{1em}
30 | 
31 | \begin{lstlisting}
32 | function C = sq_dist(a, b, Q);
33 | \end{lstlisting}
34 | 
35 | 
36 | \subsection*{Code} 
37 | 
38 | 
39 | \begin{lstlisting}
40 | if nargin < 1 | nargin > 3 | nargout > 1
41 |   error('Wrong number of arguments.');
42 | end
43 | 
44 | if nargin == 1 | isempty(b)                   % input arguments are taken to be
45 |   b = a;                                   % identical if b is missing or empty
46 | end
47 | 
48 | [D, n] = size(a);
49 | [d, m] = size(b);
50 | if d ~= D
51 |   error('Error: column lengths must agree.');
52 | end
53 | 
54 | if nargin < 3
55 |   C = zeros(n,m);
56 |   for d = 1:D
57 |     C = C + (repmat(b(d,:), n, 1) - repmat(a(d,:)', 1, m)).^2;
58 |   end
59 |   % C = repmat(sum(a.*a)',1,m)+repmat(sum(b.*b),n,1)-2*a'*b could be used to
60 |   % replace the 3 lines above; it would be faster, but numerically less stable.
61 | else
62 |   if [n m] == size(Q)
63 |     C = zeros(D,1);
64 |     for d = 1:D
65 |       C(d) = sum(sum((repmat(b(d,:), n, 1) - repmat(a(d,:)', 1, m)).^2.*Q));
66 |     end
67 |   else
68 |     error('Third argument has wrong size.');
69 |   end
70 | end
71 | \end{lstlisting}
72 | 


--------------------------------------------------------------------------------
/doc/tex/trainDynModel.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{trainDynModel.m}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Script to learn the dynamics model
13 | \end{par} \vspace{1em}
14 | \begin{par}
15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
16 | \end{par} \vspace{1em}
17 | \begin{par}
18 | Last modification: 2013-05-20
19 | \end{par} \vspace{1em}
20 | 
21 | 
22 | \subsection*{High-Level Steps} 
23 | 
24 | \begin{enumerate}
25 | \setlength{\itemsep}{-1ex}
26 |    \item Extract states and controls from x-matrix
27 |    \item Define the training inputs and targets of the GP
28 |    \item Train the GP
29 | \end{enumerate}
30 | 
31 | 
32 | \subsection*{Code} 
33 | 
34 | 
35 | \begin{lstlisting}
36 | % 1. Train GP dynamics model
37 | Du = length(policy.maxU); Da = length(plant.angi); % no. of ctrl and angles
38 | xaug = [x(:,dyno) x(:,end-Du-2*Da+1:end-Du)];     % x augmented with angles
39 | dynmodel.inputs = [xaug(:,dyni) x(:,end-Du+1:end)];     % use dyni and ctrl
40 | dynmodel.targets = y(:,dyno);
41 | dynmodel.targets(:,difi) = dynmodel.targets(:,difi) - x(:,dyno(difi));
42 | 
43 | dynmodel = dynmodel.train(dynmodel, plant, trainOpt);  %  train dynamics GP
44 | 
45 | % display some hyperparameters
46 | Xh = dynmodel.hyp;
47 | % noise standard deviations
48 | disp(['Learned noise std: ' num2str(exp(Xh(end,:)))]);
49 | % signal-to-noise ratios (values > 500 can cause numerical problems)
50 | disp(['SNRs             : ' num2str(exp(Xh(end-1,:)-Xh(end,:)))]);
51 | \end{lstlisting}
52 | 


--------------------------------------------------------------------------------
/doc/tex/unicycle_learn.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 |       \subsection{unicycle\_learn.m}
10 | 
11 | \begin{par}
12 | \textbf{Summary:} Script to learn a controller for unicycling
13 | \end{par} \vspace{1em}
14 | \begin{par}
15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
16 | \end{par} \vspace{1em}
17 | \begin{par}
18 | Last modified: 2013-03-27
19 | \end{par} \vspace{1em}
20 | 
21 | 
22 | \subsection*{High-Level Steps} 
23 | 
24 | \begin{enumerate}
25 | \setlength{\itemsep}{-1ex}
26 |    \item Load parameters
27 |    \item Create J initial trajectories by applying random controls
28 |    \item Controlled learning (train dynamics model, policy learning, policy application)
29 | \end{enumerate}
30 | 
31 | 
32 | \subsection*{Code} 
33 | 
34 | 
35 | \begin{lstlisting}
36 | % 1. Initialization
37 | clear all; close all;
38 | settings_unicycle;                     % load scenario-specific settings
39 | basename = 'unicycle_';                % filename used for saving data
40 | 
41 | % 2. Initial J random rollouts
42 | for jj = 1:J                                        % get the first observations
43 |   [xx, yy, realCost{jj}, latent{jj}] = ...
44 |     rollout(gaussian(mu0, S0), struct('maxU',policy.maxU/5), H, plant, cost);
45 |   x = [x; xx]; y = [y; yy];
46 |   if plotting.verbosity > 0;      % visualization of trajectory
47 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
48 |     draw_rollout_unicycle;
49 |   end
50 | end
51 | 
52 | z(odei,:) = bsxfun(@plus, mu0, chol(S0)'*randn(length(odei),1000));   % compute
53 | for i = 1:size(z,2), z(augi,i) = plant.augment(z(:,i)'); end % the distribution
54 | mu0Sim = mean(z,2); S0Sim = cov(z');         % of augmented start state by MCMC
55 | mu0Sim(odei) = mu0; S0Sim(odei,odei) = S0;        % Put in known correct values
56 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno); clear z i;
57 | 
58 | % 3. Controlled learning (N iterations)
59 | for j = 1:N
60 |   trainDynModel;
61 |   learnPolicy;
62 |   applyController;
63 |   disp(['controlled trial # ' num2str(j)]);
64 |   if plotting.verbosity > 0;      % visualization of trajectory
65 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
66 |     draw_rollout_unicycle;
67 |   end
68 | end
69 | \end{lstlisting}
70 | 


--------------------------------------------------------------------------------
/doc/tex/unwrap.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 | 
10 | \subsection*{unwrap.m} 
11 | 
12 | \begin{par}
13 | \textbf{Summary:} Extract the numerical values from $s$ into the column vector $v$. The variable \$sS can be of any type, including struct and cell array. Non-numerical elements are ignored. See also the reverse rewrap.m.
14 | \end{par} \vspace{1em}
15 | 
16 | \begin{verbatim}  v = unwrap(s)\end{verbatim}
17 |     \begin{par}
18 | \textbf{Input arguments:}
19 | \end{par} \vspace{1em}
20 | \begin{verbatim}s     structure, cell, or numeric values\end{verbatim}
21 | \begin{par}
22 | \textbf{Output arguments:}
23 | \end{par} \vspace{1em}
24 | \begin{verbatim}v     structure, cell, or numeric values\end{verbatim}
25 | \begin{par}
26 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
27 | \end{par} \vspace{1em}
28 | \begin{par}
29 | Last modified: 2013-03-25
30 | \end{par} \vspace{1em}
31 | 
32 | \begin{lstlisting}
33 | function v = unwrap(s)
34 | \end{lstlisting}
35 | 
36 | 
37 | \subsection*{Code} 
38 | 
39 | 
40 | \begin{lstlisting}
41 | v = [];
42 | if isnumeric(s)
43 |   v = s(:);                        % numeric values are recast to column vector
44 | elseif isstruct(s)
45 |   v = unwrap(struct2cell(orderfields(s))); % alphabetize, conv to cell, recurse
46 | elseif iscell(s)
47 |   for i = 1:numel(s)             % cell array elements are handled sequentially
48 |     v = [v; unwrap(s{i})];
49 |   end
50 | end
51 | \end{lstlisting}
52 | 


--------------------------------------------------------------------------------
/doc/tex/valueT.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | % This LaTeX was auto-generated from an M-file by MATLAB.
 3 | % To make changes, update the M-file and republish this document.
 4 | 
 5 | 
 6 | 
 7 |     
 8 |     
 9 | 
10 | \subsection*{valueT.m} 
11 | 
12 | \begin{par}
13 | \textbf{Summary:} Test derivatives of the propagate function, which computes the mean and the variance of the successor state distribution, assuming that the current state is Gaussian distributed with mean m and covariance matrix s.
14 | \end{par} \vspace{1em}
15 | \begin{verbatim}[d dy dh] = valueT(p, delta, m, s, dynmodel, policy, plant, cost, H)\end{verbatim}
16 | \begin{par}
17 | \textbf{Input arguments:}
18 | \end{par} \vspace{1em}
19 | \begin{verbatim}p          policy parameters (can be a structure)
20 |   .\ensuremath{<}\ensuremath{>}      fields that contain the policy parameters (nothing else)
21 | m          mean of the input distribution
22 | s          covariance of the input distribution
23 | dynmodel   GP dynamics model (structure)
24 | policy     policy structure
25 | plant      plant structure
26 | cost       cost structure
27 | H          prediction horizon
28 | delta      (optional) finite difference parameter. Default: 1e-4\end{verbatim}
29 | \begin{par}
30 | \textbf{Output arguments:}
31 | \end{par} \vspace{1em}
32 | \begin{verbatim}dd         relative error of analytical vs. finite difference gradient
33 | dy         analytical gradient
34 | dh         finite difference gradient\end{verbatim}
35 | \begin{par}
36 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
37 | \end{par} \vspace{1em}
38 | \begin{par}
39 | Last modified: 2013-03-21
40 | \end{par} \vspace{1em}
41 | 
42 | \begin{lstlisting}
43 | function [d dy dh] = valueT(p, m, s, dynmodel, policy, plant, cost, H, delta)
44 | \end{lstlisting}
45 | 
46 | 
47 | \subsection*{Code} 
48 | 
49 | 
50 | \begin{lstlisting}
51 | if nargin < 9; delta = 1e-4; end
52 | if nargin < 8; H = 4; end
53 | 
54 | % call checkgrad directly
55 | [d dy dh] = checkgrad('value',p,delta,m,s,dynmodel,policy,plant,cost,H);
56 | \end{lstlisting}
57 | 


--------------------------------------------------------------------------------
/gp/covNoise.m:
--------------------------------------------------------------------------------
 1 | %% covNoise.m
 2 | % Independent covariance function, ie "white noise", with specified variance.
 3 | % The covariance function is specified as:
 4 | %
 5 | % k(x^p,x^q) = s2 * \delta(p,q)
 6 | %
 7 | % where s2 is the noise variance and \delta(p,q) is a Kronecker delta function
 8 | % which is 1 iff p=q and zero otherwise. The hyperparameter is
 9 | %
10 | % logtheta = [ log(sqrt(s2)) ]
11 | %
12 | % For more help on design of covariance functions, try "help covFunctions".
13 | %
14 | % (C) Copyright 2006 by Carl Edward Rasmussen, 2006-03-24.
15 | 
16 | function [A, B] = covNoise(logtheta, x, z)
17 | %% Code
18 | 
19 | if nargin == 0, A = '1'; return; end              % report number of parameters
20 | 
21 | s2 = exp(2*logtheta);                                          % noise variance
22 | 
23 | if nargin == 2                                      % compute covariance matrix
24 |   A = s2*eye(size(x,1));
25 | elseif nargout == 2                              % compute test set covariances
26 |   A = s2;
27 |   B = 0;                               % zeros cross covariance by independence
28 | else                                                % compute derivative matrix
29 |   A = 2*s2*eye(size(x,1));
30 | end
31 | 


--------------------------------------------------------------------------------
/gp/covSEard.m:
--------------------------------------------------------------------------------
 1 | %% covSEard.m
 2 | % Squared Exponential covariance function with Automatic Relevance Detemination
 3 | % (ARD) distance measure. The covariance function is parameterized as:
 4 | %
 5 | % k(x^p,x^q) = sf2 * exp(-(x^p - x^q)'*inv(P)*(x^p - x^q)/2)
 6 | %
 7 | % where the P matrix is diagonal with ARD parameters ell_1^2,...,ell_D^2, where
 8 | % D is the dimension of the input space and sf2 is the signal variance. The
 9 | % hyperparameters are:
10 | %
11 | % loghyper = [ log(ell_1)
12 | %              log(ell_2)
13 | %               .
14 | %              log(ell_D)
15 | %              log(sqrt(sf2)) ]
16 | %
17 | % For more help on design of covariance functions, try "help covFunctions".
18 | %
19 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-03-24)
20 | 
21 | function [A, B] = covSEard(loghyper, x, z)
22 | %% Code
23 | if nargin == 0, A = '(D+1)'; return; end          % report number of parameters
24 | 
25 | persistent K;    
26 | 
27 | [n D] = size(x);
28 | ell = exp(loghyper(1:D));                         % characteristic length scale
29 | sf2 = exp(2*loghyper(D+1));                                   % signal variance
30 | 
31 | if nargin == 2
32 |   K = sf2*exp(-sq_dist(diag(1./ell)*x')/2);
33 |   A = K;                 
34 | elseif nargout == 2                              % compute test set covariances
35 |   A = sf2*ones(size(z,1),1);
36 |   B = sf2*exp(-sq_dist(diag(1./ell)*x',diag(1./ell)*z')/2);
37 | else                                                % compute derivative matrix
38 |   
39 |   % check for correct dimension of the previously calculated kernel matrix
40 |   if any(size(K)~=n)  
41 |     K = sf2*exp(-sq_dist(diag(1./ell)*x')/2);
42 |   end
43 |    
44 |   if z <= D                                           % length scale parameters
45 |     A = K.*sq_dist(x(:,z)'/ell(z));  
46 |   else                                                    % magnitude parameter
47 |     A = 2*K;
48 |     clear K;
49 |   end
50 | end
51 | 
52 | 


--------------------------------------------------------------------------------
/gp/covSum.m:
--------------------------------------------------------------------------------
 1 | %% covSum.m
 2 | % *Summary:* Compose a covariance function as the sum of other covariance
 3 | % functions. This function doesn't actually compute very much on its own, it
 4 | % merely does some bookkeeping, and calls other covariance functions to do the
 5 | % actual work.
 6 | %
 7 | %     function [A, B] = covSum(covfunc, logtheta, x, z)
 8 | %
 9 | % (C) Copyright 2006 by Carl Edward Rasmussen, 2006-03-20.
10 | %
11 | 
12 | function [A, B] = covSum(covfunc, logtheta, x, z)
13 | %% Code
14 | 
15 | for i = 1:length(covfunc)                   % iterate over covariance functions
16 |   f = covfunc(i);
17 |   if iscell(f{:}), f = f{:}; end          % dereference cell array if necessary
18 |   j(i) = cellstr(feval(f{:}));
19 | end
20 | 
21 | if nargin == 1,                                   % report number of parameters
22 |   A = char(j(1)); for i=2:length(covfunc), A = [A, '+', char(j(i))]; end
23 |   return
24 | end
25 | 
26 | [n, D] = size(x);
27 | 
28 | v = [];              % v vector indicates to which covariance parameters belong
29 | for i = 1:length(covfunc), v = [v repmat(i, 1, eval(char(j(i))))]; end
30 | 
31 | switch nargin
32 | case 3                                              % compute covariance matrix
33 |   A = zeros(n, n);                       % allocate space for covariance matrix
34 |   for i = 1:length(covfunc)                  % iteration over summand functions
35 |     f = covfunc(i);
36 |     if iscell(f{:}), f = f{:}; end        % dereference cell array if necessary
37 |     A = A + feval(f{:}, logtheta(v==i), x);            % accumulate covariances
38 |   end
39 | 
40 | case 4                      % compute derivative matrix or test set covariances
41 |   if nargout == 2                                % compute test set cavariances
42 |     A = zeros(size(z,1),1); B = zeros(size(x,1),size(z,1));    % allocate space
43 |     for i = 1:length(covfunc)
44 |       f = covfunc(i);
45 |       if iscell(f{:}), f = f{:}; end      % dereference cell array if necessary
46 |       [AA BB] = feval(f{:}, logtheta(v==i), x, z);   % compute test covariances
47 |       A = A + AA; B = B + BB;                                  % and accumulate
48 |     end
49 |   else                                            % compute derivative matrices
50 |     i = v(z);                                       % which covariance function
51 |     j = sum(v(1:z)==i);                    % which parameter in that covariance
52 |     f = covfunc(i);
53 |     if iscell(f{:}), f = f{:}; end        % dereference cell array if necessary
54 |     A = feval(f{:}, logtheta(v==i), x, j);                 % compute derivative
55 |   end
56 | 
57 | end
58 | 
59 | 


--------------------------------------------------------------------------------
/gp/gpr.m:
--------------------------------------------------------------------------------
 1 | %% gpr.m
 2 | % *Summary:* Gaussian process regression, with a named covariance function. Two
 3 | % modes are possible: training and prediction: if no test data are given, the
 4 | % function returns minus the log likelihood and its partial derivatives with
 5 | % respect to the hyperparameters; this mode is used to fit the hyperparameters.
 6 | % If test data are given, then (marginal) Gaussian predictions are computed,
 7 | % whose mean and variance are returned. Note that in cases where the covariance
 8 | % function has noise contributions, the variance returned in S2 is for noisy
 9 | % test targets; if you want the variance of the noise-free latent function, you
10 | % must substract the noise variance.
11 | %
12 | % usage: [nlml dnlml] = gpr(logtheta, covfunc, x, y)
13 | %    or: [mu S2]  = gpr(logtheta, covfunc, x, y, xstar)
14 | %
15 | % where:
16 | %
17 | %   logtheta is a (column) vector of log hyperparameters
18 | %   covfunc  is the covariance function
19 | %   x        is a n by D matrix of training inputs
20 | %   y        is a (column) vector (of size n) of targets
21 | %   xstar    is a nn by D matrix of test inputs
22 | %   nlml     is the returned value of the negative log marginal likelihood
23 | %   dnlml    is a (column) vector of partial derivatives of the negative
24 | %                 log marginal likelihood wrt each log hyperparameter
25 | %   mu       is a (column) vector (of size nn) of prediced means
26 | %   S2       is a (column) vector (of size nn) of predicted variances
27 | %
28 | % For more help on covariance functions, see "help covFunctions".
29 | %
30 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-03-20).
31 | 
32 | 
33 | function [out1, out2] = gpr(logtheta, covfunc, x, y, xstar)
34 | %% Code
35 | 
36 | if ischar(covfunc), covfunc = cellstr(covfunc); end % convert to cell if needed
37 | [n, D] = size(x);
38 | if eval(feval(covfunc{:})) ~= size(logtheta, 1)
39 |   error('Error: Number of parameters do not agree with covariance function')
40 | end
41 | 
42 | K = feval(covfunc{:}, logtheta, x);    % compute training set covariance matrix
43 | 
44 | L = chol(K)';                        % cholesky factorization of the covariance
45 | alpha = solve_chol(L',y);
46 | 
47 | if nargin == 4 % if no test cases, compute the negative log marginal likelihood
48 | 
49 |   out1 = 0.5*y'*alpha + sum(log(diag(L))) + 0.5*n*log(2*pi);
50 | 
51 |   if nargout == 2               % ... and if requested, its partial derivatives
52 |     out2 = zeros(size(logtheta));       % set the size of the derivative vector
53 |     W = L'\(L\eye(n))-alpha*alpha';                % precompute for convenience
54 |     for i = 1:length(out2)
55 |       out2(i) = sum(sum(W.*feval(covfunc{:}, logtheta, x, i)))/2;
56 |     end
57 |   end
58 | 
59 | else                    % ... otherwise compute (marginal) test predictions ...
60 | 
61 |   [Kss, Kstar] = feval(covfunc{:}, logtheta, x, xstar);     %  test covariances
62 | 
63 |   out1 = Kstar' * alpha;                                      % predicted means
64 | 
65 |   if nargout == 2
66 |     v = L\Kstar;
67 |     out2 = Kss - sum(v.*v)';
68 |   end  
69 | 
70 | end
71 | 


--------------------------------------------------------------------------------
/gp/hypCurb.m:
--------------------------------------------------------------------------------
 1 | %% hypCurb.m
 2 | % *Summary:* Wrapper for GP training (via gpr.m), penalizing large SNR and 
 3 | % extreme length-scales to avoid numerical instabilities
 4 | %
 5 | %     function [f df] = hypCurb(lh, covfunc, x, y, curb)
 6 | %
 7 | % *Input arguments:*
 8 | %
 9 | %   lh       log-hyper-parameters                                   [D+2 x  E ]
10 | %   covfunc  covariance function, e.g., 
11 | %                               covfunc = {'covSum', {'covSEard', 'covNoise'}};
12 | %   x        training inputs                                        [ n  x  D ]
13 | %   y        training targets                                      [ n  x  E ]
14 | %   curb     (optional) parameters to penalize extreme hyper-parameters
15 | %     .ls    length-scales
16 | %     .snr   signal-to-noise ratio (try to keep it below 500)
17 | %     .std   additional parameter required for length-scale penalty 
18 | %
19 | % *Output arguments:*
20 | %
21 | %   f        penalized negative log-marginal likelihood
22 | %   df       derivative of penalized negative log-marginal likelihood wrt
23 | %            GP log-hyper-parameters
24 | %
25 | %
26 | % Copyright (C) 2008-2013 by
27 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
28 | %
29 | % Last modified: 2011-12-19
30 | %
31 | %% High-Level Steps
32 | % # Compute the negative log-marginal likelihood (plus derivatives)
33 | % # Add penalties and change derivatives accordingly
34 | 
35 | function [f df] = hypCurb(lh, covfunc, x, y, curb)
36 | %% Code
37 | if nargin < 5, curb.snr = 500; curb.ls = 100; curb.std = 1; end   % set default
38 | 
39 | p = 30;                                                         % penalty power
40 | D = size(x,2);
41 | if size(lh,1) == 3*D+2; li = 1:2*D; sfi = 2*D+1:3*D+1; % 1D and DD terms
42 | elseif size(lh,1) == 2*D+1; li = 1:D; sfi = D+1:2*D;   % Just 1D terms
43 | elseif size(lh,1) == D+2; li = 1:D; sfi = D+1;         % Just DD terms
44 | else error('Incorrect number of hyperparameters'); 
45 | end
46 | 
47 | ll = lh(li); lsf = lh(sfi); lsn = lh(end);
48 | 
49 | % 1) compute the negative log-marginal likelihood (plus derivatives)
50 | [f df] = gpr(lh, covfunc, x, y);                              % first, call gpr
51 | 
52 | % 2) add penalties and change derivatives accordingly
53 | f = f + sum(((ll - log(curb.std'))./log(curb.ls)).^p);   % length-scales
54 | df(li) = df(li) + p*(ll - log(curb.std')).^(p-1)/log(curb.ls)^p;
55 | 
56 | f = f + sum(((lsf - lsn)/log(curb.snr)).^p); % signal to noise ratio
57 | df(sfi) = df(sfi) + p*(lsf - lsn).^(p-1)/log(curb.snr)^p;
58 | df(end) = df(end) - p*sum((lsf - lsn).^(p-1)/log(curb.snr)^p);
59 | 


--------------------------------------------------------------------------------
/loss/lossLin.m:
--------------------------------------------------------------------------------
 1 | %% lossLin.m
 2 | % *Summary:* Function to compute the expected loss and its derivatives, given an 
 3 | % input distribution, under a linear loss function: L = a^T(x - b). Note, this
 4 | % loss function can return negative loss.
 5 | %
 6 | %   [L dLdm dLds S dSdm dSds C dCdm dCds] = lossLin(cost,m,s)
 7 | %
 8 | % *Input arguments:*
 9 | %
10 | %    cost
11 | %      .a      gradient of linear loss function,                       [D x 1]
12 | %      .b      targets, the value of x for which there is zero loss    [D x 1]
13 | %    m         mean of input distribution                              [D x 1]
14 | %    s         covariance matrix of input distribution                 [D x D]
15 | %
16 | % *Output arguments:*
17 | %
18 | %   L               expected loss                                  [1   x    1 ]
19 | %   dLdm            derivative of L wrt input mean                 [1   x    D ]
20 | %   dLds            derivative of L wrt input covariance           [1   x   D^2] 
21 | %   S               variance of loss                               [1   x    1 ]
22 | %   dSdm            derivative of S wrt input mean                 [1   x    D ]
23 | %   dSds            derivative of S wrt input covariance           [1   x   D^2]    
24 | %   C               inv(S) times input-output covariance           [D   x    1 ]   
25 | %   dCdm            derivative of C wrt input mean                 [D   x    D ]  
26 | %   dCds            derivative of C wrt input covariance           [D   x   D^2]  
27 | %
28 | % Copyright (C) 2008-2013 by
29 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
30 | %
31 | % Last modified: 2013-03-05
32 | %
33 | %% High-Level Steps
34 | % # Expected cost
35 | % # Variance of cost
36 | % # inv(s)* cov(x,L)
37 | 
38 | function [L dLdm dLds S dSdm dSds C dCdm dCds] = lossLin(cost,m,s)
39 | %% Code
40 | 
41 | a = cost.a(:); b = cost.b(:); D = length(m);
42 | if length(a) ~= D || length(b) ~= D;
43 |     error('a or b not the same length as m'); end
44 | 
45 | % 1) Mean
46 | L = a'*(m - b);
47 | dLdm = a';
48 | dLds = zeros(D);
49 | 
50 | % 2) Variance
51 | S = a'*s*a;
52 | dSdm = zeros(1,D);
53 | dSds = a*a';
54 | 
55 | % 3) inv(s) * input-output covariance cov(x,L)
56 | C = a;
57 | dCdm = zeros(D); dCds = zeros(D,D^2);
58 | 
59 | 


--------------------------------------------------------------------------------
/loss/lossQuad.m:
--------------------------------------------------------------------------------
 1 | %% lossQuad.m
 2 | % *Summary:* Compute expectation and variance of a quadratic cost 
 3 | % $(x-z)'*W*(x-z)$
 4 | % and their derivatives, where $x \sim N(m,S)$
 5 | %
 6 | %
 7 | %  function [L, dLdm, dLds, S, dSdm, dSds, C, dCdm, dCds] = lossQuad(cost, m, S)
 8 | %
 9 | %
10 | %
11 | % *Input arguments:*
12 | %
13 | %    cost
14 | %      .z:     target state                                              [D x 1]
15 | %      .W:     weight matrix                                             [D x D]
16 | %    m         mean of input distribution                                [D x 1]
17 | %    s         covariance matrix of input distribution                   [D x D]
18 | %
19 | %
20 | % *Output arguments:*
21 | %
22 | %   L               expected loss                                  [1   x    1 ]
23 | %   dLdm            derivative of L wrt input mean                 [1   x    D ]
24 | %   dLds            derivative of L wrt input covariance           [1   x   D^2] 
25 | %   S               variance of loss                               [1   x    1 ]
26 | %   dSdm            derivative of S wrt input mean                 [1   x    D ]
27 | %   dSds            derivative of S wrt input covariance           [1   x   D^2]    
28 | %   C               inv(S) times input-output covariance           [D   x    1 ]   
29 | %   dCdm            derivative of C wrt input mean                 [D   x    D ]  
30 | %   dCds            derivative of C wrt input covariance           [D   x   D^2]  
31 | %
32 | % Copyright (C) 2008-2013 by
33 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
34 | %
35 | % Last modified: 2013-05-30
36 | %
37 | %% High-Level Steps
38 | % # Expected cost
39 | % # Variance of cost
40 | % # inv(s)* cov(x,L)
41 | 
42 | function [L, dLdm, dLds, S, dSdm, dSds, C, dCdm, dCds] = lossQuad(cost, m, S)
43 | %% Code
44 | D = length(m); % get state dimension
45 | 
46 | % set some defaults if necessary
47 | if isfield(cost,'W'); W = cost.W; else W = eye(D); end
48 | if isfield(cost,'z'); z = cost.z; else z = zeros(D,1); end
49 | 
50 | % 1. expected cost
51 | L = S(:)'*W(:) + (z-m)'*W*(z-m); 
52 | 
53 | % 1a. derivatives of expected cost
54 | if nargout > 1
55 |   dLdm = 2*(m-z)'*W; % wrt input mean
56 |   dLds = W';         % wrt input covariance matrix
57 | end
58 | 
59 | % 2. variance of cost
60 | if nargout > 3
61 |   S = trace(W*S*(W + W')*S) + (z-m)'*(W + W')*S*(W + W')*(z-m);
62 |   if S < 1e-12; S = 0; end % for numerical reasons
63 | end
64 | 
65 | % 2a. derivatives of variance of cost
66 | if nargout > 4
67 |   % wrt input mean
68 |   dSdm = -(2*(W+W')*S*(W+W)*(z-m))'; 
69 |   % wrt input covariance matrix
70 |   dSds = W'*S'*(W + W')'+(W + W')'*S'*W' + (W + W')*(z-m)*((W + W')*(z-m))'; 
71 | end
72 | 
73 | % 3. inv(s) times IO covariance with derivatives
74 | if nargout > 6
75 |     C = 2*W*(m-z);
76 |     dCdm = 2*W;
77 |     dCds = zeros(D,D^2);
78 | end
79 | 


--------------------------------------------------------------------------------
/loss/reward.m:
--------------------------------------------------------------------------------
 1 | %% reward.m
 2 | % *Summary:* Compute expectation, variance, and their derivatives of an 
 3 | % exponentiated negative quadratic cost $\exp(-(x-z)'W(x-z)/2)$,
 4 | % where $x\sim\mathcal N(m,S)$
 5 | %
 6 | % *Input arguments:*
 7 | %
 8 | %   m:          D-by-1 mean of the state distribution
 9 | %   S:          D-by-D covariance matrix of the state distribution
10 | %   z:          D-by-1 target state
11 | %   W:          D-by-D weight matrix
12 | %
13 | % *Output arguments:*
14 | %
15 | %   muR:        1-by-1 expected reward
16 | %   dmuRdm:     1-by-D derivative of expected reward wrt input mean
17 | %   dmuRdS:     D-by-D derivative of expected reward wrt input covariance matrix
18 | %   sR:         1-by-1 variance of reward
19 | %   dsRdm:      1-by-D derivative of variance of reward wrt input mean
20 | %   dsRdS:      D-by-D derivative reward variance wrt input covariance matrix
21 | %
22 | % Copyright (C) 2008-2013 by 
23 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 
24 | %
25 | % Last modification: 2013-01-20
26 | %
27 | %% High-Level Steps
28 | % # Compute expected reward
29 | % # Compute the derivatives of the expected reward with respect to the input 
30 | %   distribution (optional)
31 | % # Compute variance of reward
32 | % # Compute the derivatives of the variance of the reward with
33 | % respect to the input distribution (optional)
34 | 
35 | function [muR, dmuRdm, dmuRdS, sR, dsRdm, dsRdS] = reward(m, S, z, W)
36 | %% Code
37 | 
38 | % some precomputations
39 | D = length(m); % get state dimension
40 | SW = S*W;
41 | iSpW = W/(eye(D)+SW);
42 | 
43 | % 1. expected reward
44 | muR = exp(-(m-z)'*iSpW*(m-z)/2)/sqrt(det(eye(D)+SW)); 
45 | 
46 | % 2. derivatives of expected reward
47 | if nargout > 1
48 |   dmuRdm = -muR*(m-z)'*iSpW;  % wrt input mean
49 |   dmuRdS = muR*(iSpW*(m-z)*(m-z)'-eye(D))*iSpW/2;  % wrt input covariance matrix
50 | end
51 | 
52 | % 3. reward variance
53 | if nargout > 3
54 |   i2SpW = W/(eye(D)+2*SW);
55 |   r2 = exp(-(m-z)'*i2SpW*(m-z))/sqrt(det(eye(D)+2*SW));
56 |   sR = r2 - muR^2;
57 |   if sR < 1e-12; sR=0; end % for numerical reasons
58 | end
59 | 
60 | % 4. derivatives of reward variance
61 | if nargout > 4
62 |   % wrt input mean
63 |   dsRdm = -2*r2*(m-z)'*i2SpW-2*muR*dmuRdm;
64 |   % wrt input covariance matrix
65 |   dsRdS = r2*(2*i2SpW*(m-z)*(m-z)'-eye(D))*i2SpW-2*muR*dmuRdS;
66 | end
67 | 
68 | 


--------------------------------------------------------------------------------
/loss/reward.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/loss/reward.pdf


--------------------------------------------------------------------------------
/loss/reward.tex:
--------------------------------------------------------------------------------
 1 | \documentclass{article}
 2 | \renewcommand{\rmdefault}{psbx}
 3 | \usepackage[utf8]{inputenc}
 4 | \usepackage[T1]{fontenc}
 5 | \usepackage{textcomp}
 6 | \usepackage{eulervm}
 7 | \usepackage{amsmath}
 8 | \usepackage{amssymb}
 9 | 
10 | \setlength{\textwidth}{160mm}
11 | \setlength{\oddsidemargin}{0mm}
12 | \setlength{\parindent}{0 mm}
13 | 
14 | \newcommand{\bfm}{{\bf m}}
15 | \newcommand{\bfs}{{\bf s}}
16 | \newcommand{\bfz}{{\bf z}}
17 | \newcommand{\E}{{\mathbb E}}
18 | \newcommand{\V}{{\mathbb V}}
19 | 
20 | \title{The Reward Function}
21 | \author{Carl Edward Rasmussen}
22 | \date{July 17th 2008}
23 | 
24 | \begin{document}
25 | 
26 | \maketitle
27 | 
28 | The \emph{reward} $R$ of being in state $\bfs$ is 
29 | \[
30 | R(\bfs)\;=\;\exp\big(\!-\tfrac{1}{2}(\bfs-\bfz)^\top T^{-1}(\bfs-\bfz)\big),
31 | \]
32 | where $\bfz$ and $T$ are parameters of the reward function, which
33 | typically depend on the specific task. We assume that the state is
34 | uncertain, with a Gaussian distribution
35 | \[
36 | \bfs\;\sim\;{\cal N}(\bfm, S),
37 | \]
38 | with mean $\bfm$ and covariance matrix $S$. We wish to compute the
39 | \emph{expected} reward
40 | \[
41 | \mu\;=\;\E[R]\;=\;\langle R(\bfs)\rangle_{\bfs\sim{\cal N}(\bfm,S)}\;=\;|I+ST^{-1}|^{-1/2}
42 | \exp\big(\!-\tfrac{1}{2}(\bfm-\bfz)^\top (S+T)^{-1}(\bfm-\bfz)\big),
43 | \]
44 | and to compute the \emph{variance} of the reward we first define
45 | \[
46 | r^2\;=\;\E[R^2]\;=\;|I+2ST^{-1}|^{-1/2}
47 | \exp\big(\!-(\bfm-\bfz)^\top (2S+T)^{-1}(\bfm-\bfz)\big),
48 | \]
49 | so that
50 | \[
51 | \sigma^2\;=\;\V[R]\;=\;\E[R^2]-\E^2[R]\;=\;r^2-\mu^2.
52 | \]
53 | We also need the derivatives of these two quanteties wrt the
54 | parameters of the state distribution. For the expected reward we have
55 | \[
56 | \frac{d\mu}{d\bfm}\;=\;-\mu(S+T)^{-1}(\bfm-\bfz),\qquad
57 | \frac{d\mu}{dS}\;=\;\tfrac{1}{2}\mu\big[(S+T)^{-1}(\bfm-\bfz)(\bfm-\bfz)^\top
58 | -I\big](S+T)^{-1},
59 | \]
60 | and for the variance
61 | \[
62 | \frac{d\sigma^2}{d\bfm}\;=\;-2r^2(2S+T)^{-1}(\bfm-\bfz)-2\mu\frac{d\mu}{d\bfm},
63 | \qquad\frac{d\sigma^2}{dS}\;=\;r^2\big
64 | [2(2S+T)^{-1}(\bfm-\bfz)(\bfm-\bfz)^\top -I\big](2S+T)^{-1} -2\mu\frac{d\mu}{dS}.
65 | \]
66 | \end{document}
67 | 


--------------------------------------------------------------------------------
/scenarios/cartDoublePendulum/cartDouble_learn.m:
--------------------------------------------------------------------------------
 1 | %% cartDouble_learn.m
 2 | % *Summary:* Script to learn a controller for the cart-doube-pendulum
 3 | % swingup
 4 | %
 5 | % Copyright (C) 2008-2013 by
 6 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
 7 | %
 8 | % Last modified: 2013-03-27
 9 | %
10 | %% High-Level Steps
11 | % # Load parameters
12 | % # Create J initial trajectories by applying random controls
13 | % # Controlled learning (train dynamics model, policy learning, policy
14 | % application)
15 | 
16 | %% Code
17 | 
18 | % 1. Initialization
19 | clear all; close all;
20 | settings_cdp;                 % load scenario-specific settings
21 | basename = 'CartDoublePend_'; % filename used for saving data
22 | 
23 | % 2. Initial J random rollouts
24 | for jj = 1:J                                        
25 |   [xx, yy, realCost{jj}, latent{jj}] = ...
26 |     rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost);
27 |   x = [x; xx]; y = [y; yy];       % augment training sets for dynamics model
28 |   if plotting.verbosity > 0;      % visualization of trajectory
29 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
30 |     draw_rollout_cdp; 
31 |   end 
32 |                    
33 | end
34 | 
35 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0;
36 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno);
37 | 
38 | % 3. Controlled learning (N iterations)
39 | for j = 1:N
40 |   trainDynModel;   % train (GP) dynamics model
41 |   learnPolicy;     % learn policy 
42 |   applyController; % apply controller to system
43 |   disp(['controlled trial # ' num2str(j)]);
44 |   if plotting.verbosity > 0;      % visualization of trajectory
45 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
46 |     draw_rollout_cdp; 
47 |   end 
48 | end
49 | 


--------------------------------------------------------------------------------
/scenarios/cartDoublePendulum/draw_rollout_cdp.m:
--------------------------------------------------------------------------------
 1 | %% draw_rollout_cdp.m
 2 | % *Summary:* Script to draw a trajectory of the cart-double-pendulum system and
 3 | %  the predicted uncertainties around the tips of the pendulums
 4 | %
 5 | % Copyright (C) 2008-2013 by
 6 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
 7 | %
 8 | % Last modified: 2013-03-27
 9 | %
10 | %% High-Level Steps
11 | % # For each time step, plot the observed trajectory and the predicted
12 | % means and covariances of the Cartesian coordinates of the tips of both
13 | % pendulums
14 | 
15 | %% Code
16 | for r = 1:size(xx,1)
17 |   if exist('j','var') && ~isempty(M{j})
18 |     draw_cdp(latent{j}(r,1), latent{j}(r,5), latent{j}(r,6), ...
19 |       latent{j}(r,end), cost, M{j}(:,r), Sigma{j}(:,:,r), ...
20 |       ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ...
21 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
22 |       ' sec']);
23 |   else
24 |     draw_cdp(latent{jj}(r,1), latent{jj}(r,5), latent{jj}(r,6), ...
25 |       latent{jj}(r,end), cost, [], [], ...
26 |       ['(random) trial # ' num2str(jj) ', T=' num2str(H*dt) ' sec'], ...
27 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
28 |       ' sec']);
29 |   end
30 |   pause(dt);
31 | end
32 | 


--------------------------------------------------------------------------------
/scenarios/cartPole/cartPole_learn.m:
--------------------------------------------------------------------------------
 1 | %% cartPole_learn.m
 2 | % *Summary:* Script to learn a controller for the cart-pole swingup
 3 | %
 4 | % Copyright (C) 2008-2013 by
 5 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
 6 | %
 7 | % Last modified: 2013-03-27
 8 | %
 9 | %% High-Level Steps
10 | % # Load parameters
11 | % # Create J initial trajectories by applying random controls
12 | % # Controlled learning (train dynamics model, policy learning, policy
13 | % application)
14 | 
15 | %% Code
16 | 
17 | % 1. Initialization
18 | clear all; close all;
19 | settings_cp;                      % load scenario-specific settings
20 | basename = 'cartPole_';           % filename used for saving data
21 | 
22 | % 2. Initial J random rollouts
23 | for jj = 1:J
24 |   [xx, yy, realCost{jj}, latent{jj}] = ...
25 |     rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost);
26 |   x = [x; xx]; y = [y; yy];       % augment training sets for dynamics model
27 |   if plotting.verbosity > 0;      % visualization of trajectory
28 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
29 |     draw_rollout_cp;
30 |   end
31 |   
32 | end
33 | 
34 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0;
35 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno);
36 | 
37 | % 3. Controlled learning (N iterations)
38 | for j = 1:N
39 |   trainDynModel;   % train (GP) dynamics model
40 |   learnPolicy;     % learn policy
41 |   applyController; % apply controller to system
42 |   disp(['controlled trial # ' num2str(j)]);
43 |   if plotting.verbosity > 0;      % visualization of trajectory
44 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
45 |     draw_rollout_cp;
46 |   end
47 | end


--------------------------------------------------------------------------------
/scenarios/cartPole/draw_cp.m:
--------------------------------------------------------------------------------
 1 | %% draw_cp.m
 2 | % *Summary:* Draw the cart-pole system with reward, applied force, and 
 3 | % predictive uncertainty of the tip of the pendulum
 4 | %
 5 | %    function draw_cp(x, theta, force, cost, text1, text2, M, S)
 6 | %
 7 | %
 8 | % *Input arguments:*
 9 | %
10 | %		x          position of the cart
11 | %   theta      angle of pendulum
12 | %   force      force applied to cart
13 | %   cost       cost structure
14 | %     .fcn     function handle (it is assumed to use saturating cost)
15 | %     .<>      other fields that are passed to cost
16 | %   M          (optional) mean of state
17 | %   S          (optional) covariance of state
18 | %   text1      (optional) text field 1
19 | %   text2      (optional) text field 2
20 | %
21 | %
22 | % Copyright (C) 2008-2013 by
23 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
24 | %
25 | % Last modified: 2013-03-07
26 | 
27 | function draw_cp(x, theta, force, cost, text1, text2, M, S)
28 | %% Code
29 | 
30 | l = 0.6;
31 | xmin = -3; 
32 | xmax = 3;    
33 | height = 0.1;
34 | width  = 0.3;
35 | maxU = 10;
36 | 
37 | % Compute positions 
38 | cart = [ x + width,  height
39 |          x + width, -height
40 |          x - width, -height
41 |          x - width,  height
42 |          x + width,  height ];
43 | pendulum = [x, 0; x+2*l*sin(theta), -cos(theta)*2*l];
44 | 
45 | 
46 | clf; hold on
47 | plot(0,2*l,'k+','MarkerSize',20,'linewidth',2)
48 | plot([xmin, xmax], [-height-0.03, -height-0.03],'k','linewidth',2)
49 | 
50 | % Plot force
51 | plot([0 force/maxU*xmax],[-0.3, -0.3],'g','linewidth',10)
52 | 
53 | % Plot reward
54 | reward = 1-cost.fcn(cost,[x, 0, 0, theta]', zeros(4));
55 | plot([0 reward*xmax],[-0.5, -0.5],'y','linewidth',10)
56 | 
57 | % Plot the cart-pole
58 | fill(cart(:,1), cart(:,2),'k','edgecolor','k');
59 | plot(pendulum(:,1), pendulum(:,2),'r','linewidth',4)
60 | 
61 | % Plot the joint and the tip
62 | plot(x,0,'y.','markersize',24)
63 | plot(pendulum(2,1),pendulum(2,2),'y.','markersize',24)
64 | 
65 | % plot ellipse around tip of pendulum (if M, S exist)
66 | try
67 |   [M1 S1] = getPlotDistr_cp(M,S,2*l);
68 |   error_ellipse(S1,M1,'style','b');
69 | catch
70 | end
71 | 
72 | % Text
73 | text(0,-0.3,'applied force')
74 | text(0,-0.5,'immediate reward')
75 | if exist('text1','var')
76 |   text(0,-0.9, text1)
77 | end
78 | if exist('text2','var')
79 |   text(0,-1.1, text2)
80 | end
81 | 
82 | set(gca,'DataAspectRatio',[1 1 1],'XLim',[xmin xmax],'YLim',[-1.4 1.4]);
83 | axis off;
84 | drawnow;


--------------------------------------------------------------------------------
/scenarios/cartPole/draw_rollout_cp.m:
--------------------------------------------------------------------------------
 1 | %% draw_rollout_cp.m
 2 | % *Summary:* Script to draw the most recent trajectory of the cart-pole
 3 | % system together with the predicted uncertainties around the tip of the 
 4 | % pendulum
 5 | %
 6 | % Copyright (C) 2008-2013 by 
 7 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
 8 | %
 9 | % Last modified: 2013-05-20
10 | %
11 | %% High-Level Steps
12 | % # For each time step, plot the observed trajectory and the predicted
13 | % means and covariances of the Cartesian coordinates of the tip of the
14 | % pendulum
15 | 
16 | %% Code
17 | 
18 | % Loop over states in trajectory (= time steps)
19 | for r = 1:size(xx,1)
20 |   if exist('j','var') && ~isempty(M{j})
21 |     draw_cp(latent{j}(r,1), latent{j}(r,4), latent{j}(r,end), cost,  ...
22 |       ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ...
23 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
24 |       ' sec'], M{j}(:,r), Sigma{j}(:,:,r));
25 |   else
26 |      draw_cp(latent{jj}(r,1), latent{jj}(r,4), latent{jj}(r,end), cost,  ...
27 |       ['(random) trial # ' num2str(jj) ', T=' num2str(H*dt) ' sec'], ...
28 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
29 |       ' sec'])
30 |   end
31 |   pause(dt);
32 | end
33 | 


--------------------------------------------------------------------------------
/scenarios/cartPole/dynamics_cp.m:
--------------------------------------------------------------------------------
 1 | %% dynamics_cp.m
 2 | % *Summary:* Implements ths ODE for simulating the cart-pole dynamics.
 3 | %
 4 | %    function dz = dynamics_cp(t, z, f)
 5 | %
 6 | %
 7 | % *Input arguments:*
 8 | %
 9 | %		t     current time step (called from ODE solver)
10 | %   z     state                                                    [4 x 1]
11 | %   f     (optional): force f(t)
12 | %
13 | % *Output arguments:*
14 | %
15 | %   dz    if 3 input arguments:      state derivative wrt time
16 | %         if only 2 input arguments: total mechanical energy
17 | %
18 | %
19 | % Note: It is assumed that the state variables are of the following order:
20 | %       x:        [m]     position of cart
21 | %       dx:       [m/s]   velocity of cart
22 | %       dtheta:   [rad/s] angular velocity
23 | %       theta:    [rad]   angle
24 | %
25 | %
26 | % A detailed derivation of the dynamics can be found in:
27 | %
28 | % M.P. Deisenroth:
29 | % Efficient Reinforcement Learning Using Gaussian Processes, Appendix C,
30 | % KIT Scientific Publishing, 2010.
31 | %
32 | % Copyright (C) 2008-2013 by
33 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
34 | %
35 | % Last modified: 2013-03-08
36 | 
37 | function dz = dynamics_cp(t,z,f)
38 | %% Code
39 | 
40 | l = 0.5;  % [m]      length of pendulum
41 | m = 0.5;  % [kg]     mass of pendulum
42 | M = 0.5;  % [kg]     mass of cart
43 | b = 0.1;  % [N/m/s]  coefficient of friction between cart and ground
44 | g = 9.82; % [m/s^2]  acceleration of gravity
45 | 
46 | if nargin==3
47 |   dz = zeros(4,1);
48 |   dz(1) = z(2);
49 |   dz(2) = ( 2*m*l*z(3)^2*sin(z(4)) + 3*m*g*sin(z(4))*cos(z(4)) ...
50 |           + 4*f(t) - 4*b*z(2) )/( 4*(M+m)-3*m*cos(z(4))^2 );
51 |   dz(3) = (-3*m*l*z(3)^2*sin(z(4))*cos(z(4)) - 6*(M+m)*g*sin(z(4)) ...
52 |           - 6*(f(t)-b*z(2))*cos(z(4)) )/( 4*l*(m+M)-3*m*l*cos(z(4))^2 );
53 |   dz(4) = z(3);
54 | else
55 |   dz = (M+m)*z(2)^2/2 + 1/6*m*l^2*z(3)^2 + m*l*(z(2)*z(3)-g)*cos(z(4))/2;
56 | end


--------------------------------------------------------------------------------
/scenarios/cartPole/getPlotDistr_cp.m:
--------------------------------------------------------------------------------
 1 | %% getPlotDistr_cp.m
 2 | % *Summary:* Compute means and covariances of the Cartesian coordinates of
 3 | % the tips both the inner and outer pendulum assuming that the joint state
 4 | % $x$ of the cart-double-pendulum system is Gaussian, i.e., $x\sim N(m, s)$
 5 | %
 6 | %
 7 | %     function [M, S] = getPlotDistr_cp(m, s, ell)
 8 | %
 9 | %
10 | %
11 | % *Input arguments:*
12 | %
13 | %   m       mean of full state                                    [4 x 1]
14 | %   s       covariance of full state                              [4 x 4]
15 | %   ell     length of pendulum
16 | %
17 | %   Note: this code assumes that the following order of the state:
18 | %          1: cart pos., 
19 | %          2: cart vel., 
20 | %          3: pendulum angular velocity,
21 | %          4: pendulum angle
22 | %
23 | % *Output arguments:*
24 | %
25 | %   M      mean of tip of pendulum                               [2 x 1]
26 | %   S      covariance of tip of pendulum                         [2 x 2]
27 | %
28 | % Copyright (C) 2008-2013 by 
29 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
30 | %
31 | % Last modification: 2013-03-27
32 | %
33 | %% High-Level Steps
34 | % # Augment input distribution to complex angle representation
35 | % # Compute means of tips of pendulums (in Cartesian coordinates)
36 | % # Compute covariances of tips of pendulums (in Cartesian coordinates) 
37 | 
38 | function [M, S] = getPlotDistr_cp(m, s, ell)
39 | %% Code
40 | 
41 | % 1. Augment input distribution to complex angle representation
42 | [m1 s1 c1] = gTrig(m,s,4,ell); % map input distribution through sin/cos
43 | m1 = [m; m1];        % mean of joint
44 | c1 = s*c1;           % cross-covariance between input and prediction
45 | s1 = [s c1; c1' s1]; % covariance of joint
46 | 
47 | % 2. Compute means of tips of pendulums (in Cartesian coordinates)
48 | M = [m1(1)+m1(5); -m1(6)];
49 | 
50 | % 3. Compute covariances of tips of pendulums (in Cartesian coordinates) 
51 | s11 = s1(1,1) + s1(5,5) + s1(1,5) + s1(5,1); % x+l sin(theta)
52 | s22 = s1(6,6); % -l*cos(theta)
53 | s12 = -(s1(1,6)+s1(5,6)); % cov(x+l*sin(th), -l*cos(th)
54 | 
55 | S = [s11 s12; s12' s22];
56 | try
57 |   chol(S);
58 | catch
59 |   warning('matrix S not pos.def. (getPlotDistr)');
60 |   S = S + (1e-6 - min(eig(S)))*eye(2);
61 | end
62 | 


--------------------------------------------------------------------------------
/scenarios/doublePendulum/DoublePend_learn.m:
--------------------------------------------------------------------------------
 1 | %% DoublePend_learn
 2 | % *Summary:* Script to learn a controller for the double-pendulum
 3 | % swingup with two actuated joints
 4 | %
 5 | % Copyright (C) 2008-2013 by
 6 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
 7 | %
 8 | % Last modified: 2013-03-27
 9 | %
10 | %% High-Level Steps
11 | % # Load parameters
12 | % # Create J initial trajectories by applying random controls
13 | % # Controlled learning (train dynamics model, policy learning, policy
14 | % application)
15 | 
16 | %% Code
17 | 
18 | % 1. Initialization
19 | clear all; close all;
20 | settings_dp;                  % load scenario-specific settings
21 | basename = 'doublepend_';     % filename used for saving data
22 | 
23 | % 2. Initial J random rollouts
24 | for jj = 1:J
25 |   [xx, yy, realCost{jj}, latent{jj}] = ...
26 |     rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost);
27 |   x = [x; xx]; y = [y; yy];       % augment training sets for dynamics model
28 |   if plotting.verbosity > 0;      % visualization of trajectory
29 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
30 |     draw_rollout_dp;
31 |   end
32 | end
33 | 
34 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0;
35 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno);
36 | 
37 | % 3. Controlled learning (N iterations)
38 | for j = 1:N
39 |   trainDynModel;   % train (GP) dynamics model
40 |   learnPolicy;     % learn policy
41 |   applyController; % apply controller to system
42 |   disp(['controlled trial # ' num2str(j)]);
43 |   if plotting.verbosity > 0;      % visualization of trajectory
44 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
45 |     draw_rollout_dp;
46 |   end
47 | end


--------------------------------------------------------------------------------
/scenarios/doublePendulum/draw_rollout_dp.m:
--------------------------------------------------------------------------------
 1 | %% draw_rollout_dp.m
 2 | % *Summary:* Script to draw a trajectory of theobserved double-pendulum system 
 3 | % and the predicted uncertainties around the tips of the pendulums
 4 | %
 5 | % Copyright (C) 2008-2013 by 
 6 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
 7 | %
 8 | % Last modified: 2013-03-27
 9 | %
10 | %% High-Level Steps
11 | % # For each time step, plot the observed trajectory and the predicted
12 | % means and covariances of the Cartesian coordinates of the tips of both
13 | % pendulums
14 | 
15 | %% Code
16 | % Loop over states in trajectory
17 | for r = 1:size(xx,1)
18 |   cost.t = r;
19 |   if exist('j','var') && ~isempty(M{j})
20 |     draw_dp(latent{j}(r,3), latent{j}(r,4), latent{j}(r,end-1), ...
21 |       latent{j}(r,end), cost,  ...
22 |       ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ...
23 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
24 |       ' sec'], M{j}(:,r), Sigma{j}(:,:,r));
25 |   else
26 |     draw_dp(latent{jj}(r,3), latent{jj}(r,4), latent{jj}(r,end-1), ...
27 |       latent{jj}(r,end), cost,  ...
28 |       ['(random) trial # ' num2str(1) ', T=' num2str(H*dt) ' sec'], ...
29 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
30 |       ' sec'])
31 |   end
32 |   pause(dt);
33 | end
34 |   


--------------------------------------------------------------------------------
/scenarios/doublePendulum/dynamics_dp.m:
--------------------------------------------------------------------------------
 1 | %% dynamics_dp.m
 2 | % *Summary:* Implements ths ODE for simulating the double pendulum 
 3 | % dynamics, where an input torque can be applied to both links, 
 4 | % f1:torque at inner joint, f2:torque at outer joint
 5 | %
 6 | %    function dz = dynamics_dp(t, z, f1, f2)
 7 | %
 8 | %
 9 | % *Input arguments:*
10 | %
11 | %		t     current time step (called from ODE solver)
12 | %   z     state                                                    [4 x 1]
13 | %   f1    (optional): torque f1(t) applied to inner pendulum
14 | %   f2    (optional): torque f2(t) applied to outer pendulum
15 | %
16 | % *Output arguments:*
17 | %   
18 | %   dz    if 4 input arguments:      state derivative wrt time
19 | %         if only 2 input arguments: total mechanical energy
20 | %
21 | %   Note: It is assumed that the state variables are of the following order:
22 | %         dtheta1:  [rad/s] angular velocity of inner pendulum
23 | %         dtheta2:  [rad/s] angular velocity of outer pendulum
24 | %         theta1:   [rad]   angle of inner pendulum
25 | %         theta2:   [rad]   angle of outer pendulum
26 | %
27 | % A detailed derivation of the dynamics can be found in:
28 | %
29 | % M.P. Deisenroth: 
30 | % Efficient Reinforcement Learning Using Gaussian Processes, Appendix C, 
31 | % KIT Scientific Publishing, 2010.
32 | %
33 | %
34 | % Copyright (C) 2008-2013 by
35 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
36 | %
37 | % Last modified: 2013-03-08
38 | 
39 | function dz = dynamics_dp(t, z, f1, f2)
40 | %% Code
41 | m1 = 0.5;  % [kg]     mass of 1st link
42 | m2 = 0.5;  % [kg]     mass of 2nd link
43 | b1 = 0.0;  % [Ns/m]   coefficient of friction (1st joint)
44 | b2 = 0.0;  % [Ns/m]   coefficient of friction (2nd joint)
45 | l1 = 0.5;  % [m]      length of 1st pendulum
46 | l2 = 0.5;  % [m]      length of 2nd pendulum
47 | g  = 9.82; % [m/s^2]  acceleration of gravity
48 | I1 = m1*l1^2/12;  % moment of inertia around pendulum midpoint (1st link)
49 | I2 = m2*l2^2/12;  % moment of inertia around pendulum midpoint (2nd link)
50 | 
51 | if nargin == 4 % compute time derivatives
52 | 
53 |   A = [l1^2*(0.25*m1+m2) + I1,      0.5*m2*l1*l2*cos(z(3)-z(4));
54 |        0.5*m2*l1*l2*cos(z(3)-z(4)), l2^2*0.25*m2 + I2          ];
55 |   b = [g*l1*sin(z(3))*(0.5*m1+m2) - 0.5*m2*l1*l2*z(2)^2*sin(z(3)-z(4)) ...
56 |                                                         + f1(t)-b1*z(1);
57 |        0.5*m2*l2*(l1*z(1)^2*sin(z(3)-z(4))+g*sin(z(4))) + f2(t)-b2*z(2)];
58 |   x = A\b;
59 | 
60 |   dz = zeros(4,1);
61 |   dz(1) = x(1);
62 |   dz(2) = x(2);
63 |   dz(3) = z(1);
64 |   dz(4) = z(2);
65 | 
66 | else % compute total mechanical energy
67 |   dz = m1*l1^2*z(1)^2/8 + I1*z(1)^2/2 + m2/2*(l1^2*z(1)^2 ...
68 |     + l2^2*z(2)^2/4 + l1*l2*z(1)*z(2)*cos(z(3)-z(4))) + I2*z(2)^2/2 ...
69 |     + m1*g*l1*cos(z(3))/2 + m2*g*(l1*cos(z(3))+l2*cos(z(4))/2);
70 | end


--------------------------------------------------------------------------------
/scenarios/pendubot/draw_rollout_pendubot.m:
--------------------------------------------------------------------------------
 1 | %% draw_rollout_pendubot
 2 | % *Summary:* Script to draw the most recent trajectory of the Pendubot system 
 3 | % and the predicted uncertainties around the tips of the pendulums
 4 | %
 5 | % Copyright (C) 2008-2013 by 
 6 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
 7 | %
 8 | % Last modified: 2013-03-27
 9 | %
10 | %
11 | %% High-Level Steps
12 | % # For each time step, plot the observed trajectory and the predicted
13 | % means and covariances of the Cartesian coordinates of the tips of both
14 | % pendulums
15 | 
16 | %% Code
17 | 
18 | % Loop over states in trajectory
19 | for r = 1:size(xx,1)
20 |   cost.t = r;
21 |   if exist('j','var') && ~isempty(M{j})
22 |     draw_pendubot(latent{j}(r,3), latent{j}(r,4), latent{j}(r,end), cost,  ...
23 |       ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ...
24 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
25 |       ' sec'], M{j}(:,r), Sigma{j}(:,:,r));
26 |   else
27 |     draw_pendubot(latent{jj}(r,3), latent{jj}(r,4), latent{jj}(r,end), cost, ...
28 |       ['(random) trial # ' num2str(jj) ', T=' num2str(H*dt) ' sec'], ...
29 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
30 |       ' sec'])
31 |   end
32 |   pause(dt);
33 | end


--------------------------------------------------------------------------------
/scenarios/pendubot/dynamics_pendubot.m:
--------------------------------------------------------------------------------
 1 | %% dynamics_pendubot.m
 2 | % *Summary:* Implements ths ODE for simulating the Pendubot 
 3 | % dynamics, where an input torque f can be applied to the inner link 
 4 | %
 5 | %    function dz = dynamics_pendubot(t,z,f)
 6 | %
 7 | %
 8 | % *Input arguments:*
 9 | %
10 | %		t     current time step (called from ODE solver)
11 | %   z     state                                                    [4 x 1]
12 | %   f     (optional): torque f(t) applied to inner pendulum
13 | %
14 | % *Output arguments:*
15 | %   
16 | %   dz    if 3 input arguments:      state derivative wrt time
17 | %         if only 2 input arguments: total mechanical energy
18 | %
19 | %   Note: It is assumed that the state variables are of the following order:
20 | %         dtheta1:  [rad/s] angular velocity of inner pendulum
21 | %         dtheta2:  [rad/s] angular velocity of outer pendulum
22 | %         theta1:   [rad]   angle of inner pendulum
23 | %         theta2:   [rad]   angle of outer pendulum
24 | %
25 | % A detailed derivation of the dynamics can be found in:
26 | %
27 | % M.P. Deisenroth: 
28 | % Efficient Reinforcement Learning Using Gaussian Processes, Appendix C, 
29 | % KIT Scientific Publishing, 2010.
30 | %
31 | %
32 | % Copyright (C) 2008-2013 by
33 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
34 | %
35 | % Last modified: 2013-03-08
36 | 
37 | function dz = dynamics_pendubot(t,z,f)
38 | %% Code
39 | m1 = 0.5;  % [kg]     mass of 1st link
40 | m2 = 0.5;  % [kg]     mass of 2nd link
41 | b1 = 0.0;  % [Ns/m]  coefficient of friction (1st joint)
42 | b2 = 0.0;  % [Ns/m]  coefficient of friction (2nd joint)
43 | l1 = 0.5;  % [m]      length of 1st pendulum
44 | l2 = 0.5;  % [m]      length of 2nd pendulum
45 | g  = 9.82; % [m/s^2]  acceleration of gravity
46 | I1 = m1*l1^2/12;  % moment of inertia around pendulum midpoint (inner link)
47 | I2 = m2*l2^2/12;  % moment of inertia around pendulum midpoint (outer link)
48 | 
49 | if nargin == 3 % compute time derivatives
50 | 
51 |   A = [l1^2*(0.25*m1+m2) + I1,      0.5*m2*l1*l2*cos(z(3)-z(4));
52 |        0.5*m2*l1*l2*cos(z(3)-z(4)), l2^2*0.25*m2 + I2          ];
53 |   b = [g*l1*sin(z(3))*(0.5*m1+m2) - 0.5*m2*l1*l2*z(2)^2*sin(z(3)-z(4))...
54 |                                                         + f(t) - b1*z(1);
55 |        0.5*m2*l2*( l1*z(1)^2*sin(z(3)-z(4)) + g*sin(z(4)) )    - b2*z(2)];
56 |   x = A\b;
57 | 
58 |   dz = zeros(4,1);
59 |   dz(1) = x(1);
60 |   dz(2) = x(2);
61 |   dz(3) = z(1);
62 |   dz(4) = z(2);
63 | 
64 | else % compute total mechanical energy
65 |   dz = m1*l1^2*z(1)^2/8 + I1*z(1)^2/2 + m2/2*(l1^2*z(1)^2 ...
66 |     + l2^2*z(2)^2/4 + l1*l2*z(1)*z(2)*cos(z(3)-z(4))) + I2*z(2)^2/2 ...
67 |     + m1*g*l1*cos(z(3))/2 + m2*g*(l1*cos(z(3))+l2*cos(z(4))/2);
68 | end


--------------------------------------------------------------------------------
/scenarios/pendubot/pendubot_learn.m:
--------------------------------------------------------------------------------
 1 | %% pendubot_learn.m
 2 | % *Summary:* Script to learn a controller for the pendubot
 3 | % swingup (a double pendulum with the inner joint actuated)
 4 | %
 5 | % Copyright (C) 2008-2013 by
 6 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
 7 | %
 8 | % Last modified: 2013-03-27
 9 | %
10 | %% High-Level Steps
11 | % # Load parameters
12 | % # Create J initial trajectories by applying random controls
13 | % # Controlled learning (train dynamics model, policy learning, policy
14 | % application)
15 | 
16 | %% Code
17 | 
18 | % 1. Initialization
19 | clear all; close all;
20 | settings_pendubot;            % load scenario-specific settings
21 | basename = 'pendubot_';       % filename used for saving data
22 | 
23 | % 2. Initial J random rollouts
24 | for jj = 1:J
25 |   [xx, yy, realCost{jj}, latent{jj}] = ...
26 |     rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost);
27 |   x = [x; xx]; y = [y; yy];       % augment training sets for dynamics model
28 |   if plotting.verbosity > 0;      % visualization of trajectory
29 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
30 |     draw_rollout_pendubot;
31 |   end
32 | end
33 | 
34 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0;
35 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno);
36 | 
37 | % 3. Controlled learning (N iterations)
38 | for j = 1:N
39 |   trainDynModel;   % train (GP) dynamics model
40 |   learnPolicy;     % learn policy
41 |   applyController; % apply controller to system
42 |   disp(['controlled trial # ' num2str(j)]);
43 |   if plotting.verbosity > 0;      % visualization of trajectory
44 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
45 |     draw_rollout_pendubot;
46 |   end
47 | end


--------------------------------------------------------------------------------
/scenarios/pendulum/draw_pendulum.m:
--------------------------------------------------------------------------------
 1 | %% draw_pendulum.m
 2 | % *Summary:* Draw the pendulum system with reward, applied torque, 
 3 | % and predictive uncertainty of the tips of the pendulums
 4 | %
 5 | %    function draw_pendulum(theta, torque, cost, text1, text2, M, S)
 6 | %
 7 | %
 8 | % *Input arguments:*
 9 | %
10 | %   theta1     angle of inner pendulum
11 | %   theta2     angle of outer pendulum
12 | %   f1         torque applied to inner pendulum
13 | %   f2         torque applied to outer pendulum
14 | %   cost       cost structure
15 | %     .fcn     function handle (it is assumed to use saturating cost)
16 | %     .<>      other fields that are passed to cost
17 | %   text1      (optional) text field 1
18 | %   text2      (optional) text field 2
19 | %   M          (optional) mean of state
20 | %   S          (optional) covariance of state
21 | %
22 | %
23 | % Copyright (C) 2008-2013 by
24 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
25 | %
26 | % Last modified: 2013-03-18
27 | 
28 | 
29 | function draw_pendulum(theta, torque, cost, text1, text2, M, S)
30 | %% Code
31 | 
32 | l = 0.6;
33 | xmin = -1.2*l; 
34 | xmax = 1.2*l;    
35 | umax = 0.5;
36 | height = 0;
37 | 
38 | % Draw pendulum
39 | pendulum = [0, 0; l*sin(theta), -l*cos(theta)];
40 | clf; hold on
41 | plot(pendulum(:,1), pendulum(:,2),'r','linewidth',4)
42 | 
43 | % plot ellipses around tips of pendulum (if M, S exist)
44 | try
45 |   if max(max(S))>0
46 |     err = linspace(-1,1,100)*sqrt(S(2,2));
47 |     plot(l*sin(M(2)+2*err),-l*cos(M(2)+2*err),'b','linewidth',1)
48 |     plot(l*sin(M(2)+err),-l*cos(M(2)+err),'b','linewidth',2)
49 |     plot(l*sin(M(2)),-l*cos(M(2)),'b.','markersize',20)
50 |   end
51 | catch
52 | end
53 | 
54 | % Draw useful information
55 | % target location
56 | plot(0,l,'k+','MarkerSize',20);
57 | plot([xmin, xmax], [-height, -height],'k','linewidth',2)
58 | % joint
59 | plot(0,0,'k.','markersize',24)
60 | plot(0,0,'y.','markersize',14)
61 | % tip of pendulum
62 | plot(l*sin(theta),-l*cos(theta),'k.','markersize',24)
63 | plot(l*sin(theta),-l*cos(theta),'y.','markersize',14)
64 | plot(0,-2*l,'.w','markersize',0.005)
65 | % applied torque
66 | plot([0 torque/umax*xmax],[-0.5, -0.5],'g','linewidth',10);
67 | % immediate reward
68 | reward = 1-cost.fcn(cost,[0, theta]',zeros(2));
69 | plot([0 reward*xmax],[-0.7, -0.7],'y', 'linewidth',10);
70 | text(0,-0.5,'applied  torque')
71 | text(0,-0.7,'immediate reward')
72 | if exist('text1','var')  
73 |   text(0,-0.9, text1)
74 | end
75 | if exist('text2','var')  
76 |   text(0,-1.1, text2)
77 | end
78 | 
79 | set(gca,'DataAspectRatio',[1 1 1],'XLim',[xmin xmax],'YLim',[-2*l 2*l]);
80 | axis off;
81 | drawnow;


--------------------------------------------------------------------------------
/scenarios/pendulum/draw_rollout_pendulum.m:
--------------------------------------------------------------------------------
 1 | %% draw_rollout_pendulum.m
 2 | % *Summary:* Script to draw a trajectory of the most recent pendulum trajectory
 3 | %
 4 | % Copyright (C) 2008-2013 by 
 5 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
 6 | %
 7 | % Last modified: 2013-03-27
 8 | %
 9 | %% High-Level Steps
10 | % # For each time step, plot the observed trajectory
11 | 
12 | %% Code
13 | 
14 | % Loop over states in trajectory
15 | for r = 1:size(xx,1)
16 |   cost.t = r;
17 |   if exist('j','var') && ~isempty(M{j})
18 |     draw_pendulum(latent{j}(r,2), latent{j}(r,end), cost,  ...
19 |       ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ...
20 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
21 |       ' sec'], M{j}(:,r), Sigma{j}(:,:,r));
22 |   else
23 |      draw_pendulum(latent{jj}(r,2), latent{jj}(r,end), cost,  ...
24 |       ['(random) trial # ' num2str(1) ', T=' num2str(H*dt) ' sec'], ...
25 |       ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
26 |       ' sec'])
27 |   end
28 |   pause(dt);
29 | end


--------------------------------------------------------------------------------
/scenarios/pendulum/dynamics_pendulum.m:
--------------------------------------------------------------------------------
 1 | %% dynamics_pendulum.m
 2 | % *Summary:* Implements ths ODE for simulating the pendulum dynamics, where 
 3 | % an input torque f can be applied  
 4 | %
 5 | %    function dz = dynamics_pendulum(t,z,u)
 6 | %
 7 | %
 8 | % *Input arguments:*
 9 | %
10 | %		t     current time step (called from ODE solver)
11 | %   z     state                                                    [2 x 1]
12 | %   u     (optional): torque f(t) applied to pendulum
13 | %
14 | % *Output arguments:*
15 | %   
16 | %   dz    if 3 input arguments:      state derivative wrt time
17 | %
18 | %   Note: It is assumed that the state variables are of the following order:
19 | %         dtheta:  [rad/s] angular velocity of pendulum
20 | %         theta:   [rad]   angle of pendulum
21 | %
22 | % A detailed derivation of the dynamics can be found in:
23 | %
24 | % M.P. Deisenroth: 
25 | % Efficient Reinforcement Learning Using Gaussian Processes, Appendix C, 
26 | % KIT Scientific Publishing, 2010.
27 | %
28 | %
29 | % Copyright (C) 2008-2013 by
30 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
31 | %
32 | % Last modified: 2013-03-18
33 | 
34 | function dz = dynamics_pendulum(t,z,u)
35 | %% Code
36 | 
37 | l = 1;    % [m]        length of pendulum
38 | m = 1;    % [kg]       mass of pendulum
39 | g = 9.82; % [m/s^2]    acceleration of gravity
40 | b = 0.01; % [s*Nm/rad] friction coefficient
41 | 
42 | dz = zeros(2,1);
43 | dz(1) = ( u(t) - b*z(1) - m*g*l*sin(z(2))/2 ) / (m*l^2/3);
44 | dz(2) = z(1);


--------------------------------------------------------------------------------
/scenarios/pendulum/pendulum_learn.m:
--------------------------------------------------------------------------------
 1 | %% pendulum_learn.m
 2 | % *Summary:* Script to learn a controller for the pendulum swingup
 3 | %
 4 | % Copyright (C) 2008-2013 by
 5 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
 6 | %
 7 | % Last modified: 2013-03-27
 8 | %
 9 | %% High-Level Steps
10 | % # Load parameters
11 | % # Create J initial trajectories by applying random controls
12 | % # Controlled learning (train dynamics model, policy learning, policy
13 | % application)
14 | 
15 | %% Code
16 | 
17 | % 1. Initialization
18 | clear all; close all;
19 | settings_pendulum;            % load scenario-specific settings
20 | basename = 'pendulum_';       % filename used for saving data
21 | 
22 | % 2. Initial J random rollouts
23 | for jj = 1:J
24 |   [xx, yy, realCost{jj}, latent{jj}] = ...
25 |     rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost);
26 |   x = [x; xx]; y = [y; yy];       % augment training sets for dynamics model
27 |   if plotting.verbosity > 0;      % visualization of trajectory
28 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
29 |     draw_rollout_pendulum;
30 |   end
31 | end
32 | 
33 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0;
34 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno);
35 | 
36 | % 3. Controlled learning (N iterations)
37 | for j = 1:N
38 |   trainDynModel;   % train (GP) dynamics model
39 |   learnPolicy;     % learn policy
40 |   applyController; % apply controller to system
41 |   disp(['controlled trial # ' num2str(j)]);
42 |   if plotting.verbosity > 0;      % visualization of trajectory
43 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
44 |     draw_rollout_pendulum;
45 |   end
46 | end


--------------------------------------------------------------------------------
/scenarios/unicycle/augment_unicycle.m:
--------------------------------------------------------------------------------
 1 | %% augment_unicycle.m
 2 | % *Summary:* The function computes the $(x,y)$ velocities of the contact point 
 3 | % in both absolute and unicycle coordinates as well as the the unicycle 
 4 | % coordinates of the contact point themselves.
 5 | %
 6 | %       function r = augment(s)
 7 | %
 8 | % *Input arguments:*
 9 | %
10 | %		s     state of the unicycle (including the torques).             [1 x 18]
11 | %         The state is assumed to be given as follows:
12 | %         dx      empty (to be filled by this function)
13 | %         dy      empty (to be filled by this function)    
14 | %         dxc     empty (to be filled by this function)
15 | %         dyc     empty (to be filled by this function)
16 | %         dtheta  roll angular velocity
17 | %         dphi    yaw angular velocity
18 | %         dpsiw   wheel angular velocity
19 | %         dpsif   pitch angular velocity
20 | %         dpsit   turn table angular velocity
21 | %         x       x position
22 | %         y       y position
23 | %         xc      empty (to be filled by this function)
24 | %         yc      empty (to be filled by this function)
25 | %         theta   roll angle
26 | %         phi     yaw angle
27 | %         psiw    wheel angle
28 | %         psif    pitch angle
29 | %         psit    turn table angle
30 | %
31 | % *Output arguments:*
32 | %
33 | %   r     additional variables that are computed based on s:          [1 x 6]
34 | %         dx    x velocity of contact point (global coordinates)
35 | %         dy    y velocity of contact point (global coordinates)
36 | %         dxc   x velocity of contact point (unicycle coordinates)
37 | %         dyc   y velocity of contact point (unicycle coordinates)
38 | %         xc    x position of contact point (unicycle coordinates)
39 | %         yc    y position of contact point (unicycle coordinates)
40 | %
41 | %
42 | % Copyright (C) 2008-2013 by
43 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
44 | %
45 | % Last modified: 2013-03-27
46 | 
47 | 
48 | function r = augment_unicycle(s)
49 | %% Code
50 | 
51 | rw = 0.225;                               % wheel radius in meters
52 | 
53 | % x velocity of contact point (global coordinates)
54 | r(1) = rw*cos(s(15))*s(7); 
55 | % y velocity of contact point (global coordinates)
56 | r(2) = rw*sin(s(15))*s(7);
57 | % (x,y) velocities of contact point (unicycle coordinates)
58 | A = -[cos(s(15)) sin(s(15)); -sin(s(15)) cos(s(15))];
59 | dA = -s(6)*[-sin(s(15)) cos(s(15)); -cos(s(15)) -sin(s(15))];
60 | r(3:4) = A*r(1:2)' + dA*s(10:11)'; 
61 | % (x,y) coordinates of contact point (unicycle coordinates)
62 | r(5:6) = A*s(10:11)';
63 | 
64 | 


--------------------------------------------------------------------------------
/scenarios/unicycle/draw_rollout_unicycle.m:
--------------------------------------------------------------------------------
 1 | %% draw_rollout_unicycle.m
 2 | % *Summary:* Script to draw a rollout of the unicycle
 3 | %
 4 | % Copyright (C) 2008-2013 by
 5 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
 6 | %
 7 | % Last modified: 2013-04-04
 8 | %
 9 | %% High-Level Steps
10 | % # For each time step, plot the observed trajectory, the applied torques,
11 | % and the incurred cost
12 | 
13 | %% Code
14 | 
15 | if j > J
16 |   
17 |   draw_unicycle(xx, plant, plant.dt/10, cost, ...
18 |     ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ...
19 |     ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
20 |     ' sec']);  
21 | else
22 |   draw_unicycle(xx, plant, plant.dt/10, cost, ...
23 |     ['(random) trial # ' num2str(jj) ', T=' num2str(H*dt) ' sec'], ...
24 |     ['total experience (after this trial): ' num2str(dt*size(x,1)) ...
25 |     ' sec']);
26 | end
27 | 


--------------------------------------------------------------------------------
/scenarios/unicycle/loss.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/scenarios/unicycle/loss.pdf


--------------------------------------------------------------------------------
/scenarios/unicycle/unicycle_learn.m:
--------------------------------------------------------------------------------
 1 | %% unicycle_learn.m
 2 | % *Summary:* Script to learn a controller for unicycling
 3 | %
 4 | % Copyright (C) 2008-2013 by
 5 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
 6 | %
 7 | % Last modified: 2013-03-27
 8 | %
 9 | %% High-Level Steps
10 | % # Load parameters
11 | % # Create J initial trajectories by applying random controls
12 | % # Controlled learning (train dynamics model, policy learning, policy
13 | % application)
14 | 
15 | %% Code
16 | 
17 | 
18 | % 1. Initialization
19 | clear all; close all;
20 | settings_unicycle;                     % load scenario-specific settings
21 | basename = 'unicycle_';                % filename used for saving data
22 | 
23 | % 2. Initial J random rollouts
24 | for jj = 1:J                                        % get the first observations
25 |   [xx, yy, realCost{jj}, latent{jj}] = ...
26 |     rollout(gaussian(mu0, S0), struct('maxU',policy.maxU/5), H, plant, cost);
27 |   x = [x; xx]; y = [y; yy];
28 |   if plotting.verbosity > 0;      % visualization of trajectory
29 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
30 |     draw_rollout_unicycle;
31 |   end
32 | end
33 | 
34 | z(odei,:) = bsxfun(@plus, mu0, chol(S0)'*randn(length(odei),1000));   % compute
35 | for i = 1:size(z,2), z(augi,i) = plant.augment(z(:,i)'); end % the distribution
36 | mu0Sim = mean(z,2); S0Sim = cov(z');         % of augmented start state by MCMC
37 | mu0Sim(odei) = mu0; S0Sim(odei,odei) = S0;        % Put in known correct values
38 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno); clear z i;
39 | 
40 | % 3. Controlled learning (N iterations)
41 | for j = 1:N
42 |   trainDynModel;
43 |   learnPolicy;
44 |   applyController;
45 |   disp(['controlled trial # ' num2str(j)]);
46 |   if plotting.verbosity > 0;      % visualization of trajectory
47 |     if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
48 |     draw_rollout_unicycle;
49 |   end
50 | end
51 | 


--------------------------------------------------------------------------------
/test/checkgrad.m:
--------------------------------------------------------------------------------
 1 | %% checkgrad.m
 2 | % *Summary:* checkgrad checks the derivatives in a function, by comparing them 
 3 | % to finite differences approximations. The partial derivatives and the 
 4 | % approximation are printed and the norm of the difference divided by the 
 5 | % norm of the sum is returned as an indication of accuracy.
 6 | %
 7 | %    function [d dy dh] = checkgrad(f, X, e, varargin)
 8 | %
 9 | %
10 | % *Input arguments:*
11 | %
12 | %		f          function handle to function that needs to be checked. 
13 | %              The function f should be of the type [fX, dfX] = f(X, varargin)
14 | %              where fX is the function value and dfX is the gradient of fX
15 | %              with respect to the parameters X
16 | %   X          parameters (can be a vector or a struct)
17 | %   e          small perturbation used for finite differences (1e-4 is good)
18 | %   varargin   other arguments that are passed on to the function f
19 | %
20 | %
21 | % *Output arguments:*
22 | %
23 | %   d          relative error of analytical vs. finite difference gradient
24 | %   dy         analytical gradient
25 | %   dh         finite difference gradient
26 | %
27 | %
28 | % Copyright (C) 2008-2013 by
29 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
30 | %
31 | % Last modified: 2013-03-21
32 | %
33 | %% High-Level Steps
34 | % # Analytical gradient
35 | % # Numerical gradient via finite differences
36 | % # Relative error
37 | 
38 | function [d dy dh] = checkgrad(f, X, e, varargin)
39 | %% Code
40 | 
41 | % 1. Analytical gradient
42 | Z = unwrap(X); NZ = length(Z);                 % number of input variables
43 | [y dy] = feval(f, X, varargin{:});             % get the partial derivatives dy
44 | [D E] = size(y); y = y(:); Ny = length(y);     % number of output variables
45 | if iscell(dy) || isstruct(dy); dy = unwrap(dy); end;
46 | dy = reshape(dy,Ny,NZ);
47 | 
48 | % 2. Finite difference approximation
49 | dh = zeros(Ny,NZ);
50 | for j = 1:NZ
51 |   dx = zeros(length(Z),1);
52 |   dx(j) = dx(j) + e;                               % perturb a single dimension
53 |   y2 = feval(f, rewrap(X,Z+dx), varargin{:});                        
54 |   y1 = feval(f, rewrap(X,Z-dx), varargin{:});
55 |   dh(:,j) = (y2(:) - y1(:))/(2*e);
56 | end
57 | 
58 | % 3. Compute error 
59 | % norm of diff divided by norm of sum
60 | d = sqrt(sum((dh-dy).^2,2)./sum((dh+dy).^2,2)); 
61 | small = max(abs([dy dh]),[],2) < 1e-5; % small derivatives are poorly tested ...
62 | d(d > 1e-3 & small) = NaN;             % ... by finite differences
63 | d = reshape(d,D,E);
64 | 
65 | disp('   Analytic  Numerical');
66 | for i=1:Ny;
67 |     disp([dy(i,:)' dh(i,:)']);                           % print the two vectors
68 |     fprintf('d = %e\n\n',d(i))
69 | end 
70 | 
71 | if Ny > 1; disp('For all outputs, d = '); disp(d); end; fprintf('\n');
72 | 


--------------------------------------------------------------------------------
/test/valueT.m:
--------------------------------------------------------------------------------
 1 | %% valueT.m
 2 | % *Summary:* Test derivatives of the propagate function, which computes the 
 3 | % mean and the variance of the successor state distribution, assuming that the
 4 | % current state is Gaussian distributed with mean m and covariance matrix
 5 | % s.
 6 | %
 7 | %   [d dy dh] = valueT(p, delta, m, s, dynmodel, policy, plant, cost, H)
 8 | %
 9 | %
10 | % *Input arguments:*
11 | %
12 | %   p          policy parameters (can be a structure)
13 | %     .<>      fields that contain the policy parameters (nothing else)
14 | %   m          mean of the input distribution
15 | %   s          covariance of the input distribution
16 | %   dynmodel   GP dynamics model (structure)
17 | %   policy     policy structure
18 | %   plant      plant structure
19 | %   cost       cost structure
20 | %   H          prediction horizon
21 | %   delta      (optional) finite difference parameter. Default: 1e-4
22 | %
23 | %
24 | % *Output arguments:*
25 | %
26 | %   dd         relative error of analytical vs. finite difference gradient
27 | %   dy         analytical gradient
28 | %   dh         finite difference gradient
29 | %
30 | % Copyright (C) 2008-2013 by 
31 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
32 | %
33 | % Last modified: 2013-03-21
34 | 
35 | function [d dy dh] = valueT(p, m, s, dynmodel, policy, plant, cost, H, delta)
36 | %% Code 
37 | 
38 | if nargin < 9; delta = 1e-4; end
39 | if nargin < 8; H = 4; end
40 | 
41 | % call checkgrad directly
42 | [d dy dh] = checkgrad('value',p,delta,m,s,dynmodel,policy,plant,cost,H);
43 | 


--------------------------------------------------------------------------------
/util/gSat.m:
--------------------------------------------------------------------------------
 1 | %% gSat.m
 2 | % *Summary:* Compute moments of the saturating function
 3 | % $e*(9*\sin(x(i))+\sin(3*x(i)))/8$,
 4 | % where $x \sim\mathcal N(m,v)$ and $i$ is a (possibly empty) set of $I$
 5 | % indices. The optional  scaling factor $e$ is a vector of length $I$.
 6 | % Optionally, compute derivatives of the moments.
 7 | %
 8 | %    function [M, S, C, dMdm, dSdm, dCdm, dMdv, dSdv, dCdv] = gSat(m, v, i, e)
 9 | %
10 | % *Input arguments:*
11 | %
12 | %   m     mean vector of Gaussian                                    [ d       ]
13 | %   v     covariance matrix                                          [ d  x  d ]
14 | %   i     vector of indices of elements to augment                   [ I  x  1 ]
15 | %   e     (optional) scale vector; default: 1                        [ I  x  1 ]
16 | %
17 | % *Output arguments:*
18 | %
19 | %  M     output means                                                [ I       ]
20 | %  V     output covariance matrix                                    [ I  x  I ]
21 | %  C     inv(v) times input-output covariance                        [ d  x  I ]
22 | %  dMdm  derivatives of M w.r.t m                                    [ I  x  d ]
23 | %  dVdm  derivatives of V w.r.t m                                    [I*^2 x  d ]
24 | %  dCdm  derivatives of C w.r.t m                                    [d*I x  d ]
25 | %  dMdv  derivatives of M w.r.t v                                    [ I  x d^2]
26 | %  dVdv  derivatives of V w.r.t v                                    [I^2 x d^2]
27 | %  dCdv  derivatives of C w.r.t v                                    [d*I x d^2]
28 | %
29 | %
30 | %
31 | % Copyright (C) 2008-2013 by
32 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
33 | %
34 | % Last modified: 2013-03-25
35 | 
36 | function [M, S, C, dMdm, dSdm, dCdm, dMdv, dSdv, dCdv] = gSat(m, v, i, e)
37 | %% Code
38 | d = length(m); I = length(i); i = i(:)';
39 | if nargin < 4; e = ones(1, I); end; e = e(:)';
40 | 
41 | P = [eye(d); 3*eye(d)];                                        % augment inputs
42 | ma = P*m;    madm = P;
43 | va = P*v*P'; vadv = kron(P,P); va = (va+va')/2;
44 | 
45 | % do the actual augmentation with the right parameters
46 | [M2, S2, C2, Mdma, Sdma, Cdma, Mdva, Sdva, Cdva] ...
47 |   = gSin(ma, va, [i d+i], [9*e e]/8);
48 | 
49 | P = [eye(I) eye(I)]; Q = [eye(d) 3*eye(d)];
50 | M = P*M2;                                                                % mean
51 | S = P*S2*P'; S = (S+S')/2;                                           % variance
52 | C = Q*C2*P';                                    % inv(v) times input-output cov
53 | 
54 | if nargout > 3                                        % derivatives if required
55 |   dMdm = P*Mdma*madm;         dMdv = P*Mdva*vadv;
56 |   dSdm = kron(P,P)*Sdma*madm; dSdv = kron(P,P)*Sdva*vadv;
57 |   dCdm = kron(P,Q)*Cdma*madm; dCdv = kron(P,Q)*Cdva*vadv;
58 | end
59 | 


--------------------------------------------------------------------------------
/util/gaussian.m:
--------------------------------------------------------------------------------
 1 | %% gaussian.m
 2 | % *Summary:* Generate n samples from a Gaussian $p(x)=\mathcal N(m,S).
 3 | % Sampling is based on the Cholesky factorization of the covariance matrix S
 4 | %
 5 | %    function x = gaussian(m, S, n)
 6 | %
 7 | % *Input arguments:*
 8 | %
 9 | %   m      mean of Gaussian                                         [D x 1]
10 | %   S      covariance matrix of Gaussian                            [D x D]
11 | %   n      (optional) number of samples; default: n=1
12 | %
13 | % *Output arguments:*
14 | %
15 | %   x      matrix of samples from Gaussian                          [D x n]
16 | %
17 | %
18 | % Copyright (C) 2008-2013 by 
19 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
20 | %
21 | % Last modified: 2013-03-21
22 | 
23 | function x = gaussian(m, S, n)
24 | %% Code
25 | 
26 | if nargin < 3; n = 1; end
27 | 
28 | x = bsxfun(@plus, m(:), chol(S)'*randn(size(S,2),n));
29 | 


--------------------------------------------------------------------------------
/util/maha.m:
--------------------------------------------------------------------------------
 1 | %% maha.m
 2 | % *Summary:* Point-wise squared Mahalanobis distance (a-b)*Q*(a-b)'.
 3 | % Vectors are row-vectors
 4 | %
 5 | %    function K = maha(a, b, Q)                         
 6 | %
 7 | % *Input arguments:*
 8 | %  
 9 | %   a   matrix containing n row vectors                                 [n x D]
10 | %   b   matrix containing n row vectors                                 [n x D]
11 | %   Q   weight matrix. Default: eye(D)                                  [D x D]
12 | %
13 | %
14 | % *Output arguments:*
15 | %  K    point-wise squared distances                                    [n x n]
16 | %
17 | % Copyright (C) 2008-2013 by 
18 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
19 | %
20 | % Last modified: 2013-03-21
21 | 
22 | function K = maha(a, b, Q)                         
23 | %% Code
24 | 
25 | if nargin == 2                                                  % assume unit Q
26 |   K = bsxfun(@plus,sum(a.*a,2),sum(b.*b,2)')-2*a*b';
27 | else
28 |   aQ = a*Q; K = bsxfun(@plus,sum(aQ.*a,2),sum(b*Q.*b,2)')-2*aQ*b';
29 | end


--------------------------------------------------------------------------------
/util/rewrap.m:
--------------------------------------------------------------------------------
 1 | %% rewrap.m
 2 | % *Summary:* Map the numerical elements in the vector $v$ onto the variables
 3 | % $s$, which can be of any type. The number of numerical elements must match;
 4 | % on exit, $v$ should be empty. Non-numerical entries are just copied.
 5 | % See also the reverse unwrap.m.
 6 | %
 7 | %    [s v] = rewrap(s, v)
 8 | %
 9 | % *Input arguments:*
10 | %
11 | %   s     structure, cell, or numeric values
12 | %   v     structure, cell, or numeric values
13 | %
14 | %
15 | % *Output arguments:*
16 | %
17 | %   s     structure, cell, or numeric values
18 | %   v     [empty]
19 | %
20 | %
21 | % Copyright (C) 2008-2013 by
22 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
23 | %
24 | % Last modified: 2013-03-25
25 | 
26 | function [s v] = rewrap(s, v)
27 | %% Code
28 | 
29 | if isnumeric(s)
30 |   if numel(v) < numel(s)
31 |     error('The vector for conversion contains too few elements')
32 |   end
33 |   s = reshape(v(1:numel(s)), size(s));            % numeric values are reshaped
34 |   v = v(numel(s)+1:end);                        % remaining arguments passed on
35 | elseif isstruct(s)
36 |   [s p] = orderfields(s); p(p) = 1:numel(p);      % alphabetize, store ordering
37 |   [t v] = rewrap(struct2cell(s), v);                 % convert to cell, recurse
38 |   s = orderfields(cell2struct(t,fieldnames(s),1),p);  % conv to struct, reorder
39 | elseif iscell(s)
40 |   for i = 1:numel(s)             % cell array elements are handled sequentially
41 |     [s{i} v] = rewrap(s{i}, v);
42 |   end
43 | end


--------------------------------------------------------------------------------
/util/solve_chol.c:
--------------------------------------------------------------------------------
 1 | /* solve_chol - solve a linear system A*X = B using the cholesky factorization
 2 |    of A (where A is square, symmetric and positive definite.
 3 | 
 4 |    Copyright (c) 2004 Carl Edward Rasmussen. 2004-10-19. */
 5 | 
 6 | #include "mex.h"
 7 | #include "math.h"
 8 | #include "string.h"
 9 | #include "lapack.h"
10 | 
11 | 
12 | 
13 | /*extern int dpotrs_(char *, int *, int *, double *, int *, double *, int *, int *);*/
14 | 
15 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
16 | {
17 |   double *C;
18 |   mwSignedIndex n, m, q;
19 | 
20 |   if (nrhs != 2 || nlhs > 1)                              /* check the input */
21 |     mexErrMsgTxt("Usage: X = solve_chol(R, B)");
22 |   n = mxGetN(prhs[0]);
23 |   if (n != mxGetM(prhs[0]))
24 |     mexErrMsgTxt("Error: First argument matrix must be square");
25 |   if (n != mxGetM(prhs[1]))
26 |     mexErrMsgTxt("Error: First and second argument matrices must have same number of rows");
27 |   m = mxGetN(prhs[1]);
28 | 
29 |   plhs[0] = mxCreateDoubleMatrix(n, m, mxREAL); /* allocate space for output */
30 |   C = mxGetPr(plhs[0]);
31 | 
32 |   if (n==0) return;              /* if argument was empty matrix, do no more */
33 |     memcpy(C,mxGetPr(prhs[1]),n*m*sizeof(double));   /* copy argument matrix */
34 |     dpotrs("U", &n, &m, mxGetPr(prhs[0]), &n, C, &n, &q);   /* solve system */
35 |   if (q > 0)
36 |     mexErrMsgTxt("Error: illegal input to solve_chol");
37 | }
38 | 


--------------------------------------------------------------------------------
/util/solve_chol.m:
--------------------------------------------------------------------------------
 1 | %% solve_chol.m
 2 | % solve_chol - solve linear equations from the Cholesky factorization.
 3 | % Solve A*X = B for X, where A is square, symmetric, positive definite. The
 4 | % input to the function is R the Cholesky decomposition of A and the matrix B.
 5 | % Example: X = solve_chol(chol(A),B);
 6 | %
 7 | % NOTE: The program code is written in the C language for efficiency and is
 8 | % contained in the file solve_chol.c, and should be compiled using matlabs mex
 9 | % facility. However, this file also contains a (less efficient) matlab
10 | % implementation, supplied only as a help to people unfamiliar with mex. If
11 | % the C code has been properly compiled and is avaiable, it automatically
12 | % takes precendence over the matlab code in this file.
13 | %
14 | % Copyright (c) 2004, 2005, 2006 by Carl Edward Rasmussen. 2006-02-08.
15 | 
16 | %% Code
17 | function x = solve_chol(A, B)
18 | 
19 | if nargin ~= 2 | nargout > 1
20 |   error('Wrong number of arguments.');
21 | end
22 | 
23 | if size(A,1) ~= size(A,2) | size(A,1) ~= size(B,1)
24 |   error('Wrong sizes of matrix arguments.');
25 | end
26 | 
27 | x = A\(A'\B);
28 | 


--------------------------------------------------------------------------------
/util/sq_dist.c:
--------------------------------------------------------------------------------
 1 | /* sq_dist - a mex function to compute a matrix of all pairwise squared
 2 |    distances between two sets of vectors, stored in the columns of the two 
 3 |    matrices that are arguments to the function. The length of the vectors must
 4 |    agree. If only a single argument is given, the missing argument is taken to
 5 |    be identical to the first. If an optional third matrix argument Q is given,
 6 |    it must be of the same size as the output, but in this case a vector of the
 7 |    traces of the product of Q and the coordinatewise squared distances is
 8 |    returned.
 9 | 
10 |    Copyright (c) 2003, 2004 Carl Edward Rasmussen. 2003-04-22. */
11 |  
12 | #include "mex.h"
13 | #include <math.h>
14 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
15 | {
16 |   double *a, *b, *C, *Q, z, t;
17 |   int    D, n, m, i, j, k;
18 |   if (nrhs < 1 || nrhs > 3 || nlhs > 1)
19 |     mexErrMsgTxt("Usage: C = sq_dist(a,b)\n       or: C = sq_dist(a)\n       or: c = sq_dist(a,b,Q)\nwhere the b matrix may be empty.");
20 |   a = mxGetPr(prhs[0]);
21 |   m = mxGetN(prhs[0]);
22 |   D = mxGetM(prhs[0]);
23 |   if (nrhs == 1 || mxIsEmpty(prhs[1])) {
24 |     b = a;
25 |     n = m;
26 |   } else {
27 |     b = mxGetPr(prhs[1]);
28 |     n = mxGetN(prhs[1]);
29 |     if (D != mxGetM(prhs[1]))
30 |       mexErrMsgTxt("Error: column lengths must agree");
31 |   }
32 |   if (nrhs < 3) {
33 |     plhs[0] = mxCreateDoubleMatrix(m, n, mxREAL);
34 |     C = mxGetPr(plhs[0]);
35 |     for (i=0; i<m; i++) for (j=0; j<n; j++) {
36 |       z = 0.0;
37 |       for (k=0; k<D; k++) { t = a[D*i+k] - b[D*j+k]; z += t*t; }
38 |       C[i+j*m] = z;
39 |     }
40 |   } else {
41 |     Q = mxGetPr(prhs[2]);
42 |     if (mxGetN(prhs[2]) != n || mxGetM(prhs[2]) != m)
43 | 	mexErrMsgTxt("Error: 3rd matrix argument has wrong dimensions");
44 |     plhs[0] = mxCreateDoubleMatrix(D, 1, mxREAL);
45 |     C = mxGetPr(plhs[0]);
46 |     for (k=0; k<D; k++) C[k] = 0.0;
47 |     for (i=0; i<m; i++) for (j=0; j<n; j++) {
48 |       t = Q[i+j*m];
49 |       for (k=0; k<D; k++) {
50 |         z = a[i*D+k] - b[j*D+k]; C[k] += t*z*z;
51 |       }
52 |     }
53 |   }
54 | }
55 | 


--------------------------------------------------------------------------------
/util/sq_dist.m:
--------------------------------------------------------------------------------
 1 | %% sq_dist.m
 2 | % sq_dist - a function to compute a matrix of all pairwise squared distances
 3 | % between two sets of vectors, stored in the columns of the two matrices, a
 4 | % (of size D by n) and b (of size D by m). If only a single argument is given
 5 | % or the second matrix is empty, the missing matrix is taken to be identical
 6 | % to the first.
 7 | %
 8 | % Special functionality: If an optional third matrix argument Q is given, it
 9 | % must be of size n by m, and in this case a vector of the traces of the
10 | % product of Q' and the coordinatewise squared distances is returned.
11 | %
12 | % NOTE: The program code is written in the C language for efficiency and is
13 | % contained in the file sq_dist.c, and should be compiled using matlabs mex
14 | % facility. However, this file also contains a (less efficient) matlab
15 | % implementation, supplied only as a help to people unfamiliar with mex. If
16 | % the C code has been properly compiled and is avaiable, it automatically
17 | % takes precendence over the matlab code in this file.
18 | %
19 | % Usage: C = sq_dist(a, b)
20 | %    or: C = sq_dist(a)  or equiv.: C = sq_dist(a, [])
21 | %    or: c = sq_dist(a, b, Q)
22 | % where the b matrix may be empty.
23 | %
24 | % where a is of size D by n, b is of size D by m (or empty), C and Q are of
25 | % size n by m and c is of size D by 1.
26 | %
27 | % Copyright (c) 2003, 2004, 2005 and 2006 Carl Edward Rasmussen. 2006-03-09.
28 | 
29 | function C = sq_dist(a, b, Q);
30 | %% Code
31 | 
32 | if nargin < 1 | nargin > 3 | nargout > 1
33 |   error('Wrong number of arguments.');
34 | end
35 | 
36 | if nargin == 1 | isempty(b)                   % input arguments are taken to be
37 |   b = a;                                   % identical if b is missing or empty
38 | end 
39 | 
40 | [D, n] = size(a); 
41 | [d, m] = size(b);
42 | if d ~= D
43 |   error('Error: column lengths must agree.');
44 | end
45 | 
46 | if nargin < 3
47 |   C = zeros(n,m);
48 |   for d = 1:D
49 |     C = C + (repmat(b(d,:), n, 1) - repmat(a(d,:)', 1, m)).^2;
50 |   end
51 |   % C = repmat(sum(a.*a)',1,m)+repmat(sum(b.*b),n,1)-2*a'*b could be used to 
52 |   % replace the 3 lines above; it would be faster, but numerically less stable.
53 | else
54 |   if [n m] == size(Q)
55 |     C = zeros(D,1);
56 |     for d = 1:D
57 |       C(d) = sum(sum((repmat(b(d,:), n, 1) - repmat(a(d,:)', 1, m)).^2.*Q));
58 |     end
59 |   else
60 |     error('Third argument has wrong size.');
61 |   end
62 | end
63 | 


--------------------------------------------------------------------------------
/util/squash.tex:
--------------------------------------------------------------------------------
 1 | \documentclass[11pt, a4paper]{article}
 2 | \renewcommand{\rmdefault}{psbx}
 3 | \usepackage[utf8]{inputenc}
 4 | \usepackage[T1]{fontenc}
 5 | \usepackage{textcomp}
 6 | \usepackage{eulervm}
 7 | \usepackage{amsmath}
 8 | \usepackage{amssymb}
 9 | \usepackage{wrapfig}
10 | 
11 | \setlength{\textwidth}{160mm}
12 | \setlength{\oddsidemargin}{0mm}
13 | \setlength{\parindent}{0 mm}
14 | 
15 | \renewcommand{\vec}{\boldsymbol}
16 | \newcommand{\mat}{\boldsymbol}
17 | \newcommand{\E}{{\mathbb E}}
18 | \newcommand{\V}{{\mathbb V}}
19 | 
20 | \title{Policy Squashing}
21 | \author{Marc Peter Deisenroth}
22 | \date{\today}
23 | 
24 | 
25 | \begin{document}
26 | \maketitle
27 | 
28 | 
29 | We consider a third-order Fourier series expansion 
30 | %
31 | \begin{align*}
32 | s(x) =  a\sin(x) + b\sin(3x)
33 | \end{align*}
34 | %
35 | %
36 | % \begin{wrapfigure}{r}{0.5\hsize}
37 | % \vspace{-5mm}
38 | % \centering
39 | % \includegraphics[width = \hsize]{./figures/squashing_function}
40 | % \caption{Different squashing functions.}
41 | % \label{fig:squashing function}
42 | % \end{wrapfigure}
43 | % 
44 | of a trapezoidal wave with modified Fourier coefficients $a$ and $b$
45 | that satisfy the following constraints: First, when restricted to the
46 | interval $[-\tfrac{\pi}{2}, \tfrac{\pi}{2}]$, $s(x)$ is monotonically
47 | increasing, i.e., $s^\prime(x) \geq 0$ with $s^\prime(x)=0$ if and
48 | only if $x\in\{-\tfrac{\pi}{2}, \tfrac{\pi}{2}\}$. Second, at the
49 | boundaries $\pm\tfrac{\pi}{2}$, we require that $s(\tfrac{\pi}{2}) =
50 | 1$ and $s(-\tfrac{\pi}{2}) = -1$.  To closely approximate a (normalized)
51 | trapezoidal wave, we additionally require that $s$ has stationary
52 | points at the boundaries $\pm\tfrac{\pi}{2}$, i.e.,
53 | $s^{\prime\prime}(\pm\tfrac{\pi}{2}) = 0$. The first and second
54 | derivatives of $s(x)$ are
55 | %
56 | \begin{align*}
57 | s^\prime(x) &= a\cos (x) + 3b\cos(3x)\,,\\
58 | s^{\prime\prime}(x) &= -a\sin(x) - 9b\sin(3x)\,.
59 | \end{align*}
60 |  %
61 | From $s^\prime(\pm\tfrac{\pi}{2})=0$, we obtain $a = 1+b$. For
62 | $b>\tfrac{1}{8}$, $s(x)$ is no longer monotonically increasing in
63 | $[-\tfrac{\pi}{2}, \tfrac{\pi}{2}]$: The first derivative
64 | $s^\prime(x)$ crosses 0 for $b=-\tfrac{\cos x}{\cos x +
65 |   3\cos(3x)}$. For $b>\tfrac{1}{8}$, $s(x)$ has a local optimum for
66 | $x\in(-\tfrac{\pi}{2},\tfrac{\pi}{2})$. Therefore,
67 | $b\in[0,\tfrac{1}{8}$ are valid choices.  Using now any of the
68 | stationary point constraints yields $s^{\prime\prime}(-\tfrac{\pi}{2})
69 | = 1+b - 9b = 0 \Leftrightarrow b = \tfrac{1}{8}$ and $a = 1+b =
70 | \tfrac{9}{8}$. Hence, our squashing function is
71 | %
72 | \begin{align*}
73 | s(x) = \tfrac{9}{8}\sin(x) + \tfrac{1}{8}\sin(3x)\,.
74 | \end{align*}
75 | %
76 | For $b=0$, the squashing function equals the
77 | sine wave (red), the black dashed curve is the squashing function for
78 | $b=\tfrac{1}{20}$. However, only for $b=\tfrac{1}{8}$, the squashing
79 | function possesses stationary points at the boundaries.
80 | 
81 | 
82 | \end{document}
83 | 
84 | %%% Local Variables: 
85 | %%% mode: latex
86 | %%% TeX-master: t
87 | %%% End: 
88 | 


--------------------------------------------------------------------------------
/util/unwrap.m:
--------------------------------------------------------------------------------
 1 | %% unwrap.m
 2 | % *Summary:* Extract the numerical values from $s$ into the column vector $v$.
 3 | % The variable $sS can be of any type, including struct and cell array.
 4 | % Non-numerical elements are ignored. See also the reverse rewrap.m.
 5 | %
 6 | %    v = unwrap(s)
 7 | %
 8 | % *Input arguments:*
 9 | %
10 | %   s     structure, cell, or numeric values
11 | %
12 | %
13 | % *Output arguments:*
14 | %
15 | %   v     structure, cell, or numeric values
16 | %
17 | %
18 | % Copyright (C) 2008-2013 by
19 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
20 | %
21 | % Last modified: 2013-03-25
22 | 
23 | function v = unwrap(s)
24 | %% Code
25 | 
26 | v = [];
27 | if isnumeric(s)
28 |   v = s(:);                        % numeric values are recast to column vector
29 | elseif isstruct(s)
30 |   v = unwrap(struct2cell(orderfields(s))); % alphabetize, conv to cell, recurse
31 | elseif iscell(s)
32 |   for i = 1:numel(s)             % cell array elements are handled sequentially
33 |     v = [v; unwrap(s{i})];
34 |   end
35 | end


--------------------------------------------------------------------------------