├── LICENSE ├── README.md ├── base ├── applyController.m ├── calcCost.m ├── learnPolicy.m ├── pred.m ├── predcost.m ├── prop.pdf ├── propagate.m ├── propagated.m ├── rollout.m ├── simulate.m ├── trainDynModel.m └── value.m ├── control ├── conCat.m ├── conCat.m~ ├── congp.m └── conlin.m ├── doc ├── 3dplot.sty ├── covFunctions.m ├── doc.aux ├── doc.log ├── doc.out ├── doc.pdf ├── doc.tex ├── doc.toc ├── figures │ ├── cartPole.pdf │ ├── cdp_sketch.pdf │ ├── cost_error_bar.pdf │ ├── cp.pdf │ ├── cp_rollout.pdf │ ├── example_trajectories.pdf │ ├── example_trajectories2.pdf │ ├── fX3.pdf │ ├── flow.pdf │ ├── learnP.pdf │ ├── pendubot.pdf │ ├── pendulum.pdf │ ├── pendulum.pdf.ipe7 │ ├── policy_opt.pdf │ ├── preliminary_policy.pdf │ ├── satLossPlot.pdf │ ├── squashed_preliminary_policy.pdf │ ├── squashing_fct.pdf │ ├── squashing_fct2.pdf │ ├── squashing_function.pdf │ ├── unicoords_body.tex │ ├── unicoords_body.tex~ │ ├── unicoords_frame.tex │ ├── unicoords_frame.tex~ │ ├── unicoords_frame2.tex │ ├── unicoords_frame2.tex~ │ ├── unicoords_turntable.tex │ ├── unicoords_turntable.tex~ │ ├── unicoords_turntable2.tex │ ├── unicoords_turntable2.tex~ │ ├── unicoords_wheel.tex │ ├── unicoords_wheel.tex~ │ ├── unicoords_wheel2.tex │ ├── unicoords_wheel2.tex~ │ └── unicycle.pdf ├── generate_docs.m ├── header.tex ├── html │ ├── DoublePend_learn.html │ ├── applyController.html │ ├── augment_unicycle.html │ ├── augment_unicycle_eq57428.png │ ├── calcCost.html │ ├── cartDouble_learn.html │ ├── cartPole_learn.html │ ├── checkgrad.html │ ├── conCat.html │ ├── conCat_eq10310.png │ ├── conCat_eq35435.png │ ├── conT.html │ ├── conT_eq96160.png │ ├── congp.html │ ├── congp_eq35435.png │ ├── congp_eq43551.png │ ├── conlin.html │ ├── conlin_eq10310.png │ ├── conlin_eq42328.png │ ├── conlin_eq81653.png │ ├── conlin_eq95465.png │ ├── covNoise.html │ ├── covSEard.html │ ├── covSum.html │ ├── draw_cdp.html │ ├── draw_cp.html │ ├── draw_dp.html │ ├── draw_pendubot.html │ ├── draw_pendulum.html │ ├── draw_rollout_cdp.html │ ├── draw_rollout_cp.html │ ├── draw_rollout_dp.html │ ├── draw_rollout_pendubot.html │ ├── draw_rollout_pendulum.html │ ├── draw_rollout_unicycle.html │ ├── draw_unicycle.html │ ├── dynamics_cdp.html │ ├── dynamics_cp.html │ ├── dynamics_dp.html │ ├── dynamics_pendubot.html │ ├── dynamics_pendulum.html │ ├── dynamics_unicycle.html │ ├── error_ellipse.html │ ├── fitc.html │ ├── gSat.html │ ├── gSat_eq24615.png │ ├── gSat_eq28128.png │ ├── gSat_eq35427.png │ ├── gSat_eq53557.png │ ├── gSat_eq97420.png │ ├── gSin.html │ ├── gSinSatT.html │ ├── gSin_eq00934.png │ ├── gSin_eq24615.png │ ├── gSin_eq28128.png │ ├── gSin_eq35427.png │ ├── gSin_eq53557.png │ ├── gTrig.html │ ├── gTrigT.html │ ├── gTrigT_eq41681.png │ ├── gTrig_eq00934.png │ ├── gTrig_eq24615.png │ ├── gTrig_eq28128.png │ ├── gTrig_eq35427.png │ ├── gTrig_eq53557.png │ ├── gaussian.html │ ├── getPlotDistr_cdp.html │ ├── getPlotDistr_cdp_eq30108.png │ ├── getPlotDistr_cdp_eq43551.png │ ├── getPlotDistr_cp.html │ ├── getPlotDistr_cp_eq30108.png │ ├── getPlotDistr_cp_eq43551.png │ ├── getPlotDistr_dp.html │ ├── getPlotDistr_dp_eq30108.png │ ├── getPlotDistr_dp_eq43551.png │ ├── getPlotDistr_pendubot.html │ ├── getPlotDistr_pendubot_eq30108.png │ ├── getPlotDistr_pendubot_eq43551.png │ ├── gp0.html │ ├── gp0d.html │ ├── gp1.html │ ├── gp1d.html │ ├── gp2.html │ ├── gp2d.html │ ├── gpT.html │ ├── gpT_eq96160.png │ ├── gpr.html │ ├── hypCurb.html │ ├── lossAdd.html │ ├── lossHinge.html │ ├── lossLin.html │ ├── lossQuad.html │ ├── lossQuad_eq15684.png │ ├── lossQuad_eq95740.png │ ├── lossSat.html │ ├── lossSat_eq21635.png │ ├── lossSat_eq65484.png │ ├── lossT.html │ ├── lossT_eq96160.png │ ├── loss_cdp.html │ ├── loss_cdp_eq26389.png │ ├── loss_cdp_eq30075.png │ ├── loss_cdp_eq32292.png │ ├── loss_cdp_eq39223.png │ ├── loss_cdp_eq72999.png │ ├── loss_cdp_eq81831.png │ ├── loss_cp.html │ ├── loss_cp_eq26389.png │ ├── loss_cp_eq30075.png │ ├── loss_cp_eq32292.png │ ├── loss_cp_eq39223.png │ ├── loss_cp_eq72999.png │ ├── loss_cp_eq81831.png │ ├── loss_dp.html │ ├── loss_dp_eq26389.png │ ├── loss_dp_eq30075.png │ ├── loss_dp_eq32292.png │ ├── loss_dp_eq39223.png │ ├── loss_dp_eq72999.png │ ├── loss_dp_eq81831.png │ ├── loss_pendubot.html │ ├── loss_pendubot_eq26389.png │ ├── loss_pendubot_eq30075.png │ ├── loss_pendubot_eq32292.png │ ├── loss_pendubot_eq39223.png │ ├── loss_pendubot_eq72999.png │ ├── loss_pendubot_eq81831.png │ ├── loss_pendulum.html │ ├── loss_pendulum_eq26389.png │ ├── loss_pendulum_eq30075.png │ ├── loss_pendulum_eq32292.png │ ├── loss_pendulum_eq39223.png │ ├── loss_pendulum_eq72999.png │ ├── loss_pendulum_eq81831.png │ ├── loss_unicycle.html │ ├── loss_unicycle_eq22825.png │ ├── loss_unicycle_eq26389.png │ ├── loss_unicycle_eq32292.png │ ├── loss_unicycle_eq34276.png │ ├── loss_unicycle_eq72999.png │ ├── loss_unicycle_eq81831.png │ ├── maha.html │ ├── minimize.html │ ├── pendubot_learn.html │ ├── pendulum_learn.html │ ├── pred.html │ ├── predcost.html │ ├── propagate.html │ ├── propagateT.html │ ├── propagated.html │ ├── reward.html │ ├── reward_eq37930.png │ ├── reward_eq81456.png │ ├── rewrap.html │ ├── rewrap_eq72999.png │ ├── rewrap_eq77996.png │ ├── rollout.html │ ├── rollout_eq25411.png │ ├── rollout_eq35435.png │ ├── rollout_eq43551.png │ ├── settings_cdp.html │ ├── settings_cp.html │ ├── settings_dp.html │ ├── settings_pendubot.html │ ├── settings_pendulum.html │ ├── settings_unicycle.html │ ├── simulate.html │ ├── solve_chol.html │ ├── sq_dist.html │ ├── train.html │ ├── trainDynModel.html │ ├── unicycle_learn.html │ ├── unwrap.html │ ├── unwrap_eq72999.png │ ├── unwrap_eq77996.png │ ├── value.html │ └── valueT.html ├── literature.bib ├── m2doctex.xsl ├── m2latex.xsl ├── plots │ ├── header.m │ ├── plot_policySquash.m │ ├── plot_satLoss.m │ ├── plot_squash.m │ └── print_pdf.m └── tex │ ├── DoublePend_learn.tex │ ├── applyController.tex │ ├── augment_unicycle.tex │ ├── calcCost.tex │ ├── cartDouble_learn.tex │ ├── cartPole_learn.tex │ ├── checkgrad.tex │ ├── conCat.tex │ ├── conT.tex │ ├── congp.tex │ ├── conlin.tex │ ├── covNoise.tex │ ├── covSEard.tex │ ├── covSum.tex │ ├── draw_cdp.tex │ ├── draw_cp.tex │ ├── draw_dp.tex │ ├── draw_pendubot.tex │ ├── draw_pendulum.tex │ ├── draw_rollout_cdp.tex │ ├── draw_rollout_cp.tex │ ├── draw_rollout_dp.tex │ ├── draw_rollout_pendubot.tex │ ├── draw_rollout_pendulum.tex │ ├── draw_rollout_unicycle.tex │ ├── draw_unicycle.tex │ ├── dynamics_cdp.tex │ ├── dynamics_cp.tex │ ├── dynamics_dp.tex │ ├── dynamics_pendubot.tex │ ├── dynamics_pendulum.tex │ ├── dynamics_unicycle.tex │ ├── error_ellipse.tex │ ├── fitc.tex │ ├── gSat.tex │ ├── gSin.tex │ ├── gSinSatT.tex │ ├── gTrig.tex │ ├── gTrigT.tex │ ├── gaussian.tex │ ├── getPlotDistr_cdp.tex │ ├── getPlotDistr_cp.tex │ ├── getPlotDistr_dp.tex │ ├── getPlotDistr_pendubot.tex │ ├── gp0.tex │ ├── gp0d.tex │ ├── gp1.tex │ ├── gp1d.tex │ ├── gp2.tex │ ├── gp2d.tex │ ├── gpT.tex │ ├── gpr.tex │ ├── hypCurb.tex │ ├── lossAdd.tex │ ├── lossHinge.tex │ ├── lossLin.tex │ ├── lossQuad.tex │ ├── lossSat.tex │ ├── lossT.tex │ ├── loss_cdp.tex │ ├── loss_cp.tex │ ├── loss_dp.tex │ ├── loss_pendubot.tex │ ├── loss_pendulum.tex │ ├── loss_unicycle.tex │ ├── maha.tex │ ├── minimize.tex │ ├── pendubot_learn.tex │ ├── pendulum_learn.tex │ ├── pred.tex │ ├── predcost.tex │ ├── propagate.tex │ ├── propagateT.tex │ ├── propagated.tex │ ├── reward.tex │ ├── rewrap.tex │ ├── rollout.tex │ ├── settings_cdp.tex │ ├── settings_cp.tex │ ├── settings_dp.tex │ ├── settings_pendubot.tex │ ├── settings_pendulum.tex │ ├── settings_unicycle.tex │ ├── simulate.tex │ ├── solve_chol.tex │ ├── sq_dist.tex │ ├── train.tex │ ├── trainDynModel.tex │ ├── unicycle_learn.tex │ ├── unwrap.tex │ ├── value.tex │ └── valueT.tex ├── gp ├── covNoise.m ├── covSEard.m ├── covSum.m ├── fitc.m ├── gp0.m ├── gp0d.m ├── gp1.m ├── gp1d.m ├── gp2.m ├── gp2d.m ├── gpr.m ├── hypCurb.m └── train.m ├── loss ├── lossAdd.m ├── lossHinge.m ├── lossLin.m ├── lossQuad.m ├── lossSat.m ├── reward.m ├── reward.pdf └── reward.tex ├── scenarios ├── cartDoublePendulum │ ├── cartDouble_learn.m │ ├── draw_cdp.m │ ├── draw_rollout_cdp.m │ ├── dynamics_cdp.m │ ├── getPlotDistr_cdp.m │ ├── loss_cdp.m │ └── settings_cdp.m ├── cartPole │ ├── cartPole_learn.m │ ├── draw_cp.m │ ├── draw_rollout_cp.m │ ├── dynamics_cp.m │ ├── getPlotDistr_cp.m │ ├── loss_cp.m │ └── settings_cp.m ├── doublePendulum │ ├── DoublePend_learn.m │ ├── draw_dp.m │ ├── draw_rollout_dp.m │ ├── dynamics_dp.m │ ├── getPlotDistr_dp.m │ ├── loss_dp.m │ └── settings_dp.m ├── pendubot │ ├── draw_pendubot.m │ ├── draw_rollout_pendubot.m │ ├── dynamics_pendubot.m │ ├── getPlotDistr_pendubot.m │ ├── loss_pendubot.m │ ├── pendubot_learn.m │ └── settings_pendubot.m ├── pendulum │ ├── draw_pendulum.m │ ├── draw_rollout_pendulum.m │ ├── dynamics_pendulum.m │ ├── loss_pendulum.m │ ├── pendulum_learn.m │ └── settings_pendulum.m └── unicycle │ ├── augment_unicycle.m │ ├── draw_rollout_all.m │ ├── draw_rollout_unicycle.m │ ├── draw_unicycle.m │ ├── dynamics_unicycle.m │ ├── loss.pdf │ ├── loss.tex │ ├── loss_unicycle.m │ ├── settings_unicycle.m │ └── unicycle_learn.m ├── test ├── checkgrad.m ├── conT.m ├── gSinSatT.m ├── gTrigT.m ├── gpT.m ├── lossT.m ├── propagateT.m └── valueT.m └── util ├── error_ellipse.m ├── gSat.m ├── gSin.m ├── gTrig.m ├── gaussian.m ├── maha.m ├── minimize.m ├── rewrap.m ├── solve_chol.c ├── solve_chol.m ├── sq_dist.c ├── sq_dist.m ├── squash.tex ├── trigSquash_old.m └── unwrap.m /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, ICL-SML 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of pilco-matlab nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | PILCO Software Package V0.9 (2013-07-04) 2 | 3 | I. Introduction 4 | This software package implements the PILCO RL policy search framework. The learning framework can be applied to MDPs with continuous states and controls/actions and is based on probabilistic modeling of the dynamics and approximate Bayesian inference for policy evaluation and improvement. 5 | 6 | 7 | 8 | II. Quick Start 9 | We have already implemented some scenarios that can be found in 10 | /scenarios . 11 | 12 | If you want to get started immediately, go to 13 | /scenarios/cartPole 14 | 15 | and execute 16 | 17 | cartPole_learn 18 | 19 | 20 | 21 | III. Documentation 22 | A detailed documentation can be found in 23 | 24 | /doc/doc.pdf 25 | 26 | which also includes a description of how to set up your own scenario (there are only a few files that are scenario specific). 27 | 28 | 29 | 30 | IV. Contact 31 | If you find bugs, have questions, or want to give us feedback, please send an email to 32 | m.deisenroth@imperial.ac.uk 33 | 34 | 35 | 36 | V. References 37 | M.P. Deisenroth and C.E. Rasmussen: PILCO: A Data-Efficient and Model-based Approach to Policy Search (ICML 2011) 38 | M.P. Deisenroth: Efficient Reinforcement Learning Using Gaussian Processes (KIT Scientific Publishing, 2010) 39 | 40 | 41 | 42 | Marc Deisenroth 43 | Andrew McHutchon 44 | Joe Hall 45 | Carl Edward Rasmussen 46 | 47 | -------------------------------------------------------------------------------- /base/applyController.m: -------------------------------------------------------------------------------- 1 | %% applyController.m 2 | % *Summary:* Script to apply the learned controller to a (simulated) system 3 | % 4 | % Copyright (C) 2008-2013 by 5 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 6 | % 7 | % Last modified: 2013-06-04 8 | % 9 | %% High-Level Steps 10 | % # Generate a single trajectory rollout by applying the controller 11 | % # Generate many rollouts for testing the performance of the controller 12 | % # Save the data 13 | 14 | %% Code 15 | 16 | % 1. Generate trajectory rollout given the current policy 17 | if isfield(plant,'constraint'), HH = maxH; else HH = H; end 18 | [xx, yy, realCost{j+J}, latent{j}] = ... 19 | rollout(gaussian(mu0, S0), policy, HH, plant, cost); 20 | disp(xx); % display states of observed trajectory 21 | x = [x; xx]; y = [y; yy]; % augment training set 22 | if plotting.verbosity > 0 23 | if ~ishandle(3); figure(3); else set(0,'CurrentFigure',3); end 24 | hold on; plot(1:length(realCost{J+j}),realCost{J+j},'r'); drawnow; 25 | end 26 | 27 | % 2. Make many rollouts to test the controller quality 28 | if plotting.verbosity > 1 29 | lat = cell(1,10); 30 | for i=1:10 31 | [~,~,~,lat{i}] = rollout(gaussian(mu0, S0), policy, HH, plant, cost); 32 | end 33 | 34 | if ~ishandle(4); figure(4); else set(0,'CurrentFigure',4); end; clf(4); 35 | 36 | ldyno = length(dyno); 37 | for i=1:ldyno % plot the rollouts on top of predicted error bars 38 | subplot(ceil(ldyno/sqrt(ldyno)),ceil(sqrt(ldyno)),i); hold on; 39 | errorbar( 0:length(M{j}(i,:))-1, M{j}(i,:), ... 40 | 2*sqrt(squeeze(Sigma{j}(i,i,:))) ); 41 | for ii=1:10 42 | plot( 0:size(lat{ii}(:,dyno(i)),1)-1, lat{ii}(:,dyno(i)), 'r' ); 43 | end 44 | plot( 0:size(latent{j}(:,dyno(i)),1)-1, latent{j}(:,dyno(i)),'g'); 45 | axis tight 46 | end 47 | drawnow; 48 | end 49 | 50 | % 3. Save data 51 | filename = [basename num2str(j) '_H' num2str(H)]; save(filename); 52 | -------------------------------------------------------------------------------- /base/calcCost.m: -------------------------------------------------------------------------------- 1 | %% calcCost.m 2 | % *Summary:* Function to calculate the incurred cost and its standard deviation, 3 | % given a sequence of predicted state distributions and the cost struct 4 | % 5 | % [L sL] = calcCost(cost, M, S) 6 | % 7 | % *Input arguments:* 8 | % 9 | % cost cost structure 10 | % M mean vectors of state trajectory (D-by-H matrix) 11 | % S covariance matrices at each time step (D-by-D-by-H) 12 | % 13 | % *Output arguments:* 14 | % 15 | % L expected incurred cost of state trajectory 16 | % sL standard deviation of incurred cost 17 | % 18 | % 19 | % Copyright (C) 2008-2013 by 20 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 21 | % 22 | % Last modified: 2013-01-23 23 | % 24 | %% High-Level Steps 25 | % # Augment state distribution with trigonometric functions 26 | % # Compute distribution of the control signal 27 | % # Compute dynamics-GP prediction 28 | % # Compute distribution of the next state 29 | % 30 | 31 | function [L sL] = calcCost(cost, M, S) 32 | %% Code 33 | H = size(M,2); % horizon length 34 | L = zeros(1,H); SL = zeros(1,H); 35 | 36 | % for each time step, compute the expected cost and its variance 37 | for h = 1:H 38 | [L(h),d1,d2,SL(h)] = cost.fcn(cost, M(:,h), S(:,:,h)); 39 | end 40 | 41 | sL = sqrt(SL); % standard deviation 42 | 43 | -------------------------------------------------------------------------------- /base/learnPolicy.m: -------------------------------------------------------------------------------- 1 | %% learnPolicy.m 2 | % *Summary:* Script to perform the policy search 3 | % 4 | % Copyright (C) 2008-2013 by 5 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 6 | % 7 | % Last modified: 2013-03-06 8 | % 9 | %% High-Level Steps 10 | % # Learn the policy (call optimizer) 11 | % # Predict trajectory from a sampled start state and compute expected cost 12 | 13 | %% Code 14 | 15 | % 1. Update the policy 16 | opt.fh = 1; 17 | [policy.p fX3] = minimize(policy.p, 'value', opt, mu0Sim, S0Sim, ... 18 | dynmodel, policy, plant, cost, H); 19 | 20 | % (optional) Plot overall optimization progress 21 | if exist('plotting', 'var') && isfield(plotting, 'verbosity') ... 22 | && plotting.verbosity > 1 23 | if ~ishandle(2); figure(2); else set(0,'CurrentFigure',2); end 24 | hold on; plot(fX3); drawnow; 25 | xlabel('line search iteration'); ylabel('function value') 26 | end 27 | 28 | % 2. Predict state trajectory from p(x0) and compute cost trajectory 29 | [M{j} Sigma{j}] = pred(policy, plant, dynmodel, mu0Sim(:,1), S0Sim, H); 30 | [fantasy.mean{j} fantasy.std{j}] = ... 31 | calcCost(cost, M{j}, Sigma{j}); % predict cost trajectory 32 | 33 | % (optional) Plot predicted immediate costs (as a function of the time steps) 34 | if exist('plotting', 'var') && isfield(plotting, 'verbosity') ... 35 | && plotting.verbosity > 0 36 | if ~ishandle(3); figure(3); else set(0,'CurrentFigure',3); end 37 | clf(3); errorbar(0:H,fantasy.mean{j},2*fantasy.std{j}); drawnow; 38 | xlabel('time step'); ylabel('immediate cost'); 39 | end -------------------------------------------------------------------------------- /base/pred.m: -------------------------------------------------------------------------------- 1 | %% pred.m 2 | % *Summary:* Compute predictive (marginal) distributions of a trajecory 3 | % 4 | % [M S] = pred(policy, plant, dynmodel, m, s, H) 5 | % 6 | % *Input arguments:* 7 | % 8 | % policy policy structure 9 | % plant plant structure 10 | % dynmodel dynamics model structure 11 | % m D-by-1 mean of the initial state distribution 12 | % s D-by-D covariance of the initial state distribution 13 | % H length of prediction horizon 14 | % 15 | % *Output arguments:* 16 | % 17 | % M D-by-(H+1) sequence of predicted mean vectors 18 | % S D-by-D-(H+1) sequence of predicted covariance 19 | % matrices 20 | % 21 | % Copyright (C) 2008-2013 by 22 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 23 | % 24 | % Last modified: 2013-01-23 25 | % 26 | %% High-Level Steps 27 | % # Predict successor state distribution 28 | 29 | function [M S] = pred(policy, plant, dynmodel, m, s, H) 30 | %% Code 31 | 32 | D = length(m); S = zeros(D,D,H+1); M = zeros(D,H+1); 33 | M(:,1) = m; S(:,:,1) = s; 34 | for i = 1:H 35 | [m s] = plant.prop(m, s, plant, dynmodel, policy); 36 | M(:,i+1) = m(end-D+1:end); 37 | S(:,:,i+1) = s(end-D+1:end,end-D+1:end); 38 | end 39 | -------------------------------------------------------------------------------- /base/predcost.m: -------------------------------------------------------------------------------- 1 | %% predcost.m 2 | % *Summary:* Compute trajectory of expected costs for a given set of 3 | % state distributions 4 | % 5 | % inputs: 6 | % m0 mean of states, D-by-1 or D-by-K for multiple means 7 | % S covariance matrix of state distributions 8 | % dynmodel (struct) for dynamics model (GP) 9 | % plant (struct) of system parameters 10 | % policy (struct) for policy to be implemented 11 | % cost (struct) of cost function parameters 12 | % H length of optimization horizon 13 | % 14 | % outputs: 15 | % L expected cumulative (discounted) cost 16 | % s standard deviation of cost 17 | % 18 | % Copyright (C) 2008-2013 by 19 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 20 | % 21 | % Last modified: 2012-01-12 22 | % 23 | %% High-Level Steps 24 | % # Predict successor state distribution 25 | % # Predict corresponding cost distribution 26 | 27 | function [L, s] = predcost(m0, S, dynmodel, plant, policy, cost, H) 28 | %% Code 29 | 30 | L = zeros(size(m0,2),H); s = zeros(size(m0,2),H); 31 | for k = 1:size(m0,2); 32 | m = m0(:,k); 33 | for t = 1:H 34 | [m, S] = plant.prop(m, S, plant, dynmodel, policy); % get next state 35 | [L(k,t), d1, d2, v] = cost.fcn(cost, m, S); % compute cost 36 | s(k,t) = sqrt(v); 37 | end 38 | end 39 | L = mean(L,1); s = mean(s,1); 40 | -------------------------------------------------------------------------------- /base/prop.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/base/prop.pdf -------------------------------------------------------------------------------- /base/trainDynModel.m: -------------------------------------------------------------------------------- 1 | %% trainDynModel.m 2 | % *Summary:* Script to learn the dynamics model 3 | % 4 | % Copyright (C) 2008-2013 by 5 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 6 | % 7 | % Last modification: 2013-05-20 8 | % 9 | %% High-Level Steps 10 | % # Extract states and controls from x-matrix 11 | % # Define the training inputs and targets of the GP 12 | % # Train the GP 13 | 14 | %% Code 15 | 16 | 17 | % 1. Train GP dynamics model 18 | Du = length(policy.maxU); Da = length(plant.angi); % no. of ctrl and angles 19 | xaug = [x(:,dyno) x(:,end-Du-2*Da+1:end-Du)]; % x augmented with angles 20 | dynmodel.inputs = [xaug(:,dyni) x(:,end-Du+1:end)]; % use dyni and ctrl 21 | dynmodel.targets = y(:,dyno); 22 | dynmodel.targets(:,difi) = dynmodel.targets(:,difi) - x(:,dyno(difi)); 23 | 24 | dynmodel = dynmodel.train(dynmodel, plant, trainOpt); % train dynamics GP 25 | 26 | % display some hyperparameters 27 | Xh = dynmodel.hyp; 28 | % noise standard deviations 29 | disp(['Learned noise std: ' num2str(exp(Xh(end,:)))]); 30 | % signal-to-noise ratios (values > 500 can cause numerical problems) 31 | disp(['SNRs : ' num2str(exp(Xh(end-1,:)-Xh(end,:)))]); -------------------------------------------------------------------------------- /base/value.m: -------------------------------------------------------------------------------- 1 | %% value.m 2 | % *Summary:* Compute expected (discounted) cumulative cost for a given (set of) initial 3 | % state distributions 4 | % 5 | % function [J, dJdp] = value(p, m0, S0, dynmodel, policy, plant, cost, H) 6 | % 7 | % *Input arguments:* 8 | % 9 | % p policy parameters chosen by minimize 10 | % policy policy structure 11 | % .fcn function which implements the policy 12 | % .p parameters passed to the policy 13 | % m0 matrix (D by k) of initial state means 14 | % S0 covariance matrix (D by D) for initial state 15 | % dynmodel dynamics model structure 16 | % plant plant structure 17 | % cost cost function structure 18 | % .fcn function handle to the cost 19 | % .gamma discount factor 20 | % H length of prediction horizon 21 | % 22 | % *Output arguments:* 23 | % 24 | % J expected cumulative (discounted) cost 25 | % dJdp (optional) derivative of J wrt the policy parameters 26 | % 27 | % Copyright (C) 2008-2013 by 28 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 29 | % 30 | % Last modification: 2013-03-21 31 | % 32 | %% High-Level Steps 33 | % # Compute distribution of next state 34 | % # Compute corresponding expected immediate cost (discounted) 35 | % # At end of prediction horizon: sum all immediate costs up 36 | 37 | function [J, dJdp] = value(p, m0, S0, dynmodel, policy, plant, cost, H) 38 | %% Code 39 | 40 | policy.p = p; % overwrite policy.p with new parameters from minimize 41 | p = unwrap(policy.p); dp = 0*p; 42 | m = m0; S = S0; L = zeros(1,H); 43 | 44 | if nargout <= 1 % no derivatives required 45 | 46 | for t = 1:H % for all time steps in horizon 47 | [m, S] = plant.prop(m, S, plant, dynmodel, policy); % get next state 48 | L(t) = cost.gamma^t.*cost.fcn(cost, m, S); % expected discounted cost 49 | end 50 | 51 | else % otherwise, get derivatives 52 | 53 | dmOdp = zeros([size(m0,1), length(p)]); 54 | dSOdp = zeros([size(m0,1)*size(m0,1), length(p)]); 55 | 56 | for t = 1:H % for all time steps in horizon 57 | [m, S, dmdmO, dSdmO, dmdSO, dSdSO, dmdp, dSdp] = ... 58 | plant.prop(m, S, plant, dynmodel, policy); % get next state 59 | 60 | dmdp = dmdmO*dmOdp + dmdSO*dSOdp + dmdp; 61 | dSdp = dSdmO*dmOdp + dSdSO*dSOdp + dSdp; 62 | 63 | [L(t), dLdm, dLdS] = cost.fcn(cost, m, S); % predictive cost 64 | L(t) = cost.gamma^t*L(t); % discount 65 | dp = dp + cost.gamma^t*( dLdm(:)'*dmdp + dLdS(:)'*dSdp )'; 66 | 67 | dmOdp = dmdp; dSOdp = dSdp; % bookkeeping 68 | end 69 | 70 | end 71 | 72 | J = sum(L); dJdp = rewrap(policy.p, dp); 73 | -------------------------------------------------------------------------------- /doc/doc.out: -------------------------------------------------------------------------------- 1 | \BOOKMARK [0][-]{chapter.1}{Introduction}{}% 1 2 | \BOOKMARK [1][-]{section.1.1}{Intended Use}{chapter.1}% 2 3 | \BOOKMARK [1][-]{section.1.2}{Software Design and Implementation}{chapter.1}% 3 4 | \BOOKMARK [2][-]{subsection.1.2.1}{Model Learning}{section.1.2}% 4 5 | \BOOKMARK [2][-]{subsection.1.2.2}{Policy Learning}{section.1.2}% 5 6 | \BOOKMARK [2][-]{subsection.1.2.3}{Policy Application}{section.1.2}% 6 7 | \BOOKMARK [1][-]{section.1.3}{User Interface by Example}{chapter.1}% 7 8 | \BOOKMARK [1][-]{section.1.4}{Quick Start}{chapter.1}% 8 9 | \BOOKMARK [0][-]{chapter.2}{Software Package Overview}{}% 9 10 | \BOOKMARK [1][-]{section.2.1}{Main Modules}{chapter.2}% 10 11 | \BOOKMARK [2][-]{subsection.2.1.1}{applyController}{section.2.1}% 11 12 | \BOOKMARK [2][-]{subsection.2.1.2}{trainDynModel}{section.2.1}% 12 13 | \BOOKMARK [2][-]{subsection.2.1.3}{learnPolicy}{section.2.1}% 13 14 | \BOOKMARK [1][-]{section.2.2}{Working with a Real Robot}{chapter.2}% 14 15 | \BOOKMARK [0][-]{chapter.3}{Important Function Interfaces}{}% 15 16 | \BOOKMARK [1][-]{section.3.1}{GP Predictions}{chapter.3}% 16 17 | \BOOKMARK [2][-]{subsection.3.1.1}{Input Arguments}{section.3.1}% 17 18 | \BOOKMARK [2][-]{subsection.3.1.2}{Output Arguments}{section.3.1}% 18 19 | \BOOKMARK [1][-]{section.3.2}{Controller}{chapter.3}% 19 20 | \BOOKMARK [2][-]{subsection.3.2.1}{Interface}{section.3.2}% 20 21 | \BOOKMARK [1][-]{section.3.3}{Cost Functions}{chapter.3}% 21 22 | \BOOKMARK [2][-]{subsection.3.3.1}{Interface for Scenario-specific Cost Functions}{section.3.3}% 22 23 | \BOOKMARK [2][-]{subsection.3.3.2}{General Building Blocks}{section.3.3}% 23 24 | \BOOKMARK [0][-]{chapter.4}{How to Create Your Own Scenario}{}% 24 25 | \BOOKMARK [1][-]{section.4.1}{Necessary Files}{chapter.4}% 25 26 | \BOOKMARK [1][-]{section.4.2}{ODE Dynamics}{chapter.4}% 26 27 | \BOOKMARK [1][-]{section.4.3}{Scenario-specific Settings}{chapter.4}% 27 28 | \BOOKMARK [2][-]{subsection.4.3.1}{Adding Paths}{section.4.3}% 28 29 | \BOOKMARK [2][-]{subsection.4.3.2}{Indices}{section.4.3}% 29 30 | \BOOKMARK [2][-]{subsection.4.3.3}{General Settings}{section.4.3}% 30 31 | \BOOKMARK [2][-]{subsection.4.3.4}{Plant Structure}{section.4.3}% 31 32 | \BOOKMARK [2][-]{subsection.4.3.5}{Policy Structure}{section.4.3}% 32 33 | \BOOKMARK [2][-]{subsection.4.3.6}{Cost Function Structure}{section.4.3}% 33 34 | -------------------------------------------------------------------------------- /doc/doc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/doc.pdf -------------------------------------------------------------------------------- /doc/doc.toc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/doc.toc -------------------------------------------------------------------------------- /doc/figures/cartPole.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/cartPole.pdf -------------------------------------------------------------------------------- /doc/figures/cdp_sketch.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/cdp_sketch.pdf -------------------------------------------------------------------------------- /doc/figures/cost_error_bar.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/cost_error_bar.pdf -------------------------------------------------------------------------------- /doc/figures/cp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/cp.pdf -------------------------------------------------------------------------------- /doc/figures/cp_rollout.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/cp_rollout.pdf -------------------------------------------------------------------------------- /doc/figures/example_trajectories.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/example_trajectories.pdf -------------------------------------------------------------------------------- /doc/figures/example_trajectories2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/example_trajectories2.pdf -------------------------------------------------------------------------------- /doc/figures/fX3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/fX3.pdf -------------------------------------------------------------------------------- /doc/figures/flow.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/flow.pdf -------------------------------------------------------------------------------- /doc/figures/learnP.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/learnP.pdf -------------------------------------------------------------------------------- /doc/figures/pendubot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/pendubot.pdf -------------------------------------------------------------------------------- /doc/figures/pendulum.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/pendulum.pdf -------------------------------------------------------------------------------- /doc/figures/pendulum.pdf.ipe7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/pendulum.pdf.ipe7 -------------------------------------------------------------------------------- /doc/figures/policy_opt.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/policy_opt.pdf -------------------------------------------------------------------------------- /doc/figures/preliminary_policy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/preliminary_policy.pdf -------------------------------------------------------------------------------- /doc/figures/satLossPlot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/satLossPlot.pdf -------------------------------------------------------------------------------- /doc/figures/squashed_preliminary_policy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/squashed_preliminary_policy.pdf -------------------------------------------------------------------------------- /doc/figures/squashing_fct.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/squashing_fct.pdf -------------------------------------------------------------------------------- /doc/figures/squashing_fct2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/squashing_fct2.pdf -------------------------------------------------------------------------------- /doc/figures/squashing_function.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/squashing_function.pdf -------------------------------------------------------------------------------- /doc/figures/unicoords_frame2.tex: -------------------------------------------------------------------------------- 1 | %set the plot display orientation 2 | %synatax: \tdplotsetdisplay{\theta_d}{\phi_d} 3 | \tdplotsetmaincoords{65}{100} 4 | 5 | %define polar coordinates for some vector 6 | %TODO: look into using 3d spherical coordinate system 7 | \pgfmathsetmacro{\magW}{4} 8 | \pgfmathsetmacro{\psiW}{-30} 9 | \pgfmathsetmacro{\theW}{10} 10 | \pgfmathsetmacro{\phiW}{15} 11 | 12 | %start tikz picture, and use the tdplot_main_coords style to implement the display 13 | %coordinate transformation provided by 3dplot 14 | \footnotesize 15 | \begin{tikzpicture}[scale=0.85,tdplot_main_coords] 16 | 17 | %set up some coordinates ... GET FROM MATLAB 18 | %----------------------- 19 | \coordinate (O) at (0,0,0); 20 | 21 | \coordinate (B) at (3,5.1962,0); 22 | \coordinate (W) at (3.2256,5.0659,1.4772); 23 | \coordinate (Wt) at (3.6767,4.8054,4.4316); 24 | \coordinate (Wtp) at (3.2256,5.0659,4.4316); 25 | \coordinate (Wbp) at (3.2256,5.0659,0); 26 | \coordinate (T) at (3.2969,3.6799,5.7578); 27 | \coordinate (F) at (3.2612,4.3729,3.6175); 28 | 29 | %draw figure contents 30 | %-------------------- 31 | 32 | 33 | % draw wheel 34 | \tdplotsetrotatedcoords{\psiW}{100}{0} % add 90 to j angle 35 | \tdplotsetrotatedcoordsorigin{(W)} 36 | \draw[tdplot_rotated_coords,very thick,black!30,fill=black!30,fill opacity=0.2] (0,0,0) circle (1.5); 37 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12); 38 | 39 | % draw frame 40 | \draw[very thick,black] (W) -- (T); 41 | \tdplotsetrotatedcoordsorigin{(F)} 42 | \tdplotsetrotatedcoords{\psiW}{100}{0} 43 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12); 44 | 45 | % draw turntable 46 | \tdplotsetrotatedcoords{\psiW}{\theW}{\phiW} 47 | \tdplotsetrotatedcoordsorigin{(T)} 48 | \draw[tdplot_rotated_coords,very thick,black!30,fill=black!30,fill opacity=0.2] (0,0,0) circle (1.5); 49 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12); 50 | 51 | % draw forces and moments 52 | \tdplotdrawarcarrow[red,thick,dashed]{(W)}{0.5}{50}{360}{}{} 53 | \tdplotsetrotatedcoords{0}{0}{0} 54 | \tdplotsetrotatedcoordsorigin{(W)} 55 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05); 56 | 57 | \tdplotdrawarcarrow[red,thick,dashed]{(T)}{0.5}{50}{360}{}{} 58 | \tdplotsetrotatedcoords{0}{0}{0} 59 | \tdplotsetrotatedcoordsorigin{(T)} 60 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05); 61 | 62 | \tdplotsetrotatedcoordsorigin{(F)} 63 | \draw[tdplot_rotated_coords,thick,-latex,red] (0,0,-0.12) -- (0,0,-1.2); 64 | 65 | \node at (0,3.5,1.5) {\color{red}$W_\te{f}$}; 66 | \node at (0,4,4.7) {\color{red}$G_\te{f}$}; 67 | \node at (0,2.3,4) {\color{red}$P_\te{f}$}; 68 | \node at (0,4.8,0.65) {\color{red}$F_\te{f}$}; 69 | \node at (0,3.95,0.25) {\color{red}$Q_\te{f}$}; 70 | 71 | \end{tikzpicture} 72 | -------------------------------------------------------------------------------- /doc/figures/unicoords_frame2.tex~: -------------------------------------------------------------------------------- 1 | %set the plot display orientation 2 | %synatax: \tdplotsetdisplay{\theta_d}{\phi_d} 3 | \tdplotsetmaincoords{65}{100} 4 | 5 | %define polar coordinates for some vector 6 | %TODO: look into using 3d spherical coordinate system 7 | \pgfmathsetmacro{\magW}{4} 8 | \pgfmathsetmacro{\psiW}{-30} 9 | \pgfmathsetmacro{\theW}{10} 10 | \pgfmathsetmacro{\phiW}{15} 11 | 12 | %start tikz picture, and use the tdplot_main_coords style to implement the display 13 | %coordinate transformation provided by 3dplot 14 | \footnotesize 15 | \begin{tikzpicture}[scale=0.95,tdplot_main_coords] 16 | 17 | %set up some coordinates ... GET FROM MATLAB 18 | %----------------------- 19 | \coordinate (O) at (0,0,0); 20 | 21 | \coordinate (B) at (3,5.1962,0); 22 | \coordinate (W) at (3.2256,5.0659,1.4772); 23 | \coordinate (Wt) at (3.6767,4.8054,4.4316); 24 | \coordinate (Wtp) at (3.2256,5.0659,4.4316); 25 | \coordinate (Wbp) at (3.2256,5.0659,0); 26 | \coordinate (T) at (3.2969,3.6799,5.7578); 27 | \coordinate (F) at (3.2612,4.3729,3.6175); 28 | 29 | %draw figure contents 30 | %-------------------- 31 | 32 | 33 | % draw wheel 34 | \tdplotsetrotatedcoords{\psiW}{100}{0} % add 90 to j angle 35 | \tdplotsetrotatedcoordsorigin{(W)} 36 | \draw[tdplot_rotated_coords,very thick,black!30,fill=black!30,fill opacity=0.2] (0,0,0) circle (1.5); 37 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12); 38 | 39 | % draw frame 40 | \draw[very thick,black] (W) -- (T); 41 | \tdplotsetrotatedcoordsorigin{(F)} 42 | \tdplotsetrotatedcoords{\psiW}{100}{0} 43 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12); 44 | 45 | % draw turntable 46 | \tdplotsetrotatedcoords{\psiW}{\theW}{\phiW} 47 | \tdplotsetrotatedcoordsorigin{(T)} 48 | \draw[tdplot_rotated_coords,very thick,black!30,fill=black!30,fill opacity=0.2] (0,0,0) circle (1.5); 49 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12); 50 | 51 | % draw forces and moments 52 | \tdplotdrawarcarrow[red,thick,dashed]{(W)}{0.5}{50}{360}{}{} 53 | \tdplotsetrotatedcoords{0}{0}{0} 54 | \tdplotsetrotatedcoordsorigin{(W)} 55 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05); 56 | 57 | \tdplotdrawarcarrow[red,thick,dashed]{(T)}{0.5}{50}{360}{}{} 58 | \tdplotsetrotatedcoords{0}{0}{0} 59 | \tdplotsetrotatedcoordsorigin{(T)} 60 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05); 61 | 62 | \tdplotsetrotatedcoordsorigin{(F)} 63 | \draw[tdplot_rotated_coords,thick,-latex,red] (0,0,-0.12) -- (0,0,-1.2); 64 | 65 | \node at (0,3.5,1.5) {\color{red}$W_\te{f}$}; 66 | \node at (0,4,4.7) {\color{red}$G_\te{f}$}; 67 | \node at (0,2.3,4) {\color{red}$P_\te{f}$}; 68 | \node at (0,4.8,0.65) {\color{red}$F_\te{f}$}; 69 | \node at (0,3.95,0.25) {\color{red}$Q_\te{f}$}; 70 | 71 | \end{tikzpicture} -------------------------------------------------------------------------------- /doc/figures/unicoords_turntable2.tex: -------------------------------------------------------------------------------- 1 | %set the plot display orientation 2 | %synatax: \tdplotsetdisplay{\theta_d}{\phi_d} 3 | \tdplotsetmaincoords{65}{100} 4 | 5 | %define polar coordinates for some vector 6 | %TODO: look into using 3d spherical coordinate system 7 | \pgfmathsetmacro{\magW}{4} 8 | \pgfmathsetmacro{\psiW}{-30} 9 | \pgfmathsetmacro{\theW}{10} 10 | \pgfmathsetmacro{\phiW}{15} 11 | 12 | %start tikz picture, and use the tdplot_main_coords style to implement the display 13 | %coordinate transformation provided by 3dplot 14 | \footnotesize 15 | \begin{tikzpicture}[scale=0.85,tdplot_main_coords] 16 | 17 | %set up some coordinates ... GET FROM MATLAB 18 | %----------------------- 19 | \coordinate (O) at (0,0,0); 20 | 21 | \coordinate (B) at (3,5.1962,0); 22 | \coordinate (W) at (3.2256,5.0659,1.4772); 23 | \coordinate (Wt) at (3.6767,4.8054,4.4316); 24 | \coordinate (Wtp) at (3.2256,5.0659,4.4316); 25 | \coordinate (Wbp) at (3.2256,5.0659,0); 26 | \coordinate (T) at (3.2969,3.6799,5.7578); 27 | \coordinate (F) at (3.2612,4.3729,3.6175); 28 | 29 | %draw figure contents 30 | %-------------------- 31 | 32 | 33 | % draw wheel 34 | \tdplotsetrotatedcoords{\psiW}{100}{0} % add 90 to j angle 35 | \tdplotsetrotatedcoordsorigin{(W)} 36 | \draw[tdplot_rotated_coords,very thick,black!30,fill=black!30,fill opacity=0.2] (0,0,0) circle (1.5); 37 | \draw[tdplot_rotated_coords,fill=black!30,black!30] (0,0,0) circle (0.12); 38 | 39 | % draw frame 40 | \draw[very thick,black!30] (W) -- (T); 41 | \tdplotsetrotatedcoordsorigin{(F)} 42 | \tdplotsetrotatedcoords{\psiW}{100}{0} 43 | \draw[tdplot_rotated_coords,black!30,fill=black!30] (0,0,0) circle (0.12); 44 | 45 | % draw turntable 46 | \tdplotsetrotatedcoords{\psiW}{\theW}{\phiW} 47 | \tdplotsetrotatedcoordsorigin{(T)} 48 | \draw[tdplot_rotated_coords,very thick,fill=black,fill opacity=0.2] (0,0,0) circle (1.5); 49 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12); 50 | 51 | % draw forces and moments 52 | \tdplotdrawarcarrow[red,thick,dashed]{(T)}{0.5}{50}{360}{}{} 53 | \tdplotsetrotatedcoords{0}{0}{0} 54 | \tdplotsetrotatedcoordsorigin{(T)} 55 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05); 56 | \draw[tdplot_rotated_coords,thick,-latex,red] (0,0,-0.06) -- (0,0,-1.2); 57 | 58 | \node at (0,2.9,2.9) {\color{red}$W_\te{t}$}; 59 | \node at (0,4,4.7) {\color{red}$G_\te{t}$}; 60 | \node at (0,2.3,4) {\color{red}$P_\te{t}$}; 61 | 62 | \end{tikzpicture} 63 | -------------------------------------------------------------------------------- /doc/figures/unicoords_turntable2.tex~: -------------------------------------------------------------------------------- 1 | %set the plot display orientation 2 | %synatax: \tdplotsetdisplay{\theta_d}{\phi_d} 3 | \tdplotsetmaincoords{65}{100} 4 | 5 | %define polar coordinates for some vector 6 | %TODO: look into using 3d spherical coordinate system 7 | \pgfmathsetmacro{\magW}{4} 8 | \pgfmathsetmacro{\psiW}{-30} 9 | \pgfmathsetmacro{\theW}{10} 10 | \pgfmathsetmacro{\phiW}{15} 11 | 12 | %start tikz picture, and use the tdplot_main_coords style to implement the display 13 | %coordinate transformation provided by 3dplot 14 | \footnotesize 15 | \begin{tikzpicture}[scale=0.95,tdplot_main_coords] 16 | 17 | %set up some coordinates ... GET FROM MATLAB 18 | %----------------------- 19 | \coordinate (O) at (0,0,0); 20 | 21 | \coordinate (B) at (3,5.1962,0); 22 | \coordinate (W) at (3.2256,5.0659,1.4772); 23 | \coordinate (Wt) at (3.6767,4.8054,4.4316); 24 | \coordinate (Wtp) at (3.2256,5.0659,4.4316); 25 | \coordinate (Wbp) at (3.2256,5.0659,0); 26 | \coordinate (T) at (3.2969,3.6799,5.7578); 27 | \coordinate (F) at (3.2612,4.3729,3.6175); 28 | 29 | %draw figure contents 30 | %-------------------- 31 | 32 | 33 | % draw wheel 34 | \tdplotsetrotatedcoords{\psiW}{100}{0} % add 90 to j angle 35 | \tdplotsetrotatedcoordsorigin{(W)} 36 | \draw[tdplot_rotated_coords,very thick,black!30,fill=black!30,fill opacity=0.2] (0,0,0) circle (1.5); 37 | \draw[tdplot_rotated_coords,fill=black!30,black!30] (0,0,0) circle (0.12); 38 | 39 | % draw frame 40 | \draw[very thick,black!30] (W) -- (T); 41 | \tdplotsetrotatedcoordsorigin{(F)} 42 | \tdplotsetrotatedcoords{\psiW}{100}{0} 43 | \draw[tdplot_rotated_coords,black!30,fill=black!30] (0,0,0) circle (0.12); 44 | 45 | % draw turntable 46 | \tdplotsetrotatedcoords{\psiW}{\theW}{\phiW} 47 | \tdplotsetrotatedcoordsorigin{(T)} 48 | \draw[tdplot_rotated_coords,very thick,fill=black,fill opacity=0.2] (0,0,0) circle (1.5); 49 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12); 50 | 51 | % draw forces and moments 52 | \tdplotdrawarcarrow[red,thick,dashed]{(T)}{0.5}{50}{360}{}{} 53 | \tdplotsetrotatedcoords{0}{0}{0} 54 | \tdplotsetrotatedcoordsorigin{(T)} 55 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05); 56 | \draw[tdplot_rotated_coords,thick,-latex,red] (0,0,-0.06) -- (0,0,-1.2); 57 | 58 | \node at (0,2.9,2.9) {\color{red}$W_\te{t}$}; 59 | \node at (0,4,4.7) {\color{red}$G_\te{t}$}; 60 | \node at (0,2.3,4) {\color{red}$P_\te{t}$}; 61 | 62 | \end{tikzpicture} -------------------------------------------------------------------------------- /doc/figures/unicoords_wheel2.tex: -------------------------------------------------------------------------------- 1 | %set the plot display orientation 2 | %synatax: \tdplotsetdisplay{\theta_d}{\phi_d} 3 | \tdplotsetmaincoords{65}{100} 4 | 5 | %define polar coordinates for some vector 6 | %TODO: look into using 3d spherical coordinate system 7 | \pgfmathsetmacro{\magW}{4} 8 | \pgfmathsetmacro{\psiW}{-30} 9 | \pgfmathsetmacro{\theW}{10} 10 | \pgfmathsetmacro{\phiW}{15} 11 | 12 | %start tikz picture, and use the tdplot_main_coords style to implement the display 13 | %coordinate transformation provided by 3dplot 14 | \footnotesize 15 | \begin{tikzpicture}[scale=0.85,tdplot_main_coords] 16 | 17 | %set up some coordinates ... GET FROM MATLAB 18 | %----------------------- 19 | \coordinate (O) at (1.7321,-1,0); 20 | \coordinate (B) at (2.5,4.3301,0); 21 | \coordinate (Bp) at (4.2321,3.3301,0); 22 | \coordinate (W) at (2.7256,4.1999,1.4772); 23 | \coordinate (Wt) at (3.1767,3.9394,4.4316); 24 | \coordinate (Wtp) at (2.7256,4.1999,4.4316); 25 | \coordinate (Wbp) at (2.7256,4.1999,0); 26 | \coordinate (T) at (2.7969,2.8138,5.7578); 27 | \coordinate (F) at (2.7612,3.5069,3.6175); 28 | 29 | %draw figure contents 30 | %-------------------- 31 | 32 | 33 | % draw frame 34 | \draw[very thick,black!30] (W) -- (T); 35 | \tdplotsetrotatedcoords{\psiW}{100}{0} 36 | \tdplotsetrotatedcoordsorigin{(F)} 37 | \draw[tdplot_rotated_coords,fill=black!30,black!30] (0,0,0) circle (0.12); 38 | 39 | % draw turntable 40 | \tdplotsetrotatedcoords{\psiW}{\theW}{\phiW} 41 | \tdplotsetrotatedcoordsorigin{(T)} 42 | \draw[tdplot_rotated_coords,very thick,black!30,fill=black!30,fill opacity=0.2] (0,0,0) circle (1.5); 43 | \draw[tdplot_rotated_coords,fill=black!30,black!30] (0,0,0) circle (0.12); 44 | 45 | % draw wheel 46 | \tdplotsetrotatedcoords{\psiW}{\theW}{0} 47 | \tdplotsetrotatedcoordsorigin{(W)} 48 | 49 | \tdplotsetrotatedcoords{\psiW}{100}{0} % add 90 to j angle 50 | \draw[tdplot_rotated_coords,very thick,fill=black,fill opacity=0.2] (0,0,0) circle (1.5); 51 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12); 52 | 53 | % draw forces and moments 54 | \tdplotdrawarcarrow[red,thick,dashed]{(W)}{0.5}{50}{360}{}{} 55 | \tdplotsetrotatedcoords{0}{0}{0} 56 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05); 57 | \draw[tdplot_rotated_coords,thick,-latex,red] (0,0,-0.12) -- (0,0,-1.1); 58 | 59 | \tdplotdrawarcarrow[red,thick]{(B)}{0.5}{50}{360}{}{} 60 | \tdplotsetrotatedcoordsorigin{(B)} 61 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05); 62 | 63 | \node at (0,4.0,0.85) {\color{red}$F_\te{w}$}; 64 | \node at (0,3.2,0.55) {\color{red}$Q_\te{w}$}; 65 | \node at (0,4.3,-0.4) {\color{red}$R$}; 66 | \node at (0,3.2,-1.2) {\color{red}$T$}; 67 | \node at (0,3.4,-0.45) {\color{red}$W_\te{w}$}; 68 | 69 | \end{tikzpicture} 70 | -------------------------------------------------------------------------------- /doc/figures/unicoords_wheel2.tex~: -------------------------------------------------------------------------------- 1 | %set the plot display orientation 2 | %synatax: \tdplotsetdisplay{\theta_d}{\phi_d} 3 | \tdplotsetmaincoords{65}{100} 4 | 5 | %define polar coordinates for some vector 6 | %TODO: look into using 3d spherical coordinate system 7 | \pgfmathsetmacro{\magW}{4} 8 | \pgfmathsetmacro{\psiW}{-30} 9 | \pgfmathsetmacro{\theW}{10} 10 | \pgfmathsetmacro{\phiW}{15} 11 | 12 | %start tikz picture, and use the tdplot_main_coords style to implement the display 13 | %coordinate transformation provided by 3dplot 14 | \footnotesize 15 | \begin{tikzpicture}[scale=0.95,tdplot_main_coords] 16 | 17 | %set up some coordinates ... GET FROM MATLAB 18 | %----------------------- 19 | \coordinate (O) at (1.7321,-1,0); 20 | \coordinate (B) at (2.5,4.3301,0); 21 | \coordinate (Bp) at (4.2321,3.3301,0); 22 | \coordinate (W) at (2.7256,4.1999,1.4772); 23 | \coordinate (Wt) at (3.1767,3.9394,4.4316); 24 | \coordinate (Wtp) at (2.7256,4.1999,4.4316); 25 | \coordinate (Wbp) at (2.7256,4.1999,0); 26 | \coordinate (T) at (2.7969,2.8138,5.7578); 27 | \coordinate (F) at (2.7612,3.5069,3.6175); 28 | 29 | %draw figure contents 30 | %-------------------- 31 | 32 | 33 | % draw frame 34 | \draw[very thick,black!30] (W) -- (T); 35 | \tdplotsetrotatedcoords{\psiW}{100}{0} 36 | \tdplotsetrotatedcoordsorigin{(F)} 37 | \draw[tdplot_rotated_coords,fill=black!30,black!30] (0,0,0) circle (0.12); 38 | 39 | % draw turntable 40 | \tdplotsetrotatedcoords{\psiW}{\theW}{\phiW} 41 | \tdplotsetrotatedcoordsorigin{(T)} 42 | \draw[tdplot_rotated_coords,very thick,black!30,fill=black!30,fill opacity=0.2] (0,0,0) circle (1.5); 43 | \draw[tdplot_rotated_coords,fill=black!30,black!30] (0,0,0) circle (0.12); 44 | 45 | % draw wheel 46 | \tdplotsetrotatedcoords{\psiW}{\theW}{0} 47 | \tdplotsetrotatedcoordsorigin{(W)} 48 | 49 | \tdplotsetrotatedcoords{\psiW}{100}{0} % add 90 to j angle 50 | \draw[tdplot_rotated_coords,very thick,fill=black,fill opacity=0.2] (0,0,0) circle (1.5); 51 | \draw[tdplot_rotated_coords,fill=black] (0,0,0) circle (0.12); 52 | 53 | % draw forces and moments 54 | \tdplotdrawarcarrow[red,thick,dashed]{(W)}{0.5}{50}{360}{}{} 55 | \tdplotsetrotatedcoords{0}{0}{0} 56 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05); 57 | \draw[tdplot_rotated_coords,thick,-latex,red] (0,0,-0.12) -- (0,0,-1.1); 58 | 59 | \tdplotdrawarcarrow[red,thick]{(B)}{0.5}{50}{360}{}{} 60 | \tdplotsetrotatedcoordsorigin{(B)} 61 | \draw[tdplot_rotated_coords,thick,-latex,red,dashed] (0,0.8,0.8) -- (0,0.05,0.05); 62 | 63 | \node at (0,4.0,0.85) {\color{red}$F_\te{w}$}; 64 | \node at (0,3.2,0.55) {\color{red}$Q_\te{w}$}; 65 | \node at (0,4.3,-0.4) {\color{red}$R$}; 66 | \node at (0,3.2,-1.2) {\color{red}$T$}; 67 | \node at (0,3.4,-0.45) {\color{red}$W_\te{w}$}; 68 | 69 | \end{tikzpicture} -------------------------------------------------------------------------------- /doc/figures/unicycle.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/figures/unicycle.pdf -------------------------------------------------------------------------------- /doc/html/augment_unicycle_eq57428.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/augment_unicycle_eq57428.png -------------------------------------------------------------------------------- /doc/html/conCat_eq10310.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/conCat_eq10310.png -------------------------------------------------------------------------------- /doc/html/conCat_eq35435.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/conCat_eq35435.png -------------------------------------------------------------------------------- /doc/html/conT_eq96160.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/conT_eq96160.png -------------------------------------------------------------------------------- /doc/html/congp_eq35435.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/congp_eq35435.png -------------------------------------------------------------------------------- /doc/html/congp_eq43551.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/congp_eq43551.png -------------------------------------------------------------------------------- /doc/html/conlin_eq10310.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/conlin_eq10310.png -------------------------------------------------------------------------------- /doc/html/conlin_eq42328.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/conlin_eq42328.png -------------------------------------------------------------------------------- /doc/html/conlin_eq81653.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/conlin_eq81653.png -------------------------------------------------------------------------------- /doc/html/conlin_eq95465.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/conlin_eq95465.png -------------------------------------------------------------------------------- /doc/html/gSat_eq24615.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSat_eq24615.png -------------------------------------------------------------------------------- /doc/html/gSat_eq28128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSat_eq28128.png -------------------------------------------------------------------------------- /doc/html/gSat_eq35427.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSat_eq35427.png -------------------------------------------------------------------------------- /doc/html/gSat_eq53557.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSat_eq53557.png -------------------------------------------------------------------------------- /doc/html/gSat_eq97420.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSat_eq97420.png -------------------------------------------------------------------------------- /doc/html/gSin_eq00934.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSin_eq00934.png -------------------------------------------------------------------------------- /doc/html/gSin_eq24615.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSin_eq24615.png -------------------------------------------------------------------------------- /doc/html/gSin_eq28128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSin_eq28128.png -------------------------------------------------------------------------------- /doc/html/gSin_eq35427.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSin_eq35427.png -------------------------------------------------------------------------------- /doc/html/gSin_eq53557.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gSin_eq53557.png -------------------------------------------------------------------------------- /doc/html/gTrigT_eq41681.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gTrigT_eq41681.png -------------------------------------------------------------------------------- /doc/html/gTrig_eq00934.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gTrig_eq00934.png -------------------------------------------------------------------------------- /doc/html/gTrig_eq24615.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gTrig_eq24615.png -------------------------------------------------------------------------------- /doc/html/gTrig_eq28128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gTrig_eq28128.png -------------------------------------------------------------------------------- /doc/html/gTrig_eq35427.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gTrig_eq35427.png -------------------------------------------------------------------------------- /doc/html/gTrig_eq53557.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gTrig_eq53557.png -------------------------------------------------------------------------------- /doc/html/getPlotDistr_cdp_eq30108.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/getPlotDistr_cdp_eq30108.png -------------------------------------------------------------------------------- /doc/html/getPlotDistr_cdp_eq43551.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/getPlotDistr_cdp_eq43551.png -------------------------------------------------------------------------------- /doc/html/getPlotDistr_cp_eq30108.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/getPlotDistr_cp_eq30108.png -------------------------------------------------------------------------------- /doc/html/getPlotDistr_cp_eq43551.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/getPlotDistr_cp_eq43551.png -------------------------------------------------------------------------------- /doc/html/getPlotDistr_dp_eq30108.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/getPlotDistr_dp_eq30108.png -------------------------------------------------------------------------------- /doc/html/getPlotDistr_dp_eq43551.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/getPlotDistr_dp_eq43551.png -------------------------------------------------------------------------------- /doc/html/getPlotDistr_pendubot_eq30108.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/getPlotDistr_pendubot_eq30108.png -------------------------------------------------------------------------------- /doc/html/getPlotDistr_pendubot_eq43551.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/getPlotDistr_pendubot_eq43551.png -------------------------------------------------------------------------------- /doc/html/gpT_eq96160.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/gpT_eq96160.png -------------------------------------------------------------------------------- /doc/html/lossQuad_eq15684.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/lossQuad_eq15684.png -------------------------------------------------------------------------------- /doc/html/lossQuad_eq95740.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/lossQuad_eq95740.png -------------------------------------------------------------------------------- /doc/html/lossSat_eq21635.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/lossSat_eq21635.png -------------------------------------------------------------------------------- /doc/html/lossSat_eq65484.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/lossSat_eq65484.png -------------------------------------------------------------------------------- /doc/html/lossT_eq96160.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/lossT_eq96160.png -------------------------------------------------------------------------------- /doc/html/loss_cdp_eq26389.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cdp_eq26389.png -------------------------------------------------------------------------------- /doc/html/loss_cdp_eq30075.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cdp_eq30075.png -------------------------------------------------------------------------------- /doc/html/loss_cdp_eq32292.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cdp_eq32292.png -------------------------------------------------------------------------------- /doc/html/loss_cdp_eq39223.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cdp_eq39223.png -------------------------------------------------------------------------------- /doc/html/loss_cdp_eq72999.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cdp_eq72999.png -------------------------------------------------------------------------------- /doc/html/loss_cdp_eq81831.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cdp_eq81831.png -------------------------------------------------------------------------------- /doc/html/loss_cp_eq26389.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cp_eq26389.png -------------------------------------------------------------------------------- /doc/html/loss_cp_eq30075.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cp_eq30075.png -------------------------------------------------------------------------------- /doc/html/loss_cp_eq32292.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cp_eq32292.png -------------------------------------------------------------------------------- /doc/html/loss_cp_eq39223.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cp_eq39223.png -------------------------------------------------------------------------------- /doc/html/loss_cp_eq72999.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cp_eq72999.png -------------------------------------------------------------------------------- /doc/html/loss_cp_eq81831.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_cp_eq81831.png -------------------------------------------------------------------------------- /doc/html/loss_dp_eq26389.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_dp_eq26389.png -------------------------------------------------------------------------------- /doc/html/loss_dp_eq30075.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_dp_eq30075.png -------------------------------------------------------------------------------- /doc/html/loss_dp_eq32292.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_dp_eq32292.png -------------------------------------------------------------------------------- /doc/html/loss_dp_eq39223.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_dp_eq39223.png -------------------------------------------------------------------------------- /doc/html/loss_dp_eq72999.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_dp_eq72999.png -------------------------------------------------------------------------------- /doc/html/loss_dp_eq81831.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_dp_eq81831.png -------------------------------------------------------------------------------- /doc/html/loss_pendubot_eq26389.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendubot_eq26389.png -------------------------------------------------------------------------------- /doc/html/loss_pendubot_eq30075.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendubot_eq30075.png -------------------------------------------------------------------------------- /doc/html/loss_pendubot_eq32292.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendubot_eq32292.png -------------------------------------------------------------------------------- /doc/html/loss_pendubot_eq39223.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendubot_eq39223.png -------------------------------------------------------------------------------- /doc/html/loss_pendubot_eq72999.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendubot_eq72999.png -------------------------------------------------------------------------------- /doc/html/loss_pendubot_eq81831.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendubot_eq81831.png -------------------------------------------------------------------------------- /doc/html/loss_pendulum_eq26389.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendulum_eq26389.png -------------------------------------------------------------------------------- /doc/html/loss_pendulum_eq30075.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendulum_eq30075.png -------------------------------------------------------------------------------- /doc/html/loss_pendulum_eq32292.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendulum_eq32292.png -------------------------------------------------------------------------------- /doc/html/loss_pendulum_eq39223.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendulum_eq39223.png -------------------------------------------------------------------------------- /doc/html/loss_pendulum_eq72999.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendulum_eq72999.png -------------------------------------------------------------------------------- /doc/html/loss_pendulum_eq81831.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_pendulum_eq81831.png -------------------------------------------------------------------------------- /doc/html/loss_unicycle_eq22825.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_unicycle_eq22825.png -------------------------------------------------------------------------------- /doc/html/loss_unicycle_eq26389.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_unicycle_eq26389.png -------------------------------------------------------------------------------- /doc/html/loss_unicycle_eq32292.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_unicycle_eq32292.png -------------------------------------------------------------------------------- /doc/html/loss_unicycle_eq34276.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_unicycle_eq34276.png -------------------------------------------------------------------------------- /doc/html/loss_unicycle_eq72999.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_unicycle_eq72999.png -------------------------------------------------------------------------------- /doc/html/loss_unicycle_eq81831.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/loss_unicycle_eq81831.png -------------------------------------------------------------------------------- /doc/html/reward_eq37930.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/reward_eq37930.png -------------------------------------------------------------------------------- /doc/html/reward_eq81456.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/reward_eq81456.png -------------------------------------------------------------------------------- /doc/html/rewrap_eq72999.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/rewrap_eq72999.png -------------------------------------------------------------------------------- /doc/html/rewrap_eq77996.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/rewrap_eq77996.png -------------------------------------------------------------------------------- /doc/html/rollout_eq25411.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/rollout_eq25411.png -------------------------------------------------------------------------------- /doc/html/rollout_eq35435.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/rollout_eq35435.png -------------------------------------------------------------------------------- /doc/html/rollout_eq43551.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/rollout_eq43551.png -------------------------------------------------------------------------------- /doc/html/unwrap_eq72999.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/unwrap_eq72999.png -------------------------------------------------------------------------------- /doc/html/unwrap_eq77996.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/doc/html/unwrap_eq77996.png -------------------------------------------------------------------------------- /doc/plots/header.m: -------------------------------------------------------------------------------- 1 | set(0,'defaultaxesfontsize',24); 2 | set(0,'defaultaxesfontunits', 'points') 3 | set(0,'defaulttextfontsize',26); 4 | set(0,'defaulttextfontunits','points') 5 | set(0,'defaultaxeslinewidth',1); 6 | set(0,'defaultlinelinewidth',2); 7 | % set(0,'DefaultTextInterpreter','Latex') 8 | set(0,'DefaultAxesLineStyleOrder','-|--|:|-.'); 9 | -------------------------------------------------------------------------------- /doc/plots/plot_policySquash.m: -------------------------------------------------------------------------------- 1 | header; 2 | clear all; close all 3 | addpath(genpath('../../')); 4 | randn('state',7); rand('state', 1); 5 | 6 | covfunc = {'covSum', {'covSEard', 'covNoise'}}; % specify ARD covariance 7 | 8 | 9 | n = 50; 10 | 11 | Axis = [-5 5 -1.5 2]; 12 | 13 | dynmodel.inputs = rand(n,1)*10 - 5; 14 | dynmodel.hyp = [0.2, 1, 0.01]'; 15 | dynmodel.target = 1.5*randn(n,1); 16 | 17 | xx = linspace(-5,5,101)'; 18 | xx2 = linspace(-pi/2,pi/2, 101); 19 | 20 | prelPolicy = gpr(dynmodel.hyp, covfunc, dynmodel.inputs, dynmodel.target, xx); 21 | 22 | figure(1); clf; hold on; 23 | plot(xx, prelPolicy); 24 | plot(xx, ones(size(xx)), 'k--'); 25 | plot(xx, -ones(size(xx)), 'k--'); 26 | 27 | xlabel('$x$', 'interpreter', 'latex') 28 | ylabel('$\tilde\pi(x)$', 'interpreter', 'latex') 29 | axis(Axis) 30 | set(gcf,'PaperSize', [12 6]); 31 | set(gcf,'PaperPosition',[0.1 0.1 12 6]); 32 | print_pdf('../figures/preliminary_policy'); 33 | 34 | for i = 1:size(xx,1) 35 | squashedPolicy(i) = gSat(prelPolicy(i), 0, 1); 36 | sqashingFct(i) = gSat(xx2(i), 0, 1); 37 | end 38 | 39 | figure(2); clf; hold on; 40 | plot(xx, squashedPolicy); 41 | plot(xx, ones(size(xx)), 'k--'); 42 | plot(xx, -ones(size(xx)), 'k--'); 43 | xlabel('$x$', 'interpreter', 'latex') 44 | ylabel('$\pi(x)$','interpreter', 'latex') 45 | axis(Axis) 46 | set(gcf,'PaperSize', [12 6]); set(gcf,'PaperPosition',[0.1 0.1 12 6]); 47 | print_pdf('../figures/squashed_preliminary_policy'); 48 | -------------------------------------------------------------------------------- /doc/plots/plot_satLoss.m: -------------------------------------------------------------------------------- 1 | % plot the saturating cost function 2 | 3 | clear all; close all; 4 | header; 5 | 6 | addpath ('../../loss/'); 7 | addpath ('../../util/'); 8 | 9 | cost.fcn = @lossSat; 10 | cost.z = 0; % target state 11 | cost.width = 0.5; 12 | 13 | 14 | cost.W = 1/(cost.width).^2; % weight matrix 15 | 16 | x = linspace(0,3,200)'; % array of distances 17 | 18 | c = zeros(1,length(x)); 19 | 20 | % evaluate cost for all distances x 21 | for i = 1:length(x) 22 | c(i) = cost.fcn(cost, x(i), 0); 23 | end 24 | 25 | % evaluate cost at 2-sigma bound 26 | c2 = cost.fcn(cost, 2*cost.width, 0); 27 | 28 | figure; 29 | hold on 30 | plot(x,c); % plot cost 31 | plot(2*cost.width, c2, 'ro', 'markersize', 8); % plot 2-sigma cost value 32 | plot(linspace(0,2*cost.width,100), c2*ones(1,100), 'k-'); % plot line 33 | text(cost.width/4, c2+0.05, '$2\texttt{cost.width}$','interpreter', 'latex') 34 | xlabel('Distance to target') 35 | ylabel('Cost') 36 | axis([x(1) x(end) 0 1.01]); 37 | 38 | set(gcf,'PaperSize', [12 6]); 39 | set(gcf,'PaperPosition',[0.1 0.1 12 6]); 40 | print_pdf('../figures/satLossPlot'); 41 | -------------------------------------------------------------------------------- /doc/plots/plot_squash.m: -------------------------------------------------------------------------------- 1 | % plot the squashing function 2 | 3 | clear all; close all; 4 | header; 5 | 6 | 7 | x = linspace(-5/2*pi,5/2*pi,1000); 8 | 9 | squash = @(x) (9*sin(x) + sin(3.*x))./8; 10 | 11 | 12 | y = squash(x); 13 | 14 | % plot a few periods 15 | figure(1); 16 | clf; hold on; 17 | plot(x,y); 18 | xlabel('$x$', 'interpreter', 'latex'); 19 | ylabel('$\sigma(x)$', 'interpreter', 'latex'); 20 | axis tight; 21 | 22 | set(gcf,'PaperSize', [12 6]); 23 | set(gcf,'PaperPosition',[0.1 0.1 12 6]); 24 | print_pdf('../figures/squashing_fct'); 25 | 26 | % plot a single period 27 | axis([-pi/2, pi/2, -1, 1]) 28 | print_pdf('../figures/squashing_fct2'); -------------------------------------------------------------------------------- /doc/tex/DoublePend_learn.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{DoublePend\_learn} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Script to learn a controller for the double-pendulum swingup with two actuated joints 13 | \end{par} \vspace{1em} 14 | \begin{par} 15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 16 | \end{par} \vspace{1em} 17 | \begin{par} 18 | Last modified: 2013-03-27 19 | \end{par} \vspace{1em} 20 | 21 | 22 | \subsection*{High-Level Steps} 23 | 24 | \begin{enumerate} 25 | \setlength{\itemsep}{-1ex} 26 | \item Load parameters 27 | \item Create J initial trajectories by applying random controls 28 | \item Controlled learning (train dynamics model, policy learning, policy application) 29 | \end{enumerate} 30 | 31 | 32 | \subsection*{Code} 33 | 34 | 35 | \begin{lstlisting} 36 | % 1. Initialization 37 | clear all; close all; 38 | settings_dp; % load scenario-specific settings 39 | basename = 'doublepend_'; % filename used for saving data 40 | 41 | % 2. Initial J random rollouts 42 | for jj = 1:J 43 | [xx, yy, realCost{jj}, latent{jj}] = ... 44 | rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost); 45 | x = [x; xx]; y = [y; yy]; % augment training sets for dynamics model 46 | if plotting.verbosity > 0; % visualization of trajectory 47 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 48 | draw_rollout_dp; 49 | end 50 | end 51 | 52 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0; 53 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno); 54 | 55 | % 3. Controlled learning (N iterations) 56 | for j = 1:N 57 | trainDynModel; % train (GP) dynamics model 58 | learnPolicy; % learn policy 59 | applyController; % apply controller to system 60 | disp(['controlled trial # ' num2str(j)]); 61 | if plotting.verbosity > 0; % visualization of trajectory 62 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 63 | draw_rollout_dp; 64 | end 65 | end 66 | \end{lstlisting} 67 | -------------------------------------------------------------------------------- /doc/tex/applyController.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{applyController.m} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Script to apply the learned controller to a (simulated) system 13 | \end{par} \vspace{1em} 14 | \begin{par} 15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 16 | \end{par} \vspace{1em} 17 | \begin{par} 18 | Last modified: 2013-06-04 19 | \end{par} \vspace{1em} 20 | 21 | 22 | \subsection*{High-Level Steps} 23 | 24 | \begin{enumerate} 25 | \setlength{\itemsep}{-1ex} 26 | \item Generate a single trajectory rollout by applying the controller 27 | \item Generate many rollouts for testing the performance of the controller 28 | \item Save the data 29 | \end{enumerate} 30 | 31 | 32 | \subsection*{Code} 33 | 34 | 35 | \begin{lstlisting} 36 | % 1. Generate trajectory rollout given the current policy 37 | if isfield(plant,'constraint'), HH = maxH; else HH = H; end 38 | [xx, yy, realCost{j+J}, latent{j}] = ... 39 | rollout(gaussian(mu0, S0), policy, HH, plant, cost); 40 | disp(xx); % display states of observed trajectory 41 | x = [x; xx]; y = [y; yy]; % augment training set 42 | if plotting.verbosity > 0 43 | if ~ishandle(3); figure(3); else set(0,'CurrentFigure',3); end 44 | hold on; plot(1:length(realCost{J+j}),realCost{J+j},'r'); drawnow; 45 | end 46 | 47 | % 2. Make many rollouts to test the controller quality 48 | if plotting.verbosity > 1 49 | lat = cell(1,10); 50 | for i=1:10 51 | [~,~,~,lat{i}] = rollout(gaussian(mu0, S0), policy, HH, plant, cost); 52 | end 53 | 54 | if ~ishandle(4); figure(4); else set(0,'CurrentFigure',4); end; clf(4); 55 | 56 | ldyno = length(dyno); 57 | for i=1:ldyno % plot the rollouts on top of predicted error bars 58 | subplot(ceil(ldyno/sqrt(ldyno)),ceil(sqrt(ldyno)),i); hold on; 59 | errorbar( 0:length(M{j}(i,:))-1, M{j}(i,:), ... 60 | 2*sqrt(squeeze(Sigma{j}(i,i,:))) ); 61 | for ii=1:10 62 | plot( 0:size(lat{ii}(:,dyno(i)),1)-1, lat{ii}(:,dyno(i)), 'r' ); 63 | end 64 | plot( 0:size(latent{j}(:,dyno(i)),1)-1, latent{j}(:,dyno(i)),'g'); 65 | axis tight 66 | end 67 | drawnow; 68 | end 69 | 70 | % 3. Save data 71 | filename = [basename num2str(j) '_H' num2str(H)]; save(filename); 72 | \end{lstlisting} 73 | -------------------------------------------------------------------------------- /doc/tex/augment_unicycle.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | 10 | \subsection*{augment\_unicycle.m} 11 | 12 | \begin{par} 13 | \textbf{Summary:} The function computes the $(x,y)$ velocities of the contact point in both absolute and unicycle coordinates as well as the the unicycle coordinates of the contact point themselves. 14 | \end{par} \vspace{1em} 15 | 16 | \begin{verbatim} function r = augment(s)\end{verbatim} 17 | \begin{par} 18 | \textbf{Input arguments:} 19 | \end{par} \vspace{1em} 20 | 21 | \begin{lstlisting} 22 | % s state of the unicycle (including the torques). [1 x 18] 23 | % The state is assumed to be given as follows: 24 | % dx empty (to be filled by this function) 25 | % dy empty (to be filled by this function) 26 | % dxc empty (to be filled by this function) 27 | % dyc empty (to be filled by this function) 28 | % dtheta roll angular velocity 29 | % dphi yaw angular velocity 30 | % dpsiw wheel angular velocity 31 | % dpsif pitch angular velocity 32 | % dpsit turn table angular velocity 33 | % x x position 34 | % y y position 35 | % xc empty (to be filled by this function) 36 | % yc empty (to be filled by this function) 37 | % theta roll angle 38 | % phi yaw angle 39 | % psiw wheel angle 40 | % psif pitch angle 41 | % psit turn table angle 42 | % 43 | % *Output arguments:* 44 | % 45 | % r additional variables that are computed based on s: [1 x 6] 46 | % dx x velocity of contact point (global coordinates) 47 | % dy y velocity of contact point (global coordinates) 48 | % dxc x velocity of contact point (unicycle coordinates) 49 | % dyc y velocity of contact point (unicycle coordinates) 50 | % xc x position of contact point (unicycle coordinates) 51 | % yc y position of contact point (unicycle coordinates) 52 | % 53 | % 54 | % Copyright (C) 2008-2013 by 55 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 56 | % 57 | % Last modified: 2013-03-27 58 | 59 | 60 | function r = augment_unicycle(s) 61 | \end{lstlisting} 62 | 63 | 64 | \subsection*{Code} 65 | 66 | 67 | \begin{lstlisting} 68 | rw = 0.225; % wheel radius in meters 69 | 70 | % x velocity of contact point (global coordinates) 71 | r(1) = rw*cos(s(15))*s(7); 72 | % y velocity of contact point (global coordinates) 73 | r(2) = rw*sin(s(15))*s(7); 74 | % (x,y) velocities of contact point (unicycle coordinates) 75 | A = -[cos(s(15)) sin(s(15)); -sin(s(15)) cos(s(15))]; 76 | dA = -s(6)*[-sin(s(15)) cos(s(15)); -cos(s(15)) -sin(s(15))]; 77 | r(3:4) = A*r(1:2)' + dA*s(10:11)'; 78 | % (x,y) coordinates of contact point (unicycle coordinates) 79 | r(5:6) = A*s(10:11)'; 80 | \end{lstlisting} 81 | -------------------------------------------------------------------------------- /doc/tex/calcCost.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{calcCost.m} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Function to calculate the incurred cost and its standard deviation, given a sequence of predicted state distributions and the cost struct 13 | \end{par} \vspace{1em} 14 | \begin{verbatim}[L sL] = calcCost(cost, M, S)\end{verbatim} 15 | \begin{par} 16 | \textbf{Input arguments:} 17 | \end{par} \vspace{1em} 18 | \begin{verbatim}cost cost structure 19 | M mean vectors of state trajectory (D-by-H matrix) 20 | S covariance matrices at each time step (D-by-D-by-H)\end{verbatim} 21 | \begin{par} 22 | \textbf{Output arguments:} 23 | \end{par} \vspace{1em} 24 | \begin{verbatim}L expected incurred cost of state trajectory 25 | sL standard deviation of incurred cost\end{verbatim} 26 | \begin{par} 27 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 28 | \end{par} \vspace{1em} 29 | \begin{par} 30 | Last modified: 2013-01-23 31 | \end{par} \vspace{1em} 32 | 33 | 34 | \subsection*{High-Level Steps} 35 | 36 | \begin{enumerate} 37 | \setlength{\itemsep}{-1ex} 38 | \item Augment state distribution with trigonometric functions 39 | \item Compute distribution of the control signal 40 | \item Compute dynamics-GP prediction 41 | \item Compute distribution of the next state 42 | \end{enumerate} 43 | 44 | \begin{lstlisting} 45 | function [L sL] = calcCost(cost, M, S) 46 | \end{lstlisting} 47 | 48 | 49 | \subsection*{Code} 50 | 51 | 52 | \begin{lstlisting} 53 | H = size(M,2); % horizon length 54 | L = zeros(1,H); SL = zeros(1,H); 55 | 56 | % for each time step, compute the expected cost and its variance 57 | for h = 1:H 58 | [L(h),d1,d2,SL(h)] = cost.fcn(cost, M(:,h), S(:,:,h)); 59 | end 60 | 61 | sL = sqrt(SL); % standard deviation 62 | \end{lstlisting} 63 | -------------------------------------------------------------------------------- /doc/tex/cartDouble_learn.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{cartDouble\_learn.m} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Script to learn a controller for the cart-doube-pendulum swingup 13 | \end{par} \vspace{1em} 14 | \begin{par} 15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 16 | \end{par} \vspace{1em} 17 | \begin{par} 18 | Last modified: 2013-03-27 19 | \end{par} \vspace{1em} 20 | 21 | 22 | \subsection*{High-Level Steps} 23 | 24 | \begin{enumerate} 25 | \setlength{\itemsep}{-1ex} 26 | \item Load parameters 27 | \item Create J initial trajectories by applying random controls 28 | \item Controlled learning (train dynamics model, policy learning, policy application) 29 | \end{enumerate} 30 | 31 | 32 | \subsection*{Code} 33 | 34 | 35 | \begin{lstlisting} 36 | % 1. Initialization 37 | clear all; close all; 38 | settings_cdp; % load scenario-specific settings 39 | basename = 'CartDoublePend_'; % filename used for saving data 40 | 41 | % 2. Initial J random rollouts 42 | for jj = 1:J 43 | [xx, yy, realCost{jj}, latent{jj}] = ... 44 | rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost); 45 | x = [x; xx]; y = [y; yy]; % augment training sets for dynamics model 46 | if plotting.verbosity > 0; % visualization of trajectory 47 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 48 | draw_rollout_cdp; 49 | end 50 | 51 | end 52 | 53 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0; 54 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno); 55 | 56 | % 3. Controlled learning (N iterations) 57 | for j = 1:N 58 | trainDynModel; % train (GP) dynamics model 59 | learnPolicy; % learn policy 60 | applyController; % apply controller to system 61 | disp(['controlled trial # ' num2str(j)]); 62 | if plotting.verbosity > 0; % visualization of trajectory 63 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 64 | draw_rollout_cdp; 65 | end 66 | end 67 | \end{lstlisting} 68 | -------------------------------------------------------------------------------- /doc/tex/cartPole_learn.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{cartPole\_learn.m} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Script to learn a controller for the cart-pole swingup 13 | \end{par} \vspace{1em} 14 | \begin{par} 15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 16 | \end{par} \vspace{1em} 17 | \begin{par} 18 | Last modified: 2013-03-27 19 | \end{par} \vspace{1em} 20 | 21 | 22 | \subsection*{High-Level Steps} 23 | 24 | \begin{enumerate} 25 | \setlength{\itemsep}{-1ex} 26 | \item Load parameters 27 | \item Create J initial trajectories by applying random controls 28 | \item Controlled learning (train dynamics model, policy learning, policy application) 29 | \end{enumerate} 30 | 31 | 32 | \subsection*{Code} 33 | 34 | 35 | \begin{lstlisting} 36 | % 1. Initialization 37 | clear all; close all; 38 | settings_cp; % load scenario-specific settings 39 | basename = 'cartPole_'; % filename used for saving data 40 | 41 | % 2. Initial J random rollouts 42 | for jj = 1:J 43 | [xx, yy, realCost{jj}, latent{jj}] = ... 44 | rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost); 45 | x = [x; xx]; y = [y; yy]; % augment training sets for dynamics model 46 | if plotting.verbosity > 0; % visualization of trajectory 47 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 48 | draw_rollout_cp; 49 | end 50 | 51 | end 52 | 53 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0; 54 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno); 55 | 56 | % 3. Controlled learning (N iterations) 57 | for j = 1:N 58 | trainDynModel; % train (GP) dynamics model 59 | learnPolicy; % learn policy 60 | applyController; % apply controller to system 61 | disp(['controlled trial # ' num2str(j)]); 62 | if plotting.verbosity > 0; % visualization of trajectory 63 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 64 | draw_rollout_cp; 65 | end 66 | end 67 | \end{lstlisting} 68 | -------------------------------------------------------------------------------- /doc/tex/covNoise.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | 10 | \subsection*{covNoise.m} 11 | 12 | \begin{par} 13 | Independent covariance function, ie "white noise", with specified variance. The covariance function is specified as: 14 | \end{par} \vspace{1em} 15 | \begin{par} 16 | k(x\^{}p,x\^{}q) = s2 * \ensuremath{\backslash}delta(p,q) 17 | \end{par} \vspace{1em} 18 | \begin{par} 19 | where s2 is the noise variance and \ensuremath{\backslash}delta(p,q) is a Kronecker delta function which is 1 iff p=q and zero otherwise. The hyperparameter is 20 | \end{par} \vspace{1em} 21 | \begin{par} 22 | logtheta = [ log(sqrt(s2)) ] 23 | \end{par} \vspace{1em} 24 | \begin{par} 25 | For more help on design of covariance functions, try "help covFunctions". 26 | \end{par} \vspace{1em} 27 | \begin{par} 28 | (C) Copyright 2006 by Carl Edward Rasmussen, 2006-03-24. 29 | \end{par} \vspace{1em} 30 | 31 | \begin{lstlisting} 32 | function [A, B] = covNoise(logtheta, x, z) 33 | \end{lstlisting} 34 | 35 | 36 | \subsection*{Code} 37 | 38 | 39 | \begin{lstlisting} 40 | if nargin == 0, A = '1'; return; end % report number of parameters 41 | 42 | s2 = exp(2*logtheta); % noise variance 43 | 44 | if nargin == 2 % compute covariance matrix 45 | A = s2*eye(size(x,1)); 46 | elseif nargout == 2 % compute test set covariances 47 | A = s2; 48 | B = 0; % zeros cross covariance by independence 49 | else % compute derivative matrix 50 | A = 2*s2*eye(size(x,1)); 51 | end 52 | \end{lstlisting} 53 | -------------------------------------------------------------------------------- /doc/tex/covSEard.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | 10 | \subsection*{covSEard.m} 11 | 12 | \begin{par} 13 | Squared Exponential covariance function with Automatic Relevance Detemination (ARD) distance measure. The covariance function is parameterized as: 14 | \end{par} \vspace{1em} 15 | \begin{par} 16 | k(x\^{}p,x\^{}q) = sf2 * exp(-(x\^{}p - x\^{}q)'*inv(P)*(x\^{}p - x\^{}q)/2) 17 | \end{par} \vspace{1em} 18 | \begin{par} 19 | where the P matrix is diagonal with ARD parameters ell\_1\^{}2,...,ell\_D\^{}2, where D is the dimension of the input space and sf2 is the signal variance. The hyperparameters are: 20 | \end{par} \vspace{1em} 21 | \begin{par} 22 | loghyper = [ log(ell\_1) log(ell\_2) . log(ell\_D) log(sqrt(sf2)) ] 23 | \end{par} \vspace{1em} 24 | \begin{par} 25 | For more help on design of covariance functions, try "help covFunctions". 26 | \end{par} \vspace{1em} 27 | \begin{par} 28 | (C) Copyright 2006 by Carl Edward Rasmussen (2006-03-24) 29 | \end{par} \vspace{1em} 30 | 31 | \begin{lstlisting} 32 | function [A, B] = covSEard(loghyper, x, z) 33 | \end{lstlisting} 34 | 35 | 36 | \subsection*{Code} 37 | 38 | 39 | \begin{lstlisting} 40 | if nargin == 0, A = '(D+1)'; return; end % report number of parameters 41 | 42 | persistent K; 43 | 44 | [n D] = size(x); 45 | ell = exp(loghyper(1:D)); % characteristic length scale 46 | sf2 = exp(2*loghyper(D+1)); % signal variance 47 | 48 | if nargin == 2 49 | K = sf2*exp(-sq_dist(diag(1./ell)*x')/2); 50 | A = K; 51 | elseif nargout == 2 % compute test set covariances 52 | A = sf2*ones(size(z,1),1); 53 | B = sf2*exp(-sq_dist(diag(1./ell)*x',diag(1./ell)*z')/2); 54 | else % compute derivative matrix 55 | 56 | % check for correct dimension of the previously calculated kernel matrix 57 | if any(size(K)~=n) 58 | K = sf2*exp(-sq_dist(diag(1./ell)*x')/2); 59 | end 60 | 61 | if z <= D % length scale parameters 62 | A = K.*sq_dist(x(:,z)'/ell(z)); 63 | else % magnitude parameter 64 | A = 2*K; 65 | clear K; 66 | end 67 | end 68 | \end{lstlisting} 69 | -------------------------------------------------------------------------------- /doc/tex/covSum.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | 10 | \subsection*{covSum.m} 11 | 12 | \begin{par} 13 | \textbf{Summary:} Compose a covariance function as the sum of other covariance functions. This function doesn't actually compute very much on its own, it merely does some bookkeeping, and calls other covariance functions to do the actual work. 14 | \end{par} \vspace{1em} 15 | 16 | \begin{verbatim} function [A, B] = covSum(covfunc, logtheta, x, z)\end{verbatim} 17 | \begin{par} 18 | (C) Copyright 2006 by Carl Edward Rasmussen, 2006-03-20. 19 | \end{par} \vspace{1em} 20 | 21 | \begin{lstlisting} 22 | function [A, B] = covSum(covfunc, logtheta, x, z) 23 | \end{lstlisting} 24 | 25 | 26 | \subsection*{Code} 27 | 28 | 29 | \begin{lstlisting} 30 | for i = 1:length(covfunc) % iterate over covariance functions 31 | f = covfunc(i); 32 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary 33 | j(i) = cellstr(feval(f{:})); 34 | end 35 | 36 | if nargin == 1, % report number of parameters 37 | A = char(j(1)); for i=2:length(covfunc), A = [A, '+', char(j(i))]; end 38 | return 39 | end 40 | 41 | [n, D] = size(x); 42 | 43 | v = []; % v vector indicates to which covariance parameters belong 44 | for i = 1:length(covfunc), v = [v repmat(i, 1, eval(char(j(i))))]; end 45 | 46 | switch nargin 47 | case 3 % compute covariance matrix 48 | A = zeros(n, n); % allocate space for covariance matrix 49 | for i = 1:length(covfunc) % iteration over summand functions 50 | f = covfunc(i); 51 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary 52 | A = A + feval(f{:}, logtheta(v==i), x); % accumulate covariances 53 | end 54 | 55 | case 4 % compute derivative matrix or test set covariances 56 | if nargout == 2 % compute test set cavariances 57 | A = zeros(size(z,1),1); B = zeros(size(x,1),size(z,1)); % allocate space 58 | for i = 1:length(covfunc) 59 | f = covfunc(i); 60 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary 61 | [AA BB] = feval(f{:}, logtheta(v==i), x, z); % compute test covariances 62 | A = A + AA; B = B + BB; % and accumulate 63 | end 64 | else % compute derivative matrices 65 | i = v(z); % which covariance function 66 | j = sum(v(1:z)==i); % which parameter in that covariance 67 | f = covfunc(i); 68 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary 69 | A = feval(f{:}, logtheta(v==i), x, j); % compute derivative 70 | end 71 | 72 | end 73 | \end{lstlisting} 74 | -------------------------------------------------------------------------------- /doc/tex/draw_cp.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | 10 | \subsection*{draw\_cp.m} 11 | 12 | \begin{par} 13 | \textbf{Summary:} Draw the cart-pole system with reward, applied force, and predictive uncertainty of the tip of the pendulum 14 | \end{par} \vspace{1em} 15 | 16 | \begin{verbatim} function draw_cp(x, theta, force, cost, text1, text2, M, S)\end{verbatim} 17 | \begin{par} 18 | \textbf{Input arguments:} 19 | \end{par} \vspace{1em} 20 | 21 | \begin{lstlisting} 22 | % x position of the cart 23 | % theta angle of pendulum 24 | % force force applied to cart 25 | % cost cost structure 26 | % .fcn function handle (it is assumed to use saturating cost) 27 | % .<> other fields that are passed to cost 28 | % M (optional) mean of state 29 | % S (optional) covariance of state 30 | % text1 (optional) text field 1 31 | % text2 (optional) text field 2 32 | % 33 | % 34 | % Copyright (C) 2008-2013 by 35 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 36 | % 37 | % Last modified: 2013-03-07 38 | 39 | function draw_cp(x, theta, force, cost, text1, text2, M, S) 40 | \end{lstlisting} 41 | 42 | 43 | \subsection*{Code} 44 | 45 | 46 | \begin{lstlisting} 47 | l = 0.6; 48 | xmin = -3; 49 | xmax = 3; 50 | height = 0.1; 51 | width = 0.3; 52 | maxU = 10; 53 | 54 | % Compute positions 55 | cart = [ x + width, height 56 | x + width, -height 57 | x - width, -height 58 | x - width, height 59 | x + width, height ]; 60 | pendulum = [x, 0; x+2*l*sin(theta), -cos(theta)*2*l]; 61 | 62 | 63 | clf; hold on 64 | plot(0,2*l,'k+','MarkerSize',20,'linewidth',2) 65 | plot([xmin, xmax], [-height-0.03, -height-0.03],'k','linewidth',2) 66 | 67 | % Plot force 68 | plot([0 force/maxU*xmax],[-0.3, -0.3],'g','linewidth',10) 69 | 70 | % Plot reward 71 | reward = 1-cost.fcn(cost,[x, 0, 0, theta]', zeros(4)); 72 | plot([0 reward*xmax],[-0.5, -0.5],'y','linewidth',10) 73 | 74 | % Plot the cart-pole 75 | fill(cart(:,1), cart(:,2),'k','edgecolor','k'); 76 | plot(pendulum(:,1), pendulum(:,2),'r','linewidth',4) 77 | 78 | % Plot the joint and the tip 79 | plot(x,0,'y.','markersize',24) 80 | plot(pendulum(2,1),pendulum(2,2),'y.','markersize',24) 81 | 82 | % plot ellipse around tip of pendulum (if M, S exist) 83 | try 84 | [M1 S1] = getPlotDistr_cp(M,S,2*l); 85 | error_ellipse(S1,M1,'style','b'); 86 | catch 87 | end 88 | 89 | % Text 90 | text(0,-0.3,'applied force') 91 | text(0,-0.5,'immediate reward') 92 | if exist('text1','var') 93 | text(0,-0.9, text1) 94 | end 95 | if exist('text2','var') 96 | text(0,-1.1, text2) 97 | end 98 | 99 | set(gca,'DataAspectRatio',[1 1 1],'XLim',[xmin xmax],'YLim',[-1.4 1.4]); 100 | axis off; 101 | drawnow; 102 | \end{lstlisting} 103 | -------------------------------------------------------------------------------- /doc/tex/draw_rollout_cdp.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{draw\_rollout\_cdp.m} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Script to draw a trajectory of the cart-double-pendulum system and the predicted uncertainties around the tips of the pendulums 13 | \end{par} \vspace{1em} 14 | \begin{par} 15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 16 | \end{par} \vspace{1em} 17 | \begin{par} 18 | Last modified: 2013-03-27 19 | \end{par} \vspace{1em} 20 | 21 | 22 | \subsection*{High-Level Steps} 23 | 24 | \begin{enumerate} 25 | \setlength{\itemsep}{-1ex} 26 | \item For each time step, plot the observed trajectory and the predicted means and covariances of the Cartesian coordinates of the tips of both pendulums 27 | \end{enumerate} 28 | 29 | 30 | \subsection*{Code} 31 | 32 | 33 | \begin{lstlisting} 34 | for r = 1:size(xx,1) 35 | if exist('j','var') && ~isempty(M{j}) 36 | draw_cdp(latent{j}(r,1), latent{j}(r,5), latent{j}(r,6), ... 37 | latent{j}(r,end), cost, M{j}(:,r), Sigma{j}(:,:,r), ... 38 | ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ... 39 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 40 | ' sec']); 41 | else 42 | draw_cdp(latent{jj}(r,1), latent{jj}(r,5), latent{jj}(r,6), ... 43 | latent{jj}(r,end), cost, [], [], ... 44 | ['(random) trial # ' num2str(jj) ', T=' num2str(H*dt) ' sec'], ... 45 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 46 | ' sec']); 47 | end 48 | pause(dt); 49 | end 50 | \end{lstlisting} 51 | -------------------------------------------------------------------------------- /doc/tex/draw_rollout_cp.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{draw\_rollout\_cp.m} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Script to draw the most recent trajectory of the cart-pole system together with the predicted uncertainties around the tip of the pendulum 13 | \end{par} \vspace{1em} 14 | \begin{par} 15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 16 | \end{par} \vspace{1em} 17 | \begin{par} 18 | Last modified: 2013-05-20 19 | \end{par} \vspace{1em} 20 | 21 | 22 | \subsection*{High-Level Steps} 23 | 24 | \begin{enumerate} 25 | \setlength{\itemsep}{-1ex} 26 | \item For each time step, plot the observed trajectory and the predicted means and covariances of the Cartesian coordinates of the tip of the pendulum 27 | \end{enumerate} 28 | 29 | 30 | \subsection*{Code} 31 | 32 | 33 | \begin{lstlisting} 34 | % Loop over states in trajectory (= time steps) 35 | for r = 1:size(xx,1) 36 | if exist('j','var') && ~isempty(M{j}) 37 | draw_cp(latent{j}(r,1), latent{j}(r,4), latent{j}(r,end), cost, ... 38 | ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ... 39 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 40 | ' sec'], M{j}(:,r), Sigma{j}(:,:,r)); 41 | else 42 | draw_cp(latent{jj}(r,1), latent{jj}(r,4), latent{jj}(r,end), cost, ... 43 | ['(random) trial # ' num2str(jj) ', T=' num2str(H*dt) ' sec'], ... 44 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 45 | ' sec']) 46 | end 47 | pause(dt); 48 | end 49 | \end{lstlisting} 50 | -------------------------------------------------------------------------------- /doc/tex/draw_rollout_dp.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{draw\_rollout\_dp.m} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Script to draw a trajectory of theobserved double-pendulum system and the predicted uncertainties around the tips of the pendulums 13 | \end{par} \vspace{1em} 14 | \begin{par} 15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 16 | \end{par} \vspace{1em} 17 | \begin{par} 18 | Last modified: 2013-03-27 19 | \end{par} \vspace{1em} 20 | 21 | 22 | \subsection*{High-Level Steps} 23 | 24 | \begin{enumerate} 25 | \setlength{\itemsep}{-1ex} 26 | \item For each time step, plot the observed trajectory and the predicted means and covariances of the Cartesian coordinates of the tips of both pendulums 27 | \end{enumerate} 28 | 29 | 30 | \subsection*{Code} 31 | 32 | \begin{par} 33 | Loop over states in trajectory 34 | \end{par} \vspace{1em} 35 | 36 | \begin{lstlisting} 37 | for r = 1:size(xx,1) 38 | cost.t = r; 39 | if exist('j','var') && ~isempty(M{j}) 40 | draw_dp(latent{j}(r,3), latent{j}(r,4), latent{j}(r,end-1), ... 41 | latent{j}(r,end), cost, ... 42 | ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ... 43 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 44 | ' sec'], M{j}(:,r), Sigma{j}(:,:,r)); 45 | else 46 | draw_dp(latent{jj}(r,3), latent{jj}(r,4), latent{jj}(r,end-1), ... 47 | latent{jj}(r,end), cost, ... 48 | ['(random) trial # ' num2str(1) ', T=' num2str(H*dt) ' sec'], ... 49 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 50 | ' sec']) 51 | end 52 | pause(dt); 53 | end 54 | \end{lstlisting} 55 | -------------------------------------------------------------------------------- /doc/tex/draw_rollout_pendubot.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{draw\_rollout\_pendubot} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Script to draw the most recent trajectory of the Pendubot system and the predicted uncertainties around the tips of the pendulums 13 | \end{par} \vspace{1em} 14 | \begin{par} 15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 16 | \end{par} \vspace{1em} 17 | \begin{par} 18 | Last modified: 2013-03-27 19 | \end{par} \vspace{1em} 20 | 21 | 22 | \subsection*{High-Level Steps} 23 | 24 | \begin{enumerate} 25 | \setlength{\itemsep}{-1ex} 26 | \item For each time step, plot the observed trajectory and the predicted means and covariances of the Cartesian coordinates of the tips of both pendulums 27 | \end{enumerate} 28 | 29 | 30 | \subsection*{Code} 31 | 32 | 33 | \begin{lstlisting} 34 | % Loop over states in trajectory 35 | for r = 1:size(xx,1) 36 | cost.t = r; 37 | if exist('j','var') && ~isempty(M{j}) 38 | draw_pendubot(latent{j}(r,3), latent{j}(r,4), latent{j}(r,end), cost, ... 39 | ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ... 40 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 41 | ' sec'], M{j}(:,r), Sigma{j}(:,:,r)); 42 | else 43 | draw_pendubot(latent{jj}(r,3), latent{jj}(r,4), latent{jj}(r,end), cost, ... 44 | ['(random) trial # ' num2str(jj) ', T=' num2str(H*dt) ' sec'], ... 45 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 46 | ' sec']) 47 | end 48 | pause(dt); 49 | end 50 | \end{lstlisting} 51 | -------------------------------------------------------------------------------- /doc/tex/draw_rollout_pendulum.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{draw\_rollout\_pendulum.m} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Script to draw a trajectory of the most recent pendulum trajectory 13 | \end{par} \vspace{1em} 14 | \begin{par} 15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 16 | \end{par} \vspace{1em} 17 | \begin{par} 18 | Last modified: 2013-03-27 19 | \end{par} \vspace{1em} 20 | 21 | 22 | \subsection*{High-Level Steps} 23 | 24 | \begin{enumerate} 25 | \setlength{\itemsep}{-1ex} 26 | \item For each time step, plot the observed trajectory 27 | \end{enumerate} 28 | 29 | 30 | \subsection*{Code} 31 | 32 | 33 | \begin{lstlisting} 34 | % Loop over states in trajectory 35 | for r = 1:size(xx,1) 36 | cost.t = r; 37 | if exist('j','var') && ~isempty(M{j}) 38 | draw_pendulum(latent{j}(r,2), latent{j}(r,end), cost, ... 39 | ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ... 40 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 41 | ' sec'], M{j}(:,r), Sigma{j}(:,:,r)); 42 | else 43 | draw_pendulum(latent{jj}(r,2), latent{jj}(r,end), cost, ... 44 | ['(random) trial # ' num2str(1) ', T=' num2str(H*dt) ' sec'], ... 45 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 46 | ' sec']) 47 | end 48 | pause(dt); 49 | end 50 | \end{lstlisting} 51 | -------------------------------------------------------------------------------- /doc/tex/draw_rollout_unicycle.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{draw\_rollout\_unicycle.m} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Script to draw a rollout of the unicycle 13 | \end{par} \vspace{1em} 14 | \begin{par} 15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 16 | \end{par} \vspace{1em} 17 | \begin{par} 18 | Last modified: 2013-04-04 19 | \end{par} \vspace{1em} 20 | 21 | 22 | \subsection*{High-Level Steps} 23 | 24 | \begin{enumerate} 25 | \setlength{\itemsep}{-1ex} 26 | \item For each time step, plot the observed trajectory, the applied torques, and the incurred cost 27 | \end{enumerate} 28 | 29 | 30 | \subsection*{Code} 31 | 32 | 33 | \begin{lstlisting} 34 | if j > J 35 | 36 | draw_unicycle(xx, plant, plant.dt/10, cost, ... 37 | ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ... 38 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 39 | ' sec']); 40 | else 41 | draw_unicycle(xx, plant, plant.dt/10, cost, ... 42 | ['(random) trial # ' num2str(jj) ', T=' num2str(H*dt) ' sec'], ... 43 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 44 | ' sec']); 45 | end 46 | \end{lstlisting} 47 | -------------------------------------------------------------------------------- /doc/tex/dynamics_cp.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | 10 | \subsection*{dynamics\_cp.m} 11 | 12 | \begin{par} 13 | \textbf{Summary:} Implements ths ODE for simulating the cart-pole dynamics. 14 | \end{par} \vspace{1em} 15 | 16 | \begin{verbatim} function dz = dynamics_cp(t, z, f)\end{verbatim} 17 | \begin{par} 18 | \textbf{Input arguments:} 19 | \end{par} \vspace{1em} 20 | 21 | \begin{lstlisting} 22 | % t current time step (called from ODE solver) 23 | % z state [4 x 1] 24 | % f (optional): force f(t) 25 | % 26 | % *Output arguments:* 27 | % 28 | % dz if 3 input arguments: state derivative wrt time 29 | % if only 2 input arguments: total mechanical energy 30 | % 31 | % 32 | % Note: It is assumed that the state variables are of the following order: 33 | % x: [m] position of cart 34 | % dx: [m/s] velocity of cart 35 | % dtheta: [rad/s] angular velocity 36 | % theta: [rad] angle 37 | % 38 | % 39 | % A detailed derivation of the dynamics can be found in: 40 | % 41 | % M.P. Deisenroth: 42 | % Efficient Reinforcement Learning Using Gaussian Processes, Appendix C, 43 | % KIT Scientific Publishing, 2010. 44 | % 45 | % Copyright (C) 2008-2013 by 46 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 47 | % 48 | % Last modified: 2013-03-08 49 | 50 | function dz = dynamics_cp(t,z,f) 51 | \end{lstlisting} 52 | 53 | 54 | \subsection*{Code} 55 | 56 | 57 | \begin{lstlisting} 58 | l = 0.5; % [m] length of pendulum 59 | m = 0.5; % [kg] mass of pendulum 60 | M = 0.5; % [kg] mass of cart 61 | b = 0.1; % [N/m/s] coefficient of friction between cart and ground 62 | g = 9.82; % [m/s^2] acceleration of gravity 63 | 64 | if nargin==3 65 | dz = zeros(4,1); 66 | dz(1) = z(2); 67 | dz(2) = ( 2*m*l*z(3)^2*sin(z(4)) + 3*m*g*sin(z(4))*cos(z(4)) ... 68 | + 4*f(t) - 4*b*z(2) )/( 4*(M+m)-3*m*cos(z(4))^2 ); 69 | dz(3) = (-3*m*l*z(3)^2*sin(z(4))*cos(z(4)) - 6*(M+m)*g*sin(z(4)) ... 70 | - 6*(f(t)-b*z(2))*cos(z(4)) )/( 4*l*(m+M)-3*m*l*cos(z(4))^2 ); 71 | dz(4) = z(3); 72 | else 73 | dz = (M+m)*z(2)^2/2 + 1/6*m*l^2*z(3)^2 + m*l*(z(2)*z(3)-g)*cos(z(4))/2; 74 | end 75 | \end{lstlisting} 76 | -------------------------------------------------------------------------------- /doc/tex/dynamics_pendulum.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | 10 | \subsection*{dynamics\_pendulum.m} 11 | 12 | \begin{par} 13 | \textbf{Summary:} Implements ths ODE for simulating the pendulum dynamics, where an input torque f can be applied 14 | \end{par} \vspace{1em} 15 | 16 | \begin{verbatim} function dz = dynamics_pendulum(t,z,u)\end{verbatim} 17 | \begin{par} 18 | \textbf{Input arguments:} 19 | \end{par} \vspace{1em} 20 | 21 | \begin{lstlisting} 22 | % t current time step (called from ODE solver) 23 | % z state [2 x 1] 24 | % u (optional): torque f(t) applied to pendulum 25 | % 26 | % *Output arguments:* 27 | % 28 | % dz if 3 input arguments: state derivative wrt time 29 | % 30 | % Note: It is assumed that the state variables are of the following order: 31 | % dtheta: [rad/s] angular velocity of pendulum 32 | % theta: [rad] angle of pendulum 33 | % 34 | % A detailed derivation of the dynamics can be found in: 35 | % 36 | % M.P. Deisenroth: 37 | % Efficient Reinforcement Learning Using Gaussian Processes, Appendix C, 38 | % KIT Scientific Publishing, 2010. 39 | % 40 | % 41 | % Copyright (C) 2008-2013 by 42 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 43 | % 44 | % Last modified: 2013-03-18 45 | 46 | function dz = dynamics_pendulum(t,z,u) 47 | \end{lstlisting} 48 | 49 | 50 | \subsection*{Code} 51 | 52 | 53 | \begin{lstlisting} 54 | l = 1; % [m] length of pendulum 55 | m = 1; % [kg] mass of pendulum 56 | g = 9.82; % [m/s^2] acceleration of gravity 57 | b = 0.01; % [s*Nm/rad] friction coefficient 58 | 59 | dz = zeros(2,1); 60 | dz(1) = ( u(t) - b*z(1) - m*g*l*sin(z(2))/2 ) / (m*l^2/3); 61 | dz(2) = z(1); 62 | \end{lstlisting} 63 | -------------------------------------------------------------------------------- /doc/tex/gaussian.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | 10 | \subsection*{gaussian.m} 11 | 12 | \begin{par} 13 | \textbf{Summary:} Generate n samples from a Gaussian \$p(x)=\ensuremath{\backslash}mathcal N(m,S). Sampling is based on the Cholesky factorization of the covariance matrix S 14 | \end{par} \vspace{1em} 15 | 16 | \begin{verbatim} function x = gaussian(m, S, n)\end{verbatim} 17 | \begin{par} 18 | \textbf{Input arguments:} 19 | \end{par} \vspace{1em} 20 | \begin{verbatim}m mean of Gaussian [D x 1] 21 | S covariance matrix of Gaussian [D x D] 22 | n (optional) number of samples; default: n=1\end{verbatim} 23 | \begin{par} 24 | \textbf{Output arguments:} 25 | \end{par} \vspace{1em} 26 | \begin{verbatim}x matrix of samples from Gaussian [D x n]\end{verbatim} 27 | \begin{par} 28 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 29 | \end{par} \vspace{1em} 30 | \begin{par} 31 | Last modified: 2013-03-21 32 | \end{par} \vspace{1em} 33 | 34 | \begin{lstlisting} 35 | function x = gaussian(m, S, n) 36 | \end{lstlisting} 37 | 38 | 39 | \subsection*{Code} 40 | 41 | 42 | \begin{lstlisting} 43 | if nargin < 3; n = 1; end 44 | 45 | x = bsxfun(@plus, m(:), chol(S)'*randn(size(S,2),n)); 46 | \end{lstlisting} 47 | -------------------------------------------------------------------------------- /doc/tex/getPlotDistr_cp.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{getPlotDistr\_cp.m} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Compute means and covariances of the Cartesian coordinates of the tips both the inner and outer pendulum assuming that the joint state $x$ of the cart-double-pendulum system is Gaussian, i.e., $x\sim N(m, s)$ 13 | \end{par} \vspace{1em} 14 | 15 | \begin{verbatim} function [M, S] = getPlotDistr_cp(m, s, ell)\end{verbatim} 16 | \begin{par} 17 | \textbf{Input arguments:} 18 | \end{par} \vspace{1em} 19 | \begin{verbatim}m mean of full state [4 x 1] 20 | s covariance of full state [4 x 4] 21 | ell length of pendulum\end{verbatim} 22 | \begin{verbatim}Note: this code assumes that the following order of the state: 23 | 1: cart pos., 24 | 2: cart vel., 25 | 3: pendulum angular velocity, 26 | 4: pendulum angle\end{verbatim} 27 | \begin{par} 28 | \textbf{Output arguments:} 29 | \end{par} \vspace{1em} 30 | \begin{verbatim}M mean of tip of pendulum [2 x 1] 31 | S covariance of tip of pendulum [2 x 2]\end{verbatim} 32 | \begin{par} 33 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 34 | \end{par} \vspace{1em} 35 | \begin{par} 36 | Last modification: 2013-03-27 37 | \end{par} \vspace{1em} 38 | 39 | 40 | \subsection*{High-Level Steps} 41 | 42 | \begin{enumerate} 43 | \setlength{\itemsep}{-1ex} 44 | \item Augment input distribution to complex angle representation 45 | \item Compute means of tips of pendulums (in Cartesian coordinates) 46 | \item Compute covariances of tips of pendulums (in Cartesian coordinates) 47 | \end{enumerate} 48 | 49 | \begin{lstlisting} 50 | function [M, S] = getPlotDistr_cp(m, s, ell) 51 | \end{lstlisting} 52 | 53 | 54 | \subsection*{Code} 55 | 56 | 57 | \begin{lstlisting} 58 | % 1. Augment input distribution to complex angle representation 59 | [m1 s1 c1] = gTrig(m,s,4,ell); % map input distribution through sin/cos 60 | m1 = [m; m1]; % mean of joint 61 | c1 = s*c1; % cross-covariance between input and prediction 62 | s1 = [s c1; c1' s1]; % covariance of joint 63 | 64 | % 2. Compute means of tips of pendulums (in Cartesian coordinates) 65 | M = [m1(1)+m1(5); -m1(6)]; 66 | 67 | % 3. Compute covariances of tips of pendulums (in Cartesian coordinates) 68 | s11 = s1(1,1) + s1(5,5) + s1(1,5) + s1(5,1); % x+l sin(theta) 69 | s22 = s1(6,6); % -l*cos(theta) 70 | s12 = -(s1(1,6)+s1(5,6)); % cov(x+l*sin(th), -l*cos(th) 71 | 72 | S = [s11 s12; s12' s22]; 73 | try 74 | chol(S); 75 | catch 76 | warning('matrix S not pos.def. (getPlotDistr)'); 77 | S = S + (1e-6 - min(eig(S)))*eye(2); 78 | end 79 | \end{lstlisting} 80 | -------------------------------------------------------------------------------- /doc/tex/lossLin.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{lossLin.m} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Function to compute the expected loss and its derivatives, given an input distribution, under a linear loss function: L = a\^{}T(x - b). Note, this loss function can return negative loss. 13 | \end{par} \vspace{1em} 14 | \begin{verbatim}[L dLdm dLds S dSdm dSds C dCdm dCds] = lossLin(cost,m,s)\end{verbatim} 15 | \begin{par} 16 | \textbf{Input arguments:} 17 | \end{par} \vspace{1em} 18 | 19 | \begin{verbatim} cost 20 | .a gradient of linear loss function, [D x 1] 21 | .b targets, the value of x for which there is zero loss [D x 1] 22 | m mean of input distribution [D x 1] 23 | s covariance matrix of input distribution [D x D]\end{verbatim} 24 | \begin{par} 25 | \textbf{Output arguments:} 26 | \end{par} \vspace{1em} 27 | \begin{verbatim}L expected loss [1 x 1 ] 28 | dLdm derivative of L wrt input mean [1 x D ] 29 | dLds derivative of L wrt input covariance [1 x D^2] 30 | S variance of loss [1 x 1 ] 31 | dSdm derivative of S wrt input mean [1 x D ] 32 | dSds derivative of S wrt input covariance [1 x D^2] 33 | C inv(S) times input-output covariance [D x 1 ] 34 | dCdm derivative of C wrt input mean [D x D ] 35 | dCds derivative of C wrt input covariance [D x D^2]\end{verbatim} 36 | \begin{par} 37 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 38 | \end{par} \vspace{1em} 39 | \begin{par} 40 | Last modified: 2013-03-05 41 | \end{par} \vspace{1em} 42 | 43 | 44 | \subsection*{High-Level Steps} 45 | 46 | \begin{enumerate} 47 | \setlength{\itemsep}{-1ex} 48 | \item Expected cost 49 | \item Variance of cost 50 | \item inv(s)* cov(x,L) 51 | \end{enumerate} 52 | 53 | \begin{lstlisting} 54 | function [L dLdm dLds S dSdm dSds C dCdm dCds] = lossLin(cost,m,s) 55 | \end{lstlisting} 56 | 57 | 58 | \subsection*{Code} 59 | 60 | 61 | \begin{lstlisting} 62 | a = cost.a(:); b = cost.b(:); D = length(m); 63 | if length(a) ~= D || length(b) ~= D; 64 | error('a or b not the same length as m'); end 65 | 66 | % 1) Mean 67 | L = a'*(m - b); 68 | dLdm = a'; 69 | dLds = zeros(D); 70 | 71 | % 2) Variance 72 | S = a'*s*a; 73 | dSdm = zeros(1,D); 74 | dSds = a*a'; 75 | 76 | % 3) inv(s) * input-output covariance cov(x,L) 77 | C = a; 78 | dCdm = zeros(D); dCds = zeros(D,D^2); 79 | \end{lstlisting} 80 | -------------------------------------------------------------------------------- /doc/tex/maha.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | 10 | \subsection*{maha.m} 11 | 12 | \begin{par} 13 | \textbf{Summary:} Point-wise squared Mahalanobis distance (a-b)*Q*(a-b)'. Vectors are row-vectors 14 | \end{par} \vspace{1em} 15 | 16 | \begin{verbatim} function K = maha(a, b, Q)\end{verbatim} 17 | \begin{par} 18 | \textbf{Input arguments:} 19 | \end{par} \vspace{1em} 20 | \begin{verbatim}a matrix containing n row vectors [n x D] 21 | b matrix containing n row vectors [n x D] 22 | Q weight matrix. Default: eye(D) [D x D]\end{verbatim} 23 | \begin{par} 24 | \textbf{Output arguments:} K point-wise squared distances [n x n] 25 | \end{par} \vspace{1em} 26 | \begin{par} 27 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 28 | \end{par} \vspace{1em} 29 | \begin{par} 30 | Last modified: 2013-03-21 31 | \end{par} \vspace{1em} 32 | 33 | \begin{lstlisting} 34 | function K = maha(a, b, Q) 35 | \end{lstlisting} 36 | 37 | 38 | \subsection*{Code} 39 | 40 | 41 | \begin{lstlisting} 42 | if nargin == 2 % assume unit Q 43 | K = bsxfun(@plus,sum(a.*a,2),sum(b.*b,2)')-2*a*b'; 44 | else 45 | aQ = a*Q; K = bsxfun(@plus,sum(aQ.*a,2),sum(b*Q.*b,2)')-2*aQ*b'; 46 | end 47 | \end{lstlisting} 48 | -------------------------------------------------------------------------------- /doc/tex/pendubot_learn.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{pendubot\_learn.m} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Script to learn a controller for the pendubot swingup (a double pendulum with the inner joint actuated) 13 | \end{par} \vspace{1em} 14 | \begin{par} 15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 16 | \end{par} \vspace{1em} 17 | \begin{par} 18 | Last modified: 2013-03-27 19 | \end{par} \vspace{1em} 20 | 21 | 22 | \subsection*{High-Level Steps} 23 | 24 | \begin{enumerate} 25 | \setlength{\itemsep}{-1ex} 26 | \item Load parameters 27 | \item Create J initial trajectories by applying random controls 28 | \item Controlled learning (train dynamics model, policy learning, policy application) 29 | \end{enumerate} 30 | 31 | 32 | \subsection*{Code} 33 | 34 | 35 | \begin{lstlisting} 36 | % 1. Initialization 37 | clear all; close all; 38 | settings_pendubot; % load scenario-specific settings 39 | basename = 'pendubot_'; % filename used for saving data 40 | 41 | % 2. Initial J random rollouts 42 | for jj = 1:J 43 | [xx, yy, realCost{jj}, latent{jj}] = ... 44 | rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost); 45 | x = [x; xx]; y = [y; yy]; % augment training sets for dynamics model 46 | if plotting.verbosity > 0; % visualization of trajectory 47 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 48 | draw_rollout_pendubot; 49 | end 50 | end 51 | 52 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0; 53 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno); 54 | 55 | % 3. Controlled learning (N iterations) 56 | for j = 1:N 57 | trainDynModel; % train (GP) dynamics model 58 | learnPolicy; % learn policy 59 | applyController; % apply controller to system 60 | disp(['controlled trial # ' num2str(j)]); 61 | if plotting.verbosity > 0; % visualization of trajectory 62 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 63 | draw_rollout_pendubot; 64 | end 65 | end 66 | \end{lstlisting} 67 | -------------------------------------------------------------------------------- /doc/tex/pendulum_learn.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{pendulum\_learn.m} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Script to learn a controller for the pendulum swingup 13 | \end{par} \vspace{1em} 14 | \begin{par} 15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 16 | \end{par} \vspace{1em} 17 | \begin{par} 18 | Last modified: 2013-03-27 19 | \end{par} \vspace{1em} 20 | 21 | 22 | \subsection*{High-Level Steps} 23 | 24 | \begin{enumerate} 25 | \setlength{\itemsep}{-1ex} 26 | \item Load parameters 27 | \item Create J initial trajectories by applying random controls 28 | \item Controlled learning (train dynamics model, policy learning, policy application) 29 | \end{enumerate} 30 | 31 | 32 | \subsection*{Code} 33 | 34 | 35 | \begin{lstlisting} 36 | % 1. Initialization 37 | clear all; close all; 38 | settings_pendulum; % load scenario-specific settings 39 | basename = 'pendulum_'; % filename used for saving data 40 | 41 | % 2. Initial J random rollouts 42 | for jj = 1:J 43 | [xx, yy, realCost{jj}, latent{jj}] = ... 44 | rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost); 45 | x = [x; xx]; y = [y; yy]; % augment training sets for dynamics model 46 | if plotting.verbosity > 0; % visualization of trajectory 47 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 48 | draw_rollout_pendulum; 49 | end 50 | end 51 | 52 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0; 53 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno); 54 | 55 | % 3. Controlled learning (N iterations) 56 | for j = 1:N 57 | trainDynModel; % train (GP) dynamics model 58 | learnPolicy; % learn policy 59 | applyController; % apply controller to system 60 | disp(['controlled trial # ' num2str(j)]); 61 | if plotting.verbosity > 0; % visualization of trajectory 62 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 63 | draw_rollout_pendulum; 64 | end 65 | end 66 | \end{lstlisting} 67 | -------------------------------------------------------------------------------- /doc/tex/pred.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{pred.m} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Compute predictive (marginal) distributions of a trajecory 13 | \end{par} \vspace{1em} 14 | \begin{verbatim}[M S] = pred(policy, plant, dynmodel, m, s, H)\end{verbatim} 15 | \begin{par} 16 | \textbf{Input arguments:} 17 | \end{par} \vspace{1em} 18 | \begin{verbatim}policy policy structure 19 | plant plant structure 20 | dynmodel dynamics model structure 21 | m D-by-1 mean of the initial state distribution 22 | s D-by-D covariance of the initial state distribution 23 | H length of prediction horizon\end{verbatim} 24 | \begin{par} 25 | \textbf{Output arguments:} 26 | \end{par} \vspace{1em} 27 | \begin{verbatim}M D-by-(H+1) sequence of predicted mean vectors 28 | S D-by-D-(H+1) sequence of predicted covariance 29 | matrices\end{verbatim} 30 | \begin{par} 31 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 32 | \end{par} \vspace{1em} 33 | \begin{par} 34 | Last modified: 2013-01-23 35 | \end{par} \vspace{1em} 36 | 37 | 38 | \subsection*{High-Level Steps} 39 | 40 | \begin{enumerate} 41 | \setlength{\itemsep}{-1ex} 42 | \item Predict successor state distribution 43 | \end{enumerate} 44 | 45 | \begin{lstlisting} 46 | function [M S] = pred(policy, plant, dynmodel, m, s, H) 47 | \end{lstlisting} 48 | 49 | 50 | \subsection*{Code} 51 | 52 | 53 | \begin{lstlisting} 54 | D = length(m); S = zeros(D,D,H+1); M = zeros(D,H+1); 55 | M(:,1) = m; S(:,:,1) = s; 56 | for i = 1:H 57 | [m s] = plant.prop(m, s, plant, dynmodel, policy); 58 | M(:,i+1) = m(end-D+1:end); 59 | S(:,:,i+1) = s(end-D+1:end,end-D+1:end); 60 | end 61 | \end{lstlisting} 62 | -------------------------------------------------------------------------------- /doc/tex/predcost.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{predcost.m} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Compute trajectory of expected costs for a given set of state distributions 13 | \end{par} \vspace{1em} 14 | \begin{par} 15 | inputs: m0 mean of states, D-by-1 or D-by-K for multiple means S covariance matrix of state distributions dynmodel (struct) for dynamics model (GP) plant (struct) of system parameters policy (struct) for policy to be implemented cost (struct) of cost function parameters H length of optimization horizon 16 | \end{par} \vspace{1em} 17 | \begin{par} 18 | outputs: L expected cumulative (discounted) cost s standard deviation of cost 19 | \end{par} \vspace{1em} 20 | \begin{par} 21 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 22 | \end{par} \vspace{1em} 23 | \begin{par} 24 | Last modified: 2012-01-12 25 | \end{par} \vspace{1em} 26 | 27 | 28 | \subsection*{High-Level Steps} 29 | 30 | \begin{enumerate} 31 | \setlength{\itemsep}{-1ex} 32 | \item Predict successor state distribution 33 | \item Predict corresponding cost distribution 34 | \end{enumerate} 35 | 36 | \begin{lstlisting} 37 | function [L, s] = predcost(m0, S, dynmodel, plant, policy, cost, H) 38 | \end{lstlisting} 39 | 40 | 41 | \subsection*{Code} 42 | 43 | 44 | \begin{lstlisting} 45 | L = zeros(size(m0,2),H); s = zeros(size(m0,2),H); 46 | for k = 1:size(m0,2); 47 | m = m0(:,k); 48 | for t = 1:H 49 | [m, S] = plant.prop(m, S, plant, dynmodel, policy); % get next state 50 | [L(k,t), d1, d2, v] = cost.fcn(cost, m, S); % compute cost 51 | s(k,t) = sqrt(v); 52 | end 53 | end 54 | L = mean(L,1); s = mean(s,1); 55 | \end{lstlisting} 56 | -------------------------------------------------------------------------------- /doc/tex/reward.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{reward.m} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Compute expectation, variance, and their derivatives of an exponentiated negative quadratic cost $\exp(-(x-z)'W(x-z)/2)$, where $x\sim\mathcal N(m,S)$ 13 | \end{par} \vspace{1em} 14 | \begin{par} 15 | \textbf{Input arguments:} 16 | \end{par} \vspace{1em} 17 | \begin{verbatim}m: D-by-1 mean of the state distribution 18 | S: D-by-D covariance matrix of the state distribution 19 | z: D-by-1 target state 20 | W: D-by-D weight matrix\end{verbatim} 21 | \begin{par} 22 | \textbf{Output arguments:} 23 | \end{par} \vspace{1em} 24 | \begin{verbatim}muR: 1-by-1 expected reward 25 | dmuRdm: 1-by-D derivative of expected reward wrt input mean 26 | dmuRdS: D-by-D derivative of expected reward wrt input covariance matrix 27 | sR: 1-by-1 variance of reward 28 | dsRdm: 1-by-D derivative of variance of reward wrt input mean 29 | dsRdS: D-by-D derivative reward variance wrt input covariance matrix\end{verbatim} 30 | \begin{par} 31 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 32 | \end{par} \vspace{1em} 33 | \begin{par} 34 | Last modification: 2013-01-20 35 | \end{par} \vspace{1em} 36 | 37 | 38 | \subsection*{High-Level Steps} 39 | 40 | \begin{enumerate} 41 | \setlength{\itemsep}{-1ex} 42 | \item Compute expected reward 43 | \item Compute the derivatives of the expected reward with respect to the input distribution (optional) 44 | \item Compute variance of reward 45 | \item Compute the derivatives of the variance of the reward with respect to the input distribution (optional) 46 | \end{enumerate} 47 | 48 | \begin{lstlisting} 49 | function [muR, dmuRdm, dmuRdS, sR, dsRdm, dsRdS] = reward(m, S, z, W) 50 | \end{lstlisting} 51 | 52 | 53 | \subsection*{Code} 54 | 55 | 56 | \begin{lstlisting} 57 | % some precomputations 58 | D = length(m); % get state dimension 59 | SW = S*W; 60 | iSpW = W/(eye(D)+SW); 61 | 62 | % 1. expected reward 63 | muR = exp(-(m-z)'*iSpW*(m-z)/2)/sqrt(det(eye(D)+SW)); 64 | 65 | % 2. derivatives of expected reward 66 | if nargout > 1 67 | dmuRdm = -muR*(m-z)'*iSpW; % wrt input mean 68 | dmuRdS = muR*(iSpW*(m-z)*(m-z)'-eye(D))*iSpW/2; % wrt input covariance matrix 69 | end 70 | 71 | % 3. reward variance 72 | if nargout > 3 73 | i2SpW = W/(eye(D)+2*SW); 74 | r2 = exp(-(m-z)'*i2SpW*(m-z))/sqrt(det(eye(D)+2*SW)); 75 | sR = r2 - muR^2; 76 | if sR < 1e-12; sR=0; end % for numerical reasons 77 | end 78 | 79 | % 4. derivatives of reward variance 80 | if nargout > 4 81 | % wrt input mean 82 | dsRdm = -2*r2*(m-z)'*i2SpW-2*muR*dmuRdm; 83 | % wrt input covariance matrix 84 | dsRdS = r2*(2*i2SpW*(m-z)*(m-z)'-eye(D))*i2SpW-2*muR*dmuRdS; 85 | end 86 | \end{lstlisting} 87 | -------------------------------------------------------------------------------- /doc/tex/rewrap.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | 10 | \subsection*{rewrap.m} 11 | 12 | \begin{par} 13 | \textbf{Summary:} Map the numerical elements in the vector $v$ onto the variables $s$, which can be of any type. The number of numerical elements must match; on exit, $v$ should be empty. Non-numerical entries are just copied. See also the reverse unwrap.m. 14 | \end{par} \vspace{1em} 15 | 16 | \begin{verbatim} [s v] = rewrap(s, v)\end{verbatim} 17 | \begin{par} 18 | \textbf{Input arguments:} 19 | \end{par} \vspace{1em} 20 | \begin{verbatim}s structure, cell, or numeric values 21 | v structure, cell, or numeric values\end{verbatim} 22 | \begin{par} 23 | \textbf{Output arguments:} 24 | \end{par} \vspace{1em} 25 | \begin{verbatim}s structure, cell, or numeric values 26 | v [empty]\end{verbatim} 27 | \begin{par} 28 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 29 | \end{par} \vspace{1em} 30 | \begin{par} 31 | Last modified: 2013-03-25 32 | \end{par} \vspace{1em} 33 | 34 | \begin{lstlisting} 35 | function [s v] = rewrap(s, v) 36 | \end{lstlisting} 37 | 38 | 39 | \subsection*{Code} 40 | 41 | 42 | \begin{lstlisting} 43 | if isnumeric(s) 44 | if numel(v) < numel(s) 45 | error('The vector for conversion contains too few elements') 46 | end 47 | s = reshape(v(1:numel(s)), size(s)); % numeric values are reshaped 48 | v = v(numel(s)+1:end); % remaining arguments passed on 49 | elseif isstruct(s) 50 | [s p] = orderfields(s); p(p) = 1:numel(p); % alphabetize, store ordering 51 | [t v] = rewrap(struct2cell(s), v); % convert to cell, recurse 52 | s = orderfields(cell2struct(t,fieldnames(s),1),p); % conv to struct, reorder 53 | elseif iscell(s) 54 | for i = 1:numel(s) % cell array elements are handled sequentially 55 | [s{i} v] = rewrap(s{i}, v); 56 | end 57 | end 58 | \end{lstlisting} 59 | -------------------------------------------------------------------------------- /doc/tex/solve_chol.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{solve\_chol.m} 10 | 11 | \begin{par} 12 | solve\_chol - solve linear equations from the Cholesky factorization. Solve A*X = B for X, where A is square, symmetric, positive definite. The input to the function is R the Cholesky decomposition of A and the matrix B. Example: X = solve\_chol(chol(A),B); 13 | \end{par} \vspace{1em} 14 | \begin{par} 15 | NOTE: The program code is written in the C language for efficiency and is contained in the file solve\_chol.c, and should be compiled using matlabs mex facility. However, this file also contains a (less efficient) matlab implementation, supplied only as a help to people unfamiliar with mex. If the C code has been properly compiled and is avaiable, it automatically takes precendence over the matlab code in this file. 16 | \end{par} \vspace{1em} 17 | \begin{par} 18 | Copyright (c) 2004, 2005, 2006 by Carl Edward Rasmussen. 2006-02-08. 19 | \end{par} \vspace{1em} 20 | 21 | 22 | \subsection*{Code} 23 | 24 | 25 | \begin{lstlisting} 26 | function x = solve_chol(A, B) 27 | 28 | if nargin ~= 2 | nargout > 1 29 | error('Wrong number of arguments.'); 30 | end 31 | 32 | if size(A,1) ~= size(A,2) | size(A,1) ~= size(B,1) 33 | error('Wrong sizes of matrix arguments.'); 34 | end 35 | 36 | x = A\(A'\B); 37 | \end{lstlisting} 38 | -------------------------------------------------------------------------------- /doc/tex/sq_dist.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | 10 | \subsection*{sq\_dist.m} 11 | 12 | \begin{par} 13 | sq\_dist - a function to compute a matrix of all pairwise squared distances between two sets of vectors, stored in the columns of the two matrices, a (of size D by n) and b (of size D by m). If only a single argument is given or the second matrix is empty, the missing matrix is taken to be identical to the first. 14 | \end{par} \vspace{1em} 15 | \begin{par} 16 | Special functionality: If an optional third matrix argument Q is given, it must be of size n by m, and in this case a vector of the traces of the product of Q' and the coordinatewise squared distances is returned. 17 | \end{par} \vspace{1em} 18 | \begin{par} 19 | NOTE: The program code is written in the C language for efficiency and is contained in the file sq\_dist.c, and should be compiled using matlabs mex facility. However, this file also contains a (less efficient) matlab implementation, supplied only as a help to people unfamiliar with mex. If the C code has been properly compiled and is avaiable, it automatically takes precendence over the matlab code in this file. 20 | \end{par} \vspace{1em} 21 | \begin{par} 22 | Usage: C = sq\_dist(a, b) or: C = sq\_dist(a) or equiv.: C = sq\_dist(a, []) or: c = sq\_dist(a, b, Q) where the b matrix may be empty. 23 | \end{par} \vspace{1em} 24 | \begin{par} 25 | where a is of size D by n, b is of size D by m (or empty), C and Q are of size n by m and c is of size D by 1. 26 | \end{par} \vspace{1em} 27 | \begin{par} 28 | Copyright (c) 2003, 2004, 2005 and 2006 Carl Edward Rasmussen. 2006-03-09. 29 | \end{par} \vspace{1em} 30 | 31 | \begin{lstlisting} 32 | function C = sq_dist(a, b, Q); 33 | \end{lstlisting} 34 | 35 | 36 | \subsection*{Code} 37 | 38 | 39 | \begin{lstlisting} 40 | if nargin < 1 | nargin > 3 | nargout > 1 41 | error('Wrong number of arguments.'); 42 | end 43 | 44 | if nargin == 1 | isempty(b) % input arguments are taken to be 45 | b = a; % identical if b is missing or empty 46 | end 47 | 48 | [D, n] = size(a); 49 | [d, m] = size(b); 50 | if d ~= D 51 | error('Error: column lengths must agree.'); 52 | end 53 | 54 | if nargin < 3 55 | C = zeros(n,m); 56 | for d = 1:D 57 | C = C + (repmat(b(d,:), n, 1) - repmat(a(d,:)', 1, m)).^2; 58 | end 59 | % C = repmat(sum(a.*a)',1,m)+repmat(sum(b.*b),n,1)-2*a'*b could be used to 60 | % replace the 3 lines above; it would be faster, but numerically less stable. 61 | else 62 | if [n m] == size(Q) 63 | C = zeros(D,1); 64 | for d = 1:D 65 | C(d) = sum(sum((repmat(b(d,:), n, 1) - repmat(a(d,:)', 1, m)).^2.*Q)); 66 | end 67 | else 68 | error('Third argument has wrong size.'); 69 | end 70 | end 71 | \end{lstlisting} 72 | -------------------------------------------------------------------------------- /doc/tex/trainDynModel.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{trainDynModel.m} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Script to learn the dynamics model 13 | \end{par} \vspace{1em} 14 | \begin{par} 15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 16 | \end{par} \vspace{1em} 17 | \begin{par} 18 | Last modification: 2013-05-20 19 | \end{par} \vspace{1em} 20 | 21 | 22 | \subsection*{High-Level Steps} 23 | 24 | \begin{enumerate} 25 | \setlength{\itemsep}{-1ex} 26 | \item Extract states and controls from x-matrix 27 | \item Define the training inputs and targets of the GP 28 | \item Train the GP 29 | \end{enumerate} 30 | 31 | 32 | \subsection*{Code} 33 | 34 | 35 | \begin{lstlisting} 36 | % 1. Train GP dynamics model 37 | Du = length(policy.maxU); Da = length(plant.angi); % no. of ctrl and angles 38 | xaug = [x(:,dyno) x(:,end-Du-2*Da+1:end-Du)]; % x augmented with angles 39 | dynmodel.inputs = [xaug(:,dyni) x(:,end-Du+1:end)]; % use dyni and ctrl 40 | dynmodel.targets = y(:,dyno); 41 | dynmodel.targets(:,difi) = dynmodel.targets(:,difi) - x(:,dyno(difi)); 42 | 43 | dynmodel = dynmodel.train(dynmodel, plant, trainOpt); % train dynamics GP 44 | 45 | % display some hyperparameters 46 | Xh = dynmodel.hyp; 47 | % noise standard deviations 48 | disp(['Learned noise std: ' num2str(exp(Xh(end,:)))]); 49 | % signal-to-noise ratios (values > 500 can cause numerical problems) 50 | disp(['SNRs : ' num2str(exp(Xh(end-1,:)-Xh(end,:)))]); 51 | \end{lstlisting} 52 | -------------------------------------------------------------------------------- /doc/tex/unicycle_learn.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | \subsection{unicycle\_learn.m} 10 | 11 | \begin{par} 12 | \textbf{Summary:} Script to learn a controller for unicycling 13 | \end{par} \vspace{1em} 14 | \begin{par} 15 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 16 | \end{par} \vspace{1em} 17 | \begin{par} 18 | Last modified: 2013-03-27 19 | \end{par} \vspace{1em} 20 | 21 | 22 | \subsection*{High-Level Steps} 23 | 24 | \begin{enumerate} 25 | \setlength{\itemsep}{-1ex} 26 | \item Load parameters 27 | \item Create J initial trajectories by applying random controls 28 | \item Controlled learning (train dynamics model, policy learning, policy application) 29 | \end{enumerate} 30 | 31 | 32 | \subsection*{Code} 33 | 34 | 35 | \begin{lstlisting} 36 | % 1. Initialization 37 | clear all; close all; 38 | settings_unicycle; % load scenario-specific settings 39 | basename = 'unicycle_'; % filename used for saving data 40 | 41 | % 2. Initial J random rollouts 42 | for jj = 1:J % get the first observations 43 | [xx, yy, realCost{jj}, latent{jj}] = ... 44 | rollout(gaussian(mu0, S0), struct('maxU',policy.maxU/5), H, plant, cost); 45 | x = [x; xx]; y = [y; yy]; 46 | if plotting.verbosity > 0; % visualization of trajectory 47 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 48 | draw_rollout_unicycle; 49 | end 50 | end 51 | 52 | z(odei,:) = bsxfun(@plus, mu0, chol(S0)'*randn(length(odei),1000)); % compute 53 | for i = 1:size(z,2), z(augi,i) = plant.augment(z(:,i)'); end % the distribution 54 | mu0Sim = mean(z,2); S0Sim = cov(z'); % of augmented start state by MCMC 55 | mu0Sim(odei) = mu0; S0Sim(odei,odei) = S0; % Put in known correct values 56 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno); clear z i; 57 | 58 | % 3. Controlled learning (N iterations) 59 | for j = 1:N 60 | trainDynModel; 61 | learnPolicy; 62 | applyController; 63 | disp(['controlled trial # ' num2str(j)]); 64 | if plotting.verbosity > 0; % visualization of trajectory 65 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 66 | draw_rollout_unicycle; 67 | end 68 | end 69 | \end{lstlisting} 70 | -------------------------------------------------------------------------------- /doc/tex/unwrap.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | 10 | \subsection*{unwrap.m} 11 | 12 | \begin{par} 13 | \textbf{Summary:} Extract the numerical values from $s$ into the column vector $v$. The variable \$sS can be of any type, including struct and cell array. Non-numerical elements are ignored. See also the reverse rewrap.m. 14 | \end{par} \vspace{1em} 15 | 16 | \begin{verbatim} v = unwrap(s)\end{verbatim} 17 | \begin{par} 18 | \textbf{Input arguments:} 19 | \end{par} \vspace{1em} 20 | \begin{verbatim}s structure, cell, or numeric values\end{verbatim} 21 | \begin{par} 22 | \textbf{Output arguments:} 23 | \end{par} \vspace{1em} 24 | \begin{verbatim}v structure, cell, or numeric values\end{verbatim} 25 | \begin{par} 26 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 27 | \end{par} \vspace{1em} 28 | \begin{par} 29 | Last modified: 2013-03-25 30 | \end{par} \vspace{1em} 31 | 32 | \begin{lstlisting} 33 | function v = unwrap(s) 34 | \end{lstlisting} 35 | 36 | 37 | \subsection*{Code} 38 | 39 | 40 | \begin{lstlisting} 41 | v = []; 42 | if isnumeric(s) 43 | v = s(:); % numeric values are recast to column vector 44 | elseif isstruct(s) 45 | v = unwrap(struct2cell(orderfields(s))); % alphabetize, conv to cell, recurse 46 | elseif iscell(s) 47 | for i = 1:numel(s) % cell array elements are handled sequentially 48 | v = [v; unwrap(s{i})]; 49 | end 50 | end 51 | \end{lstlisting} 52 | -------------------------------------------------------------------------------- /doc/tex/valueT.tex: -------------------------------------------------------------------------------- 1 | 2 | % This LaTeX was auto-generated from an M-file by MATLAB. 3 | % To make changes, update the M-file and republish this document. 4 | 5 | 6 | 7 | 8 | 9 | 10 | \subsection*{valueT.m} 11 | 12 | \begin{par} 13 | \textbf{Summary:} Test derivatives of the propagate function, which computes the mean and the variance of the successor state distribution, assuming that the current state is Gaussian distributed with mean m and covariance matrix s. 14 | \end{par} \vspace{1em} 15 | \begin{verbatim}[d dy dh] = valueT(p, delta, m, s, dynmodel, policy, plant, cost, H)\end{verbatim} 16 | \begin{par} 17 | \textbf{Input arguments:} 18 | \end{par} \vspace{1em} 19 | \begin{verbatim}p policy parameters (can be a structure) 20 | .\ensuremath{<}\ensuremath{>} fields that contain the policy parameters (nothing else) 21 | m mean of the input distribution 22 | s covariance of the input distribution 23 | dynmodel GP dynamics model (structure) 24 | policy policy structure 25 | plant plant structure 26 | cost cost structure 27 | H prediction horizon 28 | delta (optional) finite difference parameter. Default: 1e-4\end{verbatim} 29 | \begin{par} 30 | \textbf{Output arguments:} 31 | \end{par} \vspace{1em} 32 | \begin{verbatim}dd relative error of analytical vs. finite difference gradient 33 | dy analytical gradient 34 | dh finite difference gradient\end{verbatim} 35 | \begin{par} 36 | Copyright (C) 2008-2013 by Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 37 | \end{par} \vspace{1em} 38 | \begin{par} 39 | Last modified: 2013-03-21 40 | \end{par} \vspace{1em} 41 | 42 | \begin{lstlisting} 43 | function [d dy dh] = valueT(p, m, s, dynmodel, policy, plant, cost, H, delta) 44 | \end{lstlisting} 45 | 46 | 47 | \subsection*{Code} 48 | 49 | 50 | \begin{lstlisting} 51 | if nargin < 9; delta = 1e-4; end 52 | if nargin < 8; H = 4; end 53 | 54 | % call checkgrad directly 55 | [d dy dh] = checkgrad('value',p,delta,m,s,dynmodel,policy,plant,cost,H); 56 | \end{lstlisting} 57 | -------------------------------------------------------------------------------- /gp/covNoise.m: -------------------------------------------------------------------------------- 1 | %% covNoise.m 2 | % Independent covariance function, ie "white noise", with specified variance. 3 | % The covariance function is specified as: 4 | % 5 | % k(x^p,x^q) = s2 * \delta(p,q) 6 | % 7 | % where s2 is the noise variance and \delta(p,q) is a Kronecker delta function 8 | % which is 1 iff p=q and zero otherwise. The hyperparameter is 9 | % 10 | % logtheta = [ log(sqrt(s2)) ] 11 | % 12 | % For more help on design of covariance functions, try "help covFunctions". 13 | % 14 | % (C) Copyright 2006 by Carl Edward Rasmussen, 2006-03-24. 15 | 16 | function [A, B] = covNoise(logtheta, x, z) 17 | %% Code 18 | 19 | if nargin == 0, A = '1'; return; end % report number of parameters 20 | 21 | s2 = exp(2*logtheta); % noise variance 22 | 23 | if nargin == 2 % compute covariance matrix 24 | A = s2*eye(size(x,1)); 25 | elseif nargout == 2 % compute test set covariances 26 | A = s2; 27 | B = 0; % zeros cross covariance by independence 28 | else % compute derivative matrix 29 | A = 2*s2*eye(size(x,1)); 30 | end 31 | -------------------------------------------------------------------------------- /gp/covSEard.m: -------------------------------------------------------------------------------- 1 | %% covSEard.m 2 | % Squared Exponential covariance function with Automatic Relevance Detemination 3 | % (ARD) distance measure. The covariance function is parameterized as: 4 | % 5 | % k(x^p,x^q) = sf2 * exp(-(x^p - x^q)'*inv(P)*(x^p - x^q)/2) 6 | % 7 | % where the P matrix is diagonal with ARD parameters ell_1^2,...,ell_D^2, where 8 | % D is the dimension of the input space and sf2 is the signal variance. The 9 | % hyperparameters are: 10 | % 11 | % loghyper = [ log(ell_1) 12 | % log(ell_2) 13 | % . 14 | % log(ell_D) 15 | % log(sqrt(sf2)) ] 16 | % 17 | % For more help on design of covariance functions, try "help covFunctions". 18 | % 19 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-03-24) 20 | 21 | function [A, B] = covSEard(loghyper, x, z) 22 | %% Code 23 | if nargin == 0, A = '(D+1)'; return; end % report number of parameters 24 | 25 | persistent K; 26 | 27 | [n D] = size(x); 28 | ell = exp(loghyper(1:D)); % characteristic length scale 29 | sf2 = exp(2*loghyper(D+1)); % signal variance 30 | 31 | if nargin == 2 32 | K = sf2*exp(-sq_dist(diag(1./ell)*x')/2); 33 | A = K; 34 | elseif nargout == 2 % compute test set covariances 35 | A = sf2*ones(size(z,1),1); 36 | B = sf2*exp(-sq_dist(diag(1./ell)*x',diag(1./ell)*z')/2); 37 | else % compute derivative matrix 38 | 39 | % check for correct dimension of the previously calculated kernel matrix 40 | if any(size(K)~=n) 41 | K = sf2*exp(-sq_dist(diag(1./ell)*x')/2); 42 | end 43 | 44 | if z <= D % length scale parameters 45 | A = K.*sq_dist(x(:,z)'/ell(z)); 46 | else % magnitude parameter 47 | A = 2*K; 48 | clear K; 49 | end 50 | end 51 | 52 | -------------------------------------------------------------------------------- /gp/covSum.m: -------------------------------------------------------------------------------- 1 | %% covSum.m 2 | % *Summary:* Compose a covariance function as the sum of other covariance 3 | % functions. This function doesn't actually compute very much on its own, it 4 | % merely does some bookkeeping, and calls other covariance functions to do the 5 | % actual work. 6 | % 7 | % function [A, B] = covSum(covfunc, logtheta, x, z) 8 | % 9 | % (C) Copyright 2006 by Carl Edward Rasmussen, 2006-03-20. 10 | % 11 | 12 | function [A, B] = covSum(covfunc, logtheta, x, z) 13 | %% Code 14 | 15 | for i = 1:length(covfunc) % iterate over covariance functions 16 | f = covfunc(i); 17 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary 18 | j(i) = cellstr(feval(f{:})); 19 | end 20 | 21 | if nargin == 1, % report number of parameters 22 | A = char(j(1)); for i=2:length(covfunc), A = [A, '+', char(j(i))]; end 23 | return 24 | end 25 | 26 | [n, D] = size(x); 27 | 28 | v = []; % v vector indicates to which covariance parameters belong 29 | for i = 1:length(covfunc), v = [v repmat(i, 1, eval(char(j(i))))]; end 30 | 31 | switch nargin 32 | case 3 % compute covariance matrix 33 | A = zeros(n, n); % allocate space for covariance matrix 34 | for i = 1:length(covfunc) % iteration over summand functions 35 | f = covfunc(i); 36 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary 37 | A = A + feval(f{:}, logtheta(v==i), x); % accumulate covariances 38 | end 39 | 40 | case 4 % compute derivative matrix or test set covariances 41 | if nargout == 2 % compute test set cavariances 42 | A = zeros(size(z,1),1); B = zeros(size(x,1),size(z,1)); % allocate space 43 | for i = 1:length(covfunc) 44 | f = covfunc(i); 45 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary 46 | [AA BB] = feval(f{:}, logtheta(v==i), x, z); % compute test covariances 47 | A = A + AA; B = B + BB; % and accumulate 48 | end 49 | else % compute derivative matrices 50 | i = v(z); % which covariance function 51 | j = sum(v(1:z)==i); % which parameter in that covariance 52 | f = covfunc(i); 53 | if iscell(f{:}), f = f{:}; end % dereference cell array if necessary 54 | A = feval(f{:}, logtheta(v==i), x, j); % compute derivative 55 | end 56 | 57 | end 58 | 59 | -------------------------------------------------------------------------------- /gp/gpr.m: -------------------------------------------------------------------------------- 1 | %% gpr.m 2 | % *Summary:* Gaussian process regression, with a named covariance function. Two 3 | % modes are possible: training and prediction: if no test data are given, the 4 | % function returns minus the log likelihood and its partial derivatives with 5 | % respect to the hyperparameters; this mode is used to fit the hyperparameters. 6 | % If test data are given, then (marginal) Gaussian predictions are computed, 7 | % whose mean and variance are returned. Note that in cases where the covariance 8 | % function has noise contributions, the variance returned in S2 is for noisy 9 | % test targets; if you want the variance of the noise-free latent function, you 10 | % must substract the noise variance. 11 | % 12 | % usage: [nlml dnlml] = gpr(logtheta, covfunc, x, y) 13 | % or: [mu S2] = gpr(logtheta, covfunc, x, y, xstar) 14 | % 15 | % where: 16 | % 17 | % logtheta is a (column) vector of log hyperparameters 18 | % covfunc is the covariance function 19 | % x is a n by D matrix of training inputs 20 | % y is a (column) vector (of size n) of targets 21 | % xstar is a nn by D matrix of test inputs 22 | % nlml is the returned value of the negative log marginal likelihood 23 | % dnlml is a (column) vector of partial derivatives of the negative 24 | % log marginal likelihood wrt each log hyperparameter 25 | % mu is a (column) vector (of size nn) of prediced means 26 | % S2 is a (column) vector (of size nn) of predicted variances 27 | % 28 | % For more help on covariance functions, see "help covFunctions". 29 | % 30 | % (C) Copyright 2006 by Carl Edward Rasmussen (2006-03-20). 31 | 32 | 33 | function [out1, out2] = gpr(logtheta, covfunc, x, y, xstar) 34 | %% Code 35 | 36 | if ischar(covfunc), covfunc = cellstr(covfunc); end % convert to cell if needed 37 | [n, D] = size(x); 38 | if eval(feval(covfunc{:})) ~= size(logtheta, 1) 39 | error('Error: Number of parameters do not agree with covariance function') 40 | end 41 | 42 | K = feval(covfunc{:}, logtheta, x); % compute training set covariance matrix 43 | 44 | L = chol(K)'; % cholesky factorization of the covariance 45 | alpha = solve_chol(L',y); 46 | 47 | if nargin == 4 % if no test cases, compute the negative log marginal likelihood 48 | 49 | out1 = 0.5*y'*alpha + sum(log(diag(L))) + 0.5*n*log(2*pi); 50 | 51 | if nargout == 2 % ... and if requested, its partial derivatives 52 | out2 = zeros(size(logtheta)); % set the size of the derivative vector 53 | W = L'\(L\eye(n))-alpha*alpha'; % precompute for convenience 54 | for i = 1:length(out2) 55 | out2(i) = sum(sum(W.*feval(covfunc{:}, logtheta, x, i)))/2; 56 | end 57 | end 58 | 59 | else % ... otherwise compute (marginal) test predictions ... 60 | 61 | [Kss, Kstar] = feval(covfunc{:}, logtheta, x, xstar); % test covariances 62 | 63 | out1 = Kstar' * alpha; % predicted means 64 | 65 | if nargout == 2 66 | v = L\Kstar; 67 | out2 = Kss - sum(v.*v)'; 68 | end 69 | 70 | end 71 | -------------------------------------------------------------------------------- /gp/hypCurb.m: -------------------------------------------------------------------------------- 1 | %% hypCurb.m 2 | % *Summary:* Wrapper for GP training (via gpr.m), penalizing large SNR and 3 | % extreme length-scales to avoid numerical instabilities 4 | % 5 | % function [f df] = hypCurb(lh, covfunc, x, y, curb) 6 | % 7 | % *Input arguments:* 8 | % 9 | % lh log-hyper-parameters [D+2 x E ] 10 | % covfunc covariance function, e.g., 11 | % covfunc = {'covSum', {'covSEard', 'covNoise'}}; 12 | % x training inputs [ n x D ] 13 | % y training targets [ n x E ] 14 | % curb (optional) parameters to penalize extreme hyper-parameters 15 | % .ls length-scales 16 | % .snr signal-to-noise ratio (try to keep it below 500) 17 | % .std additional parameter required for length-scale penalty 18 | % 19 | % *Output arguments:* 20 | % 21 | % f penalized negative log-marginal likelihood 22 | % df derivative of penalized negative log-marginal likelihood wrt 23 | % GP log-hyper-parameters 24 | % 25 | % 26 | % Copyright (C) 2008-2013 by 27 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 28 | % 29 | % Last modified: 2011-12-19 30 | % 31 | %% High-Level Steps 32 | % # Compute the negative log-marginal likelihood (plus derivatives) 33 | % # Add penalties and change derivatives accordingly 34 | 35 | function [f df] = hypCurb(lh, covfunc, x, y, curb) 36 | %% Code 37 | if nargin < 5, curb.snr = 500; curb.ls = 100; curb.std = 1; end % set default 38 | 39 | p = 30; % penalty power 40 | D = size(x,2); 41 | if size(lh,1) == 3*D+2; li = 1:2*D; sfi = 2*D+1:3*D+1; % 1D and DD terms 42 | elseif size(lh,1) == 2*D+1; li = 1:D; sfi = D+1:2*D; % Just 1D terms 43 | elseif size(lh,1) == D+2; li = 1:D; sfi = D+1; % Just DD terms 44 | else error('Incorrect number of hyperparameters'); 45 | end 46 | 47 | ll = lh(li); lsf = lh(sfi); lsn = lh(end); 48 | 49 | % 1) compute the negative log-marginal likelihood (plus derivatives) 50 | [f df] = gpr(lh, covfunc, x, y); % first, call gpr 51 | 52 | % 2) add penalties and change derivatives accordingly 53 | f = f + sum(((ll - log(curb.std'))./log(curb.ls)).^p); % length-scales 54 | df(li) = df(li) + p*(ll - log(curb.std')).^(p-1)/log(curb.ls)^p; 55 | 56 | f = f + sum(((lsf - lsn)/log(curb.snr)).^p); % signal to noise ratio 57 | df(sfi) = df(sfi) + p*(lsf - lsn).^(p-1)/log(curb.snr)^p; 58 | df(end) = df(end) - p*sum((lsf - lsn).^(p-1)/log(curb.snr)^p); 59 | -------------------------------------------------------------------------------- /loss/lossLin.m: -------------------------------------------------------------------------------- 1 | %% lossLin.m 2 | % *Summary:* Function to compute the expected loss and its derivatives, given an 3 | % input distribution, under a linear loss function: L = a^T(x - b). Note, this 4 | % loss function can return negative loss. 5 | % 6 | % [L dLdm dLds S dSdm dSds C dCdm dCds] = lossLin(cost,m,s) 7 | % 8 | % *Input arguments:* 9 | % 10 | % cost 11 | % .a gradient of linear loss function, [D x 1] 12 | % .b targets, the value of x for which there is zero loss [D x 1] 13 | % m mean of input distribution [D x 1] 14 | % s covariance matrix of input distribution [D x D] 15 | % 16 | % *Output arguments:* 17 | % 18 | % L expected loss [1 x 1 ] 19 | % dLdm derivative of L wrt input mean [1 x D ] 20 | % dLds derivative of L wrt input covariance [1 x D^2] 21 | % S variance of loss [1 x 1 ] 22 | % dSdm derivative of S wrt input mean [1 x D ] 23 | % dSds derivative of S wrt input covariance [1 x D^2] 24 | % C inv(S) times input-output covariance [D x 1 ] 25 | % dCdm derivative of C wrt input mean [D x D ] 26 | % dCds derivative of C wrt input covariance [D x D^2] 27 | % 28 | % Copyright (C) 2008-2013 by 29 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 30 | % 31 | % Last modified: 2013-03-05 32 | % 33 | %% High-Level Steps 34 | % # Expected cost 35 | % # Variance of cost 36 | % # inv(s)* cov(x,L) 37 | 38 | function [L dLdm dLds S dSdm dSds C dCdm dCds] = lossLin(cost,m,s) 39 | %% Code 40 | 41 | a = cost.a(:); b = cost.b(:); D = length(m); 42 | if length(a) ~= D || length(b) ~= D; 43 | error('a or b not the same length as m'); end 44 | 45 | % 1) Mean 46 | L = a'*(m - b); 47 | dLdm = a'; 48 | dLds = zeros(D); 49 | 50 | % 2) Variance 51 | S = a'*s*a; 52 | dSdm = zeros(1,D); 53 | dSds = a*a'; 54 | 55 | % 3) inv(s) * input-output covariance cov(x,L) 56 | C = a; 57 | dCdm = zeros(D); dCds = zeros(D,D^2); 58 | 59 | -------------------------------------------------------------------------------- /loss/lossQuad.m: -------------------------------------------------------------------------------- 1 | %% lossQuad.m 2 | % *Summary:* Compute expectation and variance of a quadratic cost 3 | % $(x-z)'*W*(x-z)$ 4 | % and their derivatives, where $x \sim N(m,S)$ 5 | % 6 | % 7 | % function [L, dLdm, dLds, S, dSdm, dSds, C, dCdm, dCds] = lossQuad(cost, m, S) 8 | % 9 | % 10 | % 11 | % *Input arguments:* 12 | % 13 | % cost 14 | % .z: target state [D x 1] 15 | % .W: weight matrix [D x D] 16 | % m mean of input distribution [D x 1] 17 | % s covariance matrix of input distribution [D x D] 18 | % 19 | % 20 | % *Output arguments:* 21 | % 22 | % L expected loss [1 x 1 ] 23 | % dLdm derivative of L wrt input mean [1 x D ] 24 | % dLds derivative of L wrt input covariance [1 x D^2] 25 | % S variance of loss [1 x 1 ] 26 | % dSdm derivative of S wrt input mean [1 x D ] 27 | % dSds derivative of S wrt input covariance [1 x D^2] 28 | % C inv(S) times input-output covariance [D x 1 ] 29 | % dCdm derivative of C wrt input mean [D x D ] 30 | % dCds derivative of C wrt input covariance [D x D^2] 31 | % 32 | % Copyright (C) 2008-2013 by 33 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 34 | % 35 | % Last modified: 2013-05-30 36 | % 37 | %% High-Level Steps 38 | % # Expected cost 39 | % # Variance of cost 40 | % # inv(s)* cov(x,L) 41 | 42 | function [L, dLdm, dLds, S, dSdm, dSds, C, dCdm, dCds] = lossQuad(cost, m, S) 43 | %% Code 44 | D = length(m); % get state dimension 45 | 46 | % set some defaults if necessary 47 | if isfield(cost,'W'); W = cost.W; else W = eye(D); end 48 | if isfield(cost,'z'); z = cost.z; else z = zeros(D,1); end 49 | 50 | % 1. expected cost 51 | L = S(:)'*W(:) + (z-m)'*W*(z-m); 52 | 53 | % 1a. derivatives of expected cost 54 | if nargout > 1 55 | dLdm = 2*(m-z)'*W; % wrt input mean 56 | dLds = W'; % wrt input covariance matrix 57 | end 58 | 59 | % 2. variance of cost 60 | if nargout > 3 61 | S = trace(W*S*(W + W')*S) + (z-m)'*(W + W')*S*(W + W')*(z-m); 62 | if S < 1e-12; S = 0; end % for numerical reasons 63 | end 64 | 65 | % 2a. derivatives of variance of cost 66 | if nargout > 4 67 | % wrt input mean 68 | dSdm = -(2*(W+W')*S*(W+W)*(z-m))'; 69 | % wrt input covariance matrix 70 | dSds = W'*S'*(W + W')'+(W + W')'*S'*W' + (W + W')*(z-m)*((W + W')*(z-m))'; 71 | end 72 | 73 | % 3. inv(s) times IO covariance with derivatives 74 | if nargout > 6 75 | C = 2*W*(m-z); 76 | dCdm = 2*W; 77 | dCds = zeros(D,D^2); 78 | end 79 | -------------------------------------------------------------------------------- /loss/reward.m: -------------------------------------------------------------------------------- 1 | %% reward.m 2 | % *Summary:* Compute expectation, variance, and their derivatives of an 3 | % exponentiated negative quadratic cost $\exp(-(x-z)'W(x-z)/2)$, 4 | % where $x\sim\mathcal N(m,S)$ 5 | % 6 | % *Input arguments:* 7 | % 8 | % m: D-by-1 mean of the state distribution 9 | % S: D-by-D covariance matrix of the state distribution 10 | % z: D-by-1 target state 11 | % W: D-by-D weight matrix 12 | % 13 | % *Output arguments:* 14 | % 15 | % muR: 1-by-1 expected reward 16 | % dmuRdm: 1-by-D derivative of expected reward wrt input mean 17 | % dmuRdS: D-by-D derivative of expected reward wrt input covariance matrix 18 | % sR: 1-by-1 variance of reward 19 | % dsRdm: 1-by-D derivative of variance of reward wrt input mean 20 | % dsRdS: D-by-D derivative reward variance wrt input covariance matrix 21 | % 22 | % Copyright (C) 2008-2013 by 23 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 24 | % 25 | % Last modification: 2013-01-20 26 | % 27 | %% High-Level Steps 28 | % # Compute expected reward 29 | % # Compute the derivatives of the expected reward with respect to the input 30 | % distribution (optional) 31 | % # Compute variance of reward 32 | % # Compute the derivatives of the variance of the reward with 33 | % respect to the input distribution (optional) 34 | 35 | function [muR, dmuRdm, dmuRdS, sR, dsRdm, dsRdS] = reward(m, S, z, W) 36 | %% Code 37 | 38 | % some precomputations 39 | D = length(m); % get state dimension 40 | SW = S*W; 41 | iSpW = W/(eye(D)+SW); 42 | 43 | % 1. expected reward 44 | muR = exp(-(m-z)'*iSpW*(m-z)/2)/sqrt(det(eye(D)+SW)); 45 | 46 | % 2. derivatives of expected reward 47 | if nargout > 1 48 | dmuRdm = -muR*(m-z)'*iSpW; % wrt input mean 49 | dmuRdS = muR*(iSpW*(m-z)*(m-z)'-eye(D))*iSpW/2; % wrt input covariance matrix 50 | end 51 | 52 | % 3. reward variance 53 | if nargout > 3 54 | i2SpW = W/(eye(D)+2*SW); 55 | r2 = exp(-(m-z)'*i2SpW*(m-z))/sqrt(det(eye(D)+2*SW)); 56 | sR = r2 - muR^2; 57 | if sR < 1e-12; sR=0; end % for numerical reasons 58 | end 59 | 60 | % 4. derivatives of reward variance 61 | if nargout > 4 62 | % wrt input mean 63 | dsRdm = -2*r2*(m-z)'*i2SpW-2*muR*dmuRdm; 64 | % wrt input covariance matrix 65 | dsRdS = r2*(2*i2SpW*(m-z)*(m-z)'-eye(D))*i2SpW-2*muR*dmuRdS; 66 | end 67 | 68 | -------------------------------------------------------------------------------- /loss/reward.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/loss/reward.pdf -------------------------------------------------------------------------------- /loss/reward.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \renewcommand{\rmdefault}{psbx} 3 | \usepackage[utf8]{inputenc} 4 | \usepackage[T1]{fontenc} 5 | \usepackage{textcomp} 6 | \usepackage{eulervm} 7 | \usepackage{amsmath} 8 | \usepackage{amssymb} 9 | 10 | \setlength{\textwidth}{160mm} 11 | \setlength{\oddsidemargin}{0mm} 12 | \setlength{\parindent}{0 mm} 13 | 14 | \newcommand{\bfm}{{\bf m}} 15 | \newcommand{\bfs}{{\bf s}} 16 | \newcommand{\bfz}{{\bf z}} 17 | \newcommand{\E}{{\mathbb E}} 18 | \newcommand{\V}{{\mathbb V}} 19 | 20 | \title{The Reward Function} 21 | \author{Carl Edward Rasmussen} 22 | \date{July 17th 2008} 23 | 24 | \begin{document} 25 | 26 | \maketitle 27 | 28 | The \emph{reward} $R$ of being in state $\bfs$ is 29 | \[ 30 | R(\bfs)\;=\;\exp\big(\!-\tfrac{1}{2}(\bfs-\bfz)^\top T^{-1}(\bfs-\bfz)\big), 31 | \] 32 | where $\bfz$ and $T$ are parameters of the reward function, which 33 | typically depend on the specific task. We assume that the state is 34 | uncertain, with a Gaussian distribution 35 | \[ 36 | \bfs\;\sim\;{\cal N}(\bfm, S), 37 | \] 38 | with mean $\bfm$ and covariance matrix $S$. We wish to compute the 39 | \emph{expected} reward 40 | \[ 41 | \mu\;=\;\E[R]\;=\;\langle R(\bfs)\rangle_{\bfs\sim{\cal N}(\bfm,S)}\;=\;|I+ST^{-1}|^{-1/2} 42 | \exp\big(\!-\tfrac{1}{2}(\bfm-\bfz)^\top (S+T)^{-1}(\bfm-\bfz)\big), 43 | \] 44 | and to compute the \emph{variance} of the reward we first define 45 | \[ 46 | r^2\;=\;\E[R^2]\;=\;|I+2ST^{-1}|^{-1/2} 47 | \exp\big(\!-(\bfm-\bfz)^\top (2S+T)^{-1}(\bfm-\bfz)\big), 48 | \] 49 | so that 50 | \[ 51 | \sigma^2\;=\;\V[R]\;=\;\E[R^2]-\E^2[R]\;=\;r^2-\mu^2. 52 | \] 53 | We also need the derivatives of these two quanteties wrt the 54 | parameters of the state distribution. For the expected reward we have 55 | \[ 56 | \frac{d\mu}{d\bfm}\;=\;-\mu(S+T)^{-1}(\bfm-\bfz),\qquad 57 | \frac{d\mu}{dS}\;=\;\tfrac{1}{2}\mu\big[(S+T)^{-1}(\bfm-\bfz)(\bfm-\bfz)^\top 58 | -I\big](S+T)^{-1}, 59 | \] 60 | and for the variance 61 | \[ 62 | \frac{d\sigma^2}{d\bfm}\;=\;-2r^2(2S+T)^{-1}(\bfm-\bfz)-2\mu\frac{d\mu}{d\bfm}, 63 | \qquad\frac{d\sigma^2}{dS}\;=\;r^2\big 64 | [2(2S+T)^{-1}(\bfm-\bfz)(\bfm-\bfz)^\top -I\big](2S+T)^{-1} -2\mu\frac{d\mu}{dS}. 65 | \] 66 | \end{document} 67 | -------------------------------------------------------------------------------- /scenarios/cartDoublePendulum/cartDouble_learn.m: -------------------------------------------------------------------------------- 1 | %% cartDouble_learn.m 2 | % *Summary:* Script to learn a controller for the cart-doube-pendulum 3 | % swingup 4 | % 5 | % Copyright (C) 2008-2013 by 6 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 7 | % 8 | % Last modified: 2013-03-27 9 | % 10 | %% High-Level Steps 11 | % # Load parameters 12 | % # Create J initial trajectories by applying random controls 13 | % # Controlled learning (train dynamics model, policy learning, policy 14 | % application) 15 | 16 | %% Code 17 | 18 | % 1. Initialization 19 | clear all; close all; 20 | settings_cdp; % load scenario-specific settings 21 | basename = 'CartDoublePend_'; % filename used for saving data 22 | 23 | % 2. Initial J random rollouts 24 | for jj = 1:J 25 | [xx, yy, realCost{jj}, latent{jj}] = ... 26 | rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost); 27 | x = [x; xx]; y = [y; yy]; % augment training sets for dynamics model 28 | if plotting.verbosity > 0; % visualization of trajectory 29 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 30 | draw_rollout_cdp; 31 | end 32 | 33 | end 34 | 35 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0; 36 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno); 37 | 38 | % 3. Controlled learning (N iterations) 39 | for j = 1:N 40 | trainDynModel; % train (GP) dynamics model 41 | learnPolicy; % learn policy 42 | applyController; % apply controller to system 43 | disp(['controlled trial # ' num2str(j)]); 44 | if plotting.verbosity > 0; % visualization of trajectory 45 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 46 | draw_rollout_cdp; 47 | end 48 | end 49 | -------------------------------------------------------------------------------- /scenarios/cartDoublePendulum/draw_rollout_cdp.m: -------------------------------------------------------------------------------- 1 | %% draw_rollout_cdp.m 2 | % *Summary:* Script to draw a trajectory of the cart-double-pendulum system and 3 | % the predicted uncertainties around the tips of the pendulums 4 | % 5 | % Copyright (C) 2008-2013 by 6 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 7 | % 8 | % Last modified: 2013-03-27 9 | % 10 | %% High-Level Steps 11 | % # For each time step, plot the observed trajectory and the predicted 12 | % means and covariances of the Cartesian coordinates of the tips of both 13 | % pendulums 14 | 15 | %% Code 16 | for r = 1:size(xx,1) 17 | if exist('j','var') && ~isempty(M{j}) 18 | draw_cdp(latent{j}(r,1), latent{j}(r,5), latent{j}(r,6), ... 19 | latent{j}(r,end), cost, M{j}(:,r), Sigma{j}(:,:,r), ... 20 | ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ... 21 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 22 | ' sec']); 23 | else 24 | draw_cdp(latent{jj}(r,1), latent{jj}(r,5), latent{jj}(r,6), ... 25 | latent{jj}(r,end), cost, [], [], ... 26 | ['(random) trial # ' num2str(jj) ', T=' num2str(H*dt) ' sec'], ... 27 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 28 | ' sec']); 29 | end 30 | pause(dt); 31 | end 32 | -------------------------------------------------------------------------------- /scenarios/cartPole/cartPole_learn.m: -------------------------------------------------------------------------------- 1 | %% cartPole_learn.m 2 | % *Summary:* Script to learn a controller for the cart-pole swingup 3 | % 4 | % Copyright (C) 2008-2013 by 5 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 6 | % 7 | % Last modified: 2013-03-27 8 | % 9 | %% High-Level Steps 10 | % # Load parameters 11 | % # Create J initial trajectories by applying random controls 12 | % # Controlled learning (train dynamics model, policy learning, policy 13 | % application) 14 | 15 | %% Code 16 | 17 | % 1. Initialization 18 | clear all; close all; 19 | settings_cp; % load scenario-specific settings 20 | basename = 'cartPole_'; % filename used for saving data 21 | 22 | % 2. Initial J random rollouts 23 | for jj = 1:J 24 | [xx, yy, realCost{jj}, latent{jj}] = ... 25 | rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost); 26 | x = [x; xx]; y = [y; yy]; % augment training sets for dynamics model 27 | if plotting.verbosity > 0; % visualization of trajectory 28 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 29 | draw_rollout_cp; 30 | end 31 | 32 | end 33 | 34 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0; 35 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno); 36 | 37 | % 3. Controlled learning (N iterations) 38 | for j = 1:N 39 | trainDynModel; % train (GP) dynamics model 40 | learnPolicy; % learn policy 41 | applyController; % apply controller to system 42 | disp(['controlled trial # ' num2str(j)]); 43 | if plotting.verbosity > 0; % visualization of trajectory 44 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 45 | draw_rollout_cp; 46 | end 47 | end -------------------------------------------------------------------------------- /scenarios/cartPole/draw_cp.m: -------------------------------------------------------------------------------- 1 | %% draw_cp.m 2 | % *Summary:* Draw the cart-pole system with reward, applied force, and 3 | % predictive uncertainty of the tip of the pendulum 4 | % 5 | % function draw_cp(x, theta, force, cost, text1, text2, M, S) 6 | % 7 | % 8 | % *Input arguments:* 9 | % 10 | % x position of the cart 11 | % theta angle of pendulum 12 | % force force applied to cart 13 | % cost cost structure 14 | % .fcn function handle (it is assumed to use saturating cost) 15 | % .<> other fields that are passed to cost 16 | % M (optional) mean of state 17 | % S (optional) covariance of state 18 | % text1 (optional) text field 1 19 | % text2 (optional) text field 2 20 | % 21 | % 22 | % Copyright (C) 2008-2013 by 23 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 24 | % 25 | % Last modified: 2013-03-07 26 | 27 | function draw_cp(x, theta, force, cost, text1, text2, M, S) 28 | %% Code 29 | 30 | l = 0.6; 31 | xmin = -3; 32 | xmax = 3; 33 | height = 0.1; 34 | width = 0.3; 35 | maxU = 10; 36 | 37 | % Compute positions 38 | cart = [ x + width, height 39 | x + width, -height 40 | x - width, -height 41 | x - width, height 42 | x + width, height ]; 43 | pendulum = [x, 0; x+2*l*sin(theta), -cos(theta)*2*l]; 44 | 45 | 46 | clf; hold on 47 | plot(0,2*l,'k+','MarkerSize',20,'linewidth',2) 48 | plot([xmin, xmax], [-height-0.03, -height-0.03],'k','linewidth',2) 49 | 50 | % Plot force 51 | plot([0 force/maxU*xmax],[-0.3, -0.3],'g','linewidth',10) 52 | 53 | % Plot reward 54 | reward = 1-cost.fcn(cost,[x, 0, 0, theta]', zeros(4)); 55 | plot([0 reward*xmax],[-0.5, -0.5],'y','linewidth',10) 56 | 57 | % Plot the cart-pole 58 | fill(cart(:,1), cart(:,2),'k','edgecolor','k'); 59 | plot(pendulum(:,1), pendulum(:,2),'r','linewidth',4) 60 | 61 | % Plot the joint and the tip 62 | plot(x,0,'y.','markersize',24) 63 | plot(pendulum(2,1),pendulum(2,2),'y.','markersize',24) 64 | 65 | % plot ellipse around tip of pendulum (if M, S exist) 66 | try 67 | [M1 S1] = getPlotDistr_cp(M,S,2*l); 68 | error_ellipse(S1,M1,'style','b'); 69 | catch 70 | end 71 | 72 | % Text 73 | text(0,-0.3,'applied force') 74 | text(0,-0.5,'immediate reward') 75 | if exist('text1','var') 76 | text(0,-0.9, text1) 77 | end 78 | if exist('text2','var') 79 | text(0,-1.1, text2) 80 | end 81 | 82 | set(gca,'DataAspectRatio',[1 1 1],'XLim',[xmin xmax],'YLim',[-1.4 1.4]); 83 | axis off; 84 | drawnow; -------------------------------------------------------------------------------- /scenarios/cartPole/draw_rollout_cp.m: -------------------------------------------------------------------------------- 1 | %% draw_rollout_cp.m 2 | % *Summary:* Script to draw the most recent trajectory of the cart-pole 3 | % system together with the predicted uncertainties around the tip of the 4 | % pendulum 5 | % 6 | % Copyright (C) 2008-2013 by 7 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 8 | % 9 | % Last modified: 2013-05-20 10 | % 11 | %% High-Level Steps 12 | % # For each time step, plot the observed trajectory and the predicted 13 | % means and covariances of the Cartesian coordinates of the tip of the 14 | % pendulum 15 | 16 | %% Code 17 | 18 | % Loop over states in trajectory (= time steps) 19 | for r = 1:size(xx,1) 20 | if exist('j','var') && ~isempty(M{j}) 21 | draw_cp(latent{j}(r,1), latent{j}(r,4), latent{j}(r,end), cost, ... 22 | ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ... 23 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 24 | ' sec'], M{j}(:,r), Sigma{j}(:,:,r)); 25 | else 26 | draw_cp(latent{jj}(r,1), latent{jj}(r,4), latent{jj}(r,end), cost, ... 27 | ['(random) trial # ' num2str(jj) ', T=' num2str(H*dt) ' sec'], ... 28 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 29 | ' sec']) 30 | end 31 | pause(dt); 32 | end 33 | -------------------------------------------------------------------------------- /scenarios/cartPole/dynamics_cp.m: -------------------------------------------------------------------------------- 1 | %% dynamics_cp.m 2 | % *Summary:* Implements ths ODE for simulating the cart-pole dynamics. 3 | % 4 | % function dz = dynamics_cp(t, z, f) 5 | % 6 | % 7 | % *Input arguments:* 8 | % 9 | % t current time step (called from ODE solver) 10 | % z state [4 x 1] 11 | % f (optional): force f(t) 12 | % 13 | % *Output arguments:* 14 | % 15 | % dz if 3 input arguments: state derivative wrt time 16 | % if only 2 input arguments: total mechanical energy 17 | % 18 | % 19 | % Note: It is assumed that the state variables are of the following order: 20 | % x: [m] position of cart 21 | % dx: [m/s] velocity of cart 22 | % dtheta: [rad/s] angular velocity 23 | % theta: [rad] angle 24 | % 25 | % 26 | % A detailed derivation of the dynamics can be found in: 27 | % 28 | % M.P. Deisenroth: 29 | % Efficient Reinforcement Learning Using Gaussian Processes, Appendix C, 30 | % KIT Scientific Publishing, 2010. 31 | % 32 | % Copyright (C) 2008-2013 by 33 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 34 | % 35 | % Last modified: 2013-03-08 36 | 37 | function dz = dynamics_cp(t,z,f) 38 | %% Code 39 | 40 | l = 0.5; % [m] length of pendulum 41 | m = 0.5; % [kg] mass of pendulum 42 | M = 0.5; % [kg] mass of cart 43 | b = 0.1; % [N/m/s] coefficient of friction between cart and ground 44 | g = 9.82; % [m/s^2] acceleration of gravity 45 | 46 | if nargin==3 47 | dz = zeros(4,1); 48 | dz(1) = z(2); 49 | dz(2) = ( 2*m*l*z(3)^2*sin(z(4)) + 3*m*g*sin(z(4))*cos(z(4)) ... 50 | + 4*f(t) - 4*b*z(2) )/( 4*(M+m)-3*m*cos(z(4))^2 ); 51 | dz(3) = (-3*m*l*z(3)^2*sin(z(4))*cos(z(4)) - 6*(M+m)*g*sin(z(4)) ... 52 | - 6*(f(t)-b*z(2))*cos(z(4)) )/( 4*l*(m+M)-3*m*l*cos(z(4))^2 ); 53 | dz(4) = z(3); 54 | else 55 | dz = (M+m)*z(2)^2/2 + 1/6*m*l^2*z(3)^2 + m*l*(z(2)*z(3)-g)*cos(z(4))/2; 56 | end -------------------------------------------------------------------------------- /scenarios/cartPole/getPlotDistr_cp.m: -------------------------------------------------------------------------------- 1 | %% getPlotDistr_cp.m 2 | % *Summary:* Compute means and covariances of the Cartesian coordinates of 3 | % the tips both the inner and outer pendulum assuming that the joint state 4 | % $x$ of the cart-double-pendulum system is Gaussian, i.e., $x\sim N(m, s)$ 5 | % 6 | % 7 | % function [M, S] = getPlotDistr_cp(m, s, ell) 8 | % 9 | % 10 | % 11 | % *Input arguments:* 12 | % 13 | % m mean of full state [4 x 1] 14 | % s covariance of full state [4 x 4] 15 | % ell length of pendulum 16 | % 17 | % Note: this code assumes that the following order of the state: 18 | % 1: cart pos., 19 | % 2: cart vel., 20 | % 3: pendulum angular velocity, 21 | % 4: pendulum angle 22 | % 23 | % *Output arguments:* 24 | % 25 | % M mean of tip of pendulum [2 x 1] 26 | % S covariance of tip of pendulum [2 x 2] 27 | % 28 | % Copyright (C) 2008-2013 by 29 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 30 | % 31 | % Last modification: 2013-03-27 32 | % 33 | %% High-Level Steps 34 | % # Augment input distribution to complex angle representation 35 | % # Compute means of tips of pendulums (in Cartesian coordinates) 36 | % # Compute covariances of tips of pendulums (in Cartesian coordinates) 37 | 38 | function [M, S] = getPlotDistr_cp(m, s, ell) 39 | %% Code 40 | 41 | % 1. Augment input distribution to complex angle representation 42 | [m1 s1 c1] = gTrig(m,s,4,ell); % map input distribution through sin/cos 43 | m1 = [m; m1]; % mean of joint 44 | c1 = s*c1; % cross-covariance between input and prediction 45 | s1 = [s c1; c1' s1]; % covariance of joint 46 | 47 | % 2. Compute means of tips of pendulums (in Cartesian coordinates) 48 | M = [m1(1)+m1(5); -m1(6)]; 49 | 50 | % 3. Compute covariances of tips of pendulums (in Cartesian coordinates) 51 | s11 = s1(1,1) + s1(5,5) + s1(1,5) + s1(5,1); % x+l sin(theta) 52 | s22 = s1(6,6); % -l*cos(theta) 53 | s12 = -(s1(1,6)+s1(5,6)); % cov(x+l*sin(th), -l*cos(th) 54 | 55 | S = [s11 s12; s12' s22]; 56 | try 57 | chol(S); 58 | catch 59 | warning('matrix S not pos.def. (getPlotDistr)'); 60 | S = S + (1e-6 - min(eig(S)))*eye(2); 61 | end 62 | -------------------------------------------------------------------------------- /scenarios/doublePendulum/DoublePend_learn.m: -------------------------------------------------------------------------------- 1 | %% DoublePend_learn 2 | % *Summary:* Script to learn a controller for the double-pendulum 3 | % swingup with two actuated joints 4 | % 5 | % Copyright (C) 2008-2013 by 6 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 7 | % 8 | % Last modified: 2013-03-27 9 | % 10 | %% High-Level Steps 11 | % # Load parameters 12 | % # Create J initial trajectories by applying random controls 13 | % # Controlled learning (train dynamics model, policy learning, policy 14 | % application) 15 | 16 | %% Code 17 | 18 | % 1. Initialization 19 | clear all; close all; 20 | settings_dp; % load scenario-specific settings 21 | basename = 'doublepend_'; % filename used for saving data 22 | 23 | % 2. Initial J random rollouts 24 | for jj = 1:J 25 | [xx, yy, realCost{jj}, latent{jj}] = ... 26 | rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost); 27 | x = [x; xx]; y = [y; yy]; % augment training sets for dynamics model 28 | if plotting.verbosity > 0; % visualization of trajectory 29 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 30 | draw_rollout_dp; 31 | end 32 | end 33 | 34 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0; 35 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno); 36 | 37 | % 3. Controlled learning (N iterations) 38 | for j = 1:N 39 | trainDynModel; % train (GP) dynamics model 40 | learnPolicy; % learn policy 41 | applyController; % apply controller to system 42 | disp(['controlled trial # ' num2str(j)]); 43 | if plotting.verbosity > 0; % visualization of trajectory 44 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 45 | draw_rollout_dp; 46 | end 47 | end -------------------------------------------------------------------------------- /scenarios/doublePendulum/draw_rollout_dp.m: -------------------------------------------------------------------------------- 1 | %% draw_rollout_dp.m 2 | % *Summary:* Script to draw a trajectory of theobserved double-pendulum system 3 | % and the predicted uncertainties around the tips of the pendulums 4 | % 5 | % Copyright (C) 2008-2013 by 6 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 7 | % 8 | % Last modified: 2013-03-27 9 | % 10 | %% High-Level Steps 11 | % # For each time step, plot the observed trajectory and the predicted 12 | % means and covariances of the Cartesian coordinates of the tips of both 13 | % pendulums 14 | 15 | %% Code 16 | % Loop over states in trajectory 17 | for r = 1:size(xx,1) 18 | cost.t = r; 19 | if exist('j','var') && ~isempty(M{j}) 20 | draw_dp(latent{j}(r,3), latent{j}(r,4), latent{j}(r,end-1), ... 21 | latent{j}(r,end), cost, ... 22 | ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ... 23 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 24 | ' sec'], M{j}(:,r), Sigma{j}(:,:,r)); 25 | else 26 | draw_dp(latent{jj}(r,3), latent{jj}(r,4), latent{jj}(r,end-1), ... 27 | latent{jj}(r,end), cost, ... 28 | ['(random) trial # ' num2str(1) ', T=' num2str(H*dt) ' sec'], ... 29 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 30 | ' sec']) 31 | end 32 | pause(dt); 33 | end 34 | -------------------------------------------------------------------------------- /scenarios/doublePendulum/dynamics_dp.m: -------------------------------------------------------------------------------- 1 | %% dynamics_dp.m 2 | % *Summary:* Implements ths ODE for simulating the double pendulum 3 | % dynamics, where an input torque can be applied to both links, 4 | % f1:torque at inner joint, f2:torque at outer joint 5 | % 6 | % function dz = dynamics_dp(t, z, f1, f2) 7 | % 8 | % 9 | % *Input arguments:* 10 | % 11 | % t current time step (called from ODE solver) 12 | % z state [4 x 1] 13 | % f1 (optional): torque f1(t) applied to inner pendulum 14 | % f2 (optional): torque f2(t) applied to outer pendulum 15 | % 16 | % *Output arguments:* 17 | % 18 | % dz if 4 input arguments: state derivative wrt time 19 | % if only 2 input arguments: total mechanical energy 20 | % 21 | % Note: It is assumed that the state variables are of the following order: 22 | % dtheta1: [rad/s] angular velocity of inner pendulum 23 | % dtheta2: [rad/s] angular velocity of outer pendulum 24 | % theta1: [rad] angle of inner pendulum 25 | % theta2: [rad] angle of outer pendulum 26 | % 27 | % A detailed derivation of the dynamics can be found in: 28 | % 29 | % M.P. Deisenroth: 30 | % Efficient Reinforcement Learning Using Gaussian Processes, Appendix C, 31 | % KIT Scientific Publishing, 2010. 32 | % 33 | % 34 | % Copyright (C) 2008-2013 by 35 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 36 | % 37 | % Last modified: 2013-03-08 38 | 39 | function dz = dynamics_dp(t, z, f1, f2) 40 | %% Code 41 | m1 = 0.5; % [kg] mass of 1st link 42 | m2 = 0.5; % [kg] mass of 2nd link 43 | b1 = 0.0; % [Ns/m] coefficient of friction (1st joint) 44 | b2 = 0.0; % [Ns/m] coefficient of friction (2nd joint) 45 | l1 = 0.5; % [m] length of 1st pendulum 46 | l2 = 0.5; % [m] length of 2nd pendulum 47 | g = 9.82; % [m/s^2] acceleration of gravity 48 | I1 = m1*l1^2/12; % moment of inertia around pendulum midpoint (1st link) 49 | I2 = m2*l2^2/12; % moment of inertia around pendulum midpoint (2nd link) 50 | 51 | if nargin == 4 % compute time derivatives 52 | 53 | A = [l1^2*(0.25*m1+m2) + I1, 0.5*m2*l1*l2*cos(z(3)-z(4)); 54 | 0.5*m2*l1*l2*cos(z(3)-z(4)), l2^2*0.25*m2 + I2 ]; 55 | b = [g*l1*sin(z(3))*(0.5*m1+m2) - 0.5*m2*l1*l2*z(2)^2*sin(z(3)-z(4)) ... 56 | + f1(t)-b1*z(1); 57 | 0.5*m2*l2*(l1*z(1)^2*sin(z(3)-z(4))+g*sin(z(4))) + f2(t)-b2*z(2)]; 58 | x = A\b; 59 | 60 | dz = zeros(4,1); 61 | dz(1) = x(1); 62 | dz(2) = x(2); 63 | dz(3) = z(1); 64 | dz(4) = z(2); 65 | 66 | else % compute total mechanical energy 67 | dz = m1*l1^2*z(1)^2/8 + I1*z(1)^2/2 + m2/2*(l1^2*z(1)^2 ... 68 | + l2^2*z(2)^2/4 + l1*l2*z(1)*z(2)*cos(z(3)-z(4))) + I2*z(2)^2/2 ... 69 | + m1*g*l1*cos(z(3))/2 + m2*g*(l1*cos(z(3))+l2*cos(z(4))/2); 70 | end -------------------------------------------------------------------------------- /scenarios/pendubot/draw_rollout_pendubot.m: -------------------------------------------------------------------------------- 1 | %% draw_rollout_pendubot 2 | % *Summary:* Script to draw the most recent trajectory of the Pendubot system 3 | % and the predicted uncertainties around the tips of the pendulums 4 | % 5 | % Copyright (C) 2008-2013 by 6 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 7 | % 8 | % Last modified: 2013-03-27 9 | % 10 | % 11 | %% High-Level Steps 12 | % # For each time step, plot the observed trajectory and the predicted 13 | % means and covariances of the Cartesian coordinates of the tips of both 14 | % pendulums 15 | 16 | %% Code 17 | 18 | % Loop over states in trajectory 19 | for r = 1:size(xx,1) 20 | cost.t = r; 21 | if exist('j','var') && ~isempty(M{j}) 22 | draw_pendubot(latent{j}(r,3), latent{j}(r,4), latent{j}(r,end), cost, ... 23 | ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ... 24 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 25 | ' sec'], M{j}(:,r), Sigma{j}(:,:,r)); 26 | else 27 | draw_pendubot(latent{jj}(r,3), latent{jj}(r,4), latent{jj}(r,end), cost, ... 28 | ['(random) trial # ' num2str(jj) ', T=' num2str(H*dt) ' sec'], ... 29 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 30 | ' sec']) 31 | end 32 | pause(dt); 33 | end -------------------------------------------------------------------------------- /scenarios/pendubot/dynamics_pendubot.m: -------------------------------------------------------------------------------- 1 | %% dynamics_pendubot.m 2 | % *Summary:* Implements ths ODE for simulating the Pendubot 3 | % dynamics, where an input torque f can be applied to the inner link 4 | % 5 | % function dz = dynamics_pendubot(t,z,f) 6 | % 7 | % 8 | % *Input arguments:* 9 | % 10 | % t current time step (called from ODE solver) 11 | % z state [4 x 1] 12 | % f (optional): torque f(t) applied to inner pendulum 13 | % 14 | % *Output arguments:* 15 | % 16 | % dz if 3 input arguments: state derivative wrt time 17 | % if only 2 input arguments: total mechanical energy 18 | % 19 | % Note: It is assumed that the state variables are of the following order: 20 | % dtheta1: [rad/s] angular velocity of inner pendulum 21 | % dtheta2: [rad/s] angular velocity of outer pendulum 22 | % theta1: [rad] angle of inner pendulum 23 | % theta2: [rad] angle of outer pendulum 24 | % 25 | % A detailed derivation of the dynamics can be found in: 26 | % 27 | % M.P. Deisenroth: 28 | % Efficient Reinforcement Learning Using Gaussian Processes, Appendix C, 29 | % KIT Scientific Publishing, 2010. 30 | % 31 | % 32 | % Copyright (C) 2008-2013 by 33 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 34 | % 35 | % Last modified: 2013-03-08 36 | 37 | function dz = dynamics_pendubot(t,z,f) 38 | %% Code 39 | m1 = 0.5; % [kg] mass of 1st link 40 | m2 = 0.5; % [kg] mass of 2nd link 41 | b1 = 0.0; % [Ns/m] coefficient of friction (1st joint) 42 | b2 = 0.0; % [Ns/m] coefficient of friction (2nd joint) 43 | l1 = 0.5; % [m] length of 1st pendulum 44 | l2 = 0.5; % [m] length of 2nd pendulum 45 | g = 9.82; % [m/s^2] acceleration of gravity 46 | I1 = m1*l1^2/12; % moment of inertia around pendulum midpoint (inner link) 47 | I2 = m2*l2^2/12; % moment of inertia around pendulum midpoint (outer link) 48 | 49 | if nargin == 3 % compute time derivatives 50 | 51 | A = [l1^2*(0.25*m1+m2) + I1, 0.5*m2*l1*l2*cos(z(3)-z(4)); 52 | 0.5*m2*l1*l2*cos(z(3)-z(4)), l2^2*0.25*m2 + I2 ]; 53 | b = [g*l1*sin(z(3))*(0.5*m1+m2) - 0.5*m2*l1*l2*z(2)^2*sin(z(3)-z(4))... 54 | + f(t) - b1*z(1); 55 | 0.5*m2*l2*( l1*z(1)^2*sin(z(3)-z(4)) + g*sin(z(4)) ) - b2*z(2)]; 56 | x = A\b; 57 | 58 | dz = zeros(4,1); 59 | dz(1) = x(1); 60 | dz(2) = x(2); 61 | dz(3) = z(1); 62 | dz(4) = z(2); 63 | 64 | else % compute total mechanical energy 65 | dz = m1*l1^2*z(1)^2/8 + I1*z(1)^2/2 + m2/2*(l1^2*z(1)^2 ... 66 | + l2^2*z(2)^2/4 + l1*l2*z(1)*z(2)*cos(z(3)-z(4))) + I2*z(2)^2/2 ... 67 | + m1*g*l1*cos(z(3))/2 + m2*g*(l1*cos(z(3))+l2*cos(z(4))/2); 68 | end -------------------------------------------------------------------------------- /scenarios/pendubot/pendubot_learn.m: -------------------------------------------------------------------------------- 1 | %% pendubot_learn.m 2 | % *Summary:* Script to learn a controller for the pendubot 3 | % swingup (a double pendulum with the inner joint actuated) 4 | % 5 | % Copyright (C) 2008-2013 by 6 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 7 | % 8 | % Last modified: 2013-03-27 9 | % 10 | %% High-Level Steps 11 | % # Load parameters 12 | % # Create J initial trajectories by applying random controls 13 | % # Controlled learning (train dynamics model, policy learning, policy 14 | % application) 15 | 16 | %% Code 17 | 18 | % 1. Initialization 19 | clear all; close all; 20 | settings_pendubot; % load scenario-specific settings 21 | basename = 'pendubot_'; % filename used for saving data 22 | 23 | % 2. Initial J random rollouts 24 | for jj = 1:J 25 | [xx, yy, realCost{jj}, latent{jj}] = ... 26 | rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost); 27 | x = [x; xx]; y = [y; yy]; % augment training sets for dynamics model 28 | if plotting.verbosity > 0; % visualization of trajectory 29 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 30 | draw_rollout_pendubot; 31 | end 32 | end 33 | 34 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0; 35 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno); 36 | 37 | % 3. Controlled learning (N iterations) 38 | for j = 1:N 39 | trainDynModel; % train (GP) dynamics model 40 | learnPolicy; % learn policy 41 | applyController; % apply controller to system 42 | disp(['controlled trial # ' num2str(j)]); 43 | if plotting.verbosity > 0; % visualization of trajectory 44 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 45 | draw_rollout_pendubot; 46 | end 47 | end -------------------------------------------------------------------------------- /scenarios/pendulum/draw_pendulum.m: -------------------------------------------------------------------------------- 1 | %% draw_pendulum.m 2 | % *Summary:* Draw the pendulum system with reward, applied torque, 3 | % and predictive uncertainty of the tips of the pendulums 4 | % 5 | % function draw_pendulum(theta, torque, cost, text1, text2, M, S) 6 | % 7 | % 8 | % *Input arguments:* 9 | % 10 | % theta1 angle of inner pendulum 11 | % theta2 angle of outer pendulum 12 | % f1 torque applied to inner pendulum 13 | % f2 torque applied to outer pendulum 14 | % cost cost structure 15 | % .fcn function handle (it is assumed to use saturating cost) 16 | % .<> other fields that are passed to cost 17 | % text1 (optional) text field 1 18 | % text2 (optional) text field 2 19 | % M (optional) mean of state 20 | % S (optional) covariance of state 21 | % 22 | % 23 | % Copyright (C) 2008-2013 by 24 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 25 | % 26 | % Last modified: 2013-03-18 27 | 28 | 29 | function draw_pendulum(theta, torque, cost, text1, text2, M, S) 30 | %% Code 31 | 32 | l = 0.6; 33 | xmin = -1.2*l; 34 | xmax = 1.2*l; 35 | umax = 0.5; 36 | height = 0; 37 | 38 | % Draw pendulum 39 | pendulum = [0, 0; l*sin(theta), -l*cos(theta)]; 40 | clf; hold on 41 | plot(pendulum(:,1), pendulum(:,2),'r','linewidth',4) 42 | 43 | % plot ellipses around tips of pendulum (if M, S exist) 44 | try 45 | if max(max(S))>0 46 | err = linspace(-1,1,100)*sqrt(S(2,2)); 47 | plot(l*sin(M(2)+2*err),-l*cos(M(2)+2*err),'b','linewidth',1) 48 | plot(l*sin(M(2)+err),-l*cos(M(2)+err),'b','linewidth',2) 49 | plot(l*sin(M(2)),-l*cos(M(2)),'b.','markersize',20) 50 | end 51 | catch 52 | end 53 | 54 | % Draw useful information 55 | % target location 56 | plot(0,l,'k+','MarkerSize',20); 57 | plot([xmin, xmax], [-height, -height],'k','linewidth',2) 58 | % joint 59 | plot(0,0,'k.','markersize',24) 60 | plot(0,0,'y.','markersize',14) 61 | % tip of pendulum 62 | plot(l*sin(theta),-l*cos(theta),'k.','markersize',24) 63 | plot(l*sin(theta),-l*cos(theta),'y.','markersize',14) 64 | plot(0,-2*l,'.w','markersize',0.005) 65 | % applied torque 66 | plot([0 torque/umax*xmax],[-0.5, -0.5],'g','linewidth',10); 67 | % immediate reward 68 | reward = 1-cost.fcn(cost,[0, theta]',zeros(2)); 69 | plot([0 reward*xmax],[-0.7, -0.7],'y', 'linewidth',10); 70 | text(0,-0.5,'applied torque') 71 | text(0,-0.7,'immediate reward') 72 | if exist('text1','var') 73 | text(0,-0.9, text1) 74 | end 75 | if exist('text2','var') 76 | text(0,-1.1, text2) 77 | end 78 | 79 | set(gca,'DataAspectRatio',[1 1 1],'XLim',[xmin xmax],'YLim',[-2*l 2*l]); 80 | axis off; 81 | drawnow; -------------------------------------------------------------------------------- /scenarios/pendulum/draw_rollout_pendulum.m: -------------------------------------------------------------------------------- 1 | %% draw_rollout_pendulum.m 2 | % *Summary:* Script to draw a trajectory of the most recent pendulum trajectory 3 | % 4 | % Copyright (C) 2008-2013 by 5 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 6 | % 7 | % Last modified: 2013-03-27 8 | % 9 | %% High-Level Steps 10 | % # For each time step, plot the observed trajectory 11 | 12 | %% Code 13 | 14 | % Loop over states in trajectory 15 | for r = 1:size(xx,1) 16 | cost.t = r; 17 | if exist('j','var') && ~isempty(M{j}) 18 | draw_pendulum(latent{j}(r,2), latent{j}(r,end), cost, ... 19 | ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ... 20 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 21 | ' sec'], M{j}(:,r), Sigma{j}(:,:,r)); 22 | else 23 | draw_pendulum(latent{jj}(r,2), latent{jj}(r,end), cost, ... 24 | ['(random) trial # ' num2str(1) ', T=' num2str(H*dt) ' sec'], ... 25 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 26 | ' sec']) 27 | end 28 | pause(dt); 29 | end -------------------------------------------------------------------------------- /scenarios/pendulum/dynamics_pendulum.m: -------------------------------------------------------------------------------- 1 | %% dynamics_pendulum.m 2 | % *Summary:* Implements ths ODE for simulating the pendulum dynamics, where 3 | % an input torque f can be applied 4 | % 5 | % function dz = dynamics_pendulum(t,z,u) 6 | % 7 | % 8 | % *Input arguments:* 9 | % 10 | % t current time step (called from ODE solver) 11 | % z state [2 x 1] 12 | % u (optional): torque f(t) applied to pendulum 13 | % 14 | % *Output arguments:* 15 | % 16 | % dz if 3 input arguments: state derivative wrt time 17 | % 18 | % Note: It is assumed that the state variables are of the following order: 19 | % dtheta: [rad/s] angular velocity of pendulum 20 | % theta: [rad] angle of pendulum 21 | % 22 | % A detailed derivation of the dynamics can be found in: 23 | % 24 | % M.P. Deisenroth: 25 | % Efficient Reinforcement Learning Using Gaussian Processes, Appendix C, 26 | % KIT Scientific Publishing, 2010. 27 | % 28 | % 29 | % Copyright (C) 2008-2013 by 30 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 31 | % 32 | % Last modified: 2013-03-18 33 | 34 | function dz = dynamics_pendulum(t,z,u) 35 | %% Code 36 | 37 | l = 1; % [m] length of pendulum 38 | m = 1; % [kg] mass of pendulum 39 | g = 9.82; % [m/s^2] acceleration of gravity 40 | b = 0.01; % [s*Nm/rad] friction coefficient 41 | 42 | dz = zeros(2,1); 43 | dz(1) = ( u(t) - b*z(1) - m*g*l*sin(z(2))/2 ) / (m*l^2/3); 44 | dz(2) = z(1); -------------------------------------------------------------------------------- /scenarios/pendulum/pendulum_learn.m: -------------------------------------------------------------------------------- 1 | %% pendulum_learn.m 2 | % *Summary:* Script to learn a controller for the pendulum swingup 3 | % 4 | % Copyright (C) 2008-2013 by 5 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 6 | % 7 | % Last modified: 2013-03-27 8 | % 9 | %% High-Level Steps 10 | % # Load parameters 11 | % # Create J initial trajectories by applying random controls 12 | % # Controlled learning (train dynamics model, policy learning, policy 13 | % application) 14 | 15 | %% Code 16 | 17 | % 1. Initialization 18 | clear all; close all; 19 | settings_pendulum; % load scenario-specific settings 20 | basename = 'pendulum_'; % filename used for saving data 21 | 22 | % 2. Initial J random rollouts 23 | for jj = 1:J 24 | [xx, yy, realCost{jj}, latent{jj}] = ... 25 | rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost); 26 | x = [x; xx]; y = [y; yy]; % augment training sets for dynamics model 27 | if plotting.verbosity > 0; % visualization of trajectory 28 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 29 | draw_rollout_pendulum; 30 | end 31 | end 32 | 33 | mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0; 34 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno); 35 | 36 | % 3. Controlled learning (N iterations) 37 | for j = 1:N 38 | trainDynModel; % train (GP) dynamics model 39 | learnPolicy; % learn policy 40 | applyController; % apply controller to system 41 | disp(['controlled trial # ' num2str(j)]); 42 | if plotting.verbosity > 0; % visualization of trajectory 43 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 44 | draw_rollout_pendulum; 45 | end 46 | end -------------------------------------------------------------------------------- /scenarios/unicycle/augment_unicycle.m: -------------------------------------------------------------------------------- 1 | %% augment_unicycle.m 2 | % *Summary:* The function computes the $(x,y)$ velocities of the contact point 3 | % in both absolute and unicycle coordinates as well as the the unicycle 4 | % coordinates of the contact point themselves. 5 | % 6 | % function r = augment(s) 7 | % 8 | % *Input arguments:* 9 | % 10 | % s state of the unicycle (including the torques). [1 x 18] 11 | % The state is assumed to be given as follows: 12 | % dx empty (to be filled by this function) 13 | % dy empty (to be filled by this function) 14 | % dxc empty (to be filled by this function) 15 | % dyc empty (to be filled by this function) 16 | % dtheta roll angular velocity 17 | % dphi yaw angular velocity 18 | % dpsiw wheel angular velocity 19 | % dpsif pitch angular velocity 20 | % dpsit turn table angular velocity 21 | % x x position 22 | % y y position 23 | % xc empty (to be filled by this function) 24 | % yc empty (to be filled by this function) 25 | % theta roll angle 26 | % phi yaw angle 27 | % psiw wheel angle 28 | % psif pitch angle 29 | % psit turn table angle 30 | % 31 | % *Output arguments:* 32 | % 33 | % r additional variables that are computed based on s: [1 x 6] 34 | % dx x velocity of contact point (global coordinates) 35 | % dy y velocity of contact point (global coordinates) 36 | % dxc x velocity of contact point (unicycle coordinates) 37 | % dyc y velocity of contact point (unicycle coordinates) 38 | % xc x position of contact point (unicycle coordinates) 39 | % yc y position of contact point (unicycle coordinates) 40 | % 41 | % 42 | % Copyright (C) 2008-2013 by 43 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 44 | % 45 | % Last modified: 2013-03-27 46 | 47 | 48 | function r = augment_unicycle(s) 49 | %% Code 50 | 51 | rw = 0.225; % wheel radius in meters 52 | 53 | % x velocity of contact point (global coordinates) 54 | r(1) = rw*cos(s(15))*s(7); 55 | % y velocity of contact point (global coordinates) 56 | r(2) = rw*sin(s(15))*s(7); 57 | % (x,y) velocities of contact point (unicycle coordinates) 58 | A = -[cos(s(15)) sin(s(15)); -sin(s(15)) cos(s(15))]; 59 | dA = -s(6)*[-sin(s(15)) cos(s(15)); -cos(s(15)) -sin(s(15))]; 60 | r(3:4) = A*r(1:2)' + dA*s(10:11)'; 61 | % (x,y) coordinates of contact point (unicycle coordinates) 62 | r(5:6) = A*s(10:11)'; 63 | 64 | -------------------------------------------------------------------------------- /scenarios/unicycle/draw_rollout_unicycle.m: -------------------------------------------------------------------------------- 1 | %% draw_rollout_unicycle.m 2 | % *Summary:* Script to draw a rollout of the unicycle 3 | % 4 | % Copyright (C) 2008-2013 by 5 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 6 | % 7 | % Last modified: 2013-04-04 8 | % 9 | %% High-Level Steps 10 | % # For each time step, plot the observed trajectory, the applied torques, 11 | % and the incurred cost 12 | 13 | %% Code 14 | 15 | if j > J 16 | 17 | draw_unicycle(xx, plant, plant.dt/10, cost, ... 18 | ['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ... 19 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 20 | ' sec']); 21 | else 22 | draw_unicycle(xx, plant, plant.dt/10, cost, ... 23 | ['(random) trial # ' num2str(jj) ', T=' num2str(H*dt) ' sec'], ... 24 | ['total experience (after this trial): ' num2str(dt*size(x,1)) ... 25 | ' sec']); 26 | end 27 | -------------------------------------------------------------------------------- /scenarios/unicycle/loss.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCL-SML/pilco-matlab/a0b48b7831911837d060617903c76c22e4180d0b/scenarios/unicycle/loss.pdf -------------------------------------------------------------------------------- /scenarios/unicycle/unicycle_learn.m: -------------------------------------------------------------------------------- 1 | %% unicycle_learn.m 2 | % *Summary:* Script to learn a controller for unicycling 3 | % 4 | % Copyright (C) 2008-2013 by 5 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 6 | % 7 | % Last modified: 2013-03-27 8 | % 9 | %% High-Level Steps 10 | % # Load parameters 11 | % # Create J initial trajectories by applying random controls 12 | % # Controlled learning (train dynamics model, policy learning, policy 13 | % application) 14 | 15 | %% Code 16 | 17 | 18 | % 1. Initialization 19 | clear all; close all; 20 | settings_unicycle; % load scenario-specific settings 21 | basename = 'unicycle_'; % filename used for saving data 22 | 23 | % 2. Initial J random rollouts 24 | for jj = 1:J % get the first observations 25 | [xx, yy, realCost{jj}, latent{jj}] = ... 26 | rollout(gaussian(mu0, S0), struct('maxU',policy.maxU/5), H, plant, cost); 27 | x = [x; xx]; y = [y; yy]; 28 | if plotting.verbosity > 0; % visualization of trajectory 29 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 30 | draw_rollout_unicycle; 31 | end 32 | end 33 | 34 | z(odei,:) = bsxfun(@plus, mu0, chol(S0)'*randn(length(odei),1000)); % compute 35 | for i = 1:size(z,2), z(augi,i) = plant.augment(z(:,i)'); end % the distribution 36 | mu0Sim = mean(z,2); S0Sim = cov(z'); % of augmented start state by MCMC 37 | mu0Sim(odei) = mu0; S0Sim(odei,odei) = S0; % Put in known correct values 38 | mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno); clear z i; 39 | 40 | % 3. Controlled learning (N iterations) 41 | for j = 1:N 42 | trainDynModel; 43 | learnPolicy; 44 | applyController; 45 | disp(['controlled trial # ' num2str(j)]); 46 | if plotting.verbosity > 0; % visualization of trajectory 47 | if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); 48 | draw_rollout_unicycle; 49 | end 50 | end 51 | -------------------------------------------------------------------------------- /test/checkgrad.m: -------------------------------------------------------------------------------- 1 | %% checkgrad.m 2 | % *Summary:* checkgrad checks the derivatives in a function, by comparing them 3 | % to finite differences approximations. The partial derivatives and the 4 | % approximation are printed and the norm of the difference divided by the 5 | % norm of the sum is returned as an indication of accuracy. 6 | % 7 | % function [d dy dh] = checkgrad(f, X, e, varargin) 8 | % 9 | % 10 | % *Input arguments:* 11 | % 12 | % f function handle to function that needs to be checked. 13 | % The function f should be of the type [fX, dfX] = f(X, varargin) 14 | % where fX is the function value and dfX is the gradient of fX 15 | % with respect to the parameters X 16 | % X parameters (can be a vector or a struct) 17 | % e small perturbation used for finite differences (1e-4 is good) 18 | % varargin other arguments that are passed on to the function f 19 | % 20 | % 21 | % *Output arguments:* 22 | % 23 | % d relative error of analytical vs. finite difference gradient 24 | % dy analytical gradient 25 | % dh finite difference gradient 26 | % 27 | % 28 | % Copyright (C) 2008-2013 by 29 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 30 | % 31 | % Last modified: 2013-03-21 32 | % 33 | %% High-Level Steps 34 | % # Analytical gradient 35 | % # Numerical gradient via finite differences 36 | % # Relative error 37 | 38 | function [d dy dh] = checkgrad(f, X, e, varargin) 39 | %% Code 40 | 41 | % 1. Analytical gradient 42 | Z = unwrap(X); NZ = length(Z); % number of input variables 43 | [y dy] = feval(f, X, varargin{:}); % get the partial derivatives dy 44 | [D E] = size(y); y = y(:); Ny = length(y); % number of output variables 45 | if iscell(dy) || isstruct(dy); dy = unwrap(dy); end; 46 | dy = reshape(dy,Ny,NZ); 47 | 48 | % 2. Finite difference approximation 49 | dh = zeros(Ny,NZ); 50 | for j = 1:NZ 51 | dx = zeros(length(Z),1); 52 | dx(j) = dx(j) + e; % perturb a single dimension 53 | y2 = feval(f, rewrap(X,Z+dx), varargin{:}); 54 | y1 = feval(f, rewrap(X,Z-dx), varargin{:}); 55 | dh(:,j) = (y2(:) - y1(:))/(2*e); 56 | end 57 | 58 | % 3. Compute error 59 | % norm of diff divided by norm of sum 60 | d = sqrt(sum((dh-dy).^2,2)./sum((dh+dy).^2,2)); 61 | small = max(abs([dy dh]),[],2) < 1e-5; % small derivatives are poorly tested ... 62 | d(d > 1e-3 & small) = NaN; % ... by finite differences 63 | d = reshape(d,D,E); 64 | 65 | disp(' Analytic Numerical'); 66 | for i=1:Ny; 67 | disp([dy(i,:)' dh(i,:)']); % print the two vectors 68 | fprintf('d = %e\n\n',d(i)) 69 | end 70 | 71 | if Ny > 1; disp('For all outputs, d = '); disp(d); end; fprintf('\n'); 72 | -------------------------------------------------------------------------------- /test/valueT.m: -------------------------------------------------------------------------------- 1 | %% valueT.m 2 | % *Summary:* Test derivatives of the propagate function, which computes the 3 | % mean and the variance of the successor state distribution, assuming that the 4 | % current state is Gaussian distributed with mean m and covariance matrix 5 | % s. 6 | % 7 | % [d dy dh] = valueT(p, delta, m, s, dynmodel, policy, plant, cost, H) 8 | % 9 | % 10 | % *Input arguments:* 11 | % 12 | % p policy parameters (can be a structure) 13 | % .<> fields that contain the policy parameters (nothing else) 14 | % m mean of the input distribution 15 | % s covariance of the input distribution 16 | % dynmodel GP dynamics model (structure) 17 | % policy policy structure 18 | % plant plant structure 19 | % cost cost structure 20 | % H prediction horizon 21 | % delta (optional) finite difference parameter. Default: 1e-4 22 | % 23 | % 24 | % *Output arguments:* 25 | % 26 | % dd relative error of analytical vs. finite difference gradient 27 | % dy analytical gradient 28 | % dh finite difference gradient 29 | % 30 | % Copyright (C) 2008-2013 by 31 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 32 | % 33 | % Last modified: 2013-03-21 34 | 35 | function [d dy dh] = valueT(p, m, s, dynmodel, policy, plant, cost, H, delta) 36 | %% Code 37 | 38 | if nargin < 9; delta = 1e-4; end 39 | if nargin < 8; H = 4; end 40 | 41 | % call checkgrad directly 42 | [d dy dh] = checkgrad('value',p,delta,m,s,dynmodel,policy,plant,cost,H); 43 | -------------------------------------------------------------------------------- /util/gSat.m: -------------------------------------------------------------------------------- 1 | %% gSat.m 2 | % *Summary:* Compute moments of the saturating function 3 | % $e*(9*\sin(x(i))+\sin(3*x(i)))/8$, 4 | % where $x \sim\mathcal N(m,v)$ and $i$ is a (possibly empty) set of $I$ 5 | % indices. The optional scaling factor $e$ is a vector of length $I$. 6 | % Optionally, compute derivatives of the moments. 7 | % 8 | % function [M, S, C, dMdm, dSdm, dCdm, dMdv, dSdv, dCdv] = gSat(m, v, i, e) 9 | % 10 | % *Input arguments:* 11 | % 12 | % m mean vector of Gaussian [ d ] 13 | % v covariance matrix [ d x d ] 14 | % i vector of indices of elements to augment [ I x 1 ] 15 | % e (optional) scale vector; default: 1 [ I x 1 ] 16 | % 17 | % *Output arguments:* 18 | % 19 | % M output means [ I ] 20 | % V output covariance matrix [ I x I ] 21 | % C inv(v) times input-output covariance [ d x I ] 22 | % dMdm derivatives of M w.r.t m [ I x d ] 23 | % dVdm derivatives of V w.r.t m [I*^2 x d ] 24 | % dCdm derivatives of C w.r.t m [d*I x d ] 25 | % dMdv derivatives of M w.r.t v [ I x d^2] 26 | % dVdv derivatives of V w.r.t v [I^2 x d^2] 27 | % dCdv derivatives of C w.r.t v [d*I x d^2] 28 | % 29 | % 30 | % 31 | % Copyright (C) 2008-2013 by 32 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 33 | % 34 | % Last modified: 2013-03-25 35 | 36 | function [M, S, C, dMdm, dSdm, dCdm, dMdv, dSdv, dCdv] = gSat(m, v, i, e) 37 | %% Code 38 | d = length(m); I = length(i); i = i(:)'; 39 | if nargin < 4; e = ones(1, I); end; e = e(:)'; 40 | 41 | P = [eye(d); 3*eye(d)]; % augment inputs 42 | ma = P*m; madm = P; 43 | va = P*v*P'; vadv = kron(P,P); va = (va+va')/2; 44 | 45 | % do the actual augmentation with the right parameters 46 | [M2, S2, C2, Mdma, Sdma, Cdma, Mdva, Sdva, Cdva] ... 47 | = gSin(ma, va, [i d+i], [9*e e]/8); 48 | 49 | P = [eye(I) eye(I)]; Q = [eye(d) 3*eye(d)]; 50 | M = P*M2; % mean 51 | S = P*S2*P'; S = (S+S')/2; % variance 52 | C = Q*C2*P'; % inv(v) times input-output cov 53 | 54 | if nargout > 3 % derivatives if required 55 | dMdm = P*Mdma*madm; dMdv = P*Mdva*vadv; 56 | dSdm = kron(P,P)*Sdma*madm; dSdv = kron(P,P)*Sdva*vadv; 57 | dCdm = kron(P,Q)*Cdma*madm; dCdv = kron(P,Q)*Cdva*vadv; 58 | end 59 | -------------------------------------------------------------------------------- /util/gaussian.m: -------------------------------------------------------------------------------- 1 | %% gaussian.m 2 | % *Summary:* Generate n samples from a Gaussian $p(x)=\mathcal N(m,S). 3 | % Sampling is based on the Cholesky factorization of the covariance matrix S 4 | % 5 | % function x = gaussian(m, S, n) 6 | % 7 | % *Input arguments:* 8 | % 9 | % m mean of Gaussian [D x 1] 10 | % S covariance matrix of Gaussian [D x D] 11 | % n (optional) number of samples; default: n=1 12 | % 13 | % *Output arguments:* 14 | % 15 | % x matrix of samples from Gaussian [D x n] 16 | % 17 | % 18 | % Copyright (C) 2008-2013 by 19 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 20 | % 21 | % Last modified: 2013-03-21 22 | 23 | function x = gaussian(m, S, n) 24 | %% Code 25 | 26 | if nargin < 3; n = 1; end 27 | 28 | x = bsxfun(@plus, m(:), chol(S)'*randn(size(S,2),n)); 29 | -------------------------------------------------------------------------------- /util/maha.m: -------------------------------------------------------------------------------- 1 | %% maha.m 2 | % *Summary:* Point-wise squared Mahalanobis distance (a-b)*Q*(a-b)'. 3 | % Vectors are row-vectors 4 | % 5 | % function K = maha(a, b, Q) 6 | % 7 | % *Input arguments:* 8 | % 9 | % a matrix containing n row vectors [n x D] 10 | % b matrix containing n row vectors [n x D] 11 | % Q weight matrix. Default: eye(D) [D x D] 12 | % 13 | % 14 | % *Output arguments:* 15 | % K point-wise squared distances [n x n] 16 | % 17 | % Copyright (C) 2008-2013 by 18 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 19 | % 20 | % Last modified: 2013-03-21 21 | 22 | function K = maha(a, b, Q) 23 | %% Code 24 | 25 | if nargin == 2 % assume unit Q 26 | K = bsxfun(@plus,sum(a.*a,2),sum(b.*b,2)')-2*a*b'; 27 | else 28 | aQ = a*Q; K = bsxfun(@plus,sum(aQ.*a,2),sum(b*Q.*b,2)')-2*aQ*b'; 29 | end -------------------------------------------------------------------------------- /util/rewrap.m: -------------------------------------------------------------------------------- 1 | %% rewrap.m 2 | % *Summary:* Map the numerical elements in the vector $v$ onto the variables 3 | % $s$, which can be of any type. The number of numerical elements must match; 4 | % on exit, $v$ should be empty. Non-numerical entries are just copied. 5 | % See also the reverse unwrap.m. 6 | % 7 | % [s v] = rewrap(s, v) 8 | % 9 | % *Input arguments:* 10 | % 11 | % s structure, cell, or numeric values 12 | % v structure, cell, or numeric values 13 | % 14 | % 15 | % *Output arguments:* 16 | % 17 | % s structure, cell, or numeric values 18 | % v [empty] 19 | % 20 | % 21 | % Copyright (C) 2008-2013 by 22 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 23 | % 24 | % Last modified: 2013-03-25 25 | 26 | function [s v] = rewrap(s, v) 27 | %% Code 28 | 29 | if isnumeric(s) 30 | if numel(v) < numel(s) 31 | error('The vector for conversion contains too few elements') 32 | end 33 | s = reshape(v(1:numel(s)), size(s)); % numeric values are reshaped 34 | v = v(numel(s)+1:end); % remaining arguments passed on 35 | elseif isstruct(s) 36 | [s p] = orderfields(s); p(p) = 1:numel(p); % alphabetize, store ordering 37 | [t v] = rewrap(struct2cell(s), v); % convert to cell, recurse 38 | s = orderfields(cell2struct(t,fieldnames(s),1),p); % conv to struct, reorder 39 | elseif iscell(s) 40 | for i = 1:numel(s) % cell array elements are handled sequentially 41 | [s{i} v] = rewrap(s{i}, v); 42 | end 43 | end -------------------------------------------------------------------------------- /util/solve_chol.c: -------------------------------------------------------------------------------- 1 | /* solve_chol - solve a linear system A*X = B using the cholesky factorization 2 | of A (where A is square, symmetric and positive definite. 3 | 4 | Copyright (c) 2004 Carl Edward Rasmussen. 2004-10-19. */ 5 | 6 | #include "mex.h" 7 | #include "math.h" 8 | #include "string.h" 9 | #include "lapack.h" 10 | 11 | 12 | 13 | /*extern int dpotrs_(char *, int *, int *, double *, int *, double *, int *, int *);*/ 14 | 15 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 16 | { 17 | double *C; 18 | mwSignedIndex n, m, q; 19 | 20 | if (nrhs != 2 || nlhs > 1) /* check the input */ 21 | mexErrMsgTxt("Usage: X = solve_chol(R, B)"); 22 | n = mxGetN(prhs[0]); 23 | if (n != mxGetM(prhs[0])) 24 | mexErrMsgTxt("Error: First argument matrix must be square"); 25 | if (n != mxGetM(prhs[1])) 26 | mexErrMsgTxt("Error: First and second argument matrices must have same number of rows"); 27 | m = mxGetN(prhs[1]); 28 | 29 | plhs[0] = mxCreateDoubleMatrix(n, m, mxREAL); /* allocate space for output */ 30 | C = mxGetPr(plhs[0]); 31 | 32 | if (n==0) return; /* if argument was empty matrix, do no more */ 33 | memcpy(C,mxGetPr(prhs[1]),n*m*sizeof(double)); /* copy argument matrix */ 34 | dpotrs("U", &n, &m, mxGetPr(prhs[0]), &n, C, &n, &q); /* solve system */ 35 | if (q > 0) 36 | mexErrMsgTxt("Error: illegal input to solve_chol"); 37 | } 38 | -------------------------------------------------------------------------------- /util/solve_chol.m: -------------------------------------------------------------------------------- 1 | %% solve_chol.m 2 | % solve_chol - solve linear equations from the Cholesky factorization. 3 | % Solve A*X = B for X, where A is square, symmetric, positive definite. The 4 | % input to the function is R the Cholesky decomposition of A and the matrix B. 5 | % Example: X = solve_chol(chol(A),B); 6 | % 7 | % NOTE: The program code is written in the C language for efficiency and is 8 | % contained in the file solve_chol.c, and should be compiled using matlabs mex 9 | % facility. However, this file also contains a (less efficient) matlab 10 | % implementation, supplied only as a help to people unfamiliar with mex. If 11 | % the C code has been properly compiled and is avaiable, it automatically 12 | % takes precendence over the matlab code in this file. 13 | % 14 | % Copyright (c) 2004, 2005, 2006 by Carl Edward Rasmussen. 2006-02-08. 15 | 16 | %% Code 17 | function x = solve_chol(A, B) 18 | 19 | if nargin ~= 2 | nargout > 1 20 | error('Wrong number of arguments.'); 21 | end 22 | 23 | if size(A,1) ~= size(A,2) | size(A,1) ~= size(B,1) 24 | error('Wrong sizes of matrix arguments.'); 25 | end 26 | 27 | x = A\(A'\B); 28 | -------------------------------------------------------------------------------- /util/sq_dist.c: -------------------------------------------------------------------------------- 1 | /* sq_dist - a mex function to compute a matrix of all pairwise squared 2 | distances between two sets of vectors, stored in the columns of the two 3 | matrices that are arguments to the function. The length of the vectors must 4 | agree. If only a single argument is given, the missing argument is taken to 5 | be identical to the first. If an optional third matrix argument Q is given, 6 | it must be of the same size as the output, but in this case a vector of the 7 | traces of the product of Q and the coordinatewise squared distances is 8 | returned. 9 | 10 | Copyright (c) 2003, 2004 Carl Edward Rasmussen. 2003-04-22. */ 11 | 12 | #include "mex.h" 13 | #include 14 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 15 | { 16 | double *a, *b, *C, *Q, z, t; 17 | int D, n, m, i, j, k; 18 | if (nrhs < 1 || nrhs > 3 || nlhs > 1) 19 | mexErrMsgTxt("Usage: C = sq_dist(a,b)\n or: C = sq_dist(a)\n or: c = sq_dist(a,b,Q)\nwhere the b matrix may be empty."); 20 | a = mxGetPr(prhs[0]); 21 | m = mxGetN(prhs[0]); 22 | D = mxGetM(prhs[0]); 23 | if (nrhs == 1 || mxIsEmpty(prhs[1])) { 24 | b = a; 25 | n = m; 26 | } else { 27 | b = mxGetPr(prhs[1]); 28 | n = mxGetN(prhs[1]); 29 | if (D != mxGetM(prhs[1])) 30 | mexErrMsgTxt("Error: column lengths must agree"); 31 | } 32 | if (nrhs < 3) { 33 | plhs[0] = mxCreateDoubleMatrix(m, n, mxREAL); 34 | C = mxGetPr(plhs[0]); 35 | for (i=0; i 3 | nargout > 1 33 | error('Wrong number of arguments.'); 34 | end 35 | 36 | if nargin == 1 | isempty(b) % input arguments are taken to be 37 | b = a; % identical if b is missing or empty 38 | end 39 | 40 | [D, n] = size(a); 41 | [d, m] = size(b); 42 | if d ~= D 43 | error('Error: column lengths must agree.'); 44 | end 45 | 46 | if nargin < 3 47 | C = zeros(n,m); 48 | for d = 1:D 49 | C = C + (repmat(b(d,:), n, 1) - repmat(a(d,:)', 1, m)).^2; 50 | end 51 | % C = repmat(sum(a.*a)',1,m)+repmat(sum(b.*b),n,1)-2*a'*b could be used to 52 | % replace the 3 lines above; it would be faster, but numerically less stable. 53 | else 54 | if [n m] == size(Q) 55 | C = zeros(D,1); 56 | for d = 1:D 57 | C(d) = sum(sum((repmat(b(d,:), n, 1) - repmat(a(d,:)', 1, m)).^2.*Q)); 58 | end 59 | else 60 | error('Third argument has wrong size.'); 61 | end 62 | end 63 | -------------------------------------------------------------------------------- /util/squash.tex: -------------------------------------------------------------------------------- 1 | \documentclass[11pt, a4paper]{article} 2 | \renewcommand{\rmdefault}{psbx} 3 | \usepackage[utf8]{inputenc} 4 | \usepackage[T1]{fontenc} 5 | \usepackage{textcomp} 6 | \usepackage{eulervm} 7 | \usepackage{amsmath} 8 | \usepackage{amssymb} 9 | \usepackage{wrapfig} 10 | 11 | \setlength{\textwidth}{160mm} 12 | \setlength{\oddsidemargin}{0mm} 13 | \setlength{\parindent}{0 mm} 14 | 15 | \renewcommand{\vec}{\boldsymbol} 16 | \newcommand{\mat}{\boldsymbol} 17 | \newcommand{\E}{{\mathbb E}} 18 | \newcommand{\V}{{\mathbb V}} 19 | 20 | \title{Policy Squashing} 21 | \author{Marc Peter Deisenroth} 22 | \date{\today} 23 | 24 | 25 | \begin{document} 26 | \maketitle 27 | 28 | 29 | We consider a third-order Fourier series expansion 30 | % 31 | \begin{align*} 32 | s(x) = a\sin(x) + b\sin(3x) 33 | \end{align*} 34 | % 35 | % 36 | % \begin{wrapfigure}{r}{0.5\hsize} 37 | % \vspace{-5mm} 38 | % \centering 39 | % \includegraphics[width = \hsize]{./figures/squashing_function} 40 | % \caption{Different squashing functions.} 41 | % \label{fig:squashing function} 42 | % \end{wrapfigure} 43 | % 44 | of a trapezoidal wave with modified Fourier coefficients $a$ and $b$ 45 | that satisfy the following constraints: First, when restricted to the 46 | interval $[-\tfrac{\pi}{2}, \tfrac{\pi}{2}]$, $s(x)$ is monotonically 47 | increasing, i.e., $s^\prime(x) \geq 0$ with $s^\prime(x)=0$ if and 48 | only if $x\in\{-\tfrac{\pi}{2}, \tfrac{\pi}{2}\}$. Second, at the 49 | boundaries $\pm\tfrac{\pi}{2}$, we require that $s(\tfrac{\pi}{2}) = 50 | 1$ and $s(-\tfrac{\pi}{2}) = -1$. To closely approximate a (normalized) 51 | trapezoidal wave, we additionally require that $s$ has stationary 52 | points at the boundaries $\pm\tfrac{\pi}{2}$, i.e., 53 | $s^{\prime\prime}(\pm\tfrac{\pi}{2}) = 0$. The first and second 54 | derivatives of $s(x)$ are 55 | % 56 | \begin{align*} 57 | s^\prime(x) &= a\cos (x) + 3b\cos(3x)\,,\\ 58 | s^{\prime\prime}(x) &= -a\sin(x) - 9b\sin(3x)\,. 59 | \end{align*} 60 | % 61 | From $s^\prime(\pm\tfrac{\pi}{2})=0$, we obtain $a = 1+b$. For 62 | $b>\tfrac{1}{8}$, $s(x)$ is no longer monotonically increasing in 63 | $[-\tfrac{\pi}{2}, \tfrac{\pi}{2}]$: The first derivative 64 | $s^\prime(x)$ crosses 0 for $b=-\tfrac{\cos x}{\cos x + 65 | 3\cos(3x)}$. For $b>\tfrac{1}{8}$, $s(x)$ has a local optimum for 66 | $x\in(-\tfrac{\pi}{2},\tfrac{\pi}{2})$. Therefore, 67 | $b\in[0,\tfrac{1}{8}$ are valid choices. Using now any of the 68 | stationary point constraints yields $s^{\prime\prime}(-\tfrac{\pi}{2}) 69 | = 1+b - 9b = 0 \Leftrightarrow b = \tfrac{1}{8}$ and $a = 1+b = 70 | \tfrac{9}{8}$. Hence, our squashing function is 71 | % 72 | \begin{align*} 73 | s(x) = \tfrac{9}{8}\sin(x) + \tfrac{1}{8}\sin(3x)\,. 74 | \end{align*} 75 | % 76 | For $b=0$, the squashing function equals the 77 | sine wave (red), the black dashed curve is the squashing function for 78 | $b=\tfrac{1}{20}$. However, only for $b=\tfrac{1}{8}$, the squashing 79 | function possesses stationary points at the boundaries. 80 | 81 | 82 | \end{document} 83 | 84 | %%% Local Variables: 85 | %%% mode: latex 86 | %%% TeX-master: t 87 | %%% End: 88 | -------------------------------------------------------------------------------- /util/unwrap.m: -------------------------------------------------------------------------------- 1 | %% unwrap.m 2 | % *Summary:* Extract the numerical values from $s$ into the column vector $v$. 3 | % The variable $sS can be of any type, including struct and cell array. 4 | % Non-numerical elements are ignored. See also the reverse rewrap.m. 5 | % 6 | % v = unwrap(s) 7 | % 8 | % *Input arguments:* 9 | % 10 | % s structure, cell, or numeric values 11 | % 12 | % 13 | % *Output arguments:* 14 | % 15 | % v structure, cell, or numeric values 16 | % 17 | % 18 | % Copyright (C) 2008-2013 by 19 | % Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. 20 | % 21 | % Last modified: 2013-03-25 22 | 23 | function v = unwrap(s) 24 | %% Code 25 | 26 | v = []; 27 | if isnumeric(s) 28 | v = s(:); % numeric values are recast to column vector 29 | elseif isstruct(s) 30 | v = unwrap(struct2cell(orderfields(s))); % alphabetize, conv to cell, recurse 31 | elseif iscell(s) 32 | for i = 1:numel(s) % cell array elements are handled sequentially 33 | v = [v; unwrap(s{i})]; 34 | end 35 | end --------------------------------------------------------------------------------