├── .gitignore ├── README.md ├── ae4803-hw-1-report-v1.1.pdf ├── cart-pole.gif └── src ├── CartPoleDynamics.m ├── Cost.m ├── Dynamics.m ├── InvertedPendulumDynamics.m ├── QuadraticCost.m ├── ddp.m ├── main_cart_pole.m └── main_inverted_pendulum.m /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.m~ 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Differential Dynamic Programming Simulation Using MATLAB 2 | 3 | This code is a MATLAB implementation of the **Differential Dynamic Programming** (DDP) algorithm, implemented in **MATLAB**, and utilized to control a simulated inverted pendulum and a simulated cart-pole system. This project is for the AE4803-ROB class, *Robotic Systems and Autonomy*, taught by Prof. Evangelos Theodorou at the Georgia Institute of Technology during the spring 2020 semester. 4 | 5 | The DDP algorithm is a powerful method for optimal control, that can be used to control many different types of systems from a variety of application domains. It is for this reason that the approach to the implementation in this code was to create a DDP routine that can be used for any system without the need to change any of the actual algorithm. The two things that chage between problems that could utilize DDP are: 6 | 7 | 1. Dynamics of the system 8 | 2. Cost functions used in the optimization 9 | 10 | The approach to ensuring that this code can effectively used for many different problems efficiently is abstracting these two elements and create abstract classes for both the dynamics and the cost functions. These classes are defined in [`Dynamics.m`](src/Dynamics.m) and [`Cost.m`](src/Cost.m) as `Dynamics` and `Cost`, respectively. This way, anyone who wants to apply the DDP algorithm to their problem of interest need only write: 11 | 12 | 1. A class that inherits `Dynamics` and implements its abstract methods (e.g. `CartPoleDynamics` in [`CartPoleDynamics.m`](src/CartPoleDynamics.m)) 13 | 2. A class that inherits `Cost` and implements its abstract methods (e.g. `QuadraticCost` in [`QuadraticCost.m`](src/QuadraticCost.m)) 14 | 15 | The code that performs the actual DDP procedure to generate an optimal control sequence is implemented as a routine in [`ddp.m`](src/ddp.m), and does not need to be changed at all to use DDP for a specific problem. 16 | 17 |

18 | -------------------------------------------------------------------------------- /ae4803-hw-1-report-v1.1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maitreyakv/ddp-simulation/b73c4c13bf560b5a1cc720977fb5d08e53776f5a/ae4803-hw-1-report-v1.1.pdf -------------------------------------------------------------------------------- /cart-pole.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maitreyakv/ddp-simulation/b73c4c13bf560b5a1cc720977fb5d08e53776f5a/cart-pole.gif -------------------------------------------------------------------------------- /src/CartPoleDynamics.m: -------------------------------------------------------------------------------- 1 | % Copyright (C) 2019 Maitreya Venkataswamy - All Rights Reserved 2 | 3 | % Class for dynamics of cart-pole system that inherits from abstract 4 | % Dynamics class. 5 | classdef CartPoleDynamics < Dynamics 6 | properties 7 | m_c; % mass of pole 8 | m_p; % mass of cart 9 | l; % length of pole 10 | end 11 | 12 | methods 13 | % Constructor 14 | function obj = CartPoleDynamics(m_c, m_p, l) 15 | obj.m_c = m_c; 16 | obj.m_p = m_p; 17 | obj.l = l; 18 | end 19 | 20 | % Equations of motion in state space representation 21 | function dxdt = F(obj, x, u) 22 | s = sin(x(3)); 23 | c = cos(x(3)); 24 | 25 | lambda = obj.m_c + obj.m_p .* s.^2; 26 | psi = u + obj.m_p .* s .* (obj.l .* x(4).^2 + ... 27 | 9.81 .* c); 28 | 29 | eta = -u .* c - obj.m_p .* obj.l .* x(4).^2 ... 30 | .* c .* s - (obj.m_c + obj.m_p) ... 31 | .* 9.81 .* s; 32 | 33 | z_ddot = psi ./ lambda; 34 | theta_ddot = eta ./ (obj.l .* lambda); 35 | 36 | dxdt = [x(2); z_ddot; x(4); theta_ddot]; 37 | end 38 | 39 | % Linearized equations of motion (determined using CAS) 40 | function Phi = Phi(obj, x, u, dt) 41 | F_x = zeros(numel(x)); 42 | 43 | s = sin(x(3)); 44 | c = cos(x(3)); 45 | 46 | F_x(1,1) = 0.0; 47 | F_x(1,2) = 1.0; 48 | F_x(1,3) = 0.0; 49 | F_x(1,4) = 0.0; 50 | 51 | F_x(2,1) = 0.0; 52 | F_x(2,2) = 0.0; 53 | F_x(2,3) = (obj.m_p .* c .* (obj.l .* x(4).^2 + ... 54 | 9.81 .* c) - 9.81 .* obj.m_p .* ... 55 | s.^2) ./ (obj.m_c + obj.m_p .* ... 56 | s.^2) - (2.0 .* obj.m_p .* c ... 57 | .* s .* (u + obj.m_p .* s ... 58 | .* (obj.l .* x(4).^2 + 9.81 .* c))) ... 59 | ./ (obj.m_c + obj.m_p .* s.^2)^2; 60 | F_x(2,4) = (2.0 .* obj.l .* obj.m_p .* x(4) .* s) ... 61 | ./ (obj.m_c + obj.m_p .* s.^2); 62 | 63 | F_x(3,1) = 0.0; 64 | F_x(3,2) = 0.0; 65 | F_x(3,3) = 0.0; 66 | F_x(3,4) = 1.0; 67 | 68 | F_x(4,1) = 0.0; 69 | F_x(4,2) = 0.0; 70 | F_x(4,3) = (u .* s - 9.81 .* c .* ... 71 | (obj.m_c + obj.m_p) - obj.l .* obj.m_p ... 72 | .* x(4).^2 .* c.^2 + obj.l .* obj.m_p ... 73 | .* x(4)^2 .* s.^2) ./ (obj.l ... 74 | .* (obj.m_c + obj.m_p .* s.^2)) ... 75 | + (2.0 .* obj.m_p .* c .* s ... 76 | .* (obj.l .* obj.m_p .* c .* s ... 77 | .* x(4).^2 + u .* c + 9.81 ... 78 | .* s .* (obj.m_c + obj.m_p))) ./ (obj.l ... 79 | .* (obj.m_c + obj.m_p .* s.^2)^2); 80 | F_x(4,4) = -(2.0 .* obj.m_p .* x(4) .* c ... 81 | .* s) ./ (obj.m_c + obj.m_p .* s^2); 82 | 83 | Phi = eye(numel(x)) + F_x .* dt; 84 | end 85 | function beta = beta(obj, x, ~, dt) 86 | F_u = zeros(numel(x), 1); 87 | 88 | F_u(1) = 0.0; 89 | F_u(2) = 1.0 ./ (obj.m_c + obj.m_p .* sin(x(3)).^2); 90 | F_u(3) = 0.0; 91 | F_u(4) = -cos(x(3)) ./ (obj.l .* (obj.m_c + obj.m_p ... 92 | .* sin(x(3)).^2)); 93 | 94 | beta = F_u .* dt; 95 | end 96 | end 97 | end 98 | 99 | -------------------------------------------------------------------------------- /src/Cost.m: -------------------------------------------------------------------------------- 1 | % Copyright (C) 2019 Maitreya Venkataswamy - All Rights Reserved 2 | 3 | % Abstract class for terminal and running costs 4 | classdef (Abstract) Cost 5 | methods 6 | % Terminal state cost 7 | phi = phi(obj, x_f, x_star); 8 | 9 | % Terminal state cost derivatives 10 | phi_x = phi_x(obj, x_f, x_star); 11 | phi_xx = phi_xx(obj, x_f, x_star); 12 | 13 | % Running cost 14 | L = L(obj, x, u, dt); 15 | 16 | % Running cost derivatives 17 | L_x = L_x(obj, x, u, dt); 18 | L_u = L_u(obj, x, u, dt); 19 | L_xx = L_xx(obj, x, u, dt); 20 | L_uu = L_uu(obj, x, u, dt); 21 | L_xu = L_xu(obj, x, u, dt); 22 | L_ux = L_ux(obj, x, u, dt); 23 | end 24 | end 25 | 26 | -------------------------------------------------------------------------------- /src/Dynamics.m: -------------------------------------------------------------------------------- 1 | % Copyright (C) 2019 Maitreya Venkataswamy - All Rights Reserved 2 | 3 | % Abstract class for system dynamics 4 | classdef (Abstract) Dynamics 5 | methods 6 | % Equations of motion in state space representation 7 | dxdt = F(obj, x, u) 8 | 9 | % Linearized equations of motion 10 | Phi = Phi(obj, x, u, dt); 11 | beta = beta(obj, x, u, dt); 12 | end 13 | end 14 | 15 | -------------------------------------------------------------------------------- /src/InvertedPendulumDynamics.m: -------------------------------------------------------------------------------- 1 | % Copyright (C) 2019 Maitreya Venkataswamy - All Rights Reserved 2 | 3 | % Class for dynamics of inverted pendulum system that inherits from 4 | % abstract Dynamics class. 5 | classdef InvertedPendulumDynamics < Dynamics 6 | properties 7 | m; % mass pendulum 8 | l; % length of pendulum 9 | b; % damping coefficient 10 | I; % moment of intertia of pendulum 11 | end 12 | 13 | methods 14 | % Constructor 15 | function obj = InvertedPendulumDynamics(m, l, b) 16 | obj.m = m; 17 | obj.l = l; 18 | obj.b = b; 19 | obj.I = m .* l.^2; 20 | end 21 | 22 | % Equations of motion in state space representation 23 | function dxdt = F(obj, x, u) 24 | theta_ddot = (u - obj.b .* x(2) - obj.m .* 9.81 ... 25 | .* obj.l .* sin(x(1))) ./ obj.I; 26 | 27 | dxdt = [x(2); theta_ddot]; 28 | end 29 | 30 | % Linearized equations of motion (determined using symbolic math) 31 | function Phi = Phi(obj, x, ~, dt) 32 | F_x = zeros(numel(x)); 33 | 34 | F_x(1,1) = 0.0; 35 | F_x(1,2) = 1.0; 36 | 37 | F_x(2,1) = -(9.81 .* obj.l .* obj.m .* cos(x(1))) ./ obj.I; 38 | F_x(2,2) = -obj.b ./ obj.I; 39 | 40 | Phi = eye(numel(x)) + F_x .* dt; 41 | end 42 | function beta = beta(obj, x, ~, dt) 43 | F_u = zeros(numel(x), 1); 44 | 45 | F_u(1) = 0.0; 46 | F_u(2) = 1 ./ obj.I; 47 | 48 | beta = F_u .* dt; 49 | end 50 | end 51 | end 52 | 53 | -------------------------------------------------------------------------------- /src/QuadraticCost.m: -------------------------------------------------------------------------------- 1 | % Copyright (C) 2019 Maitreya Venkataswamy - All Rights Reserved 2 | 3 | % Class for quadratic terminal and running costs that inherits from 4 | % abstract Cost class 5 | classdef QuadraticCost < Cost 6 | properties 7 | Q_f; % terminal cost weights 8 | R; % control cost weights 9 | end 10 | 11 | methods 12 | % Constructor 13 | function obj = QuadraticCost(Q_f, R) 14 | obj.Q_f = Q_f; 15 | obj.R = R; 16 | end 17 | 18 | % Terminal state cost 19 | function phi = phi(obj, x_f, x_star) 20 | phi = 0.5 .* (x_f - x_star).' * obj.Q_f * (x_f - x_star); 21 | end 22 | 23 | % Terminal state cost derivatives 24 | function phi_x = phi_x(obj, x_f, x_star) 25 | phi_x = obj.Q_f * (x_f - x_star); 26 | end 27 | function phi_xx = phi_xx(obj, ~, ~) 28 | phi_xx = obj.Q_f; 29 | end 30 | 31 | % Running cost 32 | function L = L(obj, ~, u, dt) 33 | L = (0.5 .* u.' * obj.R * u) .* dt; 34 | end 35 | 36 | % Running cost derivatives 37 | function L_x = L_x(~, x, ~, ~) 38 | L_x = zeros(numel(x), 1); 39 | end 40 | function L_u = L_u(obj, ~, u, dt) 41 | L_u = (obj.R * u) .* dt; 42 | end 43 | function L_xx = L_xx(~, x, ~, ~) 44 | L_xx = zeros(numel(x)); 45 | end 46 | function L_uu = L_uu(obj, ~, ~, dt) 47 | L_uu = obj.R .* dt; 48 | end 49 | function L_xu = L_xu(~, x, u, ~) 50 | L_xu = zeros(numel(x), numel(u)); 51 | end 52 | function L_ux = L_ux(~, x, u, ~) 53 | L_ux = zeros(numel(u), numel(x)); 54 | end 55 | end 56 | end 57 | 58 | -------------------------------------------------------------------------------- /src/ddp.m: -------------------------------------------------------------------------------- 1 | % Copyright (C) 2019 Maitreya Venkataswamy - All Rights Reserved 2 | 3 | %% Differential Dynamic Programming Algorithm Function 4 | 5 | % Performs Differential Dynamic Programming Algorithm to produce a 6 | % locally optimal control sequence. 7 | % 8 | % Inputs 9 | % 10 | % x_0 : initial state vector 11 | % x_star : target state vector 12 | % t_f : time horizon 13 | % N : number of discretizations of time 14 | % dyn : instance of Dynamics class 15 | % cost : instance of Cost class 16 | % u_max : maximum magnitude of control, used for clamping 17 | % num_iter : number of DDP iterations 18 | % alpha : DDP learning parameter for line searching 19 | % 20 | % Outputs 21 | % 22 | % sol : structure with solution components 23 | % 24 | function sol = ddp(x_0, x_star, t_f, N, dyn, cost, u_max, num_iter, alpha) 25 | %% Allocate arrays for DDP 26 | 27 | % Time stamp array and timestep 28 | t = linspace(0.0, t_f, N); 29 | dt = t(2) - t(1); 30 | 31 | % Cost history 32 | J = zeros(num_iter, 1); 33 | 34 | % Control energy history 35 | E = zeros(num_iter, 1); 36 | 37 | % State trajectory 38 | x = cell(N, 1); 39 | x_new = cell(N, 1); 40 | x{1} = x_0; 41 | x_new{1} = x_0; 42 | 43 | % Control Input trajectory 44 | u = cell(N, 1); 45 | 46 | % Value function and derivatives 47 | V = zeros(N, 1); 48 | V_x = cell(N, 1); 49 | V_xx = cell(N, 1); 50 | 51 | % State action value derivatives 52 | Q_x = cell(N, 1); 53 | Q_u = cell(N, 1); 54 | Q_xx = cell(N, 1); 55 | Q_uu = cell(N, 1); 56 | Q_xu = cell(N, 1); 57 | Q_ux = cell(N, 1); 58 | 59 | %% Initialize DDP with a random input sequence 60 | 61 | fprintf("initializing random control sequence...\n") 62 | 63 | % Generate random control sequence 64 | for k = 1:N-1 65 | u{k} = 2 .* u_max .* rand(length(u_max), 1) - u_max; 66 | end 67 | u{N} = zeros(numel(u_max), 1); 68 | 69 | fprintf("generating initial trajectory...\n") 70 | 71 | % Generate initial trajectory using random control sequence 72 | for k = 1:N-1 73 | x_new{k+1} = x_new{k} + dyn.F(x_new{k}, u{k}) .* dt; 74 | end 75 | 76 | %% Perform main DDP iterations on the trajectory and input sequence 77 | 78 | fprintf("beginning DDP...\n") 79 | 80 | for i = 1:num_iter 81 | fprintf("DDP iteration %d out of %d...\n", i, num_iter); 82 | 83 | % Update control sequence from previous iteration 84 | if i > 1 85 | for k = 1:N-1 86 | % Compute control update feed-forward and feed-back 87 | du_ff = -inv(Q_uu{k}) * Q_u{k}; 88 | du_fb = -inv(Q_uu{k}) * Q_ux{k} * (x_new{k} - x{k}); 89 | 90 | % Limit feed forward control modification with clamping 91 | for m = 1:numel(u_max) 92 | du_ff(m) = min(u_max(m), max(-u_max(m), ... 93 | du_ff(m) + u{k}(m))) - u{k}(m); 94 | end 95 | 96 | % Update control 97 | u{k} = u{k} + alpha .* (du_ff + du_fb); 98 | 99 | % Compute next state in trajectory with new control 100 | x_new{k+1} = x_new{k} + dyn.F(x_new{k}, u{k}) .* dt; 101 | 102 | % Return error if problem with trajectory 103 | if isnan(x_new{k+1}) 104 | sol = assemble_solution(x, u, t, J, E, Q_u, ... 105 | Q_uu, Q_ux, 1); 106 | return 107 | end 108 | end 109 | end 110 | 111 | % Update the current trajectory 112 | x = x_new; 113 | 114 | % Compute total cost 115 | J(i) = cost.phi(x{N}, x_star); 116 | for k = 1:N-1 117 | J(i) = J(i) + cost.L(x{k}, u{k}, dt); 118 | end 119 | 120 | % Compute control energy usage 121 | for k = 1:N-1 122 | E(i) = E(i) + 0.5 .* u{k}.' * u{k} .* dt; 123 | end 124 | 125 | % Compute terminal value function and derivatives 126 | V(N) = cost.phi(x{N}, x_star); 127 | V_x{N} = cost.phi_x(x{N}, x_star); 128 | V_xx{N} = cost.phi_xx(x{N}, x_star); 129 | 130 | % Perform backwards pass 131 | for k = N-1:-1:1 132 | % Compute state-action value function derivatives 133 | Q_x{k} = cost.L_x(x{k}, u{k}, dt) + ... 134 | dyn.Phi(x{k}, u{k}, dt).' * V_x{k+1}; 135 | Q_u{k} = cost.L_u(x{k}, u{k}, dt) + ... 136 | dyn.beta(x{k}, u{k}, dt).' * V_x{k+1}; 137 | Q_xx{k} = cost.L_xx(x{k}, u{k}, dt) ... 138 | + dyn.Phi(x{k}, u{k}, dt).' * V_xx{k+1} ... 139 | * dyn.Phi(x{k}, u{k}, dt); 140 | Q_uu{k} = cost.L_uu(x{k}, u{k}, dt) ... 141 | + dyn.beta(x{k}, u{k}, dt).' * V_xx{k+1} ... 142 | * dyn.beta(x{k}, u{k}, dt); 143 | Q_xu{k} = cost.L_xu(x{k}, u{k}, dt) ... 144 | + dyn.Phi(x{k}, u{k}, dt).' * V_xx{k+1} ... 145 | * dyn.beta(x{k}, u{k}, dt); 146 | Q_ux{k} = cost.L_ux(x{k}, u{k}, dt) ... 147 | + dyn.beta(x{k}, u{k}, dt).' * V_xx{k+1} ... 148 | * dyn.Phi(x{k}, u{k}, dt); 149 | 150 | % Compute the value function derivatives 151 | V_x{k} = Q_x{k} - Q_xu{k} * (Q_uu{k} \ Q_u{k}); 152 | V_xx{k} = Q_xx{k} - Q_xu{k} * (Q_uu{k} \ Q_ux{k}); 153 | end 154 | end 155 | 156 | %% Assemble and return solution structure 157 | 158 | fprintf("finished DDP, assembling results for post-processing...\n"); 159 | 160 | % Assemble solution 161 | sol = assemble_solution(x, u, t, J, E, Q_u, Q_uu, Q_ux, 0); 162 | end 163 | 164 | %% Helper Functions for DDP Algorithm 165 | 166 | % Assembles and returns solution structure 167 | % 168 | % Inputs 169 | % 170 | % x : locally optimal state trajectory 171 | % u : locally optimal control sequence 172 | % t : discretized time stamps 173 | % J : iteration history of cost function 174 | % E : iteration history of control energy 175 | % Q_u, Q_uu, Q_ux : derivatives of state action value function 176 | % error : zero if no error, nonzero else 177 | % 178 | % Outputs 179 | % 180 | % sol : solution structure 181 | function sol = assemble_solution(x, u, t, J, E, Q_u, Q_uu, Q_ux, error) 182 | 183 | % Solution structure 184 | sol = struct; 185 | sol.error = error; 186 | sol.x = x; 187 | sol.u = u; 188 | sol.t = t; 189 | sol.dt = t(2) - t(1); 190 | sol.J = J; 191 | sol.E = E; 192 | sol.Q_u = Q_u; 193 | sol.Q_uu = Q_uu; 194 | sol.Q_ux = Q_ux; 195 | 196 | return 197 | end -------------------------------------------------------------------------------- /src/main_cart_pole.m: -------------------------------------------------------------------------------- 1 | % Copyright (C) 2019 Maitreya Venkataswamy - All Rights Reserved 2 | 3 | %% Setup 4 | 5 | clc; 6 | clear; 7 | close all; 8 | rng(0); 9 | 10 | %% Cart-pendulum problem definition 11 | 12 | % Initial state 13 | x_0 = [0.0; 0.0; 0.0; 0.0]; 14 | 15 | % Target state 16 | x_star = [0.0; 0.0; pi; 0.0]; 17 | 18 | % Time horizon 19 | t_f = 2.0; 20 | 21 | % Number of states along trajectory 22 | N = floor(t_f ./ 0.01); 23 | 24 | % Maximum magnitude of control 25 | u_max = [10.0]; 26 | 27 | % Initialize dynamics 28 | fprintf("initializing cart-pole dynamics...\n") 29 | m_c = 1.0; 30 | m_p = 0.01; 31 | l = 0.25; 32 | dyn = CartPoleDynamics(m_c, m_p, l); 33 | 34 | % Initialize cost 35 | fprintf("initializing quadratic cost function...\n") 36 | Q_f = [10.0 0 0 0; 37 | 0 10.0 0 0; 38 | 0 0 10.0 0; 39 | 0 0 0 10.0]; 40 | R = 0.01; 41 | cost = QuadraticCost(Q_f, R); 42 | 43 | % Number of DDP iterations 44 | num_iter = 100; 45 | 46 | % DDP learning rate 47 | alpha = 0.1; 48 | 49 | % Video framerate 50 | fps = 30; 51 | 52 | %% Execution of DDP 53 | 54 | fprintf("executing DDP..."); 55 | 56 | tic; 57 | sol = ddp(x_0, x_star, t_f, N, dyn, cost, u_max, num_iter, alpha); 58 | toc; 59 | 60 | %% Begin post-processing of solution 61 | 62 | if sol.error == 1 63 | fprintf("DIVERGENCE ERROR: try decreasing learning rate\n"); 64 | return; 65 | end 66 | 67 | % Extract the cart position and pole angle information from the solution 68 | z = zeros(1, length(sol.x)); 69 | theta = zeros(1, length(sol.x)); 70 | z_dot = zeros(1, length(sol.x)); 71 | theta_dot = zeros(1, length(sol.x)); 72 | u = zeros(1, length(sol.x)); 73 | for k = 1:N 74 | z(k) = sol.x{k}(1); 75 | theta(k) = sol.x{k}(3); 76 | z_dot(k) = sol.x{k}(2); 77 | theta_dot(k) = sol.x{k}(4); 78 | u(k) = sol.u{k}; 79 | end 80 | 81 | %% Create video of system 82 | 83 | % Turn off plot visibility temporarily during frame rendering 84 | set(0,'DefaultFigureVisible','off'); 85 | 86 | % Create figure for frames 87 | fig = figure; 88 | ax = gca; 89 | ax.NextPlot = 'ReplaceChildren'; 90 | 91 | % Compute number of frames 92 | num_frame = length(1:ceil((1.0 ./ fps) ./ sol.dt):N); 93 | 94 | % Structure to store the frames 95 | frames(num_frame) = struct('cdata',[],'colormap',[]); 96 | 97 | % Plot limits 98 | xl = []; 99 | yl = []; 100 | 101 | % Counter for frames processed 102 | n = 1; 103 | 104 | % Open video for writing 105 | %vid = VideoWriter('cart-pole', 'Uncompressed AVI'); 106 | %vid.FrameRate = fps; 107 | %open(vid); 108 | 109 | % Loop over points in time to process frames at 110 | for k = 1:ceil((1.0 ./ fps) ./ sol.dt):N 111 | % Plot the joint between cart and pole 112 | plot(z(k), 0.0, 'ro'); 113 | 114 | % Apply plot hold 115 | hold on; 116 | 117 | % Plot the cart 118 | rectangle('Position', [z(k) - 0.25 .* l, -0.1 .* l 0.5 .* l, 0.2 .* l]); 119 | 120 | % Coordinated of the tip of the pole 121 | tipz = z(k) + l .* sin(theta(k)); 122 | tipy = -l .* cos(theta(k)); 123 | 124 | % Plot the pole 125 | plot([z(k), tipz], [0.0, tipy], '-b'); 126 | 127 | % Plot the cart track 128 | plot(-100:100, -0.1 .* l .* ones(1, length(-100:100)), "-k"); 129 | 130 | % Remove plot hold 131 | hold off; 132 | 133 | % Obtain or apply plot limits for consistency across frames 134 | if k == 1 135 | xlim([-2 .* l, 2 .* l]); 136 | axis equal; 137 | xl = xlim(); 138 | yl = ylim() + 0.5 .* l; 139 | ylim(yl); 140 | else 141 | xlim(xl); 142 | ylim(yl); 143 | end 144 | 145 | % Apply blank background 146 | axis off; 147 | 148 | % Render the frame 149 | drawnow; 150 | 151 | % Record the frame and store in the structure 152 | frames(n) = getframe(fig); 153 | 154 | % Write frame to video 155 | %writeVideo(vid, frames(n)); 156 | 157 | % Display progress to command line 158 | fprintf("rendering video frame %d out of %d...\n", n, num_frame); 159 | 160 | % Increment frame counter 161 | n = n + 1; 162 | end 163 | 164 | % Close the video 165 | %close(vid); 166 | 167 | % Renable plot visibility to view movie and other plots 168 | set(0,'DefaultFigureVisible','on') 169 | 170 | % Create fresh figure object for the movie 171 | figure; 172 | 173 | % Render the movie and set to loop for a very large number of times 174 | fprintf("done rendering video frames, presenting video...\n") 175 | movie(gcf, frames, 9999, fps); 176 | 177 | %% Plot trajectories 178 | 179 | % Plot cart position history 180 | figure; 181 | pbaspect([5 3 1]) 182 | hold on; 183 | plot(sol.t, z, "k", "Linewidth", 2); 184 | grid on; 185 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20); 186 | ylabel("Cart Position [m]", "Interpreter", "latex", "FontSize", 20); 187 | ax = gca(); 188 | ax.FontSize = 16; 189 | ax.TickLabelInterpreter = "latex"; 190 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/cart-pole/z.png", "-dpng", "-r500") 191 | 192 | % Plot cart velocity history 193 | figure; 194 | pbaspect([5 3 1]) 195 | hold on; 196 | plot(sol.t, z_dot, "k", "Linewidth", 2); 197 | grid on; 198 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20); 199 | ylabel("Cart velocity [m/s]", "Interpreter", "latex", "FontSize", 20); 200 | ax = gca(); 201 | ax.FontSize = 16; 202 | ax.TickLabelInterpreter = "latex"; 203 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/cart-pole/z_dot.png", "-dpng", "-r500") 204 | 205 | % Plot pole angle history 206 | figure; 207 | pbaspect([5 3 1]) 208 | hold on; 209 | plot(sol.t, theta, "k", "Linewidth", 2); 210 | grid on; 211 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20); 212 | ylabel("Pole Angle [rad]", "Interpreter", "latex", "FontSize", 20); 213 | ax = gca(); 214 | ax.FontSize = 16; 215 | ax.TickLabelInterpreter = "latex"; 216 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/cart-pole/theta.png", "-dpng", "-r500") 217 | 218 | % Plot pole anglular velocity history 219 | figure; 220 | pbaspect([5 3 1]) 221 | hold on; 222 | plot(sol.t, theta_dot, "k", "Linewidth", 2); 223 | grid on; 224 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20); 225 | ylabel("Pole Anglular Velocity [rad/s]", "Interpreter", "latex", "FontSize", 20); 226 | ax = gca(); 227 | ax.FontSize = 16; 228 | ax.TickLabelInterpreter = "latex"; 229 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/cart-pole/theta_dot.png", "-dpng", "-r500") 230 | 231 | % Plot trajectory in partial state space 232 | figure; 233 | pbaspect([5 3 1]) 234 | hold on; 235 | plot(z, theta, "k", "Linewidth", 2); 236 | plot(x_0(1), x_0(3), 'o', "MarkerFaceColor", "blue", ... 237 | "MarkerEdgeColor", "blue"); 238 | plot(x_star(1), x_star(3), 'o', "MarkerFaceColor", "green", ... 239 | "MarkerEdgeColor", "green") 240 | grid on; 241 | xlabel("Cart Position [m]", "Interpreter", "latex", "FontSize", 20); 242 | ylabel("Pole Angular [rad]", "Interpreter", "latex", "FontSize", 20); 243 | ax = gca(); 244 | ax.FontSize = 16; 245 | ax.TickLabelInterpreter = "latex"; 246 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/cart-pole/traj.png", "-dpng", "-r500") 247 | 248 | % Plot control sequence 249 | figure; 250 | pbaspect([5 3 1]) 251 | hold on; 252 | plot(sol.t, u, "b", "LineWidth", 2); 253 | grid on; 254 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20); 255 | ylabel("Control Input [N]", "Interpreter", "latex", "FontSize", 20); 256 | ax = gca(); 257 | ax.FontSize = 16; 258 | ax.TickLabelInterpreter = "latex"; 259 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/cart-pole/u.png", "-dpng", "-r500") 260 | 261 | % Plot cost function vs iteration 262 | figure; 263 | pbaspect([5 3 1]) 264 | hold on; 265 | plot(1:length(sol.J), sol.J, "r", "LineWidth", 2); 266 | grid on; 267 | xlabel("DDP Iteration [-]", "Interpreter", "latex", "FontSize", 20); 268 | ylabel("Cost Function [-]", "Interpreter", "latex", "FontSize", 20); 269 | ax = gca(); 270 | ax.FontSize = 16; 271 | ax.TickLabelInterpreter = "latex"; 272 | ax.YScale = 'log'; 273 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/cart-pole/J.png", "-dpng", "-r500") 274 | 275 | % Plot control energy vs iteration 276 | figure; 277 | pbaspect([5 3 1]) 278 | hold on; 279 | plot(1:length(sol.E), sol.E, "r", "LineWidth", 2); 280 | grid on; 281 | xlabel("DDP Iteration [-]", "Interpreter", "latex", "FontSize", 20); 282 | ylabel("Control Energy Usage [$\rm{N}^{2}\rm{s}$]", "Interpreter", "latex", "FontSize", 20); 283 | ax = gca(); 284 | ax.FontSize = 16; 285 | ax.TickLabelInterpreter = "latex"; 286 | ax.YScale = 'log'; 287 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/cart-pole/E.png", "-dpng", "-r500") 288 | 289 | return; 290 | -------------------------------------------------------------------------------- /src/main_inverted_pendulum.m: -------------------------------------------------------------------------------- 1 | % Copyright (C) 2019 Maitreya Venkataswamy - All Rights Reserved 2 | 3 | %% Setup 4 | 5 | clc; 6 | clear; 7 | close all; 8 | rng(0); 9 | 10 | %% Cart-pendulum problem definition 11 | 12 | % Initial state 13 | x_0 = [0.0; 0.0]; 14 | 15 | % Target state 16 | x_star = [pi; 0.0]; 17 | 18 | % Time horizon 19 | t_f = 5.0; 20 | 21 | % Number of states along trajectory 22 | N = floor(t_f ./ 0.01); 23 | 24 | % Maximum magnitude of control 25 | u_max = [10.0]; 26 | 27 | % Initialize dynamics 28 | fprintf("initializing inverted pendulum dynamics...\n") 29 | m = 1.0; 30 | l = 1.0; 31 | b = 1.0; 32 | dyn = InvertedPendulumDynamics(m, l, b); 33 | 34 | % Initialize cost 35 | fprintf("initializing quadratic cost function...\n") 36 | Q_f = [100.0 0.0; 37 | 0.0 100.0]; 38 | R = 0.0001; 39 | cost = QuadraticCost(Q_f, R); 40 | 41 | % Number of DDP iterations 42 | num_iter = 50; 43 | 44 | % DDP line search parameter 45 | alpha = 0.5; 46 | 47 | % Video framerate 48 | fps = 30; 49 | 50 | %% Execution of DDP 51 | 52 | fprintf("executing DDP..."); 53 | 54 | tic; 55 | sol = ddp(x_0, x_star, t_f, N, dyn, cost, u_max, num_iter, alpha); 56 | toc; 57 | 58 | %% Begin post-processing of solution 59 | 60 | if sol.error == 1 61 | fprintf("Error\n"); 62 | end 63 | 64 | % Extract the pendulum angle information from the solution 65 | theta = zeros(1, length(sol.x)); 66 | theta_dot = zeros(1, length(sol.x)); 67 | u = zeros(1, length(sol.x)); 68 | for k = 1:N 69 | theta(k) = sol.x{k}(1); 70 | theta_dot(k) = sol.x{k}(2); 71 | u(k) = sol.u{k}; 72 | end 73 | 74 | %% Create video of system 75 | 76 | % Turn off plot visibility temporarily during frame rendering 77 | set(0,'DefaultFigureVisible','off'); 78 | 79 | % Create figure for frames 80 | fig = figure; 81 | ax = gca; 82 | ax.NextPlot = 'ReplaceChildren'; 83 | 84 | % Compute number of frames 85 | num_frame = length(1:ceil((1.0 ./ fps) ./ sol.dt):N); 86 | 87 | % Structure to store the frames 88 | frames(num_frame) = struct('cdata',[],'colormap',[]); 89 | 90 | % Plot limits 91 | xl = []; 92 | yl = []; 93 | 94 | % Counter for frames processed 95 | n = 1; 96 | 97 | % Open video for writing 98 | %vid = VideoWriter('inverted-pendulum', 'Uncompressed AVI'); 99 | %vid.FrameRate = fps; 100 | %open(vid); 101 | 102 | % Loop over points in time to process frames at 103 | for k = 1:ceil((1.0 ./ fps) ./ sol.dt):N 104 | % Plot the pendlum root 105 | plot(0.0, 0.0, 'ro'); 106 | 107 | % Apply plot hold 108 | hold on; 109 | 110 | % Coordinated of the tip of the pole 111 | tipz = l .* sin(theta(k)); 112 | tipy = -l .* cos(theta(k)); 113 | 114 | % Plot the pendulum 115 | plot([0.0, tipz], [0.0, tipy], '-b'); 116 | 117 | % Remove plot hold 118 | hold off; 119 | 120 | % Obtain or apply plot limits for consistency across frames 121 | if k == 1 122 | ylim([-2 .* l, 2 .* l]); 123 | axis equal; 124 | xl = xlim(); 125 | yl = ylim(); 126 | else 127 | xlim(xl); 128 | ylim(yl); 129 | end 130 | 131 | % Apply blank background 132 | axis off; 133 | 134 | % Render the frame 135 | drawnow; 136 | 137 | % Record the frame and store in the structure 138 | frames(n) = getframe(fig); 139 | 140 | % Write frame to video 141 | %writeVideo(vid, frames(n)); 142 | 143 | % Display progress to command line 144 | fprintf("rendering video frame %d out of %d...\n", n, num_frame); 145 | 146 | % Increment frame counter 147 | n = n + 1; 148 | end 149 | 150 | % Renable plot visibility to view movie and other plots 151 | set(0,'DefaultFigureVisible','on') 152 | 153 | % Create fresh figure object for the movie 154 | figure; 155 | 156 | % Render the movie and set to loop for a very large number of times 157 | fprintf("done rendering video frames, presenting video...\n") 158 | movie(gcf, frames, 9999, fps); 159 | 160 | %% Plot trajectories 161 | 162 | % Plot angle history 163 | figure; 164 | pbaspect([5 3 1]) 165 | hold on; 166 | plot(sol.t, rad2deg(theta), "k", "LineWidth", 2); 167 | grid on; 168 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20); 169 | ylabel("Pendulum Angle [rad]", "Interpreter", "latex", "FontSize", 20); 170 | ax = gca(); 171 | ax.FontSize = 16; 172 | ax.TickLabelInterpreter = "latex"; 173 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/inverted-pendulum/theta.png", "-dpng", "-r500") 174 | 175 | % Plot anglular velocity history 176 | figure; 177 | pbaspect([5 3 1]) 178 | hold on; 179 | plot(sol.t, rad2deg(theta_dot), "k", "LineWidth", 2); 180 | grid on; 181 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20); 182 | ylabel("Pendulum Anglular Velocity [rad/s]", "Interpreter", "latex", "FontSize", 20); 183 | ax = gca(); 184 | ax.FontSize = 16; 185 | ax.TickLabelInterpreter = "latex"; 186 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/inverted-pendulum/theta_dot.png", "-dpng", "-r500") 187 | 188 | % Plot trajectory in state space 189 | figure; 190 | pbaspect([5 3 1]) 191 | hold on; 192 | plot(rad2deg(theta), rad2deg(theta_dot), "k", "LineWidth", 2); 193 | plot(rad2deg(x_0(1)), rad2deg(x_0(2)), "o", "MarkerFaceColor", "blue", ... 194 | "MarkerEdgeColor", "blue"); 195 | plot(rad2deg(x_star(1)), rad2deg(x_star(2)), "o", "MarkerFaceColor", ... 196 | "green", "MarkerEdgeColor", "green"); 197 | grid on; 198 | xlabel("Pendulum Angle [deg]", "Interpreter", "latex", "FontSize", 20); 199 | ylabel("Pendulum Angular Speed [rad/s]", "Interpreter", "latex", ... 200 | "FontSize", 20); 201 | ax = gca(); 202 | ax.FontSize = 16; 203 | ax.TickLabelInterpreter = "latex"; 204 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/inverted-pendulum/traj.png", "-dpng", "-r500") 205 | 206 | % Plot control sequence 207 | figure; 208 | pbaspect([5 3 1]) 209 | hold on; 210 | plot(sol.t, u, "b", "LineWidth", 2); 211 | grid on; 212 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20); 213 | ylabel("Control Input [N]", "Interpreter", "latex", "FontSize", 20); 214 | ax = gca(); 215 | ax.FontSize = 16; 216 | ax.TickLabelInterpreter = "latex"; 217 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/inverted-pendulum/u.png", "-dpng", "-r500") 218 | 219 | % Plot cost function vs iteration 220 | figure; 221 | pbaspect([5 3 1]) 222 | hold on; 223 | plot(1:length(sol.J), sol.J, "r", "LineWidth", 2); 224 | grid on; 225 | xlabel("DDP Iteration [-]", "Interpreter", "latex", "FontSize", 20); 226 | ylabel("Cost Function [-]", "Interpreter", "latex", "FontSize", 20); 227 | ax = gca(); 228 | ax.FontSize = 16; 229 | ax.TickLabelInterpreter = "latex"; 230 | ax.YScale = 'log'; 231 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/inverted-pendulum/J.png", "-dpng", "-r500") 232 | 233 | % Plot control energy vs iteration 234 | figure; 235 | pbaspect([5 3 1]) 236 | hold on; 237 | plot(1:length(sol.E), sol.E, "r", "LineWidth", 2); 238 | grid on; 239 | xlabel("DDP Iteration [-]", "Interpreter", "latex", "FontSize", 20); 240 | ylabel("Control Energy Usage [$\rm{N}^{2}\rm{m}^{2}\rm{s}$]", "Interpreter", "latex", "FontSize", 20); 241 | ax = gca(); 242 | ax.FontSize = 16; 243 | ax.TickLabelInterpreter = "latex"; 244 | ax.YScale = 'log'; 245 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/inverted-pendulum/E.png", "-dpng", "-r500") 246 | 247 | %% Evaluate control robustness in presence of noise in dynamics 248 | 249 | % Number of trials to compute 250 | num_trials = 100; 251 | 252 | % Allocate cell array to contain trajectories 253 | x_list = cell(num_trials, 1); 254 | 255 | % Noise standard deviation 256 | sigma = 1e0; 257 | 258 | % Simulate all the trajectories with noise 259 | for n = 1:num_trials 260 | % Allocate array for trajectory 261 | x = cell(length(sol.x), 1); 262 | 263 | % Initialize trajectory initial state 264 | x{1} = x_0; 265 | 266 | % Compute trajectory with computed control sequence with noise 267 | for k = 1:length(sol.x)-1 268 | noise = normrnd(0.0, sigma, length(u_max)); 269 | x{k+1} = x{k} + dyn.F(x{k}, sol.u{k} + noise) .* sol.dt; 270 | end 271 | 272 | % Add trajectory to list of computed trajectories 273 | x_list{n} = x; 274 | end 275 | 276 | % Plot the angle trajectories 277 | figure; 278 | pbaspect([5 3 1]) 279 | grid on; 280 | hold on; 281 | for n = 1:num_trials 282 | % Extract the pendulum angle information from the solutions 283 | theta = zeros(1, length(sol.x)); 284 | theta_dot = zeros(1, length(sol.x)); 285 | for k = 1:length(sol.x) 286 | theta(k) = x_list{n}{k}(1); 287 | theta_dot(k) = x_list{n}{k}(2); 288 | end 289 | 290 | % Plot the trajectory 291 | plot(sol.t, theta) 292 | end 293 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20); 294 | ylabel("Pendulum Angle [rad]", "Interpreter", "latex", "FontSize", 20); 295 | ax = gca(); 296 | ax.FontSize = 16; 297 | ax.TickLabelInterpreter = "latex"; 298 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/inverted-pendulum/theta-noise-1e0.png", "-dpng", "-r500") 299 | 300 | %% Utilize Feedback to adjust control in presence of disturbances 301 | 302 | % Number of trials to compute 303 | num_trials = 100; 304 | 305 | % Allocate cell array to contain trajectories 306 | x_list = cell(num_trials, 1); 307 | 308 | % Noise standard deviation 309 | sigma = 1e0; 310 | 311 | % Simulate all the trajectories with noise 312 | for n = 1:num_trials 313 | % Allocate array for trajectory 314 | x = cell(length(sol.x), 1); 315 | 316 | % Initialize trajectory initial state 317 | x{1} = x_0; 318 | 319 | % Compute trajectory with updating control sequence with noise 320 | for k = 1:length(sol.x)-1 321 | noise = normrnd(0.0, sigma, length(u_max)); 322 | 323 | % Compute control correction for disturbance using feedback term 324 | du = -inv(sol.Q_uu{k}) * sol.Q_ux{k} * (x{k} - sol.x{k}); 325 | 326 | x{k+1} = x{k} + dyn.F(x{k}, sol.u{k} + du + noise) .* sol.dt; 327 | end 328 | 329 | % Add trajectory to list of computed trajectories 330 | x_list{n} = x; 331 | end 332 | 333 | % Plot the angle trajectories 334 | figure; 335 | pbaspect([5 3 1]) 336 | grid on; 337 | hold on; 338 | for n = 1:num_trials 339 | % Extract the pendulum angle information from the solutions 340 | theta = zeros(1, length(sol.x)); 341 | theta_dot = zeros(1, length(sol.x)); 342 | for k = 1:length(sol.x) 343 | theta(k) = x_list{n}{k}(1); 344 | theta_dot(k) = x_list{n}{k}(2); 345 | end 346 | 347 | % Plot the trajectory 348 | plot(sol.t, theta) 349 | end 350 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20); 351 | ylabel("Pendulum Angle [rad]", "Interpreter", "latex", "FontSize", 20); 352 | ax = gca(); 353 | ax.FontSize = 16; 354 | ax.TickLabelInterpreter = "latex"; 355 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/inverted-pendulum/theta-robust-1e0.png", "-dpng", "-r500") 356 | 357 | return; 358 | --------------------------------------------------------------------------------