├── .gitignore
├── README.md
├── ae4803-hw-1-report-v1.1.pdf
├── cart-pole.gif
└── src
├── CartPoleDynamics.m
├── Cost.m
├── Dynamics.m
├── InvertedPendulumDynamics.m
├── QuadraticCost.m
├── ddp.m
├── main_cart_pole.m
└── main_inverted_pendulum.m
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *.m~
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Differential Dynamic Programming Simulation Using MATLAB
2 |
3 | This code is a MATLAB implementation of the **Differential Dynamic Programming** (DDP) algorithm, implemented in **MATLAB**, and utilized to control a simulated inverted pendulum and a simulated cart-pole system. This project is for the AE4803-ROB class, *Robotic Systems and Autonomy*, taught by Prof. Evangelos Theodorou at the Georgia Institute of Technology during the spring 2020 semester.
4 |
5 | The DDP algorithm is a powerful method for optimal control, that can be used to control many different types of systems from a variety of application domains. It is for this reason that the approach to the implementation in this code was to create a DDP routine that can be used for any system without the need to change any of the actual algorithm. The two things that chage between problems that could utilize DDP are:
6 |
7 | 1. Dynamics of the system
8 | 2. Cost functions used in the optimization
9 |
10 | The approach to ensuring that this code can effectively used for many different problems efficiently is abstracting these two elements and create abstract classes for both the dynamics and the cost functions. These classes are defined in [`Dynamics.m`](src/Dynamics.m) and [`Cost.m`](src/Cost.m) as `Dynamics` and `Cost`, respectively. This way, anyone who wants to apply the DDP algorithm to their problem of interest need only write:
11 |
12 | 1. A class that inherits `Dynamics` and implements its abstract methods (e.g. `CartPoleDynamics` in [`CartPoleDynamics.m`](src/CartPoleDynamics.m))
13 | 2. A class that inherits `Cost` and implements its abstract methods (e.g. `QuadraticCost` in [`QuadraticCost.m`](src/QuadraticCost.m))
14 |
15 | The code that performs the actual DDP procedure to generate an optimal control sequence is implemented as a routine in [`ddp.m`](src/ddp.m), and does not need to be changed at all to use DDP for a specific problem.
16 |
17 |

18 |
--------------------------------------------------------------------------------
/ae4803-hw-1-report-v1.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maitreyakv/ddp-simulation/b73c4c13bf560b5a1cc720977fb5d08e53776f5a/ae4803-hw-1-report-v1.1.pdf
--------------------------------------------------------------------------------
/cart-pole.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maitreyakv/ddp-simulation/b73c4c13bf560b5a1cc720977fb5d08e53776f5a/cart-pole.gif
--------------------------------------------------------------------------------
/src/CartPoleDynamics.m:
--------------------------------------------------------------------------------
1 | % Copyright (C) 2019 Maitreya Venkataswamy - All Rights Reserved
2 |
3 | % Class for dynamics of cart-pole system that inherits from abstract
4 | % Dynamics class.
5 | classdef CartPoleDynamics < Dynamics
6 | properties
7 | m_c; % mass of pole
8 | m_p; % mass of cart
9 | l; % length of pole
10 | end
11 |
12 | methods
13 | % Constructor
14 | function obj = CartPoleDynamics(m_c, m_p, l)
15 | obj.m_c = m_c;
16 | obj.m_p = m_p;
17 | obj.l = l;
18 | end
19 |
20 | % Equations of motion in state space representation
21 | function dxdt = F(obj, x, u)
22 | s = sin(x(3));
23 | c = cos(x(3));
24 |
25 | lambda = obj.m_c + obj.m_p .* s.^2;
26 | psi = u + obj.m_p .* s .* (obj.l .* x(4).^2 + ...
27 | 9.81 .* c);
28 |
29 | eta = -u .* c - obj.m_p .* obj.l .* x(4).^2 ...
30 | .* c .* s - (obj.m_c + obj.m_p) ...
31 | .* 9.81 .* s;
32 |
33 | z_ddot = psi ./ lambda;
34 | theta_ddot = eta ./ (obj.l .* lambda);
35 |
36 | dxdt = [x(2); z_ddot; x(4); theta_ddot];
37 | end
38 |
39 | % Linearized equations of motion (determined using CAS)
40 | function Phi = Phi(obj, x, u, dt)
41 | F_x = zeros(numel(x));
42 |
43 | s = sin(x(3));
44 | c = cos(x(3));
45 |
46 | F_x(1,1) = 0.0;
47 | F_x(1,2) = 1.0;
48 | F_x(1,3) = 0.0;
49 | F_x(1,4) = 0.0;
50 |
51 | F_x(2,1) = 0.0;
52 | F_x(2,2) = 0.0;
53 | F_x(2,3) = (obj.m_p .* c .* (obj.l .* x(4).^2 + ...
54 | 9.81 .* c) - 9.81 .* obj.m_p .* ...
55 | s.^2) ./ (obj.m_c + obj.m_p .* ...
56 | s.^2) - (2.0 .* obj.m_p .* c ...
57 | .* s .* (u + obj.m_p .* s ...
58 | .* (obj.l .* x(4).^2 + 9.81 .* c))) ...
59 | ./ (obj.m_c + obj.m_p .* s.^2)^2;
60 | F_x(2,4) = (2.0 .* obj.l .* obj.m_p .* x(4) .* s) ...
61 | ./ (obj.m_c + obj.m_p .* s.^2);
62 |
63 | F_x(3,1) = 0.0;
64 | F_x(3,2) = 0.0;
65 | F_x(3,3) = 0.0;
66 | F_x(3,4) = 1.0;
67 |
68 | F_x(4,1) = 0.0;
69 | F_x(4,2) = 0.0;
70 | F_x(4,3) = (u .* s - 9.81 .* c .* ...
71 | (obj.m_c + obj.m_p) - obj.l .* obj.m_p ...
72 | .* x(4).^2 .* c.^2 + obj.l .* obj.m_p ...
73 | .* x(4)^2 .* s.^2) ./ (obj.l ...
74 | .* (obj.m_c + obj.m_p .* s.^2)) ...
75 | + (2.0 .* obj.m_p .* c .* s ...
76 | .* (obj.l .* obj.m_p .* c .* s ...
77 | .* x(4).^2 + u .* c + 9.81 ...
78 | .* s .* (obj.m_c + obj.m_p))) ./ (obj.l ...
79 | .* (obj.m_c + obj.m_p .* s.^2)^2);
80 | F_x(4,4) = -(2.0 .* obj.m_p .* x(4) .* c ...
81 | .* s) ./ (obj.m_c + obj.m_p .* s^2);
82 |
83 | Phi = eye(numel(x)) + F_x .* dt;
84 | end
85 | function beta = beta(obj, x, ~, dt)
86 | F_u = zeros(numel(x), 1);
87 |
88 | F_u(1) = 0.0;
89 | F_u(2) = 1.0 ./ (obj.m_c + obj.m_p .* sin(x(3)).^2);
90 | F_u(3) = 0.0;
91 | F_u(4) = -cos(x(3)) ./ (obj.l .* (obj.m_c + obj.m_p ...
92 | .* sin(x(3)).^2));
93 |
94 | beta = F_u .* dt;
95 | end
96 | end
97 | end
98 |
99 |
--------------------------------------------------------------------------------
/src/Cost.m:
--------------------------------------------------------------------------------
1 | % Copyright (C) 2019 Maitreya Venkataswamy - All Rights Reserved
2 |
3 | % Abstract class for terminal and running costs
4 | classdef (Abstract) Cost
5 | methods
6 | % Terminal state cost
7 | phi = phi(obj, x_f, x_star);
8 |
9 | % Terminal state cost derivatives
10 | phi_x = phi_x(obj, x_f, x_star);
11 | phi_xx = phi_xx(obj, x_f, x_star);
12 |
13 | % Running cost
14 | L = L(obj, x, u, dt);
15 |
16 | % Running cost derivatives
17 | L_x = L_x(obj, x, u, dt);
18 | L_u = L_u(obj, x, u, dt);
19 | L_xx = L_xx(obj, x, u, dt);
20 | L_uu = L_uu(obj, x, u, dt);
21 | L_xu = L_xu(obj, x, u, dt);
22 | L_ux = L_ux(obj, x, u, dt);
23 | end
24 | end
25 |
26 |
--------------------------------------------------------------------------------
/src/Dynamics.m:
--------------------------------------------------------------------------------
1 | % Copyright (C) 2019 Maitreya Venkataswamy - All Rights Reserved
2 |
3 | % Abstract class for system dynamics
4 | classdef (Abstract) Dynamics
5 | methods
6 | % Equations of motion in state space representation
7 | dxdt = F(obj, x, u)
8 |
9 | % Linearized equations of motion
10 | Phi = Phi(obj, x, u, dt);
11 | beta = beta(obj, x, u, dt);
12 | end
13 | end
14 |
15 |
--------------------------------------------------------------------------------
/src/InvertedPendulumDynamics.m:
--------------------------------------------------------------------------------
1 | % Copyright (C) 2019 Maitreya Venkataswamy - All Rights Reserved
2 |
3 | % Class for dynamics of inverted pendulum system that inherits from
4 | % abstract Dynamics class.
5 | classdef InvertedPendulumDynamics < Dynamics
6 | properties
7 | m; % mass pendulum
8 | l; % length of pendulum
9 | b; % damping coefficient
10 | I; % moment of intertia of pendulum
11 | end
12 |
13 | methods
14 | % Constructor
15 | function obj = InvertedPendulumDynamics(m, l, b)
16 | obj.m = m;
17 | obj.l = l;
18 | obj.b = b;
19 | obj.I = m .* l.^2;
20 | end
21 |
22 | % Equations of motion in state space representation
23 | function dxdt = F(obj, x, u)
24 | theta_ddot = (u - obj.b .* x(2) - obj.m .* 9.81 ...
25 | .* obj.l .* sin(x(1))) ./ obj.I;
26 |
27 | dxdt = [x(2); theta_ddot];
28 | end
29 |
30 | % Linearized equations of motion (determined using symbolic math)
31 | function Phi = Phi(obj, x, ~, dt)
32 | F_x = zeros(numel(x));
33 |
34 | F_x(1,1) = 0.0;
35 | F_x(1,2) = 1.0;
36 |
37 | F_x(2,1) = -(9.81 .* obj.l .* obj.m .* cos(x(1))) ./ obj.I;
38 | F_x(2,2) = -obj.b ./ obj.I;
39 |
40 | Phi = eye(numel(x)) + F_x .* dt;
41 | end
42 | function beta = beta(obj, x, ~, dt)
43 | F_u = zeros(numel(x), 1);
44 |
45 | F_u(1) = 0.0;
46 | F_u(2) = 1 ./ obj.I;
47 |
48 | beta = F_u .* dt;
49 | end
50 | end
51 | end
52 |
53 |
--------------------------------------------------------------------------------
/src/QuadraticCost.m:
--------------------------------------------------------------------------------
1 | % Copyright (C) 2019 Maitreya Venkataswamy - All Rights Reserved
2 |
3 | % Class for quadratic terminal and running costs that inherits from
4 | % abstract Cost class
5 | classdef QuadraticCost < Cost
6 | properties
7 | Q_f; % terminal cost weights
8 | R; % control cost weights
9 | end
10 |
11 | methods
12 | % Constructor
13 | function obj = QuadraticCost(Q_f, R)
14 | obj.Q_f = Q_f;
15 | obj.R = R;
16 | end
17 |
18 | % Terminal state cost
19 | function phi = phi(obj, x_f, x_star)
20 | phi = 0.5 .* (x_f - x_star).' * obj.Q_f * (x_f - x_star);
21 | end
22 |
23 | % Terminal state cost derivatives
24 | function phi_x = phi_x(obj, x_f, x_star)
25 | phi_x = obj.Q_f * (x_f - x_star);
26 | end
27 | function phi_xx = phi_xx(obj, ~, ~)
28 | phi_xx = obj.Q_f;
29 | end
30 |
31 | % Running cost
32 | function L = L(obj, ~, u, dt)
33 | L = (0.5 .* u.' * obj.R * u) .* dt;
34 | end
35 |
36 | % Running cost derivatives
37 | function L_x = L_x(~, x, ~, ~)
38 | L_x = zeros(numel(x), 1);
39 | end
40 | function L_u = L_u(obj, ~, u, dt)
41 | L_u = (obj.R * u) .* dt;
42 | end
43 | function L_xx = L_xx(~, x, ~, ~)
44 | L_xx = zeros(numel(x));
45 | end
46 | function L_uu = L_uu(obj, ~, ~, dt)
47 | L_uu = obj.R .* dt;
48 | end
49 | function L_xu = L_xu(~, x, u, ~)
50 | L_xu = zeros(numel(x), numel(u));
51 | end
52 | function L_ux = L_ux(~, x, u, ~)
53 | L_ux = zeros(numel(u), numel(x));
54 | end
55 | end
56 | end
57 |
58 |
--------------------------------------------------------------------------------
/src/ddp.m:
--------------------------------------------------------------------------------
1 | % Copyright (C) 2019 Maitreya Venkataswamy - All Rights Reserved
2 |
3 | %% Differential Dynamic Programming Algorithm Function
4 |
5 | % Performs Differential Dynamic Programming Algorithm to produce a
6 | % locally optimal control sequence.
7 | %
8 | % Inputs
9 | %
10 | % x_0 : initial state vector
11 | % x_star : target state vector
12 | % t_f : time horizon
13 | % N : number of discretizations of time
14 | % dyn : instance of Dynamics class
15 | % cost : instance of Cost class
16 | % u_max : maximum magnitude of control, used for clamping
17 | % num_iter : number of DDP iterations
18 | % alpha : DDP learning parameter for line searching
19 | %
20 | % Outputs
21 | %
22 | % sol : structure with solution components
23 | %
24 | function sol = ddp(x_0, x_star, t_f, N, dyn, cost, u_max, num_iter, alpha)
25 | %% Allocate arrays for DDP
26 |
27 | % Time stamp array and timestep
28 | t = linspace(0.0, t_f, N);
29 | dt = t(2) - t(1);
30 |
31 | % Cost history
32 | J = zeros(num_iter, 1);
33 |
34 | % Control energy history
35 | E = zeros(num_iter, 1);
36 |
37 | % State trajectory
38 | x = cell(N, 1);
39 | x_new = cell(N, 1);
40 | x{1} = x_0;
41 | x_new{1} = x_0;
42 |
43 | % Control Input trajectory
44 | u = cell(N, 1);
45 |
46 | % Value function and derivatives
47 | V = zeros(N, 1);
48 | V_x = cell(N, 1);
49 | V_xx = cell(N, 1);
50 |
51 | % State action value derivatives
52 | Q_x = cell(N, 1);
53 | Q_u = cell(N, 1);
54 | Q_xx = cell(N, 1);
55 | Q_uu = cell(N, 1);
56 | Q_xu = cell(N, 1);
57 | Q_ux = cell(N, 1);
58 |
59 | %% Initialize DDP with a random input sequence
60 |
61 | fprintf("initializing random control sequence...\n")
62 |
63 | % Generate random control sequence
64 | for k = 1:N-1
65 | u{k} = 2 .* u_max .* rand(length(u_max), 1) - u_max;
66 | end
67 | u{N} = zeros(numel(u_max), 1);
68 |
69 | fprintf("generating initial trajectory...\n")
70 |
71 | % Generate initial trajectory using random control sequence
72 | for k = 1:N-1
73 | x_new{k+1} = x_new{k} + dyn.F(x_new{k}, u{k}) .* dt;
74 | end
75 |
76 | %% Perform main DDP iterations on the trajectory and input sequence
77 |
78 | fprintf("beginning DDP...\n")
79 |
80 | for i = 1:num_iter
81 | fprintf("DDP iteration %d out of %d...\n", i, num_iter);
82 |
83 | % Update control sequence from previous iteration
84 | if i > 1
85 | for k = 1:N-1
86 | % Compute control update feed-forward and feed-back
87 | du_ff = -inv(Q_uu{k}) * Q_u{k};
88 | du_fb = -inv(Q_uu{k}) * Q_ux{k} * (x_new{k} - x{k});
89 |
90 | % Limit feed forward control modification with clamping
91 | for m = 1:numel(u_max)
92 | du_ff(m) = min(u_max(m), max(-u_max(m), ...
93 | du_ff(m) + u{k}(m))) - u{k}(m);
94 | end
95 |
96 | % Update control
97 | u{k} = u{k} + alpha .* (du_ff + du_fb);
98 |
99 | % Compute next state in trajectory with new control
100 | x_new{k+1} = x_new{k} + dyn.F(x_new{k}, u{k}) .* dt;
101 |
102 | % Return error if problem with trajectory
103 | if isnan(x_new{k+1})
104 | sol = assemble_solution(x, u, t, J, E, Q_u, ...
105 | Q_uu, Q_ux, 1);
106 | return
107 | end
108 | end
109 | end
110 |
111 | % Update the current trajectory
112 | x = x_new;
113 |
114 | % Compute total cost
115 | J(i) = cost.phi(x{N}, x_star);
116 | for k = 1:N-1
117 | J(i) = J(i) + cost.L(x{k}, u{k}, dt);
118 | end
119 |
120 | % Compute control energy usage
121 | for k = 1:N-1
122 | E(i) = E(i) + 0.5 .* u{k}.' * u{k} .* dt;
123 | end
124 |
125 | % Compute terminal value function and derivatives
126 | V(N) = cost.phi(x{N}, x_star);
127 | V_x{N} = cost.phi_x(x{N}, x_star);
128 | V_xx{N} = cost.phi_xx(x{N}, x_star);
129 |
130 | % Perform backwards pass
131 | for k = N-1:-1:1
132 | % Compute state-action value function derivatives
133 | Q_x{k} = cost.L_x(x{k}, u{k}, dt) + ...
134 | dyn.Phi(x{k}, u{k}, dt).' * V_x{k+1};
135 | Q_u{k} = cost.L_u(x{k}, u{k}, dt) + ...
136 | dyn.beta(x{k}, u{k}, dt).' * V_x{k+1};
137 | Q_xx{k} = cost.L_xx(x{k}, u{k}, dt) ...
138 | + dyn.Phi(x{k}, u{k}, dt).' * V_xx{k+1} ...
139 | * dyn.Phi(x{k}, u{k}, dt);
140 | Q_uu{k} = cost.L_uu(x{k}, u{k}, dt) ...
141 | + dyn.beta(x{k}, u{k}, dt).' * V_xx{k+1} ...
142 | * dyn.beta(x{k}, u{k}, dt);
143 | Q_xu{k} = cost.L_xu(x{k}, u{k}, dt) ...
144 | + dyn.Phi(x{k}, u{k}, dt).' * V_xx{k+1} ...
145 | * dyn.beta(x{k}, u{k}, dt);
146 | Q_ux{k} = cost.L_ux(x{k}, u{k}, dt) ...
147 | + dyn.beta(x{k}, u{k}, dt).' * V_xx{k+1} ...
148 | * dyn.Phi(x{k}, u{k}, dt);
149 |
150 | % Compute the value function derivatives
151 | V_x{k} = Q_x{k} - Q_xu{k} * (Q_uu{k} \ Q_u{k});
152 | V_xx{k} = Q_xx{k} - Q_xu{k} * (Q_uu{k} \ Q_ux{k});
153 | end
154 | end
155 |
156 | %% Assemble and return solution structure
157 |
158 | fprintf("finished DDP, assembling results for post-processing...\n");
159 |
160 | % Assemble solution
161 | sol = assemble_solution(x, u, t, J, E, Q_u, Q_uu, Q_ux, 0);
162 | end
163 |
164 | %% Helper Functions for DDP Algorithm
165 |
166 | % Assembles and returns solution structure
167 | %
168 | % Inputs
169 | %
170 | % x : locally optimal state trajectory
171 | % u : locally optimal control sequence
172 | % t : discretized time stamps
173 | % J : iteration history of cost function
174 | % E : iteration history of control energy
175 | % Q_u, Q_uu, Q_ux : derivatives of state action value function
176 | % error : zero if no error, nonzero else
177 | %
178 | % Outputs
179 | %
180 | % sol : solution structure
181 | function sol = assemble_solution(x, u, t, J, E, Q_u, Q_uu, Q_ux, error)
182 |
183 | % Solution structure
184 | sol = struct;
185 | sol.error = error;
186 | sol.x = x;
187 | sol.u = u;
188 | sol.t = t;
189 | sol.dt = t(2) - t(1);
190 | sol.J = J;
191 | sol.E = E;
192 | sol.Q_u = Q_u;
193 | sol.Q_uu = Q_uu;
194 | sol.Q_ux = Q_ux;
195 |
196 | return
197 | end
--------------------------------------------------------------------------------
/src/main_cart_pole.m:
--------------------------------------------------------------------------------
1 | % Copyright (C) 2019 Maitreya Venkataswamy - All Rights Reserved
2 |
3 | %% Setup
4 |
5 | clc;
6 | clear;
7 | close all;
8 | rng(0);
9 |
10 | %% Cart-pendulum problem definition
11 |
12 | % Initial state
13 | x_0 = [0.0; 0.0; 0.0; 0.0];
14 |
15 | % Target state
16 | x_star = [0.0; 0.0; pi; 0.0];
17 |
18 | % Time horizon
19 | t_f = 2.0;
20 |
21 | % Number of states along trajectory
22 | N = floor(t_f ./ 0.01);
23 |
24 | % Maximum magnitude of control
25 | u_max = [10.0];
26 |
27 | % Initialize dynamics
28 | fprintf("initializing cart-pole dynamics...\n")
29 | m_c = 1.0;
30 | m_p = 0.01;
31 | l = 0.25;
32 | dyn = CartPoleDynamics(m_c, m_p, l);
33 |
34 | % Initialize cost
35 | fprintf("initializing quadratic cost function...\n")
36 | Q_f = [10.0 0 0 0;
37 | 0 10.0 0 0;
38 | 0 0 10.0 0;
39 | 0 0 0 10.0];
40 | R = 0.01;
41 | cost = QuadraticCost(Q_f, R);
42 |
43 | % Number of DDP iterations
44 | num_iter = 100;
45 |
46 | % DDP learning rate
47 | alpha = 0.1;
48 |
49 | % Video framerate
50 | fps = 30;
51 |
52 | %% Execution of DDP
53 |
54 | fprintf("executing DDP...");
55 |
56 | tic;
57 | sol = ddp(x_0, x_star, t_f, N, dyn, cost, u_max, num_iter, alpha);
58 | toc;
59 |
60 | %% Begin post-processing of solution
61 |
62 | if sol.error == 1
63 | fprintf("DIVERGENCE ERROR: try decreasing learning rate\n");
64 | return;
65 | end
66 |
67 | % Extract the cart position and pole angle information from the solution
68 | z = zeros(1, length(sol.x));
69 | theta = zeros(1, length(sol.x));
70 | z_dot = zeros(1, length(sol.x));
71 | theta_dot = zeros(1, length(sol.x));
72 | u = zeros(1, length(sol.x));
73 | for k = 1:N
74 | z(k) = sol.x{k}(1);
75 | theta(k) = sol.x{k}(3);
76 | z_dot(k) = sol.x{k}(2);
77 | theta_dot(k) = sol.x{k}(4);
78 | u(k) = sol.u{k};
79 | end
80 |
81 | %% Create video of system
82 |
83 | % Turn off plot visibility temporarily during frame rendering
84 | set(0,'DefaultFigureVisible','off');
85 |
86 | % Create figure for frames
87 | fig = figure;
88 | ax = gca;
89 | ax.NextPlot = 'ReplaceChildren';
90 |
91 | % Compute number of frames
92 | num_frame = length(1:ceil((1.0 ./ fps) ./ sol.dt):N);
93 |
94 | % Structure to store the frames
95 | frames(num_frame) = struct('cdata',[],'colormap',[]);
96 |
97 | % Plot limits
98 | xl = [];
99 | yl = [];
100 |
101 | % Counter for frames processed
102 | n = 1;
103 |
104 | % Open video for writing
105 | %vid = VideoWriter('cart-pole', 'Uncompressed AVI');
106 | %vid.FrameRate = fps;
107 | %open(vid);
108 |
109 | % Loop over points in time to process frames at
110 | for k = 1:ceil((1.0 ./ fps) ./ sol.dt):N
111 | % Plot the joint between cart and pole
112 | plot(z(k), 0.0, 'ro');
113 |
114 | % Apply plot hold
115 | hold on;
116 |
117 | % Plot the cart
118 | rectangle('Position', [z(k) - 0.25 .* l, -0.1 .* l 0.5 .* l, 0.2 .* l]);
119 |
120 | % Coordinated of the tip of the pole
121 | tipz = z(k) + l .* sin(theta(k));
122 | tipy = -l .* cos(theta(k));
123 |
124 | % Plot the pole
125 | plot([z(k), tipz], [0.0, tipy], '-b');
126 |
127 | % Plot the cart track
128 | plot(-100:100, -0.1 .* l .* ones(1, length(-100:100)), "-k");
129 |
130 | % Remove plot hold
131 | hold off;
132 |
133 | % Obtain or apply plot limits for consistency across frames
134 | if k == 1
135 | xlim([-2 .* l, 2 .* l]);
136 | axis equal;
137 | xl = xlim();
138 | yl = ylim() + 0.5 .* l;
139 | ylim(yl);
140 | else
141 | xlim(xl);
142 | ylim(yl);
143 | end
144 |
145 | % Apply blank background
146 | axis off;
147 |
148 | % Render the frame
149 | drawnow;
150 |
151 | % Record the frame and store in the structure
152 | frames(n) = getframe(fig);
153 |
154 | % Write frame to video
155 | %writeVideo(vid, frames(n));
156 |
157 | % Display progress to command line
158 | fprintf("rendering video frame %d out of %d...\n", n, num_frame);
159 |
160 | % Increment frame counter
161 | n = n + 1;
162 | end
163 |
164 | % Close the video
165 | %close(vid);
166 |
167 | % Renable plot visibility to view movie and other plots
168 | set(0,'DefaultFigureVisible','on')
169 |
170 | % Create fresh figure object for the movie
171 | figure;
172 |
173 | % Render the movie and set to loop for a very large number of times
174 | fprintf("done rendering video frames, presenting video...\n")
175 | movie(gcf, frames, 9999, fps);
176 |
177 | %% Plot trajectories
178 |
179 | % Plot cart position history
180 | figure;
181 | pbaspect([5 3 1])
182 | hold on;
183 | plot(sol.t, z, "k", "Linewidth", 2);
184 | grid on;
185 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20);
186 | ylabel("Cart Position [m]", "Interpreter", "latex", "FontSize", 20);
187 | ax = gca();
188 | ax.FontSize = 16;
189 | ax.TickLabelInterpreter = "latex";
190 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/cart-pole/z.png", "-dpng", "-r500")
191 |
192 | % Plot cart velocity history
193 | figure;
194 | pbaspect([5 3 1])
195 | hold on;
196 | plot(sol.t, z_dot, "k", "Linewidth", 2);
197 | grid on;
198 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20);
199 | ylabel("Cart velocity [m/s]", "Interpreter", "latex", "FontSize", 20);
200 | ax = gca();
201 | ax.FontSize = 16;
202 | ax.TickLabelInterpreter = "latex";
203 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/cart-pole/z_dot.png", "-dpng", "-r500")
204 |
205 | % Plot pole angle history
206 | figure;
207 | pbaspect([5 3 1])
208 | hold on;
209 | plot(sol.t, theta, "k", "Linewidth", 2);
210 | grid on;
211 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20);
212 | ylabel("Pole Angle [rad]", "Interpreter", "latex", "FontSize", 20);
213 | ax = gca();
214 | ax.FontSize = 16;
215 | ax.TickLabelInterpreter = "latex";
216 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/cart-pole/theta.png", "-dpng", "-r500")
217 |
218 | % Plot pole anglular velocity history
219 | figure;
220 | pbaspect([5 3 1])
221 | hold on;
222 | plot(sol.t, theta_dot, "k", "Linewidth", 2);
223 | grid on;
224 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20);
225 | ylabel("Pole Anglular Velocity [rad/s]", "Interpreter", "latex", "FontSize", 20);
226 | ax = gca();
227 | ax.FontSize = 16;
228 | ax.TickLabelInterpreter = "latex";
229 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/cart-pole/theta_dot.png", "-dpng", "-r500")
230 |
231 | % Plot trajectory in partial state space
232 | figure;
233 | pbaspect([5 3 1])
234 | hold on;
235 | plot(z, theta, "k", "Linewidth", 2);
236 | plot(x_0(1), x_0(3), 'o', "MarkerFaceColor", "blue", ...
237 | "MarkerEdgeColor", "blue");
238 | plot(x_star(1), x_star(3), 'o', "MarkerFaceColor", "green", ...
239 | "MarkerEdgeColor", "green")
240 | grid on;
241 | xlabel("Cart Position [m]", "Interpreter", "latex", "FontSize", 20);
242 | ylabel("Pole Angular [rad]", "Interpreter", "latex", "FontSize", 20);
243 | ax = gca();
244 | ax.FontSize = 16;
245 | ax.TickLabelInterpreter = "latex";
246 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/cart-pole/traj.png", "-dpng", "-r500")
247 |
248 | % Plot control sequence
249 | figure;
250 | pbaspect([5 3 1])
251 | hold on;
252 | plot(sol.t, u, "b", "LineWidth", 2);
253 | grid on;
254 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20);
255 | ylabel("Control Input [N]", "Interpreter", "latex", "FontSize", 20);
256 | ax = gca();
257 | ax.FontSize = 16;
258 | ax.TickLabelInterpreter = "latex";
259 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/cart-pole/u.png", "-dpng", "-r500")
260 |
261 | % Plot cost function vs iteration
262 | figure;
263 | pbaspect([5 3 1])
264 | hold on;
265 | plot(1:length(sol.J), sol.J, "r", "LineWidth", 2);
266 | grid on;
267 | xlabel("DDP Iteration [-]", "Interpreter", "latex", "FontSize", 20);
268 | ylabel("Cost Function [-]", "Interpreter", "latex", "FontSize", 20);
269 | ax = gca();
270 | ax.FontSize = 16;
271 | ax.TickLabelInterpreter = "latex";
272 | ax.YScale = 'log';
273 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/cart-pole/J.png", "-dpng", "-r500")
274 |
275 | % Plot control energy vs iteration
276 | figure;
277 | pbaspect([5 3 1])
278 | hold on;
279 | plot(1:length(sol.E), sol.E, "r", "LineWidth", 2);
280 | grid on;
281 | xlabel("DDP Iteration [-]", "Interpreter", "latex", "FontSize", 20);
282 | ylabel("Control Energy Usage [$\rm{N}^{2}\rm{s}$]", "Interpreter", "latex", "FontSize", 20);
283 | ax = gca();
284 | ax.FontSize = 16;
285 | ax.TickLabelInterpreter = "latex";
286 | ax.YScale = 'log';
287 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/cart-pole/E.png", "-dpng", "-r500")
288 |
289 | return;
290 |
--------------------------------------------------------------------------------
/src/main_inverted_pendulum.m:
--------------------------------------------------------------------------------
1 | % Copyright (C) 2019 Maitreya Venkataswamy - All Rights Reserved
2 |
3 | %% Setup
4 |
5 | clc;
6 | clear;
7 | close all;
8 | rng(0);
9 |
10 | %% Cart-pendulum problem definition
11 |
12 | % Initial state
13 | x_0 = [0.0; 0.0];
14 |
15 | % Target state
16 | x_star = [pi; 0.0];
17 |
18 | % Time horizon
19 | t_f = 5.0;
20 |
21 | % Number of states along trajectory
22 | N = floor(t_f ./ 0.01);
23 |
24 | % Maximum magnitude of control
25 | u_max = [10.0];
26 |
27 | % Initialize dynamics
28 | fprintf("initializing inverted pendulum dynamics...\n")
29 | m = 1.0;
30 | l = 1.0;
31 | b = 1.0;
32 | dyn = InvertedPendulumDynamics(m, l, b);
33 |
34 | % Initialize cost
35 | fprintf("initializing quadratic cost function...\n")
36 | Q_f = [100.0 0.0;
37 | 0.0 100.0];
38 | R = 0.0001;
39 | cost = QuadraticCost(Q_f, R);
40 |
41 | % Number of DDP iterations
42 | num_iter = 50;
43 |
44 | % DDP line search parameter
45 | alpha = 0.5;
46 |
47 | % Video framerate
48 | fps = 30;
49 |
50 | %% Execution of DDP
51 |
52 | fprintf("executing DDP...");
53 |
54 | tic;
55 | sol = ddp(x_0, x_star, t_f, N, dyn, cost, u_max, num_iter, alpha);
56 | toc;
57 |
58 | %% Begin post-processing of solution
59 |
60 | if sol.error == 1
61 | fprintf("Error\n");
62 | end
63 |
64 | % Extract the pendulum angle information from the solution
65 | theta = zeros(1, length(sol.x));
66 | theta_dot = zeros(1, length(sol.x));
67 | u = zeros(1, length(sol.x));
68 | for k = 1:N
69 | theta(k) = sol.x{k}(1);
70 | theta_dot(k) = sol.x{k}(2);
71 | u(k) = sol.u{k};
72 | end
73 |
74 | %% Create video of system
75 |
76 | % Turn off plot visibility temporarily during frame rendering
77 | set(0,'DefaultFigureVisible','off');
78 |
79 | % Create figure for frames
80 | fig = figure;
81 | ax = gca;
82 | ax.NextPlot = 'ReplaceChildren';
83 |
84 | % Compute number of frames
85 | num_frame = length(1:ceil((1.0 ./ fps) ./ sol.dt):N);
86 |
87 | % Structure to store the frames
88 | frames(num_frame) = struct('cdata',[],'colormap',[]);
89 |
90 | % Plot limits
91 | xl = [];
92 | yl = [];
93 |
94 | % Counter for frames processed
95 | n = 1;
96 |
97 | % Open video for writing
98 | %vid = VideoWriter('inverted-pendulum', 'Uncompressed AVI');
99 | %vid.FrameRate = fps;
100 | %open(vid);
101 |
102 | % Loop over points in time to process frames at
103 | for k = 1:ceil((1.0 ./ fps) ./ sol.dt):N
104 | % Plot the pendlum root
105 | plot(0.0, 0.0, 'ro');
106 |
107 | % Apply plot hold
108 | hold on;
109 |
110 | % Coordinated of the tip of the pole
111 | tipz = l .* sin(theta(k));
112 | tipy = -l .* cos(theta(k));
113 |
114 | % Plot the pendulum
115 | plot([0.0, tipz], [0.0, tipy], '-b');
116 |
117 | % Remove plot hold
118 | hold off;
119 |
120 | % Obtain or apply plot limits for consistency across frames
121 | if k == 1
122 | ylim([-2 .* l, 2 .* l]);
123 | axis equal;
124 | xl = xlim();
125 | yl = ylim();
126 | else
127 | xlim(xl);
128 | ylim(yl);
129 | end
130 |
131 | % Apply blank background
132 | axis off;
133 |
134 | % Render the frame
135 | drawnow;
136 |
137 | % Record the frame and store in the structure
138 | frames(n) = getframe(fig);
139 |
140 | % Write frame to video
141 | %writeVideo(vid, frames(n));
142 |
143 | % Display progress to command line
144 | fprintf("rendering video frame %d out of %d...\n", n, num_frame);
145 |
146 | % Increment frame counter
147 | n = n + 1;
148 | end
149 |
150 | % Renable plot visibility to view movie and other plots
151 | set(0,'DefaultFigureVisible','on')
152 |
153 | % Create fresh figure object for the movie
154 | figure;
155 |
156 | % Render the movie and set to loop for a very large number of times
157 | fprintf("done rendering video frames, presenting video...\n")
158 | movie(gcf, frames, 9999, fps);
159 |
160 | %% Plot trajectories
161 |
162 | % Plot angle history
163 | figure;
164 | pbaspect([5 3 1])
165 | hold on;
166 | plot(sol.t, rad2deg(theta), "k", "LineWidth", 2);
167 | grid on;
168 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20);
169 | ylabel("Pendulum Angle [rad]", "Interpreter", "latex", "FontSize", 20);
170 | ax = gca();
171 | ax.FontSize = 16;
172 | ax.TickLabelInterpreter = "latex";
173 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/inverted-pendulum/theta.png", "-dpng", "-r500")
174 |
175 | % Plot anglular velocity history
176 | figure;
177 | pbaspect([5 3 1])
178 | hold on;
179 | plot(sol.t, rad2deg(theta_dot), "k", "LineWidth", 2);
180 | grid on;
181 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20);
182 | ylabel("Pendulum Anglular Velocity [rad/s]", "Interpreter", "latex", "FontSize", 20);
183 | ax = gca();
184 | ax.FontSize = 16;
185 | ax.TickLabelInterpreter = "latex";
186 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/inverted-pendulum/theta_dot.png", "-dpng", "-r500")
187 |
188 | % Plot trajectory in state space
189 | figure;
190 | pbaspect([5 3 1])
191 | hold on;
192 | plot(rad2deg(theta), rad2deg(theta_dot), "k", "LineWidth", 2);
193 | plot(rad2deg(x_0(1)), rad2deg(x_0(2)), "o", "MarkerFaceColor", "blue", ...
194 | "MarkerEdgeColor", "blue");
195 | plot(rad2deg(x_star(1)), rad2deg(x_star(2)), "o", "MarkerFaceColor", ...
196 | "green", "MarkerEdgeColor", "green");
197 | grid on;
198 | xlabel("Pendulum Angle [deg]", "Interpreter", "latex", "FontSize", 20);
199 | ylabel("Pendulum Angular Speed [rad/s]", "Interpreter", "latex", ...
200 | "FontSize", 20);
201 | ax = gca();
202 | ax.FontSize = 16;
203 | ax.TickLabelInterpreter = "latex";
204 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/inverted-pendulum/traj.png", "-dpng", "-r500")
205 |
206 | % Plot control sequence
207 | figure;
208 | pbaspect([5 3 1])
209 | hold on;
210 | plot(sol.t, u, "b", "LineWidth", 2);
211 | grid on;
212 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20);
213 | ylabel("Control Input [N]", "Interpreter", "latex", "FontSize", 20);
214 | ax = gca();
215 | ax.FontSize = 16;
216 | ax.TickLabelInterpreter = "latex";
217 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/inverted-pendulum/u.png", "-dpng", "-r500")
218 |
219 | % Plot cost function vs iteration
220 | figure;
221 | pbaspect([5 3 1])
222 | hold on;
223 | plot(1:length(sol.J), sol.J, "r", "LineWidth", 2);
224 | grid on;
225 | xlabel("DDP Iteration [-]", "Interpreter", "latex", "FontSize", 20);
226 | ylabel("Cost Function [-]", "Interpreter", "latex", "FontSize", 20);
227 | ax = gca();
228 | ax.FontSize = 16;
229 | ax.TickLabelInterpreter = "latex";
230 | ax.YScale = 'log';
231 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/inverted-pendulum/J.png", "-dpng", "-r500")
232 |
233 | % Plot control energy vs iteration
234 | figure;
235 | pbaspect([5 3 1])
236 | hold on;
237 | plot(1:length(sol.E), sol.E, "r", "LineWidth", 2);
238 | grid on;
239 | xlabel("DDP Iteration [-]", "Interpreter", "latex", "FontSize", 20);
240 | ylabel("Control Energy Usage [$\rm{N}^{2}\rm{m}^{2}\rm{s}$]", "Interpreter", "latex", "FontSize", 20);
241 | ax = gca();
242 | ax.FontSize = 16;
243 | ax.TickLabelInterpreter = "latex";
244 | ax.YScale = 'log';
245 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/inverted-pendulum/E.png", "-dpng", "-r500")
246 |
247 | %% Evaluate control robustness in presence of noise in dynamics
248 |
249 | % Number of trials to compute
250 | num_trials = 100;
251 |
252 | % Allocate cell array to contain trajectories
253 | x_list = cell(num_trials, 1);
254 |
255 | % Noise standard deviation
256 | sigma = 1e0;
257 |
258 | % Simulate all the trajectories with noise
259 | for n = 1:num_trials
260 | % Allocate array for trajectory
261 | x = cell(length(sol.x), 1);
262 |
263 | % Initialize trajectory initial state
264 | x{1} = x_0;
265 |
266 | % Compute trajectory with computed control sequence with noise
267 | for k = 1:length(sol.x)-1
268 | noise = normrnd(0.0, sigma, length(u_max));
269 | x{k+1} = x{k} + dyn.F(x{k}, sol.u{k} + noise) .* sol.dt;
270 | end
271 |
272 | % Add trajectory to list of computed trajectories
273 | x_list{n} = x;
274 | end
275 |
276 | % Plot the angle trajectories
277 | figure;
278 | pbaspect([5 3 1])
279 | grid on;
280 | hold on;
281 | for n = 1:num_trials
282 | % Extract the pendulum angle information from the solutions
283 | theta = zeros(1, length(sol.x));
284 | theta_dot = zeros(1, length(sol.x));
285 | for k = 1:length(sol.x)
286 | theta(k) = x_list{n}{k}(1);
287 | theta_dot(k) = x_list{n}{k}(2);
288 | end
289 |
290 | % Plot the trajectory
291 | plot(sol.t, theta)
292 | end
293 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20);
294 | ylabel("Pendulum Angle [rad]", "Interpreter", "latex", "FontSize", 20);
295 | ax = gca();
296 | ax.FontSize = 16;
297 | ax.TickLabelInterpreter = "latex";
298 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/inverted-pendulum/theta-noise-1e0.png", "-dpng", "-r500")
299 |
300 | %% Utilize Feedback to adjust control in presence of disturbances
301 |
302 | % Number of trials to compute
303 | num_trials = 100;
304 |
305 | % Allocate cell array to contain trajectories
306 | x_list = cell(num_trials, 1);
307 |
308 | % Noise standard deviation
309 | sigma = 1e0;
310 |
311 | % Simulate all the trajectories with noise
312 | for n = 1:num_trials
313 | % Allocate array for trajectory
314 | x = cell(length(sol.x), 1);
315 |
316 | % Initialize trajectory initial state
317 | x{1} = x_0;
318 |
319 | % Compute trajectory with updating control sequence with noise
320 | for k = 1:length(sol.x)-1
321 | noise = normrnd(0.0, sigma, length(u_max));
322 |
323 | % Compute control correction for disturbance using feedback term
324 | du = -inv(sol.Q_uu{k}) * sol.Q_ux{k} * (x{k} - sol.x{k});
325 |
326 | x{k+1} = x{k} + dyn.F(x{k}, sol.u{k} + du + noise) .* sol.dt;
327 | end
328 |
329 | % Add trajectory to list of computed trajectories
330 | x_list{n} = x;
331 | end
332 |
333 | % Plot the angle trajectories
334 | figure;
335 | pbaspect([5 3 1])
336 | grid on;
337 | hold on;
338 | for n = 1:num_trials
339 | % Extract the pendulum angle information from the solutions
340 | theta = zeros(1, length(sol.x));
341 | theta_dot = zeros(1, length(sol.x));
342 | for k = 1:length(sol.x)
343 | theta(k) = x_list{n}{k}(1);
344 | theta_dot(k) = x_list{n}{k}(2);
345 | end
346 |
347 | % Plot the trajectory
348 | plot(sol.t, theta)
349 | end
350 | xlabel("Time [s]", "Interpreter", "latex", "FontSize", 20);
351 | ylabel("Pendulum Angle [rad]", "Interpreter", "latex", "FontSize", 20);
352 | ax = gca();
353 | ax.FontSize = 16;
354 | ax.TickLabelInterpreter = "latex";
355 | %print("~/Dropbox/gatech_classes/ae4803/hw/hw1/report/fig/inverted-pendulum/theta-robust-1e0.png", "-dpng", "-r500")
356 |
357 | return;
358 |
--------------------------------------------------------------------------------