├── PID.m
├── PID_control.slx
├── Project_Report.pdf
├── PID_control_2018a.slx
├── PID_control.slx.r2017a
├── quadcopter_control.slx
├── quadcopter_control_2018a.slx
├── README.md
└── arc_project.m
/PID.m:
--------------------------------------------------------------------------------
% Enable the control loop by setting the 'control_enable' switch in the PID
% model (int2str converts the logical flag to the string value set_param expects)
control_enable = true;
set_param('PID/control_enable', 'sw', int2str(control_enable));
--------------------------------------------------------------------------------
/PID_control.slx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RamprasadRaj/Auto-tuning-PID-Q-Learning/HEAD/PID_control.slx
--------------------------------------------------------------------------------
/Project_Report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RamprasadRaj/Auto-tuning-PID-Q-Learning/HEAD/Project_Report.pdf
--------------------------------------------------------------------------------
/PID_control_2018a.slx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RamprasadRaj/Auto-tuning-PID-Q-Learning/HEAD/PID_control_2018a.slx
--------------------------------------------------------------------------------
/PID_control.slx.r2017a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RamprasadRaj/Auto-tuning-PID-Q-Learning/HEAD/PID_control.slx.r2017a
--------------------------------------------------------------------------------
/quadcopter_control.slx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RamprasadRaj/Auto-tuning-PID-Q-Learning/HEAD/quadcopter_control.slx
--------------------------------------------------------------------------------
/quadcopter_control_2018a.slx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RamprasadRaj/Auto-tuning-PID-Q-Learning/HEAD/quadcopter_control_2018a.slx
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Auto-tuning-PID-Q-Learning
MATLAB and Simulink files for auto-tuning the PID controller of a quadrotor using Q-learning.
## Objective
Implement a Q-learning-based reinforcement learning technique to auto-tune the PID parameters of a quadrotor so that it reaches the desired goal state.
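The training script (`arc_project.m`) discretizes the quadrotor states into buckets, selects actions with an epsilon-greedy rule, and updates Q tables from a reward that favours small tracking errors. The sketch below illustrates that loop for a single gain under simplified assumptions; the bucket count, the `gain_steps` action set, the reward, and the simulated step are placeholders rather than the exact scheme used in `arc_project.m`.

```matlab
% Minimal illustrative sketch (not the exact scheme in arc_project.m): the
% state/action discretization, reward, and simulation step are placeholders.
n_states   = 10;                  % discretized tracking-error buckets
gain_steps = [-0.1 0 0.1];        % candidate adjustments to the gain
Q          = zeros(n_states, numel(gain_steps));
alpha = 0.01; df = 0.75; epsilon = 0.27;

k_p = 1;                          % initial proportional gain
s   = n_states;                   % start in the largest-error bucket
for episode = 1:1000
    % epsilon-greedy action selection
    if rand < epsilon
        a = randi(numel(gain_steps));
    else
        [~, a] = max(Q(s, :));
    end
    k_p = k_p + gain_steps(a);    % apply the chosen gain adjustment

    % one closed-loop simulation step would go here; its outcome is faked
    s_next = randi(n_states);     % next error bucket (placeholder)
    R = double(s_next == 1);      % +1 when the error falls in the smallest bucket

    % tabular Q-learning update
    Q(s, a) = Q(s, a) + alpha * (R + df * max(Q(s_next, :)) - Q(s, a));
    s = s_next;
end
```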
### Simulated quadrotor traversing the diagonal of a cube
[Watch the video](https://youtu.be/QTW8twD71YM)
### Simulated quadrotor traversing the diagonal of a square
[Watch the video](https://youtu.be/Cy4xLyxrArg)
--------------------------------------------------------------------------------
/arc_project.m:
--------------------------------------------------------------------------------
% Time steps
time_steps = 0:0.1:2.5;

% Initialize states (12 states: position, attitude, and their rates)
S_t = zeros(12,1);

% Initialize Q tables (Q_1, Q_2 for the P gains; Q_3, Q_4 for the D gains)
Q_1 = zeros(12, length(time_steps));
Q_2 = zeros(12, length(time_steps));
Q_3 = zeros(12, length(time_steps));
Q_4 = zeros(12, length(time_steps));

% Position and attitude tracking errors
e_p = zeros(6, length(time_steps));
e_d = zeros(6, length(time_steps));

% Learning rates
alpha_1 = 0.01;
alpha_2 = 0.01;

% Exploration rate
epsilon = 0.27;

% Episodes
max_episodes = 6001;

% Position limits
X_min = zeros(3,1);
X_max = 5 * ones(3,1);

% Attitude limits
omega_min = -45 * pi/180 * ones(3,1);
omega_max =  45 * pi/180 * ones(3,1);

% Velocity limits
X_dot_min = zeros(3,1);
X_dot_max = ones(3,1);

% Attitude-rate limits
omega_dot_min = -15 * pi/180 * ones(3,1);
omega_dot_max =  15 * pi/180 * ones(3,1);

% Continuous state values fed into the discretizer
X_con = zeros(3,1);
X_dot_con = zeros(3,1);
omega_con = zeros(3,1);
omega_dot_con = zeros(3,1);

% Number of buckets each variable is discretized into
N = 10;

% Discretized state indices
n_x = zeros(3, length(time_steps));
n_x_dot = zeros(3, length(time_steps));
n_omega = zeros(3, length(time_steps));
n_omega_dot = zeros(3, length(time_steps));

% Actions chosen with an epsilon-greedy policy
action = zeros(12, length(time_steps));

% Actions for the P gains
action_pos_p = zeros(12, length(time_steps));
action_alt_p = zeros(12, length(time_steps));

% Actions for the D gains
action_pos_d = zeros(12, length(time_steps));
action_alt_d = zeros(12, length(time_steps));

% Outputs from the controller
u_p = zeros(3, length(time_steps));
u_d = zeros(3, length(time_steps));

% Thresholds
pos_thresh = 0.01;
alt_thresh = 0.01;

% Initial one-step run of the quadcopter model
simout = sim('quadcopter_control_training','StartTime',num2str(0),'StopTime',...
    num2str(0.1),'OutputOption','SpecifiedOutputTimes','OutputTimes',num2str(0.1));

S_pos = zeros(6, length(time_steps));
S_alt = zeros(6, length(time_steps));

% Error limits used for the rewards
e_p_lim = 0.01;
e_d_lim = 0.1;

% Discount factor
df = 0.75;

% Reference trajectory and initial PID gains (placeholder values; set these
% to the desired trajectory and starting gains before training)
x_vector = zeros(3,1);          % desired position
x_vector_dot = zeros(3,1);      % desired velocity
theta_vector = zeros(3,1);      % desired attitude
theta_vector_dot = zeros(3,1);  % desired attitude rate
k_p = 1;                        % proportional gain (placeholder)
k_d = 0.1;                      % derivative gain (placeholder)
episode = 1;
while episode <= max_episodes

    % Exploration decay: keep the initial epsilon for the first 60% of the
    % episodes, then collapse it to a small residual value
    if episode > 0.6*max_episodes
        epsilon = 1 / (1 + exp(episode)) + 0.001;
    end
    for t = 1:length(time_steps)-1
        % Store the current state: position and velocity ...
        S_pos(1:3,t) = S_t(1:3);
        S_pos(4:6,t) = S_t(7:9);
        % ... and attitude and attitude rate
        S_alt(1:3,t) = S_t(4:6);
        S_alt(4:6,t) = S_t(10:12);

        % Discretize each state variable into N buckets, clamping values
        % outside the allowed range to the first/last bucket
        n_x(:,t)         = min(max(floor((X_con - X_min) .* N ./ (X_max - X_min)) + 1, 1), N);
        n_x_dot(:,t)     = min(max(floor((X_dot_con - X_dot_min) .* N ./ (X_dot_max - X_dot_min)) + 1, 1), N);
        n_omega(:,t)     = min(max(floor((omega_con - omega_min) .* N ./ (omega_max - omega_min)) + 1, 1), N);
        n_omega_dot(:,t) = min(max(floor((omega_dot_con - omega_dot_min) .* N ./ (omega_dot_max - omega_dot_min)) + 1, 1), N);
        % Epsilon-greedy action selection for the P gains (assumed mapping:
        % explore with random values, exploit with the best Q value per state)
        x_i = rand;
        if x_i < epsilon
            action_pos_p(:,t) = rand(size(Q_1,1), 1);
            action_alt_p(:,t) = rand(size(Q_2,1), 1);
        else
            action_pos_p(:,t) = max(Q_1, [], 2);
            action_alt_p(:,t) = max(Q_2, [], 2);
        end

        % Epsilon-greedy action selection for the D gains
        x_i = rand;
        if x_i < epsilon
            action_pos_d(:,t) = rand(size(Q_3,1), 1);
            action_alt_d(:,t) = rand(size(Q_4,1), 1);
        else
            action_pos_d(:,t) = max(Q_3, [], 2);
            action_alt_d(:,t) = max(Q_4, [], 2);
        end
        % Total controller output u(t):
        %   u_p = k_p*e + k_d*e_dot,  with e = x_d - x_c    and e_dot = x_dot_d - x_dot_c
        %   u_d = k_p*e + k_d*e_dot,  with e = phi_d - phi_c and e_dot = phi_dot_d - phi_dot_c
        x_ref = zeros(6,1);
        x_ref(1:3) = x_vector(1:3);
        x_ref(4:6) = x_vector_dot(1:3);
        e_p(:,t) = x_ref - S_pos(:,t);

        omega_ref = zeros(6,1);
        omega_ref(1:3) = theta_vector(1:3);
        omega_ref(4:6) = theta_vector_dot(1:3);
        e_d(:,t) = omega_ref - S_alt(:,t);

        u_p(:,t) = (k_p * e_p(1:3,t)) + (k_d * e_p(4:6,t));
        u_d(:,t) = (k_p * e_d(1:3,t)) + (k_d * e_d(4:6,t));
        % Observe the new state S_t+1 by running the quadcopter model for
        % one time step
        %x0 =
        simout = sim('quadcopter_control_training','StartTime',num2str(0),'StopTime',...
            num2str(0.1),'OutputOption','SpecifiedOutputTimes','OutputTimes',num2str(0.1));

        x_vec_next(1:3,1) = (simout.simout.Data(end,1:3))';
        x_vec_next(4:6,1) = (simout.simout.Data(end,7:9))';
        omega_vec_next(1:3,1) = (simout.simout.Data(end,4:6))';
        omega_vec_next(4:6,1) = (simout.simout.Data(end,10:12))';

        % Update the stored state and its position/attitude partitions
        S_t(1:3)   = x_vec_next(1:3,1);
        S_t(7:9)   = x_vec_next(4:6,1);
        S_t(4:6)   = omega_vec_next(1:3,1);
        S_t(10:12) = omega_vec_next(4:6,1);
        S_pos(1:3,t+1) = S_t(1:3);
        S_pos(4:6,t+1) = S_t(7:9);
        S_alt(1:3,t+1) = S_t(4:6);
        S_alt(4:6,t+1) = S_t(10:12);

        X_con(1:3,1)         = x_vec_next(1:3,1);
        X_dot_con(1:3,1)     = x_vec_next(4:6,1);
        omega_con(1:3,1)     = omega_vec_next(1:3,1);
        omega_dot_con(1:3,1) = omega_vec_next(4:6,1);

        % Tracking errors at the new state (used for the rewards below)
        e_p_next = x_ref - S_pos(:,t+1);
        e_d_next = omega_ref - S_alt(:,t+1);
        % Receive reward R_p for Q_1 and Q_2: +1 when every position error
        % component is within e_p_lim, 0 when all are outside, -1 otherwise
        if all(abs(e_p_next) > e_p_lim)
            R_p = 0;
        elseif all(abs(e_p_next) <= e_p_lim)
            R_p = 1;
        else
            R_p = -1;
        end

        % Reward R_d for Q_3 and Q_4, using the attitude error limit
        if all(abs(e_d_next) > e_d_lim)
            R_d = 0;
        elseif all(abs(e_d_next) <= e_d_lim)
            R_d = 1;
        else
            R_d = -1;
        end
        % Discretize the new state into column t+1
        n_x(:,t+1)         = min(max(floor((X_con - X_min) .* N ./ (X_max - X_min)) + 1, 1), N);
        n_x_dot(:,t+1)     = min(max(floor((X_dot_con - X_dot_min) .* N ./ (X_dot_max - X_dot_min)) + 1, 1), N);
        n_omega(:,t+1)     = min(max(floor((omega_con - omega_min) .* N ./ (omega_max - omega_min)) + 1, 1), N);
        n_omega_dot(:,t+1) = min(max(floor((omega_dot_con - omega_dot_min) .* N ./ (omega_dot_max - omega_dot_min)) + 1, 1), N);

        % Tabular Q-learning update, Q <- Q + alpha*(R + df*max(Q_next) - Q),
        % applied to the current time-step column of each table
        Q_1(:,t) = Q_1(:,t) + alpha_1 * (R_p + df * max(Q_1(:,t+1)) - Q_1(:,t));
        Q_2(:,t) = Q_2(:,t) + alpha_1 * (R_p + df * max(Q_2(:,t+1)) - Q_2(:,t));
        Q_3(:,t) = Q_3(:,t) + alpha_2 * (R_d + df * max(Q_3(:,t+1)) - Q_3(:,t));
        Q_4(:,t) = Q_4(:,t) + alpha_2 * (R_d + df * max(Q_4(:,t+1)) - Q_4(:,t));

    end
    episode = episode + 1;
end
--------------------------------------------------------------------------------