├── Code
│   ├── Policy_Iteration_2Link_with_Obstacles.m
│   ├── Policy_Iteration_3Link_with_Obstacles.m
│   ├── Q_Learning.m
│   ├── Value_Iteration_2Link_Arm.m
│   ├── Value_Iteration_2Link_with_Obstacles.m
│   ├── Value_Iteration_3Link_Arm.m
│   └── Value_Iteration_3Link_with_Obstacles.m
├── EEE587 Final Presentation.pptx
├── EEE_587_Final_Report.pdf
├── README.md
└── README.txt

/Code/Policy_Iteration_2Link_with_Obstacles.m: --------------------------------------------------------------------------------
1 | %% Problem Description
2 | % This file contains the policy iteration algorithm written to evaluate the
3 | % optimal path for a 2-Link Planar Robotic Arm with obstacles.
4 | clc;
5 | clear;
6 | close all;
7 | %% Initial Parameters
8 | x0 = [0 0]; % Start State
9 | goal = [pi 0]; % End State
10 | r = -0.01; % Living Reward
11 | R = 100; % End Reward
12 | 
13 | %% Obstacle Modeling
14 | L = linspace(0,2*pi,6);
15 | L2 = linspace(0,2*pi,5);
16 | L3 = linspace(0,2*pi,100);
17 | L4 = linspace(0,2*pi,6);
18 | xv = 1.5+1*cos(L)';
19 | yv = 1.5+1*sin(L)';
20 | npoints = 20;
21 | 
22 | xv2 = -0.5+0.3*cos(L2)';
23 | yv2 = 0.5+0.3*sin(L2)';
24 | %
25 | xv3 = 1.4+1*cos(L3)';
26 | yv3 = -1.5+1*sin(L3)';
27 | 
28 | xv4 = -1.4+1*cos(L4)';
29 | yv4 = -1.3+1*sin(L4)';
30 | 
31 | grid1 = 2*pi/100;
32 | grid2 = 2*pi/100;
33 | th1 = -pi:grid1:pi;
34 | th2 = -pi:grid2:pi;
35 | 
36 | states = zeros(length(th1),length(th2),2);
37 | V0 = zeros(length(th1),length(th2));
38 | dummy_pol = ones(length(th1),length(th2));
39 | update1 = 0;
40 | update2 = 0;
41 | update3 = 0;
42 | l1 = length(th1);
43 | l2 = length(th2);
44 | colijs = [];
45 | disp('Initializing the Problem...')
46 | for i = 1:length(th1)
47 | for j = 1:length(th2)
48 | states(i,j,1) = th1(i);
49 | states(i,j,2) = th2(j);
50 | if (abs(th1(i)-x0(1)) <= grid1) && (abs(th2(j)-x0(2)) <= grid2)
51 | if update1 == 0
52 | istart = i;
53 | jstart = j;
54 | update1 = 1;
55 | end
56 | end
57 | 
58 | if (abs(th1(i)-goal(1)) <= grid1) && (abs(th2(j)-goal(2)) <= grid2)
59 | if update2 == 0
60 | V0(i,j) = R;
61 | dummy_pol(i,j) = 0;
62 | update2 = 1;
63 | iend = i;
64 | jend = j;
65 | end
66 | end
67 | 
68 | % Collision Check
69 | q1 = th1(i); q2 = th2(j);
70 | E1 = [cos(q1) sin(q1)];
71 | E2 = [cos(q1)+cos(q1 + q2) sin(q1)+sin(q1 + q2)];
72 | xlin1 = linspace(0,E1(1),npoints);
73 | ylin1 = linspace(0,E1(2),npoints);
74 | points1 = [xlin1(:) ylin1(:)];
75 | 
76 | xlin2 = linspace(E1(1),E2(1),npoints);
77 | ylin2 = linspace(E1(2),E2(2),npoints);
78 | points2 = [xlin2(:) ylin2(:)];
79 | 
80 | points = vertcat(points1,points2);
81 | xq = points(:,1);
82 | yq = points(:,2);
83 | [in1,on1] = inpolygon(xq,yq,xv,yv);
84 | [in2,on2] = inpolygon(xq,yq,xv2,yv2);
85 | [in3,on3] = inpolygon(xq,yq,xv3,yv3);
86 | [in4,on4] = inpolygon(xq,yq,xv4,yv4);
87 | col_points = numel(xq(in1)) + numel(xq(on1))+numel(xq(in2)) + numel(xq(on2))+numel(xq(in3)) + numel(xq(on3))...
88 | +numel(xq(in4)) + numel(xq(on4)); 89 | 90 | if col_points ~=0 91 | colijs(end+1,:) = [i j]; 92 | end 93 | 94 | 95 | end 96 | end 97 | nstates = length(th1)*length(th2); 98 | 99 | %% Creating MINUS, PLUS ARRAYS FOR WRITING TRANSITION MODEL 100 | iminus = zeros(l1,1); 101 | iplus = zeros(l1,1); 102 | jminus = zeros(l2,1); 103 | jplus = zeros(l2,1); 104 | 105 | for i = 1:l1 106 | iminus(i) = i-1; 107 | iplus(i) = i+1; 108 | if i == 1 109 | iminus(i) = l1 - 1; 110 | elseif i == l1 111 | iplus(i) = 2; 112 | end 113 | end 114 | 115 | for j = 1:l2 116 | jminus(j) = j-1; 117 | jplus(j) = j+1; 118 | if j == 1 119 | jminus(j) = l2 - 1; 120 | elseif j == l2 121 | jplus(j) = 2; 122 | end 123 | end 124 | %% Initiating Few Other Parameters... 125 | Vold = V0; 126 | Vnew = V0; 127 | policy = ones(length(th1),length(th2)); 128 | 129 | crnt_pol = policy; 130 | iter = 1; 131 | pol_iter = 1; 132 | 133 | %% Policy Iteration 134 | disp('Policy Iteration Starts...') 135 | while iter > 0 136 | Vold = Vnew; 137 | Q = Qfunc(Vnew,r,l1,l2,iminus,iplus,jminus,jplus,colijs); 138 | for i = 1:l1 139 | for j = 1:l2 140 | if crnt_pol(i,j) ~= 0 141 | Vnew(i,j) = Q(i,j,crnt_pol(i,j)); 142 | end 143 | end 144 | end 145 | 146 | fprintf('Value Determination Iteration: %d\n',iter); 147 | iter = iter + 1; 148 | 149 | if Vold == Vnew 150 | Q = Qfunc(Vnew,r,l1,l2,iminus,iplus,jminus,jplus,colijs); 151 | Vint = V0; 152 | Policyint = zeros(length(th1),length(th2)); 153 | for i = 1:l1 154 | for j = 1:l2 155 | if dummy_pol(i,j) ~= 0 156 | [maxval,index] = max(Q(i,j,:)); 157 | Vint(i,j) = maxval; 158 | Policyint(i,j) = index; 159 | end 160 | end 161 | end 162 | if crnt_pol == Policyint 163 | disp('==================================') 164 | Optimal_Policy = crnt_pol; 165 | disp('Policy Converged!') 166 | break 167 | else 168 | Vnew(:,:) = Vint(:,:); 169 | crnt_pol(:,:) = Policyint(:,:); 170 | disp('POLICY IMPROVED!') 171 | pol_iter = pol_iter + 1; 172 | disp('==================================') 173 | disp('==================================') 174 | iter = 1; 175 | end 176 | end 177 | end 178 | 179 | disp('Policy Iteration Method Found an Optimal Policy ') 180 | fprintf('Number of Policy Iterations: %d\n',pol_iter); 181 | 182 | %% Retrieving the Optimal Policy 183 | 184 | pathlength = 1; 185 | i = istart; 186 | j = jstart; 187 | qout = x0; 188 | count = 0; 189 | while pathlength > 0 190 | a = Optimal_Policy(i,j); 191 | snext = nexts(i,j,a,states,iminus,iplus,jminus,jplus); 192 | qout(end+1,:) = snext; 193 | i = find(th1 == snext(1)); 194 | j = find(th2 == snext(2)); 195 | 196 | if i == iend && j == jend 197 | break; 198 | end 199 | 200 | 201 | end 202 | qout(end+1,:) = goal; 203 | 204 | %% Visualizing the Optimal Policy 205 | mdl_planar2; 206 | for i = 1:size(qout,1) 207 | p2.plot(qout(i,:)) 208 | hold on; 209 | plot3(xv,yv,zeros(length(xv),1),'r','Linewidth',2); 210 | plot3(xv2,yv2,zeros(length(xv2),1),'r','Linewidth',2); 211 | plot3(xv3,yv3,zeros(length(xv3),1),'r','Linewidth',2); 212 | plot3(xv4,yv4,zeros(length(xv4),1),'r','Linewidth',2); 213 | fill3(xv,yv,zeros(length(xv),1),'r'); 214 | fill3(xv2,yv2,zeros(length(xv2),1),'r'); 215 | fill3(xv3,yv3,zeros(length(xv3),1),'r'); 216 | fill3(xv4,yv4,zeros(length(xv4),1),'r'); 217 | end 218 | 219 | endpoints = []; 220 | for i = 1:size(qout,1)-1 221 | endpoints(i,:) = [cos(qout(i,1))+cos(qout(i,1) + qout(i,2)) sin(qout(i,1))+sin(qout(i,1) + qout(i,2))]; 222 | hold on; 223 | end 224 | plot3(endpoints(:,1),endpoints(:,2),zeros(length(endpoints),1),'k','Linewidth',2) 225 | 226 | %% Action 
Value and Transition Functions are Defined Below. 227 | % Action Value Function 228 | function [Q] = Qfunc(V,r,l1,l2,iminus,iplus,jminus,jplus,colijs) 229 | Q = zeros(l1,l2,9); 230 | % 9 Actions 231 | for m = 1:length(colijs) 232 | V(colijs(m,1),colijs(m,2)) = 0; 233 | end 234 | for i = 1:l1 235 | for j = 1:l2 236 | Q(i,j,1) = V(iminus(i),jminus(j))+r; 237 | Q(i,j,2) = V(iminus(i),j)+r; 238 | Q(i,j,3) = V(iminus(i),jplus(j))+r; 239 | Q(i,j,4) = V(i,jminus(j))+r; 240 | Q(i,j,5) = V(i,j)+r; 241 | Q(i,j,6) = V(i,jplus(j))+r; 242 | Q(i,j,7) = V(iplus(i),jminus(j))+r; 243 | Q(i,j,8) = V(iplus(i),j)+r; 244 | Q(i,j,9) = V(iplus(i),jplus(j))+r; 245 | end 246 | end 247 | end 248 | 249 | % Transition Function 250 | function [snext] = nexts(i,j,a,states,iminus,iplus,jminus,jplus) 251 | ni = size(states,1); 252 | nj = size(states,2); 253 | 254 | Nextindex = [iminus(i) jminus(j); 255 | iminus(i) j; 256 | iminus(i) jplus(j); 257 | i jminus(j); 258 | i j; 259 | i jplus(j); 260 | iplus(i) jminus(j); 261 | iplus(i) j; 262 | iplus(i) jplus(j)]; 263 | 264 | inext = Nextindex(a,1); 265 | jnext = Nextindex(a,2); 266 | 267 | snext = states(inext,jnext,:); 268 | 269 | end 270 | -------------------------------------------------------------------------------- /Code/Policy_Iteration_3Link_with_Obstacles.m: -------------------------------------------------------------------------------- 1 | %% Problem Description 2 | % This file contains the policy iteration algorithm written to evaluate the 3 | % optimal path for a 3-Link Planar Robotic Arm with obstacles. 4 | clc; 5 | clear; 6 | close all; 7 | %% Initial Parameters 8 | x0 = [-pi/2 0 0]; % Start State 9 | goal = [pi/2 pi/2 0]; % End State 10 | r = -0.2; % Living Reward 11 | R = 100; % End Reward 12 | 13 | %% Obstacle Modeling 14 | L = linspace(0,2*pi,6); 15 | L2 = linspace(0,2*pi,5); 16 | L3 = linspace(0,2*pi,100); 17 | L4 = linspace(0,2*pi,6); 18 | xv = 1.5+1*cos(L)'; 19 | yv = 1.5+1*sin(L)'; 20 | npoints = 20; 21 | 22 | xv2 = -0.5+0.3*cos(L2)'; 23 | yv2 = 0.5+0.3*sin(L2)'; 24 | % 25 | xv3 = 1.4+1*cos(L3)'; 26 | yv3 = -1.5+1*sin(L3)'; 27 | 28 | xv4 = -1.4+1*cos(L4)'; 29 | yv4 = -1.5+1*sin(L4)'; 30 | 31 | %% 32 | 33 | grid1 = 2*pi/50; 34 | grid2 = 2*pi/50; 35 | grid3 = 2*pi/50; 36 | th1 = -pi:grid1:pi; 37 | th2 = -pi:grid1:pi; 38 | th3 = -pi:grid1:pi; 39 | 40 | 41 | states = zeros(length(th1),length(th2),length(th3),3); 42 | V0 = zeros(length(th1),length(th2),length(th3)); 43 | dummy_pol = ones(length(th1),length(th2),length(th3)); 44 | update1 = 0; 45 | update2 = 0; 46 | update3 = 0; 47 | l1 = length(th1); 48 | l2 = length(th2); 49 | l3 = length(th3); 50 | colijs = []; 51 | disp('Initializing the Problem...') 52 | policy = ones(length(th1),length(th2),length(th3)); 53 | dummy_pol = ones(length(th1),length(th2),length(th3)); 54 | 55 | for i = 1:length(th1) 56 | for j = 1:length(th2) 57 | for k = 1:length(th3) 58 | states(i,j,k,1) = th1(i); 59 | states(i,j,k,2) = th2(j); 60 | states(i,j,k,3) = th3(k); 61 | if (abs(th1(i)-x0(1)) <= grid1) && (abs(th2(j)-x0(2)) <= grid2) && (abs(th3(k)-x0(3)) <= grid3) 62 | if update1 == 0 63 | istart = i; 64 | jstart = j; 65 | kstart = k; 66 | update1 = 1; 67 | end 68 | end 69 | 70 | if (abs(th1(i)-goal(1)) <= grid1) && (abs(th2(j)-goal(2)) <= grid2) && (abs(th3(k)-goal(3)) <= grid3) 71 | if update2 == 0 72 | V0(i,j,k) = R; 73 | policy(i,j,k) = 0; 74 | dummy_pol(i,j,k) = 0; 75 | update2 = 1; 76 | iend = i; 77 | jend = j; 78 | kend = k; 79 | end 80 | end 81 | 82 | % Collision Check 83 | q1 = th1(i); q2 = th2(j); q3 = th3(k); 84 | E1 = 
[cos(q1) sin(q1)]; 85 | E2 = [cos(q1)+cos(q1 + q2) sin(q1)+sin(q1 + q2)]; 86 | E3 = [cos(q1)+cos(q1 + q2)+cos(q1 + q2 + q3) sin(q1)+sin(q1 + q2)+sin(q1 + q2 + q3)]; 87 | xlin1 = linspace(0,E1(1),npoints); 88 | ylin1 = linspace(0,E1(2),npoints); 89 | points1 = [xlin1(:) ylin1(:)]; 90 | 91 | xlin2 = linspace(E1(1),E2(1),npoints); 92 | ylin2 = linspace(E1(2),E2(2),npoints); 93 | points2 = [xlin2(:) ylin2(:)]; 94 | 95 | xlin3 = linspace(E2(1),E3(1),npoints); 96 | ylin3 = linspace(E2(2),E3(2),npoints); 97 | points3 = [xlin3(:) ylin3(:)]; 98 | 99 | points = vertcat(points1,points2,points3); 100 | xq = points(:,1); 101 | yq = points(:,2); 102 | [in1,on1] = inpolygon(xq,yq,xv,yv); 103 | [in2,on2] = inpolygon(xq,yq,xv2,yv2); 104 | [in3,on3] = inpolygon(xq,yq,xv3,yv3); 105 | [in4,on4] = inpolygon(xq,yq,xv4,yv4); 106 | col_points = numel(xq(in1)) + numel(xq(on1))+numel(xq(in2)) + numel(xq(on2))+numel(xq(in3)) + numel(xq(on3))... 107 | +numel(xq(in4)) + numel(xq(on4)); 108 | 109 | if col_points ~=0 110 | colijs(end+1,:) = [i j k]; 111 | end 112 | 113 | 114 | 115 | end 116 | end 117 | end 118 | nstates = l1*l2*l3; 119 | 120 | %% Creating MINUS, PLUS ARRAYS 121 | 122 | iminus = zeros(l1,1); 123 | iplus = zeros(l1,1); 124 | jminus = zeros(l2,1); 125 | jplus = zeros(l2,1); 126 | kminus = zeros(l3,1); 127 | kplus = zeros(l3,1); 128 | 129 | for i = 1:l1 130 | iminus(i) = i-1; 131 | iplus(i) = i+1; 132 | if i == 1 133 | iminus(i) = l1 - 1; 134 | elseif i == l1 135 | iplus(i) = 2; 136 | end 137 | end 138 | 139 | for j = 1:l2 140 | jminus(j) = j-1; 141 | jplus(j) = j+1; 142 | if j == 1 143 | jminus(j) = l2 - 1; 144 | elseif j == l2 145 | jplus(j) = 2; 146 | end 147 | end 148 | 149 | for k = 1:l3 150 | kminus(k) = k-1; 151 | kplus(k) = k+1; 152 | if k == 1 153 | kminus(k) = l3 - 1; 154 | elseif k == l3 155 | kplus(k) = 2; 156 | end 157 | end 158 | 159 | %% Initializing Few Other Parameters 160 | Vold = V0; 161 | Vnew = V0; 162 | % disp('Actions 1 = N, 2 = E, 3 = S, 4 = W'); 163 | crnt_pol = policy; 164 | iter = 1; 165 | pol_iter = 1; 166 | 167 | %% Policy Iteration 168 | disp('Policy Iteration Starts...') 169 | while iter > 0 170 | Vold = Vnew; 171 | Q = Qfunc(Vnew,r,l1,l2,l3,iminus,iplus,jminus,jplus,kminus,kplus,colijs); 172 | for i = 1:l1 173 | for j = 1:l2 174 | for k = 1:l3 175 | if policy(i,j,k) ~= 0 176 | Vnew(i,j,k) = Q(i,j,k,crnt_pol(i,j,k)); 177 | end 178 | end 179 | end 180 | end 181 | 182 | fprintf('Value Determination Iteration: %d\n',iter); 183 | % disp('The current state value matrix is '); 184 | % Vnew 185 | iter = iter + 1; 186 | 187 | if Vold == Vnew 188 | Q = Qfunc(Vnew,r,l1,l2,l3,iminus,iplus,jminus,jplus,kminus,kplus,colijs); 189 | Vint = V0; 190 | Policyint = zeros(length(th1),length(th2),length(th3)); 191 | for i = 1:l1 192 | for j = 1:l2 193 | for k = 1:l3 194 | if dummy_pol(i,j,k) ~= 0 195 | [maxval,index] = max(Q(i,j,k,:)); 196 | Vint(i,j,k) = maxval; 197 | Policyint(i,j,k) = index; 198 | % targeti = i; 199 | % targetj = j; 200 | end 201 | end 202 | end 203 | end 204 | if crnt_pol == Policyint 205 | disp('==================================') 206 | Optimal_Policy = crnt_pol; 207 | disp('Policy Converged!') 208 | % Optimal_Policy 209 | % disp('where,') 210 | % disp('Actions 1 = N, 2 = E, 3 = S, 4 = W'); 211 | break 212 | else 213 | Vnew(:,:,:) = Vint(:,:,:); 214 | crnt_pol(:,:,:) = Policyint(:,:,:); 215 | % disp('The current policy is') 216 | % crnt_pol 217 | % disp('State Value matrix is') 218 | % Vnew 219 | disp('POLICY IMPROVED!') 220 | pol_iter = pol_iter + 1; 221 | 
disp('==================================') 222 | disp('==================================') 223 | % iter 224 | iter = 1; 225 | end 226 | end 227 | end 228 | 229 | 230 | % disp('State-value matrix for optimal policy is') 231 | % Vnew 232 | disp('Policy Iteration Method Found an Optimal Policy ') 233 | fprintf('Number of Policy Iterations: %d\n',pol_iter); 234 | pathlength = 1; 235 | i = istart; 236 | j = jstart; 237 | k = kstart; 238 | qout = x0; 239 | 240 | while pathlength > 0 241 | a = Optimal_Policy(i,j,k); 242 | snext = nexts(i,j,k,a,states,iminus,iplus,jminus,jplus,kminus,kplus); 243 | qout(end+1,:) = snext; 244 | i = find(th1 == snext(1)); 245 | j = find(th2 == snext(2)); 246 | k = find(th3 == snext(3)); 247 | 248 | if i == iend && j == jend && k == kend 249 | break; 250 | end 251 | 252 | end 253 | qout(end+1,:) = goal; 254 | mdl_planar3; 255 | for i = 1:size(qout,1) 256 | p3.plot(qout(i,:)) 257 | hold on; 258 | plot3(xv,yv,zeros(length(xv),1),'r','Linewidth',2); 259 | plot3(xv2,yv2,zeros(length(xv2),1),'r','Linewidth',2); 260 | plot3(xv3,yv3,zeros(length(xv3),1),'r','Linewidth',2); 261 | plot3(xv4,yv4,zeros(length(xv4),1),'r','Linewidth',2); 262 | fill3(xv,yv,zeros(length(xv),1),'r'); 263 | fill3(xv2,yv2,zeros(length(xv2),1),'r'); 264 | fill3(xv3,yv3,zeros(length(xv3),1),'r'); 265 | fill3(xv4,yv4,zeros(length(xv4),1),'r'); 266 | end 267 | 268 | endpoints = []; 269 | for i = 1:size(qout,1)-1 270 | endpoints(i,:) = [cos(qout(i,1))+cos(qout(i,1) + qout(i,2))+cos(qout(i,1) + qout(i,2)+qout(i,3)) sin(qout(i,1))+sin(qout(i,1) + qout(i,2))+sin(qout(i,1) + qout(i,2)+qout(i,3))]; 271 | hold on; 272 | end 273 | 274 | p3.plot(qout(1,:)) 275 | plot3(endpoints(:,1),endpoints(:,2),zeros(length(endpoints),1),'k','Linewidth',2) 276 | 277 | %% Action Value and Transition Functions are Defined Below. 
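% The 27 actions enumerate every {-1, 0, +1} index offset of the three joint
% indices. Below is a small decoder (a hypothetical helper, not called by
% this script) that maps an action index onto those offsets, matching the
% ordering used in Qfunc and nexts:
function [di,dj,dk] = decode_action(a)
kmap = [0 -1 1];        % actions 1-9 keep k, 10-18 use kminus, 19-27 use kplus
dk = kmap(ceil(a/9));
m = mod(a-1,9);         % position inside the 3x3 block over (i,j)
di = floor(m/3) - 1;    % -1 -> iminus, 0 -> stay, +1 -> iplus
dj = mod(m,3) - 1;      % -1 -> jminus, 0 -> stay, +1 -> jplus
end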
278 | 
279 | function [Q] = Qfunc(V,r,l1,l2,l3,iminus,iplus,jminus,jplus,kminus,kplus,colijs)
280 | Q = zeros(l1,l2,l3,27);
281 | % 27 Actions
282 | for m = 1:length(colijs)
283 | V(colijs(m,1),colijs(m,2),colijs(m,3)) = 0;
284 | end
285 | 
286 | 
287 | for i = 1:l1
288 | for j = 1:l2
289 | for k = 1:l3
290 | Q(i,j,k,1) = V(iminus(i),jminus(j),k)+r;
291 | Q(i,j,k,2) = V(iminus(i),j,k)+r;
292 | Q(i,j,k,3) = V(iminus(i),jplus(j),k)+r;
293 | Q(i,j,k,4) = V(i,jminus(j),k)+r;
294 | Q(i,j,k,5) = V(i,j,k)+r;
295 | Q(i,j,k,6) = V(i,jplus(j),k)+r;
296 | Q(i,j,k,7) = V(iplus(i),jminus(j),k)+r;
297 | Q(i,j,k,8) = V(iplus(i),j,k)+r;
298 | Q(i,j,k,9) = V(iplus(i),jplus(j),k)+r;
299 | 
300 | Q(i,j,k,10) = V(iminus(i),jminus(j),kminus(k))+r;
301 | Q(i,j,k,11) = V(iminus(i),j,kminus(k))+r;
302 | Q(i,j,k,12) = V(iminus(i),jplus(j),kminus(k))+r;
303 | Q(i,j,k,13) = V(i,jminus(j),kminus(k))+r;
304 | Q(i,j,k,14) = V(i,j,kminus(k))+r;
305 | Q(i,j,k,15) = V(i,jplus(j),kminus(k))+r;
306 | Q(i,j,k,16) = V(iplus(i),jminus(j),kminus(k))+r;
307 | Q(i,j,k,17) = V(iplus(i),j,kminus(k))+r;
308 | Q(i,j,k,18) = V(iplus(i),jplus(j),kminus(k))+r;
309 | 
310 | Q(i,j,k,19) = V(iminus(i),jminus(j),kplus(k))+r;
311 | Q(i,j,k,20) = V(iminus(i),j,kplus(k))+r;
312 | Q(i,j,k,21) = V(iminus(i),jplus(j),kplus(k))+r;
313 | Q(i,j,k,22) = V(i,jminus(j),kplus(k))+r;
314 | Q(i,j,k,23) = V(i,j,kplus(k))+r;
315 | Q(i,j,k,24) = V(i,jplus(j),kplus(k))+r;
316 | Q(i,j,k,25) = V(iplus(i),jminus(j),kplus(k))+r;
317 | Q(i,j,k,26) = V(iplus(i),j,kplus(k))+r;
318 | Q(i,j,k,27) = V(iplus(i),jplus(j),kplus(k))+r;
319 | end
320 | end
321 | end
322 | end
323 | 
324 | function [snext] = nexts(i,j,k,a,states,iminus,iplus,jminus,jplus,kminus,kplus)
325 | 
326 | ni = size(states,1);
327 | nj = size(states,2);
328 | nk = size(states,3);
329 | 
330 | Nextindex = [iminus(i) jminus(j) k;
331 | iminus(i) j k;
332 | iminus(i) jplus(j) k;
333 | i jminus(j) k;
334 | i j k;
335 | i jplus(j) k;
336 | iplus(i) jminus(j) k;
337 | iplus(i) j k;
338 | iplus(i) jplus(j) k;
339 | iminus(i) jminus(j) kminus(k);
340 | iminus(i) j kminus(k);
341 | iminus(i) jplus(j) kminus(k);
342 | i jminus(j) kminus(k);
343 | i j kminus(k);
344 | i jplus(j) kminus(k);
345 | iplus(i) jminus(j) kminus(k);
346 | iplus(i) j kminus(k);
347 | iplus(i) jplus(j) kminus(k);
348 | iminus(i) jminus(j) kplus(k);
349 | iminus(i) j kplus(k);
350 | iminus(i) jplus(j) kplus(k);
351 | i jminus(j) kplus(k);
352 | i j kplus(k);
353 | i jplus(j) kplus(k);
354 | iplus(i) jminus(j) kplus(k);
355 | iplus(i) j kplus(k);
356 | iplus(i) jplus(j) kplus(k)];
357 | 
358 | inext = Nextindex(a,1);
359 | jnext = Nextindex(a,2);
360 | knext = Nextindex(a,3);
361 | 
362 | 
363 | snext = states(inext,jnext,knext,:);
364 | 
365 | 
366 | end
367 | 
-------------------------------------------------------------------------------- /Code/Q_Learning.m: --------------------------------------------------------------------------------
1 | %% Problem Description
2 | % This file contains the Q-learning algorithm written to evaluate the
3 | % optimal path for a 2-Link Planar Robotic Arm. The algorithm has not
4 | % converged to the optimal result within 50000 iterations, so it is
5 | % still being improved.
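% The update applied at every step below is the standard tabular Q-learning
% rule, with a learning rate alpha that decays as 1/episode and gamma = 1:
%
%   Q(s,a) <- (1 - alpha)*Q(s,a) + alpha*( reward + gamma*max_a' Q(s',a') )
%
% Exploration is epsilon-greedy: with probability eps one of the 8
% neighbouring-state actions is drawn at random; otherwise the current
% greedy action is taken.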
6 | clc; 7 | clear; 8 | close all; 9 | %% Initial Parameters 10 | x0 = [0 0]; % Start State 11 | goal = [pi/2 pi/2]; % End State 12 | R = 100; % End Reward 13 | nIter = 5000; 14 | r = -0.1; 15 | gamma = 1; 16 | eps = 0.1; 17 | 18 | grid1 = 2*pi/100; 19 | grid2 = 2*pi/100; 20 | th1 = -pi:grid1:pi; 21 | th2 = -pi:grid1:pi; 22 | l1 = length(th1); 23 | l2 = length(th2); 24 | states = zeros(l1,l2,2); 25 | update1 = 0; 26 | update2 = 0; 27 | for i = 1:length(th1) 28 | for j = 1:length(th2) 29 | states(i,j,1) = th1(i); 30 | states(i,j,2) = th2(j); 31 | if (abs(th1(i)-x0(1)) <= grid1) && (abs(th2(j)-x0(2)) <= grid2) 32 | if update1 == 0 33 | istart = i; 34 | jstart = j; 35 | update1 = 1; 36 | end 37 | end 38 | 39 | if (abs(th1(i)-goal(1)) <= grid1) && (abs(th2(j)-goal(2)) <= grid2) 40 | if update2 == 0 41 | update2 = 1; 42 | iend = i; 43 | jend = j; 44 | end 45 | end 46 | end 47 | end 48 | 49 | 50 | V0 = zeros(l1,l2); 51 | Q0 = zeros(l1,l2,8); 52 | Q = Q0; 53 | episode = 1; 54 | % alpha = 0.5; 55 | 56 | V = V0; 57 | U = V; 58 | 59 | s0 = x0; 60 | 61 | s = s0; 62 | % Vold = V0; 63 | % Vnew = V0; 64 | % disp('Actions 1 = N, 2 = E, 3 = S, 4 = W'); 65 | % iter = 1; 66 | while episode < nIter 67 | U = V; 68 | s1 = s(1); 69 | s2 = s(2); 70 | icrnt = find(th1 == s1); 71 | jcrnt = find(th2 == s2); 72 | roll = rand(1); % Epsilon-Greedy Roll 73 | % eps = 1/episode; % Decaying Learning Rate 74 | if roll < eps 75 | a = randi(8); 76 | else 77 | [M,ind] = max(Q(icrnt,jcrnt,:)); 78 | a = ind; 79 | end 80 | 81 | snext = nextstate(icrnt,jcrnt,a,states); 82 | inxt = find(th1 == snext(1)); 83 | jnxt = find(th2 == snext(2)); 84 | if (round(s1,4) == round(goal(1),4) && round(s2,4) == round(goal(2),4)) 85 | reward = R; 86 | else 87 | reward = r; 88 | end 89 | alpha = 1/episode; % Decaying Learning Rate 90 | Q(icrnt,jcrnt,a) = (1-alpha)*Q(icrnt,jcrnt,a) + alpha*(reward + gamma*max(Q(inxt,jnxt,:))); 91 | fprintf('Episode: %d', episode); 92 | fprintf('Current State: (%.4f, %.4f)\n',snext(1),snext(2)); 93 | if (round(snext(1),4) == round(goal(1),4) && round(snext(2),4) == round(goal(2),4)) 94 | fprintf('End of Episode %d!\n', episode); 95 | Q(inxt,jnxt,:) = (1-alpha)*Q(inxt,jnxt,:) + alpha*(R); 96 | episode = episode + 1; 97 | s = s0; 98 | V = V0; 99 | for i = 1:l1 100 | for j = 1:l2 101 | [Max,I] = max(Q(i,j,:)); 102 | V(i,j) = Max; 103 | end 104 | end 105 | disp('========================================='); 106 | else 107 | s = snext; 108 | end 109 | 110 | end 111 | fprintf('State Values after %d episodes!',episode) 112 | V 113 | Optimal_Policy = zeros(l1,l2); 114 | for i = 1:l1 115 | for j = 1:l2 116 | [Max,I] = max(Q(i,j,:)); 117 | V(i,j) = Max; 118 | Optimal_Policy(i,j) = I; 119 | end 120 | end 121 | Optimal_Policy(iend,jend) = 0; 122 | 123 | pathlength = 1; 124 | i = istart; 125 | j = jstart; 126 | qout = x0; 127 | count = 0; 128 | while pathlength > 0 129 | a = Optimal_Policy(i,j); 130 | snext = nextstate(i,j,a,states); 131 | qout(end+1,:) = snext; 132 | i = find(th1 == snext(1)); 133 | j = find(th2 == snext(2)); 134 | 135 | if i == iend && j == jend 136 | break; 137 | end 138 | 139 | 140 | end 141 | qout(end+1,:) = goal; 142 | mdl_planar2; 143 | for i = 1:size(qout,1) 144 | p2.plot(qout(i,:)) 145 | hold on; 146 | 147 | % plot3(xv,yv,zeros(length(xv),1),'r','Linewidth',2); 148 | % plot3(xv2,yv2,zeros(length(xv2),1),'r','Linewidth',2); 149 | % plot3(xv3,yv3,zeros(length(xv3),1),'r','Linewidth',2); 150 | % plot3(xv4,yv4,zeros(length(xv4),1),'r','Linewidth',2); 151 | % fill3(xv,yv,zeros(length(xv),1),'r'); 152 | % 
fill3(xv2,yv2,zeros(length(xv2),1),'r'); 153 | % fill3(xv3,yv3,zeros(length(xv3),1),'r'); 154 | % fill3(xv4,yv4,zeros(length(xv4),1),'r'); 155 | end 156 | 157 | 158 | 159 | function [snext] = nextstate(i,j,a,states) 160 | nrows = size(states,1); 161 | ncols = size(states,2); 162 | 163 | if i == 1 && j == 1 164 | i1 = nrows; j1 = ncols; 165 | i2 = nrows; j2 = 1; 166 | i3 = nrows; j3 = 2; 167 | i4 = 1; j4 = ncols; 168 | i6 = 1; j6 = 2; 169 | i7 = 2; j7 = ncols; 170 | i8 = 2; j8 = 1; 171 | i9 = 2; j9 = 2; 172 | elseif i == nrows && j == 1 173 | i1 = nrows-1; j1 = ncols; 174 | i2 = nrows-1; j2 = 1; 175 | i3 = nrows-1; j3 = 2; 176 | i4 = nrows; j4 = ncols; 177 | i6 = nrows; j6 = 2; 178 | i7 = 1; j7 = ncols; 179 | i8 = 1; j8 = 1; 180 | i9 = 1; j9 = 2; 181 | elseif i == 1 && j == ncols 182 | i1 = nrows; j1 = ncols-1; 183 | i2 = nrows; j2 = ncols; 184 | i3 = nrows; j3 = 1; 185 | i4 = 1; j4 = ncols-1; 186 | i6 = 1; j6 = 1; 187 | i7 = 2; j7 = ncols-1; 188 | i8 = 2; j8 = ncols; 189 | i9 = 2; j9 = 1; 190 | elseif i == nrows && j == ncols 191 | i1 = nrows-1; j1 = ncols-1; 192 | i2 = nrows-1; j2 = ncols; 193 | i3 = nrows-1; j3 = 1; 194 | i4 = nrows; j4 = ncols-1; 195 | i6 = nrows; j6 = 1; 196 | i7 = 1; j7 = ncols-1; 197 | i8 = 1; j8 = ncols; 198 | i9 = 1; j9 = 1; 199 | elseif i == 1 200 | i1 = nrows; j1 = j-1; 201 | i2 = nrows; j2 = j; 202 | i3 = nrows; j3 = j+1; 203 | i4 = 1; j4 = j-1; 204 | i6 = 1; j6 = j+1; 205 | i7 = 2; j7 = j-1; 206 | i8 = 2; j8 = j; 207 | i9 = 2; j9 = j+1; 208 | elseif i == nrows 209 | i1 = nrows-1; j1 = j-1; 210 | i2 = nrows-1; j2 = j; 211 | i3 = nrows-1; j3 = j+1; 212 | i4 = nrows; j4 = j-1; 213 | i6 = nrows; j6 = j+1; 214 | i7 = 1; j7 = j-1; 215 | i8 = 1; j8 = j; 216 | i9 = 1; j9 = j+1; 217 | elseif j == 1 218 | i1 = i-1; j1 = ncols; 219 | i2 = i-1; j2 = j; 220 | i3 = i-1; j3 = j+1; 221 | i4 = i; j4 = ncols; 222 | i6 = i; j6 = j+1; 223 | i7 = i+1; j7 = ncols; 224 | i8 = i+1; j8 = j; 225 | i9 = i+1; j9 = j+1; 226 | elseif j == ncols 227 | i1 = i-1; j1 = j-1; 228 | i2 = i-1; j2 = j; 229 | i3 = i-1; j3 = 1; 230 | i4 = i; j4 = j-1; 231 | i6 = i; j6 = 1; 232 | i7 = i+1; j7 = j-1; 233 | i8 = i+1; j8 = j; 234 | i9 = i+1; j9 = 1; 235 | else 236 | i1 = i-1; j1 = j-1; 237 | i2 = i-1; j2 = j; 238 | i3 = i-1; j3 = j+1; 239 | i4 = i; j4 = j-1; 240 | i6 = i; j6 = j+1; 241 | i7 = i+1; j7 = j-1; 242 | i8 = i+1; j8 = j; 243 | i9 = i+1; j9 = j+1; 244 | 245 | end 246 | 247 | if a == 1 248 | snext = [states(i1,j1,1),states(i1,j1,2)]; 249 | elseif a == 2 250 | snext = [states(i2,j2,1),states(i2,j2,2)]; 251 | elseif a == 3 252 | snext = [states(i3,j3,1),states(i3,j3,2)]; 253 | elseif a == 4 254 | snext = [states(i4,j4,1),states(i4,j4,2)]; 255 | elseif a == 5 256 | snext = [states(i6,j6,1),states(i6,j6,2)]; 257 | elseif a == 6 258 | snext = [states(i7,j7,1),states(i7,j7,2)]; 259 | elseif a == 7 260 | snext = [states(i8,j8,1),states(i8,j8,2)]; 261 | elseif a == 8 262 | snext = [states(i9,j9,1),states(i9,j9,2)]; 263 | end 264 | end 265 | -------------------------------------------------------------------------------- /Code/Value_Iteration_2Link_Arm.m: -------------------------------------------------------------------------------- 1 | %% Problem Description 2 | % This file contains the value iteration algorithm written to evaluate the 3 | % optimal path for a 2-Link Planar Robotic Arm without any obstacles. 4 | clc; 5 | clear; 6 | close all; 7 | %% Parameters 8 | % (User can Modify with the Start and Goal States to check how the results vary!) 
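% The script poses path planning as a deterministic MDP: the states are the
% grid points of (theta1, theta2), each of the 9 actions steps to a
% neighbouring grid point, every transition pays the living reward r, and
% the goal cell is seeded with the end reward R and frozen during iteration.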
9 | % ----Start State----
10 | x0 = [0 0];
11 | % ----End State-----
12 | goal = [7*pi/8 -pi/4];
13 | r = -0.2; % Living Reward / Penalty
14 | R = 10; % End Reward
15 | 
16 | grid1 = 2*pi/100; % Grid Size of theta1
17 | grid2 = 2*pi/100; % Grid Size of theta2
18 | th1 = -pi:grid1:pi;
19 | th2 = -pi:grid2:pi;
20 | 
21 | states = zeros(length(th1),length(th2),2);
22 | V0 = zeros(length(th1),length(th2));
23 | dummy_pol = ones(length(th1),length(th2));
24 | update1 = 0;
25 | update2 = 0;
26 | update3 = 0;
27 | l1 = length(th1);
28 | l2 = length(th2);
29 | 
30 | %% Identifying the start and end joint angle indices
31 | for i = 1:length(th1)
32 | for j = 1:length(th2)
33 | states(i,j,1) = th1(i);
34 | states(i,j,2) = th2(j);
35 | if (abs(th1(i)-x0(1)) <= grid1) && (abs(th2(j)-x0(2)) <= grid2)
36 | if update1 == 0
37 | istart = i;
38 | jstart = j;
39 | update1 = 1;
40 | end
41 | end
42 | 
43 | if (abs(th1(i)-goal(1)) <= grid1) && (abs(th2(j)-goal(2)) <= grid2)
44 | if update2 == 0
45 | V0(i,j) = R;
46 | dummy_pol(i,j) = 0;
47 | update2 = 1;
48 | iend = i;
49 | jend = j;
50 | end
51 | end
52 | end
53 | end
54 | nstates = l1*l2;
55 | 
56 | %% Creating MINUS, PLUS ARRAYS FOR WRITING TRANSITION MODEL
57 | iminus = zeros(l1,1);
58 | iplus = zeros(l1,1);
59 | jminus = zeros(l2,1);
60 | jplus = zeros(l2,1);
61 | 
62 | for i = 1:l1
63 | iminus(i) = i-1;
64 | iplus(i) = i+1;
65 | if i == 1
66 | iminus(i) = l1 - 1;
67 | elseif i == l1
68 | iplus(i) = 2;
69 | end
70 | end
71 | 
72 | for j = 1:l2
73 | jminus(j) = j-1;
74 | jplus(j) = j+1;
75 | if j == 1
76 | jminus(j) = l2 - 1;
77 | elseif j == l2
78 | jplus(j) = 2;
79 | end
80 | end
81 | 
82 | %% Initializing a Few Other Parameters...
83 | Vold = V0;
84 | Vnew = V0;
85 | iter = 1;
86 | policy = zeros(length(th1),length(th2));
87 | 
88 | %% Value Iteration
89 | while iter > 0
90 | Vold = Vnew;
91 | Q = Qfunc(Vnew,r,l1,l2,iminus,iplus,jminus,jplus); % Q value calculation using Bellman's Equation
92 | for i = 1:l1
93 | for j = 1:l2
94 | if dummy_pol(i,j) ~= 0
95 | [maxval,index] = max(Q(i,j,:));
96 | Vnew(i,j) = maxval;
97 | policy(i,j) = index;
98 | end
99 | end
100 | end
101 | 
102 | fprintf('Value Iteration: %d\n',iter);
103 | iter = iter + 1;
104 | 
105 | if Vold == Vnew % Convergence Check
106 | disp('==================================')
107 | disp('Values Converged!')
108 | break
109 | end
110 | end
111 | 
112 | fprintf('Number of value iterations performed is %d\n',iter)
113 | 
114 | %% Retrieving the Optimal Path
115 | pathlength = 1;
116 | i = istart;
117 | j = jstart;
118 | qout = x0;
119 | while pathlength > 0
120 | a = policy(i,j);
121 | snext = nexts(i,j,a,states,iminus,iplus,jminus,jplus);
122 | qout(end+1,:) = snext;
123 | i = find(th1 == snext(1));
124 | j = find(th2 == snext(2));
125 | 
126 | if i == iend && j == jend
127 | break;
128 | end
129 | 
130 | end
131 | qout(end+1,:) = goal;
132 | 
133 | %% Visualizing the Optimal Path
134 | mdl_planar2;
135 | for i = 1:size(qout,1)
136 | p2.plot(qout(i,:))
137 | end
138 | 
139 | endpoints = [];
140 | for i = 1:size(qout,1)-1
141 | endpoints(i,:) = [cos(qout(i,1))+cos(qout(i,1) + qout(i,2)) sin(qout(i,1))+sin(qout(i,1) + qout(i,2))];
142 | hold on;
143 | end
144 | plot3(endpoints(:,1),endpoints(:,2),zeros(length(endpoints),1),'k','Linewidth',2)
145 | 
146 | %% Action Value and Transition Functions are Defined Below.
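% The 9 actions below enumerate the {-1, 0, +1} offsets of the two joint
% indices (action 5 keeps both joints in place). Since th1 and th2 each
% contain both -pi and pi, which describe the same arm configuration, the
% wrap-around arrays skip the duplicated endpoint: iminus(1) = l1 - 1 and
% iplus(l1) = 2 rather than l1 and 1.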
147 | % Action Value Function 148 | function [Q] = Qfunc(V,r,l1,l2,iminus,iplus,jminus,jplus) 149 | Q = zeros(l1,l2,9); 150 | % 9 Actions 151 | for i = 1:l1 152 | for j = 1:l2 153 | Q(i,j,1) = V(iminus(i),jminus(j))+r; 154 | Q(i,j,2) = V(iminus(i),j)+r; 155 | Q(i,j,3) = V(iminus(i),jplus(j))+r; 156 | Q(i,j,4) = V(i,jminus(j))+r; 157 | Q(i,j,5) = V(i,j)+r; 158 | Q(i,j,6) = V(i,jplus(j))+r; 159 | Q(i,j,7) = V(iplus(i),jminus(j))+r; 160 | Q(i,j,8) = V(iplus(i),j)+r; 161 | Q(i,j,9) = V(iplus(i),jplus(j))+r; 162 | end 163 | end 164 | 165 | end 166 | 167 | % Transition Function 168 | function [snext] = nexts(i,j,a,states,iminus,iplus,jminus,jplus) 169 | ni = size(states,1); 170 | nj = size(states,2); 171 | 172 | Nextindex = [iminus(i) jminus(j); 173 | iminus(i) j; 174 | iminus(i) jplus(j); 175 | i jminus(j); 176 | i j; 177 | i jplus(j); 178 | iplus(i) jminus(j); 179 | iplus(i) j; 180 | iplus(i) jplus(j)]; 181 | 182 | inext = Nextindex(a,1); 183 | jnext = Nextindex(a,2); 184 | 185 | snext = states(inext,jnext,:); 186 | 187 | end 188 | -------------------------------------------------------------------------------- /Code/Value_Iteration_2Link_with_Obstacles.m: -------------------------------------------------------------------------------- 1 | %% Problem Description 2 | % This file contains the value iteration algorithm written to evaluate the 3 | % optimal path for a 2-Link Planar Robotic Arm with obstacles. 4 | clc; 5 | clear; 6 | close all; 7 | %% Parameters 8 | % (User can Modify with the Start and Goal States to check how the results vary!) 9 | % ----Start State---- 10 | x0 = [0 0]; 11 | % ----End State----- 12 | goal = [-pi 0]; 13 | r = -0.01; % Living Reward / Penalty 14 | R = 100; % End Reward 15 | grid1 = 2*pi/100; % Grid Size of theta1 16 | grid2 = 2*pi/100; % Grid Size of theta2 17 | th1 = -pi:grid1:pi; 18 | th2 = -pi:grid1:pi; 19 | 20 | %% Obstacle Modeling 21 | L = linspace(0,2*pi,6); 22 | L2 = linspace(0,2*pi,5); 23 | L3 = linspace(0,2*pi,100); 24 | L4 = linspace(0,2*pi,6); 25 | xv = 1.5+1*cos(L)'; 26 | yv = 1.5+1*sin(L)'; 27 | npoints = 20; 28 | 29 | xv2 = -0.5+0.3*cos(L2)'; 30 | yv2 = 0.5+0.3*sin(L2)'; 31 | xv3 = 1.4+1*cos(L3)'; 32 | yv3 = -1.5+1*sin(L3)'; 33 | xv4 = -1.4+1*cos(L4)'; 34 | yv4 = -1.5+1*sin(L4)'; 35 | 36 | %% Identifying the start and end joint angle indices 37 | states = zeros(length(th1),length(th2),2); 38 | V0 = zeros(length(th1),length(th2)); 39 | dummy_pol = ones(length(th1),length(th2)); 40 | update1 = 0; 41 | update2 = 0; 42 | update3 = 0; 43 | l1 = length(th1); 44 | l2 = length(th2); 45 | colijs = []; 46 | 47 | for i = 1:length(th1) 48 | for j = 1:length(th2) 49 | states(i,j,1) = th1(i); 50 | states(i,j,2) = th2(j); 51 | if (abs(th1(i)-x0(1)) <= grid1) && (abs(th2(j)-x0(2)) <= grid2) 52 | if update1 == 0 53 | istart = i; 54 | jstart = j; 55 | update1 = 1; 56 | end 57 | end 58 | 59 | if (abs(th1(i)-goal(1)) <= grid1) && (abs(th2(j)-goal(2)) <= grid2) 60 | if update2 == 0 61 | V0(i,j) = R; 62 | dummy_pol(i,j) = 0; 63 | update2 = 1; 64 | iend = i; 65 | jend = j; 66 | end 67 | end 68 | 69 | % Collision Check 70 | q1 = th1(i); q2 = th2(j); 71 | E1 = [cos(q1) sin(q1)]; 72 | E2 = [cos(q1)+cos(q1 + q2) sin(q1)+sin(q1 + q2)]; 73 | xlin1 = linspace(0,E1(1),npoints); 74 | ylin1 = linspace(0,E1(2),npoints); 75 | points1 = [xlin1(:) ylin1(:)]; 76 | 77 | xlin2 = linspace(E1(1),E2(1),npoints); 78 | ylin2 = linspace(E1(2),E2(2),npoints); 79 | points2 = [xlin2(:) ylin2(:)]; 80 | 81 | points = vertcat(points1,points2); 82 | xq = points(:,1); 83 | yq = points(:,2); 84 | 
[in1,on1] = inpolygon(xq,yq,xv,yv);
85 | [in2,on2] = inpolygon(xq,yq,xv2,yv2);
86 | [in3,on3] = inpolygon(xq,yq,xv3,yv3);
87 | [in4,on4] = inpolygon(xq,yq,xv4,yv4);
88 | col_points = numel(xq(in1)) + numel(xq(on1))+numel(xq(in2)) + numel(xq(on2))+numel(xq(in3)) + numel(xq(on3))...
89 | +numel(xq(in4)) + numel(xq(on4));
90 | 
91 | if col_points ~=0
92 | colijs(end+1,:) = [i j];
93 | end
94 | 
95 | 
96 | end
97 | end
98 | nstates = length(th1)*length(th2);
99 | 
100 | %% Creating MINUS, PLUS ARRAYS FOR WRITING TRANSITION MODEL
101 | iminus = zeros(l1,1);
102 | iplus = zeros(l1,1);
103 | jminus = zeros(l2,1);
104 | jplus = zeros(l2,1);
105 | 
106 | for i = 1:l1
107 | iminus(i) = i-1;
108 | iplus(i) = i+1;
109 | if i == 1
110 | iminus(i) = l1 - 1;
111 | elseif i == l1
112 | iplus(i) = 2;
113 | end
114 | end
115 | 
116 | for j = 1:l2
117 | jminus(j) = j-1;
118 | jplus(j) = j+1;
119 | if j == 1
120 | jminus(j) = l2 - 1;
121 | elseif j == l2
122 | jplus(j) = 2;
123 | end
124 | end
125 | 
126 | %% Initializing a Few Other Parameters...
127 | Vold = V0;
128 | Vnew = V0;
129 | iter = 1;
130 | policy = zeros(length(th1),length(th2));
131 | 
132 | %% Value Iteration
133 | while iter > 0
134 | Vold = Vnew;
135 | Q = Qfunc(Vnew,r,l1,l2,iminus,iplus,jminus,jplus,colijs); % Q value calculation using Bellman's Equation
136 | 
137 | for i = 1:l1
138 | for j = 1:l2
139 | if dummy_pol(i,j) ~= 0
140 | [maxval,index] = max(Q(i,j,:));
141 | Vnew(i,j) = maxval;
142 | policy(i,j) = index;
143 | targeti = i;
144 | targetj = j;
145 | end
146 | end
147 | end
148 | 
149 | fprintf('Value Iteration: %d\n',iter);
150 | iter = iter + 1;
151 | 
152 | if Vold == Vnew
153 | disp('==================================')
154 | disp('Values Converged!')
155 | break
156 | end
157 | end
158 | fprintf('Number of value iterations performed is %d\n',iter)
159 | 
160 | %% Retrieving the Optimal Path
161 | pathlength = 1;
162 | i = istart;
163 | j = jstart;
164 | qout = x0;
165 | 
166 | while pathlength > 0
167 | a = policy(i,j);
168 | snext = nexts(i,j,a,states,iminus,iplus,jminus,jplus);
169 | qout(end+1,:) = snext;
170 | i = find(th1 == snext(1));
171 | j = find(th2 == snext(2));
172 | 
173 | if i == iend && j == jend
174 | break;
175 | end
176 | 
177 | end
178 | qout(end+1,:) = goal;
179 | 
180 | %% Visualizing the Optimal Path
181 | mdl_planar2;
182 | for i = 1:size(qout,1)
183 | p2.plot(qout(i,:))
184 | plot3(xv,yv,zeros(length(xv),1),'r','Linewidth',2);
185 | plot3(xv2,yv2,zeros(length(xv2),1),'r','Linewidth',2);
186 | plot3(xv3,yv3,zeros(length(xv3),1),'r','Linewidth',2);
187 | plot3(xv4,yv4,zeros(length(xv4),1),'r','Linewidth',2);
188 | fill3(xv,yv,zeros(length(xv),1),'r');
189 | fill3(xv2,yv2,zeros(length(xv2),1),'r');
190 | fill3(xv3,yv3,zeros(length(xv3),1),'r');
191 | fill3(xv4,yv4,zeros(length(xv4),1),'r');
192 | end
193 | endpoints = [];
194 | for i = 1:size(qout,1)-1
195 | endpoints(i,:) = [cos(qout(i,1))+cos(qout(i,1) + qout(i,2)) sin(qout(i,1))+sin(qout(i,1) + qout(i,2))];
196 | hold on;
197 | end
198 | plot3(endpoints(:,1),endpoints(:,2),zeros(length(endpoints),1),'k','Linewidth',2)
199 | 
200 | %% Action Value and Transition Functions are Defined Below.
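% Obstacle handling in Qfunc below: before each Bellman backup, the value of
% every colliding configuration listed in colijs is zeroed, so no action can
% gain value through a state in which the arm intersects an obstacle. A
% linear-indexing sketch of the same masking (a hypothetical alternative,
% not used by this script):
%
%   V(sub2ind(size(V), colijs(:,1), colijs(:,2))) = 0;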
201 | % Action Value Function 202 | function [Q] = Qfunc(V,r,l1,l2,iminus,iplus,jminus,jplus,colijs) 203 | Q = zeros(l1,l2,9); 204 | % 9 Actions 205 | for m = 1:length(colijs) 206 | V(colijs(m,1),colijs(m,2)) = 0; 207 | end 208 | for i = 1:l1 209 | for j = 1:l2 210 | Q(i,j,1) = V(iminus(i),jminus(j))+r; 211 | Q(i,j,2) = V(iminus(i),j)+r; 212 | Q(i,j,3) = V(iminus(i),jplus(j))+r; 213 | Q(i,j,4) = V(i,jminus(j))+r; 214 | Q(i,j,5) = V(i,j)+r; 215 | Q(i,j,6) = V(i,jplus(j))+r; 216 | Q(i,j,7) = V(iplus(i),jminus(j))+r; 217 | Q(i,j,8) = V(iplus(i),j)+r; 218 | Q(i,j,9) = V(iplus(i),jplus(j))+r; 219 | end 220 | end 221 | end 222 | 223 | % Transition Function 224 | function [snext] = nexts(i,j,a,states,iminus,iplus,jminus,jplus) 225 | ni = size(states,1); 226 | nj = size(states,2); 227 | 228 | Nextindex = [iminus(i) jminus(j); 229 | iminus(i) j; 230 | iminus(i) jplus(j); 231 | i jminus(j); 232 | i j; 233 | i jplus(j); 234 | iplus(i) jminus(j); 235 | iplus(i) j; 236 | iplus(i) jplus(j)]; 237 | 238 | inext = Nextindex(a,1); 239 | jnext = Nextindex(a,2); 240 | 241 | snext = states(inext,jnext,:); 242 | 243 | end 244 | -------------------------------------------------------------------------------- /Code/Value_Iteration_3Link_Arm.m: -------------------------------------------------------------------------------- 1 | %% Problem Description 2 | % This file contains the value iteration algorithm written to evaluate the 3 | % optimal path for a 3-Link Planar Robotic Arm without any obstacles. 4 | clc; 5 | clear; 6 | close all; 7 | %% Initial Parameters 8 | x0 = [0 0 0]; % Start State 9 | goal = [5*pi/6 pi/2 pi/4]; % End State 10 | r = -0.2; % Living Reward 11 | R = 10; % End Reward 12 | 13 | grid1 = 2*pi/50; 14 | grid2 = 2*pi/50; 15 | grid3 = 2*pi/50; 16 | th1 = -pi:grid1:pi; 17 | th2 = -pi:grid2:pi; 18 | th3 = -pi:grid3:pi; 19 | 20 | 21 | states = zeros(length(th1),length(th2),length(th3),3); 22 | V0 = zeros(length(th1),length(th2),length(th3)); 23 | dummy_pol = ones(length(th1),length(th2),length(th3)); 24 | update1 = 0; 25 | update2 = 0; 26 | update3 = 0; 27 | l1 = length(th1); 28 | l2 = length(th2); 29 | l3 = length(th3); 30 | for i = 1:length(th1) 31 | for j = 1:length(th2) 32 | for k = 1:length(th3) 33 | states(i,j,k,1) = th1(i); 34 | states(i,j,k,2) = th2(j); 35 | states(i,j,k,3) = th3(k); 36 | if (abs(th1(i)-x0(1)) <= grid1) && (abs(th2(j)-x0(2)) <= grid2) && (abs(th3(k)-x0(3)) <= grid3) 37 | if update1 == 0 38 | istart = i; 39 | jstart = j; 40 | kstart = k; 41 | update1 = 1; 42 | end 43 | end 44 | 45 | if (abs(th1(i)-goal(1)) <= grid1) && (abs(th2(j)-goal(2)) <= grid2) && (abs(th3(k)-goal(3)) <= grid3) 46 | if update2 == 0 47 | V0(i,j,k) = R; 48 | dummy_pol(i,j,k) = 0; 49 | update2 = 1; 50 | iend = i; 51 | jend = j; 52 | kend = k; 53 | end 54 | end 55 | 56 | end 57 | end 58 | end 59 | nstates = l1*l2*l3; 60 | 61 | %% Creating MINUS, PLUS ARRAYS 62 | iminus = zeros(l1,1); 63 | iplus = zeros(l1,1); 64 | jminus = zeros(l2,1); 65 | jplus = zeros(l2,1); 66 | kminus = zeros(l3,1); 67 | kplus = zeros(l3,1); 68 | 69 | for i = 1:l1 70 | iminus(i) = i-1; 71 | iplus(i) = i+1; 72 | if i == 1 73 | iminus(i) = l1 - 1; 74 | elseif i == l1 75 | iplus(i) = 2; 76 | end 77 | end 78 | 79 | for j = 1:l2 80 | jminus(j) = j-1; 81 | jplus(j) = j+1; 82 | if j == 1 83 | jminus(j) = l2 - 1; 84 | elseif j == l2 85 | jplus(j) = 2; 86 | end 87 | end 88 | 89 | for k = 1:l3 90 | kminus(k) = k-1; 91 | kplus(k) = k+1; 92 | if k == 1 93 | kminus(k) = l3 - 1; 94 | elseif k == l3 95 | kplus(k) = 2; 96 | end 97 | end 98 | 99 | %% 
Initializing a Few Other Parameters...
100 | Vold = V0;
101 | Vnew = V0;
102 | iter = 1;
103 | policy = zeros(length(th1),length(th2),length(th3));
104 | 
105 | %% Value Iteration
106 | while iter > 0
107 | Vold = Vnew;
108 | Q = Qfunc(Vnew,r,l1,l2,l3,iminus,iplus,jminus,jplus,kminus,kplus);
109 | 
110 | for i = 1:l1
111 | for j = 1:l2
112 | for k = 1:l3
113 | if dummy_pol(i,j,k) ~= 0
114 | [maxval,index] = max(Q(i,j,k,:));
115 | Vnew(i,j,k) = maxval;
116 | policy(i,j,k) = index;
117 | targeti = i;
118 | targetj = j;
119 | targetk = k;
120 | end
121 | end
122 | end
123 | end
124 | 
125 | fprintf('Value Iteration: %d\n',iter);
126 | iter = iter + 1;
127 | 
128 | if Vold == Vnew
129 | disp('==================================')
130 | disp('Values Converged!')
131 | break
132 | end
133 | end
134 | 
135 | fprintf('Number of value iterations performed is %d\n',iter)
136 | 
137 | %% Retrieving the Optimal Path
138 | pathlength = 1;
139 | i = istart;
140 | j = jstart;
141 | k = kstart;
142 | qout = x0;
143 | 
144 | while pathlength > 0
145 | a = policy(i,j,k);
146 | snext = nexts(i,j,k,a,states,iminus,iplus,jminus,jplus,kminus,kplus);
147 | qout(end+1,:) = snext;
148 | i = find(th1 == snext(1));
149 | j = find(th2 == snext(2));
150 | k = find(th3 == snext(3));
151 | 
152 | if i == iend && j == jend && k == kend
153 | break;
154 | end
155 | 
156 | end
157 | qout(end+1,:) = goal;
158 | 
159 | %% Visualizing the Optimal Path
160 | mdl_planar3;
161 | for i = 1:size(qout,1)
162 | p3.plot(qout(i,:))
163 | end
164 | 
165 | endpoints = [];
166 | for i = 1:size(qout,1)-1
167 | endpoints(i,:) = [cos(qout(i,1))+cos(qout(i,1) + qout(i,2))+cos(qout(i,1) + qout(i,2)+qout(i,3)) sin(qout(i,1))+sin(qout(i,1) + qout(i,2))+sin(qout(i,1) + qout(i,2)+qout(i,3))];
168 | hold on;
169 | end
170 | plot3(endpoints(:,1),endpoints(:,2),zeros(length(endpoints),1),'k','Linewidth',2)
171 | 
172 | %% Action Value and Transition Functions are Defined Below.
173 | % Action-Value Function
174 | function [Q] = Qfunc(V,r,l1,l2,l3,iminus,iplus,jminus,jplus,kminus,kplus)
175 | Q = zeros(l1,l2,l3,27);
176 | % 27 Actions
177 | for i = 1:l1
178 | for j = 1:l2
179 | for k = 1:l3
180 | Q(i,j,k,1) = V(iminus(i),jminus(j),k)+r;
181 | Q(i,j,k,2) = V(iminus(i),j,k)+r;
182 | Q(i,j,k,3) = V(iminus(i),jplus(j),k)+r;
183 | Q(i,j,k,4) = V(i,jminus(j),k)+r;
184 | Q(i,j,k,5) = V(i,j,k)+r;
185 | Q(i,j,k,6) = V(i,jplus(j),k)+r;
186 | Q(i,j,k,7) = V(iplus(i),jminus(j),k)+r;
187 | Q(i,j,k,8) = V(iplus(i),j,k)+r;
188 | Q(i,j,k,9) = V(iplus(i),jplus(j),k)+r;
189 | 
190 | Q(i,j,k,10) = V(iminus(i),jminus(j),kminus(k))+r;
191 | Q(i,j,k,11) = V(iminus(i),j,kminus(k))+r;
192 | Q(i,j,k,12) = V(iminus(i),jplus(j),kminus(k))+r;
193 | Q(i,j,k,13) = V(i,jminus(j),kminus(k))+r;
194 | Q(i,j,k,14) = V(i,j,kminus(k))+r;
195 | Q(i,j,k,15) = V(i,jplus(j),kminus(k))+r;
196 | Q(i,j,k,16) = V(iplus(i),jminus(j),kminus(k))+r;
197 | Q(i,j,k,17) = V(iplus(i),j,kminus(k))+r;
198 | Q(i,j,k,18) = V(iplus(i),jplus(j),kminus(k))+r;
199 | 
200 | Q(i,j,k,19) = V(iminus(i),jminus(j),kplus(k))+r;
201 | Q(i,j,k,20) = V(iminus(i),j,kplus(k))+r;
202 | Q(i,j,k,21) = V(iminus(i),jplus(j),kplus(k))+r;
203 | Q(i,j,k,22) = V(i,jminus(j),kplus(k))+r;
204 | Q(i,j,k,23) = V(i,j,kplus(k))+r;
205 | Q(i,j,k,24) = V(i,jplus(j),kplus(k))+r;
206 | Q(i,j,k,25) = V(iplus(i),jminus(j),kplus(k))+r;
207 | Q(i,j,k,26) = V(iplus(i),j,kplus(k))+r;
208 | Q(i,j,k,27) = V(iplus(i),jplus(j),kplus(k))+r;
209 | end
210 | end
211 | end
212 | end
213 | 
214 | % Transition Function
215 | function [snext] = nexts(i,j,k,a,states,iminus,iplus,jminus,jplus,kminus,kplus)
216 | ni = size(states,1);
217 | nj = size(states,2);
218 | nk = size(states,3);
219 | 
220 | Nextindex = [iminus(i) jminus(j) k;
221 | iminus(i) j k;
222 | iminus(i) jplus(j) k;
223 | i jminus(j) k;
224 | i j k;
225 | i jplus(j) k;
226 | iplus(i) jminus(j) k;
227 | iplus(i) j k;
228 | iplus(i) jplus(j) k;
229 | iminus(i) jminus(j) kminus(k);
230 | iminus(i) j kminus(k);
231 | iminus(i) jplus(j) kminus(k);
232 | i jminus(j) kminus(k);
233 | i j kminus(k);
234 | i jplus(j) kminus(k);
235 | iplus(i) jminus(j) kminus(k);
236 | iplus(i) j kminus(k);
237 | iplus(i) jplus(j) kminus(k);
238 | iminus(i) jminus(j) kplus(k);
239 | iminus(i) j kplus(k);
240 | iminus(i) jplus(j) kplus(k);
241 | i jminus(j) kplus(k);
242 | i j kplus(k);
243 | i jplus(j) kplus(k);
244 | iplus(i) jminus(j) kplus(k);
245 | iplus(i) j kplus(k);
246 | iplus(i) jplus(j) kplus(k)];
247 | 
248 | inext = Nextindex(a,1);
249 | jnext = Nextindex(a,2);
250 | knext = Nextindex(a,3);
251 | 
252 | snext = states(inext,jnext,knext,:);
253 | end
254 | 
-------------------------------------------------------------------------------- /Code/Value_Iteration_3Link_with_Obstacles.m: --------------------------------------------------------------------------------
1 | %% Problem Description
2 | % This file contains the value iteration algorithm written to evaluate the
3 | % optimal path for a 3-Link Planar Robotic Arm with obstacles.
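% Collision checking below follows the same scheme as the 2-link scripts:
% each link is sampled at npoints evenly spaced points with linspace, and
% inpolygon tests whether any sampled point lies inside or on one of the
% four obstacle polygons. Every joint configuration (i,j,k) that fails the
% test is recorded in colijs, and its state value is zeroed inside Qfunc.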
4 | clc; 5 | clear; 6 | close all; 7 | %% Initial Parameters 8 | x0 = [0 0 0]; % Start State 9 | goal = [pi 0 0]; % End State 10 | r = -0.2; % Living Reward 11 | R = 100; % End Reward 12 | 13 | %% Obstacle Modeling 14 | L = linspace(0,2*pi,6); 15 | L2 = linspace(0,2*pi,5); 16 | L3 = linspace(0,2*pi,100); 17 | L4 = linspace(0,2*pi,6); 18 | xv = 1.5+1*cos(L)'; 19 | yv = 1.5+1*sin(L)'; 20 | npoints = 20; 21 | 22 | xv2 = -0.5+0.3*cos(L2)'; 23 | yv2 = 0.5+0.3*sin(L2)'; 24 | 25 | xv3 = 1.4+1*cos(L3)'; 26 | yv3 = -1.5+1*sin(L3)'; 27 | 28 | xv4 = -1.4+1*cos(L4)'; 29 | yv4 = -1.5+1*sin(L4)'; 30 | 31 | grid1 = 2*pi/50; 32 | grid2 = 2*pi/50; 33 | grid3 = 2*pi/50; 34 | th1 = -pi:grid1:pi; 35 | th2 = -pi:grid1:pi; 36 | th3 = -pi:grid1:pi; 37 | 38 | 39 | states = zeros(length(th1),length(th2),length(th3),3); 40 | V0 = zeros(length(th1),length(th2),length(th3)); 41 | dummy_pol = ones(length(th1),length(th2),length(th3)); 42 | update1 = 0; 43 | update2 = 0; 44 | update3 = 0; 45 | l1 = length(th1); 46 | l2 = length(th2); 47 | l3 = length(th3); 48 | colijs = []; 49 | disp('Initializing the Problem...') 50 | for i = 1:length(th1) 51 | for j = 1:length(th2) 52 | for k = 1:length(th3) 53 | states(i,j,k,1) = th1(i); 54 | states(i,j,k,2) = th2(j); 55 | states(i,j,k,3) = th3(k); 56 | if (abs(th1(i)-x0(1)) <= grid1) && (abs(th2(j)-x0(2)) <= grid2) && (abs(th3(k)-x0(3)) <= grid3) 57 | if update1 == 0 58 | istart = i; 59 | jstart = j; 60 | kstart = k; 61 | update1 = 1; 62 | end 63 | end 64 | 65 | if (abs(th1(i)-goal(1)) <= grid1) && (abs(th2(j)-goal(2)) <= grid2) && (abs(th3(k)-goal(3)) <= grid3) 66 | if update2 == 0 67 | V0(i,j,k) = R; 68 | dummy_pol(i,j,k) = 0; 69 | update2 = 1; 70 | iend = i; 71 | jend = j; 72 | kend = k; 73 | end 74 | end 75 | 76 | % Collision Check 77 | q1 = th1(i); q2 = th2(j); q3 = th3(k); 78 | E1 = [cos(q1) sin(q1)]; 79 | E2 = [cos(q1)+cos(q1 + q2) sin(q1)+sin(q1 + q2)]; 80 | E3 = [cos(q1)+cos(q1 + q2)+cos(q1 + q2 + q3) sin(q1)+sin(q1 + q2)+sin(q1 + q2 + q3)]; 81 | xlin1 = linspace(0,E1(1),npoints); 82 | ylin1 = linspace(0,E1(2),npoints); 83 | points1 = [xlin1(:) ylin1(:)]; 84 | 85 | xlin2 = linspace(E1(1),E2(1),npoints); 86 | ylin2 = linspace(E1(2),E2(2),npoints); 87 | points2 = [xlin2(:) ylin2(:)]; 88 | 89 | xlin3 = linspace(E2(1),E3(1),npoints); 90 | ylin3 = linspace(E2(2),E3(2),npoints); 91 | points3 = [xlin3(:) ylin3(:)]; 92 | 93 | points = vertcat(points1,points2,points3); 94 | xq = points(:,1); 95 | yq = points(:,2); 96 | [in1,on1] = inpolygon(xq,yq,xv,yv); 97 | [in2,on2] = inpolygon(xq,yq,xv2,yv2); 98 | [in3,on3] = inpolygon(xq,yq,xv3,yv3); 99 | [in4,on4] = inpolygon(xq,yq,xv4,yv4); 100 | col_points = numel(xq(in1)) + numel(xq(on1))+numel(xq(in2)) + numel(xq(on2))+numel(xq(in3)) + numel(xq(on3))... 
101 | +numel(xq(in4)) + numel(xq(on4)); 102 | 103 | if col_points ~=0 104 | colijs(end+1,:) = [i j k]; 105 | end 106 | 107 | end 108 | end 109 | end 110 | nstates = l1*l2*l3; 111 | 112 | 113 | %% Creating MINUS, PLUS ARRAYS 114 | 115 | iminus = zeros(l1,1); 116 | iplus = zeros(l1,1); 117 | jminus = zeros(l2,1); 118 | jplus = zeros(l2,1); 119 | kminus = zeros(l3,1); 120 | kplus = zeros(l3,1); 121 | 122 | for i = 1:l1 123 | iminus(i) = i-1; 124 | iplus(i) = i+1; 125 | if i == 1 126 | iminus(i) = l1 - 1; 127 | elseif i == l1 128 | iplus(i) = 2; 129 | end 130 | end 131 | 132 | for j = 1:l2 133 | jminus(j) = j-1; 134 | jplus(j) = j+1; 135 | if j == 1 136 | jminus(j) = l2 - 1; 137 | elseif j == l2 138 | jplus(j) = 2; 139 | end 140 | end 141 | 142 | for k = 1:l3 143 | kminus(k) = k-1; 144 | kplus(k) = k+1; 145 | if k == 1 146 | kminus(k) = l3 - 1; 147 | elseif k == l3 148 | kplus(k) = 2; 149 | end 150 | end 151 | 152 | %% Initiating Few Other Parameters 153 | Vold = V0; 154 | Vnew = V0; 155 | iter = 1; 156 | policy = zeros(length(th1),length(th2),length(th3)); 157 | 158 | %% Value Iteration 159 | disp('Value Iteration Starts...') 160 | while iter > 0 161 | Vold = Vnew; 162 | Q = Qfunc(Vnew,r,l1,l2,l3,iminus,iplus,jminus,jplus,kminus,kplus,colijs); 163 | 164 | for i = 1:l1 165 | for j = 1:l2 166 | for k = 1:l3 167 | if dummy_pol(i,j,k) ~= 0 168 | [maxval,index] = max(Q(i,j,k,:)); 169 | Vnew(i,j,k) = maxval; 170 | policy(i,j,k) = index; 171 | targeti = i; 172 | targetj = j; 173 | targetk = k; 174 | end 175 | end 176 | end 177 | end 178 | 179 | fprintf('Iteration is %d\n',iter); 180 | iter = iter + 1; 181 | 182 | if Vold == Vnew 183 | disp('==================================') 184 | disp('Values Converged!') 185 | break 186 | end 187 | end 188 | 189 | fprintf('Number of value iterations performed is %d\n',iter) 190 | 191 | pathlength = 1; 192 | i = istart; 193 | j = jstart; 194 | k = kstart; 195 | qout = x0; 196 | 197 | %% Retrieving the Optimal Path 198 | while pathlength > 0 199 | a = policy(i,j,k); 200 | snext = nexts(i,j,k,a,states,iminus,iplus,jminus,jplus,kminus,kplus); 201 | qout(end+1,:) = snext; 202 | i = find(th1 == snext(1)); 203 | j = find(th2 == snext(2)); 204 | k = find(th3 == snext(3)); 205 | 206 | if i == iend && j == jend && k == kend 207 | break; 208 | end 209 | 210 | end 211 | qout(end+1,:) = goal; 212 | 213 | %% Visualizing the Optimal Path 214 | mdl_planar3; 215 | for i = 1:size(qout,1) 216 | p3.plot(qout(i,:)) 217 | hold on; 218 | plot3(xv,yv,zeros(length(xv),1),'r','Linewidth',2); 219 | plot3(xv2,yv2,zeros(length(xv2),1),'r','Linewidth',2); 220 | plot3(xv3,yv3,zeros(length(xv3),1),'r','Linewidth',2); 221 | plot3(xv4,yv4,zeros(length(xv4),1),'r','Linewidth',2); 222 | fill3(xv,yv,zeros(length(xv),1),'r'); 223 | fill3(xv2,yv2,zeros(length(xv2),1),'r'); 224 | fill3(xv3,yv3,zeros(length(xv3),1),'r'); 225 | fill3(xv4,yv4,zeros(length(xv4),1),'r'); 226 | end 227 | 228 | endpoints = []; 229 | for i = 1:size(qout,1)-1 230 | endpoints(i,:) = [cos(qout(i,1))+cos(qout(i,1) + qout(i,2))+cos(qout(i,1) + qout(i,2)+qout(i,3)) sin(qout(i,1))+sin(qout(i,1) + qout(i,2))+sin(qout(i,1) + qout(i,2)+qout(i,3))]; 231 | hold on; 232 | end 233 | 234 | plot3(endpoints(:,1),endpoints(:,2),zeros(length(endpoints),1),'k','Linewidth',2) 235 | 236 | %% Action Value and Transition Functions are Defined Below. 
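% Sizing note: with the 2*pi/50 grid each joint has 51 samples, so the Q
% array below is 51 x 51 x 51 x 27, about 3.6 million doubles (roughly
% 29 MB). This is presumably why the 3-link scripts use a coarser grid than
% the 2-link ones, which discretize each joint with 2*pi/100 (101 samples).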
237 | % Action-Value Function
238 | function [Q] = Qfunc(V,r,l1,l2,l3,iminus,iplus,jminus,jplus,kminus,kplus,colijs)
239 | Q = zeros(l1,l2,l3,27);
240 | % 27 Actions
241 | for m = 1:length(colijs)
242 | V(colijs(m,1),colijs(m,2),colijs(m,3)) = 0;
243 | end
244 | 
245 | 
246 | for i = 1:l1
247 | for j = 1:l2
248 | for k = 1:l3
249 | Q(i,j,k,1) = V(iminus(i),jminus(j),k)+r;
250 | Q(i,j,k,2) = V(iminus(i),j,k)+r;
251 | Q(i,j,k,3) = V(iminus(i),jplus(j),k)+r;
252 | Q(i,j,k,4) = V(i,jminus(j),k)+r;
253 | Q(i,j,k,5) = V(i,j,k)+r;
254 | Q(i,j,k,6) = V(i,jplus(j),k)+r;
255 | Q(i,j,k,7) = V(iplus(i),jminus(j),k)+r;
256 | Q(i,j,k,8) = V(iplus(i),j,k)+r;
257 | Q(i,j,k,9) = V(iplus(i),jplus(j),k)+r;
258 | 
259 | Q(i,j,k,10) = V(iminus(i),jminus(j),kminus(k))+r;
260 | Q(i,j,k,11) = V(iminus(i),j,kminus(k))+r;
261 | Q(i,j,k,12) = V(iminus(i),jplus(j),kminus(k))+r;
262 | Q(i,j,k,13) = V(i,jminus(j),kminus(k))+r;
263 | Q(i,j,k,14) = V(i,j,kminus(k))+r;
264 | Q(i,j,k,15) = V(i,jplus(j),kminus(k))+r;
265 | Q(i,j,k,16) = V(iplus(i),jminus(j),kminus(k))+r;
266 | Q(i,j,k,17) = V(iplus(i),j,kminus(k))+r;
267 | Q(i,j,k,18) = V(iplus(i),jplus(j),kminus(k))+r;
268 | 
269 | Q(i,j,k,19) = V(iminus(i),jminus(j),kplus(k))+r;
270 | Q(i,j,k,20) = V(iminus(i),j,kplus(k))+r;
271 | Q(i,j,k,21) = V(iminus(i),jplus(j),kplus(k))+r;
272 | Q(i,j,k,22) = V(i,jminus(j),kplus(k))+r;
273 | Q(i,j,k,23) = V(i,j,kplus(k))+r;
274 | Q(i,j,k,24) = V(i,jplus(j),kplus(k))+r;
275 | Q(i,j,k,25) = V(iplus(i),jminus(j),kplus(k))+r;
276 | Q(i,j,k,26) = V(iplus(i),j,kplus(k))+r;
277 | Q(i,j,k,27) = V(iplus(i),jplus(j),kplus(k))+r;
278 | end
279 | end
280 | end
281 | end
282 | 
283 | % Transition Function
284 | function [snext] = nexts(i,j,k,a,states,iminus,iplus,jminus,jplus,kminus,kplus)
285 | ni = size(states,1);
286 | nj = size(states,2);
287 | nk = size(states,3);
288 | 
289 | Nextindex = [iminus(i) jminus(j) k;
290 | iminus(i) j k;
291 | iminus(i) jplus(j) k;
292 | i jminus(j) k;
293 | i j k;
294 | i jplus(j) k;
295 | iplus(i) jminus(j) k;
296 | iplus(i) j k;
297 | iplus(i) jplus(j) k;
298 | iminus(i) jminus(j) kminus(k);
299 | iminus(i) j kminus(k);
300 | iminus(i) jplus(j) kminus(k);
301 | i jminus(j) kminus(k);
302 | i j kminus(k);
303 | i jplus(j) kminus(k);
304 | iplus(i) jminus(j) kminus(k);
305 | iplus(i) j kminus(k);
306 | iplus(i) jplus(j) kminus(k);
307 | iminus(i) jminus(j) kplus(k);
308 | iminus(i) j kplus(k);
309 | iminus(i) jplus(j) kplus(k);
310 | i jminus(j) kplus(k);
311 | i j kplus(k);
312 | i jplus(j) kplus(k);
313 | iplus(i) jminus(j) kplus(k);
314 | iplus(i) j kplus(k);
315 | iplus(i) jplus(j) kplus(k)];
316 | 
317 | inext = Nextindex(a,1);
318 | jnext = Nextindex(a,2);
319 | knext = Nextindex(a,3);
320 | 
321 | snext = states(inext,jnext,knext,:);
322 | end
323 | 
-------------------------------------------------------------------------------- /EEE587 Final Presentation.pptx: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/siddharthacheruvu/Robotic-Arm-Path-Planning-using-Optimal-Control/a417a14c593cd7cb7d6cd7a7c13167aad35b73b3/EEE587 Final Presentation.pptx
-------------------------------------------------------------------------------- /EEE_587_Final_Report.pdf: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/siddharthacheruvu/Robotic-Arm-Path-Planning-using-Optimal-Control/a417a14c593cd7cb7d6cd7a7c13167aad35b73b3/EEE_587_Final_Report.pdf
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # EEE587-Final-Project
2 | 
3 | This repository contains the files created for the EEE587 Optimal Control (Spring 2020) final project at ASU.
4 | 
5 | ## TEAM MEMBERS: Siddhartha Cheruvu, Aaron Arul Maria John, Lei Zhang
6 | 
7 | ## Tools Used: MATLAB, Peter Corke's Robotics Toolbox
8 | 
9 | 
10 | 
11 | ## INSTRUCTIONS TO RUN THE CODE
12 | 
13 | The MATLAB code uses Peter Corke's Robotics Toolbox, so the first step is to install that add-on in MATLAB.
14 | 
15 | Open the Add-On Explorer from the 'Home' tab of MATLAB.
16 | 
17 | Search for 'Peter Corke' and install 'Robotics Toolbox for MATLAB' from the results.
18 | 
19 | Then download the `Code` folder from this repository. The files can then be run directly, without any additional setup.
20 | 
21 | Comments are included in the files to explain each algorithm.
22 | 
23 | ## FILES
24 | 
25 | The `Code` folder contains the following files:
26 | * `Value_Iteration_2Link_Arm.m`
27 | * `Value_Iteration_2Link_with_Obstacles.m`
28 | * `Value_Iteration_3Link_Arm.m`
29 | * `Value_Iteration_3Link_with_Obstacles.m`
30 | * `Policy_Iteration_2Link_with_Obstacles.m`
31 | * `Policy_Iteration_3Link_with_Obstacles.m`
32 | * `Q_Learning.m`
33 | 
34 | 
-------------------------------------------------------------------------------- /README.txt: --------------------------------------------------------------------------------
1 | # EEE587-Final-Project
2 | 
3 | This repository contains the files created for the EEE587 Optimal Control (Spring 2020) final project at ASU.
4 | 
5 | ## TEAM MEMBERS: Siddhartha Cheruvu, Aaron Arul Maria John, Lei Zhang
6 | 
7 | ## GITHUB: https://github.com/siddharthacheruvu/EEE587-Final-Project
8 | 
9 | ## Tools Used: MATLAB, Peter Corke's Robotics Toolbox
10 | 
11 | 
12 | 
13 | ## INSTRUCTIONS TO RUN THE CODE
14 | 
15 | The MATLAB code uses Peter Corke's Robotics Toolbox, so the first step is to install that add-on in MATLAB.
16 | 
17 | Open the Add-On Explorer from the 'Home' tab of MATLAB.
18 | 
19 | Search for 'Peter Corke' and install 'Robotics Toolbox for MATLAB' from the results.
20 | 
21 | Then download the `Code` folder from this repository. The files can then be run directly, without any additional setup.
22 | 
23 | Comments are included in the files to explain each algorithm.
24 | 
25 | ## FILES
26 | 
27 | The `Code` folder contains the following files:
28 | * `Value_Iteration_2Link_Arm.m`
29 | * `Value_Iteration_2Link_with_Obstacles.m`
30 | * `Value_Iteration_3Link_Arm.m`
31 | * `Value_Iteration_3Link_with_Obstacles.m`
32 | * `Policy_Iteration_2Link_with_Obstacles.m`
33 | * `Policy_Iteration_3Link_with_Obstacles.m`
34 | * `Q_Learning.m`
35 | 
36 | 
--------------------------------------------------------------------------------
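## QUICK SANITY CHECK

A minimal check that the toolbox is installed (a sketch, assuming the Robotics Toolbox model script `mdl_planar2`, which creates the arm object `p2`, is on the MATLAB path):

    mdl_planar2                  % defines the 2-link planar arm object p2
    p2.plot([0 0])               % should draw the arm at the home configuration
    Value_Iteration_2Link_Arm    % then run any of the scripts, e.g. this one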