├── .gitignore ├── README.md ├── doc └── Problem Description.pdf └── src ├── ComputeShotProbability.m ├── ComputeStageCosts.m ├── ComputeTerminalStateIndex.m ├── ComputeTransitionProbabilities.m ├── FindNextStateIndex.m ├── GenerateWorld.p ├── LinearProgramming.m ├── MakePlots.p ├── PolicyIteration.m ├── ValueIteration.m ├── exampleG.mat ├── exampleP.mat ├── exampleWorld.mat └── main.m /.gitignore: -------------------------------------------------------------------------------- 1 | # MATLAB auxiliary files 2 | *.asv -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dynamic Programming and Optimal Control Lecture 2 | 3 | This repository stores my programming exercises for the Dynamic Programming and Optimal Control lecture (151-0563-01) at ETH Zurich in Fall 2019. In this project, an infinite horizon problem was solved with value iteration, policy iteration and linear programming methods. Note that the resulting optimal policies may differ but the optimal cost should be the same with a tolerance of numerical errors. 
4 | 5 | ## Evaluation 6 | 7 | - Full grade granted 8 | 9 | ## Requirement 10 | 11 | - MATLAB 12 | 13 | ## Usage 14 | 15 | - run `main.m` 16 | -------------------------------------------------------------------------------- /doc/Problem Description.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YizeWang/Dynamic-Programming-and-Optimal-Control-Lecture/c3aa0a6bc3eb8e3f43060587f4a3ac5cf5357762/doc/Problem Description.pdf -------------------------------------------------------------------------------- /src/ComputeShotProbability.m: -------------------------------------------------------------------------------- 1 | function P_SHOT = ComputeShotProbability(stateSpace, currStateIndex, shooterPos) 2 | % ComputeShotProbability Compute the probability of being shot 3 | % 4 | % P_SHOT = ComputeShotProbability(stateSpace, map, currState, shooterPos) 5 | % Compute the probability of being shot by a angry resident 6 | % 7 | % Input arguments: 8 | % stateSpace: 9 | % A (K x 3) - matrix, where the i-th row represents the i-th 10 | % element of the state space. 
11 | % currState: 12 | % A scalar representing the index in stateSpace of current state 13 | % shooterPos: 14 | % A matrix where the i-th row represent the i-th shoot position 15 | % 16 | % Output arguments: 17 | % P_SHOT: 18 | % The probability of beging shot by a angry resident 19 | 20 | %% declare global variables 21 | global GAMMA R 22 | 23 | %% parse information 24 | currM = stateSpace(currStateIndex, 1); % current state in m 25 | currN = stateSpace(currStateIndex, 2); % current state in n 26 | numShooter = size(shooterPos, 1); % shooter number 27 | 28 | %% compute distance 29 | Distance = zeros(numShooter, 1); % initialize distance vector 30 | for i = 1:numShooter 31 | shooterM = shooterPos(i, 1); % m of i-th shooter 32 | shooterN = shooterPos(i, 2); % n of i-th shooter 33 | Distance(i) = abs(shooterM-currM) + abs(shooterN-currN); 34 | end 35 | 36 | %% compute probability of being shot by each shooter 37 | shotP = zeros(numShooter, 1); % initialize shot probability vector 38 | for i = 1:numShooter 39 | if ( Distance(i) >= 0 && Distance(i) <= R ) 40 | shotP(i) = GAMMA/(Distance(i)+1); 41 | else 42 | shotP(i) = 0; 43 | end 44 | end 45 | 46 | %% compute probability of being shot 47 | notShotP = 1 - shotP; % probability of not being shot by each shooter 48 | P_NOT_SHOT = prod(notShotP); % probability of not being shot by all shooters 49 | P_SHOT = 1 - P_NOT_SHOT; % probability of being shot by at least one shooter 50 | 51 | end -------------------------------------------------------------------------------- /src/ComputeStageCosts.m: -------------------------------------------------------------------------------- 1 | function G = ComputeStageCosts(stateSpace, map) 2 | % COMPUTESTAGECOSTS Compute stage costs. 3 | % Compute the stage costs for all states in the state space for all 4 | % control inputs. 5 | % 6 | % G = ComputeStageCosts(stateSpace, map) 7 | % computes the stage costs for all states in the state space for all 8 | % control inputs. 
9 | % 10 | % Input arguments: 11 | % stateSpace: 12 | % A (K x 3)-matrix, where the i-th row represents the i-th 13 | % element of the state space. 14 | % map: 15 | % A (M x N)-matrix describing the world. With 16 | % values: FREE TREE SHOOTER PICK_UP DROP_OFF BASE 17 | % 18 | % Output arguments: 19 | % G: 20 | % A (K x L)-matrix containing the stage costs of all states in 21 | % the state space for all control inputs. The entry G(i, l) 22 | % represents the expected stage cost if we are in state i and 23 | % apply control input l. 24 | 25 | %% declare global variables 26 | global GAMMA R P_WIND Nc %#ok<*NUSED> 27 | global FREE TREE SHOOTER PICK_UP DROP_OFF BASE 28 | global NORTH SOUTH EAST WEST HOVER 29 | global K 30 | global TERMINAL_STATE_INDEX 31 | G = zeros( size(stateSpace, 1), 5 ); 32 | 33 | %% find base state index 34 | [baseM, baseN] = find( map == BASE ); 35 | if isempty(baseM) 36 | error('Error: Invalid Map (No Base)'); 37 | elseif ( size(baseM, 1) > 1 ) 38 | error('Error: Invalid Map (Multiple Bases)'); 39 | end 40 | [~, baseIndex] = ismember([baseM, baseN, 0], stateSpace, 'row'); 41 | if ~baseIndex 42 | error('Error: State Space Constructed Wrongly') 43 | end 44 | 45 | %% find shooters 46 | [shooterM, shooterN] = find( map == SHOOTER ); % find m and n of shooters 47 | shooterPos = [shooterM, shooterN]; % shooter positions 48 | 49 | %% compute stage cost 50 | for curr = 1:K 51 | for act = [NORTH, SOUTH, EAST, WEST, HOVER] % iterate actions 52 | currM = stateSpace(curr, 1); % current state in m 53 | currN = stateSpace(curr, 2); % current state in n 54 | currPsi = stateSpace(curr, 3); % current state in psi 55 | % terminal state yields no cost 56 | if curr == TERMINAL_STATE_INDEX 57 | G(curr, act) = 0; 58 | continue; 59 | % find next position 60 | elseif act == NORTH 61 | nextState = [currM currN+1 currPsi]; 62 | elseif act == SOUTH 63 | nextState = [currM currN-1 currPsi]; 64 | elseif act == EAST 65 | nextState = [currM+1 currN currPsi]; 66 | elseif act 
== WEST 67 | nextState = [currM-1 currN currPsi]; 68 | else % act: hover 69 | nextState = [currM currN currPsi]; 70 | end 71 | % if next position causes crash, make cost infinite 72 | if ~ismember(nextState, stateSpace, 'row') 73 | G(curr, act) = inf; 74 | % if next position is allowable 75 | else 76 | [~, nextStateIndex] = ismember(nextState, stateSpace, 'row'); 77 | % compute shot probability after movement 78 | P_SHOT = ComputeShotProbability(stateSpace, nextStateIndex, shooterPos); 79 | % no wind and no shot 80 | G(curr, act) = G(curr, act) + 1 * (1 - P_WIND) * (1 - P_SHOT); 81 | % no wind but shot 82 | G(curr, act) = G(curr, act) + Nc * (1 - P_WIND) * P_SHOT; 83 | % wind affects position 84 | for windAct = [NORTH, SOUTH, EAST, WEST] 85 | [isCrashed, postWindStateIndex] = FindNextStateIndex(stateSpace, baseIndex, nextStateIndex, windAct); 86 | % if crash happens after wind action, return to base 87 | if isCrashed 88 | G(curr, act) = G(curr, act) + Nc * P_WIND/4; 89 | % if no crash happens, check shoot 90 | else 91 | P_SHOT = ComputeShotProbability(stateSpace, postWindStateIndex, shooterPos); 92 | G(curr, act) = G(curr, act) + Nc * P_WIND/4 * P_SHOT; 93 | G(curr, act) = G(curr, act) + 1 * P_WIND/4 * (1 - P_SHOT); 94 | end 95 | end 96 | end 97 | end 98 | end 99 | 100 | end -------------------------------------------------------------------------------- /src/ComputeTerminalStateIndex.m: -------------------------------------------------------------------------------- 1 | function stateIndex = ComputeTerminalStateIndex(stateSpace, map) 2 | % ComputeTerminalStateIndex Compute the index of the terminal state in the 3 | % stateSpace matrix 4 | % 5 | % stateIndex = ComputeTerminalStateIndex(stateSpace, map) 6 | % Computes the index of the terminal state in the stateSpace matrix 7 | % 8 | % Input arguments: 9 | % stateSpace: 10 | % A (K x 3) - matrix, where the i-th row represents the i-th 11 | % element of the state space. 
12 | % map: 13 | % A (M x N) - matrix describing the terrain of the estate map. 14 | % With values: FREE TREE SHOOTER PICK_UP DROP_OFF BASE 15 | % 16 | % Output arguments: 17 | % stateIndex: 18 | % An integer that is the index of the terminal state in the 19 | % stateSpace matrix 20 | 21 | % find drop off position 22 | global DROP_OFF 23 | [dropM, dropN] = find( map == DROP_OFF ); 24 | 25 | % check existence and uniqueness of drop off position 26 | if isempty(dropM) 27 | error('Error: Invalid Map (No Drop Off Position)'); 28 | elseif ( size(dropM, 1) > 1 ) 29 | error('Error: Invalid Map (Multiple Drop Off Positions)'); 30 | end 31 | 32 | % find terminal state index 33 | [~, stateIndex] = ismember([dropM, dropN, 1], stateSpace, 'row'); 34 | 35 | % error if no index found 36 | if ~stateIndex 37 | error('Error: State Space Constructed Wrongly') 38 | end 39 | 40 | end -------------------------------------------------------------------------------- /src/ComputeTransitionProbabilities.m: -------------------------------------------------------------------------------- 1 | function P = ComputeTransitionProbabilities(stateSpace, map) 2 | % ComputeTransitionProbabilities Compute the transition probabilities 3 | % between all states in the state space for all control inputs. 4 | % 5 | % P = ComputeTransitionProbabilities(stateSpace, map) 6 | % computes the transition probabilities between all states in the state 7 | % space for all control inputs. 8 | % 9 | % Input arguments: 10 | % stateSpace: 11 | % A (K x 3) - matrix, where the i-th row represents the i-th 12 | % element of the state space. 13 | % map: 14 | % A (M x N) - matrix describing the world. With 15 | % values: FREE TREE SHOOTER PICK_UP DROP_OFF BASE 16 | % 17 | % Output arguments: 18 | % P: 19 | % A (K x K x L) - matrix containing the transition probabilities 20 | % between all states in the state space for all control inputs. 
21 | % The entry P(i, j, l) represents the transition probability 22 | % from state i to state j if control input l is applied. 23 | 24 | %% declare global variables 25 | global GAMMA R P_WIND P_PEACE %#ok<*NUSED> 26 | global FREE TREE SHOOTER PICK_UP DROP_OFF BASE 27 | global NORTH SOUTH EAST WEST HOVER 28 | global K TERMINAL_STATE_INDEX 29 | 30 | %% step 1: undisturbed movement 31 | P = zeros(K, K, 5); % final transition matrix 32 | P1 = zeros(K, K, 5); % transition matrix in step 1 33 | P2 = zeros(K, K, 5); % transition matrix in step 2 34 | 35 | % find naive transition matrix 36 | for i = 1:K % time complexity: O(n2) 37 | for j = 1:K 38 | currPos = stateSpace(i,:); % current state 39 | nextPos = stateSpace(j,:); % next state 40 | horzMove = nextPos(1) - currPos(1); % movement along m direction 41 | vertMove = nextPos(2) - currPos(2); % movement along n direction 42 | packMove = nextPos(3) - currPos(3); % change in package state 43 | if ( packMove ~= 0 ) % should not pick package yet 44 | continue; 45 | elseif ( horzMove == 1 && vertMove == 0 ) % east movement detected 46 | P1(i, j, EAST) = 1; 47 | elseif ( horzMove == -1 && vertMove == 0 ) % west movement detected 48 | P1(i, j, WEST) = 1; 49 | elseif ( horzMove == 0 && vertMove== 1 ) % north movement detected 50 | P1(i, j, NORTH) = 1; 51 | elseif ( horzMove== 0 && vertMove== -1 ) % south movement detected 52 | P1(i, j, SOUTH) = 1; 53 | elseif ( horzMove == 0 && vertMove == 0 ) % hover movement detected 54 | P1(i, j, HOVER) = 1; 55 | end 56 | end 57 | end 58 | 59 | % clear used variables 60 | clear currPos; 61 | clear nextPos; 62 | clear horzMove; 63 | clear vertMove; 64 | clear packMove; 65 | 66 | %% step 2: wind disturbance and angry resident shot 67 | % compute some constants 68 | M = size(map, 1); % map size in m direction 69 | N = size(map, 2); % map size in n direction 70 | P_PEACE = 1 - P_WIND; % probability of a peaceful day 71 | 72 | % find base state index 73 | [baseM, baseN] = find( map == BASE ); 74 
| if isempty(baseM) 75 | error('Error: Invalid Map (No Base)'); 76 | elseif ( size(baseM, 1) > 1 ) 77 | error('Error: Invalid Map (Multiple Bases)'); 78 | end 79 | [~, baseIndex] = ismember([baseM, baseN, 0], stateSpace, 'row'); 80 | if ~baseIndex 81 | error('Error: State Space Constructed Wrongly') 82 | end 83 | 84 | % find shooters 85 | [shooterM, shooterN] = find( map == SHOOTER ); % find m and n of shooters 86 | shooterPos = [shooterM, shooterN]; % shooter positions 87 | 88 | % compute transition matrix after wind 89 | for i = 1:K % iterate starting state 90 | for j = 1:K % iteratre ending state 91 | for k = [NORTH, SOUTH, EAST, WEST, HOVER] % iterate actions 92 | if i == TERMINAL_STATE_INDEX % do not move if mission completed 93 | P2(i,j,k) = (j == TERMINAL_STATE_INDEX); 94 | elseif ( P1(i,j,k) == 1 ) % when naive movement valid 95 | P_SHOT = ComputeShotProbability(stateSpace, j, shooterPos); % if no wind, stay or be shot 96 | P2(i,j,k) = P2(i,j,k) + (1 - P_WIND) * (1 - P_SHOT); % no wind and no shot 97 | P2(i,baseIndex,k) = P2(i,baseIndex,k) + (1 - P_WIND) * P_SHOT; % no wind but shot 98 | for windAction = [NORTH, SOUTH, EAST, WEST] % iterate wind actions 99 | [isCrashed, nextStateIndex] = FindNextStateIndex(stateSpace, baseIndex, j, windAction); % find next state resulting from wind 100 | if isCrashed 101 | P2(i,baseIndex,k) = P2(i,baseIndex,k) + P_WIND/4; % return to base 102 | else 103 | P_SHOT = ComputeShotProbability(stateSpace, nextStateIndex, shooterPos); 104 | P2(i,nextStateIndex,k) = P2(i,nextStateIndex,k) + P_WIND/4*(1-P_SHOT); 105 | P2(i,baseIndex,k) = P2(i,baseIndex,k) + P_WIND/4*P_SHOT; 106 | end 107 | end 108 | else 109 | continue; 110 | end 111 | end 112 | end 113 | end 114 | 115 | %% step 3: pick up package 116 | % find pick up position state index 117 | [pickM, pickN] = find( map == PICK_UP ); 118 | if isempty(pickM) 119 | error('Error: Invalid Map (No Pickup Position)'); 120 | elseif ( size(pickM, 1) > 1 ) 121 | error('Error: Invalid Map 
(Multiple Pickup Positions)'); 122 | end 123 | [~, pickIndex0] = ismember([pickM ,pickN ,0], stateSpace, 'row'); % index of state: pickup position without package 124 | if ~pickIndex0 125 | error('Error: State Space Constructed Wrongly') 126 | end 127 | pickIndex1 = pickIndex0 + 1; % index of state: pickup position with package 128 | 129 | % compute final transition probability matrix 130 | P = P2; 131 | for i = 1:K % iterate starting state 132 | for j = 1:K % iteratre ending state 133 | for k = [NORTH, SOUTH, EAST, WEST, HOVER] % iterate actions 134 | if ( j == pickIndex0 ) 135 | P(i,pickIndex1,k) = P(i,pickIndex1,k) + P2(i,pickIndex0,k); 136 | P(i,pickIndex0,k) = 0; 137 | end 138 | end 139 | end 140 | end 141 | 142 | end -------------------------------------------------------------------------------- /src/FindNextStateIndex.m: -------------------------------------------------------------------------------- 1 | function [isCrashed, nextStateIndex] = FindNextStateIndex(stateSpace, baseIndex, currState, action) 2 | % FindIndex Compute the index in stateSpace of next state if action applied 3 | % 4 | % nextStateIndex = FindIndex(stateSpace, map, action) 5 | % Compute the index in stateSpace of next state if action applied 6 | % Return the base state index if next state would be tree or outside map 7 | % 8 | % Input arguments: 9 | % stateSpace: 10 | % A (K x 3) - matrix, where the i-th row represents the i-th 11 | % element of the state space. 12 | % baseIndex: 13 | % A scalar representing the index in stateSpace of base state 14 | % currState: 15 | % A scalar representing the index in stateSpace of current state 16 | % action: 17 | % A scalar representing the action 18 | % 19 | % Output arguments: 20 | % isCrashed: 21 | % A boolean that indicates whether the drone is crashed 22 | % nextStateIndex: 23 | % A scalar representing the index in stateSpace of next state 24 | % if action applied. 
Return the base state index if next state 25 | % would be tree or outside map 26 | 27 | %% declare global variables 28 | global NORTH SOUTH EAST WEST HOVER 29 | 30 | %% parse information 31 | isCrashed = false; % not crashed by default 32 | currM = stateSpace(currState, 1); % current state in m 33 | currN = stateSpace(currState, 2); % current state in n 34 | currPsi = stateSpace(currState, 3); % current state in psi 35 | 36 | %% compute next state 37 | if action == NORTH && ismember([currM,currN+1,currPsi],stateSpace,'row') 38 | [~, nextStateIndex] = ismember([currM,currN+1,currPsi],stateSpace,'row'); 39 | elseif action == SOUTH && ismember([currM,currN-1,currPsi],stateSpace,'row') 40 | [~, nextStateIndex] = ismember([currM,currN-1,currPsi],stateSpace,'row'); 41 | elseif action == EAST && ismember([currM+1,currN,currPsi],stateSpace,'row') 42 | [~, nextStateIndex] = ismember([currM+1,currN,currPsi],stateSpace,'row'); 43 | elseif action == WEST && ismember([currM-1,currN,currPsi],stateSpace,'row') 44 | [~, nextStateIndex] = ismember([currM-1,currN,currPsi],stateSpace,'row'); 45 | else 46 | nextStateIndex = baseIndex; % if crash into next state, return to base 47 | isCrashed = true; 48 | end 49 | 50 | end -------------------------------------------------------------------------------- /src/GenerateWorld.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YizeWang/Dynamic-Programming-and-Optimal-Control-Lecture/c3aa0a6bc3eb8e3f43060587f4a3ac5cf5357762/src/GenerateWorld.p -------------------------------------------------------------------------------- /src/LinearProgramming.m: -------------------------------------------------------------------------------- 1 | function [J_opt, u_opt_ind] = LinearProgramming(P, G) 2 | % LINEARPROGRAMMING Linear Programming 3 | % Solve a stochastic shortest path problem by Linear Programming. 
4 | % 5 | % [J_opt, u_opt_ind] = LinearProgramming(P, G) computes the optimal cost 6 | % and the optimal control input for each state of the state space. 7 | % 8 | % Input arguments: 9 | % P: 10 | % A (K x K x L)-matrix containing the transition probabilities 11 | % between all states in the state space for all control inputs. 12 | % The entry P(i, j, l) represents the transition probability 13 | % from state i to state j if control input l is applied. 14 | % G: 15 | % A (K x L)-matrix containing the stage costs of all states in 16 | % the state space for all control inputs. The entry G(i, l) 17 | % represents the cost if we are in state i and apply control 18 | % input l. 19 | % 20 | % Output arguments: 21 | % J_opt: 22 | % A (K x 1)-matrix containing the optimal cost-to-go for each 23 | % element of the state space. 24 | % u_opt_ind: 25 | % A (K x 1)-matrix containing the index of the optimal control 26 | % input for each element of the state space. Mapping of the 27 | % terminal state is arbitrary (for example: HOVER). 
28 | 29 | %% declare global variables 30 | global K 31 | global TERMINAL_STATE_INDEX 32 | 33 | %% initialize variables 34 | J_opt = zeros(K, 1); % initialize optimal cost 35 | J_opt_temp = zeros(K, 5); % store temporary cost-to-go matrix 36 | nonTerminalState = [1:TERMINAL_STATE_INDEX-1 TERMINAL_STATE_INDEX+1:K]; % all index except terminal state 37 | 38 | %% compute optimization input variables 39 | A = []; % A in linprog function 40 | b = []; % b in linprog function 41 | f = -1 * ones(1, K-1); % f in linprog function, negative because max instead of min 42 | % stack A matrix and b vector 43 | for k = 1:5 44 | A = [A; eye(K-1)-P(nonTerminalState,nonTerminalState,k)]; 45 | b = [b; G(nonTerminalState,k)]; 46 | end 47 | % modify b vector: inf -> 1e6 because linear programming cannot deal with inf 48 | b(isinf(b)) = 1e6; 49 | 50 | %% compute J_opt and u_opt_ind 51 | options = optimset('linprog'); 52 | options.Display = 'off'; 53 | J_opt(nonTerminalState) = linprog(f, A, b, [], [], [], [], options); 54 | for k = 1:5 55 | J_opt_temp(nonTerminalState,k) = G(nonTerminalState,k) + P(nonTerminalState,nonTerminalState,k) * J_opt(nonTerminalState); 56 | end 57 | [~, u_opt_ind] = min(J_opt_temp, [], 2); 58 | 59 | end 60 | 61 | -------------------------------------------------------------------------------- /src/MakePlots.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YizeWang/Dynamic-Programming-and-Optimal-Control-Lecture/c3aa0a6bc3eb8e3f43060587f4a3ac5cf5357762/src/MakePlots.p -------------------------------------------------------------------------------- /src/PolicyIteration.m: -------------------------------------------------------------------------------- 1 | function [J_opt, u_opt_ind] = PolicyIteration(P, G) 2 | % POLICYITERATION Policy iteration 3 | % Solve a stochastic shortest path problem by Policy Iteration. 
4 | % 5 | % [J_opt, u_opt_ind] = PolicyIteration(P, G) computes the optimal cost and 6 | % the optimal control input for each state of the state space. 7 | % 8 | % Input arguments: 9 | % P: 10 | % A (k x k x L)-matrix containing the transition probabilities 11 | % between all states in the state space for all control inputs. 12 | % The entry P(i, j, l) represents the transition probability 13 | % from state i to state j if control input l is applied. 14 | % G: 15 | % A (k x L)-matrix containing the stage costs of all states in 16 | % the state space for all control inputs. The entry G(i, l) 17 | % represents the cost if we are in state i and apply control 18 | % input l. 19 | % 20 | % Output arguments: 21 | % J_opt: 22 | % A (k x 1)-matrix containing the optimal cost-to-go for each 23 | % element of the state space. 24 | % u_opt_ind: 25 | % A (k x 1)-matrix containing the index of the optimal control 26 | % input for each element of the state space. Mapping of the 27 | % terminal state is arbitrary (for example: HOVER). 
28 | 29 | %% declare global variables 30 | global K HOVER 31 | global TERMINAL_STATE_INDEX 32 | 33 | %% initialize variables 34 | tol = 1e-6; % error tolerance 35 | error = intmax; % initial error 36 | P_J = zeros(K, K); % inialize coefficent matrix postmultiplied by J given policy 37 | J_opt = zeros(K, 1); % initialize optimal cost with zero vector 38 | J_prev = ones(K, 1); % initialize with an infinite vector 39 | J_opt_temp = zeros(K, 5); % store temporary cost-to-go matrix 40 | u_opt_ind = HOVER * ones(K, 1); % initialize optimal control with Hover 41 | nonTerminalState = [1:TERMINAL_STATE_INDEX-1 TERMINAL_STATE_INDEX+1:K]; % all index except terminal state 42 | 43 | %% start iteration 44 | while( error > tol ) 45 | % update J_prev 46 | J_prev = J_opt; 47 | % compute index of elements in G that will end up in q 48 | G_ind = sub2ind([K,5], 1:K, u_opt_ind'); % compute index from G 49 | q = G(G_ind)'; % stage cost vector given current policy 50 | % construct P_J matrix postmultiplied by J given current policy 51 | for i = 1:K 52 | P_J(i,:) = P(i,:,u_opt_ind(i)); 53 | end 54 | % solve J_opt using linear equation solver 55 | A = eye(K-1) - P_J(nonTerminalState,nonTerminalState); % A matrix 56 | b = q(nonTerminalState); % b vector 57 | J_opt(nonTerminalState) = linsolve(A,b); 58 | % policy improvement 59 | % construct J_opt_temp: the k-th column indicates the cost-to-go vector 60 | % after applying control k 61 | for k = 1:5 62 | J_opt_temp(:,k) = G(:,k) + P(:,:,k) * J_opt; 63 | end 64 | [~, u_opt_ind] = min(J_opt_temp, [], 2); 65 | % update error 66 | error = max(abs(J_opt - J_prev)); 67 | end 68 | 69 | end -------------------------------------------------------------------------------- /src/ValueIteration.m: -------------------------------------------------------------------------------- 1 | function [J_opt, u_opt_ind] = ValueIteration(P, G) 2 | % VALUEITERATION Value iteration 3 | % Solve a stochastic shortest path problem by Value Iteration. 
4 | % 5 | % [J_opt, u_opt_ind] = ValueIteration(P, G) computes the optimal cost and 6 | % the optimal control input for each state of the state space. 7 | % 8 | % Input arguments: 9 | % P: 10 | % A (K x K x L)-matrix containing the transition probabilities 11 | % between all states in the state space for all control inputs. 12 | % The entry P(i, j, l) represents the transition probability 13 | % from state i to state j if control input l is applied. 14 | % G: 15 | % A (K x L)-matrix containing the stage costs of all states in 16 | % the state space for all control inputs. The entry G(i, l) 17 | % represents the cost if we are in state i and apply control 18 | % input l. 19 | % 20 | % Output arguments: 21 | % J_opt: 22 | % A (K x 1)-matrix containing the optimal cost-to-go for each 23 | % element of the state space. 24 | % u_opt_ind: 25 | % A (K x 1)-matrix containing the index of the optimal control 26 | % input for each element of the state space. Mapping of the 27 | % terminal state is arbitrary (for example: HOVER). 
28 | 29 | %% declare global variables 30 | global K HOVER 31 | global TERMINAL_STATE_INDEX 32 | 33 | %% initialize variables 34 | tol = 1e-6; % error tolerance 35 | error = intmax; % initial error 36 | J_opt = zeros(K, 1); % optimal cost-to-go vector 37 | J_prev = zeros(K, 1); % previous optimal cost-to-go vector 38 | J_opt_temp = zeros(K, 5); % store temporary cost-to-go matrix 39 | u_opt_ind = HOVER * ones(K, 1); % optimal control vector 40 | nonTerminalState = [1:TERMINAL_STATE_INDEX-1 TERMINAL_STATE_INDEX+1:K]; 41 | 42 | %% start iteration 43 | while( error > tol ) 44 | % construct J_opt_temp: the k-th column indicates the cost-to-go vector after applying control k 45 | for k = 1:5 46 | J_opt_temp(nonTerminalState,k) = G(nonTerminalState,k) + P(nonTerminalState,nonTerminalState,k) * J_opt(nonTerminalState); 47 | end 48 | % compute optimal cost-to-go and optimal control 49 | [J_opt, u_opt_ind] = min(J_opt_temp, [], 2); 50 | % update error 51 | error = max(abs(J_opt - J_prev)); 52 | % store optimal cost-to-go after this iteration 53 | J_prev = J_opt; 54 | end 55 | 56 | end -------------------------------------------------------------------------------- /src/exampleG.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YizeWang/Dynamic-Programming-and-Optimal-Control-Lecture/c3aa0a6bc3eb8e3f43060587f4a3ac5cf5357762/src/exampleG.mat -------------------------------------------------------------------------------- /src/exampleP.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YizeWang/Dynamic-Programming-and-Optimal-Control-Lecture/c3aa0a6bc3eb8e3f43060587f4a3ac5cf5357762/src/exampleP.mat -------------------------------------------------------------------------------- /src/exampleWorld.mat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/YizeWang/Dynamic-Programming-and-Optimal-Control-Lecture/c3aa0a6bc3eb8e3f43060587f4a3ac5cf5357762/src/exampleWorld.mat -------------------------------------------------------------------------------- /src/main.m: -------------------------------------------------------------------------------- 1 | % Dynamic Programming and Optimal Control 2 | % Fall 2019 3 | % Programming Exercise 4 | % 5 | % Author: Yize Wang 6 | % Email: yizwang@student.ethz.ch 7 | 8 | %% Clear workspace and command window 9 | clc; 10 | clear all; 11 | close all; 12 | 13 | %% Initialize stopwatches 14 | mainStart = tic; 15 | 16 | %% Options 17 | mapSize = [15, 20]; % [M, N] 18 | % Set to true to generate a random map of size mapSize, else set to false 19 | % to load the pre-exsisting example map 20 | generateRandomWorld = true; 21 | 22 | % Plotting options 23 | global PLOT_POLICY PLOT_COST 24 | PLOT_POLICY = true; 25 | PLOT_COST = false; 26 | 27 | %% Global problem parameters 28 | global GAMMA R Nc P_WIND 29 | GAMMA = 0.2; % Shooter gamma factor 30 | R = 2; % Shooter range 31 | Nc = 10; % Time steps required to bring drone to base when it crashes 32 | P_WIND = 0.1; % Gust of wind probability 33 | 34 | % IDs of elements in the map matrix 35 | global FREE TREE SHOOTER PICK_UP DROP_OFF BASE 36 | FREE = 0; 37 | TREE = 1; 38 | SHOOTER = 2; 39 | PICK_UP = 3; 40 | DROP_OFF = 4; 41 | BASE = 5; 42 | 43 | % Index of each action in the P and G matrices. 
Use this ordering 44 | global NORTH SOUTH EAST WEST HOVER 45 | NORTH = 1; 46 | SOUTH = 2; 47 | EAST = 3; 48 | WEST = 4; 49 | HOVER = 5; 50 | 51 | %% Generate map 52 | startTime = tic; 53 | % map(m,n) represents the cell at indices (m,n) according to the axes 54 | if generateRandomWorld 55 | map = GenerateWorld(mapSize(1), mapSize(2)); %#ok<*UNRCH> 56 | else 57 | % load a pre-generated map 58 | load('exampleWorld.mat'); 59 | end 60 | MakePlots(map); 61 | disp("Generate Map: " + toc(startTime) + " sec"); 62 | 63 | %% Generate state space 64 | startTime = tic; 65 | % Generate a (K x 3) - matrix 'stateSpace', where each accessible cell is 66 | % represented by two rows (with and without carrying a package). 67 | stateSpace = []; 68 | for m = 1 : size(map, 1) 69 | for n = 1 : size(map, 2) 70 | if map(m, n) ~= TREE 71 | stateSpace = [stateSpace; 72 | m, n, 0; 73 | m, n, 1]; 74 | end 75 | end 76 | end 77 | 78 | % State space size 79 | global K 80 | K = size(stateSpace,1); 81 | disp("Generate State Space: " + toc(startTime) + " sec"); 82 | 83 | %% Set the following to true as you progress with the files 84 | transitionProbabilitiesImplemented = true; 85 | stageCostsImplemented = true; 86 | valueIterationImplemented = true; 87 | policyIterationImplemented = true; 88 | linearProgrammingImplemented = true; 89 | 90 | %% Compute the terminal state index 91 | global TERMINAL_STATE_INDEX %#ok<*NUSED> 92 | if transitionProbabilitiesImplemented 93 | TERMINAL_STATE_INDEX = ComputeTerminalStateIndex(stateSpace, map); 94 | end 95 | 96 | %% Compute transition probabilities 97 | if transitionProbabilitiesImplemented 98 | startTime = tic; 99 | P = ComputeTransitionProbabilities(stateSpace, map); 100 | disp("Compute Transition Probabilities: " + toc(startTime) + " sec"); 101 | end 102 | 103 | %% Compute stage costs 104 | if stageCostsImplemented 105 | startTime = tic; 106 | G = ComputeStageCosts(stateSpace, map); 107 | disp("Compute Stage Costs: " + toc(startTime) + " sec"); 108 | end 109 
| 110 | %% Solve stochastic shortest path problem 111 | % value iteration 112 | if valueIterationImplemented 113 | startTime = tic; 114 | [ J_opt_vi, u_opt_ind_vi ] = ValueIteration(P, G); 115 | 116 | if size(J_opt_vi,1)~=K || size(u_opt_ind_vi,1)~=K 117 | disp('[ERROR] the size of J and u must be K') 118 | end 119 | disp("Value Iteration: " + toc(startTime) + " sec"); 120 | end 121 | 122 | % policy iteration 123 | if policyIterationImplemented 124 | startTime = tic; 125 | [ J_opt_pi, u_opt_ind_pi ] = PolicyIteration(P, G); 126 | 127 | if size(J_opt_pi,1)~=K || size(u_opt_ind_pi,1)~=K 128 | disp('[ERROR] the size of J and u must be K') 129 | end 130 | disp("Policy Iteration: " + toc(startTime) + " sec"); 131 | end 132 | 133 | % linear programming 134 | if linearProgrammingImplemented 135 | startTime = tic; 136 | [ J_opt_lp, u_opt_ind_lp ] = LinearProgramming(P, G); 137 | 138 | if size(J_opt_lp,1)~=K || size(u_opt_ind_lp,1)~=K 139 | disp('[ERROR] the size of J and u must be K') 140 | end 141 | disp("Linear Programming: " + toc(startTime) + " sec"); 142 | end 143 | 144 | %% Plot results 145 | startTime = tic; 146 | if valueIterationImplemented 147 | MakePlots(map, stateSpace, J_opt_vi, u_opt_ind_vi, 'Value iteration'); 148 | end 149 | if policyIterationImplemented 150 | MakePlots(map, stateSpace, J_opt_pi, u_opt_ind_pi, 'Policy iteration'); 151 | end 152 | if linearProgrammingImplemented 153 | MakePlots(map, stateSpace, J_opt_lp, u_opt_ind_lp, 'Linear programming'); 154 | end 155 | disp("Plot results: " + toc(startTime) + " sec"); 156 | 157 | %% Terminated 158 | disp("Computation Time: " + (toc(mainStart) - toc(startTime)) + " sec"); 159 | disp("Total Time: " + toc(mainStart) + " sec"); 160 | 161 | %% Compare 162 | % compute maximum error to verify algorithm implementation correctness 163 | if valueIterationImplemented && policyIterationImplemented && linearProgrammingImplemented 164 | maxError = 
max(max(abs(J_opt_pi-J_opt_lp))+max(abs(J_opt_pi-J_opt_vi))+max(abs(J_opt_vi-J_opt_lp))); 165 | if maxError < 1e-4 166 | fprintf('\nCongratulations! Three Methods Yield Same Result! \n') 167 | else 168 | fprintf('\nWarning! Three Methods Yield Different Result! \n') 169 | end 170 | end --------------------------------------------------------------------------------