├── .gitignore ├── README.md ├── doc └── Problem Description.pdf └── src ├── ComputeShotProbability.m ├── ComputeStageCosts.m ├── ComputeTerminalStateIndex.m ├── ComputeTransitionProbabilities.m ├── FindNextStateIndex.m ├── GenerateWorld.p ├── LinearProgramming.m ├── MakePlots.p ├── PolicyIteration.m ├── ValueIteration.m ├── exampleG.mat ├── exampleP.mat ├── exampleWorld.mat └── main.m /.gitignore: -------------------------------------------------------------------------------- 1 | # MATLAB auxiliary files 2 | *.asv -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dynamic Programming and Optimal Control Lecture 2 | 3 | This repository stores my programming exercises for the Dynamic Programming and Optimal Control lecture (151-0563-01) at ETH Zurich in Fall 2019. In this project, an infinite horizon problem was solved with value iteration, policy iteration and linear programming methods. Note that the resulting optimal policies may differ but the optimal cost should be the same with a tolerance of numerical errors. 
4 | 5 | ## Evaluation 6 | 7 | - Full grade granted 8 | 9 | ## Requirement 10 | 11 | - MATLAB 12 | 13 | ## Usage 14 | 15 | - run `main.m` 16 | -------------------------------------------------------------------------------- /doc/Problem Description.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YizeWang/Dynamic-Programming-and-Optimal-Control-Lecture/c3aa0a6bc3eb8e3f43060587f4a3ac5cf5357762/doc/Problem Description.pdf -------------------------------------------------------------------------------- /src/ComputeShotProbability.m: -------------------------------------------------------------------------------- 1 | function P_SHOT = ComputeShotProbability(stateSpace, currStateIndex, shooterPos) 2 | % ComputeShotProbability Compute the probability of being shot 3 | % 4 | % P_SHOT = ComputeShotProbability(stateSpace, map, currState, shooterPos) 5 | % Compute the probability of being shot by a angry resident 6 | % 7 | % Input arguments: 8 | % stateSpace: 9 | % A (K x 3) - matrix, where the i-th row represents the i-th 10 | % element of the state space. 
11 | % currState: 12 | % A scalar representing the index in stateSpace of current state 13 | % shooterPos: 14 | % A matrix where the i-th row represent the i-th shoot position 15 | % 16 | % Output arguments: 17 | % P_SHOT: 18 | % The probability of beging shot by a angry resident 19 | 20 | %% declare global variables 21 | global GAMMA R 22 | 23 | %% parse information 24 | currM = stateSpace(currStateIndex, 1); % current state in m 25 | currN = stateSpace(currStateIndex, 2); % current state in n 26 | numShooter = size(shooterPos, 1); % shooter number 27 | 28 | %% compute distance 29 | Distance = zeros(numShooter, 1); % initialize distance vector 30 | for i = 1:numShooter 31 | shooterM = shooterPos(i, 1); % m of i-th shooter 32 | shooterN = shooterPos(i, 2); % n of i-th shooter 33 | Distance(i) = abs(shooterM-currM) + abs(shooterN-currN); 34 | end 35 | 36 | %% compute probability of being shot by each shooter 37 | shotP = zeros(numShooter, 1); % initialize shot probability vector 38 | for i = 1:numShooter 39 | if ( Distance(i) >= 0 && Distance(i) <= R ) 40 | shotP(i) = GAMMA/(Distance(i)+1); 41 | else 42 | shotP(i) = 0; 43 | end 44 | end 45 | 46 | %% compute probability of being shot 47 | notShotP = 1 - shotP; % probability of not being shot by each shooter 48 | P_NOT_SHOT = prod(notShotP); % probability of not being shot by all shooters 49 | P_SHOT = 1 - P_NOT_SHOT; % probability of being shot by at least one shooter 50 | 51 | end -------------------------------------------------------------------------------- /src/ComputeStageCosts.m: -------------------------------------------------------------------------------- 1 | function G = ComputeStageCosts(stateSpace, map) 2 | % COMPUTESTAGECOSTS Compute stage costs. 3 | % Compute the stage costs for all states in the state space for all 4 | % control inputs. 5 | % 6 | % G = ComputeStageCosts(stateSpace, map) 7 | % computes the stage costs for all states in the state space for all 8 | % control inputs. 
9 | % 10 | % Input arguments: 11 | % stateSpace: 12 | % A (K x 3)-matrix, where the i-th row represents the i-th 13 | % element of the state space. 14 | % map: 15 | % A (M x N)-matrix describing the world. With 16 | % values: FREE TREE SHOOTER PICK_UP DROP_OFF BASE 17 | % 18 | % Output arguments: 19 | % G: 20 | % A (K x L)-matrix containing the stage costs of all states in 21 | % the state space for all control inputs. The entry G(i, l) 22 | % represents the expected stage cost if we are in state i and 23 | % apply control input l. 24 | 25 | %% declare global variables 26 | global GAMMA R P_WIND Nc %#ok<*NUSED> 27 | global FREE TREE SHOOTER PICK_UP DROP_OFF BASE 28 | global NORTH SOUTH EAST WEST HOVER 29 | global K 30 | global TERMINAL_STATE_INDEX 31 | G = zeros( size(stateSpace, 1), 5 ); 32 | 33 | %% find base state index 34 | [baseM, baseN] = find( map == BASE ); 35 | if isempty(baseM) 36 | error('Error: Invalid Map (No Base)'); 37 | elseif ( size(baseM, 1) > 1 ) 38 | error('Error: Invalid Map (Multiple Bases)'); 39 | end 40 | [~, baseIndex] = ismember([baseM, baseN, 0], stateSpace, 'row'); 41 | if ~baseIndex 42 | error('Error: State Space Constructed Wrongly') 43 | end 44 | 45 | %% find shooters 46 | [shooterM, shooterN] = find( map == SHOOTER ); % find m and n of shooters 47 | shooterPos = [shooterM, shooterN]; % shooter positions 48 | 49 | %% compute stage cost 50 | for curr = 1:K 51 | for act = [NORTH, SOUTH, EAST, WEST, HOVER] % iterate actions 52 | currM = stateSpace(curr, 1); % current state in m 53 | currN = stateSpace(curr, 2); % current state in n 54 | currPsi = stateSpace(curr, 3); % current state in psi 55 | % terminal state yields no cost 56 | if curr == TERMINAL_STATE_INDEX 57 | G(curr, act) = 0; 58 | continue; 59 | % find next position 60 | elseif act == NORTH 61 | nextState = [currM currN+1 currPsi]; 62 | elseif act == SOUTH 63 | nextState = [currM currN-1 currPsi]; 64 | elseif act == EAST 65 | nextState = [currM+1 currN currPsi]; 66 | elseif act 
== WEST 67 | nextState = [currM-1 currN currPsi]; 68 | else % act: hover 69 | nextState = [currM currN currPsi]; 70 | end 71 | % if next position causes crash, make cost infinite 72 | if ~ismember(nextState, stateSpace, 'row') 73 | G(curr, act) = inf; 74 | % if next position is allowable 75 | else 76 | [~, nextStateIndex] = ismember(nextState, stateSpace, 'row'); 77 | % compute shot probability after movement 78 | P_SHOT = ComputeShotProbability(stateSpace, nextStateIndex, shooterPos); 79 | % no wind and no shot 80 | G(curr, act) = G(curr, act) + 1 * (1 - P_WIND) * (1 - P_SHOT); 81 | % no wind but shot 82 | G(curr, act) = G(curr, act) + Nc * (1 - P_WIND) * P_SHOT; 83 | % wind affects position 84 | for windAct = [NORTH, SOUTH, EAST, WEST] 85 | [isCrashed, postWindStateIndex] = FindNextStateIndex(stateSpace, baseIndex, nextStateIndex, windAct); 86 | % if crash happens after wind action, return to base 87 | if isCrashed 88 | G(curr, act) = G(curr, act) + Nc * P_WIND/4; 89 | % if no crash happens, check shoot 90 | else 91 | P_SHOT = ComputeShotProbability(stateSpace, postWindStateIndex, shooterPos); 92 | G(curr, act) = G(curr, act) + Nc * P_WIND/4 * P_SHOT; 93 | G(curr, act) = G(curr, act) + 1 * P_WIND/4 * (1 - P_SHOT); 94 | end 95 | end 96 | end 97 | end 98 | end 99 | 100 | end -------------------------------------------------------------------------------- /src/ComputeTerminalStateIndex.m: -------------------------------------------------------------------------------- 1 | function stateIndex = ComputeTerminalStateIndex(stateSpace, map) 2 | % ComputeTerminalStateIndex Compute the index of the terminal state in the 3 | % stateSpace matrix 4 | % 5 | % stateIndex = ComputeTerminalStateIndex(stateSpace, map) 6 | % Computes the index of the terminal state in the stateSpace matrix 7 | % 8 | % Input arguments: 9 | % stateSpace: 10 | % A (K x 3) - matrix, where the i-th row represents the i-th 11 | % element of the state space. 
12 | % map: 13 | % A (M x N) - matrix describing the terrain of the estate map. 14 | % With values: FREE TREE SHOOTER PICK_UP DROP_OFF BASE 15 | % 16 | % Output arguments: 17 | % stateIndex: 18 | % An integer that is the index of the terminal state in the 19 | % stateSpace matrix 20 | 21 | % find drop off position 22 | global DROP_OFF 23 | [dropM, dropN] = find( map == DROP_OFF ); 24 | 25 | % check existence and uniqueness of drop off position 26 | if isempty(dropM) 27 | error('Error: Invalid Map (No Drop Off Position)'); 28 | elseif ( size(dropM, 1) > 1 ) 29 | error('Error: Invalid Map (Multiple Drop Off Positions)'); 30 | end 31 | 32 | % find terminal state index 33 | [~, stateIndex] = ismember([dropM, dropN, 1], stateSpace, 'row'); 34 | 35 | % error if no index found 36 | if ~stateIndex 37 | error('Error: State Space Constructed Wrongly') 38 | end 39 | 40 | end -------------------------------------------------------------------------------- /src/ComputeTransitionProbabilities.m: -------------------------------------------------------------------------------- 1 | function P = ComputeTransitionProbabilities(stateSpace, map) 2 | % ComputeTransitionProbabilities Compute the transition probabilities 3 | % between all states in the state space for all control inputs. 4 | % 5 | % P = ComputeTransitionProbabilities(stateSpace, map) 6 | % computes the transition probabilities between all states in the state 7 | % space for all control inputs. 8 | % 9 | % Input arguments: 10 | % stateSpace: 11 | % A (K x 3) - matrix, where the i-th row represents the i-th 12 | % element of the state space. 13 | % map: 14 | % A (M x N) - matrix describing the world. With 15 | % values: FREE TREE SHOOTER PICK_UP DROP_OFF BASE 16 | % 17 | % Output arguments: 18 | % P: 19 | % A (K x K x L) - matrix containing the transition probabilities 20 | % between all states in the state space for all control inputs. 
21 | % The entry P(i, j, l) represents the transition probability 22 | % from state i to state j if control input l is applied. 23 | 24 | %% declare global variables 25 | global GAMMA R P_WIND P_PEACE %#ok<*NUSED> 26 | global FREE TREE SHOOTER PICK_UP DROP_OFF BASE 27 | global NORTH SOUTH EAST WEST HOVER 28 | global K TERMINAL_STATE_INDEX 29 | 30 | %% step 1: undisturbed movement 31 | P = zeros(K, K, 5); % final transition matrix 32 | P1 = zeros(K, K, 5); % transition matrix in step 1 33 | P2 = zeros(K, K, 5); % transition matrix in step 2 34 | 35 | % find naive transition matrix 36 | for i = 1:K % time complexity: O(n2) 37 | for j = 1:K 38 | currPos = stateSpace(i,:); % current state 39 | nextPos = stateSpace(j,:); % next state 40 | horzMove = nextPos(1) - currPos(1); % movement along m direction 41 | vertMove = nextPos(2) - currPos(2); % movement along n direction 42 | packMove = nextPos(3) - currPos(3); % change in package state 43 | if ( packMove ~= 0 ) % should not pick package yet 44 | continue; 45 | elseif ( horzMove == 1 && vertMove == 0 ) % east movement detected 46 | P1(i, j, EAST) = 1; 47 | elseif ( horzMove == -1 && vertMove == 0 ) % west movement detected 48 | P1(i, j, WEST) = 1; 49 | elseif ( horzMove == 0 && vertMove== 1 ) % north movement detected 50 | P1(i, j, NORTH) = 1; 51 | elseif ( horzMove== 0 && vertMove== -1 ) % south movement detected 52 | P1(i, j, SOUTH) = 1; 53 | elseif ( horzMove == 0 && vertMove == 0 ) % hover movement detected 54 | P1(i, j, HOVER) = 1; 55 | end 56 | end 57 | end 58 | 59 | % clear used variables 60 | clear currPos; 61 | clear nextPos; 62 | clear horzMove; 63 | clear vertMove; 64 | clear packMove; 65 | 66 | %% step 2: wind disturbance and angry resident shot 67 | % compute some constants 68 | M = size(map, 1); % map size in m direction 69 | N = size(map, 2); % map size in n direction 70 | P_PEACE = 1 - P_WIND; % probability of a peaceful day 71 | 72 | % find base state index 73 | [baseM, baseN] = find( map == BASE ); 74 
| if isempty(baseM) 75 | error('Error: Invalid Map (No Base)'); 76 | elseif ( size(baseM, 1) > 1 ) 77 | error('Error: Invalid Map (Multiple Bases)'); 78 | end 79 | [~, baseIndex] = ismember([baseM, baseN, 0], stateSpace, 'row'); 80 | if ~baseIndex 81 | error('Error: State Space Constructed Wrongly') 82 | end 83 | 84 | % find shooters 85 | [shooterM, shooterN] = find( map == SHOOTER ); % find m and n of shooters 86 | shooterPos = [shooterM, shooterN]; % shooter positions 87 | 88 | % compute transition matrix after wind 89 | for i = 1:K % iterate starting state 90 | for j = 1:K % iteratre ending state 91 | for k = [NORTH, SOUTH, EAST, WEST, HOVER] % iterate actions 92 | if i == TERMINAL_STATE_INDEX % do not move if mission completed 93 | P2(i,j,k) = (j == TERMINAL_STATE_INDEX); 94 | elseif ( P1(i,j,k) == 1 ) % when naive movement valid 95 | P_SHOT = ComputeShotProbability(stateSpace, j, shooterPos); % if no wind, stay or be shot 96 | P2(i,j,k) = P2(i,j,k) + (1 - P_WIND) * (1 - P_SHOT); % no wind and no shot 97 | P2(i,baseIndex,k) = P2(i,baseIndex,k) + (1 - P_WIND) * P_SHOT; % no wind but shot 98 | for windAction = [NORTH, SOUTH, EAST, WEST] % iterate wind actions 99 | [isCrashed, nextStateIndex] = FindNextStateIndex(stateSpace, baseIndex, j, windAction); % find next state resulting from wind 100 | if isCrashed 101 | P2(i,baseIndex,k) = P2(i,baseIndex,k) + P_WIND/4; % return to base 102 | else 103 | P_SHOT = ComputeShotProbability(stateSpace, nextStateIndex, shooterPos); 104 | P2(i,nextStateIndex,k) = P2(i,nextStateIndex,k) + P_WIND/4*(1-P_SHOT); 105 | P2(i,baseIndex,k) = P2(i,baseIndex,k) + P_WIND/4*P_SHOT; 106 | end 107 | end 108 | else 109 | continue; 110 | end 111 | end 112 | end 113 | end 114 | 115 | %% step 3: pick up package 116 | % find pick up position state index 117 | [pickM, pickN] = find( map == PICK_UP ); 118 | if isempty(pickM) 119 | error('Error: Invalid Map (No Pickup Position)'); 120 | elseif ( size(pickM, 1) > 1 ) 121 | error('Error: Invalid Map 
(Multiple Pickup Positions)'); 122 | end 123 | [~, pickIndex0] = ismember([pickM ,pickN ,0], stateSpace, 'row'); % index of state: pickup position without package 124 | if ~pickIndex0 125 | error('Error: State Space Constructed Wrongly') 126 | end 127 | pickIndex1 = pickIndex0 + 1; % index of state: pickup position with package 128 | 129 | % compute final transition probability matrix 130 | P = P2; 131 | for i = 1:K % iterate starting state 132 | for j = 1:K % iteratre ending state 133 | for k = [NORTH, SOUTH, EAST, WEST, HOVER] % iterate actions 134 | if ( j == pickIndex0 ) 135 | P(i,pickIndex1,k) = P(i,pickIndex1,k) + P2(i,pickIndex0,k); 136 | P(i,pickIndex0,k) = 0; 137 | end 138 | end 139 | end 140 | end 141 | 142 | end -------------------------------------------------------------------------------- /src/FindNextStateIndex.m: -------------------------------------------------------------------------------- 1 | function [isCrashed, nextStateIndex] = FindNextStateIndex(stateSpace, baseIndex, currState, action) 2 | % FindIndex Compute the index in stateSpace of next state if action applied 3 | % 4 | % nextStateIndex = FindIndex(stateSpace, map, action) 5 | % Compute the index in stateSpace of next state if action applied 6 | % Return the base state index if next state would be tree or outside map 7 | % 8 | % Input arguments: 9 | % stateSpace: 10 | % A (K x 3) - matrix, where the i-th row represents the i-th 11 | % element of the state space. 12 | % baseIndex: 13 | % A scalar representing the index in stateSpace of base state 14 | % currState: 15 | % A scalar representing the index in stateSpace of current state 16 | % action: 17 | % A scalar representing the action 18 | % 19 | % Output arguments: 20 | % isCrashed: 21 | % A boolean that indicates whether the drone is crashed 22 | % nextStateIndex: 23 | % A scalar representing the index in stateSpace of next state 24 | % if action applied. 
Return the base state index if next state 25 | % would be tree or outside map 26 | 27 | %% declare global variables 28 | global NORTH SOUTH EAST WEST HOVER 29 | 30 | %% parse information 31 | isCrashed = false; % not crashed by default 32 | currM = stateSpace(currState, 1); % current state in m 33 | currN = stateSpace(currState, 2); % current state in n 34 | currPsi = stateSpace(currState, 3); % current state in psi 35 | 36 | %% compute next state 37 | if action == NORTH && ismember([currM,currN+1,currPsi],stateSpace,'row') 38 | [~, nextStateIndex] = ismember([currM,currN+1,currPsi],stateSpace,'row'); 39 | elseif action == SOUTH && ismember([currM,currN-1,currPsi],stateSpace,'row') 40 | [~, nextStateIndex] = ismember([currM,currN-1,currPsi],stateSpace,'row'); 41 | elseif action == EAST && ismember([currM+1,currN,currPsi],stateSpace,'row') 42 | [~, nextStateIndex] = ismember([currM+1,currN,currPsi],stateSpace,'row'); 43 | elseif action == WEST && ismember([currM-1,currN,currPsi],stateSpace,'row') 44 | [~, nextStateIndex] = ismember([currM-1,currN,currPsi],stateSpace,'row'); 45 | else 46 | nextStateIndex = baseIndex; % if crash into next state, return to base 47 | isCrashed = true; 48 | end 49 | 50 | end -------------------------------------------------------------------------------- /src/GenerateWorld.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YizeWang/Dynamic-Programming-and-Optimal-Control-Lecture/c3aa0a6bc3eb8e3f43060587f4a3ac5cf5357762/src/GenerateWorld.p -------------------------------------------------------------------------------- /src/LinearProgramming.m: -------------------------------------------------------------------------------- 1 | function [J_opt, u_opt_ind] = LinearProgramming(P, G) 2 | % LINEARPROGRAMMING Linear Programming 3 | % Solve a stochastic shortest path problem by Linear Programming. 
4 | % 5 | % [J_opt, u_opt_ind] = LinearProgramming(P, G) computes the optimal cost 6 | % and the optimal control input for each state of the state space. 7 | % 8 | % Input arguments: 9 | % P: 10 | % A (K x K x L)-matrix containing the transition probabilities 11 | % between all states in the state space for all control inputs. 12 | % The entry P(i, j, l) represents the transition probability 13 | % from state i to state j if control input l is applied. 14 | % G: 15 | % A (K x L)-matrix containing the stage costs of all states in 16 | % the state space for all control inputs. The entry G(i, l) 17 | % represents the cost if we are in state i and apply control 18 | % input l. 19 | % 20 | % Output arguments: 21 | % J_opt: 22 | % A (K x 1)-matrix containing the optimal cost-to-go for each 23 | % element of the state space. 24 | % u_opt_ind: 25 | % A (K x 1)-matrix containing the index of the optimal control 26 | % input for each element of the state space. Mapping of the 27 | % terminal state is arbitrary (for example: HOVER). 
28 | 29 | %% declare global variables 30 | global K 31 | global TERMINAL_STATE_INDEX 32 | 33 | %% initialize variables 34 | J_opt = zeros(K, 1); % initialize optimal cost 35 | J_opt_temp = zeros(K, 5); % store temporary cost-to-go matrix 36 | nonTerminalState = [1:TERMINAL_STATE_INDEX-1 TERMINAL_STATE_INDEX+1:K]; % all index except terminal state 37 | 38 | %% compute optimization input variables 39 | A = []; % A in linprog function 40 | b = []; % b in linprog function 41 | f = -1 * ones(1, K-1); % f in linprog function, negative because max instead of min 42 | % stack A matrix and b vector 43 | for k = 1:5 44 | A = [A; eye(K-1)-P(nonTerminalState,nonTerminalState,k)]; 45 | b = [b; G(nonTerminalState,k)]; 46 | end 47 | % modify b vector: inf -> 1e6 because linear programming cannot deal with inf 48 | b(isinf(b)) = 1e6; 49 | 50 | %% compute J_opt and u_opt_ind 51 | options = optimset('linprog'); 52 | options.Display = 'off'; 53 | J_opt(nonTerminalState) = linprog(f, A, b, [], [], [], [], options); 54 | for k = 1:5 55 | J_opt_temp(nonTerminalState,k) = G(nonTerminalState,k) + P(nonTerminalState,nonTerminalState,k) * J_opt(nonTerminalState); 56 | end 57 | [~, u_opt_ind] = min(J_opt_temp, [], 2); 58 | 59 | end 60 | 61 | -------------------------------------------------------------------------------- /src/MakePlots.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YizeWang/Dynamic-Programming-and-Optimal-Control-Lecture/c3aa0a6bc3eb8e3f43060587f4a3ac5cf5357762/src/MakePlots.p -------------------------------------------------------------------------------- /src/PolicyIteration.m: -------------------------------------------------------------------------------- 1 | function [J_opt, u_opt_ind] = PolicyIteration(P, G) 2 | % POLICYITERATION Policy iteration 3 | % Solve a stochastic shortest path problem by Policy Iteration. 
4 | % 5 | % [J_opt, u_opt_ind] = PolicyIteration(P, G) computes the optimal cost and 6 | % the optimal control input for each state of the state space. 7 | % 8 | % Input arguments: 9 | % P: 10 | % A (k x k x L)-matrix containing the transition probabilities 11 | % between all states in the state space for all control inputs. 12 | % The entry P(i, j, l) represents the transition probability 13 | % from state i to state j if control input l is applied. 14 | % G: 15 | % A (k x L)-matrix containing the stage costs of all states in 16 | % the state space for all control inputs. The entry G(i, l) 17 | % represents the cost if we are in state i and apply control 18 | % input l. 19 | % 20 | % Output arguments: 21 | % J_opt: 22 | % A (k x 1)-matrix containing the optimal cost-to-go for each 23 | % element of the state space. 24 | % u_opt_ind: 25 | % A (k x 1)-matrix containing the index of the optimal control 26 | % input for each element of the state space. Mapping of the 27 | % terminal state is arbitrary (for example: HOVER). 
28 | 29 | %% declare global variables 30 | global K HOVER 31 | global TERMINAL_STATE_INDEX 32 | 33 | %% initialize variables 34 | tol = 1e-6; % error tolerance 35 | error = intmax; % initial error 36 | P_J = zeros(K, K); % inialize coefficent matrix postmultiplied by J given policy 37 | J_opt = zeros(K, 1); % initialize optimal cost with zero vector 38 | J_prev = ones(K, 1); % initialize with an infinite vector 39 | J_opt_temp = zeros(K, 5); % store temporary cost-to-go matrix 40 | u_opt_ind = HOVER * ones(K, 1); % initialize optimal control with Hover 41 | nonTerminalState = [1:TERMINAL_STATE_INDEX-1 TERMINAL_STATE_INDEX+1:K]; % all index except terminal state 42 | 43 | %% start iteration 44 | while( error > tol ) 45 | % update J_prev 46 | J_prev = J_opt; 47 | % compute index of elements in G that will end up in q 48 | G_ind = sub2ind([K,5], 1:K, u_opt_ind'); % compute index from G 49 | q = G(G_ind)'; % stage cost vector given current policy 50 | % construct P_J matrix postmultiplied by J given current policy 51 | for i = 1:K 52 | P_J(i,:) = P(i,:,u_opt_ind(i)); 53 | end 54 | % solve J_opt using linear equation solver 55 | A = eye(K-1) - P_J(nonTerminalState,nonTerminalState); % A matrix 56 | b = q(nonTerminalState); % b vector 57 | J_opt(nonTerminalState) = linsolve(A,b); 58 | % policy improvement 59 | % construct J_opt_temp: the k-th column indicates the cost-to-go vector 60 | % after applying control k 61 | for k = 1:5 62 | J_opt_temp(:,k) = G(:,k) + P(:,:,k) * J_opt; 63 | end 64 | [~, u_opt_ind] = min(J_opt_temp, [], 2); 65 | % update error 66 | error = max(abs(J_opt - J_prev)); 67 | end 68 | 69 | end -------------------------------------------------------------------------------- /src/ValueIteration.m: -------------------------------------------------------------------------------- 1 | function [J_opt, u_opt_ind] = ValueIteration(P, G) 2 | % VALUEITERATION Value iteration 3 | % Solve a stochastic shortest path problem by Value Iteration. 
4 | % 5 | % [J_opt, u_opt_ind] = ValueIteration(P, G) computes the optimal cost and 6 | % the optimal control input for each state of the state space. 7 | % 8 | % Input arguments: 9 | % P: 10 | % A (K x K x L)-matrix containing the transition probabilities 11 | % between all states in the state space for all control inputs. 12 | % The entry P(i, j, l) represents the transition probability 13 | % from state i to state j if control input l is applied. 14 | % G: 15 | % A (K x L)-matrix containing the stage costs of all states in 16 | % the state space for all control inputs. The entry G(i, l) 17 | % represents the cost if we are in state i and apply control 18 | % input l. 19 | % 20 | % Output arguments: 21 | % J_opt: 22 | % A (K x 1)-matrix containing the optimal cost-to-go for each 23 | % element of the state space. 24 | % u_opt_ind: 25 | % A (K x 1)-matrix containing the index of the optimal control 26 | % input for each element of the state space. Mapping of the 27 | % terminal state is arbitrary (for example: HOVER). 
28 | 29 | %% declare global variables 30 | global K HOVER 31 | global TERMINAL_STATE_INDEX 32 | 33 | %% initialize variables 34 | tol = 1e-6; % error tolerance 35 | error = intmax; % initial error 36 | J_opt = zeros(K, 1); % optimal cost-to-go vector 37 | J_prev = zeros(K, 1); % previous optimal cost-to-go vector 38 | J_opt_temp = zeros(K, 5); % store temporary cost-to-go matrix 39 | u_opt_ind = HOVER * ones(K, 1); % optimal control vector 40 | nonTerminalState = [1:TERMINAL_STATE_INDEX-1 TERMINAL_STATE_INDEX+1:K]; 41 | 42 | %% start iteration 43 | while( error > tol ) 44 | % construct J_opt_temp: the k-th column indicates the cost-to-go vector after applying control k 45 | for k = 1:5 46 | J_opt_temp(nonTerminalState,k) = G(nonTerminalState,k) + P(nonTerminalState,nonTerminalState,k) * J_opt(nonTerminalState); 47 | end 48 | % compute optimal cost-to-go and optimal control 49 | [J_opt, u_opt_ind] = min(J_opt_temp, [], 2); 50 | % update error 51 | error = max(abs(J_opt - J_prev)); 52 | % store optimal cost-to-go after this iteration 53 | J_prev = J_opt; 54 | end 55 | 56 | end -------------------------------------------------------------------------------- /src/exampleG.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YizeWang/Dynamic-Programming-and-Optimal-Control-Lecture/c3aa0a6bc3eb8e3f43060587f4a3ac5cf5357762/src/exampleG.mat -------------------------------------------------------------------------------- /src/exampleP.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YizeWang/Dynamic-Programming-and-Optimal-Control-Lecture/c3aa0a6bc3eb8e3f43060587f4a3ac5cf5357762/src/exampleP.mat -------------------------------------------------------------------------------- /src/exampleWorld.mat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/YizeWang/Dynamic-Programming-and-Optimal-Control-Lecture/c3aa0a6bc3eb8e3f43060587f4a3ac5cf5357762/src/exampleWorld.mat -------------------------------------------------------------------------------- /src/main.m: -------------------------------------------------------------------------------- 1 | % Dynamic Programming and Optimal Control 2 | % Fall 2019 3 | % Programming Exercise 4 | % 5 | % Author: Yize Wang 6 | % Email: yizwang@student.ethz.ch 7 | 8 | %% Clear workspace and command window 9 | clc; 10 | clear all; 11 | close all; 12 | 13 | %% Initialize stopwatches 14 | mainStart = tic; 15 | 16 | %% Options 17 | mapSize = [15, 20]; % [M, N] 18 | % Set to true to generate a random map of size mapSize, else set to false 19 | % to load the pre-exsisting example map 20 | generateRandomWorld = true; 21 | 22 | % Plotting options 23 | global PLOT_POLICY PLOT_COST 24 | PLOT_POLICY = true; 25 | PLOT_COST = false; 26 | 27 | %% Global problem parameters 28 | global GAMMA R Nc P_WIND 29 | GAMMA = 0.2; % Shooter gamma factor 30 | R = 2; % Shooter range 31 | Nc = 10; % Time steps required to bring drone to base when it crashes 32 | P_WIND = 0.1; % Gust of wind probability 33 | 34 | % IDs of elements in the map matrix 35 | global FREE TREE SHOOTER PICK_UP DROP_OFF BASE 36 | FREE = 0; 37 | TREE = 1; 38 | SHOOTER = 2; 39 | PICK_UP = 3; 40 | DROP_OFF = 4; 41 | BASE = 5; 42 | 43 | % Index of each action in the P and G matrices. 
Use this ordering 44 | global NORTH SOUTH EAST WEST HOVER 45 | NORTH = 1; 46 | SOUTH = 2; 47 | EAST = 3; 48 | WEST = 4; 49 | HOVER = 5; 50 | 51 | %% Generate map 52 | startTime = tic; 53 | % map(m,n) represents the cell at indices (m,n) according to the axes 54 | if generateRandomWorld 55 | map = GenerateWorld(mapSize(1), mapSize(2)); %#ok<*UNRCH> 56 | else 57 | % load a pre-generated map 58 | load('exampleWorld.mat'); 59 | end 60 | MakePlots(map); 61 | disp("Generate Map: " + toc(startTime) + " sec"); 62 | 63 | %% Generate state space 64 | startTime = tic; 65 | % Generate a (K x 3) - matrix 'stateSpace', where each accessible cell is 66 | % represented by two rows (with and without carrying a package). 67 | stateSpace = []; 68 | for m = 1 : size(map, 1) 69 | for n = 1 : size(map, 2) 70 | if map(m, n) ~= TREE 71 | stateSpace = [stateSpace; 72 | m, n, 0; 73 | m, n, 1]; 74 | end 75 | end 76 | end 77 | 78 | % State space size 79 | global K 80 | K = size(stateSpace,1); 81 | disp("Generate State Space: " + toc(startTime) + " sec"); 82 | 83 | %% Set the following to true as you progress with the files 84 | transitionProbabilitiesImplemented = true; 85 | stageCostsImplemented = true; 86 | valueIterationImplemented = true; 87 | policyIterationImplemented = true; 88 | linearProgrammingImplemented = true; 89 | 90 | %% Compute the terminal state index 91 | global TERMINAL_STATE_INDEX %#ok<*NUSED> 92 | if transitionProbabilitiesImplemented 93 | TERMINAL_STATE_INDEX = ComputeTerminalStateIndex(stateSpace, map); 94 | end 95 | 96 | %% Compute transition probabilities 97 | if transitionProbabilitiesImplemented 98 | startTime = tic; 99 | P = ComputeTransitionProbabilities(stateSpace, map); 100 | disp("Compute Transition Probabilities: " + toc(startTime) + " sec"); 101 | end 102 | 103 | %% Compute stage costs 104 | if stageCostsImplemented 105 | startTime = tic; 106 | G = ComputeStageCosts(stateSpace, map); 107 | disp("Compute Stage Costs: " + toc(startTime) + " sec"); 108 | end 109 
| 110 | %% Solve stochastic shortest path problem 111 | % value iteration 112 | if valueIterationImplemented 113 | startTime = tic; 114 | [ J_opt_vi, u_opt_ind_vi ] = ValueIteration(P, G); 115 | 116 | if size(J_opt_vi,1)~=K || size(u_opt_ind_vi,1)~=K 117 | disp('[ERROR] the size of J and u must be K') 118 | end 119 | disp("Value Iteration: " + toc(startTime) + " sec"); 120 | end 121 | 122 | % policy iteration 123 | if policyIterationImplemented 124 | startTime = tic; 125 | [ J_opt_pi, u_opt_ind_pi ] = PolicyIteration(P, G); 126 | 127 | if size(J_opt_pi,1)~=K || size(u_opt_ind_pi,1)~=K 128 | disp('[ERROR] the size of J and u must be K') 129 | end 130 | disp("Policy Iteration: " + toc(startTime) + " sec"); 131 | end 132 | 133 | % linear programming 134 | if linearProgrammingImplemented 135 | startTime = tic; 136 | [ J_opt_lp, u_opt_ind_lp ] = LinearProgramming(P, G); 137 | 138 | if size(J_opt_lp,1)~=K || size(u_opt_ind_lp,1)~=K 139 | disp('[ERROR] the size of J and u must be K') 140 | end 141 | disp("Linear Programming: " + toc(startTime) + " sec"); 142 | end 143 | 144 | %% Plot results 145 | startTime = tic; 146 | if valueIterationImplemented 147 | MakePlots(map, stateSpace, J_opt_vi, u_opt_ind_vi, 'Value iteration'); 148 | end 149 | if policyIterationImplemented 150 | MakePlots(map, stateSpace, J_opt_pi, u_opt_ind_pi, 'Policy iteration'); 151 | end 152 | if linearProgrammingImplemented 153 | MakePlots(map, stateSpace, J_opt_lp, u_opt_ind_lp, 'Linear programming'); 154 | end 155 | disp("Plot results: " + toc(startTime) + " sec"); 156 | 157 | %% Terminated 158 | disp("Computation Time: " + (toc(mainStart) - toc(startTime)) + " sec"); 159 | disp("Total Time: " + toc(mainStart) + " sec"); 160 | 161 | %% Compare 162 | % compute maximum error to verify algorithm implementation correctness 163 | if valueIterationImplemented && policyIterationImplemented && linearProgrammingImplemented 164 | maxError = 
max(max(abs(J_opt_pi-J_opt_lp))+max(abs(J_opt_pi-J_opt_vi))+max(abs(J_opt_vi-J_opt_lp))); 165 | if maxError < 1e-4 166 | fprintf('\nCongratulations! Three Methods Yield Same Result! \n') 167 | else 168 | fprintf('\nWarning! Three Methods Yield Different Result! \n') 169 | end 170 | end --------------------------------------------------------------------------------