├── FA_A2C.m
├── PID Neural Net.pdf
├── PID_FA_NN.m
└── README.md


/FA_A2C.m:
--------------------------------------------------------------------------------
  1 | clc;
  2 | clear;
  3 | close all
  4 | 
  5 | global beta0 T
  6 | global sigma Amp
  7 | beta0 = 0;
  8 | T     = 0.01;
  9 | sigma = 1;
 10 | Amp   = -1;
 11 | 
 12 | dr = 0.9;    % discount rate
 13 | LR = 0.1;     % Learning Rate
 14 | x0 = [1;1];   % Initial Condition
 15 | Tf = 200;
 16 | t  = 0:T:Tf;
 17 | N = Tf/T;
 18 | 
 19 | % Initialization
 20 | % u  = zeros(1, N+1);
 21 | u  = sin(2*pi/100*t);
 22 | e1 = zeros(1, N+1);
 23 | e2 = zeros(1, N+1);
 24 | 
 25 | ym = zeros(N+1, 1);
 26 | V  = ones(N+1, 1);
 27 | x1 = x0(1)*ones(1, N+1);
 28 | x2 = x0(2)*ones(1, N+1);
 29 | y  = x2;
 30 | Outputs = x0;
 31 | 
 32 | % Neural Network structure
 33 | n  = 3;      % Number of NN box Inputs for function approximation
 34 | N1 = 10;     % Number of nourons
 35 | 
 36 | % Initialize Weights
 37 | W1_Critic = zeros(n, N1);
 38 | W1_Actor  = zeros(n, N1);
 39 | W2_Critic = zeros(N1+1,1);
 40 | W2_Actor  = zeros(N1+1,1);
 41 | 
 42 | % Main Loop
 43 | 
 44 | for k = 1:N
 45 |     
 46 |     Outputs = NonLinDynamic(Outputs, u(k));
 47 |     Outputs = Outputs + 0.01*norm(Outputs)*randn(size(Outputs));
 48 |     
 49 |     x1(k+1) = Outputs(1);
 50 |     x2(k+1) = Outputs(2);
 51 |     
 52 |     y(k+1)  = Outputs(2);
 53 |     
 54 |     % Forward net
 55 |     history_vector    = [x1(k), x2(k), u(k)].';
 56 |     [V(k+1), ym(k+1), Phi_Critic, Phi_Actor] = RL_Agent(history_vector, W1_Critic, W2_Critic, W1_Actor, W2_Actor);
 57 |     
 58 |     Sigma_V = 0.01*sigmoid(-2*V(k));
 59 |     ym(k+1) = ym(k+1) + Sigma_V*randn();
 60 |     % Get reward
 61 |     r = Reward(y(k+1), ym(k+1));
 62 |     
 63 |     % TD error
 64 |     delta_TD = r + dr* V(k+1) - V(k);
 65 |     e        = y(k+1)-ym(k+1);
 66 |     % Error  = 1/2*(delta_TD)^2
 67 |     
 68 |     % Backward Updates for Critic
 69 |     W2_Critic = W2_Critic - LR*(delta_TD)*(-1)*Phi_Critic;
 70 |     for i=1:n
 71 |         for j=1:N1
 72 |             W1_Critic(i,j) = W1_Critic(i,j) - LR*(delta_TD)*(-1)*W2_Critic(j)*Phi_Critic(j)*(1-Phi_Critic(j))*history_vector(i);
 73 |         end
 74 |     end
 75 |     % Backward Updates for Actor
 76 |     W2_Actor = W2_Actor - LR*((delta_TD)*(2*e/sigma^2*r)-e)*Phi_Actor;
 77 |     for i=1:n
 78 |         for j=1:N1
 79 |             W1_Actor(i,j) = W1_Actor(i,j) - LR*((delta_TD)*(2*e/sigma^2*r)-e)*W2_Actor(j)*Phi_Actor(j)*(1-Phi_Actor(j))*history_vector(i);
 80 |         end
 81 |     end
 82 |     Amp = Amp - LR*delta_TD*exp(-e^2/sigma^2);
 83 |     
 84 | end
 85 | %% plot Outpots
 86 | figure;
 87 | plot(t, y, 'LineWidth', 2)
 88 | hold on, grid on
 89 | plot(t, ym, 'LineWidth', 2)
 90 | xlabel('Time'), ylabel('Outputs')
 91 | legend('y', 'ym')
 92 | % axis([0 10 -5 5])
 93 | 
 94 | %% Functions
 95 | function [V, ym, Ho_Critic, Ho_Actor] = RL_Agent(X, W1_Critic, W2_Critic, W1_Actor, W2_Actor)
 96 | 
 97 |     % Critic Part
 98 |     hi_Critic = (X'*W1_Critic)';
 99 |     ho_Critic = sigmoid(hi_Critic);
100 |     Ho_Critic = [ho_Critic;1];
101 |     V         = W2_Critic'*Ho_Critic;
102 | 
103 |     % Actor Part
104 |     hi_Actor = (X'*W1_Actor)';
105 |     ho_Actor = sigmoid(hi_Actor);
106 |     Ho_Actor = [ho_Actor;1];
107 |     ym       = W2_Actor'*Ho_Actor;
108 | 
109 | end
function r = Reward(y, ym)
% REWARD  Gaussian-shaped reward of the mismatch between y and ym.
%
%   r = Amp*exp(-(y-ym)^2/sigma^2), where Amp and sigma are globals set by
%   the calling script. y and ym are expected to be scalars because the
%   mismatch is raised with the matrix power operator.
    global sigma Amp
    mismatch = y - ym;
    r = Amp * exp(-mismatch^2 / sigma^2);
end
function y = sigmoid(z)
% SIGMOID  Element-wise logistic function, y = 1./(1+exp(-z)).
%   z may be a scalar or an array; y has the same size as z.
    denominator = 1 + exp(-z);
    y = 1 ./ denominator;
end
117 | 
function Out = NonLinDynamic(x,u)
% NONLINDYNAMIC  One discrete update of the two-state nonlinear plant.
%
%   x    current state, x(1) and x(2) are read
%   u    scalar input
%   Out  next state as a 2x1 column [x1_next; x2_next]
%
%   The step size is the global T set by the calling script.

    global T

    % Plant constants
    Da       = 0.72;
    B        = 8;
    gama     = 20;
    coupling = 0.3;   % gain on (u - x2)

    x1 = x(1);
    x2 = x(2);

    % Exponential term shared by both state equations
    expo = exp(x2/(1+x2/gama));

    x1_prime = x1 + T*(-x1 + Da*(1-x1)*expo);

    % NOTE(review): the coupling term is added outside the T*(...) bracket, so
    % it is not scaled by the step size - confirm this is intended.
    x2_prime = x2 + T*(-x2 + B*Da*(1-x1)*expo) + coupling*(u-x2);

    Out = [x1_prime; x2_prime];

end
136 | 
137 | 


--------------------------------------------------------------------------------
/PID Neural Net.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iman-sharifi-ghb/Function-Approximation-and-Adaptive-PID-Gain-Tuning-using-Neural-Networks-and-Reinforcement-Learning/df0ef72655f4fb7d4c2c54648a0ac6c6b6ed6eb9/PID Neural Net.pdf


--------------------------------------------------------------------------------
/PID_FA_NN.m:
--------------------------------------------------------------------------------
  1 | clc;
  2 | clear;
  3 | close all
  4 | 
  5 | global beta0 T
  6 | beta0 = 0;
  7 | T     = 0.01;
  8 | 
  9 | LR = 0.1;       % Learning Rate
 10 | x0 = [1;2.5];   % Initial Condition
 11 | Tf = 100;
 12 | t  = 0:T:Tf;
 13 | N = Tf/T;
 14 | 
 15 | % Initialization
 16 | u  = zeros(1, N+1);
 17 | e1 = zeros(1, N+1);
 18 | e2 = zeros(1, N+1);
 19 | 
 20 | ym = zeros(N+1, 1);
 21 | x1 = x0(1)*ones(1, N+1);
 22 | x2 = x0(2)*ones(1, N+1);
 23 | y  = x2;
 24 | Outputs = x0;
 25 | 
 26 | yd = [2*ones(1, N/5), 1*ones(1,N/5), 0*ones(1, N/5), 1*ones(1,N/5), 2*ones(1,N/5+1)];
 27 | 
 28 | % Initial Gains and Errors
 29 | Kp = 1*ones(1, N+1);
 30 | Ki = 3*ones(1, N+1);
 31 | Kd = 0*ones(1, N+1);
 32 | 
 33 | ep = 0;
 34 | ei = 0;
 35 | ed = 0;
 36 | 
 37 | % Neural Network structure
 38 | n  = 3;      % Number of NN box Inputs for function approximation
 39 | N1 = 10;     % Number of nourons
 40 | 
 41 | % Initialize Weights
 42 | W1 = zeros(n, N1);
 43 | W2 = zeros(N1+1, 1);
 44 | 
 45 | % Main Loop
 46 | for k = 1:N
 47 |    
 48 |     Outputs = NonLinDynamic(Outputs, u(k));
 49 |     Outputs = Outputs + 0.01*norm(Outputs)*randn(size(Outputs));
 50 |     
 51 |     x1(k+1) = Outputs(1);
 52 |     x2(k+1) = Outputs(2);
 53 |     
 54 |     y(k+1)  = Outputs(2);
 55 |     
 56 |     % Forward net
 57 |     history_vector = [x1(k), x2(k), u(k)].';
 58 |     
 59 |     [ym(k+1), Phi] = f_hat(history_vector, W1, W2);
 60 |     
 61 |     e1(k+1) = y(k+1) - ym(k+1);
 62 | %     Error  = 1/2*(e1(k+1))^2
 63 |     
 64 |     % Backward Updates
 65 |     W2 = W2 + LR*(e1(k+1))* Phi;
 66 |     for i=1:n
 67 |         for j=1:N1
 68 |             W1(i,j) = W1(i,j) + LR*(e1(k+1))*W2(j)*Phi(j)*(1-Phi(j))*history_vector(i);
 69 |         end
 70 |     end
 71 |     % Update PID Control Gains
 72 |     Dym_Du = 0;
 73 |     for j=1:N1
 74 |         dym_du = W2(j)*Phi(j)*(1-Phi(j))*W1(3,j);
 75 |         Dym_Du = Dym_Du + dym_du;
 76 |     end
 77 |     
 78 |     gradE_kp = (yd(k+1)-ym(k+1))*Dym_Du*ep;
 79 |     gradE_ki = (yd(k+1)-ym(k+1))*Dym_Du*ei;
 80 |     gradE_kd = (yd(k+1)-ym(k+1))*Dym_Du*ed;
 81 |     
 82 |     % Update Control Commands
 83 |     % delta Kp,i,d(k) = -learningRate*gradient(E) + beta(t)*deltaKp,i,d(k-1)
 84 |     Kp(k+1) = Kp(k) + LR*gradE_kp - betta(k)*(Kp(k)-Kp(k));
 85 |     Ki(k+1) = Ki(k) + LR*gradE_ki - betta(k)*(Ki(k)-Ki(k));
 86 | %     Kd(k+1) = Kd(k) - LR*gradE_kd;% - betta(k)*(Kd(k)-Kd(k));
 87 |     
 88 |     % Update u(t)
 89 |     e2(k+1) = yd(k+1) - y(k+1);
 90 |     ep      = e2(k+1) - e2(k);
 91 |     ei      = T/2*(e2(k+1) + e2(k));
 92 |     if k > 1
 93 |         ed  = 1/T*(e2(k+1)-2*e2(k)+e2(k-1));
 94 |     end
 95 |     u(k+1)  = u(k) + Kp(k+1)*ep + Ki(k+1)*ei + Kd(k+1)*ed;
 96 |     
 97 | end
 98 | %% plot Outpots
 99 | figure;
100 | plot(t, yd, '--g', 'LineWidth', 1.5)
101 | hold on, grid on
102 | plot(t, y, 'LineWidth', 2)
103 | hold on, 
104 | plot(t, ym, 'LineWidth', 2)
105 | xlabel('Time'), ylabel('Outputs')
106 | legend('desired', 'y', 'ym')
107 | % axis([0 10 -5 5])
108 | 
109 | figure;
110 | plot(t, Kp, 'LineWidth', 2)
111 | hold on
112 | plot(t, Ki, 'LineWidth', 2)
113 | hold on
114 | plot(t, Kd, 'g', 'LineWidth', 2)
115 | xlabel('Time'), ylabel('Amp')
116 | title('PID Gain Tuning')
117 | legend('K_p', 'K_i', 'K_d')
118 | 
119 | %% Functions
function [ym, Phi] = f_hat(history_vector, W1, W2)
% F_HAT  Forward pass of the single-hidden-layer model network.
%
%   history_vector  n-by-1 input vector
%   W1              n-by-N input-to-hidden weights (one column per hidden unit)
%   W2              (N+1)-by-1 output weights; the last entry multiplies the bias
%
%   ym              scalar network output
%   Phi             (N+1)-by-1 logistic hidden activations followed by a constant 1

    hidden_count = length(W2) - 1;

    % Pre-activations of all hidden units at once, as a row vector
    pre_act = history_vector.' * W1(:, 1:hidden_count);

    Phi = [(1 ./ (1 + exp(-pre_act))).'; 1];
    ym  = W2.' * Phi;

end
132 | 
function out = betta(k)
% BETTA  Exponentially decaying coefficient, beta0*exp(-k*T).
%   k is the step index; beta0 and T are globals set by the calling script.
    global beta0 T
    decay_rate = 1;
    out = beta0 * exp(-decay_rate*k*T);
end
138 | 
function Out = NonLinDynamic(x,u)
% NONLINDYNAMIC  One discrete update of the two-state nonlinear plant.
%
%   x    current state, x(1) and x(2) are read
%   u    scalar input
%   Out  next state as a 2x1 column [x1_next; x2_next]
%
%   The step size is the global T set by the calling script.

    global T

    % Plant constants
    Da       = 0.72;
    B        = 8;
    gama     = 20;
    coupling = 0.3;   % gain on (u - x2)

    x1 = x(1);
    x2 = x(2);

    % Exponential term shared by both state equations
    expo = exp(x2/(1+x2/gama));

    x1_prime = x1 + T*(-x1 + Da*(1-x1)*expo);

    % NOTE(review): the coupling term is added outside the T*(...) bracket, so
    % it is not scaled by the step size - confirm this is intended.
    x2_prime = x2 + T*(-x2 + B*Da*(1-x1)*expo) + coupling*(u-x2);

    Out = [x1_prime; x2_prime];

end
157 | 
158 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Function-Approximation-and-Adaptive-PID-Gain-Tuning-using-Neural-Networks-and-Actor-Critic-algorithm
 2 | 
 3 | **System Identification and Self-Tuning PID Control using NN and reinforcement learning**
 4 | 
 5 | In this project, we aim to tune the PID controller gains adaptively using the Actor-Critic method with radial basis (Gaussian) kernels. We assume that an accurate model of the system is not available, so we use neural networks to estimate the dynamical model of the system and then use the identified model to find the best PID gains with the Actor-Critic reinforcement learning method.
 6 | 
 7 | ## Notes
 8 | 
 9 | `PID_FA_NN.m` : 
10 | 
11 | This file implements function approximation using neural networks with adaptive PID gains. See the attached `PID Neural Net.pdf` file for a description of the algorithm and the network structure.
12 | 
13 | ![PID_FA_NN](https://user-images.githubusercontent.com/60617560/129597840-e8d9f399-4de6-4a1a-8218-b4fd27fd5570.png)
14 | 
15 | ![PID_gains](https://user-images.githubusercontent.com/60617560/129597930-453bcfa4-9962-4000-905a-179b3a898e61.png)
16 | 
17 | `FA_A2C.m` :
18 | 
19 | Function Approximation using Actor-Critic Algorithm
20 | 
21 | ![FA_A2C](https://user-images.githubusercontent.com/60617560/129596768-e3680e6c-bc19-4833-b5cb-73681c8fb1ef.png)
22 | 
23 | To change the dynamic system, just modify the `NonLinDynamic(.)` function in the MATLAB files.
24 | 


--------------------------------------------------------------------------------