├── FA_A2C.m
├── PID Neural Net.pdf
├── PID_FA_NN.m
└── README.md


/FA_A2C.m:
--------------------------------------------------------------------------------
% FA_A2C  Function approximation of a nonlinear plant with an actor-critic pair.
%
% The script simulates NonLinDynamic under a sinusoidal input, and at every
% sample:
%   1. runs the critic (value estimate V) and the actor (output estimate ym)
%      on the regressor [x1(k); x2(k); u(k)],
%   2. perturbs ym with exploration noise whose scale depends on V(k),
%   3. computes a Gaussian-shaped reward from the mismatch y - ym,
%   4. forms the TD error and updates both single-hidden-layer networks.
% Finally it plots the plant output y against the actor output ym.
%
% Globals: T (sample time), sigma (reward kernel width), Amp (reward
% amplitude, adapted online). beta0 is set but not read in this file.

clc;
clear;
close all

global beta0 T
global sigma Amp
beta0 = 0;      % not read in this script
T     = 0.01;   % sample time, read by NonLinDynamic
sigma = 1;      % width used in Reward and in the actor update
Amp   = -1;     % reward amplitude, updated at the end of every iteration

dr = 0.9;       % discount rate
LR = 0.1;       % Learning Rate
x0 = [1;1];     % Initial Condition
Tf = 200;
t  = 0:T:Tf;
% Derive the step count from the time vector itself so every buffer below
% has exactly numel(t) samples even when Tf/T is not an exact integer.
N  = numel(t) - 1;

% Initialization
% u = zeros(1, N+1);
u = sin(2*pi/100*t);

ym = zeros(N+1, 1);
V  = ones(N+1, 1);
x1 = x0(1)*ones(1, N+1);
x2 = x0(2)*ones(1, N+1);
y  = x2;
Outputs = x0;

% Neural Network structure
n  = 3;     % Number of NN box Inputs for function approximation
N1 = 10;    % Number of neurons in the hidden layer

% Initialize Weights
% NOTE(review): with W1 and W2 all zero every hidden unit starts with the
% same activation and receives the same update below, so the N1 units stay
% identical. A small random initialisation would break that symmetry -
% confirm whether the zero start is intentional.
W1_Critic = zeros(n, N1);
W1_Actor  = zeros(n, N1);
W2_Critic = zeros(N1+1,1);   % last entry is the bias weight
W2_Actor  = zeros(N1+1,1);   % last entry is the bias weight

hidden = 1:N1;               % indices of the non-bias hidden units

% Main Loop
for k = 1:N

    % Advance the plant one sample and add state-proportional noise
    Outputs = NonLinDynamic(Outputs, u(k));
    Outputs = Outputs + 0.01*norm(Outputs)*randn(size(Outputs));

    x1(k+1) = Outputs(1);
    x2(k+1) = Outputs(2);

    y(k+1) = Outputs(2);

    % Forward net
    history_vector = [x1(k), x2(k), u(k)].';
    [V(k+1), ym(k+1), Phi_Critic, Phi_Actor] = RL_Agent(history_vector, W1_Critic, W2_Critic, W1_Actor, W2_Actor);

    % Exploration noise on the actor output, scaled by the previous value
    Sigma_V = 0.01*sigmoid(-2*V(k));
    ym(k+1) = ym(k+1) + Sigma_V*randn();

    % Get reward
    r = Reward(y(k+1), ym(k+1));

    % TD error
    delta_TD = r + dr*V(k+1) - V(k);
    e = y(k+1) - ym(k+1);
    % Error = 1/2*(delta_TD)^2

    % Backward Updates for Critic
    % The hidden-layer sensitivity must be evaluated with the output weights
    % that produced this forward pass, so it is computed BEFORE W2 changes.
    % (Previously W1 was updated with the already-modified W2.)
    critic_sens = W2_Critic(hidden).*Phi_Critic(hidden).*(1 - Phi_Critic(hidden));
    W2_Critic = W2_Critic + LR*delta_TD*Phi_Critic;
    W1_Critic = W1_Critic + LR*delta_TD*(history_vector*critic_sens.');

    % Backward Updates for Actor
    actor_gain = delta_TD*(2*e/sigma^2*r) - e;
    actor_sens = W2_Actor(hidden).*Phi_Actor(hidden).*(1 - Phi_Actor(hidden));
    W2_Actor = W2_Actor - LR*actor_gain*Phi_Actor;
    W1_Actor = W1_Actor - LR*actor_gain*(history_vector*actor_sens.');

    % Adapt the reward amplitude (global, read by Reward on the next step)
    Amp = Amp - LR*delta_TD*exp(-e^2/sigma^2);

end

%% plot Outputs
figure;
plot(t, y, 'LineWidth', 2)
hold on, grid on
plot(t, ym, 'LineWidth', 2)
xlabel('Time'), ylabel('Outputs')
legend('y', 'ym')
% axis([0 10 -5 5])

%% Functions
function [V, ym, Ho_Critic, Ho_Actor] = RL_Agent(X, W1_Critic, W2_Critic, W1_Actor, W2_Actor)
% RL_Agent  Forward pass of the critic and actor networks.
%   X          n-by-1 input vector
%   W1_*       n-by-N1 input-to-hidden weights
%   W2_*       (N1+1)-by-1 hidden-to-output weights (last entry = bias)
% Returns the critic output V, the actor output ym, and the hidden
% activations of each network with a trailing 1 appended for the bias.

    % Critic Part
    hi_Critic = (X'*W1_Critic)';
    ho_Critic = sigmoid(hi_Critic);
    Ho_Critic = [ho_Critic;1];
    V = W2_Critic'*Ho_Critic;

    % Actor Part
    hi_Actor = (X'*W1_Actor)';
    ho_Actor = sigmoid(hi_Actor);
    Ho_Actor = [ho_Actor;1];
    ym = W2_Actor'*Ho_Actor;

end

function r = Reward(y, ym)
% Reward  Gaussian-shaped function of the mismatch y - ym, scaled by the
% global amplitude Amp and with width given by the global sigma.
    global sigma Amp
    r = Amp*exp(-(y-ym)^2/sigma^2);
end

function y = sigmoid(z)
% sigmoid  Element-wise logistic function 1/(1+exp(-z)).
    y = 1./(1+exp(-z));
end

function Out = NonLinDynamic(x,u)
% NonLinDynamic  Advance the two-state plant by one sample of length T.
%   x   2-by-1 current state [x1; x2]
%   u   scalar input
% Returns the next state as a 2-by-1 column.

    global T
    Da = 0.72;
    B = 8;
    gama = 20;
    beta = 0.3;

    x1 = x(1);
    x2 = x(2);

    x1_prime = x1+T*(-x1 + Da*(1-x1)*exp(x2/(1+x2/gama)));

    % NOTE(review): beta*(u-x2) is added outside the T*(...) factor, so the
    % input term is not scaled by the sample time like the other terms.
    % This looks like a misplaced parenthesis but all tuning in this script
    % was done against it - confirm against the model definition before
    % changing.
    x2_prime = x2+T*(-x2 + B*Da*(1-x1)*exp(x2/(1+x2/gama))) + beta*(u-x2);

    Out = [x1_prime;x2_prime];

end

--------------------------------------------------------------------------------
/PID Neural Net.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iman-sharifi-ghb/Function-Approximation-and-Adaptive-PID-Gain-Tuning-using-Neural-Networks-and-Reinforcement-Learning/df0ef72655f4fb7d4c2c54648a0ac6c6b6ed6eb9/PID Neural Net.pdf
--------------------------------------------------------------------------------
/PID_FA_NN.m:
--------------------------------------------------------------------------------
% PID_FA_NN  Neural-network plant identification with adaptive PID gains.
%
% At every sample the script:
%   1. simulates NonLinDynamic one step and adds measurement noise,
%   2. predicts the output with a single-hidden-layer network (f_hat) fed
%      with [x1(k); x2(k); u(k)] and trains it on the prediction error,
%   3. uses the network's sensitivity d(ym)/d(u) to adapt Kp and Ki by
%      gradient descent with an optional momentum term,
%   4. applies an incremental PID law to obtain the next control input.
% It then plots desired/actual/estimated outputs and the gain histories.
%
% Globals: T (sample time), beta0 (initial momentum factor, read by betta).

clc;
clear;
close all

global beta0 T
beta0 = 0;      % momentum factor at k = 0; 0 disables the momentum term
T = 0.01;       % sample time

LR = 0.1;       % Learning Rate
x0 = [1;2.5];   % Initial Condition
Tf = 100;
t  = 0:T:Tf;
% Take the step count from the time vector so all buffers match numel(t).
N  = numel(t) - 1;

% Initialization
u  = zeros(1, N+1);
e1 = zeros(1, N+1);     % identification error  y - ym
e2 = zeros(1, N+1);     % tracking error        yd - y

ym = zeros(N+1, 1);
x1 = x0(1)*ones(1, N+1);
x2 = x0(2)*ones(1, N+1);
y  = x2;
Outputs = x0;

% Piecewise-constant reference 2 -> 1 -> 0 -> 1 -> 2. The last segment
% absorbs the remainder so numel(yd) == N+1 even if N is not a multiple of 5.
seg = floor(N/5);
yd  = [2*ones(1, seg), 1*ones(1, seg), 0*ones(1, seg), 1*ones(1, seg), 2*ones(1, N+1-4*seg)];

% Initial Gains and Errors
Kp = 1*ones(1, N+1);
Ki = 3*ones(1, N+1);
Kd = 0*ones(1, N+1);    % never adapted below, so it stays at 0

ep = 0;
ei = 0;
ed = 0;

% Neural Network structure
n  = 3;     % Number of NN box Inputs for function approximation
N1 = 10;    % Number of neurons in the hidden layer

% Initialize Weights
% NOTE(review): with W1 and W2 all zero every hidden unit starts with the
% same activation and receives the same update below, so the N1 units stay
% identical. A small random initialisation would break that symmetry -
% confirm whether the zero start is intentional.
W1 = zeros(n, N1);
W2 = zeros(N1+1, 1);    % last entry is the bias weight

hidden = 1:N1;          % indices of the non-bias hidden units

% Main Loop
for k = 1:N

    % Advance the plant one sample and add state-proportional noise
    Outputs = NonLinDynamic(Outputs, u(k));
    Outputs = Outputs + 0.01*norm(Outputs)*randn(size(Outputs));

    x1(k+1) = Outputs(1);
    x2(k+1) = Outputs(2);

    y(k+1) = Outputs(2);

    % Forward net
    history_vector = [x1(k), x2(k), u(k)].';

    [ym(k+1), Phi] = f_hat(history_vector, W1, W2);

    e1(k+1) = y(k+1) - ym(k+1);
    % Error = 1/2*(e1(k+1))^2

    % Backward Updates
    % The hidden-layer sensitivity is evaluated with the output weights that
    % produced this forward pass, i.e. BEFORE W2 is modified. (Previously W1
    % was updated with the already-modified W2.)
    dPhi        = Phi(hidden).*(1 - Phi(hidden));
    hidden_sens = W2(hidden).*dPhi;
    W2 = W2 + LR*e1(k+1)*Phi;
    W1 = W1 + LR*e1(k+1)*(history_vector*hidden_sens.');

    % Update PID Control Gains
    % Sensitivity of the model output to the third input (u), using the
    % freshly updated weights.
    Dym_Du = sum(W2(hidden).*dPhi.*W1(3,:).');

    % ep/ei/ed still hold the values from the previous iteration here.
    gradE_kp = (yd(k+1)-ym(k+1))*Dym_Du*ep;
    gradE_ki = (yd(k+1)-ym(k+1))*Dym_Du*ei;
    gradE_kd = (yd(k+1)-ym(k+1))*Dym_Du*ed; %#ok<NASGU> kept for the disabled Kd update

    % Update Control Commands
    % delta Kp,i,d(k) = -learningRate*gradient(E) + beta(t)*deltaKp,i,d(k-1)
    % The momentum term uses the PREVIOUS gain increment K(k)-K(k-1); the
    % former K(k)-K(k) was identically zero. With beta0 = 0 the result is
    % unchanged.
    if k > 1
        dKp_prev = Kp(k) - Kp(k-1);
        dKi_prev = Ki(k) - Ki(k-1);
    else
        dKp_prev = 0;
        dKi_prev = 0;
    end
    Kp(k+1) = Kp(k) + LR*gradE_kp + betta(k)*dKp_prev;
    Ki(k+1) = Ki(k) + LR*gradE_ki + betta(k)*dKi_prev;
    % Kd(k+1) = Kd(k) - LR*gradE_kd;% - betta(k)*(Kd(k)-Kd(k));

    % Update u(t) with the incremental (velocity-form) PID law
    e2(k+1) = yd(k+1) - y(k+1);
    ep = e2(k+1) - e2(k);
    ei = T/2*(e2(k+1) + e2(k));
    if k > 1
        ed = 1/T*(e2(k+1)-2*e2(k)+e2(k-1));
    end
    u(k+1) = u(k) + Kp(k+1)*ep + Ki(k+1)*ei + Kd(k+1)*ed;

end

%% plot Outputs
figure;
plot(t, yd, '--g', 'LineWidth', 1.5)
hold on, grid on
plot(t, y, 'LineWidth', 2)
plot(t, ym, 'LineWidth', 2)
xlabel('Time'), ylabel('Outputs')
legend('desired', 'y', 'ym')
% axis([0 10 -5 5])

figure;
plot(t, Kp, 'LineWidth', 2)
hold on
plot(t, Ki, 'LineWidth', 2)
plot(t, Kd, 'g', 'LineWidth', 2)
xlabel('Time'), ylabel('Amp')
title('PID Gain Tuning')
legend('K_p', 'K_i', 'K_d')

%% Functions
function [ym, Phi] = f_hat(history_vector, W1, W2)
% f_hat  Forward pass of the single-hidden-layer model.
%   history_vector  n-by-1 input vector
%   W1              n-by-N input-to-hidden weights
%   W2              (N+1)-by-1 hidden-to-output weights (last entry = bias)
% Returns the scalar output ym and the hidden activations Phi with a
% trailing 1 appended for the bias.

    X = history_vector;     % Inputs must be a n*1 vector
    N = length(W2) - 1;
    phi = 1./(1 + exp(-(W1(:,1:N).'*X)));
    Phi = [phi; 1];
    ym = W2.'*Phi;          % W2 must be a (N+1)*1 vector because of bias

end

function out = betta(k)
% betta  Momentum factor at step k: beta0 decayed exponentially in k*T.
    global beta0 T
    b = 1;
    out = beta0*exp(-b*k*T);
end

function Out = NonLinDynamic(x,u)
% NonLinDynamic  Advance the two-state plant by one sample of length T.
%   x   2-by-1 current state [x1; x2]
%   u   scalar input
% Returns the next state as a 2-by-1 column.

    global T
    Da = 0.72;
    B = 8;
    gama = 20;
    beta = 0.3;

    x1 = x(1);
    x2 = x(2);

    x1_prime = x1+T*(-x1 + Da*(1-x1)*exp(x2/(1+x2/gama)));

    % NOTE(review): beta*(u-x2) is added outside the T*(...) factor, so the
    % input term is not scaled by the sample time like the other terms.
    % This looks like a misplaced parenthesis but the gains in this script
    % were tuned against it - confirm against the model definition before
    % changing.
    x2_prime = x2+T*(-x2 + B*Da*(1-x1)*exp(x2/(1+x2/gama))) + beta*(u-x2);

    Out = [x1_prime;x2_prime];

end

-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Function-Approximation-and-Adaptive-PID-Gain-Tuning-using-Neural-Networks-and-Actor-Critic-algorithm 2 | 3 | **System Identification and Self-Tuning PID Control using NN and reinforcement learning** 4 | 5 | In this project, we aim to tune the PID controller gains adaptively using the Actor-Critic method with radial basis or Gaussian kernels. We assume that we don't have an accurate model of the system, which is why we take advantage of neural networks to estimate the dynamical model of the system and then use the estimated model to find the best PID gains using the Actor-Critic reinforcement learning method. 6 | 7 | ## Notes 8 | 9 | `PID_FA_NN.m` : 10 | 11 | This file performs function approximation using neural networks with adaptive PID gains. See the attached `PID Neural Net.pdf` file for the algorithm and network structure. 12 | 13 | ![PID_FA_NN](https://user-images.githubusercontent.com/60617560/129597840-e8d9f399-4de6-4a1a-8218-b4fd27fd5570.png) 14 | 15 | ![PID_gains](https://user-images.githubusercontent.com/60617560/129597930-453bcfa4-9962-4000-905a-179b3a898e61.png) 16 | 17 | `FA_A2C.m` : 18 | 19 | Function approximation using the Actor-Critic algorithm 20 | 21 | ![FA_A2C](https://user-images.githubusercontent.com/60617560/129596768-e3680e6c-bc19-4833-b5cb-73681c8fb1ef.png) 22 | 23 | If you want to change the dynamic system, just change the `NonLinDynamic(.)` function in the MATLAB files. 24 | --------------------------------------------------------------------------------