├── DDPG_Generalized_Training_Code.m
├── PPO_Generalized_Training_Code.m
├── README.md
├── SAC_Generalized_Training_Code.m
├── TD3_Generalized_Training_Code.m
└── TRPO_Generalized_Training_Code.m

/DDPG_Generalized_Training_Code.m:
--------------------------------------------------------------------------------
1 | %% DDPG Model Network and Code
2 | %% Flags / Settings
3 | parallelComputing_flag = 0; % Whether to use parallel computing
4 | load_Saved_Agent_flag = 0;  % Whether to load a previously saved agent
5 | %% Load Saved Agent
6 | if load_Saved_Agent_flag == 1
7 |     savedAgent_dir = 'saved_Agents01';
8 |     listing = dir(fullfile(savedAgent_dir, '*.mat'));
9 |     for i = 1:length(listing)
10 |         temp_String = string(listing(i).name);
11 |         temp_String = extractAfter(temp_String,5);      % strip the leading 'Agent'
12 |         temp_String = extractBefore(temp_String,'.mat');
13 |         agent_names(i,1) = str2double(temp_String);
14 |
15 |     end
16 |     sorted_agent_names = sort(agent_names,'ascend');
17 |     last_Agent = sorted_agent_names(end);
18 |     agent_Name = append('\Agent',num2str(last_Agent), '.mat');
19 |     load([savedAgent_dir agent_Name]);
20 |     [ep_reward, ep_no] = max(savedAgentResult.EpisodeReward); % episode with the highest reward
21 |     load([savedAgent_dir append('\Agent', num2str(ep_no), '.mat')]);
22 |     plot(savedAgentResult)
23 | end
24 | %% Model Initialization
25 | mdl = 'Your Simulink Mdl Name';
26 | open_system(mdl); % Opens the Simulink model
27 | agentblk = [mdl '/Location/To/RL Agent Block']; % Replace with the path to the RL Agent block in Simulink
28 |
29 | %% Sample Time & Simulation Duration
30 | T_Sample = 0.1; % Replace with your own sample time
31 | T_Total = 205;  % Replace with your own total simulation time
32 | set_param(mdl,'StartTime','0','StopTime',int2str(T_Total)); % Set start and stop time in Simulink
33 |
34 | %% Observation Info
35 | numObs = 5; % Enter the number of observations
36 | obsInfo = rlNumericSpec([numObs 1],'LowerLimit',-inf*ones(numObs,1),'UpperLimit',inf*ones(numObs,1)); % Lower and upper limits of -/+ Inf
37 | obsInfo.Name = 'Observations';
38 | obsInfo.Description = 'Describe your observation vector here';
39 | numOfObservations = obsInfo.Dimension(1);
40 |
41 | %% Action Info
42 |
43 | Act1_Min = -5;
44 | Act1_Max = 5;
45 |
46 | % theta Controller (second action range, unused while numAct = 1)
47 | Act2_Min = -0.04;
48 | Act2_Max = 0.04;
49 |
50 | % Action Object
51 | numAct = 1; % Number of actions
52 | actInfo = rlNumericSpec([numAct 1],'LowerLimit',Act1_Min*ones(numAct,1),'UpperLimit',Act1_Max*ones(numAct,1));
53 | actInfo.Name = 'Action';
54 | numActions = actInfo.Dimension(1);
55 |
56 | %% Create Environment
57 | %rl_env = rlSimulinkEnv(mdl, agentblk, obsInfo,
58 | %actInfo,'UseFastRestart','on'); % if you want to use Fast Restart
59 | rl_env = rlSimulinkEnv(mdl, agentblk, obsInfo, actInfo); % Creates the environment
60 | %% Environment Reset Function
61 | % To define the initial condition for a given variable, specify an environment reset function using an anonymous function handle.
62 | % The reset function localResetFcn is defined at the end of this script.
63 | rl_env.ResetFcn = @(in)localResetFcn(in);
64 | % Fix the random number generator seed for reproducibility.
65 | rng('default')
66 | %% Create Agent DDPG
67 | % A DDPG agent approximates the long-term reward given observations and actions using a critic value function representation.
68 | % To create the critic, first create a deep neural network with two inputs, the state and action, and one output.
69 | % For more information on creating a neural network value function representation, see Create Policy and Value Function Representations.
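% The critic defined below implements the action-value function Q(s,a). During
% training, the DDPG agent regresses it toward the target
% y = r + gamma*Q'(s',mu'(s')), where Q' and mu' are the target critic and target
% actor updated slowly via the TargetSmoothFactor set in the agent options below.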
70 | nI = obsInfo.Dimension(1); % number of inputs 71 | nL = 128; % number of neurons 72 | nO = actInfo.Dimension(1); % number of outputs 73 | 74 | statePath = [ 75 | featureInputLayer(nI,'Normalization','none','Name','observation') 76 | fullyConnectedLayer(nL,'Name','fc1') 77 | reluLayer('Name','relu1') 78 | fullyConnectedLayer(nL,'Name','fc2') 79 | additionLayer(2,'Name','add') 80 | reluLayer('Name','relu2') 81 | fullyConnectedLayer(nL,'Name','fc3') 82 | reluLayer('Name','relu3') 83 | fullyConnectedLayer(1,'Name','fc4')]; 84 | 85 | actionPath = [ 86 | featureInputLayer(nO,'Normalization','none','Name','action') 87 | fullyConnectedLayer(nL, 'Name', 'fc5')]; 88 | 89 | %% Critic Netwrok 90 | criticNetwork = layerGraph(statePath); 91 | criticNetwork = addLayers(criticNetwork, actionPath); 92 | 93 | criticNetwork = connectLayers(criticNetwork,'fc5','add/in2'); 94 | 95 | % Specify options for the critic representation using rlRepresentationOptions. 96 | criticOptions = rlRepresentationOptions('Optimizer','adam','LearnRate',1e-3,'GradientThreshold',1,'L2RegularizationFactor',2e-4,'UseDevice',"gpu"); %Use GPU for Training 97 | 98 | % Create the critic representation using the specified neural network and options. 99 | % You must also specify the action and observation info for the critic, which you obtain from the environment interface. 100 | % For more information, see rlQValueRepresentation. 101 | critic = rlQValueRepresentation(criticNetwork,obsInfo,actInfo,'Observation',{'observation'},'Action',{'action'},criticOptions); 102 | 103 | % Design, visualize, and train deep learning networks 104 | %deepNetworkDesigner(criticNetwork); 105 | % View the critic network configuration. 106 | figure('Name','Critic Network'); 107 | plot(criticNetwork); 108 | 109 | %% Actor Netwrok 110 | % A DDPG agent decides which action to take given observations by using an actor representation. 111 | % To create the actor, first create a deep neural network with one input, the observation, and one output, the action. 112 | % Construct the actor similarly to the critic. For more information, see rlDeterministicActorRepresentation. 113 | actorNetwork = [ 114 | featureInputLayer(nI,'Normalization','none','Name','observation') 115 | fullyConnectedLayer(nL,'Name','fc1') 116 | reluLayer('Name','relu1') 117 | fullyConnectedLayer(nL,'Name','fc2') 118 | reluLayer('Name','relu2') 119 | fullyConnectedLayer(nL,'Name','fc3') 120 | reluLayer('Name','relu3') 121 | fullyConnectedLayer(1,'Name','fc4') 122 | tanhLayer('Name','tanh1') 123 | scalingLayer('Name','ActorScaling1','Scale',Act1_Max,'Bias',-0.5)]; 124 | 125 | actorOptions = rlRepresentationOptions('Optimizer','adam','LearnRate',1e-4,'GradientThreshold',1,'L2RegularizationFactor',1e-5,'UseDevice',"gpu"); %Use GPU for Training 126 | 127 | actor = rlDeterministicActorRepresentation(actorNetwork,obsInfo,actInfo,'Observation',{'observation'},'Action',{'ActorScaling1'},actorOptions); 128 | 129 | % Design, visualize, and train deep learning networks 130 | %deepNetworkDesigner(actorNetwork); 131 | % View the actor network configuration. 132 | figure('Name','Actor Network'); 133 | plot(layerGraph(actorNetwork)); 134 | 135 | %% Agent Options 136 | % To create the DDPG agent, first specify the DDPG agent options using rlDDPGAgentOptions. 137 | agentOptions = rlDDPGAgentOptions(... 138 | 'SampleTime',T_Sample,... 139 | 'TargetSmoothFactor',1e-3,... 140 | 'SaveExperienceBufferWithAgent',true, ... 141 | 'ExperienceBufferLength',1e8,... 142 | 'DiscountFactor',0.99,... 
143 | 'MiniBatchSize',64); 144 | agentOptions.NoiseOptions.Variance = 0.3; 145 | agentOptions.NoiseOptions.VarianceDecayRate = 1e-5; 146 | agentOptions.ResetExperienceBufferBeforeTraining = false; 147 | agentOptions.SaveExperienceBufferWithAgent = true; 148 | 149 | % Then, create the DDPG agent using the specified actor representation, critic representation, and agent options. 150 | % For more information, see rlDDPGAgent. 151 | agent = rlDDPGAgent(actor,critic,agentOptions); 152 | 153 | %% Specify Training Options and Train Agent 154 | % For this example, the training options for the DDPG and TD3 agents are the same. 155 | % Run each training session for 5000 episodes with each episode lasting at most maxSteps time steps. 156 | % Display the training progress in the Episode Manager dialog box (set the Plots option) and disable the command line display (set the Verbose option). 157 | % Terminate the training only when it reaches the maximum number of episodes (maxEpisodes). Doing so allows the comparison of the learning curves for multiple agents over the entire training session. 158 | maxEpisodes = 1000000; 159 | maxSteps = floor(T_Total/T_Sample); 160 | trainingOptions = rlTrainingOptions(... 161 | 'MaxEpisodes',maxEpisodes,... 162 | 'MaxStepsPerEpisode',maxSteps,... 163 | 'ScoreAveragingWindowLength',100,... 164 | 'Verbose',true,... 165 | 'Plots','training-progress',... 166 | 'StopTrainingCriteria','EpisodeCount',... 167 | 'StopTrainingValue',maxEpisodes,... 168 | 'SaveAgentCriteria','EpisodeFrequency',... 169 | 'SaveAgentValue',100, ... 170 | 'SaveAgentDirectory','saved_Agents01' ... 171 | ); % saves every 100th episode for DDPG Agent 172 | 173 | % To train the agent in parallel, specify the following training options. 174 | % Training in parallel requires Parallel Computing Toolbox™. 175 | % If you do not have Parallel Computing Toolbox software installed, set UseParallel to false. 176 | % Set the UseParallel option to true. 177 | % Train the agent in parallel asynchronously. 178 | % After every 32 steps, have each worker send experiences to the parallel pool client (the MATLAB® process which starts the training). 179 | % DDPG and TD3 agents require workers to send experiences to the client. 180 | if parallelComputing_flag==1 181 | save_system(mdl); 182 | num_cores = feature('numcores'); % Get number of CPU Cores 183 | parpool(floor(num_cores*.75)); % Use 75% fo Available Cores 184 | trainingOptions.UseParallel = true; 185 | trainingOptions.ParallelizationOptions.Mode = 'async'; 186 | trainingOptions.ParallelizationOptions.StepsUntilDataIsSent = 32; 187 | trainingOptions.ParallelizationOptions.DataToSendFromWorkers = 'Experiences'; 188 | end 189 | 190 | %% Train the agent. 191 | trainingStats = train(agent,rl_env,trainingOptions) 192 | 193 | %% Simulate DDPG Agent 194 | % To validate the performance of the trained agent, simulate the agent within the Simulink environment by uncommenting the following commands. 195 | % For more information on agent simulation, see rlSimulationOptions and sim. 
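% Before simulating, the training run can be sanity-checked from the results
% object returned by train; a minimal sketch (field names follow the training
% results object and may differ slightly between releases):
% figure('Name','Training Progress');
% plot(trainingStats.EpisodeReward); hold on;
% plot(trainingStats.AverageReward);
% legend('Episode reward','Average reward');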
196 |
197 | simOptions = rlSimulationOptions('MaxSteps',maxSteps);
198 | experience = sim(rl_env,agent,simOptions);
199 |
200 | %% Reset Function Definition
201 | function in = localResetFcn(in)
202 |     mdl = 'Your Simulink Model Name';
203 |     in = Simulink.SimulationInput(mdl);
204 |
205 |     % LOGIC TO INITIALIZE A VARIABLE HERE
206 |     % alt = answer;
207 |
208 |     % Change the value in the model workspace
209 |     mdlWks = get_param(mdl,'ModelWorkspace');
210 |     assignin(mdlWks,'variable name',alt) % Assigns the value to the model workspace variable
211 | end
212 |
213 | %% Credits
214 | % Developed with the help of MathWorks and its documentation.
215 | % Talha Bin Riaz
--------------------------------------------------------------------------------
/PPO_Generalized_Training_Code.m:
--------------------------------------------------------------------------------
1 | %% PPO Model Network and Code
2 | %% Flags / Settings
3 | parallelComputing_flag = 0; % Whether to use parallel computing
4 | load_Saved_Agent_flag = 0;  % Whether to load a previously saved agent
5 | %% Load Saved Agent
6 | if load_Saved_Agent_flag == 1
7 |     savedAgent_dir = 'saved_Agents01';
8 |     listing = dir(fullfile(savedAgent_dir, '*.mat'));
9 |     for i = 1:length(listing)
10 |         temp_String = string(listing(i).name);
11 |         temp_String = extractAfter(temp_String,5);      % strip the leading 'Agent'
12 |         temp_String = extractBefore(temp_String,'.mat');
13 |         agent_names(i,1) = str2double(temp_String);
14 |
15 |     end
16 |     sorted_agent_names = sort(agent_names,'ascend');
17 |     last_Agent = sorted_agent_names(end);
18 |     agent_Name = append('\Agent',num2str(last_Agent), '.mat');
19 |     load([savedAgent_dir agent_Name]);
20 |     [ep_reward, ep_no] = max(savedAgentResult.EpisodeReward); % episode with the highest reward
21 |     load([savedAgent_dir append('\Agent', num2str(ep_no), '.mat')]);
22 |     plot(savedAgentResult)
23 | end
24 | %% Model Initialization
25 | mdl = 'Your Simulink Mdl Name';
26 | open_system(mdl); % Opens the Simulink model
27 | agentblk = [mdl '/Location/To/RL Agent Block']; % Replace with the path to the RL Agent block in Simulink
28 |
29 | %% Sample Time & Simulation Duration
30 | T_Sample = 0.1; % Replace with your own sample time
31 | T_Total = 205;  % Replace with your own total simulation time
32 | set_param(mdl,'StartTime','0','StopTime',int2str(T_Total)); % Set start and stop time in Simulink
33 |
34 | %% Observation Info
35 | numObs = 5; % Enter the number of observations
36 | obsInfo = rlNumericSpec([numObs 1],'LowerLimit',-inf*ones(numObs,1),'UpperLimit',inf*ones(numObs,1)); % Lower and upper limits of -/+ Inf
37 | obsInfo.Name = 'Observations';
38 | obsInfo.Description = 'Describe your observation vector here';
39 | numOfObservations = obsInfo.Dimension(1);
40 |
41 | %% Action Info
42 |
43 | Act1_Min = -5;
44 | Act1_Max = 5;
45 |
46 | % theta Controller (second action range, unused while numAct = 1)
47 | Act2_Min = -0.04;
48 | Act2_Max = 0.04;
49 |
50 | % Action Object
51 | numAct = 1; % Number of actions
52 | actInfo = rlNumericSpec([numAct 1],'LowerLimit',Act1_Min*ones(numAct,1),'UpperLimit',Act1_Max*ones(numAct,1));
53 | actInfo.Name = 'Action';
54 | numActions = actInfo.Dimension(1);
55 |
56 | %% Create Environment
57 | %rl_env = rlSimulinkEnv(mdl, agentblk, obsInfo,
58 | %actInfo,'UseFastRestart','on'); % if you want to use Fast Restart
59 | rl_env = rlSimulinkEnv(mdl, agentblk, obsInfo, actInfo); % Creates the environment
60 | %% Environment Reset Function
61 | % To define the initial condition for a given variable, specify an environment reset function using an anonymous function handle.
62 | % The reset function localResetFcn is defined at the end of this script.
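% A minimal sketch of a filled-in localResetFcn, shown commented out; the
% variable name 'init_alt' and its range are hypothetical placeholders:
%
% function in = localResetFcn(in)
%     init_alt = 100 + 20*rand;                    % randomized initial condition
%     in = setVariable(in,'init_alt',init_alt);    % set it on the SimulationInput
% end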
63 | rl_env.ResetFcn = @(in)localResetFcn(in); 64 | % Fix the random generator seed for reproducibility. 65 | rng('default') 66 | %% Create Agent PPO 67 | % A PPO agent approximates the long-term reward given observations and actions using a critic value function representation. 68 | % To create the critic, first create a deep neural network with two inputs, the state and action, and one output. 69 | % For more information on creating a neural network value function representation, see Create Policy and Value Function Representations. 70 | nI = obsInfo.Dimension(1); % number of inputs 71 | nL = 128; % number of neurons 72 | nO = actInfo.Dimension(1); % number of outputs 73 | 74 | criticNetwork = [ 75 | featureInputLayer(nI,'Normalization','none','Name','observation') 76 | fullyConnectedLayer(nL,'Name','fc1') 77 | reluLayer('Name','relu1') 78 | fullyConnectedLayer(nL,'Name','fc2') 79 | reluLayer('Name','relu2') 80 | fullyConnectedLayer(nL,'Name','fc3') 81 | reluLayer('Name','relu3') 82 | fullyConnectedLayer(1,'Name','fc4')]; 83 | 84 | %% Critic Netwrok 85 | 86 | criticNetwork = dlnetwork(criticNetwork); 87 | 88 | % Specify options for the critic representation using rlOptimizerOptions. 89 | criticOptions = rlOptimizerOptions('Optimizer','adam','LearnRate',1e-3,'GradientThreshold',1,'L2RegularizationFactor',2e-4); %Use GPU for Training 90 | 91 | % Create the critic representation using the specified neural network and options. 92 | % You must also specify the action and observation info for the critic, which you obtain from the environment interface. 93 | % For more information, see rlQValueRepresentation. 94 | critic = rlValueFunction(criticNetwork,obsInfo,'Observation',{'observation'},'UseDevice',"gpu"); 95 | 96 | % Design, visualize, and train deep learning networks 97 | % View the critic network configuration. 98 | figure('Name','Critic Network'); 99 | plot(criticNetwork); 100 | 101 | %% Actor Netwrok 102 | % A PPO agent decides which action to take given observations by using an actor representation. 103 | % To create the actor, first create a deep neural network with one input, the observation, and one output, the action. 104 | % Construct the actor similarly to the critic. For more information, see rlContinuousGaussianActor. 105 | 106 | 107 | commonPath = [ 108 | featureInputLayer(nI,'Normalization','none','Name','comPathIn') 109 | fullyConnectedLayer(nL,'Name','fc1_c') 110 | reluLayer('Name','relu1_c') 111 | fullyConnectedLayer(nL,'Name','fc2_c') 112 | reluLayer('Name','comPathOut') 113 | ]; 114 | 115 | meanPath = [ 116 | fullyConnectedLayer(1,'Name','meanPathIn') 117 | tanhLayer('Name','tanh1_m') 118 | scalingLayer('Name','meanPathOut','Scale',Act1_Max,'Bias',-0.5) 119 | ]; 120 | 121 | sdevPath = [ 122 | fullyConnectedLayer(1,'Name','stdPathIn') 123 | softplusLayer('Name','stdPathOut') 124 | ]; 125 | 126 | actorNetwork = layerGraph(commonPath); 127 | actorNetwork = addLayers(actorNetwork,meanPath); 128 | actorNetwork = addLayers(actorNetwork,sdevPath); 129 | 130 | actorNetwork = connectLayers(actorNetwork,"comPathOut","meanPathIn/in"); 131 | actorNetwork = connectLayers(actorNetwork,"comPathOut","stdPathIn/in"); 132 | 133 | actorOptions = rlOptimizerOptions('Optimizer','adam','LearnRate',1e-4,'GradientThreshold',1,'L2RegularizationFactor',1e-5); 134 | 135 | actor = rlContinuousGaussianActor(actorNetwork,obsInfo,actInfo,'ActionMeanOutputNames',{'meanPathOut'}, ... 
136 |     'ActionStandardDeviationOutputNames',{'stdPathOut'},'ObservationInputNames',{'comPathIn'},'UseDevice','gpu'); % Use GPU for training
137 |
138 | % Design, visualize, and train deep learning networks
139 | % View the actor network configuration.
140 | figure('Name','Actor Network');
141 | plot(actorNetwork);
142 |
143 | %% Agent Options
144 | % To create the PPO agent, first specify the PPO agent options using rlPPOAgentOptions.
145 | agentOptions = rlPPOAgentOptions(...
146 |     'SampleTime',T_Sample, ...
147 |     ExperienceHorizon=1024, ClipFactor=0.04, EntropyLossWeight=0.1, NumEpoch=3, ...
148 |     AdvantageEstimateMethod="gae", GAEFactor=0.5, ...
149 |     DiscountFactor=0.997, ActorOptimizerOptions=actorOptions, CriticOptimizerOptions=criticOptions);
150 |
151 | % Then, create the PPO agent using the specified actor representation, critic representation, and agent options.
152 | % For more information, see rlPPOAgent.
153 |
154 | agent = rlPPOAgent(actor,critic,agentOptions);
155 |
156 | %% Specify Training Options and Train Agent
157 | % The training options below are broadly similar across the agent scripts in this repository.
158 | % Run the training session for up to maxEpisodes episodes, with each episode lasting at most maxSteps time steps.
159 | % Display the training progress in the Episode Manager dialog box (set the Plots option) and enable the command-line display (set the Verbose option).
160 | % Terminate the training only when it reaches the maximum number of episodes (maxEpisodes). Doing so allows the comparison of the learning curves for multiple agents over the entire training session.
161 | maxEpisodes = 1000000;
162 | maxSteps = floor(T_Total / T_Sample);
163 |
164 | % Configure Parallelization Options
165 | parallelOptions = rl.option.ParallelTraining(...
166 |     'Mode', 'async'); % Asynchronous parallel training mode
167 |
168 | % Define training options for the reinforcement learning agent
169 | trainingOptions = rlTrainingOptions(...
170 |     'MaxEpisodes', maxEpisodes, ...
171 |     'MaxStepsPerEpisode', maxSteps, ...
172 |     'ScoreAveragingWindowLength', 100, ...
173 |     'Verbose', true, ...
174 |     'Plots', 'training-progress', ...
175 |     'StopTrainingCriteria', 'EpisodeCount', ...
176 |     'StopTrainingValue', maxEpisodes, ...
177 |     'SaveAgentCriteria', 'EpisodeSteps', ...
178 |     'SaveAgentValue', 900, ...
179 |     'SaveAgentDirectory', 'savedAgents_1', ...
180 |     'UseParallel', parallelComputing_flag==1, ... % Enable parallel training only when the flag is set
181 |     'ParallelizationOptions', parallelOptions);
182 |
183 |
184 | % To train the agent in parallel, specify the following training options.
185 | % Training in parallel requires Parallel Computing Toolbox™.
186 | % If you do not have Parallel Computing Toolbox software installed, set UseParallel to false.
187 | % Set the UseParallel option to true.
188 | % Train the agent in parallel asynchronously.
189 | if parallelComputing_flag==1
190 |     save_system(mdl);
191 |     % Set up the parallel pool using 50% of the available CPU cores
192 |     num_cores = feature('numcores');
193 |     parpool(floor(num_cores * 0.5));
194 |     % Ensure the GPU is selected on all workers
195 |     parfevalOnAll(@() gpuDevice(1), 0);
196 | end
197 |
198 | %% Train the agent.
199 | trainingStats = train(agent,rl_env,trainingOptions)
200 |
201 | %% Simulate PPO Agent
202 | % To validate the performance of the trained agent, simulate the agent within the Simulink environment using the following commands.
203 | % For more information on agent simulation, see rlSimulationOptions and sim.
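% The sim call below returns an experience structure; one assumed way to inspect
% the logged observations afterwards (the field name 'Observations' comes from
% obsInfo.Name above, and the exact layout may vary by release):
% obsLog = squeeze(experience.Observation.Observations.Data);
% figure; plot(obsLog');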
204 |
205 | simOptions = rlSimulationOptions('MaxSteps',maxSteps);
206 | experience = sim(rl_env,agent,simOptions);
207 |
208 | %% Reset Function Definition
209 | function in = localResetFcn(in)
210 |     mdl = 'Your Simulink Model Name';
211 |     in = Simulink.SimulationInput(mdl);
212 |
213 |     % LOGIC TO INITIALIZE A VARIABLE HERE
214 |     % alt = answer;
215 |
216 |     % Change the value in the model workspace
217 |     mdlWks = get_param(mdl,'ModelWorkspace');
218 |     assignin(mdlWks,'variable name',alt) % Assigns the value to the model workspace variable
219 | end
220 |
221 | %% Credits
222 | % Developed with the help of MathWorks and its documentation.
223 | % Talha Bin Riaz
224 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MATLAB_RL_Agent_Architecture
2 | This repository contains the network architectures and generalized MATLAB/Simulink training code for continuous-action RL agents: Deep Deterministic Policy Gradient (DDPG), Trust Region Policy Optimization (TRPO), Proximal Policy Optimization (PPO), Soft Actor-Critic (SAC), and Twin Delayed Deep Deterministic Policy Gradient (TD3).
3 |
--------------------------------------------------------------------------------
/SAC_Generalized_Training_Code.m:
--------------------------------------------------------------------------------
1 | %% SAC Model Network and Code
2 | %% Flags / Settings
3 | parallelComputing_flag = 0; % Whether to use parallel computing
4 | load_Saved_Agent_flag = 0;  % Whether to load a previously saved agent
5 | %% Load Saved Agent
6 | if load_Saved_Agent_flag == 1
7 |     savedAgent_dir = 'saved_Agents01';
8 |     listing = dir(fullfile(savedAgent_dir, '*.mat'));
9 |     for i = 1:length(listing)
10 |         temp_String = string(listing(i).name);
11 |         temp_String = extractAfter(temp_String,5);      % strip the leading 'Agent'
12 |         temp_String = extractBefore(temp_String,'.mat');
13 |         agent_names(i,1) = str2double(temp_String);
14 |
15 |     end
16 |     sorted_agent_names = sort(agent_names,'ascend');
17 |     last_Agent = sorted_agent_names(end);
18 |     agent_Name = append('\Agent',num2str(last_Agent), '.mat');
19 |     load([savedAgent_dir agent_Name]);
20 |     [ep_reward, ep_no] = max(savedAgentResult.EpisodeReward); % episode with the highest reward
21 |     load([savedAgent_dir append('\Agent', num2str(ep_no), '.mat')]);
22 |     plot(savedAgentResult)
23 | end
24 | %% Model Initialization
25 | mdl = 'Your Simulink Mdl Name';
26 | open_system(mdl); % Opens the Simulink model
27 | agentblk = [mdl '/Location/To/RL Agent Block']; % Replace with the path to the RL Agent block in Simulink
28 |
29 | %% Sample Time & Simulation Duration
30 | T_Sample = 0.1; % Replace with your own sample time
31 | T_Total = 205;  % Replace with your own total simulation time
32 | set_param(mdl,'StartTime','0','StopTime',int2str(T_Total)); % Set start and stop time in Simulink
33 |
34 | %% Observation Info
35 | numObs = 5; % Enter the number of observations
36 | obsInfo = rlNumericSpec([numObs 1],'LowerLimit',-inf*ones(numObs,1),'UpperLimit',inf*ones(numObs,1)); % Lower and upper limits of -/+ Inf
37 | obsInfo.Name = 'Observations';
38 | obsInfo.Description = 'Describe your observation vector here';
39 | numOfObservations = obsInfo.Dimension(1);
40 |
41 | %% Action Info
42 |
43 | Act1_Min = -5;
44 | Act1_Max = 5;
45 |
46 | % theta Controller (second action range, unused while numAct = 1)
47 | Act2_Min = -0.04;
48 | Act2_Max = 0.04;
49 |
50 | % Action Object
51 | numAct = 1; % Number of actions
52 | actInfo = rlNumericSpec([numAct 1],'LowerLimit',Act1_Min*ones(numAct,1),'UpperLimit',Act1_Max*ones(numAct,1));
53 | actInfo.Name = 'Action';
54 |
numActions = actInfo.Dimension(1); 55 | 56 | %% Create Environment 57 | %rl_env = rlSimulinkEnv(mdl, agentblk, obsInfo, 58 | %actInfo,'UseFastRestart','on'); % if you want to use Fast Restart 59 | rl_env = rlSimulinkEnv(mdl, agentblk, obsInfo, actInfo); % Creates Environment 60 | %% Environment Reset Function 61 | % To define the initial condition for the certain variable, specify an environment reset function using an anonymous function handle. 62 | % The reset function localResetFcn, which is defined at the end of the example. 63 | rl_env.ResetFcn = @(in)localResetFcn(in); 64 | % Fix the random generator seed for reproducibility. 65 | rng('default') 66 | %% Create Agent SAC 67 | % A SAC agent approximates the long-term reward given observations and actions using a critic value function representation. 68 | % To create the critic, first create a deep neural network with two inputs, the state and action, and one output. 69 | % For more information on creating a neural network value function representation, see Create Policy and Value Function Representations. 70 | nI = obsInfo.Dimension(1); % number of inputs 71 | nL = 128; % number of neurons 72 | nO = actInfo.Dimension(1); % number of outputs 73 | 74 | statePath = [ 75 | featureInputLayer(nI,'Normalization','none','Name','observation') 76 | fullyConnectedLayer(nL,'Name','fc1') 77 | reluLayer('Name','relu1') 78 | fullyConnectedLayer(nL,'Name','fc2') 79 | additionLayer(2,'Name','add') 80 | reluLayer('Name','relu2') 81 | fullyConnectedLayer(nL,'Name','fc3') 82 | reluLayer('Name','relu3') 83 | fullyConnectedLayer(1,'Name','fc4')]; 84 | 85 | actionPath = [ 86 | featureInputLayer(nO,'Normalization','none','Name','action') 87 | fullyConnectedLayer(nL, 'Name', 'fc5')]; 88 | 89 | %% Critic Netwrok 90 | criticNetwork = layerGraph(statePath); 91 | criticNetwork = addLayers(criticNetwork, actionPath); 92 | 93 | criticNetwork = connectLayers(criticNetwork,'fc5','add/in2'); 94 | 95 | criticNetwork = dlnetwork(criticNetwork,Initialize=false); 96 | % Specify options for the critic representation using rlRepresentationOptions. 97 | criticOptions = rlOptimizerOptions('Optimizer','adam','LearnRate',1e-3,'GradientThreshold',1,'L2RegularizationFactor',2e-4); %Use GPU for Training 98 | 99 | criticNetwork_1 = initialize(criticNetwork); 100 | criticNetwork_2 = initialize(criticNetwork); 101 | 102 | % Create the critic representation using the specified neural network and options. 103 | % You must also specify the action and observation info for the critic, which you obtain from the environment interface. 104 | % For more information, see rlQValueRepresentation. 105 | critic_1 = rlQValueFunction(criticNetwork_1,obsInfo,actInfo,'Observation',{'observation'},'Action',{'action'}); 106 | critic_2 = rlQValueFunction(criticNetwork_2,obsInfo,actInfo,'Observation',{'observation'},'Action',{'action'}); 107 | 108 | % Design, visualize, and train deep learning networks 109 | % View the critic network configuration. 110 | figure('Name','Critic Network'); 111 | plot(criticNetwork); 112 | 113 | %% Actor Netwrok 114 | % A SAC agent decides which action to take given observations by using an actor representation. 115 | % To create the actor, first create a deep neural network with one input, the observation, and one output, the action. 116 | % Construct the actor similarly to the critic. For more information, see rlDeterministicActorRepresentation. 
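% Note on the scaling layers used below: a scalingLayer computes y = Scale.*x + Bias,
% so with Scale = 5 and Bias = -0.5 the tanh output in [-1,1] is mapped to roughly
% [-5.5, 4.5]. Adjust Scale and Bias to match your own action limits (for a
% symmetric [-5, 5] range, Bias would be 0).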
117 | 118 | commonPath = [ 119 | featureInputLayer(nI,'Normalization','none','Name','obsInLyr') 120 | fullyConnectedLayer(nL,'Name','fc1_c') 121 | reluLayer('Name','relu1_c') 122 | fullyConnectedLayer(nL,'Name','fc2_c') 123 | reluLayer('Name','relu2_c') 124 | fullyConnectedLayer(nL,'Name','fc3_c') 125 | reluLayer('Name','relu3_c') 126 | fullyConnectedLayer(1,'Name','comPathOut') 127 | ]; 128 | 129 | meanPath = [ 130 | fullyConnectedLayer(2,'Name','meanFC') 131 | fullyConnectedLayer(nL,'Name','fc1_m') 132 | reluLayer('Name','relu1_m') 133 | fullyConnectedLayer(nL,'Name','fc2_m') 134 | reluLayer('Name','relu2_m') 135 | fullyConnectedLayer(nL,'Name','fc3_m') 136 | reluLayer('Name','relu3_m') 137 | fullyConnectedLayer(1,'Name','fc4_m') 138 | tanhLayer('Name','tanh1_m') 139 | scalingLayer('Name','meanPathOut','Scale',5,'Bias',-0.5) 140 | ]; 141 | 142 | sdevPath = [ 143 | fullyConnectedLayer(2,'Name','stdFC') 144 | reluLayer('Name','relu1_s') 145 | fullyConnectedLayer(nL,'Name','fc2_s') 146 | reluLayer('Name','relu2_s') 147 | fullyConnectedLayer(nL,'Name','fc3_s') 148 | reluLayer('Name','relu3_s') 149 | fullyConnectedLayer(1,'Name','fc4_s') 150 | reluLayer('Name','relu4_s') 151 | scalingLayer('Name','stdPathOut','Scale',5) 152 | ]; 153 | 154 | actorNetwork = layerGraph(commonPath); 155 | actorNetwork = addLayers(actorNetwork,meanPath); 156 | actorNetwork = addLayers(actorNetwork,sdevPath); 157 | 158 | actorNetwork = connectLayers(actorNetwork,"comPathOut","meanFC/in"); 159 | actorNetwork = connectLayers(actorNetwork,"comPathOut","stdFC/in"); 160 | actorNetwork = dlnetwork(actorNetwork); 161 | actorOptions = rlOptimizerOptions('Optimizer','adam','LearnRate',1e-4,'GradientThreshold',1,'L2RegularizationFactor',1e-5); %Use GPU for Training 162 | 163 | actor = rlContinuousGaussianActor(actorNetwork,obsInfo,actInfo,'ActionMeanOutputNames',{'meanPathOut'}, ... 164 | 'ActionStandardDeviationOutputNames',{'stdPathOut'},'ObservationInputNames',{'obsInLyr'},'UseDevice','gpu'); %Use GPU for Training 165 | 166 | % Design, visualize, and train deep learning networks 167 | % View the actor network configuration. 168 | figure('Name','Actor Network'); 169 | plot(layerGraph(actorNetwork)); 170 | 171 | %% Agent Options 172 | % To create the SAC agent, first specify the SAC agent options using rlSACAgentOptions. 173 | agentOptions = rlSACAgentOptions(... 174 | 'SampleTime',T_Sample,... 175 | 'TargetSmoothFactor',1e-3,... 176 | 'SaveExperienceBufferWithAgent',true, ... 177 | 'ExperienceBufferLength',1e8,... 178 | 'DiscountFactor',0.99,... 179 | 'MiniBatchSize',64, ... 180 | 'NumWarmStartSteps',1000); 181 | 182 | for ct =1:2 183 | 184 | agentOptions.CriticOptimizerOptions(ct) = criticOptions; 185 | 186 | end 187 | 188 | % Then, create the SAC agent using the specified actor representation, critic representation, and agent options. 189 | % For more information, see rlSACAgent. 190 | agent = rlSACAgent(actor,[critic_1,critic_2],agentOptions); 191 | 192 | %% Specify Training Options and Train Agent 193 | % For this example, the training options for the DDPG and TD3 agents are the same. 194 | % Run each training session for 5000 episodes with each episode lasting at most maxSteps time steps. 195 | % Display the training progress in the Episode Manager dialog box (set the Plots option) and disable the command line display (set the Verbose option). 196 | % Terminate the training only when it reaches the maximum number of episodes (maxEpisodes). 
Doing so allows the comparison of the learning curves for multiple agents over the entire training session.
197 | maxEpisodes = 1000000;
198 | maxSteps = floor(T_Total/T_Sample);
199 | trainingOptions = rlTrainingOptions(...
200 |     'MaxEpisodes',maxEpisodes,...
201 |     'MaxStepsPerEpisode',maxSteps,...
202 |     'ScoreAveragingWindowLength',100,...
203 |     'Verbose',true,...
204 |     'Plots','training-progress',...
205 |     'StopTrainingCriteria','EpisodeCount',...
206 |     'StopTrainingValue',maxEpisodes,...
207 |     'SaveAgentCriteria','EpisodeSteps',...
208 |     'SaveAgentValue',500, ...
209 |     'SaveAgentDirectory','saved_Agents01' ...
210 |     );
211 |
212 | % To train the agent in parallel, specify the following training options.
213 | % Training in parallel requires Parallel Computing Toolbox™.
214 | % If you do not have Parallel Computing Toolbox software installed, set UseParallel to false.
215 | % Set the UseParallel option to true.
216 | % Train the agent in parallel asynchronously.
217 | if parallelComputing_flag==1
218 |     %save_system(mdl);
219 |     num_cores = feature('numcores'); % Get number of CPU cores
220 |     parpool(floor(num_cores*.75));   % Use 75% of available cores
221 |     trainingOptions.UseParallel = true;
222 |     trainingOptions.ParallelizationOptions.Mode = 'async';
223 | end
224 |
225 | %% Train the agent.
226 | trainingStats = train(agent,rl_env,trainingOptions)
227 |
228 | %% Simulate SAC Agent
229 | % To validate the performance of the trained agent, simulate the agent within the Simulink environment using the following commands.
230 | % For more information on agent simulation, see rlSimulationOptions and sim.
231 |
232 | simOptions = rlSimulationOptions('MaxSteps',maxSteps);
233 | experience = sim(rl_env,agent,simOptions);
234 |
235 | %% Reset Function Definition
236 | function in = localResetFcn(in)
237 |     mdl = 'Your Simulink Model Name';
238 |     in = Simulink.SimulationInput(mdl);
239 |
240 |     % LOGIC TO INITIALIZE A VARIABLE HERE
241 |     % alt = answer;
242 |
243 |     % Change the value in the model workspace
244 |     mdlWks = get_param(mdl,'ModelWorkspace');
245 |     assignin(mdlWks,'variable name',alt) % Assigns the value to the model workspace variable
246 | end
247 |
248 | %% Credits
249 | % Developed with the help of MathWorks and its documentation.
250 | % Talha Bin Riaz
--------------------------------------------------------------------------------
/TD3_Generalized_Training_Code.m:
--------------------------------------------------------------------------------
1 | %% TD3 Model Network and Code
2 | %% Flags / Settings
3 | parallelComputing_flag = 0; % Whether to use parallel computing
4 | load_Saved_Agent_flag = 0;  % Whether to load a previously saved agent
5 | %% Load Saved Agent
6 | if load_Saved_Agent_flag == 1
7 |     savedAgent_dir = 'saved_Agents01';
8 |     listing = dir(fullfile(savedAgent_dir, '*.mat'));
9 |     for i = 1:length(listing)
10 |         temp_String = string(listing(i).name);
11 |         temp_String = extractAfter(temp_String,5);      % strip the leading 'Agent'
12 |         temp_String = extractBefore(temp_String,'.mat');
13 |         agent_names(i,1) = str2double(temp_String);
14 |
15 |     end
16 |     sorted_agent_names = sort(agent_names,'ascend');
17 |     last_Agent = sorted_agent_names(end);
18 |     agent_Name = append('\Agent',num2str(last_Agent), '.mat');
19 |     load([savedAgent_dir agent_Name]);
20 |     [ep_reward, ep_no] = max(savedAgentResult.EpisodeReward); % episode with the highest reward
21 |     load([savedAgent_dir append('\Agent', num2str(ep_no), '.mat')]);
22 |     plot(savedAgentResult)
23 | end
24 | %% Model Initialization
25 | mdl = 'Your Simulink Mdl Name';
26 | open_system(mdl); % Opens the Simulink model
27 | agentblk = [mdl '/Location/To/RL Agent Block']; % Replace with the path to the RL Agent block in Simulink
28 |
29 | %% Sample Time & Simulation Duration
30 | T_Sample = 0.1; % Replace with your own sample time
31 | T_Total = 205;  % Replace with your own total simulation time
32 | set_param(mdl,'StartTime','0','StopTime',int2str(T_Total)); % Set start and stop time in Simulink
33 |
34 | %% Observation Info
35 | numObs = 5; % Enter the number of observations
36 | obsInfo = rlNumericSpec([numObs 1],'LowerLimit',-inf*ones(numObs,1),'UpperLimit',inf*ones(numObs,1)); % Lower and upper limits of -/+ Inf
37 | obsInfo.Name = 'Observations';
38 | obsInfo.Description = 'Describe your observation vector here';
39 | numOfObservations = obsInfo.Dimension(1);
40 |
41 | %% Action Info
42 |
43 | Act1_Min = -5;
44 | Act1_Max = 5;
45 |
46 | % theta Controller (second action range, unused while numAct = 1)
47 | Act2_Min = -0.04;
48 | Act2_Max = 0.04;
49 |
50 | % Action Object
51 | numAct = 1; % Number of actions
52 | actInfo = rlNumericSpec([numAct 1],'LowerLimit',Act1_Min*ones(numAct,1),'UpperLimit',Act1_Max*ones(numAct,1));
53 | actInfo.Name = 'Action';
54 | numActions = actInfo.Dimension(1);
55 |
56 | %% Create Environment
57 | %rl_env = rlSimulinkEnv(mdl, agentblk, obsInfo,
58 | %actInfo,'UseFastRestart','on'); % if you want to use Fast Restart
59 | rl_env = rlSimulinkEnv(mdl, agentblk, obsInfo, actInfo); % Creates the environment
60 | %% Environment Reset Function
61 | % To define the initial condition for a given variable, specify an environment reset function using an anonymous function handle.
62 | % The reset function localResetFcn is defined at the end of this script.
63 | rl_env.ResetFcn = @(in)localResetFcn(in);
64 | % Fix the random number generator seed for reproducibility.
65 | rng('default')
66 |
67 | %% Create Agent TD3
68 | % A TD3 agent approximates the long-term reward given observations and actions using a critic value function representation.
69 | % To create the critic, first create a deep neural network with two inputs, the state and action, and one output.
70 | % For more information on creating a neural network value function representation, see Create Policy and Value Function Representations.
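% TD3 trains two critics with the same architecture. Both are regressed toward
% y = r + gamma*min(Q1'(s',a~), Q2'(s',a~)), where a~ is the target actor's action
% plus clipped noise (TargetPolicySmoothModel, configured further below); taking
% the minimum of the two target critics reduces overestimation bias.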
71 | nI = obsInfo.Dimension(1); % number of inputs 72 | nL = 128; % number of neurons 73 | nO = actInfo.Dimension(1); % number of outputs 74 | 75 | statePath = [ 76 | featureInputLayer(nI,'Normalization','none','Name','observation') 77 | fullyConnectedLayer(nL,'Name','fc1') 78 | reluLayer('Name','relu1') 79 | fullyConnectedLayer(nL,'Name','fc2') 80 | additionLayer(2,'Name','add') 81 | reluLayer('Name','relu2') 82 | fullyConnectedLayer(nL,'Name','fc3') 83 | reluLayer('Name','relu3') 84 | fullyConnectedLayer(1,'Name','fc4')]; 85 | 86 | actionPath = [ 87 | featureInputLayer(nO,'Normalization','none','Name','action') 88 | fullyConnectedLayer(nL, 'Name', 'fc5')]; 89 | 90 | %% Critic Netwrok 91 | criticNetwork = layerGraph(statePath); 92 | criticNetwork = addLayers(criticNetwork, actionPath); 93 | 94 | criticNetwork = connectLayers(criticNetwork,'fc5','add/in2'); 95 | 96 | criticNetwork = dlnetwork(criticNetwork,Initialize=false); 97 | % Specify options for the critic representation using rlRepresentationOptions. 98 | criticOptions = rlOptimizerOptions('Optimizer','adam','LearnRate',1e-3,'GradientThreshold',1,'L2RegularizationFactor',2e-4); %Use GPU for Training 99 | 100 | criticNetwork_1 = initialize(criticNetwork); 101 | criticNetwork_2 = initialize(criticNetwork); 102 | 103 | % Create the critic representation using the specified neural network and options. 104 | % You must also specify the action and observation info for the critic, which you obtain from the environment interface. 105 | % For more information, see rlQValueRepresentation. 106 | critic_1 = rlQValueFunction(criticNetwork_1,obsInfo,actInfo,'Observation',{'observation'},'Action',{'action'}); 107 | critic_2 = rlQValueFunction(criticNetwork_2,obsInfo,actInfo,'Observation',{'observation'},'Action',{'action'}); 108 | 109 | % Design, visualize, and train deep learning networks 110 | % View the critic network configuration. 111 | figure('Name','Critic Network'); 112 | plot(criticNetwork); 113 | 114 | %% Actor Netwrok 115 | % A TD3 agent decides which action to take given observations by using an actor representation. 116 | % To create the actor, first create a deep neural network with one input, the observation, and one output, the action. 117 | % Construct the actor similarly to the critic. For more information, see rlDeterministicActorRepresentation. 118 | 119 | actorNetwork = [ 120 | featureInputLayer(nI,'Normalization','none','Name','observation') 121 | fullyConnectedLayer(nL,'Name','fc1') 122 | reluLayer('Name','relu1') 123 | fullyConnectedLayer(nL,'Name','fc2') 124 | reluLayer('Name','relu2') 125 | fullyConnectedLayer(nL,'Name','fc3') 126 | reluLayer('Name','relu3') 127 | fullyConnectedLayer(1,'Name','fc4') 128 | tanhLayer('Name','tanh1') 129 | % scalingLayer('Name','ActorScaling1','Scale',5)]; 130 | scalingLayer('Name','ActorScaling1','Scale',5,'Bias',-0.5)]; 131 | 132 | actorNetwork = dlnetwork(actorNetwork); 133 | actorOptions = rlOptimizerOptions('Optimizer','adam','LearnRate',1e-4,'GradientThreshold',1,'L2RegularizationFactor',1e-5); %Use GPU for Training 134 | 135 | actor = rlContinuousDeterministicActor(actorNetwork,obsInfo,actInfo,'ObservationInputNames',{'observation'}); %Use GPU for Training 136 | 137 | % Design, visualize, and train deep learning networks 138 | % View the actor network configuration. 139 | figure('Name','Actor Network'); 140 | plot(layerGraph(actorNetwork)); 141 | 142 | %% Agent Options 143 | % To create the TD3 agent, first specify the TD3 agent options using rlTD3AgentOptions. 
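% Two separate noise models are configured below: ExplorationModel adds Gaussian
% noise to the action applied in the environment (exploration), while
% TargetPolicySmoothModel adds noise to the target actor's action when forming
% the critic targets (target policy smoothing).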
144 | agentOptions = rlTD3AgentOptions(... 145 | 'SampleTime',T_Sample,... 146 | 'TargetSmoothFactor',1e-3,... 147 | 'SaveExperienceBufferWithAgent',true, ... 148 | 'ExperienceBufferLength',1e8,... 149 | 'DiscountFactor',0.99,... 150 | 'MiniBatchSize',64); 151 | 152 | agentOptions.ActorOptimizerOptions = actorOptions; 153 | agentOptions.CriticOptimizerOptions = criticOptions; 154 | agentOptions.ExplorationModel.Variance = 0.05; 155 | agentOptions.ExplorationModel.VarianceDecayRate = 2e-04; 156 | agentOptions.ExplorationModel.VarianceMin = 0.001; 157 | agentOptions.TargetPolicySmoothModel.Variance = 0.1; 158 | agentOptions.TargetPolicySmoothModel.VarianceDecayRate = 1e-04; 159 | 160 | 161 | 162 | for ct =1:2 163 | 164 | agentOptions.CriticOptimizerOptions(ct) = criticOptions; 165 | 166 | end 167 | 168 | % Then, create the TD3 agent using the specified actor representation, critic representation, and agent options. 169 | % For more information, see rlTD3Agent. 170 | agent = rlTD3Agent(actor,[critic_1,critic_2],agentOptions); 171 | 172 | %% Specify Training Options and Train Agent 173 | % For this example, the training options for the DDPG and TD3 agents are the same. 174 | % Run each training session for 5000 episodes with each episode lasting at most maxSteps time steps. 175 | % Display the training progress in the Episode Manager dialog box (set the Plots option) and disable the command line display (set the Verbose option). 176 | % Terminate the training only when it reaches the maximum number of episodes (maxEpisodes). Doing so allows the comparison of the learning curves for multiple agents over the entire training session. 177 | % Define maximum episodes and steps 178 | % Define maximum episodes and steps 179 | maxEpisodes = 1000000; 180 | maxSteps = floor(T_Total / T_Sample); 181 | 182 | % Configure Parallelization Options 183 | parallelOptions = rl.option.ParallelTraining(... 184 | 'Mode', 'async'); 185 | 186 | % Define training options for the reinforcement learning agent 187 | trainingOptions = rlTrainingOptions(... 188 | 'MaxEpisodes', maxEpisodes, ... 189 | 'MaxStepsPerEpisode', maxSteps, ... 190 | 'ScoreAveragingWindowLength', 100, ... 191 | 'Verbose', true, ... 192 | 'Plots', 'training-progress', ... 193 | 'StopTrainingCriteria', 'EpisodeCount', ... 194 | 'StopTrainingValue', maxEpisodes, ... 195 | 'SaveAgentCriteria', 'EpisodeSteps', ... 196 | 'SaveAgentValue', 900, ... 197 | 'SaveAgentDirectory', 'savedAgents_1', ... 198 | 'UseParallel', true, ... % Enable parallel training 199 | 'ParallelizationOptions', parallelOptions); 200 | 201 | 202 | % To train the agent in parallel, specify the following training options. 203 | % Training in parallel requires Parallel Computing Toolbox™. 204 | % If you do not have Parallel Computing Toolbox software installed, set UseParallel to false. 205 | % Set the UseParallel option to true. 206 | % Train the agent in parallel asynchronously. 207 | % After every 32 steps, have each worker send experiences to the parallel pool client (the MATLAB® process which starts the training). 208 | % DDPG and TD3 agents require workers to send experiences to the client. 209 | if parallelComputing_flag==1 210 | save_system(mdl); 211 | % Set up the parallel pool using 75% of available CPU cores 212 | num_cores = feature('numcores'); 213 | parpool(floor(num_cores * 0.5)); 214 | % Ensure the GPU is selected on all workers 215 | parfevalOnAll(@() gpuDevice(1), 0); 216 | end 217 | 218 | %% Train the agent. 
219 | trainingStats = train(agent,rl_env,trainingOptions)
220 |
221 | %% Simulate TD3 Agent
222 | % To validate the performance of the trained agent, simulate the agent within the Simulink environment using the following commands.
223 | % For more information on agent simulation, see rlSimulationOptions and sim.
224 |
225 | simOptions = rlSimulationOptions('MaxSteps',maxSteps);
226 | experience = sim(rl_env,agent,simOptions);
227 |
228 | %% Reset Function Definition
229 | function in = localResetFcn(in)
230 |     mdl = 'Your Simulink Model Name';
231 |     in = Simulink.SimulationInput(mdl);
232 |
233 |     % LOGIC TO INITIALIZE A VARIABLE HERE
234 |     % alt = answer;
235 |
236 |     % Change the value in the model workspace
237 |     mdlWks = get_param(mdl,'ModelWorkspace');
238 |     assignin(mdlWks,'variable name',alt) % Assigns the value to the model workspace variable
239 | end
240 |
241 | %% Credits
242 | % Developed with the help of MathWorks and its documentation.
243 | % Talha Bin Riaz
244 |
245 |
--------------------------------------------------------------------------------
/TRPO_Generalized_Training_Code.m:
--------------------------------------------------------------------------------
1 | %% TRPO Model Network and Code
2 | %% Flags / Settings
3 | parallelComputing_flag = 0; % Whether to use parallel computing
4 | load_Saved_Agent_flag = 0;  % Whether to load a previously saved agent
5 | %% Load Saved Agent
6 | if load_Saved_Agent_flag == 1
7 |     savedAgent_dir = 'saved_Agents01';
8 |     listing = dir(fullfile(savedAgent_dir, '*.mat'));
9 |     for i = 1:length(listing)
10 |         temp_String = string(listing(i).name);
11 |         temp_String = extractAfter(temp_String,5);      % strip the leading 'Agent'
12 |         temp_String = extractBefore(temp_String,'.mat');
13 |         agent_names(i,1) = str2double(temp_String);
14 |
15 |     end
16 |     sorted_agent_names = sort(agent_names,'ascend');
17 |     last_Agent = sorted_agent_names(end);
18 |     agent_Name = append('\Agent',num2str(last_Agent), '.mat');
19 |     load([savedAgent_dir agent_Name]);
20 |     [ep_reward, ep_no] = max(savedAgentResult.EpisodeReward); % episode with the highest reward
21 |     load([savedAgent_dir append('\Agent', num2str(ep_no), '.mat')]);
22 |     plot(savedAgentResult)
23 | end
24 | %% Model Initialization
25 | mdl = 'Your Simulink Mdl Name';
26 | open_system(mdl); % Opens the Simulink model
27 | agentblk = [mdl '/Location/To/RL Agent Block']; % Replace with the path to the RL Agent block in Simulink
28 |
29 | %% Sample Time & Simulation Duration
30 | T_Sample = 0.1; % Replace with your own sample time
31 | T_Total = 205;  % Replace with your own total simulation time
32 | set_param(mdl,'StartTime','0','StopTime',int2str(T_Total)); % Set start and stop time in Simulink
33 |
34 | %% Observation Info
35 | numObs = 5; % Enter the number of observations
36 | obsInfo = rlNumericSpec([numObs 1],'LowerLimit',-inf*ones(numObs,1),'UpperLimit',inf*ones(numObs,1)); % Lower and upper limits of -/+ Inf
37 | obsInfo.Name = 'Observations';
38 | obsInfo.Description = 'Describe your observation vector here';
39 | numOfObservations = obsInfo.Dimension(1);
40 |
41 | %% Action Info
42 |
43 | Act1_Min = -5;
44 | Act1_Max = 5;
45 |
46 | % theta Controller (second action range, unused while numAct = 1)
47 | Act2_Min = -0.04;
48 | Act2_Max = 0.04;
49 |
50 | % Action Object
51 | numAct = 1; % Number of actions
52 | actInfo = rlNumericSpec([numAct 1],'LowerLimit',Act1_Min*ones(numAct,1),'UpperLimit',Act1_Max*ones(numAct,1));
53 | actInfo.Name = 'Action';
54 | numActions = actInfo.Dimension(1);
55 |
56 | %% Create Environment
57 | %rl_env = rlSimulinkEnv(mdl, agentblk, obsInfo,
58 | %actInfo,'UseFastRestart','on'); % if you want to use Fast Restart
59 | rl_env =
rlSimulinkEnv(mdl, agentblk, obsInfo, actInfo); % Creates Environment 60 | %% Environment Reset Function 61 | % To define the initial condition for the certain variable, specify an environment reset function using an anonymous function handle. 62 | % The reset function localResetFcn, which is defined at the end of the example. 63 | rl_env.ResetFcn = @(in)localResetFcn(in); 64 | % Fix the random generator seed for reproducibility. 65 | rng('default') 66 | 67 | %% Create Agent TRPO 68 | % A TRPO agent approximates the long-term reward given observations and actions using a critic value function representation. 69 | % To create the critic, first create a deep neural network with two inputs, the state and action, and one output. 70 | % For more information on creating a neural network value function representation, see Create Policy and Value Function Representations. 71 | nI = obsInfo.Dimension(1); % number of inputs 72 | nL = 128; % number of neurons 73 | nO = actInfo.Dimension(1); % number of outputs 74 | 75 | criticNet = [ 76 | featureInputLayer(nI,'Normalization','none','Name','observation') 77 | fullyConnectedLayer(nL,'Name','fc1') 78 | reluLayer('Name','relu1') 79 | fullyConnectedLayer(nL,'Name','fc2') 80 | reluLayer('Name','relu2') 81 | fullyConnectedLayer(nL,'Name','fc3') 82 | reluLayer('Name','relu3') 83 | fullyConnectedLayer(1,'Name','fc4')]; 84 | 85 | 86 | %% Critic Netwrok 87 | criticNetwork = layerGraph(criticNet); 88 | 89 | % criticNetwork = dlnetwork(criticNetwork,Initialize=false); 90 | % Specify options for the critic representation using rlRepresentationOptions. 91 | criticOptions = rlOptimizerOptions('Optimizer','adam','LearnRate',1e-3,'GradientThreshold',1,'L2RegularizationFactor',2e-4); %Use GPU for Training %LR: 1e-3; GT: 1 92 | 93 | % Create the critic representation using the specified neural network and options. 94 | % You must also specify the action and observation info for the critic, which you obtain from the environment interface. 95 | % For more information, see rlQValueRepresentation. 96 | critic = rlValueFunction(criticNetwork,obsInfo,'Observation',{'observation'},'UseDevice',"gpu"); 97 | 98 | % Design, visualize, and train deep learning networks 99 | % View the critic network configuration. 100 | figure('Name','Critic Network'); 101 | plot(criticNetwork); 102 | 103 | %% Actor Netwrok 104 | % A TRPO agent decides which action to take given observations by using an actor representation. 105 | % To create the actor, first create a deep neural network with one input, the observation, and one output, the action. 106 | % Construct the actor similarly to the critic. For more information, see rlDeterministicActorRepresentation. 
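% TRPO constrains each policy update so that the KL divergence between the old and
% new Gaussian policies stays within KLDivergenceLimit (set in the agent options
% further below); the update direction is computed with conjugate gradient and
% accepted via a backtracking line search.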
107 | 108 | 109 | commonPath = [ 110 | featureInputLayer(nI,'Normalization','none','Name','comPathIn') 111 | fullyConnectedLayer(nL,'Name','fc1_c') 112 | reluLayer('Name','relu1_c') 113 | fullyConnectedLayer(nL,'Name','fc2_c') 114 | reluLayer('Name','comPathOut') 115 | ]; 116 | 117 | meanPath = [ 118 | fullyConnectedLayer(1,'Name','meanPathIn') 119 | tanhLayer('Name','tanh1_m') 120 | scalingLayer('Name','meanPathOut','Scale',5,'Bias',-0.5) 121 | ]; 122 | 123 | sdevPath = [ 124 | fullyConnectedLayer(1,'Name','stdPathIn') 125 | softplusLayer('Name','stdPathOut') 126 | ]; 127 | 128 | actorNetwork = layerGraph(commonPath); 129 | actorNetwork = addLayers(actorNetwork,meanPath); 130 | actorNetwork = addLayers(actorNetwork,sdevPath); 131 | 132 | actorNetwork = connectLayers(actorNetwork,"comPathOut","meanPathIn/in"); 133 | actorNetwork = connectLayers(actorNetwork,"comPathOut","stdPathIn/in"); 134 | 135 | actorOptions = rlOptimizerOptions('Optimizer','adam','LearnRate',1e-4,'GradientThreshold',1,'L2RegularizationFactor',1e-5); 136 | 137 | actor = rlContinuousGaussianActor(actorNetwork,obsInfo,actInfo,'ActionMeanOutputNames',{'meanPathOut'}, ... 138 | 'ActionStandardDeviationOutputNames',{'stdPathOut'},'ObservationInputNames',{'comPathIn'},'UseDevice','gpu'); %Use GPU for Training 139 | 140 | % Design, visualize, and train deep learning networks 141 | % View the actor network configuration. 142 | figure('Name','Actor Network'); 143 | plot(layerGraph(dlnetwork(actorNetwork))); 144 | 145 | %% Agent Options 146 | % To create the TRPO agent, first specify the TRPO agent options using rlTRPOAgentOptions. 147 | agentOptions = rlTRPOAgentOptions(... 148 | 'SampleTime',T_Sample,... 149 | 'CriticOptimizerOptions',criticOptions, ... % 'UseDeterministicExploitation',1,... 150 | 'DiscountFactor',0.99,... 151 | 'MiniBatchSize',128,'AdvantageEstimateMethod',"gae", ... 152 | "ExperienceHorizon",512,"EntropyLossWeight",0.95, ... 153 | "KLDivergenceLimit",0.02,"NumIterationsConjugateGradient",10, ... 154 | "NumEpoch",3,"GAEFactor",0.95,"NumIterationsLineSearch",10, ... 155 | "ConjugateGradientDamping",0.01,"ConjugateGradientResidualTolerance",1e-08); 156 | 157 | % Then, create the TRPO agent using the specified actor representation, critic representation, and agent options. 158 | % For more information, see rlTRPOAgent. 159 | agent = rlTRPOAgent(actor,critic,agentOptions); 160 | 161 | %% Specify Training Options and Train Agent 162 | % For this example, the training options for the DDPG and TD3 agents are the same. 163 | % Run each training session for 5000 episodes with each episode lasting at most maxSteps time steps. 164 | % Display the training progress in the Episode Manager dialog box (set the Plots option) and disable the command line display (set the Verbose option). 165 | % Terminate the training only when it reaches the maximum number of episodes (maxEpisodes). Doing so allows the comparison of the learning curves for multiple agents over the entire training session. 166 | maxEpisodes = 1000000; 167 | maxSteps = floor(T_Total/T_Sample); 168 | trainingOptions = rlTrainingOptions(... 169 | 'MaxEpisodes',maxEpisodes,... 170 | 'MaxStepsPerEpisode',maxSteps,... 171 | 'ScoreAveragingWindowLength',100,... 172 | 'Verbose',true,... 173 | 'Plots','training-progress',... 174 | 'StopTrainingCriteria','EpisodeCount',... 175 | 'StopTrainingValue',maxEpisodes,... 176 | 'SaveAgentCriteria','EpisodeSteps',... 177 | 'SaveAgentValue',1000, ... 178 | 'SaveAgentDirectory','savedAgents_1' ,'UseParallel',1 ... 
179 |     );
180 |
181 | % To train the agent in parallel, specify the following training options.
182 | % Training in parallel requires Parallel Computing Toolbox™.
183 | % If you do not have Parallel Computing Toolbox software installed, set UseParallel to false.
184 | % Set the UseParallel option to true.
185 | % Train the agent in parallel asynchronously.
186 | % After every 32 steps, have each worker send experiences to the parallel pool client (the MATLAB® process which starts the training).
187 | % Like DDPG and TD3 agents, TRPO agents require workers to send experiences to the client.
188 | if parallelComputing_flag==1
189 |     save_system(mdl);
190 |     num_cores = feature('numcores'); % Get number of CPU cores
191 |     parpool(floor(num_cores*.25));   % Use 25% of available cores
192 |     trainingOptions.UseParallel = true;
193 |     trainingOptions.ParallelizationOptions.Mode = 'async';
194 |     trainingOptions.ParallelizationOptions.StepsUntilDataIsSent = 32;
195 |     trainingOptions.ParallelizationOptions.DataToSendFromWorkers = 'Experiences';
196 | end
197 |
198 | %% Train the agent.
199 | trainingStats = train(agent,rl_env,trainingOptions)
200 |
201 | %% Simulate TRPO Agent
202 | % To validate the performance of the trained agent, simulate the agent within the Simulink environment using the following commands.
203 | % For more information on agent simulation, see rlSimulationOptions and sim.
204 |
205 | simOptions = rlSimulationOptions('MaxSteps',maxSteps);
206 | experience = sim(rl_env,agent,simOptions);
207 |
208 | %% Reset Function Definition
209 | function in = localResetFcn(in)
210 |     mdl = 'Your Simulink Model Name';
211 |     in = Simulink.SimulationInput(mdl);
212 |
213 |     % LOGIC TO INITIALIZE A VARIABLE HERE
214 |     % alt = answer;
215 |
216 |     % Change the value in the model workspace
217 |     mdlWks = get_param(mdl,'ModelWorkspace');
218 |     assignin(mdlWks,'variable name',alt) % Assigns the value to the model workspace variable
219 | end
220 |
221 | %% Credits
222 | % Developed with the help of MathWorks and its documentation.
223 | % Talha Bin Riaz
224 |
225 |
--------------------------------------------------------------------------------