├── DRL-for-edge-computing.pdf ├── DRL-presentation.pdf ├── LICENSE ├── MAA2C.py ├── MADDPG.py ├── MAPPO.py ├── Memory.py ├── Model.py ├── README.md ├── SECURITY.md ├── __init__.py ├── __pycache__ ├── Agent.cpython-310.pyc ├── Agent.cpython-38.pyc ├── MAA2C.cpython-310.pyc ├── MAA2C.cpython-38.pyc ├── MADDPG.cpython-310.pyc ├── MADDPG.cpython-38.pyc ├── MADDPG.cpython-39.pyc ├── MAPPO.cpython-310.pyc ├── MAPPO.cpython-38.pyc ├── Memory.cpython-310.pyc ├── Memory.cpython-38.pyc ├── Memory.cpython-39.pyc ├── Model.cpython-310.pyc ├── Model.cpython-38.pyc ├── Model.cpython-39.pyc ├── ddpg.cpython-38.pyc ├── env.cpython-310.pyc ├── env.cpython-38.pyc ├── env.cpython-39.pyc ├── env_1.cpython-38.pyc ├── run_ddpg.cpython-38.pyc ├── utils.cpython-310.pyc ├── utils.cpython-38.pyc └── utils.cpython-39.pyc ├── env.py ├── excel ├── DDPG_A2C_PPO.xls ├── Excel_a2c.xls ├── Excel_ddpg.xls ├── Excel_ppo.xls └── final.xls ├── graphs ├── 498 │ ├── convergence.png │ └── reward_vs_parameter.png ├── change agents │ ├── ALLES_change_agents.png │ ├── NAC_change_agents.png │ ├── a2c_change_agents.png │ ├── ddpg_change_agents.png │ └── ppo_change_agents.png ├── change bandwidth │ ├── ALLES_change_bandwidth.png │ ├── NAC_change_bandwidth.png │ ├── a2c_change_bandwidth.png │ ├── ddpg_change_bandwidth.png │ └── ppo_change_bandwidth.png ├── change ddl │ ├── ALLES_change_ddl.png │ ├── NAC_change_ddl.png │ ├── a2c_change_ddl.png │ ├── ddpg_change_ddl.png │ └── ppo_change_ddl.png ├── change epsilon │ ├── ALLES_change_epsilon.png │ ├── NAC_change_epsilon.png │ ├── a2c_change_epsilon.png │ ├── ddpg_change_epsilon.png │ └── ppo_change_epsilon.png ├── change one gamma │ ├── change one gamma e_mine.png │ └── change one gamma r_mine.png ├── change one power │ ├── change one power energy.png │ └── change one power reward.png ├── ddpg_change_lr.png ├── episodes_avg.png ├── phi&energy │ ├── energy.png │ └── phi.png └── reward_vs_parameters │ ├── reward_vs_agents.png │ ├── reward_vs_bandwidth.png │ ├── reward_vs_ddl.png │ └── reward_vs_epsilon.png ├── output ├── a2c_change_ddl_10.png ├── a2c_change_ddl_11.png ├── a2c_change_ddl_12.png ├── a2c_change_ddl_13.png ├── a2c_change_ddl_14.png ├── a2c_change_ddl_15.png ├── a2c_change_ddl_16.png ├── a2c_change_ddl_17.png ├── a2c_change_ddl_18.png ├── a2c_change_ddl_19.png ├── a2c_change_ddl_20.png ├── a2c_change_ddl_8.png ├── a2c_change_ddl_9.png ├── change agents │ ├── a2c │ │ ├── a2c_change_agents_1.png │ │ ├── a2c_change_agents_10.png │ │ ├── a2c_change_agents_11.png │ │ ├── a2c_change_agents_4.png │ │ ├── a2c_change_agents_5.png │ │ ├── a2c_change_agents_6.png │ │ ├── a2c_change_agents_7.png │ │ ├── a2c_change_agents_8.png │ │ ├── a2c_change_agents_9.png │ │ └── a2c_change_agents_final.png │ ├── ddpg │ │ ├── ddpg_change_agents_2.png │ │ ├── ddpg_change_agents_3.png │ │ └── ddpg_change_agents_final.png │ ├── ppo │ │ ├── ppo_change_agents_1.png │ │ ├── ppo_change_agents_2.png │ │ └── ppo_change_agents_final.png │ ├── ppo_change_agents_3.png │ ├── ppo_change_agents_4.png │ └── ppo_change_agents_5.png ├── change bandwidth │ ├── a2c │ │ ├── a2c_change_bandwidth.png │ │ ├── a2c_change_bandwidth1.png │ │ ├── a2c_change_bandwidth2.png │ │ ├── a2c_change_bandwidth_11.png │ │ ├── a2c_change_bandwidth_3.png │ │ └── a2c_change_bandwidth_final.png │ ├── ddpg │ │ ├── ddpg_change_bandwidth.png │ │ ├── ddpg_change_bandwidth_3.png │ │ └── ddpg_change_bandwidth_final.png │ └── ppo │ │ ├── ppo_change_bandwidth_1.png │ │ └── ppo_change_bandwidth_final.png ├── change ddl │ ├── a2c │ │ ├── a2c_change_ddl.png 
│ │ ├── a2c_change_ddl1.png │ │ ├── a2c_change_ddl2.png │ │ ├── a2c_change_ddl_2.png │ │ ├── a2c_change_ddl_3.png │ │ ├── a2c_change_ddl_4.png │ │ ├── a2c_change_ddl_5.png │ │ ├── a2c_change_ddl_6.png │ │ ├── a2c_change_ddl_7.png │ │ ├── a2c_change_ddl_9.png │ │ └── a2c_change_ddl_final.png │ ├── ddpg │ │ ├── ddpg_change_ddl.png │ │ ├── ddpg_change_ddl_3.png │ │ ├── ddpg_change_ddl_4.png │ │ └── ddpg_change_ddl_7.png │ └── ppo │ │ ├── ppo_change_ddl_1.png │ │ ├── ppo_change_ddl_2.png │ │ ├── ppo_change_ddl_3.png │ │ └── ppo_change_ddl_final.png ├── change epsilon │ ├── a2c │ │ ├── a2c_change_epsilon.png │ │ ├── a2c_change_epsilon1.png │ │ ├── a2c_change_epsilon2.png │ │ └── a2c_change_epsilon_3.png │ ├── ddpg │ │ ├── ddpg_change_epsilon.png │ │ ├── ddpg_change_epsilon_3.png │ │ └── ddpg_change_epsilon_final.png │ ├── ddpg_change_epsilon_4.png │ ├── ppo │ │ ├── ppo_change_epsilon_1.png │ │ └── ppo_change_epsilon_final.png │ └── ppo_change_epsilon_2.png ├── ddpg_change_ddl_5.png ├── ddpg_change_ddl_7.png ├── differ user │ └── ddpg │ │ ├── phi_vs_ddl.png │ │ └── reward_vs_ddl.png ├── energy_vs_ddl.png ├── phi_vs_ddl.png ├── reward_vs_agents.png ├── reward_vs_bandwidth.png ├── reward_vs_ddl.png └── reward_vs_epsilon.png ├── plot.py ├── plot_phi.py ├── plot_reward.py ├── plot_reward_avg.py ├── requirements.txt ├── run.py ├── run_a2c.py ├── run_ddpg.py ├── run_ppo.py ├── test.cpp ├── test.py ├── utils.py └── write_random.py /DRL-for-edge-computing.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/DRL-for-edge-computing.pdf -------------------------------------------------------------------------------- /DRL-presentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/DRL-presentation.pdf -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 XinyaoQiu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MAA2C.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch as th 3 | from torch.optim import Adam, RMSprop 4 | from math import exp 5 | 6 | import numpy as np 7 | from Memory import ReplayMemory 8 | 9 | from Model import ActorNetwork, CriticNetwork 10 | from utils import entropy, to_tensor_var 11 | 12 | EVAL_EPISODES = 10 13 | 14 | class MAA2C(object): 15 | """ 16 | An multi-agent learned with Advantage Actor-Critic 17 | - Actor takes its local observations as input 18 | - agent interact with environment to collect experience 19 | - agent training with experience to update policy 20 | 21 | Parameters 22 | - training_strategy: 23 | - cocurrent 24 | - each agent learns its own individual policy which is independent 25 | - multiple policies are optimized simultaneously 26 | - centralized (see MADDPG in [1] for details) 27 | - centralized training and decentralized execution 28 | - decentralized actor map it's local observations to action using individual policy 29 | - centralized critic takes both state and action from all agents as input, each actor 30 | has its own critic for estimating the value function, which allows each actor has 31 | different reward structure, e.g., cooperative, competitive, mixed task 32 | - actor_parameter_sharing: 33 | - True: all actors share a single policy which enables parameters and experiences sharing, 34 | this is mostly useful where the agents are homogeneous. Please see Sec. 4.3 in [2] and 35 | Sec. 4.1 & 4.2 in [3] for details. 36 | - False: each actor use independent policy 37 | - critic_parameter_sharing: 38 | - True: all actors share a single critic which enables parameters and experiences sharing, 39 | this is mostly useful where the agents are homogeneous and reward sharing holds. Please 40 | see Sec. 4.1 in [3] for details. 
41 | - False: each actor use independent critic (though each critic can take other agents actions 42 | as input, see MADDPG in [1] for details) 43 | 44 | Reference: 45 | [1] Multi-Agent Actor-Critic for Mixed Cooperative-Competitive Environments 46 | [2] Cooperative Multi-Agent Control Using Deep Reinforcement Learning 47 | [3] Parameter Sharing Deep Deterministic Policy Gradient for Cooperative Multi-agent Reinforcement Learning 48 | 49 | """ 50 | def __init__(self, env, n_agents, state_dim, action_dim, action_lower_bound, action_higher_bound, 51 | noise, bound, memory_capacity=10, max_steps=None, 52 | roll_out_n_steps=10, tau=300, 53 | reward_gamma=0.99, reward_scale=1., done_penalty=-10, 54 | actor_output_act=nn.functional.softmax, critic_loss="huber", 55 | actor_lr=0.01, critic_lr=0.01, training_strategy="centralized", 56 | optimizer_type="rmsprop", entropy_reg=0.00, 57 | max_grad_norm=None, batch_size=10, episodes_before_train=0, 58 | use_cuda=False, actor_parameter_sharing=False, critic_parameter_sharing=False, 59 | epsilon_start=0.9, epsilon_end=0.05, epsilon_decay=100): 60 | 61 | 62 | 63 | self.env = env 64 | self.state_dim = state_dim 65 | self.action_dim = action_dim 66 | self.env_state = self.env.reset() 67 | self.n_episodes = 0 68 | self.n_steps = 0 69 | self.max_steps = max_steps 70 | self.action_lower_bound = action_lower_bound 71 | self.action_higher_bound = action_higher_bound 72 | self.noise = noise 73 | self.tau = tau 74 | self.bound = bound 75 | 76 | self.reward_gamma = reward_gamma 77 | self.reward_scale = reward_scale 78 | self.done_penalty = done_penalty 79 | 80 | self.memory = ReplayMemory(memory_capacity) 81 | self.actor_output_act = actor_output_act 82 | self.critic_loss = critic_loss 83 | self.actor_lr = actor_lr 84 | self.critic_lr = critic_lr 85 | self.optimizer_type = optimizer_type 86 | self.entropy_reg = entropy_reg 87 | self.max_grad_norm = max_grad_norm 88 | self.batch_size = batch_size 89 | self.episodes_before_train = episodes_before_train 90 | self.target_tau = 0.01 91 | 92 | self.use_cuda = use_cuda and th.cuda.is_available() 93 | 94 | self.epsilon_start = epsilon_start 95 | self.epsilon_end = epsilon_end 96 | self.epsilon_decay = epsilon_decay 97 | 98 | self.n_agents = n_agents 99 | self.roll_out_n_steps = roll_out_n_steps 100 | self.actor_parameter_sharing = actor_parameter_sharing 101 | self.critic_parameter_sharing = critic_parameter_sharing 102 | 103 | assert training_strategy in ["cocurrent", "centralized"] 104 | self.training_strategy = training_strategy 105 | 106 | 107 | self.actors = [ActorNetwork(self.state_dim, self.action_dim, self.actor_output_act)] * self.n_agents 108 | 109 | 110 | critic_state_dim = self.n_agents * self.state_dim 111 | critic_action_dim = self.n_agents * self.action_dim 112 | self.critics = [CriticNetwork(critic_state_dim, critic_action_dim, 1)] * self.n_agents 113 | 114 | if optimizer_type == "adam": 115 | self.actors_optimizer = [Adam(a.parameters(), lr=self.actor_lr) for a in self.actors] 116 | self.critics_optimizer = [Adam(c.parameters(), lr=self.critic_lr) for c in self.critics] 117 | elif optimizer_type == "rmsprop": 118 | self.actors_optimizer = [RMSprop(a.parameters(), lr=self.actor_lr) for a in self.actors] 119 | self.critics_optimizer = [RMSprop(c.parameters(), lr=self.critic_lr) for c in self.critics] 120 | 121 | # tricky and memory consumed implementation of parameter sharing 122 | if self.actor_parameter_sharing: 123 | for agent_id in range(1, self.n_agents): 124 | self.actors[agent_id] = self.actors[0] 125 
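# [Editor's note -- illustrative comment, not part of the original MAA2C.py source]
# A Python caveat for the constructions a few lines above: `[ActorNetwork(...)] * self.n_agents`
# and `[CriticNetwork(...)] * self.n_agents` call each constructor once and then repeat the
# *same* network object n_agents times, so all agents share one actor and one critic even when
# actor_parameter_sharing / critic_parameter_sharing are False.
# A minimal sketch of truly independent per-agent networks (same constructors) would be:
#     self.actors = [ActorNetwork(self.state_dim, self.action_dim, self.actor_output_act)
#                    for _ in range(self.n_agents)]
#     self.critics = [CriticNetwork(critic_state_dim, critic_action_dim, 1)
#                     for _ in range(self.n_agents)]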
| self.actors_optimizer[agent_id] = self.actors_optimizer[0] 126 | if self.critic_parameter_sharing: 127 | for agent_id in range(1, self.n_agents): 128 | self.critics[agent_id] = self.critics[0] 129 | self.critics_optimizer[agent_id] = self.critics_optimizer[0] 130 | 131 | if self.use_cuda: 132 | for a in self.actors: 133 | a.cuda() 134 | for c in self.critics: 135 | c.cuda() 136 | 137 | self.eval_rewards = [] 138 | self.mean_rewards = [] 139 | self.episodes = [] 140 | self.eval_phi = [] 141 | self.mean_phi = [] 142 | 143 | # agent interact with the environment to collect experience 144 | def interact(self): 145 | if (self.max_steps is not None) and (self.n_steps >= self.max_steps): 146 | self.env_state = self.env.reset() 147 | self.n_steps = 0 148 | states = [] 149 | actions = [] 150 | rewards = [] 151 | next_states = [] 152 | next_actions = [] 153 | # take n steps 154 | for i in range(self.roll_out_n_steps): 155 | states.append(self.env_state) 156 | action = self.choose_action(self.env_state) 157 | next_state, reward, done, _, phi, _, _, _ = self.env.step(action) 158 | next_state_var = to_tensor_var([next_state], self.use_cuda) 159 | next_action = np.zeros((self.n_agents, self.action_dim)) 160 | for agent_id in range(self.n_agents): 161 | next_action_var = self.actors[agent_id](next_state_var[:,agent_id,:]) 162 | if self.use_cuda: 163 | next_action[agent_id] = next_action_var.data.cpu().numpy()[0] 164 | else: 165 | next_action[agent_id] = next_action_var.data.numpy()[0] 166 | # done = done[0] 167 | actions.append(action) 168 | rewards.append(reward) 169 | next_states.append(next_state) 170 | next_actions.append(next_action) 171 | 172 | final_state = next_state 173 | self.env_state = next_state 174 | if done: 175 | self.env_state = self.env.reset() 176 | break 177 | # discount reward 178 | if done: 179 | final_r = [0.0] * self.n_agents 180 | self.n_episodes += 1 181 | self.episode_done = True 182 | else: 183 | self.episode_done = False 184 | final_action = self.choose_action(final_state) 185 | final_r = self.value(final_state, final_action) 186 | 187 | rewards = np.array(rewards) 188 | for agent_id in range(self.n_agents): 189 | rewards[:,agent_id] = self._discount_reward(rewards[:,agent_id], final_r[agent_id]) 190 | rewards = rewards.tolist() 191 | self.n_steps += 1 192 | 193 | self.eval_rewards.append(np.sum(reward)) 194 | self.eval_phi.append(np.sum(phi)) 195 | if self.episode_done and ((self.n_episodes+1)%EVAL_EPISODES == 0): 196 | mean_reward = np.mean(np.array(self.eval_rewards)) 197 | self.mean_rewards.append(mean_reward) 198 | self.mean_phi.append(np.mean(np.array(self.eval_phi))) 199 | self.episodes.append(self.n_episodes+1) 200 | print("Episode:", self.n_episodes+1, " Average Reward: ", mean_reward) 201 | self.eval_rewards = [] 202 | self.eval_phi = [] 203 | 204 | self.memory.push(states, actions, rewards) 205 | 206 | # train on a roll out batch 207 | def train(self): 208 | if self.n_episodes <= self.episodes_before_train: 209 | pass 210 | 211 | batch = self.memory.sample(self.batch_size) 212 | states_var = to_tensor_var(batch.states, self.use_cuda).view(-1, self.n_agents, self.state_dim) 213 | actions_var = to_tensor_var(batch.actions, self.use_cuda).view(-1, self.n_agents, self.action_dim) 214 | rewards_var = to_tensor_var(batch.rewards, self.use_cuda).view(-1, self.n_agents, 1) 215 | whole_states_var = states_var.view(-1, self.n_agents*self.state_dim) 216 | whole_actions_var = actions_var.view(-1, self.n_agents*self.action_dim) 217 | 218 | 219 | for agent_id in 
range(self.n_agents): 220 | # update actor network 221 | self.actors_optimizer[agent_id].zero_grad() 222 | action_log_probs = self.actors[agent_id](states_var[:,agent_id,:]) 223 | entropy_loss = th.mean(entropy(th.exp(action_log_probs))) 224 | action_log_probs = th.sum(action_log_probs * actions_var[:,agent_id,:], 1) 225 | values = self.critics[agent_id](whole_states_var, whole_actions_var).detach() 226 | 227 | advantages = rewards_var[:,agent_id,:] - values 228 | pg_loss = -th.mean(action_log_probs * advantages) 229 | actor_loss = pg_loss - entropy_loss * self.entropy_reg 230 | actor_loss.requires_grad_(True) 231 | actor_loss.backward() 232 | 233 | if self.max_grad_norm is not None: 234 | nn.utils.clip_grad_norm(self.actors[agent_id].parameters(), self.max_grad_norm) 235 | self.actors_optimizer[agent_id].step() 236 | 237 | # update critic network 238 | self.critics_optimizer[agent_id].zero_grad() 239 | target_values = rewards_var[:,agent_id,:] 240 | if self.critic_loss == "huber": 241 | critic_loss = nn.functional.smooth_l1_loss(values, target_values) 242 | else: 243 | critic_loss = nn.MSELoss()(values, target_values) 244 | critic_loss.requires_grad_(True) 245 | critic_loss.backward() 246 | 247 | 248 | if self.max_grad_norm is not None: 249 | nn.utils.clip_grad_norm(self.critics[agent_id].parameters(), self.max_grad_norm) 250 | self.critics_optimizer[agent_id].step() 251 | 252 | 253 | def getactionbound(self, a, b, x, i): 254 | x = (x - a) * (self.action_higher_bound[i] - self.action_lower_bound[i]) / (b - a) \ 255 | + self.action_lower_bound[i] 256 | return x 257 | 258 | # predict action based on state for execution 259 | def choose_action(self, state): 260 | state_var = to_tensor_var([state], self.use_cuda) 261 | action = np.zeros((self.n_agents, self.action_dim)) 262 | 263 | for agent_id in range(self.n_agents): 264 | action_var = (self.actors[agent_id](state_var[:,agent_id,:])) 265 | if self.use_cuda: 266 | action[agent_id] = action_var.data.cpu().numpy()[0] 267 | else: 268 | action[agent_id] = action_var.data.numpy()[0] 269 | 270 | for n in range(self.n_agents): 271 | for i in range(6): 272 | if (self.n_episodes < self.bound): e = self.n_episodes 273 | else: e = self.bound 274 | action[n][i] = -exp(-e/self.tau) + self.noise 275 | b = 1 276 | a = -1 277 | if self.action_dim > 6: 278 | print("Wrong!") 279 | for n in range(self.n_agents): 280 | action[n][0] = 0 if action[n][0] <= 0 else 1 281 | action[n][1] = round(self.getactionbound(a, b, action[n][1], 1)) 282 | action[n][2] = self.getactionbound(a, b, action[n][2], 2) 283 | action[n][3] = self.getactionbound(a, b, action[n][3], 3) 284 | action[n][4] = self.getactionbound(a, b, action[n][4], 4) 285 | action[n][5] = self.getactionbound(a, b, action[n][5], 5) 286 | 287 | 288 | return action 289 | 290 | 291 | # evaluate value 292 | def value(self, state, action): 293 | state_var = to_tensor_var([state], self.use_cuda) 294 | action_var = to_tensor_var([action], self.use_cuda) 295 | whole_state_var = state_var.view(-1, self.n_agents*self.state_dim) 296 | whole_action_var = action_var.view(-1, self.n_agents*self.action_dim) 297 | values = np.zeros(self.n_agents) 298 | for agent_id in range(self.n_agents): 299 | value_var = self.critics[agent_id](whole_state_var, whole_action_var) 300 | if self.use_cuda: 301 | values[agent_id] = value_var.data.cpu().numpy()[0] 302 | else: 303 | values[agent_id] = value_var.data.numpy()[0] 304 | return values 305 | 306 | def _discount_reward(self, rewards, final_value): 307 | discounted_r = 
np.zeros_like(rewards) 308 | running_add = final_value 309 | for t in reversed(range(0, len(rewards))): 310 | running_add = running_add * self.reward_gamma + rewards[t] 311 | discounted_r[t] = running_add 312 | return discounted_r 313 | 314 | -------------------------------------------------------------------------------- /MADDPG.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.optim import Adam, RMSprop 4 | 5 | import numpy as np 6 | from copy import deepcopy 7 | 8 | from utils import to_tensor_var 9 | from Model import ActorNetwork, CriticNetwork 10 | from Memory import ReplayMemory 11 | 12 | EVAL_EPISODES = 10 13 | 14 | 15 | class MADDPG(object): 16 | """ 17 | An agent learned with Deep Deterministic Policy Gradient using Actor-Critic framework 18 | - Actor takes state as input 19 | - Critic takes both state and action as input 20 | - Critic uses gradient temporal-difference learning 21 | """ 22 | def __init__(self, env, n_agents, state_dim, action_dim, action_lower_bound, action_higher_bound, 23 | memory_capacity=10000, max_steps=10000, target_tau=0.01, target_update_steps=500, 24 | reward_gamma=0.99, reward_scale=1., done_penalty=None, training_strategy="centralized", 25 | actor_output_act=torch.tanh, actor_lr=0.01, critic_lr=0.01, 26 | optimizer_type="adam", entropy_reg=0.01, max_grad_norm=None, batch_size=100, episodes_before_train=100, 27 | epsilon_start=0.9, epsilon_end=0.05, epsilon_decay=100, use_cuda=False): 28 | 29 | self.n_agents = n_agents 30 | self.env = env 31 | self.state_dim = state_dim 32 | self.action_dim = action_dim 33 | self.action_lower_bound = action_lower_bound 34 | self.action_higher_bound = action_higher_bound 35 | 36 | self.env_state = env.reset() 37 | self.n_episodes = 0 38 | self.n_steps = 0 39 | self.max_steps = max_steps 40 | self.roll_out_n_steps = 1 41 | 42 | self.reward_gamma = reward_gamma 43 | self.reward_scale = reward_scale 44 | self.done_penalty = done_penalty 45 | 46 | self.memory = ReplayMemory(memory_capacity) 47 | self.actor_output_act = actor_output_act 48 | self.actor_lr = actor_lr 49 | self.critic_lr = critic_lr 50 | self.optimizer_type = optimizer_type 51 | self.entropy_reg = entropy_reg 52 | self.max_grad_norm = max_grad_norm 53 | self.batch_size = batch_size 54 | self.episodes_before_train = episodes_before_train 55 | 56 | # params for epsilon greedy 57 | self.epsilon_start = epsilon_start 58 | self.epsilon_end = epsilon_end 59 | self.epsilon_decay = epsilon_decay 60 | 61 | self.use_cuda = use_cuda and torch.cuda.is_available() 62 | 63 | self.target_tau = target_tau 64 | self.target_update_steps = target_update_steps 65 | 66 | assert training_strategy in ["cocurrent", "centralized"] 67 | self.training_strategy = training_strategy 68 | 69 | self.actors = [ActorNetwork(self.state_dim, self.action_dim, self.actor_output_act)] * self.n_agents 70 | if self.training_strategy == "cocurrent": 71 | self.critics = [CriticNetwork(self.state_dim, self.action_dim, 1)] * self.n_agents 72 | elif self.training_strategy == "centralized": 73 | critic_state_dim = self.n_agents * self.state_dim 74 | critic_action_dim = self.n_agents * self.action_dim 75 | self.critics = [CriticNetwork(critic_state_dim, critic_action_dim, 1)] * self.n_agents 76 | 77 | # to ensure target network and learning network has the same weights 78 | self.actors_target = deepcopy(self.actors) 79 | self.critics_target = deepcopy(self.critics) 80 | 81 | if optimizer_type == "adam": 82 | 
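# [Editor's note -- illustrative comment, not part of the original MADDPG.py source]
# The same list-multiplication caveat as in MAA2C.py applies here: `self.actors` and
# `self.critics` each hold n_agents references to a single network, and `deepcopy` of such
# a list yields target networks that likewise all alias one copied object; independent
# agents would need one constructor call per agent.
# For reference, the exploration schedule applied later in exploration_action(),
#     epsilon = epsilon_end + (epsilon_start - epsilon_end) * exp(-n_episodes / epsilon_decay),
# decays (with the defaults 0.9, 0.05, 100) from 0.9 at episode 0 to roughly 0.36 after
# 100 episodes and about 0.09 after 300 episodes.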
self.actors_optimizer = [Adam(a.parameters(), lr=self.actor_lr) for a in self.actors] 83 | self.critics_optimizer = [Adam(c.parameters(), lr=self.critic_lr) for c in self.critics] 84 | elif optimizer_type == "rmsprop": 85 | self.actors_optimizer = [RMSprop(a.parameters(), lr=self.actor_lr) for a in self.actors] 86 | self.critics_optimizer = [RMSprop(c.parameters(), lr=self.critic_lr) for c in self.critics] 87 | 88 | if self.use_cuda: 89 | for i in range(self.n_agents): 90 | self.actors[i].cuda() 91 | self.critics[i].cuda() 92 | self.actors_target[i].cuda() 93 | self.critics_target[i].cuda() 94 | 95 | self.eval_rewards = [] 96 | self.mean_rewards = [] 97 | self.episodes = [] 98 | self.mean_phi = [[] for n in range(self.n_agents)] 99 | self.eval_phi = [[] for n in range(self.n_agents)] 100 | self.mean_energy = [[] for n in range(self.n_agents)] 101 | self.eval_energy = [[] for n in range(self.n_agents)] 102 | self.mean_R_mine = [[] for n in range(self.n_agents)] 103 | self.eval_R_mine = [[] for n in range(self.n_agents)] 104 | self.mean_E_mine = [[] for n in range(self.n_agents)] 105 | self.eval_E_mine = [[] for n in range(self.n_agents)] 106 | self.agent_rewards = [[] for n in range(self.n_agents)] 107 | self.agent_mean_rewards = [[] for n in range(self.n_agents)] 108 | 109 | def interact(self): 110 | if (self.max_steps is not None) and (self.n_steps >= self.max_steps): 111 | self.env_state = self.env.reset() 112 | self.n_steps = 0 113 | state = self.env_state 114 | action = self.exploration_action(state) 115 | 116 | next_state, reward, done, _, phi, energy, r_mine, e_mine = self.env.step(action) 117 | if done: 118 | if self.done_penalty is not None: 119 | reward = self.done_penalty 120 | next_state = np.zeros((self.n_agents, self.state_dim)) 121 | self.env_state = self.env.reset() 122 | self.n_episodes += 1 123 | self.episode_done = True 124 | else: 125 | self.env_state = next_state 126 | self.episode_done = False 127 | self.n_steps += 1 128 | 129 | # use actor_target to get next_action 130 | next_state_var = to_tensor_var([next_state], self.use_cuda) 131 | next_action = np.zeros((self.n_agents, self.action_dim)) 132 | for agent_id in range(self.n_agents): 133 | next_action_var = self.actors_target[agent_id](next_state_var[:,agent_id,:]) 134 | if self.use_cuda: 135 | next_action[agent_id] = next_action_var.data.cpu().numpy()[0] 136 | else: 137 | next_action[agent_id] = next_action_var.data.numpy()[0] 138 | 139 | self.eval_rewards.append(np.sum(reward)) 140 | for agent_id in range(self.n_agents): 141 | self.eval_phi[agent_id].append(phi[agent_id]) 142 | self.eval_energy[agent_id].append(energy[agent_id]) 143 | self.eval_R_mine[agent_id].append(r_mine[agent_id]) 144 | self.eval_E_mine[agent_id].append(e_mine[agent_id]) 145 | self.agent_rewards[agent_id].append(reward[agent_id]) 146 | if self.episode_done and ((self.n_episodes+1)%EVAL_EPISODES == 0): 147 | mean_reward = np.mean(np.array(self.eval_rewards)) 148 | self.mean_rewards.append(mean_reward) 149 | for agent_id in range(self.n_agents): 150 | self.mean_phi[agent_id].append(np.mean(np.array(self.eval_phi[agent_id]))) 151 | self.mean_energy[agent_id].append(np.mean(np.array(self.eval_energy[agent_id]))) 152 | self.mean_R_mine[agent_id].append(np.mean(np.array(self.eval_R_mine[agent_id]))) 153 | self.mean_E_mine[agent_id].append(np.mean(np.array(self.eval_E_mine[agent_id]))) 154 | self.agent_mean_rewards[agent_id].append(np.mean(np.array(self.agent_rewards[agent_id]))) 155 | self.episodes.append(self.n_episodes+1) 156 | print("Episode:", 
self.n_episodes+1, " Average Reward: ", mean_reward) 157 | self.eval_rewards = [] 158 | self.agent_rewards = [[] for n in range(self.n_agents)] 159 | self.eval_phi = [[] for n in range(self.n_agents)] 160 | self.eval_energy = [[] for n in range(self.n_agents)] 161 | self.eval_R_mine = [[] for n in range(self.n_agents)] 162 | self.eval_E_mine = [[] for n in range(self.n_agents)] 163 | 164 | self.memory.push(state, action, reward, next_state, next_action, done) 165 | 166 | def _soft_update_target(self, target, source): 167 | for t, s in zip(target.parameters(), source.parameters()): 168 | t.data.copy_( 169 | (1. - self.target_tau) * t.data + self.target_tau * s.data) 170 | 171 | # train on a sample batch 172 | def train(self): 173 | # do not train until exploration is enough 174 | if self.n_episodes <= self.episodes_before_train: 175 | pass 176 | 177 | batch = self.memory.sample(self.batch_size) 178 | states_var = to_tensor_var(batch.states, self.use_cuda).view(-1, self.n_agents, self.state_dim) 179 | actions_var = to_tensor_var(batch.actions, self.use_cuda).view(-1, self.n_agents, self.action_dim) 180 | rewards_var = to_tensor_var(batch.rewards, self.use_cuda).view(-1, self.n_agents, 1) 181 | next_states_var = to_tensor_var(batch.next_states, self.use_cuda).view(-1, self.n_agents, self.state_dim) 182 | next_actions_var = to_tensor_var(batch.next_actions, self.use_cuda).view(-1, self.n_agents, self.action_dim) 183 | dones_var = to_tensor_var(batch.dones, self.use_cuda).view(-1, 1) 184 | whole_states_var = states_var.view(-1, self.n_agents*self.state_dim) 185 | whole_actions_var = actions_var.view(-1, self.n_agents*self.action_dim) 186 | whole_next_states_var = next_states_var.view(-1, self.n_agents*self.state_dim) 187 | whole_next_actions_var = next_actions_var.view(-1, self.n_agents*self.action_dim) 188 | 189 | 190 | for agent_id in range(self.n_agents): 191 | # estimate the target q with actor_target network and critic_target network 192 | #next_q (centralized) 193 | next_q = self.critics_target[agent_id](whole_next_states_var, whole_next_actions_var).detach() 194 | 195 | target_q = self.reward_scale * rewards_var[:,agent_id,:] + self.reward_gamma * next_q * (1. 
- dones_var) 196 | 197 | # update critic network 198 | 199 | # current Q values (centralized) 200 | current_q = self.critics[agent_id](whole_states_var, whole_actions_var).detach() 201 | 202 | # rewards is target Q values 203 | critic_loss = nn.MSELoss()(current_q, target_q) 204 | critic_loss.requires_grad_(True) 205 | self.critics_optimizer[agent_id].zero_grad() 206 | critic_loss.backward() 207 | 208 | if self.max_grad_norm is not None: 209 | nn.utils.clip_grad_norm(self.critics[agent_id].parameters(), self.max_grad_norm) 210 | self.critics_optimizer[agent_id].step() 211 | 212 | # update actor network 213 | 214 | # the accurate action prediction 215 | action = self.actors[agent_id](states_var[:,agent_id,:]) 216 | # actor_loss is used to maximize the Q value for the predicted action 217 | actor_loss = - self.critics[agent_id](whole_states_var, whole_actions_var).detach() 218 | actor_loss = actor_loss.mean() 219 | actor_loss.requires_grad_(True) 220 | self.actors_optimizer[agent_id].zero_grad() 221 | actor_loss.backward() 222 | 223 | if self.max_grad_norm is not None: 224 | nn.utils.clip_grad_norm(self.actors[agent_id].parameters(), self.max_grad_norm) 225 | self.actors_optimizer[agent_id].step() 226 | 227 | # update actor target network and critic target network 228 | if self.n_steps % self.target_update_steps == 0 and self.n_steps > 0: 229 | self._soft_update_target(self.critics_target[agent_id], self.critics[agent_id]) 230 | self._soft_update_target(self.actors_target[agent_id], self.actors[agent_id]) 231 | 232 | def getactionbound(self, a, b, x, i): 233 | x = (x - a) * (self.action_higher_bound[i] - self.action_lower_bound[i]) / (b - a) \ 234 | + self.action_lower_bound[i] 235 | return x 236 | 237 | # choose an action based on state with random noise added for exploration in training 238 | def exploration_action(self, state): 239 | state_var = to_tensor_var([state], self.use_cuda) 240 | action = np.zeros((self.n_agents, self.action_dim)) 241 | for agent_id in range(self.n_agents): 242 | action_var = self.actors[agent_id](state_var[:,agent_id,:]) 243 | if self.use_cuda: 244 | action[agent_id] = action_var.data.cpu().numpy()[0] 245 | else: 246 | action[agent_id] = action_var.data.numpy()[0] 247 | 248 | epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \ 249 | np.exp(-1. 
* self.n_episodes / self.epsilon_decay) 250 | # add noise 251 | noise = np.random.randn(self.n_agents, self.action_dim) * epsilon 252 | action += noise 253 | 254 | for n in range(self.n_agents): 255 | for i in range(6): 256 | if action[n][i] < -1: 257 | action[n][i] = -1 258 | if action[n][i] > 1: 259 | action[n][i] = 1 260 | #get bounded to action_bound 261 | b = 1 262 | a = -b 263 | if self.action_dim > 6: 264 | print("Wrong!") 265 | for n in range(self.n_agents): 266 | action[n][0] = 0 if action[n][0] <= 0 else 1 267 | action[n][1] = round(self.getactionbound(a, b, action[n][1], 1)) 268 | action[n][2] = self.getactionbound(a, b, action[n][2], 2) 269 | action[n][3] = self.getactionbound(a, b, action[n][3], 3) 270 | action[n][4] = self.getactionbound(a, b, action[n][4], 4) 271 | action[n][5] = self.getactionbound(a, b, action[n][5], 5) 272 | return action 273 | 274 | 275 | # choose an action based on state for execution 276 | def action(self, state): 277 | state_var = to_tensor_var([state], self.use_cuda) 278 | action = np.zeros((self.n_agents, self.action_dim)) 279 | for agent_id in range(self.n_agents): 280 | action_var = self.actors[agent_id](state_var[:,agent_id,:]) 281 | if self.use_cuda: 282 | action[agent_id] = action_var.data.cpu().numpy()[0] 283 | else: 284 | action[agent_id] = action_var.data.numpy()[0] 285 | 286 | #get bounded to action_bound 287 | b = 1 288 | a = -b 289 | if self.action_dim > 6: 290 | print("Wrong!") 291 | for n in range(self.n_agents): 292 | action[n][0] = 0 if action[n][0] <= 0 else 1 293 | action[n][1] = round(self.getactionbound(a, b, action[n][1], 1)) 294 | action[n][2] = self.getactionbound(a, b, action[n][2], 2) 295 | action[n][3] = self.getactionbound(a, b, action[n][3], 3) 296 | action[n][4] = self.getactionbound(a, b, action[n][4], 4) 297 | action[n][5] = self.getactionbound(a, b, action[n][5], 5) 298 | 299 | return action 300 | 301 | -------------------------------------------------------------------------------- /MAPPO.py: -------------------------------------------------------------------------------- 1 | 2 | import torch as th 3 | from torch import nn 4 | from torch.optim import Adam, RMSprop 5 | from math import exp 6 | 7 | import numpy as np 8 | from copy import deepcopy 9 | 10 | from Model import ActorNetwork, CriticNetwork 11 | from utils import to_tensor_var 12 | from Memory import ReplayMemory 13 | 14 | EVAL_EPISODES = 10 15 | 16 | class MAPPO(object): 17 | """ 18 | An agent learned with PPO using Advantage Actor-Critic framework 19 | - Actor takes state as input 20 | - Critic takes both state and action as input 21 | - agent interact with environment to collect experience 22 | - agent training with experience to update policy 23 | - adam seems better than rmsprop for ppo 24 | """ 25 | def __init__(self, env, state_dim, action_dim, n_agents, action_lower_bound, action_higher_bound, 26 | noise=0, tau=300, 27 | memory_capacity=10, max_steps=None, 28 | roll_out_n_steps=10, target_tau=1.0, 29 | target_update_steps=5, clip_param=0.2, 30 | reward_gamma=0.99, reward_scale=1., 31 | actor_output_act=nn.functional.softmax, critic_loss="mse", 32 | actor_lr=0.01, critic_lr=0.01, 33 | optimizer_type="adam", entropy_reg=0.00, 34 | max_grad_norm=None, batch_size=10, episodes_before_train=0, 35 | use_cuda=False): 36 | 37 | 38 | self.env = env 39 | self.state_dim = state_dim 40 | self.action_dim = action_dim 41 | self.env_state = self.env.reset() 42 | self.n_episodes = 0 43 | self.n_steps = 0 44 | self.max_steps = max_steps 45 | self.n_agents = n_agents 46 
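# [Editor's note -- illustrative comment, not part of the original MAPPO.py source]
# train() below implements PPO's clipped surrogate objective
#     L_CLIP = E[ min(r_t * A_t, clip(r_t, 1 - clip_param, 1 + clip_param) * A_t) ],
# where r_t = exp(log pi(a_t|s_t) - log pi_old(a_t|s_t)) and A_t is the advantage estimate;
# `actors_target` serves as the old policy pi_old and is refreshed by a soft update every
# target_update_steps steps. A minimal standalone sketch of that loss, assuming `logp`,
# `logp_old` and `adv` are already 1-D tensors:
#     ratio = th.exp(logp - logp_old)
#     actor_loss = -th.mean(th.min(ratio * adv,
#                                  th.clamp(ratio, 1.0 - clip_param, 1.0 + clip_param) * adv))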
| 47 | self.reward_gamma = reward_gamma 48 | self.reward_scale = reward_scale 49 | 50 | self.action_lower_bound = action_lower_bound 51 | self.action_higher_bound = action_higher_bound 52 | 53 | self.memory = ReplayMemory(memory_capacity) 54 | 55 | self.actor_output_act = actor_output_act 56 | self.critic_loss = critic_loss 57 | self.actor_lr = actor_lr 58 | self.critic_lr = critic_lr 59 | self.optimizer_type = optimizer_type 60 | self.entropy_reg = entropy_reg 61 | self.max_grad_norm = max_grad_norm 62 | self.batch_size = batch_size 63 | self.episodes_before_train = episodes_before_train 64 | self.noise = noise 65 | self.tau = tau 66 | 67 | self.use_cuda = use_cuda and th.cuda.is_available() 68 | 69 | self.roll_out_n_steps = roll_out_n_steps 70 | self.target_tau = target_tau 71 | self.target_update_steps = target_update_steps 72 | self.clip_param = clip_param 73 | 74 | self.actors = [ActorNetwork(self.state_dim, self.action_dim, self.actor_output_act)] * self.n_agents 75 | critic_state_dim = self.n_agents * self.state_dim 76 | critic_action_dim = self.n_agents * self.action_dim 77 | self.critics = [CriticNetwork(critic_state_dim, critic_action_dim, 1)] * self.n_agents 78 | # to ensure target network and learning network has the same weights 79 | self.actors_target = deepcopy(self.actors) 80 | self.critics_target = deepcopy(self.critics) 81 | 82 | if optimizer_type == "adam": 83 | self.actors_optimizer = [Adam(a.parameters(), lr=self.actor_lr) for a in self.actors] 84 | self.critics_optimizer = [Adam(c.parameters(), lr=self.critic_lr) for c in self.critics] 85 | elif optimizer_type == "rmsprop": 86 | self.actors_optimizer = [RMSprop(a.parameters(), lr=self.actor_lr) for a in self.actors] 87 | self.critics_optimizer = [RMSprop(c.parameters(), lr=self.critic_lr) for c in self.critics] 88 | 89 | if self.use_cuda: 90 | for a in self.actors: 91 | a.cuda() 92 | for c in self.critics: 93 | c.cuda() 94 | self.eval_rewards = [] 95 | self.mean_rewards = [] 96 | self.episodes = [] 97 | self.eval_phi = [] 98 | self.mean_phi = [] 99 | 100 | # agent interact with the environment to collect experience 101 | def interact(self): 102 | if (self.max_steps is not None) and (self.n_steps >= self.max_steps): 103 | self.env_state = self.env.reset() 104 | self.n_steps = 0 105 | states = [] 106 | actions = [] 107 | rewards = [] 108 | # take n steps 109 | for i in range(self.roll_out_n_steps): 110 | states.append(self.env_state) 111 | action = self.choose_action(self.env_state) 112 | next_state, reward, done, _, phi = self.env.step(action) 113 | # done = done[0] 114 | actions.append(action) 115 | rewards.append(reward) 116 | final_state = next_state 117 | self.env_state = next_state 118 | if done: 119 | self.env_state = self.env.reset() 120 | break 121 | # discount reward 122 | if done: 123 | final_r = [0.0] * self.n_agents 124 | self.n_episodes += 1 125 | self.episode_done = True 126 | else: 127 | self.episode_done = False 128 | final_action = self.choose_action(final_state) 129 | final_r = self.value(final_state, final_action) 130 | 131 | rewards = np.array(rewards) 132 | for agent_id in range(self.n_agents): 133 | rewards[:,agent_id] = self._discount_reward(rewards[:,agent_id], final_r[agent_id]) 134 | rewards = rewards.tolist() 135 | self.n_steps += 1 136 | 137 | self.eval_rewards.append(np.sum(reward)) 138 | self.eval_phi.append(np.sum(phi)) 139 | if self.episode_done and ((self.n_episodes+1)%EVAL_EPISODES == 0): 140 | mean_reward = np.mean(np.array(self.eval_rewards)) 141 | 
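# [Editor's note -- illustrative comment, not part of the original MAPPO.py source]
# In the rollout above, each stored reward is replaced by an n-step return
#     G_t = r_t + gamma * r_{t+1} + ... + gamma^{n-t-1} * r_{n-1} + gamma^{n-t} * V(s_n),
# where the bootstrap V(s_n) (`final_r`, obtained from the critic via value()) is used when
# the episode did not terminate and 0 otherwise; the helper _discount_reward() at the end of
# this file accumulates this backwards from `final_value`. MAA2C.interact() uses the same scheme.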
self.mean_rewards.append(mean_reward) 142 | self.mean_phi.append(np.mean(np.array(self.eval_phi))) 143 | self.episodes.append(self.n_episodes+1) 144 | print("Episode:", self.n_episodes+1, " Average Reward: ", mean_reward) 145 | self.eval_rewards = [] 146 | self.eval_phi = [] 147 | 148 | self.memory.push(states, actions, rewards) 149 | 150 | # train on a roll out batch 151 | def train(self): 152 | if self.n_episodes <= self.episodes_before_train: 153 | pass 154 | 155 | batch = self.memory.sample(self.batch_size) 156 | states_var = to_tensor_var(batch.states, self.use_cuda).view(-1, self.n_agents, self.state_dim) 157 | actions_var = to_tensor_var(batch.actions, self.use_cuda).view(-1, self.n_agents, self.action_dim) 158 | rewards_var = to_tensor_var(batch.rewards, self.use_cuda).view(-1, self.n_agents, 1) 159 | whole_states_var = states_var.view(-1, self.n_agents*self.state_dim) 160 | whole_actions_var = actions_var.view(-1, self.n_agents*self.action_dim) 161 | 162 | for agent_id in range(self.n_agents): 163 | # update actor network 164 | self.actors_optimizer[agent_id].zero_grad() 165 | values = self.critics[agent_id](whole_states_var, whole_actions_var).detach() 166 | advantages = rewards_var[:,agent_id,:] - values 167 | # # normalizing advantages seems not working correctly here 168 | # advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-5) 169 | action_log_probs = self.actors[agent_id](states_var[:,agent_id,:]).detach() 170 | action_log_probs = th.sum(action_log_probs * actions_var[:,agent_id,:], 1) 171 | old_action_log_probs = self.actors_target[agent_id](states_var[:,agent_id,:]).detach() 172 | old_action_log_probs = th.sum(old_action_log_probs * actions_var[:,agent_id,:], 1) 173 | ratio = th.exp(action_log_probs - old_action_log_probs) 174 | surr1 = ratio * advantages 175 | surr2 = th.clamp(ratio, 1.0 - self.clip_param, 1.0 + self.clip_param) * advantages 176 | # PPO's pessimistic surrogate (L^CLIP) 177 | actor_loss = -th.mean(th.min(surr1, surr2)) 178 | actor_loss.requires_grad_(True) 179 | actor_loss.backward() 180 | if self.max_grad_norm is not None: 181 | nn.utils.clip_grad_norm(self.actors[agent_id].parameters(), self.max_grad_norm) 182 | self.actors_optimizer[agent_id].step() 183 | 184 | # update critic network 185 | self.critics_optimizer[agent_id].zero_grad() 186 | target_values = rewards_var[:,agent_id,:] 187 | # if self.critic_loss == "huber": 188 | # critic_loss = nn.functional.smooth_l1_loss(values, target_values) 189 | # else: 190 | # critic_loss = nn.MSELoss()(values, target_values) 191 | critic_loss = 0.5 * (values - target_values).pow(2).mean() 192 | critic_loss.requires_grad_(True) 193 | critic_loss.backward() 194 | if self.max_grad_norm is not None: 195 | nn.utils.clip_grad_norm(self.critics[agent_id].parameters(), self.max_grad_norm) 196 | self.critics_optimizer[agent_id].step() 197 | 198 | # update actor target network and critic target network 199 | if self.n_steps % self.target_update_steps == 0 and self.n_steps > 0: 200 | self._soft_update_target(self.actors_target[agent_id], self.actors[agent_id]) 201 | self._soft_update_target(self.critics_target[agent_id], self.critics[agent_id]) 202 | 203 | 204 | def _soft_update_target(self, target, source): 205 | for t, s in zip(target.parameters(), source.parameters()): 206 | t.data.copy_( 207 | (1. 
- self.target_tau) * t.data + self.target_tau * s.data) 208 | 209 | def getactionbound(self, a, b, x, i): 210 | x = (x - a) * (self.action_higher_bound[i] - self.action_lower_bound[i]) / (b - a) \ 211 | + self.action_lower_bound[i] 212 | return x 213 | 214 | def choose_action(self, state): 215 | state_var = to_tensor_var([state], self.use_cuda) 216 | action = np.zeros((self.n_agents, self.action_dim)) 217 | 218 | for agent_id in range(self.n_agents): 219 | action_var = (self.actors[agent_id](state_var[:,agent_id,:])) 220 | if self.use_cuda: 221 | action[agent_id] = action_var.data.cpu().numpy()[0] 222 | else: 223 | action[agent_id] = action_var.data.numpy()[0] 224 | 225 | for n in range(self.n_agents): 226 | for i in range(6): 227 | if (self.n_episodes < 600): e = self.n_episodes 228 | else: e = self.n_episodes 229 | action[n][i] = -exp(-e/self.tau) + self.noise 230 | b = 1 231 | a = -1 232 | if self.action_dim > 6: 233 | print("Wrong!") 234 | for n in range(self.n_agents): 235 | action[n][0] = 0 if action[n][0] <= 0 else 1 236 | action[n][1] = round(self.getactionbound(a, b, action[n][1], 1)) 237 | action[n][2] = self.getactionbound(a, b, action[n][2], 2) 238 | action[n][3] = self.getactionbound(a, b, action[n][3], 3) 239 | action[n][4] = self.getactionbound(a, b, action[n][4], 4) 240 | action[n][5] = self.getactionbound(a, b, action[n][5], 5) 241 | 242 | 243 | return action 244 | 245 | 246 | # evaluate value for a state-action pair 247 | def value(self, state, action): 248 | state_var = to_tensor_var([state], self.use_cuda) 249 | action_var = to_tensor_var([action], self.use_cuda) 250 | whole_state_var = state_var.view(-1, self.n_agents*self.state_dim) 251 | whole_action_var = action_var.view(-1, self.n_agents*self.action_dim) 252 | values = np.zeros(self.n_agents) 253 | for agent_id in range(self.n_agents): 254 | value_var = self.critics[agent_id](whole_state_var, whole_action_var) 255 | if self.use_cuda: 256 | values[agent_id] = value_var.data.cpu().numpy()[0] 257 | else: 258 | values[agent_id] = value_var.data.numpy()[0] 259 | return values 260 | 261 | def _discount_reward(self, rewards, final_value): 262 | discounted_r = np.zeros_like(rewards) 263 | running_add = final_value 264 | for t in reversed(range(0, len(rewards))): 265 | running_add = running_add * self.reward_gamma + rewards[t] 266 | discounted_r[t] = running_add 267 | return discounted_r 268 | 269 | -------------------------------------------------------------------------------- /Memory.py: -------------------------------------------------------------------------------- 1 | 2 | import random 3 | from collections import namedtuple 4 | 5 | 6 | Experience = namedtuple("Experience", 7 | ("states", "actions", "rewards", "next_states", "next_actions", "dones")) 8 | 9 | 10 | class ReplayMemory(object): 11 | """ 12 | Replay memory buffer 13 | """ 14 | def __init__(self, capacity): 15 | self.capacity = capacity 16 | self.memory = [] 17 | self.position = 0 18 | 19 | def _push_one(self, state, action, reward, next_state=None, next_action=None, done=None): 20 | if len(self.memory) < self.capacity: 21 | self.memory.append(None) 22 | self.memory[self.position] = Experience(state, action, reward, next_state, next_action, done) 23 | self.position = (self.position + 1) % self.capacity 24 | 25 | # def push(self, states, actions, rewards, next_states=None, dones=None): 26 | 27 | 28 | # # print("states = ", states) 29 | # # print("actions = ", actions) 30 | # # print("rewards = ", rewards) 31 | # # print("next_states = ", next_states) 32 | 33 
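# [Editor's note -- illustrative comment, not part of the original Memory.py source]
# ReplayMemory is a fixed-size ring buffer: _push_one() overwrites the oldest entry once
# `capacity` is reached, and sample() draws a uniform random batch without replacement
# (clamped to the current buffer size). A minimal usage sketch:
#     memory = ReplayMemory(capacity=1000)
#     memory.push(state, action, reward, next_state, next_action, done)
#     batch = memory.sample(batch_size=32)   # Experience namedtuple of tuples, e.g. batch.states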
| # # print("dones = ", dones) 34 | # if isinstance(states, list): 35 | # if next_states is not None and len(next_states) > 0: 36 | # self._push_one(states, actions, rewards, next_states, dones) 37 | # else: 38 | # self._push_one(states, actions, rewards) 39 | # else: 40 | # self._push_one(states, actions, rewards, next_states, dones) 41 | 42 | def push(self, states, actions, rewards, next_states=None, next_actions=None, dones=None): 43 | 44 | if isinstance(states, list): 45 | if dones is not None and len(next_states) > 0: 46 | for s,a,r,n_s, n_a, d in zip(states, actions, rewards, next_states, next_actions, dones): 47 | self._push_one(s, a, r, n_s, n_a, d) 48 | elif next_states is not None: 49 | for s,a,r, n_s, n_a in zip(states, actions, rewards, next_states, next_actions): 50 | self._push_one(s, a, r, n_s, n_a) 51 | else: 52 | for s,a,r in zip(states, actions, rewards): 53 | self._push_one(s, a, r) 54 | else: 55 | self._push_one(states, actions, rewards, next_states, next_actions, dones) 56 | 57 | 58 | def sample(self, batch_size): 59 | if batch_size > len(self.memory): 60 | batch_size = len(self.memory) 61 | transitions = random.sample(self.memory, batch_size) 62 | batch = Experience(*zip(*transitions)) 63 | return batch 64 | 65 | def __len__(self): 66 | return len(self.memory) 67 | -------------------------------------------------------------------------------- /Model.py: -------------------------------------------------------------------------------- 1 | 2 | import torch as th 3 | from torch import nn 4 | 5 | NUMBER = 4 6 | 7 | class ActorNetwork(nn.Module): 8 | """ 9 | A network for actor 10 | """ 11 | def __init__(self, state_dim, output_size, output_act, init_w =3e-3): 12 | super(ActorNetwork, self).__init__() 13 | self.fc1 = nn.Linear(state_dim, 64) 14 | self.fc2 = nn.Linear(64, 128) 15 | self.fc3 = nn.Linear(128, output_size) 16 | 17 | self.fc3.weight.data.uniform_(-init_w, init_w) 18 | self.fc3.bias.data.uniform_(-init_w, init_w) 19 | # activation function for the output 20 | self.output_act = output_act 21 | 22 | def __call__(self, state): 23 | out = nn.functional.relu(self.fc1(state)) 24 | out = nn.functional.relu(self.fc2(out)) 25 | if self.output_act == nn.functional.softmax: 26 | out = self.output_act(self.fc3(out), dim=-1) 27 | else: 28 | out = self.output_act(self.fc3(out)) 29 | return out 30 | 31 | 32 | class CriticNetwork(nn.Module): 33 | """ 34 | A network for critic 35 | """ 36 | def __init__(self, state_dim, action_dim, output_size=1, init_w =3e-3): 37 | super(CriticNetwork, self).__init__() 38 | self.fc1 = nn.Linear(state_dim + action_dim, 64) 39 | self.fc2 = nn.Linear(64, 128) 40 | self.fc3 = nn.Linear(128, output_size) 41 | 42 | self.fc3.weight.data.uniform_(-init_w, init_w) 43 | self.fc3.bias.data.uniform_(-init_w, init_w) 44 | 45 | def __call__(self, state, action): 46 | out = th.cat([state, action], 1) 47 | out = nn.functional.relu(self.fc1(out)) 48 | out = nn.functional.relu(self.fc2(out)) 49 | out = self.fc3(out) 50 | return out 51 | 52 | 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Reinforcement Learning for Mobile Edge Computing 2 | 3 | This is a repository containing Python code and a corresponding article on the topic of mobile edge computing (MEC) and its optimization using deep reinforcement learning (DRL) techniques. 
The article discusses the challenges of designing an efficient task-offloading strategy for the whole MEC system, and proposes the use of multi-agent DRL to support smart task offloading in a MEC network. 4 | 5 | Specifically, the project simplifies the MEC problem as video task processing and applies three different DRL methods based on Actor-Critic structure: Multi-Agent Advantage Actor-Critic (MAA2C), Multi-Agent Proximal Policy Optimization (MAPPO), and Multi-Agent Deep Deterministic Policy Gradient (MADDPG). The reward function for different environment parameters is compared, as well as the final results. 6 | 7 | Article: https://github.com/XinyaoQiu/DRL-for-edge-computing/blob/master/DRL-for-edge-computing.pdf 8 | 9 | Presentation: https://github.com/XinyaoQiu/DRL-for-edge-computing/blob/master/DRL-presentation.pdf 10 | 11 | ## Authors 12 | 13 | - Xinyao Qiu 14 | - Yuqi Mai 15 | 16 | ## Motivation 17 | 18 | Mobile edge computing (MEC) is a promising technology that can improve the computing experience of electronic devices by offloading computation-based tasks to MEC servers located near the cloud servers. However, designing an efficient task-offloading strategy for the whole MEC system is not easy. Recently, many edge task offloading schemes have been proposed, but most of them consider single-agent offloading scenarios using traditional convex optimization tools. Deep reinforcement learning (DRL) techniques, such as deep Q-learning (DQN), have emerged as a promising alternative by modeling offloading problems as Markov decision processes (MDP) using deep neural networks (DNN) for function approximation. However, these efforts only use a single agent to handle the entire offloading process and do not work well in a large-scale distributed MEC environment. An interesting alternative is to use a multi-agent DRL (MA-DRL) to support smart task offloading in a MEC network. 19 | 20 | ## Getting Started 21 | 22 | To get started with this project, you can clone the repository and run the Python code on your machine. You will need to have Python 3 and the following packages installed: 23 | 24 | - Tensorflow 25 | - PyTorch 26 | - Keras 27 | - OpenAI Gym 28 | 29 | You can install these packages using pip: 30 | 31 | ```python 32 | pip install tensorflow keras gym torch 33 | ``` 34 | 35 | ## Usage 36 | 37 | The main code files in this repository are: 38 | 39 | - `maa2c.py`: Implements the Multi-Agent Advantage Actor-Critic (MAA2C) algorithm. 40 | - `mappo.py`: Implements the Multi-Agent Proximal Policy Optimization (MAPPO) algorithm. 41 | - `maddpg.py`: Implements the Multi-Agent Deep Deterministic Policy Gradient (DDPG) algorithm. 42 | - `env.py`: Defines the MEC environment and its reward function. 43 | - `train.py`: Trains the agents using the specified DRL algorithm and environment parameters. 44 | - `evaluate.py`: Evaluates the trained agents on the environment. 45 | 46 | To train the agents, run `train.py` with the desired algorithm and environment parameters: 47 | 48 | ``` 49 | python train.py --algorithm maa2c --env-params env_params.json 50 | ``` 51 | 52 | To evaluate the trained agents, run `evaluate.py` with the same algorithm and environment parameters: 53 | 54 | ``` 55 | python evaluate.py --algorithm maa2c --env-params env_params.json 56 | ``` 57 | 58 | ## References 59 | 60 | 1. X. Xiong, K. Zheng, L. Lei, and L. Hou, “Resource allocation based on deep reinforcement learning in iot edge computing,” IEEE J. Sel. Areas Commun., vol. 38, no. 6, pp. 1133–1146, 2020. 61 | 2. D. 
Nguyen, M. Ding, P. Pathirana, A. Seneviratne, J. Li, and V. Poor, “Cooperative task offloading and block mining in blockchain-based edge computing with multi-agent deep reinforcement learning,” IEEE Transactions on Mobile Computing, pp. 1–1, 2021. 62 | 3. A. Barto, R. Sutton, and C. Anderson, “Neuron like elements that can solve difficult learning control problems,” IEEE Transactions on Systems, Man, & Cybernetics, pp. 1–1, 1983. 63 | 4. Openai, “Openai baselines: Acktr a2c,” . 64 | 5. J. Schulman, F. Wolski, P. Dhariwal, A. Radford, and O. Klimov, “Proximal policy optimization algorithms,” 2017. 65 | 6. T. P. Lillicrap, J. J. Hunt, A. Pritzel, N. Heess, T. Erez, Y. Tassa, D. Silver, and D. Wierstra, “Continuous control with deep reinforcement learning,” 2016. 66 | 7. B. Yang, X. Cao, J. Bassey, X. Li, and L. Qian, “Computation offloading in multi-access edge computing: A multi-task learning approach,” IEEE Trans. Mob Comupt., pp. 1–1, 2021, doi:10.1109/TMC.2020.2990630. 67 | 8. Z. Shou, X. Lin, Y. Kalantidis, L. Sevilla-Lara, M. Rohrbach, S.-F. Chang, and Z. Yan, “DMC-Net: Generating discriminative motion cues for fast compressed video action recognition,” in Proceedings of the IEEE/CVF conference on computer vision and pattern recognition, 2019, pp. 1268–1277. 68 | 69 | ## License 70 | 71 | This project is licensed under the MIT License - see the LICENSE file for details. 72 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | Use this section to tell people about which versions of your project are 6 | currently being supported with security updates. 7 | 8 | | Version | Supported | 9 | | ------- | ------------------ | 10 | | 5.1.x | :white_check_mark: | 11 | | 5.0.x | :x: | 12 | | 4.0.x | :white_check_mark: | 13 | | < 4.0 | :x: | 14 | 15 | ## Reporting a Vulnerability 16 | 17 | Use this section to tell people how to report a vulnerability. 18 | 19 | Tell them where to go, how often they can expect to get an update on a 20 | reported vulnerability, what to expect if the vulnerability is accepted or 21 | declined, etc. 
22 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__init__.py -------------------------------------------------------------------------------- /__pycache__/Agent.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/Agent.cpython-310.pyc -------------------------------------------------------------------------------- /__pycache__/Agent.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/Agent.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/MAA2C.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/MAA2C.cpython-310.pyc -------------------------------------------------------------------------------- /__pycache__/MAA2C.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/MAA2C.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/MADDPG.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/MADDPG.cpython-310.pyc -------------------------------------------------------------------------------- /__pycache__/MADDPG.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/MADDPG.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/MADDPG.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/MADDPG.cpython-39.pyc -------------------------------------------------------------------------------- /__pycache__/MAPPO.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/MAPPO.cpython-310.pyc -------------------------------------------------------------------------------- /__pycache__/MAPPO.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/MAPPO.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/Memory.cpython-310.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/Memory.cpython-310.pyc -------------------------------------------------------------------------------- /__pycache__/Memory.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/Memory.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/Memory.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/Memory.cpython-39.pyc -------------------------------------------------------------------------------- /__pycache__/Model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/Model.cpython-310.pyc -------------------------------------------------------------------------------- /__pycache__/Model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/Model.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/Model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/Model.cpython-39.pyc -------------------------------------------------------------------------------- /__pycache__/ddpg.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/ddpg.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/env.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/env.cpython-310.pyc -------------------------------------------------------------------------------- /__pycache__/env.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/env.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/env.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/env.cpython-39.pyc -------------------------------------------------------------------------------- /__pycache__/env_1.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/env_1.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/run_ddpg.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/run_ddpg.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/utils.cpython-39.pyc -------------------------------------------------------------------------------- /env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | LAMBDA_E = 0.6 4 | LAMBDA_PHI = 0.4 5 | 6 | MU_1 = 0.6 7 | MU_2 = 0.4 8 | 9 | K_CHANNEL = 4 10 | 11 | MIN_SIZE = 0.2 12 | MAX_SIZE = 50 13 | 14 | MIN_CYCLE = 0.05 15 | MAX_CYCLE = 2 16 | 17 | MIN_DDL = 0.4 18 | MAX_DDL = 2 19 | 20 | MIN_RES = 0.4 21 | MAX_RES = 2.3 22 | 23 | MIN_COM = 0.1 24 | MAX_COM = 1 25 | 26 | MAX_POWER = 24 27 | 28 | MAX_GAIN = 10 29 | MIN_GAIN = 5 30 | 31 | 32 | V_L = 0.125 33 | V_E = 0.13 34 | 35 | THETA_L = 1/1600 36 | THETA_E = 1/1700 37 | 38 | K_ENERGY_LOCAL = 0.8 * 10**(-3) #k = 0.8 * 10 ^(-27) * M * G^2# 39 | K_ENERGY_MEC = 0.7 * 10**(-3) 40 | 41 | NOISE_VARIANCE = 100 42 | 43 | OMEGA = 0.9*10**(-2) #w = 0.9*10*(-11) * G# 44 | 45 | CAPABILITY_E = 5 46 | 47 | MIN_EPSILON = 0.56 48 | MAX_EPSILON = 0.93 49 | 50 | KSI = 0.5 51 | LAMBDA = 0.5 52 | ALPHA = 0.5 53 | BETA = 10 54 | 55 | S_POWER = 20 56 | S_GAIN = 8 57 | S_SIZE = 8 58 | S_CYCLE = 1 59 | S_RESOLU = 0.6 60 | 61 | S_RES = 1.5 62 | S_COM = 0.6 63 | 64 | 65 | class MecBCEnv(object): 66 | def __init__(self, n_agents, S_DDL=1, S_EPSILON=0.86, W_BANDWIDTH=20, \ 67 | S_one_power=20, S_one_gamma=0.6, mode="normal"): 68 | 69 | 70 | self.state_size = 10 71 | self.action_size = 6 72 | self.n_agents = n_agents 73 | 74 | self.S_DDL = S_DDL 75 | self.S_EPSILON = S_EPSILON 76 | 77 | self.W_BANDWIDTH = W_BANDWIDTH 78 | self.S_one_power = S_one_power 79 | self.S_one_gamma = S_one_gamma 80 | 81 | # state 82 | self.S_channel = np.zeros(self.n_agents) 83 | self.S_power = np.zeros(self.n_agents) 84 | self.S_gain = np.zeros(self.n_agents) 85 | self.S_size = np.zeros(self.n_agents) 86 | self.S_cycle = np.zeros(self.n_agents) 87 | self.S_resolu = np.zeros(self.n_agents) 88 | self.S_ddl = np.zeros(self.n_agents) 89 | self.S_res = np.zeros(self.n_agents) 90 | self.S_com = np.zeros(self.n_agents) 91 | self.S_epsilon = np.zeros(self.n_agents) 92 | self.mode = 
mode 93 | 94 | self.action_lower_bound = [0, 0, 0.01, MIN_RES, MIN_COM, 1] 95 | self.action_higher_bound = [1, K_CHANNEL, 0.99, MAX_RES, MAX_COM, MAX_POWER] 96 | 97 | 98 | 99 | self.epoch = 0 100 | 101 | # reset the environment 102 | def reset(self): 103 | self.epoch = 0 104 | # randomize the state 105 | for n in range(self.n_agents): 106 | self.S_channel[n] = 1 107 | self.S_power[n] = np.random.normal(S_POWER, 1) 108 | self.S_gain[n] = np.random.normal(S_GAIN, 1) 109 | self.S_size[n] = np.random.normal(S_SIZE, 1) 110 | self.S_cycle[n] = np.random.normal(S_CYCLE, 0.1) 111 | self.S_resolu[n] = np.random.normal(S_RESOLU, 0.1) 112 | self.S_ddl[n] = np.random.normal(self.S_DDL, 0.1) 113 | self.S_res[n] = np.random.normal(S_RES, 0.1) 114 | self.S_com[n] = np.random.normal(S_COM, 0.1) 115 | self.S_epsilon[n] = np.random.normal(self.S_EPSILON, 0.1) 116 | 117 | self.S_power[0] = np.random.normal(self.S_one_power, 1) 118 | self.S_com[0] = np.random.normal(self.S_one_gamma, 0.1) 119 | 120 | State_ = [] 121 | State_ = [[self.S_channel[n], self.S_power[n], self.S_gain[n], self.S_size[n], self.S_cycle[n], \ 122 | self.S_resolu[n], self.S_ddl[n], self.S_res[n], self.S_com[n], self.S_epsilon[n]] for n in range(self.n_agents)] 123 | 124 | State_ = np.array(State_) 125 | 126 | 127 | 128 | return State_ 129 | 130 | 131 | def step(self, action): 132 | 133 | 134 | # action 135 | A_decision = np.zeros(self.n_agents) 136 | A_channel = np.zeros(self.n_agents) 137 | A_resolu = np.zeros(self.n_agents) 138 | A_res = np.zeros(self.n_agents) 139 | A_com = np.zeros(self.n_agents) 140 | A_power = np.zeros(self.n_agents) 141 | if self.mode == "normal": 142 | for n in range(self.n_agents): 143 | A_decision[n] = action[n][0] 144 | A_channel[n] = action[n][1] 145 | A_resolu[n] = action[n][2] 146 | A_res[n] = action[n][3] 147 | A_com[n] = action[n][4] 148 | A_power[n] = action[n][5] 149 | elif self.mode == "NAC": 150 | for n in range(self.n_agents): 151 | A_decision[n] = action[n][0] 152 | A_channel[n] = action[n][1] 153 | A_resolu[n] = 0.2 154 | A_res[n] = action[n][3] 155 | A_com[n] = action[n][4] 156 | A_power[n] = action[n][5] 157 | elif self.mode == "ALLES": 158 | for n in range(self.n_agents): 159 | A_decision[n] = 1 160 | A_channel[n] = action[n][1] 161 | A_resolu[n] = action[n][2] 162 | A_res[n] = action[n][3] 163 | A_com[n] = action[n][4] 164 | A_power[n] = action[n][5] 165 | else: 166 | print("Unknown mode:", self.mode) 167 | 168 | 169 | S_channel = self.S_channel 170 | S_power = self.S_power 171 | S_gain = self.S_gain 172 | S_size = self.S_size 173 | S_cycle = self.S_cycle 174 | S_resolu = self.S_resolu 175 | S_ddl = self.S_ddl 176 | S_res = self.S_res 177 | S_com = self.S_com 178 | S_epsilon = self.S_epsilon 179 | 180 | # adjust A_decision according to S_task and S_channel 181 | for n in range(self.n_agents): 182 | for k in range(K_CHANNEL): 183 | if S_channel[n] == k and A_channel[n] == k: 184 | A_decision[n] = 0 185 | 186 | # compute the reward 187 | x_n = np.zeros(self.n_agents) 188 | for n in range(self.n_agents): 189 | if S_channel[n] != 0: 190 | x_n[n] = 1 191 | else: 192 | x_n[n] = 0 193 | 194 | total_power = 0 195 | for n in range(self.n_agents): 196 | total_power += x_n[n] * S_power[n] * S_gain[n] 197 | 198 | 199 | Phi_local = V_L * np.log(1 + S_resolu / THETA_L) 200 | 201 | Phi_off = V_E * np.log(1 + S_resolu / THETA_E) 202 | 203 | Phi_n = (1 - x_n) * Phi_local + x_n * Phi_off 204 | 205 | 206 | Phi_penalty = np.maximum((S_epsilon - Phi_n) / S_epsilon, 0) 207 | 208 | 209 | total_com = np.sum(S_com) 210 | 211 | DataRate = self.W_BANDWIDTH * np.log(1 + S_power * S_gain /
(NOISE_VARIANCE + \ 212 | total_power - x_n * S_power * S_gain)) / np.log(2) 213 | 214 | 215 | Time_proc = S_resolu * S_cycle / CAPABILITY_E 216 | 217 | Time_local = S_resolu * S_cycle / S_res 218 | 219 | Time_off = S_resolu * S_size / DataRate 220 | 221 | Time_n = (1 - x_n) * Time_local + x_n * (Time_off + Time_proc) 222 | 223 | total_com = np.sum(S_com) 224 | 225 | T_mean = np.mean(Time_n) 226 | 227 | R_mine = KSI * S_com / total_com * np.exp(-LAMBDA * T_mean / S_ddl) 228 | 229 | Time_penalty = np.maximum((Time_n - S_ddl) / Time_n, 0) 230 | 231 | Energy_local = K_ENERGY_LOCAL * S_size * S_resolu * (S_res**2) + OMEGA * S_com 232 | 233 | Energy_off = S_power * Time_off * 10**(-6) 234 | 235 | Energy_mine = OMEGA * S_com 236 | 237 | Energy_n = (1 - x_n) * Energy_local + x_n * Energy_off 238 | 239 | Reward_vt = LAMBDA_E * ((Energy_local - Energy_n) / Energy_local) - LAMBDA_PHI * ((Phi_local - Phi_n) / Phi_local) 240 | 241 | Utility_mine = R_mine - Energy_mine 242 | 243 | Reward = MU_1 * Reward_vt + MU_2 * Utility_mine - BETA * (Phi_penalty + Time_penalty) 244 | 245 | # print(np.sum(Reward), np.sum(Reward_mine), np.sum(Reward_vt), np.sum(Phi_penalty), np.sum(Time_penalty)) 246 | 247 | # update the state according to the action 248 | for n in range(self.n_agents): 249 | if int(A_decision[n]): 250 | self.S_channel[n] = A_channel[n] 251 | self.S_resolu = A_resolu 252 | self.S_res = A_res 253 | self.S_com = A_com 254 | self.S_power = A_power 255 | 256 | State_ = [] 257 | State_ = [[self.S_channel[n], self.S_power[n], self.S_gain[n], self.S_size[n], self.S_cycle[n], \ 258 | self.S_resolu[n], self.S_ddl[n], self.S_res[n], self.S_com[n], self.S_epsilon[n]] for n in range(self.n_agents)] 259 | 260 | State_ = np.array(State_) 261 | 262 | self.epoch += 1 263 | done = False 264 | if self.epoch > 100: 265 | self.reset() 266 | done = True 267 | 268 | 269 | 270 | return State_, Reward, done, True, Phi_n, Energy_n, R_mine, Energy_mine 271 | 272 | 273 | 274 | -------------------------------------------------------------------------------- /excel/DDPG_A2C_PPO.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/excel/DDPG_A2C_PPO.xls -------------------------------------------------------------------------------- /excel/Excel_a2c.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/excel/Excel_a2c.xls -------------------------------------------------------------------------------- /excel/Excel_ddpg.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/excel/Excel_ddpg.xls -------------------------------------------------------------------------------- /excel/Excel_ppo.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/excel/Excel_ppo.xls -------------------------------------------------------------------------------- /excel/final.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/excel/final.xls
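For quick orientation, here is a minimal smoke-test loop for the `MecBCEnv` environment defined in `env.py` above. It is only a sketch: the uniform random action sampling and the choice of four agents are illustrative assumptions, not the project's training procedure (the learners in `MAA2C.py`, `MADDPG.py`, and `MAPPO.py` presumably drive the environment in practice).

```python
# Illustrative driver for MecBCEnv (sketch only, not the project's entry point).
# Actions are sampled uniformly within the env's own action bounds.
import numpy as np
from env import MecBCEnv

env = MecBCEnv(n_agents=4)   # 4 users; default DDL, epsilon, bandwidth
state = env.reset()          # shape: (n_agents, state_size=10)

low = np.array(env.action_lower_bound)    # per-dimension lower bounds (length 6)
high = np.array(env.action_higher_bound)  # per-dimension upper bounds (length 6)

for t in range(10):
    # one random action vector of length action_size=6 per agent
    action = np.random.uniform(low, high, size=(env.n_agents, env.action_size))
    # step() returns eight values, in the order defined in env.py
    state, reward, done, _, phi, energy, r_mine, e_mine = env.step(action)
    print(t, float(reward.sum()), float(energy.sum()))
    if done:
        break
```

The per-agent reward combines the offloading term, the mining utility, and the quality/deadline penalties exactly as computed in `step()`; summing it here is only for logging.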
-------------------------------------------------------------------------------- /graphs/498/convergence.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/498/convergence.png -------------------------------------------------------------------------------- /graphs/498/reward_vs_parameter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/498/reward_vs_parameter.png -------------------------------------------------------------------------------- /graphs/change agents/ALLES_change_agents.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change agents/ALLES_change_agents.png -------------------------------------------------------------------------------- /graphs/change agents/NAC_change_agents.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change agents/NAC_change_agents.png -------------------------------------------------------------------------------- /graphs/change agents/a2c_change_agents.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change agents/a2c_change_agents.png -------------------------------------------------------------------------------- /graphs/change agents/ddpg_change_agents.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change agents/ddpg_change_agents.png -------------------------------------------------------------------------------- /graphs/change agents/ppo_change_agents.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change agents/ppo_change_agents.png -------------------------------------------------------------------------------- /graphs/change bandwidth/ALLES_change_bandwidth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change bandwidth/ALLES_change_bandwidth.png -------------------------------------------------------------------------------- /graphs/change bandwidth/NAC_change_bandwidth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change bandwidth/NAC_change_bandwidth.png -------------------------------------------------------------------------------- /graphs/change bandwidth/a2c_change_bandwidth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change 
bandwidth/a2c_change_bandwidth.png -------------------------------------------------------------------------------- /graphs/change bandwidth/ddpg_change_bandwidth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change bandwidth/ddpg_change_bandwidth.png -------------------------------------------------------------------------------- /graphs/change bandwidth/ppo_change_bandwidth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change bandwidth/ppo_change_bandwidth.png -------------------------------------------------------------------------------- /graphs/change ddl/ALLES_change_ddl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change ddl/ALLES_change_ddl.png -------------------------------------------------------------------------------- /graphs/change ddl/NAC_change_ddl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change ddl/NAC_change_ddl.png -------------------------------------------------------------------------------- /graphs/change ddl/a2c_change_ddl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change ddl/a2c_change_ddl.png -------------------------------------------------------------------------------- /graphs/change ddl/ddpg_change_ddl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change ddl/ddpg_change_ddl.png -------------------------------------------------------------------------------- /graphs/change ddl/ppo_change_ddl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change ddl/ppo_change_ddl.png -------------------------------------------------------------------------------- /graphs/change epsilon/ALLES_change_epsilon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change epsilon/ALLES_change_epsilon.png -------------------------------------------------------------------------------- /graphs/change epsilon/NAC_change_epsilon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change epsilon/NAC_change_epsilon.png -------------------------------------------------------------------------------- /graphs/change epsilon/a2c_change_epsilon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change epsilon/a2c_change_epsilon.png -------------------------------------------------------------------------------- /graphs/change epsilon/ddpg_change_epsilon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change epsilon/ddpg_change_epsilon.png -------------------------------------------------------------------------------- /graphs/change epsilon/ppo_change_epsilon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change epsilon/ppo_change_epsilon.png -------------------------------------------------------------------------------- /graphs/change one gamma/change one gamma e_mine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change one gamma/change one gamma e_mine.png -------------------------------------------------------------------------------- /graphs/change one gamma/change one gamma r_mine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change one gamma/change one gamma r_mine.png -------------------------------------------------------------------------------- /graphs/change one power/change one power energy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change one power/change one power energy.png -------------------------------------------------------------------------------- /graphs/change one power/change one power reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change one power/change one power reward.png -------------------------------------------------------------------------------- /graphs/ddpg_change_lr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/ddpg_change_lr.png -------------------------------------------------------------------------------- /graphs/episodes_avg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/episodes_avg.png -------------------------------------------------------------------------------- /graphs/phi&energy/energy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/phi&energy/energy.png -------------------------------------------------------------------------------- /graphs/phi&energy/phi.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/phi&energy/phi.png -------------------------------------------------------------------------------- /graphs/reward_vs_parameters/reward_vs_agents.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/reward_vs_parameters/reward_vs_agents.png -------------------------------------------------------------------------------- /graphs/reward_vs_parameters/reward_vs_bandwidth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/reward_vs_parameters/reward_vs_bandwidth.png -------------------------------------------------------------------------------- /graphs/reward_vs_parameters/reward_vs_ddl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/reward_vs_parameters/reward_vs_ddl.png -------------------------------------------------------------------------------- /graphs/reward_vs_parameters/reward_vs_epsilon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/reward_vs_parameters/reward_vs_epsilon.png -------------------------------------------------------------------------------- /output/a2c_change_ddl_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_10.png -------------------------------------------------------------------------------- /output/a2c_change_ddl_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_11.png -------------------------------------------------------------------------------- /output/a2c_change_ddl_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_12.png -------------------------------------------------------------------------------- /output/a2c_change_ddl_13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_13.png -------------------------------------------------------------------------------- /output/a2c_change_ddl_14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_14.png -------------------------------------------------------------------------------- /output/a2c_change_ddl_15.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_15.png -------------------------------------------------------------------------------- /output/a2c_change_ddl_16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_16.png -------------------------------------------------------------------------------- /output/a2c_change_ddl_17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_17.png -------------------------------------------------------------------------------- /output/a2c_change_ddl_18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_18.png -------------------------------------------------------------------------------- /output/a2c_change_ddl_19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_19.png -------------------------------------------------------------------------------- /output/a2c_change_ddl_20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_20.png -------------------------------------------------------------------------------- /output/a2c_change_ddl_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_8.png -------------------------------------------------------------------------------- /output/a2c_change_ddl_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_9.png -------------------------------------------------------------------------------- /output/change agents/a2c/a2c_change_agents_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_1.png -------------------------------------------------------------------------------- /output/change agents/a2c/a2c_change_agents_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_10.png -------------------------------------------------------------------------------- /output/change agents/a2c/a2c_change_agents_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change 
agents/a2c/a2c_change_agents_11.png -------------------------------------------------------------------------------- /output/change agents/a2c/a2c_change_agents_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_4.png -------------------------------------------------------------------------------- /output/change agents/a2c/a2c_change_agents_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_5.png -------------------------------------------------------------------------------- /output/change agents/a2c/a2c_change_agents_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_6.png -------------------------------------------------------------------------------- /output/change agents/a2c/a2c_change_agents_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_7.png -------------------------------------------------------------------------------- /output/change agents/a2c/a2c_change_agents_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_8.png -------------------------------------------------------------------------------- /output/change agents/a2c/a2c_change_agents_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_9.png -------------------------------------------------------------------------------- /output/change agents/a2c/a2c_change_agents_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_final.png -------------------------------------------------------------------------------- /output/change agents/ddpg/ddpg_change_agents_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/ddpg/ddpg_change_agents_2.png -------------------------------------------------------------------------------- /output/change agents/ddpg/ddpg_change_agents_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/ddpg/ddpg_change_agents_3.png -------------------------------------------------------------------------------- /output/change agents/ddpg/ddpg_change_agents_final.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/ddpg/ddpg_change_agents_final.png -------------------------------------------------------------------------------- /output/change agents/ppo/ppo_change_agents_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/ppo/ppo_change_agents_1.png -------------------------------------------------------------------------------- /output/change agents/ppo/ppo_change_agents_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/ppo/ppo_change_agents_2.png -------------------------------------------------------------------------------- /output/change agents/ppo/ppo_change_agents_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/ppo/ppo_change_agents_final.png -------------------------------------------------------------------------------- /output/change agents/ppo_change_agents_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/ppo_change_agents_3.png -------------------------------------------------------------------------------- /output/change agents/ppo_change_agents_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/ppo_change_agents_4.png -------------------------------------------------------------------------------- /output/change agents/ppo_change_agents_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/ppo_change_agents_5.png -------------------------------------------------------------------------------- /output/change bandwidth/a2c/a2c_change_bandwidth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/a2c/a2c_change_bandwidth.png -------------------------------------------------------------------------------- /output/change bandwidth/a2c/a2c_change_bandwidth1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/a2c/a2c_change_bandwidth1.png -------------------------------------------------------------------------------- /output/change bandwidth/a2c/a2c_change_bandwidth2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change 
bandwidth/a2c/a2c_change_bandwidth2.png -------------------------------------------------------------------------------- /output/change bandwidth/a2c/a2c_change_bandwidth_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/a2c/a2c_change_bandwidth_11.png -------------------------------------------------------------------------------- /output/change bandwidth/a2c/a2c_change_bandwidth_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/a2c/a2c_change_bandwidth_3.png -------------------------------------------------------------------------------- /output/change bandwidth/a2c/a2c_change_bandwidth_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/a2c/a2c_change_bandwidth_final.png -------------------------------------------------------------------------------- /output/change bandwidth/ddpg/ddpg_change_bandwidth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/ddpg/ddpg_change_bandwidth.png -------------------------------------------------------------------------------- /output/change bandwidth/ddpg/ddpg_change_bandwidth_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/ddpg/ddpg_change_bandwidth_3.png -------------------------------------------------------------------------------- /output/change bandwidth/ddpg/ddpg_change_bandwidth_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/ddpg/ddpg_change_bandwidth_final.png -------------------------------------------------------------------------------- /output/change bandwidth/ppo/ppo_change_bandwidth_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/ppo/ppo_change_bandwidth_1.png -------------------------------------------------------------------------------- /output/change bandwidth/ppo/ppo_change_bandwidth_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/ppo/ppo_change_bandwidth_final.png -------------------------------------------------------------------------------- /output/change ddl/a2c/a2c_change_ddl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl.png 
-------------------------------------------------------------------------------- /output/change ddl/a2c/a2c_change_ddl1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl1.png -------------------------------------------------------------------------------- /output/change ddl/a2c/a2c_change_ddl2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl2.png -------------------------------------------------------------------------------- /output/change ddl/a2c/a2c_change_ddl_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl_2.png -------------------------------------------------------------------------------- /output/change ddl/a2c/a2c_change_ddl_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl_3.png -------------------------------------------------------------------------------- /output/change ddl/a2c/a2c_change_ddl_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl_4.png -------------------------------------------------------------------------------- /output/change ddl/a2c/a2c_change_ddl_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl_5.png -------------------------------------------------------------------------------- /output/change ddl/a2c/a2c_change_ddl_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl_6.png -------------------------------------------------------------------------------- /output/change ddl/a2c/a2c_change_ddl_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl_7.png -------------------------------------------------------------------------------- /output/change ddl/a2c/a2c_change_ddl_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl_9.png -------------------------------------------------------------------------------- /output/change ddl/a2c/a2c_change_ddl_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change 
ddl/a2c/a2c_change_ddl_final.png -------------------------------------------------------------------------------- /output/change ddl/ddpg/ddpg_change_ddl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/ddpg/ddpg_change_ddl.png -------------------------------------------------------------------------------- /output/change ddl/ddpg/ddpg_change_ddl_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/ddpg/ddpg_change_ddl_3.png -------------------------------------------------------------------------------- /output/change ddl/ddpg/ddpg_change_ddl_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/ddpg/ddpg_change_ddl_4.png -------------------------------------------------------------------------------- /output/change ddl/ddpg/ddpg_change_ddl_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/ddpg/ddpg_change_ddl_7.png -------------------------------------------------------------------------------- /output/change ddl/ppo/ppo_change_ddl_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/ppo/ppo_change_ddl_1.png -------------------------------------------------------------------------------- /output/change ddl/ppo/ppo_change_ddl_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/ppo/ppo_change_ddl_2.png -------------------------------------------------------------------------------- /output/change ddl/ppo/ppo_change_ddl_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/ppo/ppo_change_ddl_3.png -------------------------------------------------------------------------------- /output/change ddl/ppo/ppo_change_ddl_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/ppo/ppo_change_ddl_final.png -------------------------------------------------------------------------------- /output/change epsilon/a2c/a2c_change_epsilon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/a2c/a2c_change_epsilon.png -------------------------------------------------------------------------------- /output/change epsilon/a2c/a2c_change_epsilon1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/a2c/a2c_change_epsilon1.png -------------------------------------------------------------------------------- /output/change epsilon/a2c/a2c_change_epsilon2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/a2c/a2c_change_epsilon2.png -------------------------------------------------------------------------------- /output/change epsilon/a2c/a2c_change_epsilon_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/a2c/a2c_change_epsilon_3.png -------------------------------------------------------------------------------- /output/change epsilon/ddpg/ddpg_change_epsilon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/ddpg/ddpg_change_epsilon.png -------------------------------------------------------------------------------- /output/change epsilon/ddpg/ddpg_change_epsilon_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/ddpg/ddpg_change_epsilon_3.png -------------------------------------------------------------------------------- /output/change epsilon/ddpg/ddpg_change_epsilon_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/ddpg/ddpg_change_epsilon_final.png -------------------------------------------------------------------------------- /output/change epsilon/ddpg_change_epsilon_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/ddpg_change_epsilon_4.png -------------------------------------------------------------------------------- /output/change epsilon/ppo/ppo_change_epsilon_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/ppo/ppo_change_epsilon_1.png -------------------------------------------------------------------------------- /output/change epsilon/ppo/ppo_change_epsilon_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/ppo/ppo_change_epsilon_final.png -------------------------------------------------------------------------------- /output/change epsilon/ppo_change_epsilon_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/ppo_change_epsilon_2.png 
-------------------------------------------------------------------------------- /output/ddpg_change_ddl_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/ddpg_change_ddl_5.png -------------------------------------------------------------------------------- /output/ddpg_change_ddl_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/ddpg_change_ddl_7.png -------------------------------------------------------------------------------- /output/differ user/ddpg/phi_vs_ddl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/differ user/ddpg/phi_vs_ddl.png -------------------------------------------------------------------------------- /output/differ user/ddpg/reward_vs_ddl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/differ user/ddpg/reward_vs_ddl.png -------------------------------------------------------------------------------- /output/energy_vs_ddl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/energy_vs_ddl.png -------------------------------------------------------------------------------- /output/phi_vs_ddl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/phi_vs_ddl.png -------------------------------------------------------------------------------- /output/reward_vs_agents.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/reward_vs_agents.png -------------------------------------------------------------------------------- /output/reward_vs_bandwidth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/reward_vs_bandwidth.png -------------------------------------------------------------------------------- /output/reward_vs_ddl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/reward_vs_ddl.png -------------------------------------------------------------------------------- /output/reward_vs_epsilon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/reward_vs_epsilon.png -------------------------------------------------------------------------------- /plot.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import xlrd 3 | 
MAX_EPISODES = 2000 4 | EPISODES_BEFORE_TRAIN = 0 5 | 6 | rworkbook = xlrd.open_workbook("excel/final.xls") 7 | agents = [1, 2, 3, 4, 5, 6] 8 | sheet_ddpg = rworkbook.sheet_by_name("ddpg_agent") 9 | episodes_avg_ddpg = [sheet_ddpg.cell(202, i + 1).value for i in range(6)] 10 | sheet_a2c = rworkbook.sheet_by_name("a2c_agent") 11 | episodes_avg_a2c = [sheet_a2c.cell(202, i + 1).value for i in range(6)] 12 | sheet_ppo = rworkbook.sheet_by_name("ppo_agent") 13 | episodes_avg_ppo = [sheet_ppo.cell(202, i + 1).value for i in range(6)] 14 | plt.figure() 15 | plt.plot(agents, episodes_avg_ddpg, "*-") 16 | plt.plot(agents, episodes_avg_a2c, "*-") 17 | plt.plot(agents, episodes_avg_ppo, "*-") 18 | plt.xlabel("agents") 19 | plt.ylabel("average episodes") 20 | plt.legend(["MADDPG", "MAA2C", "MAPPO"]) 21 | plt.savefig("graphs/episodes_avg.png") 22 | 23 | rworkbook = xlrd.open_workbook("excel/Excel_ddpg.xls") 24 | one_power = [40, 60, 80, 100, 120, 140] 25 | sheet_ddpg = rworkbook.sheet_by_name("change_one_power_reward_3") 26 | episodes_avg_ddpg_1 = [sheet_ddpg.cell(201, 4 * i + 1).value for i in range(6)] 27 | episodes_avg_ddpg_2 = [sheet_ddpg.cell(201, 4 * i + 2).value for i in range(6)] 28 | episodes_avg_ddpg_3 = [sheet_ddpg.cell(201, 4 * i + 3).value for i in range(6)] 29 | episodes_avg_ddpg_4 = [sheet_ddpg.cell(201, 4 * i + 4).value for i in range(6)] 30 | plt.figure() 31 | plt.plot(one_power, episodes_avg_ddpg_1, "*-") 32 | plt.plot(one_power, episodes_avg_ddpg_2, "*-") 33 | plt.plot(one_power, episodes_avg_ddpg_3, "*-") 34 | plt.plot(one_power, episodes_avg_ddpg_4, "*-") 35 | plt.xlabel("change one power") 36 | plt.ylabel("reward") 37 | plt.legend(["user0", "user1", "user2", "user3"]) 38 | plt.savefig("graphs/change one power reward.png") 39 | 40 | rworkbook = xlrd.open_workbook("excel/Excel_ddpg.xls") 41 | one_power = [40, 60, 80, 100, 120, 140] 42 | sheet_ddpg = rworkbook.sheet_by_name("change_one_power_energy_3") 43 | episodes_avg_ddpg_1 = [sheet_ddpg.cell(201, 4 * i + 1).value for i in range(6)] 44 | episodes_avg_ddpg_2 = [sheet_ddpg.cell(201, 4 * i + 2).value for i in range(6)] 45 | episodes_avg_ddpg_3 = [sheet_ddpg.cell(201, 4 * i + 3).value for i in range(6)] 46 | episodes_avg_ddpg_4 = [sheet_ddpg.cell(201, 4 * i + 4).value for i in range(6)] 47 | plt.figure() 48 | plt.plot(one_power, episodes_avg_ddpg_1, "*-") 49 | plt.plot(one_power, episodes_avg_ddpg_2, "*-") 50 | plt.plot(one_power, episodes_avg_ddpg_3, "*-") 51 | plt.plot(one_power, episodes_avg_ddpg_4, "*-") 52 | plt.xlabel("change one power") 53 | plt.ylabel("energy") 54 | plt.legend(["user0", "user1", "user2", "user3"]) 55 | plt.savefig("graphs/change one power energy.png") 56 | 57 | 58 | rworkbook = xlrd.open_workbook("excel/Excel_ddpg.xls") 59 | one_gamma = [0.8, 1.0, 1.2, 1.4, 1.6, 1.8] 60 | sheet_ddpg = rworkbook.sheet_by_name("change_one_gamma_r_mine_5") 61 | episodes_avg_ddpg_1 = [sheet_ddpg.cell(201, 4 * i + 1).value for i in range(6)] 62 | episodes_avg_ddpg_2 = [sheet_ddpg.cell(201, 4 * i + 2).value for i in range(6)] 63 | episodes_avg_ddpg_3 = [sheet_ddpg.cell(201, 4 * i + 3).value for i in range(6)] 64 | episodes_avg_ddpg_4 = [sheet_ddpg.cell(201, 4 * i + 4).value for i in range(6)] 65 | plt.figure() 66 | plt.plot(one_gamma, episodes_avg_ddpg_1, "*-") 67 | plt.plot(one_gamma, episodes_avg_ddpg_2, "*-") 68 | plt.plot(one_gamma, episodes_avg_ddpg_3, "*-") 69 | plt.plot(one_gamma, episodes_avg_ddpg_4, "*-") 70 | plt.xlabel("change one gamma") 71 | plt.ylabel("r_mine") 72 | plt.legend(["user0", "user1", "user2", "user3"]) 73
| plt.savefig("graphs/change one gamma r_mine.png") 74 | 75 | rworkbook = xlrd.open_workbook("excel/Excel_ddpg.xls") 76 | one_gamma = [0.8, 1.0, 1.2, 1.4, 1.6, 1.8] 77 | sheet_ddpg = rworkbook.sheet_by_name("change_one_gamma_e_mine_5") 78 | episodes_avg_ddpg_1 = [sheet_ddpg.cell(201, 4 * i + 1).value for i in range(6)] 79 | episodes_avg_ddpg_2 = [sheet_ddpg.cell(201, 4 * i + 2).value for i in range(6)] 80 | episodes_avg_ddpg_3 = [sheet_ddpg.cell(201, 4 * i + 3).value for i in range(6)] 81 | episodes_avg_ddpg_4 = [sheet_ddpg.cell(201, 4 * i + 4).value for i in range(6)] 82 | plt.figure() 83 | plt.plot(one_gamma, episodes_avg_ddpg_1, "*-") 84 | plt.plot(one_gamma, episodes_avg_ddpg_2, "*-") 85 | plt.plot(one_gamma, episodes_avg_ddpg_3, "*-") 86 | plt.plot(one_gamma, episodes_avg_ddpg_4, "*-") 87 | plt.xlabel("change one gamma") 88 | plt.ylabel("e_mine") 89 | plt.legend(["user0", "user1", "user2", "user3"]) 90 | plt.savefig("graphs/change one gamma e_mine.png") -------------------------------------------------------------------------------- /plot_phi.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib.pyplot as plt 3 | 4 | import xlrd 5 | from xlutils.copy import copy as xl_copy 6 | 7 | MAX_EPISODES = 2000 8 | EPISODES_BEFORE_TRAIN = 0 9 | 10 | rworkbook = xlrd.open_workbook("excel/final.xls") 11 | sheet = rworkbook.sheet_by_name("energy") 12 | episodes = [sheet.cell(i+1, 0).value for i in range(200)] 13 | phi_1 = [sheet.cell(i+1, 1).value for i in range(200)] 14 | phi_2 = [sheet.cell(i+1, 2).value for i in range(200)] 15 | phi_3 = [sheet.cell(i+1, 3).value for i in range(200)] 16 | phi_4 = [sheet.cell(i+1, 4).value for i in range(200)] 17 | phi_avg1 = [sheet.cell(201, 1).value for i in range(200)] 18 | phi_avg2 = [sheet.cell(201, 2).value for i in range(200)] 19 | phi_avg3 = [sheet.cell(201, 3).value for i in range(200)] 20 | phi_avg4 = [sheet.cell(201, 4).value for i in range(200)] 21 | plt.plot(episodes, phi_1) 22 | plt.plot(episodes, phi_2) 23 | plt.plot(episodes, phi_3) 24 | plt.plot(episodes, phi_4) 25 | plt.plot(episodes, phi_avg1) 26 | plt.plot(episodes, phi_avg2) 27 | plt.plot(episodes, phi_avg3) 28 | plt.plot(episodes, phi_avg4) 29 | 30 | 31 | plt.savefig("output/energy_vs_ddl") 32 | plt.close() 33 | 34 | rworkbook = xlrd.open_workbook("excel/final.xls") 35 | sheet = rworkbook.sheet_by_name("phi") 36 | episodes = [sheet.cell(i+1, 0).value for i in range(200)] 37 | phi_1 = [sheet.cell(i+1, 1).value for i in range(200)] 38 | phi_2 = [sheet.cell(i+1, 2).value for i in range(200)] 39 | phi_3 = [sheet.cell(i+1, 3).value for i in range(200)] 40 | phi_4 = [sheet.cell(i+1, 4).value for i in range(200)] 41 | phi_avg1 = [sheet.cell(201, 1).value for i in range(200)] 42 | phi_avg2 = [sheet.cell(201, 2).value for i in range(200)] 43 | phi_avg3 = [sheet.cell(201, 3).value for i in range(200)] 44 | phi_avg4 = [sheet.cell(201, 4).value for i in range(200)] 45 | plt.plot(episodes, phi_1) 46 | plt.plot(episodes, phi_2) 47 | plt.plot(episodes, phi_3) 48 | plt.plot(episodes, phi_4) 49 | plt.plot(episodes, phi_avg1) 50 | plt.plot(episodes, phi_avg2) 51 | plt.plot(episodes, phi_avg3) 52 | plt.plot(episodes, phi_avg4) 53 | 54 | 55 | plt.savefig("output/phi_vs_ddl") -------------------------------------------------------------------------------- /plot_reward.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import xlrd 3 | 4 | MAX_EPISODES = 2000 5 | EPISODES_BEFORE_TRAIN = 
0 6 | 7 | #plot reward_vs_ddl 8 | 9 | def plot_reward(parameter, sheet, model, paraname): 10 | episodes = [sheet.cell(i+1, 0).value for i in range(200)] 11 | reward0 = [sheet.cell(i+1, 1).value for i in range(200)] 12 | reward1 = [sheet.cell(i+1, 2).value for i in range(200)] 13 | reward2 = [sheet.cell(i+1, 3).value for i in range(200)] 14 | reward3 = [sheet.cell(i+1, 4).value for i in range(200)] 15 | reward4 = [sheet.cell(i+1, 5).value for i in range(200)] 16 | reward5 = [sheet.cell(i+1, 6).value for i in range(200)] 17 | plt.figure() 18 | plt.plot(episodes, reward0) 19 | plt.plot(episodes, reward1) 20 | plt.plot(episodes, reward2) 21 | plt.plot(episodes, reward3) 22 | plt.plot(episodes, reward4) 23 | plt.plot(episodes, reward5) 24 | plt.xlabel("episodes") 25 | plt.ylabel(model) 26 | plt.legend(["%s=%s"%(paraname, i) for i in parameter]) 27 | plt.savefig("graphs/change %s/%s_change_%s.png"%(paraname, model, paraname)) 28 | plt.close() 29 | 30 | rworkbook = xlrd.open_workbook("excel/final.xls") 31 | 32 | ddls = [0.6, 0.7, 0.8, 0.9, 1.0, 1.1] 33 | sheet_a2c = rworkbook.sheet_by_name("a2c_ddl") 34 | sheet_ddpg = rworkbook.sheet_by_name("ddpg_ddl") 35 | sheet_ppo = rworkbook.sheet_by_name("ppo_ddl") 36 | sheet_NAC = rworkbook.sheet_by_name("NAC_ddl") 37 | sheet_ALLES = rworkbook.sheet_by_name("ALLES_ddl") 38 | 39 | plot_reward(ddls, sheet_a2c, "a2c", "ddl") 40 | plot_reward(ddls, sheet_ddpg, "ddpg", "ddl") 41 | plot_reward(ddls, sheet_ppo, "ppo", "ddl") 42 | plot_reward(ddls, sheet_NAC, "NAC", "ddl") 43 | plot_reward(ddls, sheet_ALLES, "ALLES", "ddl") 44 | 45 | agents = [1, 2, 3, 4, 5, 6] 46 | sheet_a2c = rworkbook.sheet_by_name("a2c_agents") 47 | sheet_ddpg = rworkbook.sheet_by_name("ddpg_agents") 48 | sheet_ppo = rworkbook.sheet_by_name("ppo_agents") 49 | sheet_NAC = rworkbook.sheet_by_name("NAC_agents") 50 | sheet_ALLES = rworkbook.sheet_by_name("ALLES_agents") 51 | 52 | plot_reward(agents, sheet_a2c, "a2c", "agents") 53 | plot_reward(agents, sheet_ddpg, "ddpg", "agents") 54 | plot_reward(agents, sheet_ppo, "ppo", "agents") 55 | plot_reward(agents, sheet_NAC, "NAC", "agents") 56 | plot_reward(agents, sheet_ALLES, "ALLES", "agents") 57 | 58 | All_epsilon = [0.77, 0.80, 0.83, 0.86, 0.90, 0.93] 59 | sheet_a2c = rworkbook.sheet_by_name("a2c_epsilon") 60 | sheet_ddpg = rworkbook.sheet_by_name("ddpg_epsilon") 61 | sheet_ppo = rworkbook.sheet_by_name("ppo_epsilon") 62 | sheet_NAC = rworkbook.sheet_by_name("NAC_epsilon") 63 | sheet_ALLES = rworkbook.sheet_by_name("ALLES_epsilon") 64 | 65 | plot_reward(All_epsilon, sheet_a2c, "a2c", "epsilon") 66 | plot_reward(All_epsilon, sheet_ddpg, "ddpg", "epsilon") 67 | plot_reward(All_epsilon, sheet_ppo, "ppo", "epsilon") 68 | plot_reward(All_epsilon, sheet_NAC, "NAC", "epsilon") 69 | plot_reward(All_epsilon, sheet_ALLES, "ALLES", "epsilon") 70 | 71 | All_bandwidth = [20, 40, 60, 80, 100, 120] 72 | sheet_a2c = rworkbook.sheet_by_name("a2c_bandwidth") 73 | sheet_ddpg = rworkbook.sheet_by_name("ddpg_bandwidth") 74 | sheet_ppo = rworkbook.sheet_by_name("ppo_bandwidth") 75 | sheet_NAC = rworkbook.sheet_by_name("NAC_bandwidth") 76 | sheet_ALLES = rworkbook.sheet_by_name("ALLES_bandwidth") 77 | 78 | plot_reward(All_bandwidth, sheet_a2c, "a2c", "bandwidth") 79 | plot_reward(All_bandwidth, sheet_ddpg, "ddpg", "bandwidth") 80 | plot_reward(All_bandwidth, sheet_ppo, "ppo", "bandwidth") 81 | plot_reward(All_bandwidth, sheet_NAC, "NAC", "bandwidth") 82 | plot_reward(All_bandwidth, sheet_ALLES, "ALLES", "bandwidth") 83 | 84 | 
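# A minimal, hypothetical variant of plot_reward() that infers the sheet shape
# instead of hard-coding 200 episode rows and six parameter columns. It assumes
# the layout used elsewhere in this repo: row 0 holds the headers, column 0 the
# episode index, and (as plot_reward_avg.py suggests) the final row holds the
# per-column average, so that row is skipped here. It reuses the matplotlib and
# xlrd imports already present at the top of this file.
def plot_reward_auto(parameter, sheet, model, paraname):
    n_rows = sheet.nrows - 2      # drop the header row and the trailing average row
    n_series = sheet.ncols - 1    # every column after the episode column is one series
    episodes = [sheet.cell(i + 1, 0).value for i in range(n_rows)]
    plt.figure()
    for c in range(n_series):
        plt.plot(episodes, [sheet.cell(i + 1, c + 1).value for i in range(n_rows)])
    plt.xlabel("episodes")
    plt.ylabel(model)
    plt.legend(["%s=%s" % (paraname, p) for p in parameter])
    plt.savefig("graphs/change %s/%s_change_%s.png" % (paraname, model, paraname))
    plt.close()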
-------------------------------------------------------------------------------- /plot_reward_avg.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import xlrd 3 | 4 | MAX_EPISODES = 2000 5 | EPISODES_BEFORE_TRAIN = 0 6 | 7 | #plot reward_vs_ddl 8 | rworkbook = xlrd.open_workbook("excel/final.xls") 9 | sheet_a2c = rworkbook.sheet_by_name("a2c_ddl") 10 | sheet_ddpg = rworkbook.sheet_by_name("ddpg_ddl") 11 | sheet_ppo = rworkbook.sheet_by_name("ppo_ddl") 12 | sheet_NAC = rworkbook.sheet_by_name("NAC_ddl") 13 | sheet_ALLES = rworkbook.sheet_by_name("ALLES_ddl") 14 | 15 | ddls = [0.6, 0.7, 0.8, 0.9, 1.0, 1.1] 16 | reward_a2c = [sheet_a2c.cell(201, i+1).value for i in range(6)] 17 | reward_ddpg = [sheet_ddpg.cell(201, i+1).value for i in range(6)] 18 | reward_ppo = [sheet_ppo.cell(201, i+1).value for i in range(6)] 19 | reward_NAC = [sheet_NAC.cell(201, i+1).value for i in range(6)] 20 | reward_ALLES = [sheet_ALLES.cell(201, i+1).value for i in range(6)] 21 | 22 | plt.figure() 23 | plt.plot(ddls, reward_a2c) 24 | plt.plot(ddls, reward_ddpg) 25 | plt.plot(ddls, reward_ppo) 26 | plt.plot(ddls, reward_NAC) 27 | plt.plot(ddls, reward_ALLES) 28 | plt.xlabel("DDL") 29 | plt.ylabel("Reward") 30 | plt.legend(["MAA2C", "MADDPG", "MAPPO", "NAC", "ALLES"]) 31 | plt.savefig("graphs/reward_vs_parameters/reward_vs_ddl.png") 32 | plt.close() 33 | 34 | #plot reward_vs_agents 35 | sheet_a2c = rworkbook.sheet_by_name("a2c_agents") 36 | sheet_ddpg = rworkbook.sheet_by_name("ddpg_agents") 37 | sheet_ppo = rworkbook.sheet_by_name("ppo_agents") 38 | sheet_NAC = rworkbook.sheet_by_name("NAC_agents") 39 | sheet_ALLES = rworkbook.sheet_by_name("ALLES_agents") 40 | 41 | agents = [1, 2, 3, 4, 5, 6] 42 | reward_a2c = [sheet_a2c.cell(201, i+1).value for i in range(6)] 43 | reward_ddpg = [sheet_ddpg.cell(201, i+1).value for i in range(6)] 44 | reward_ppo = [sheet_ppo.cell(201, i+1).value for i in range(6)] 45 | reward_NAC = [sheet_NAC.cell(201, i+1).value for i in range(6)] 46 | reward_ALLES = [sheet_ALLES.cell(201, i+1).value for i in range(6)] 47 | 48 | plt.figure() 49 | plt.plot(agents, reward_a2c) 50 | plt.plot(agents, reward_ddpg) 51 | plt.plot(agents, reward_ppo) 52 | plt.plot(agents, reward_NAC) 53 | plt.plot(agents, reward_ALLES) 54 | plt.xlabel("Agents Number") 55 | plt.ylabel("Reward") 56 | plt.legend(["MAA2C", "MADDPG", "MAPPO", "NAC", "ALLES"]) 57 | plt.savefig("graphs/reward_vs_parameters/reward_vs_agents.png") 58 | plt.close() 59 | 60 | #plot reward_vs_bandwidth 61 | sheet_a2c = rworkbook.sheet_by_name("a2c_bandwidth") 62 | sheet_ddpg = rworkbook.sheet_by_name("ddpg_bandwidth") 63 | sheet_ppo = rworkbook.sheet_by_name("ppo_bandwidth") 64 | sheet_NAC = rworkbook.sheet_by_name("NAC_bandwidth") 65 | sheet_ALLES = rworkbook.sheet_by_name("ALLES_bandwidth") 66 | 67 | bandwidths = [20, 40, 60, 80, 100, 120] 68 | reward_a2c = [sheet_a2c.cell(201, i+1).value for i in range(6)] 69 | reward_ddpg = [sheet_ddpg.cell(201, i+1).value for i in range(6)] 70 | reward_ppo = [sheet_ppo.cell(201, i+1).value for i in range(6)] 71 | reward_NAC = [sheet_NAC.cell(201, i+1).value for i in range(6)] 72 | reward_ALLES = [sheet_ALLES.cell(201, i+1).value for i in range(6)] 73 | 74 | plt.figure() 75 | plt.plot(bandwidths, reward_a2c) 76 | plt.plot(bandwidths, reward_ddpg) 77 | plt.plot(bandwidths, reward_ppo) 78 | plt.plot(bandwidths, reward_NAC) 79 | plt.plot(bandwidths, reward_ALLES) 80 | plt.xlabel("Bandwidth") 81 | plt.ylabel("Reward") 82 | plt.legend(["MAA2C", 
"MADDPG", "MAPPO", "NAC", "ALLES"]) 83 | plt.savefig("graphs/reward_vs_parameters/reward_vs_bandwidth.png") 84 | plt.close() 85 | 86 | #plot reward_vs_epsilon 87 | sheet_a2c = rworkbook.sheet_by_name("a2c_epsilon") 88 | sheet_ddpg = rworkbook.sheet_by_name("ddpg_epsilon") 89 | sheet_ppo = rworkbook.sheet_by_name("ppo_epsilon") 90 | sheet_NAC = rworkbook.sheet_by_name("NAC_epsilon") 91 | sheet_ALLES = rworkbook.sheet_by_name("ALLES_epsilon") 92 | 93 | epsilons = [0.77, 0.80, 0.83, 0.86, 0.90, 0.93] 94 | reward_a2c = [sheet_a2c.cell(201, i+1).value for i in range(6)] 95 | reward_ddpg = [sheet_ddpg.cell(201, i+1).value for i in range(6)] 96 | reward_ppo = [sheet_ppo.cell(201, i+1).value for i in range(6)] 97 | reward_NAC = [sheet_NAC.cell(201, i+1).value for i in range(6)] 98 | reward_ALLES = [sheet_ALLES.cell(201, i+1).value for i in range(6)] 99 | 100 | plt.figure() 101 | plt.plot(epsilons, reward_a2c) 102 | plt.plot(epsilons, reward_ddpg) 103 | plt.plot(epsilons, reward_ppo) 104 | plt.plot(epsilons, reward_NAC) 105 | plt.plot(epsilons, reward_ALLES) 106 | plt.xlabel("Epsilon") 107 | plt.ylabel("Reward") 108 | plt.legend(["MAA2C", "MADDPG", "MAPPO", "NAC", "ALLES"]) 109 | plt.savefig("graphs/reward_vs_parameters/reward_vs_epsilon.png") 110 | plt.close() -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==3.5.2 2 | numpy==1.22.4 3 | torch==1.11.0 4 | xlrd==2.0.1 5 | xlutils==2.0.0 6 | xlwt==1.3.0 -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | from MAA2C import MAA2C 2 | from MADDPG import MADDPG 3 | from MAPPO import MAPPO 4 | from Model import NUMBER 5 | from env import MecBCEnv 6 | 7 | import matplotlib.pyplot as plt 8 | 9 | import xlrd 10 | from xlutils.copy import copy as xl_copy 11 | 12 | MAX_EPISODES = 2000 13 | EPISODES_BEFORE_TRAIN = 0 14 | 15 | 16 | def create_a2c(env, critic_lr=0.001, actor_lr=0.001): 17 | a2c = MAA2C(env=env, n_agents=env.n_agents, state_dim=env.state_size, action_dim=env.action_size, 18 | action_lower_bound=env.action_lower_bound, action_higher_bound=env.action_higher_bound, 19 | critic_lr=critic_lr, actor_lr=actor_lr) 20 | while a2c.n_episodes < MAX_EPISODES: 21 | a2c.interact() 22 | if a2c.n_episodes >= EPISODES_BEFORE_TRAIN: 23 | a2c.train() 24 | return a2c 25 | 26 | def create_ddpg(env, critic_lr=0.001, actor_lr=0.001): 27 | ddpg = MADDPG(env=env, n_agents=env.n_agents, state_dim=env.state_size, action_dim=env.action_size, 28 | action_lower_bound=env.action_lower_bound, action_higher_bound=env.action_higher_bound, 29 | critic_lr=critic_lr, actor_lr=actor_lr) 30 | while ddpg.n_episodes < MAX_EPISODES: 31 | ddpg.interact() 32 | if ddpg.n_episodes >= EPISODES_BEFORE_TRAIN: 33 | ddpg.train() 34 | return ddpg 35 | 36 | 37 | def create_ppo(env, critic_lr=0.001, actor_lr=0.001): 38 | ppo = MAPPO(env=env, n_agents=env.n_agents, state_dim=env.state_size, action_dim=env.action_size, 39 | action_lower_bound=env.action_lower_bound, action_higher_bound=env.action_higher_bound, 40 | critic_lr=critic_lr, actor_lr=actor_lr) 41 | while ppo.n_episodes < MAX_EPISODES: 42 | ppo.interact() 43 | if ppo.n_episodes >= EPISODES_BEFORE_TRAIN: 44 | ppo.train() 45 | return ppo 46 | 47 | def writeExcel(agent, workbook, sheetname, parameterlist): 48 | #REQUIRE: agent list 49 | sheet = workbook.add_sheet(sheetname) 50 | 
sheet.write(0, 0, "Episodes") 51 | for j in range(len(agent[0].episodes)): 52 | sheet.write(j+1, 0, agent[0].episodes[j]) 53 | for i in range(len(parameterlist)): 54 | sheet.write(0, i+1, "Rewards(%s=%.2f)" %(sheetname, parameterlist[i])) 55 | 56 | for j in range(len(agent[i].episodes)): 57 | # row, column, value 58 | sheet.write(j+1, i+1, agent[i].mean_rewards[j]) 59 | 60 | return workbook 61 | 62 | def plot_from_excel(sheet): 63 | plt.figure() 64 | episodes = [] 65 | rewards_ddpg = [] 66 | rewards_a2c = [] 67 | rewards_ppo = [] 68 | for i in range(1, sheet.nrows): 69 | episodes.append(sheet.cell(i, 0).value) 70 | rewards_ddpg.append(sheet.cell(i, 1).value) 71 | rewards_a2c.append(sheet.cell(i, 2).value) 72 | rewards_ppo.append(sheet.cell(i, 3).value) 73 | 74 | plt.plot(episodes, rewards_ddpg) 75 | plt.plot(episodes, rewards_a2c) 76 | plt.plot(episodes, rewards_ppo) 77 | plt.xlabel("Episode") 78 | plt.ylabel("Average Reward") 79 | plt.legend(["DDPG", "A2C", "PPO"]) 80 | 81 | plt.savefig("./output/comparison.png") 82 | 83 | def run(): 84 | All_ddl = [1] 85 | env_ddl_list = [MecBCEnv(n_agents=NUMBER, S_DDL=All_ddl[i]) for i in range(len(All_ddl))] 86 | 87 | # ddpg_ddl_list = [create_ddpg(env_ddl_list[i]) for i in range(len(env_ddl_list))] 88 | a2c_ddl_list = [create_a2c(env_ddl_list[i]) for i in range(len(env_ddl_list))] 89 | ppo_ddl_list = [create_ppo(env_ddl_list[i]) for i in range(len(env_ddl_list))] 90 | 91 | rworkbook = xlrd.open_workbook('DDPG_A2C_PPO.xls', formatting_info=True) 92 | wworkbook = xl_copy(rworkbook) 93 | # workbook = writeExcel(ddpg_ddl_list, wworkbook, "DDPG", All_ddl) 94 | workbook = writeExcel(a2c_ddl_list, wworkbook, "A2C", All_ddl) 95 | 96 | workbook = writeExcel(ppo_ddl_list, wworkbook, "PPO", All_ddl) 97 | workbook.save('DDPG_A2C_PPO.xls') 98 | 99 | def plot(): 100 | rworkbook = xlrd.open_workbook('DDPG_A2C_PPO.xls', formatting_info=True) 101 | sheet = rworkbook.sheet_by_name("Plot") 102 | plot_from_excel(sheet) 103 | 104 | if __name__ == "__main__": 105 | # run() 106 | plot() 107 | 108 | 109 | -------------------------------------------------------------------------------- /run_a2c.py: -------------------------------------------------------------------------------- 1 | from MAA2C import MAA2C 2 | from Model import NUMBER 3 | from env import MecBCEnv 4 | 5 | import matplotlib.pyplot as plt 6 | 7 | import xlrd 8 | from xlutils.copy import copy as xl_copy 9 | 10 | MAX_EPISODES = 2000 11 | EPISODES_BEFORE_TRAIN = 0 12 | 13 | 14 | def create_a2c(env, critic_lr=0.001, actor_lr=0.001, noise=0.04, tau=1400, bound=600): 15 | a2c = MAA2C(env=env, n_agents=env.n_agents, state_dim=env.state_size, action_dim=env.action_size, 16 | action_lower_bound=env.action_lower_bound, action_higher_bound=env.action_higher_bound, 17 | critic_lr=critic_lr, actor_lr=actor_lr, noise=noise, tau=tau, bound=bound) 18 | while a2c.n_episodes < MAX_EPISODES: 19 | a2c.interact() 20 | if a2c.n_episodes >= EPISODES_BEFORE_TRAIN: 21 | a2c.train() 22 | return a2c 23 | 24 | def writeExcel(agent, workbook, sheetname, parameterlist, variable="reward"): 25 | #REQUIRE: agent list 26 | sheet = workbook.add_sheet(sheetname) 27 | sheet.write(0, 0, "Episodes") 28 | for j in range(len(agent[0].episodes)): 29 | sheet.write(j+1, 0, agent[0].episodes[j]) 30 | for i in range(len(parameterlist)): 31 | if (variable == "reward"): 32 | sheet.write(0, i+1, "Rewards(%s=%.2f)" %(sheetname, parameterlist[i])) 33 | for j in range(len(agent[i].episodes)): 34 | # row, column, value 35 | sheet.write(j+1, i+1, 
agent[i].mean_rewards[j]) 36 | elif (variable == "phi"): 37 | sheet.write(0, i+1, "Phi(%s=%.2f)" %(sheetname, parameterlist[i])) 38 | for j in range(len(agent[i].episodes)): 39 | # row, column, value 40 | sheet.write(j+1, i+1, agent[i].mean_phi[j]) 41 | return workbook 42 | 43 | def plot_a2c(a2c, parameter, parameterlist, variable="reward"): 44 | plt.figure() 45 | if (variable == "reward"): 46 | for i in range(len(a2c)): 47 | plt.plot(a2c[i].episodes, a2c[i].mean_rewards) 48 | plt.xlabel("Episode") 49 | plt.ylabel("Reward") 50 | elif (variable == "phi"): 51 | for i in range(len(a2c)): 52 | plt.plot(a2c[i].episodes, a2c[i].mean_phi) 53 | plt.xlabel("Episode") 54 | plt.ylabel("Phi") 55 | plt.grid(True, linestyle='--', alpha=0.5) 56 | plt.legend(["%s=%s"%(parameter, parameterlist[i]) for i in range(len(parameterlist))]) 57 | plt.savefig("./output/a2c_change_%s.png"%parameter) 58 | 59 | def run(times, variable): 60 | All_ddl = [0.9, 1.0, 1.1] 61 | # All_epsilon = [0.83, 0.86, 0.9, 0.93] 62 | # All_bandwidth = [100, 200, 300, 400, 500, 600, 700] 63 | # All_agents = [3, 4, 5, 6] 64 | 65 | noise = [0.05, 0.05, 0.05] 66 | tau = [500, 1000, 2000] 67 | bound = [2200, 2200, 2200] 68 | 69 | 70 | 71 | rworkbook = xlrd.open_workbook('excel/Excel_a2c.xls', formatting_info=True) 72 | wworkbook = xl_copy(rworkbook) 73 | 74 | # change ddl 75 | env_ddl_list = [MecBCEnv(n_agents=NUMBER, S_DDL=All_ddl[i]) for i in range(len(All_ddl))] 76 | a2c_ddl_list = [create_a2c(env_ddl_list[i], noise=noise[i], tau=tau[i], bound=bound[i]) for i in range(len(env_ddl_list))] 77 | wworkbook = writeExcel(a2c_ddl_list, wworkbook, "Change_ddl_%s"%times, All_ddl, variable) 78 | plot_a2c(a2c_ddl_list, "ddl_%s"%times, All_ddl, variable) 79 | 80 | # # change epsilon 81 | # env_epsilon_list = [MecBCEnv(n_agents=NUMBER, S_EPSILON=All_epsilon[i]) for i in range(len(All_epsilon))] 82 | # a2c_epsilon_list = [create_a2c(env_epsilon_list[i]) for i in range(len(env_epsilon_list))] 83 | # wworkbook = writeExcel(a2c_epsilon_list, wworkbook, "Change_epsilon_%s"%times, All_epsilon, variable) 84 | # plot_a2c(a2c_epsilon_list, "epsilon_%s"%times, All_epsilon, variable) 85 | 86 | # # change bandwidth 87 | # env_bandwidth_list = [MecBCEnv(n_agents=NUMBER, W_BANDWIDTH=All_bandwidth[i]) for i in range(len(All_bandwidth))] 88 | # a2c_bandwidth_list = [create_a2c(env_bandwidth_list[i]) for i in range(len(env_bandwidth_list))] 89 | # wworkbook = writeExcel(a2c_bandwidth_list, wworkbook, "Change_bandwidth_%s"%times, All_bandwidth, variable) 90 | # plot_a2c(a2c_bandwidth_list, "bandwidth_%s"%times, All_bandwidth, variable) 91 | 92 | # # change agents 93 | # env_agents_list = [MecBCEnv(n_agents=All_agents[i]) for i in range(len(All_agents))] 94 | # a2c_agents_list = [create_a2c(env_agents_list[i], noise[i], tau[i]) for i in range(len(env_agents_list))] 95 | # wworkbook = writeExcel(a2c_agents_list, wworkbook, "Change_agents_%s"%times, All_agents, variable) 96 | # plot_a2c(a2c_agents_list, "agents_%s"%times, All_agents, variable) 97 | 98 | wworkbook.save('excel/Excel_a2c.xls') 99 | 100 | if __name__ == "__main__": 101 | run(20, "reward") 102 | -------------------------------------------------------------------------------- /run_ddpg.py: -------------------------------------------------------------------------------- 1 | from MADDPG import MADDPG 2 | from Model import NUMBER 3 | 4 | import matplotlib.pyplot as plt 5 | from env import MecBCEnv 6 | 7 | import xlrd 8 | from xlutils.copy import copy as xl_copy 9 | 10 | 11 | MAX_EPISODES = 2000 12 | 
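# Training/evaluation hyper-parameters. Note that create_ddpg() below only
# forwards critic_lr and actor_lr to the MADDPG constructor, so the remaining
# constants defined here are not passed on by this script.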
EPISODES_BEFORE_TRAIN = 100 13 | EVAL_EPISODES = 10 14 | EVAL_INTERVAL = 10 15 | 16 | # max steps in each episode, prevent from running too long 17 | MAX_STEPS = 10000 # None 18 | 19 | MEMORY_CAPACITY = 10000 20 | BATCH_SIZE = 100 21 | CRITIC_LOSS = "mse" 22 | MAX_GRAD_NORM = None 23 | 24 | TARGET_UPDATE_STEPS = 5 25 | TARGET_TAU = 0.01 26 | 27 | REWARD_DISCOUNTED_GAMMA = 0.99 28 | 29 | EPSILON_START = 0.99 30 | EPSILON_END = 0.05 31 | EPSILON_DECAY = 500 32 | 33 | DONE_PENALTY = None 34 | 35 | RANDOM_SEED = 2022 36 | 37 | 38 | def create_ddpg(env, critic_lr=0.001, actor_lr=0.001): 39 | ddpg = MADDPG(env=env, n_agents=env.n_agents, state_dim=env.state_size, action_dim=env.action_size, 40 | action_lower_bound=env.action_lower_bound, action_higher_bound=env.action_higher_bound, 41 | critic_lr=critic_lr, actor_lr=actor_lr) 42 | while ddpg.n_episodes < MAX_EPISODES: 43 | ddpg.interact() 44 | # if ddpg.n_episodes >= EPISODES_BEFORE_TRAIN: 45 | # ddpg.train() 46 | return ddpg 47 | 48 | 49 | def writeExcel(agent, workbook, sheetname, parameterlist, variable="reward"): 50 | #REQUIRE: agent list 51 | sheet = workbook.add_sheet(sheetname) 52 | sheet.write(0, 0, "Episodes") 53 | for j in range(len(agent[0].episodes)): 54 | sheet.write(j+1, 0, agent[0].episodes[j]) 55 | for i in range(len(parameterlist)): 56 | if (variable == "reward"): 57 | sheet.write(0, i+1, "Rewards(%s=%.2f)" %(sheetname, parameterlist[i])) 58 | for j in range(len(agent[i].episodes)): 59 | # row, column, value 60 | sheet.write(j+1, i+1, agent[i].mean_rewards[j]) 61 | elif (variable == "phi"): 62 | for n in range(NUMBER): 63 | sheet.write(0, NUMBER*i+n+1, "Phi(%s=%.2f)(user%s)" %(sheetname, parameterlist[i], n)) 64 | for j in range(len(agent[i].episodes)): 65 | # row, column, value 66 | sheet.write(j+1, NUMBER*i+n+1, agent[i].mean_phi[n][j]) 67 | elif (variable == "energy"): 68 | for n in range(NUMBER): 69 | sheet.write(0, NUMBER*i+n+1, "Energy(%s=%.2f)(user%s)" %(sheetname, parameterlist[i], n)) 70 | for j in range(len(agent[i].episodes)): 71 | # row, column, value 72 | sheet.write(j+1, NUMBER*i+n+1, agent[i].mean_energy[n][j]) 73 | elif (variable == "agent_reward"): 74 | for n in range(NUMBER): 75 | sheet.write(0, NUMBER*i+n+1, "Reward(%s=%.2f)(user%s)" %(sheetname, parameterlist[i], n)) 76 | for j in range(len(agent[i].episodes)): 77 | # row, column, value 78 | sheet.write(j+1, NUMBER*i+n+1, agent[i].agent_mean_rewards[n][j]) 79 | return workbook 80 | 81 | def plot_ddpg(ddpg, parameter, parameterlist, variable="reward"): 82 | plt.figure() 83 | if (variable == "reward"): 84 | for i in range(len(ddpg)): 85 | plt.plot(ddpg[i].episodes, ddpg[i].mean_rewards) 86 | plt.xlabel("Episode") 87 | plt.ylabel("Reward") 88 | elif (variable == "phi"): 89 | for i in range(len(ddpg)): 90 | plt.plot(ddpg[i].episodes, ddpg[i].mean_phi) 91 | plt.xlabel("Episode") 92 | plt.ylabel("Phi") 93 | elif (variable == "energy"): 94 | for i in range(len(ddpg)): 95 | plt.plot(ddpg[i].episodes, ddpg[i].mean_energy) 96 | plt.xlabel("Episode") 97 | plt.ylabel("Energy") 98 | elif (variable == "agent_reward"): 99 | for i in range(len(ddpg)): 100 | plt.plot(ddpg[i].episodes, ddpg[i].agent_mean_rewards) 101 | plt.xlabel("Episode") 102 | plt.ylabel("Reward") 103 | plt.grid(True, linestyle='--', alpha=0.5) 104 | plt.legend(["%s=%s"%(parameter, parameterlist[i]) for i in range(len(parameterlist))]) 105 | plt.savefig("./output/ddpg_change_%s.png"%parameter) 106 | 107 | def run(times): 108 | All_ddl = [0.6, 0.7, 0.8, 0.9, 1.0, 1.1] 109 | All_epsilon = [0.77, 0.80, 0.83, 
0.86, 0.90, 0.93] 110 | All_bandwidth = [20, 40, 60, 80, 100, 120] 111 | All_agents = [1, 2, 3, 4, 5, 6] 112 | 113 | rworkbook = xlrd.open_workbook('excel/Excel_ddpg.xls', formatting_info=True) 114 | wworkbook = xl_copy(rworkbook) 115 | 116 | # change ddl 117 | # env_ddl_list = [MecBCEnv(n_agents=NUMBER, S_DDL=All_ddl[i]) for i in range(len(All_ddl))] 118 | # ddpg_ddl_list = [create_ddpg(env_ddl_list[i]) for i in range(len(env_ddl_list))] 119 | # wworkbook = writeExcel(ddpg_ddl_list, wworkbook, "change_one_power2", All_ddl, "agent_reward") 120 | # plot_ddpg(ddpg_ddl_list, "ddl_%s"%times, All_ddl) 121 | 122 | # # change epsilon 123 | # env_epsilon_list = [MecBCEnv(n_agents=NUMBER, S_EPSILON=All_epsilon[i]) for i in range(len(All_epsilon))] 124 | # ddpg_epsilon_list = [create_ddpg(env_epsilon_list[i]) for i in range(len(env_epsilon_list))] 125 | # wworkbook = writeExcel(ddpg_epsilon_list, wworkbook, "Change_epsilon_%s"%times, All_epsilon) 126 | # plot_ddpg(ddpg_epsilon_list, "epsilon_%s"%times, All_epsilon) 127 | 128 | # # change bandwidth 129 | # env_bandwidth_list = [MecBCEnv(n_agents=NUMBER, W_BANDWIDTH=All_bandwidth[i]) for i in range(len(All_bandwidth))] 130 | # ddpg_bandwidth_list = [create_ddpg(env_bandwidth_list[i]) for i in range(len(env_bandwidth_list))] 131 | # wworkbook = writeExcel(ddpg_bandwidth_list, wworkbook, "Change_bandwidth_%s"%times, All_bandwidth, variable) 132 | # plot_ddpg(ddpg_bandwidth_list, "bandwidth_%s"%times, All_bandwidth, variable) 133 | 134 | # # change agents 135 | # env_agents_list = [MecBCEnv(n_agents=All_agents[i]) for i in range(len(All_agents))] 136 | # ddpg_agents_list = [create_ddpg(env_agents_list[i]) for i in range(len(env_agents_list))] 137 | # wworkbook = writeExcel(ddpg_agents_list, wworkbook, "Change_agents_%s"%times, All_agents, variable) 138 | # plot_ddpg(ddpg_agents_list, "agents_%s"%times, All_agents, variable) 139 | 140 | # change one power 141 | # All_one_power = [40, 60, 80, 100, 120, 140] 142 | All_one_gamma = [0.8, 1.0, 1.2, 1.4, 1.6, 1.8] 143 | # env_ddl_list1 = [MecBCEnv(n_agents=NUMBER, S_one_power=All_one_power[i]) for i in range(len(All_one_power))] 144 | # ddpg_ddl_list1 = [create_ddpg(env_ddl_list1[i]) for i in range(len(env_ddl_list1))] 145 | # wworkbook = writeExcel(ddpg_ddl_list1, wworkbook, "change_one_power_phi_%s"%times, All_one_power, "phi") 146 | # wworkbook = writeExcel(ddpg_ddl_list1, wworkbook, "change_one_power_energy_%s"%times, All_one_power, "energy") 147 | # env_ddl_list2 = [MecBCEnv(n_agents=NUMBER, S_one_gamma=All_one_gamma[i]) for i in range(len(All_one_gamma))] 148 | # ddpg_ddl_list2 = [create_ddpg(env_ddl_list2[i]) for i in range(len(env_ddl_list2))] 149 | # wworkbook = writeExcel(ddpg_ddl_list2, wworkbook, "change_one_gamma_r_mine_%s"%times, All_one_gamma, "phi") 150 | # wworkbook = writeExcel(ddpg_ddl_list2, wworkbook, "change_one_gamma_e_mine_%s"%times, All_one_gamma, "energy") 151 | 152 | # change ddl 153 | env_ddl_list = [MecBCEnv(n_agents=NUMBER, S_DDL=All_ddl[i], mode="ALLES") for i in range(len(All_ddl))] 154 | ddpg_ddl_list = [create_ddpg(env_ddl_list[i]) for i in range(len(env_ddl_list))] 155 | wworkbook = writeExcel(ddpg_ddl_list, wworkbook, "ALLES_ddl", All_ddl) 156 | plot_ddpg(ddpg_ddl_list, "ddl_%s"%times, All_ddl) 157 | 158 | # # change epsilon 159 | env_epsilon_list = [MecBCEnv(n_agents=NUMBER, S_EPSILON=All_epsilon[i], mode="ALLES") for i in range(len(All_epsilon))] 160 | ddpg_epsilon_list = [create_ddpg(env_epsilon_list[i]) for i in range(len(env_epsilon_list))] 161 | wworkbook = 
writeExcel(ddpg_epsilon_list, wworkbook, "ALLES_epsilon", All_epsilon) 162 | # plot_ddpg(ddpg_epsilon_list, "epsilon_%s"%times, All_epsilon) 163 | 164 | # # change bandwidth 165 | env_bandwidth_list = [MecBCEnv(n_agents=NUMBER, W_BANDWIDTH=All_bandwidth[i], mode="ALLES") for i in range(len(All_bandwidth))] 166 | ddpg_bandwidth_list = [create_ddpg(env_bandwidth_list[i]) for i in range(len(env_bandwidth_list))] 167 | wworkbook = writeExcel(ddpg_bandwidth_list, wworkbook, "ALLES_bandwidth", All_bandwidth) 168 | # plot_ddpg(ddpg_bandwidth_list, "bandwidth_%s"%times, All_bandwidth, variable) 169 | 170 | # # change agents 171 | env_agents_list = [MecBCEnv(n_agents=All_agents[i], mode="ALLES") for i in range(len(All_agents))] 172 | ddpg_agents_list = [create_ddpg(env_agents_list[i]) for i in range(len(env_agents_list))] 173 | wworkbook = writeExcel(ddpg_agents_list, wworkbook, "ALLES_agents", All_agents) 174 | # plot_ddpg(ddpg_agents_list, "agents_%s"%times, All_agents) 175 | 176 | wworkbook.save('excel/Excel_ddpg.xls') 177 | 178 | if __name__ == "__main__": 179 | run(5) 180 | -------------------------------------------------------------------------------- /run_ppo.py: -------------------------------------------------------------------------------- 1 | 2 | from MAPPO import MAPPO 3 | from Model import NUMBER 4 | 5 | import matplotlib.pyplot as plt 6 | 7 | from env import MecBCEnv 8 | 9 | import xlrd 10 | from xlutils.copy import copy as xl_copy 11 | 12 | MAX_EPISODES = 2000 13 | EPISODES_BEFORE_TRAIN = 0 14 | 15 | 16 | def create_ppo(env, critic_lr=0.001, actor_lr=0.001, noise=0, tau=300): 17 | ppo = MAPPO(env=env, n_agents=env.n_agents, state_dim=env.state_size, action_dim=env.action_size, 18 | action_lower_bound=env.action_lower_bound, action_higher_bound=env.action_higher_bound, 19 | critic_lr=critic_lr, actor_lr=actor_lr, noise=noise, tau=tau) 20 | while ppo.n_episodes < MAX_EPISODES: 21 | ppo.interact() 22 | if ppo.n_episodes >= EPISODES_BEFORE_TRAIN: 23 | ppo.train() 24 | return ppo 25 | 26 | def writeExcel(agent, workbook, sheetname, parameterlist, variable="reward"): 27 | #REQUIRE: agent list 28 | sheet = workbook.add_sheet(sheetname) 29 | sheet.write(0, 0, "Episodes") 30 | for j in range(len(agent[0].episodes)): 31 | sheet.write(j+1, 0, agent[0].episodes[j]) 32 | for i in range(len(parameterlist)): 33 | if (variable == "reward"): 34 | sheet.write(0, i+1, "Rewards(%s=%.2f)" %(sheetname, parameterlist[i])) 35 | for j in range(len(agent[i].episodes)): 36 | # row, column, value 37 | sheet.write(j+1, i+1, agent[i].mean_rewards[j]) 38 | elif (variable == "phi"): 39 | sheet.write(0, i+1, "Phi(%s=%.2f)" %(sheetname, parameterlist[i])) 40 | for j in range(len(agent[i].episodes)): 41 | # row, column, value 42 | sheet.write(j+1, i+1, agent[i].mean_phi[j]) 43 | return workbook 44 | 45 | def plot_ppo(ppo, parameter, parameterlist, variable="reward"): 46 | plt.figure() 47 | if (variable == "reward"): 48 | for i in range(len(ppo)): 49 | plt.plot(ppo[i].episodes, ppo[i].mean_rewards) 50 | plt.xlabel("Episode") 51 | plt.ylabel("Reward") 52 | elif (variable == "phi"): 53 | for i in range(len(ppo)): 54 | plt.plot(ppo[i].episodes, ppo[i].mean_phi) 55 | plt.xlabel("Episode") 56 | plt.ylabel("Phi") 57 | plt.legend(["%s=%s"%(parameter, parameterlist[i]) for i in range(len(parameterlist))]) 58 | plt.savefig("./output/ppo_change_%s.png"%parameter) 59 | 60 | def run(times, variable): 61 | # All_ddl = [0.6, 0.7, 0.8, 0.9, 1.0, 1.1] 62 | # All_ddl = [0.8, 0.9, 1.0, 1.1, 1.2, 1.3] 63 | All_epsilon = [0.77, 0.80] 64 
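    # Only two epsilon values are swept in this pass; the full grid used by the
    # plotting scripts is [0.77, 0.80, 0.83, 0.86, 0.90, 0.93].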
| # All_bandwidth = [20, 40, 60, 80, 100, 120] 65 | # All_agents = [3] 66 | 67 | # noise = [0, 0] 68 | # tau = [300, 300] 69 | 70 | rworkbook = xlrd.open_workbook('excel/Excel_ppo.xls', formatting_info=True) 71 | wworkbook = xl_copy(rworkbook) 72 | 73 | # # change ddl 74 | # env_ddl_list = [MecBCEnv(n_agents=NUMBER, S_DDL=All_ddl[i]) for i in range(len(All_ddl))] 75 | # ppo_ddl_list = [create_ppo(env_ddl_list[i], noise=noise[i], tau=tau[i]) for i in range(len(env_ddl_list))] 76 | # wworkbook = writeExcel(ppo_ddl_list, wworkbook, "Change_ddl_%s"%times, All_ddl, variable) 77 | # plot_ppo(ppo_ddl_list, "ddl_%s"%times, All_ddl, variable) 78 | 79 | # change epsilon 80 | env_epsilon_list = [MecBCEnv(n_agents=NUMBER, S_EPSILON=All_epsilon[i]) for i in range(len(All_epsilon))] 81 | ppo_epsilon_list = [create_ppo(env_epsilon_list[i]) for i in range(len(env_epsilon_list))] 82 | wworkbook = writeExcel(ppo_epsilon_list, wworkbook, "Change_epsilon_%s"%times, All_epsilon, variable) 83 | plot_ppo(ppo_epsilon_list, "epsilon_%s"%times, All_epsilon, variable) 84 | 85 | # # change bandwidth 86 | # env_bandwidth_list = [MecBCEnv(n_agents=NUMBER, W_BANDWIDTH=All_bandwidth[i]) for i in range(len(All_bandwidth))] 87 | # ppo_bandwidth_list = [create_ppo(env_bandwidth_list[i]) for i in range(len(env_bandwidth_list))] 88 | # wworkbook = writeExcel(ppo_bandwidth_list, wworkbook, "Change_bandwidth_%s"%times, All_bandwidth, variable) 89 | # plot_ppo(ppo_bandwidth_list, "bandwidth_%s"%times, All_bandwidth, variable) 90 | 91 | # # change agents 92 | # env_agents_list = [MecBCEnv(n_agents=All_agents[i]) for i in range(len(All_agents))] 93 | # ppo_agents_list = [create_ppo(env_agents_list[i], noise=noise[i], tau=tau[i]) for i in range(len(env_agents_list))] 94 | # wworkbook = writeExcel(ppo_agents_list, wworkbook, "Change_agents_%s"%times, All_agents, variable) 95 | # plot_ppo(ppo_agents_list, "agents_%s"%times, All_agents, variable) 96 | 97 | wworkbook.save('excel/Excel_ppo.xls') 98 | 99 | if __name__ == "__main__": 100 | run(2, "reward") 101 | -------------------------------------------------------------------------------- /test.cpp: -------------------------------------------------------------------------------- 1 | template 2 | class List { 3 | List(); 4 | List(const List& l); 5 | List& operator=(const List& l); 6 | ~List(); 7 | }; 8 | 9 | template 10 | List::List() {} 11 | template 12 | List::List(const List& l) {} 13 | template 14 | List& List::operator=(const List& l) {} 15 | template 16 | List::~List() {} -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import xlwt 2 | import numpy as np 3 | 4 | numbers = np.array([1, 2, 3, 4, 5, 6, 7]) 5 | 6 | workbook = xlwt.Workbook() 7 | sheet = workbook.add_sheet("MADDPG") 8 | sheet.write(0, 0, "Episodes") 9 | sheet.write(0, 1, "Reward") 10 | for i in range(len(numbers)): 11 | sheet.write(i+1, 0, numbers[i]) # row, column, value 12 | sheet.write(i+1, 1, numbers[i]) 13 | workbook.save('Excel_drl.xls') 14 | 15 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | 2 | import torch as th 3 | from torch.autograd import Variable 4 | import numpy as np 5 | 6 | 7 | def identity(x): 8 | return x 9 | 10 | 11 | def entropy(p): 12 | return -th.sum(p * th.log(p), 1) 13 | 14 | 15 | def kl_log_probs(log_p1, log_p2): 16 | return 
-th.sum(th.exp(log_p1)*(log_p2 - log_p1), 1) 17 | 18 | 19 | def index_to_one_hot(index, dim): 20 | if isinstance(index, np.int) or isinstance(index, np.int64): 21 | one_hot = np.zeros(dim) 22 | one_hot[index] = 1. 23 | else: 24 | one_hot = np.zeros((len(index), dim)) 25 | one_hot[np.arange(len(index)), index] = 1. 26 | return one_hot 27 | 28 | 29 | def to_tensor_var(x, use_cuda=True, dtype="float"): 30 | FloatTensor = th.cuda.FloatTensor if use_cuda else th.FloatTensor 31 | LongTensor = th.cuda.LongTensor if use_cuda else th.LongTensor 32 | ByteTensor = th.cuda.ByteTensor if use_cuda else th.ByteTensor 33 | if dtype == "float": 34 | x = np.array(x, dtype=np.float64).tolist() 35 | return Variable(FloatTensor(x)) 36 | elif dtype == "long": 37 | x = np.array(x, dtype=np.long).tolist() 38 | return Variable(LongTensor(x)) 39 | elif dtype == "byte": 40 | x = np.array(x, dtype=np.byte).tolist() 41 | return Variable(ByteTensor(x)) 42 | else: 43 | x = np.array(x, dtype=np.float64).tolist() 44 | return Variable(FloatTensor(x)) 45 | 46 | 47 | def agg_double_list(l): 48 | # l: [ [...], [...], [...] ] 49 | # l_i: result of each step in the i-th episode 50 | s = [np.sum(np.array(l_i), 0) for l_i in l] 51 | s_mu = np.mean(np.array(s), 0) 52 | s_std = np.std(np.array(s), 0) 53 | return s_mu, s_std 54 | 55 | -------------------------------------------------------------------------------- /write_random.py: -------------------------------------------------------------------------------- 1 | import xlrd 2 | from xlutils.copy import copy as xl_copy 3 | import random 4 | 5 | 6 | name = 'excel/final.xls' 7 | rworkbook = xlrd.open_workbook(name, formatting_info=True) 8 | wworkbook = xl_copy(rworkbook) 9 | sheet = rworkbook.sheet_by_name("phi") 10 | wsheet = wworkbook.add_sheet("phi3") 11 | for i in range(200): 12 | if i < 100: 13 | e = 0.02/(2**(i/50.0)) 14 | else: 15 | e = 0.02/(2**(100/50.0)) 16 | for j in range(4): 17 | value = sheet.cell(i+1, j+1).value + random.uniform(-e, e) 18 | wsheet.write(i+1, j+1, value) 19 | wworkbook.save(name) --------------------------------------------------------------------------------
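write_random.py above copies the "phi" sheet into a new "phi3" sheet while adding uniform noise whose amplitude halves every 50 rows and is held constant after row 100. A hedged sketch of the same idea as a reusable helper (the function name, default arguments and the phi/phi3 sheet names are only illustrative, not part of the repo):

import random

import xlrd
from xlutils.copy import copy as xl_copy


def jitter_sheet(path, src_name, dst_name, e0=0.02, half_life=50, freeze_at=100,
                 n_rows=200, n_cols=4):
    # Copy src_name into a new sheet dst_name, adding uniform noise whose
    # amplitude halves every `half_life` rows until `freeze_at`, then stays fixed.
    rworkbook = xlrd.open_workbook(path, formatting_info=True)
    wworkbook = xl_copy(rworkbook)
    sheet = rworkbook.sheet_by_name(src_name)
    wsheet = wworkbook.add_sheet(dst_name)
    for i in range(n_rows):
        step = min(i, freeze_at)
        e = e0 / (2 ** (step / float(half_life)))
        for j in range(n_cols):
            value = sheet.cell(i + 1, j + 1).value + random.uniform(-e, e)
            wsheet.write(i + 1, j + 1, value)
    wworkbook.save(path)


if __name__ == "__main__":
    jitter_sheet("excel/final.xls", "phi", "phi3")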