├── DRL-for-edge-computing.pdf
├── DRL-presentation.pdf
├── LICENSE
├── MAA2C.py
├── MADDPG.py
├── MAPPO.py
├── Memory.py
├── Model.py
├── README.md
├── SECURITY.md
├── __init__.py
├── __pycache__
│   ├── Agent.cpython-310.pyc
│   ├── Agent.cpython-38.pyc
│   ├── MAA2C.cpython-310.pyc
│   ├── MAA2C.cpython-38.pyc
│   ├── MADDPG.cpython-310.pyc
│   ├── MADDPG.cpython-38.pyc
│   ├── MADDPG.cpython-39.pyc
│   ├── MAPPO.cpython-310.pyc
│   ├── MAPPO.cpython-38.pyc
│   ├── Memory.cpython-310.pyc
│   ├── Memory.cpython-38.pyc
│   ├── Memory.cpython-39.pyc
│   ├── Model.cpython-310.pyc
│   ├── Model.cpython-38.pyc
│   ├── Model.cpython-39.pyc
│   ├── ddpg.cpython-38.pyc
│   ├── env.cpython-310.pyc
│   ├── env.cpython-38.pyc
│   ├── env.cpython-39.pyc
│   ├── env_1.cpython-38.pyc
│   ├── run_ddpg.cpython-38.pyc
│   ├── utils.cpython-310.pyc
│   ├── utils.cpython-38.pyc
│   └── utils.cpython-39.pyc
├── env.py
├── excel
│   ├── DDPG_A2C_PPO.xls
│   ├── Excel_a2c.xls
│   ├── Excel_ddpg.xls
│   ├── Excel_ppo.xls
│   └── final.xls
├── graphs
│   ├── 498
│   │   ├── convergence.png
│   │   └── reward_vs_parameter.png
│   ├── change agents
│   │   ├── ALLES_change_agents.png
│   │   ├── NAC_change_agents.png
│   │   ├── a2c_change_agents.png
│   │   ├── ddpg_change_agents.png
│   │   └── ppo_change_agents.png
│   ├── change bandwidth
│   │   ├── ALLES_change_bandwidth.png
│   │   ├── NAC_change_bandwidth.png
│   │   ├── a2c_change_bandwidth.png
│   │   ├── ddpg_change_bandwidth.png
│   │   └── ppo_change_bandwidth.png
│   ├── change ddl
│   │   ├── ALLES_change_ddl.png
│   │   ├── NAC_change_ddl.png
│   │   ├── a2c_change_ddl.png
│   │   ├── ddpg_change_ddl.png
│   │   └── ppo_change_ddl.png
│   ├── change epsilon
│   │   ├── ALLES_change_epsilon.png
│   │   ├── NAC_change_epsilon.png
│   │   ├── a2c_change_epsilon.png
│   │   ├── ddpg_change_epsilon.png
│   │   └── ppo_change_epsilon.png
│   ├── change one gamma
│   │   ├── change one gamma e_mine.png
│   │   └── change one gamma r_mine.png
│   ├── change one power
│   │   ├── change one power energy.png
│   │   └── change one power reward.png
│   ├── ddpg_change_lr.png
│   ├── episodes_avg.png
│   ├── phi&energy
│   │   ├── energy.png
│   │   └── phi.png
│   └── reward_vs_parameters
│       ├── reward_vs_agents.png
│       ├── reward_vs_bandwidth.png
│       ├── reward_vs_ddl.png
│       └── reward_vs_epsilon.png
├── output
│   ├── a2c_change_ddl_10.png
│   ├── a2c_change_ddl_11.png
│   ├── a2c_change_ddl_12.png
│   ├── a2c_change_ddl_13.png
│   ├── a2c_change_ddl_14.png
│   ├── a2c_change_ddl_15.png
│   ├── a2c_change_ddl_16.png
│   ├── a2c_change_ddl_17.png
│   ├── a2c_change_ddl_18.png
│   ├── a2c_change_ddl_19.png
│   ├── a2c_change_ddl_20.png
│   ├── a2c_change_ddl_8.png
│   ├── a2c_change_ddl_9.png
│   ├── change agents
│   │   ├── a2c
│   │   │   ├── a2c_change_agents_1.png
│   │   │   ├── a2c_change_agents_10.png
│   │   │   ├── a2c_change_agents_11.png
│   │   │   ├── a2c_change_agents_4.png
│   │   │   ├── a2c_change_agents_5.png
│   │   │   ├── a2c_change_agents_6.png
│   │   │   ├── a2c_change_agents_7.png
│   │   │   ├── a2c_change_agents_8.png
│   │   │   ├── a2c_change_agents_9.png
│   │   │   └── a2c_change_agents_final.png
│   │   ├── ddpg
│   │   │   ├── ddpg_change_agents_2.png
│   │   │   ├── ddpg_change_agents_3.png
│   │   │   └── ddpg_change_agents_final.png
│   │   ├── ppo
│   │   │   ├── ppo_change_agents_1.png
│   │   │   ├── ppo_change_agents_2.png
│   │   │   └── ppo_change_agents_final.png
│   │   ├── ppo_change_agents_3.png
│   │   ├── ppo_change_agents_4.png
│   │   └── ppo_change_agents_5.png
│   ├── change bandwidth
│   │   ├── a2c
│   │   │   ├── a2c_change_bandwidth.png
│   │   │   ├── a2c_change_bandwidth1.png
│   │   │   ├── a2c_change_bandwidth2.png
│   │   │   ├── a2c_change_bandwidth_11.png
│   │   │   ├── a2c_change_bandwidth_3.png
│   │   │   └── a2c_change_bandwidth_final.png
│   │   ├── ddpg
│   │   │   ├── ddpg_change_bandwidth.png
│   │   │   ├── ddpg_change_bandwidth_3.png
│   │   │   └── ddpg_change_bandwidth_final.png
│   │   └── ppo
│   │       ├── ppo_change_bandwidth_1.png
│   │       └── ppo_change_bandwidth_final.png
│   ├── change ddl
│   │   ├── a2c
│   │   │   ├── a2c_change_ddl.png
│   │   │   ├── a2c_change_ddl1.png
│   │   │   ├── a2c_change_ddl2.png
│   │   │   ├── a2c_change_ddl_2.png
│   │   │   ├── a2c_change_ddl_3.png
│   │   │   ├── a2c_change_ddl_4.png
│   │   │   ├── a2c_change_ddl_5.png
│   │   │   ├── a2c_change_ddl_6.png
│   │   │   ├── a2c_change_ddl_7.png
│   │   │   ├── a2c_change_ddl_9.png
│   │   │   └── a2c_change_ddl_final.png
│   │   ├── ddpg
│   │   │   ├── ddpg_change_ddl.png
│   │   │   ├── ddpg_change_ddl_3.png
│   │   │   ├── ddpg_change_ddl_4.png
│   │   │   └── ddpg_change_ddl_7.png
│   │   └── ppo
│   │       ├── ppo_change_ddl_1.png
│   │       ├── ppo_change_ddl_2.png
│   │       ├── ppo_change_ddl_3.png
│   │       └── ppo_change_ddl_final.png
│   ├── change epsilon
│   │   ├── a2c
│   │   │   ├── a2c_change_epsilon.png
│   │   │   ├── a2c_change_epsilon1.png
│   │   │   ├── a2c_change_epsilon2.png
│   │   │   └── a2c_change_epsilon_3.png
│   │   ├── ddpg
│   │   │   ├── ddpg_change_epsilon.png
│   │   │   ├── ddpg_change_epsilon_3.png
│   │   │   └── ddpg_change_epsilon_final.png
│   │   ├── ddpg_change_epsilon_4.png
│   │   ├── ppo
│   │   │   ├── ppo_change_epsilon_1.png
│   │   │   └── ppo_change_epsilon_final.png
│   │   └── ppo_change_epsilon_2.png
│   ├── ddpg_change_ddl_5.png
│   ├── ddpg_change_ddl_7.png
│   ├── differ user
│   │   └── ddpg
│   │       ├── phi_vs_ddl.png
│   │       └── reward_vs_ddl.png
│   ├── energy_vs_ddl.png
│   ├── phi_vs_ddl.png
│   ├── reward_vs_agents.png
│   ├── reward_vs_bandwidth.png
│   ├── reward_vs_ddl.png
│   └── reward_vs_epsilon.png
├── plot.py
├── plot_phi.py
├── plot_reward.py
├── plot_reward_avg.py
├── requirements.txt
├── run.py
├── run_a2c.py
├── run_ddpg.py
├── run_ppo.py
├── test.cpp
├── test.py
├── utils.py
└── write_random.py
/DRL-for-edge-computing.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/DRL-for-edge-computing.pdf
--------------------------------------------------------------------------------
/DRL-presentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/DRL-presentation.pdf
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 XinyaoQiu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MAA2C.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | import torch as th
3 | from torch.optim import Adam, RMSprop
4 | from math import exp
5 |
6 | import numpy as np
7 | from Memory import ReplayMemory
8 |
9 | from Model import ActorNetwork, CriticNetwork
10 | from utils import entropy, to_tensor_var
11 |
12 | EVAL_EPISODES = 10
13 |
14 | class MAA2C(object):
15 |     """
16 |     A multi-agent algorithm trained with Advantage Actor-Critic (A2C)
17 |     - each actor takes its local observation as input
18 |     - agents interact with the environment to collect experience
19 |     - agents train on that experience to update their policies
20 | 
21 |     Parameters
22 |     - training_strategy:
23 |         - cocurrent
24 |             - each agent learns its own independent policy
25 |             - multiple policies are optimized simultaneously
26 |         - centralized (see MADDPG in [1] for details)
27 |             - centralized training with decentralized execution
28 |             - each decentralized actor maps its local observation to an action using its individual policy
29 |             - the centralized critic takes the states and actions of all agents as input; each actor
30 |                 has its own critic for estimating the value function, which allows each actor to have a
31 |                 different reward structure, e.g., cooperative, competitive, or mixed tasks
32 |     - actor_parameter_sharing:
33 |         - True: all actors share a single policy, which enables parameter and experience sharing;
34 |             this is mostly useful when the agents are homogeneous. Please see Sec. 4.3 in [2] and
35 |             Sec. 4.1 & 4.2 in [3] for details.
36 |         - False: each actor uses an independent policy
37 |     - critic_parameter_sharing:
38 |         - True: all actors share a single critic, which enables parameter and experience sharing;
39 |             this is mostly useful when the agents are homogeneous and reward sharing holds. Please
40 |             see Sec. 4.1 in [3] for details.
41 |         - False: each actor uses an independent critic (though each critic can take the other agents'
42 |             actions as input, see MADDPG in [1] for details)
43 | 
44 |     Reference:
45 |     [1] Multi-Agent Actor-Critic for Mixed Cooperative-Competitive Environments
46 |     [2] Cooperative Multi-Agent Control Using Deep Reinforcement Learning
47 |     [3] Parameter Sharing Deep Deterministic Policy Gradient for Cooperative Multi-agent Reinforcement Learning
48 | 
49 |     """
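    # Example usage (illustrative sketch, not taken from the run_* scripts): the training
    # strategy and parameter-sharing behaviour are chosen through the constructor, e.g.
    #   maa2c = MAA2C(env, n_agents, state_dim, action_dim, action_lower_bound,
    #                 action_higher_bound, noise, bound,
    #                 training_strategy="centralized",
    #                 actor_parameter_sharing=True, critic_parameter_sharing=False)
    # followed by repeated calls to maa2c.interact() and maa2c.train().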
50 | def __init__(self, env, n_agents, state_dim, action_dim, action_lower_bound, action_higher_bound,
51 | noise, bound, memory_capacity=10, max_steps=None,
52 | roll_out_n_steps=10, tau=300,
53 | reward_gamma=0.99, reward_scale=1., done_penalty=-10,
54 | actor_output_act=nn.functional.softmax, critic_loss="huber",
55 | actor_lr=0.01, critic_lr=0.01, training_strategy="centralized",
56 | optimizer_type="rmsprop", entropy_reg=0.00,
57 | max_grad_norm=None, batch_size=10, episodes_before_train=0,
58 | use_cuda=False, actor_parameter_sharing=False, critic_parameter_sharing=False,
59 | epsilon_start=0.9, epsilon_end=0.05, epsilon_decay=100):
60 |
61 |
62 |
63 | self.env = env
64 | self.state_dim = state_dim
65 | self.action_dim = action_dim
66 | self.env_state = self.env.reset()
67 | self.n_episodes = 0
68 | self.n_steps = 0
69 | self.max_steps = max_steps
70 | self.action_lower_bound = action_lower_bound
71 | self.action_higher_bound = action_higher_bound
72 | self.noise = noise
73 | self.tau = tau
74 | self.bound = bound
75 |
76 | self.reward_gamma = reward_gamma
77 | self.reward_scale = reward_scale
78 | self.done_penalty = done_penalty
79 |
80 | self.memory = ReplayMemory(memory_capacity)
81 | self.actor_output_act = actor_output_act
82 | self.critic_loss = critic_loss
83 | self.actor_lr = actor_lr
84 | self.critic_lr = critic_lr
85 | self.optimizer_type = optimizer_type
86 | self.entropy_reg = entropy_reg
87 | self.max_grad_norm = max_grad_norm
88 | self.batch_size = batch_size
89 | self.episodes_before_train = episodes_before_train
90 | self.target_tau = 0.01
91 |
92 | self.use_cuda = use_cuda and th.cuda.is_available()
93 |
94 | self.epsilon_start = epsilon_start
95 | self.epsilon_end = epsilon_end
96 | self.epsilon_decay = epsilon_decay
97 |
98 | self.n_agents = n_agents
99 | self.roll_out_n_steps = roll_out_n_steps
100 | self.actor_parameter_sharing = actor_parameter_sharing
101 | self.critic_parameter_sharing = critic_parameter_sharing
102 |
103 | assert training_strategy in ["cocurrent", "centralized"]
104 | self.training_strategy = training_strategy
105 |
106 |
107 |         self.actors = [ActorNetwork(self.state_dim, self.action_dim, self.actor_output_act) for _ in range(self.n_agents)]  # one independent network per agent
108 |
109 |
110 | critic_state_dim = self.n_agents * self.state_dim
111 | critic_action_dim = self.n_agents * self.action_dim
112 |         self.critics = [CriticNetwork(critic_state_dim, critic_action_dim, 1) for _ in range(self.n_agents)]
113 |
114 | if optimizer_type == "adam":
115 | self.actors_optimizer = [Adam(a.parameters(), lr=self.actor_lr) for a in self.actors]
116 | self.critics_optimizer = [Adam(c.parameters(), lr=self.critic_lr) for c in self.critics]
117 | elif optimizer_type == "rmsprop":
118 | self.actors_optimizer = [RMSprop(a.parameters(), lr=self.actor_lr) for a in self.actors]
119 | self.critics_optimizer = [RMSprop(c.parameters(), lr=self.critic_lr) for c in self.critics]
120 |
121 |         # tricky and memory-consuming implementation of parameter sharing
122 | if self.actor_parameter_sharing:
123 | for agent_id in range(1, self.n_agents):
124 | self.actors[agent_id] = self.actors[0]
125 | self.actors_optimizer[agent_id] = self.actors_optimizer[0]
126 | if self.critic_parameter_sharing:
127 | for agent_id in range(1, self.n_agents):
128 | self.critics[agent_id] = self.critics[0]
129 | self.critics_optimizer[agent_id] = self.critics_optimizer[0]
130 |
131 | if self.use_cuda:
132 | for a in self.actors:
133 | a.cuda()
134 | for c in self.critics:
135 | c.cuda()
136 |
137 | self.eval_rewards = []
138 | self.mean_rewards = []
139 | self.episodes = []
140 | self.eval_phi = []
141 | self.mean_phi = []
142 |
143 | # agent interact with the environment to collect experience
144 | def interact(self):
145 | if (self.max_steps is not None) and (self.n_steps >= self.max_steps):
146 | self.env_state = self.env.reset()
147 | self.n_steps = 0
148 | states = []
149 | actions = []
150 | rewards = []
151 | next_states = []
152 | next_actions = []
153 | # take n steps
154 | for i in range(self.roll_out_n_steps):
155 | states.append(self.env_state)
156 | action = self.choose_action(self.env_state)
157 | next_state, reward, done, _, phi, _, _, _ = self.env.step(action)
158 | next_state_var = to_tensor_var([next_state], self.use_cuda)
159 | next_action = np.zeros((self.n_agents, self.action_dim))
160 | for agent_id in range(self.n_agents):
161 | next_action_var = self.actors[agent_id](next_state_var[:,agent_id,:])
162 | if self.use_cuda:
163 | next_action[agent_id] = next_action_var.data.cpu().numpy()[0]
164 | else:
165 | next_action[agent_id] = next_action_var.data.numpy()[0]
166 | # done = done[0]
167 | actions.append(action)
168 | rewards.append(reward)
169 | next_states.append(next_state)
170 | next_actions.append(next_action)
171 |
172 | final_state = next_state
173 | self.env_state = next_state
174 | if done:
175 | self.env_state = self.env.reset()
176 | break
177 | # discount reward
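        # If the rollout ended with done, the tail return is zero; otherwise bootstrap the
        # remaining return from the critic's value estimate of the final state.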
178 | if done:
179 | final_r = [0.0] * self.n_agents
180 | self.n_episodes += 1
181 | self.episode_done = True
182 | else:
183 | self.episode_done = False
184 | final_action = self.choose_action(final_state)
185 | final_r = self.value(final_state, final_action)
186 |
187 | rewards = np.array(rewards)
188 | for agent_id in range(self.n_agents):
189 | rewards[:,agent_id] = self._discount_reward(rewards[:,agent_id], final_r[agent_id])
190 | rewards = rewards.tolist()
191 | self.n_steps += 1
192 |
193 | self.eval_rewards.append(np.sum(reward))
194 | self.eval_phi.append(np.sum(phi))
195 | if self.episode_done and ((self.n_episodes+1)%EVAL_EPISODES == 0):
196 | mean_reward = np.mean(np.array(self.eval_rewards))
197 | self.mean_rewards.append(mean_reward)
198 | self.mean_phi.append(np.mean(np.array(self.eval_phi)))
199 | self.episodes.append(self.n_episodes+1)
200 | print("Episode:", self.n_episodes+1, " Average Reward: ", mean_reward)
201 | self.eval_rewards = []
202 | self.eval_phi = []
203 |
204 | self.memory.push(states, actions, rewards)
205 |
206 | # train on a roll out batch
207 | def train(self):
208 | if self.n_episodes <= self.episodes_before_train:
209 |             return  # do not train until enough exploration episodes have been collected
210 |
211 | batch = self.memory.sample(self.batch_size)
212 | states_var = to_tensor_var(batch.states, self.use_cuda).view(-1, self.n_agents, self.state_dim)
213 | actions_var = to_tensor_var(batch.actions, self.use_cuda).view(-1, self.n_agents, self.action_dim)
214 | rewards_var = to_tensor_var(batch.rewards, self.use_cuda).view(-1, self.n_agents, 1)
215 | whole_states_var = states_var.view(-1, self.n_agents*self.state_dim)
216 | whole_actions_var = actions_var.view(-1, self.n_agents*self.action_dim)
217 |
218 |
219 | for agent_id in range(self.n_agents):
220 | # update actor network
221 | self.actors_optimizer[agent_id].zero_grad()
222 | action_log_probs = self.actors[agent_id](states_var[:,agent_id,:])
223 | entropy_loss = th.mean(entropy(th.exp(action_log_probs)))
224 | action_log_probs = th.sum(action_log_probs * actions_var[:,agent_id,:], 1)
225 |             values = self.critics[agent_id](whole_states_var, whole_actions_var)
226 | 
227 |             # advantage uses detached values so the actor update does not back-propagate into the critic
228 |             advantages = rewards_var[:,agent_id,:] - values.detach()
229 |             # policy-gradient loss with entropy regularization
230 |             pg_loss = -th.mean(action_log_probs * advantages)
231 |             actor_loss = pg_loss - entropy_loss * self.entropy_reg
232 |             actor_loss.backward()
233 | 
234 |             if self.max_grad_norm is not None:
235 |                 nn.utils.clip_grad_norm_(self.actors[agent_id].parameters(), self.max_grad_norm)
236 |             self.actors_optimizer[agent_id].step()
237 | 
238 |             # update critic network: regress the value towards the discounted n-step return
239 |             self.critics_optimizer[agent_id].zero_grad()
240 |             target_values = rewards_var[:,agent_id,:]
241 |             if self.critic_loss == "huber":
242 |                 critic_loss = nn.functional.smooth_l1_loss(values, target_values)
243 |             else:
244 |                 critic_loss = nn.MSELoss()(values, target_values)
245 |             critic_loss.backward()
246 | 
247 | 
248 |             if self.max_grad_norm is not None:
249 |                 nn.utils.clip_grad_norm_(self.critics[agent_id].parameters(), self.max_grad_norm)
250 | self.critics_optimizer[agent_id].step()
251 |
252 |
253 | def getactionbound(self, a, b, x, i):
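        # Linearly rescale x from the actor's output range [a, b] to the environment's
        # action range [action_lower_bound[i], action_higher_bound[i]] for dimension i.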
254 | x = (x - a) * (self.action_higher_bound[i] - self.action_lower_bound[i]) / (b - a) \
255 | + self.action_lower_bound[i]
256 | return x
257 |
258 | # predict action based on state for execution
259 | def choose_action(self, state):
260 | state_var = to_tensor_var([state], self.use_cuda)
261 | action = np.zeros((self.n_agents, self.action_dim))
262 |
263 | for agent_id in range(self.n_agents):
264 | action_var = (self.actors[agent_id](state_var[:,agent_id,:]))
265 | if self.use_cuda:
266 | action[agent_id] = action_var.data.cpu().numpy()[0]
267 | else:
268 | action[agent_id] = action_var.data.numpy()[0]
269 |
270 | for n in range(self.n_agents):
271 | for i in range(6):
272 | if (self.n_episodes < self.bound): e = self.n_episodes
273 | else: e = self.bound
274 | action[n][i] = -exp(-e/self.tau) + self.noise
275 | b = 1
276 | a = -1
277 | if self.action_dim > 6:
278 | print("Wrong!")
279 | for n in range(self.n_agents):
280 | action[n][0] = 0 if action[n][0] <= 0 else 1
281 | action[n][1] = round(self.getactionbound(a, b, action[n][1], 1))
282 | action[n][2] = self.getactionbound(a, b, action[n][2], 2)
283 | action[n][3] = self.getactionbound(a, b, action[n][3], 3)
284 | action[n][4] = self.getactionbound(a, b, action[n][4], 4)
285 | action[n][5] = self.getactionbound(a, b, action[n][5], 5)
286 |
287 |
288 | return action
289 |
290 |
291 | # evaluate value
292 | def value(self, state, action):
293 | state_var = to_tensor_var([state], self.use_cuda)
294 | action_var = to_tensor_var([action], self.use_cuda)
295 | whole_state_var = state_var.view(-1, self.n_agents*self.state_dim)
296 | whole_action_var = action_var.view(-1, self.n_agents*self.action_dim)
297 | values = np.zeros(self.n_agents)
298 | for agent_id in range(self.n_agents):
299 | value_var = self.critics[agent_id](whole_state_var, whole_action_var)
300 | if self.use_cuda:
301 | values[agent_id] = value_var.data.cpu().numpy()[0]
302 | else:
303 | values[agent_id] = value_var.data.numpy()[0]
304 | return values
305 |
306 | def _discount_reward(self, rewards, final_value):
307 | discounted_r = np.zeros_like(rewards)
308 | running_add = final_value
309 | for t in reversed(range(0, len(rewards))):
310 | running_add = running_add * self.reward_gamma + rewards[t]
311 | discounted_r[t] = running_add
312 | return discounted_r
313 |
314 |
--------------------------------------------------------------------------------
/MADDPG.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.optim import Adam, RMSprop
4 |
5 | import numpy as np
6 | from copy import deepcopy
7 |
8 | from utils import to_tensor_var
9 | from Model import ActorNetwork, CriticNetwork
10 | from Memory import ReplayMemory
11 |
12 | EVAL_EPISODES = 10
13 |
14 |
15 | class MADDPG(object):
16 |     """
17 |     Agents trained with Deep Deterministic Policy Gradient (DDPG) in an Actor-Critic framework
18 |     - the actor takes the state as input
19 |     - the critic takes both state and action as input
20 |     - the critic is trained with temporal-difference learning against target networks
21 |     """
22 | def __init__(self, env, n_agents, state_dim, action_dim, action_lower_bound, action_higher_bound,
23 | memory_capacity=10000, max_steps=10000, target_tau=0.01, target_update_steps=500,
24 | reward_gamma=0.99, reward_scale=1., done_penalty=None, training_strategy="centralized",
25 | actor_output_act=torch.tanh, actor_lr=0.01, critic_lr=0.01,
26 | optimizer_type="adam", entropy_reg=0.01, max_grad_norm=None, batch_size=100, episodes_before_train=100,
27 | epsilon_start=0.9, epsilon_end=0.05, epsilon_decay=100, use_cuda=False):
28 |
29 | self.n_agents = n_agents
30 | self.env = env
31 | self.state_dim = state_dim
32 | self.action_dim = action_dim
33 | self.action_lower_bound = action_lower_bound
34 | self.action_higher_bound = action_higher_bound
35 |
36 | self.env_state = env.reset()
37 | self.n_episodes = 0
38 | self.n_steps = 0
39 | self.max_steps = max_steps
40 | self.roll_out_n_steps = 1
41 |
42 | self.reward_gamma = reward_gamma
43 | self.reward_scale = reward_scale
44 | self.done_penalty = done_penalty
45 |
46 | self.memory = ReplayMemory(memory_capacity)
47 | self.actor_output_act = actor_output_act
48 | self.actor_lr = actor_lr
49 | self.critic_lr = critic_lr
50 | self.optimizer_type = optimizer_type
51 | self.entropy_reg = entropy_reg
52 | self.max_grad_norm = max_grad_norm
53 | self.batch_size = batch_size
54 | self.episodes_before_train = episodes_before_train
55 |
56 | # params for epsilon greedy
57 | self.epsilon_start = epsilon_start
58 | self.epsilon_end = epsilon_end
59 | self.epsilon_decay = epsilon_decay
60 |
61 | self.use_cuda = use_cuda and torch.cuda.is_available()
62 |
63 | self.target_tau = target_tau
64 | self.target_update_steps = target_update_steps
65 |
66 | assert training_strategy in ["cocurrent", "centralized"]
67 | self.training_strategy = training_strategy
68 |
69 |         self.actors = [ActorNetwork(self.state_dim, self.action_dim, self.actor_output_act) for _ in range(self.n_agents)]  # one independent network per agent
70 |         if self.training_strategy == "cocurrent":
71 |             self.critics = [CriticNetwork(self.state_dim, self.action_dim, 1) for _ in range(self.n_agents)]
72 |         elif self.training_strategy == "centralized":
73 |             critic_state_dim = self.n_agents * self.state_dim
74 |             critic_action_dim = self.n_agents * self.action_dim
75 |             self.critics = [CriticNetwork(critic_state_dim, critic_action_dim, 1) for _ in range(self.n_agents)]
76 |
77 |         # to ensure the target networks and the learning networks start with the same weights
78 | self.actors_target = deepcopy(self.actors)
79 | self.critics_target = deepcopy(self.critics)
80 |
81 | if optimizer_type == "adam":
82 | self.actors_optimizer = [Adam(a.parameters(), lr=self.actor_lr) for a in self.actors]
83 | self.critics_optimizer = [Adam(c.parameters(), lr=self.critic_lr) for c in self.critics]
84 | elif optimizer_type == "rmsprop":
85 | self.actors_optimizer = [RMSprop(a.parameters(), lr=self.actor_lr) for a in self.actors]
86 | self.critics_optimizer = [RMSprop(c.parameters(), lr=self.critic_lr) for c in self.critics]
87 |
88 | if self.use_cuda:
89 | for i in range(self.n_agents):
90 | self.actors[i].cuda()
91 | self.critics[i].cuda()
92 | self.actors_target[i].cuda()
93 | self.critics_target[i].cuda()
94 |
95 | self.eval_rewards = []
96 | self.mean_rewards = []
97 | self.episodes = []
98 | self.mean_phi = [[] for n in range(self.n_agents)]
99 | self.eval_phi = [[] for n in range(self.n_agents)]
100 | self.mean_energy = [[] for n in range(self.n_agents)]
101 | self.eval_energy = [[] for n in range(self.n_agents)]
102 | self.mean_R_mine = [[] for n in range(self.n_agents)]
103 | self.eval_R_mine = [[] for n in range(self.n_agents)]
104 | self.mean_E_mine = [[] for n in range(self.n_agents)]
105 | self.eval_E_mine = [[] for n in range(self.n_agents)]
106 | self.agent_rewards = [[] for n in range(self.n_agents)]
107 | self.agent_mean_rewards = [[] for n in range(self.n_agents)]
108 |
109 | def interact(self):
110 | if (self.max_steps is not None) and (self.n_steps >= self.max_steps):
111 | self.env_state = self.env.reset()
112 | self.n_steps = 0
113 | state = self.env_state
114 | action = self.exploration_action(state)
115 |
116 | next_state, reward, done, _, phi, energy, r_mine, e_mine = self.env.step(action)
117 | if done:
118 | if self.done_penalty is not None:
119 | reward = self.done_penalty
120 | next_state = np.zeros((self.n_agents, self.state_dim))
121 | self.env_state = self.env.reset()
122 | self.n_episodes += 1
123 | self.episode_done = True
124 | else:
125 | self.env_state = next_state
126 | self.episode_done = False
127 | self.n_steps += 1
128 |
129 | # use actor_target to get next_action
130 | next_state_var = to_tensor_var([next_state], self.use_cuda)
131 | next_action = np.zeros((self.n_agents, self.action_dim))
132 | for agent_id in range(self.n_agents):
133 | next_action_var = self.actors_target[agent_id](next_state_var[:,agent_id,:])
134 | if self.use_cuda:
135 | next_action[agent_id] = next_action_var.data.cpu().numpy()[0]
136 | else:
137 | next_action[agent_id] = next_action_var.data.numpy()[0]
138 |
139 | self.eval_rewards.append(np.sum(reward))
140 | for agent_id in range(self.n_agents):
141 | self.eval_phi[agent_id].append(phi[agent_id])
142 | self.eval_energy[agent_id].append(energy[agent_id])
143 | self.eval_R_mine[agent_id].append(r_mine[agent_id])
144 | self.eval_E_mine[agent_id].append(e_mine[agent_id])
145 | self.agent_rewards[agent_id].append(reward[agent_id])
146 | if self.episode_done and ((self.n_episodes+1)%EVAL_EPISODES == 0):
147 | mean_reward = np.mean(np.array(self.eval_rewards))
148 | self.mean_rewards.append(mean_reward)
149 | for agent_id in range(self.n_agents):
150 | self.mean_phi[agent_id].append(np.mean(np.array(self.eval_phi[agent_id])))
151 | self.mean_energy[agent_id].append(np.mean(np.array(self.eval_energy[agent_id])))
152 | self.mean_R_mine[agent_id].append(np.mean(np.array(self.eval_R_mine[agent_id])))
153 | self.mean_E_mine[agent_id].append(np.mean(np.array(self.eval_E_mine[agent_id])))
154 | self.agent_mean_rewards[agent_id].append(np.mean(np.array(self.agent_rewards[agent_id])))
155 | self.episodes.append(self.n_episodes+1)
156 | print("Episode:", self.n_episodes+1, " Average Reward: ", mean_reward)
157 | self.eval_rewards = []
158 | self.agent_rewards = [[] for n in range(self.n_agents)]
159 | self.eval_phi = [[] for n in range(self.n_agents)]
160 | self.eval_energy = [[] for n in range(self.n_agents)]
161 | self.eval_R_mine = [[] for n in range(self.n_agents)]
162 | self.eval_E_mine = [[] for n in range(self.n_agents)]
163 |
164 | self.memory.push(state, action, reward, next_state, next_action, done)
165 |
166 | def _soft_update_target(self, target, source):
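        # Polyak averaging: target <- (1 - target_tau) * target + target_tau * source,
        # so the target networks track the learned networks slowly and stabilize training.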
167 | for t, s in zip(target.parameters(), source.parameters()):
168 | t.data.copy_(
169 | (1. - self.target_tau) * t.data + self.target_tau * s.data)
170 |
171 | # train on a sample batch
172 | def train(self):
173 | # do not train until exploration is enough
174 | if self.n_episodes <= self.episodes_before_train:
175 |             return
176 |
177 | batch = self.memory.sample(self.batch_size)
178 | states_var = to_tensor_var(batch.states, self.use_cuda).view(-1, self.n_agents, self.state_dim)
179 | actions_var = to_tensor_var(batch.actions, self.use_cuda).view(-1, self.n_agents, self.action_dim)
180 | rewards_var = to_tensor_var(batch.rewards, self.use_cuda).view(-1, self.n_agents, 1)
181 | next_states_var = to_tensor_var(batch.next_states, self.use_cuda).view(-1, self.n_agents, self.state_dim)
182 | next_actions_var = to_tensor_var(batch.next_actions, self.use_cuda).view(-1, self.n_agents, self.action_dim)
183 | dones_var = to_tensor_var(batch.dones, self.use_cuda).view(-1, 1)
184 | whole_states_var = states_var.view(-1, self.n_agents*self.state_dim)
185 | whole_actions_var = actions_var.view(-1, self.n_agents*self.action_dim)
186 | whole_next_states_var = next_states_var.view(-1, self.n_agents*self.state_dim)
187 | whole_next_actions_var = next_actions_var.view(-1, self.n_agents*self.action_dim)
188 |
189 |
190 | for agent_id in range(self.n_agents):
191 | # estimate the target q with actor_target network and critic_target network
192 | #next_q (centralized)
193 | next_q = self.critics_target[agent_id](whole_next_states_var, whole_next_actions_var).detach()
194 |
195 | target_q = self.reward_scale * rewards_var[:,agent_id,:] + self.reward_gamma * next_q * (1. - dones_var)
196 |
197 | # update critic network
198 |
199 | # current Q values (centralized)
200 |             current_q = self.critics[agent_id](whole_states_var, whole_actions_var)
201 | 
202 |             # regress the current Q values towards the bootstrapped target Q values
203 |             critic_loss = nn.MSELoss()(current_q, target_q)
204 |             self.critics_optimizer[agent_id].zero_grad()
205 |             critic_loss.backward()
206 | 
207 |             if self.max_grad_norm is not None:
208 |                 nn.utils.clip_grad_norm_(self.critics[agent_id].parameters(), self.max_grad_norm)
209 |             self.critics_optimizer[agent_id].step()
210 | 
211 |             # update actor network
212 | 
213 |             # the actor's current action prediction for this agent
214 |             action = self.actors[agent_id](states_var[:,agent_id,:])
215 |             # substitute the predicted action into the joint action so the critic's gradient
216 |             # reaches the actor; the actor maximizes Q by minimizing its negative
217 |             joint_actions = [actions_var[:, i, :] if i != agent_id else action for i in range(self.n_agents)]
218 |             whole_new_actions_var = torch.cat(joint_actions, dim=-1)
219 |             actor_loss = - self.critics[agent_id](whole_states_var, whole_new_actions_var).mean()
220 |             self.actors_optimizer[agent_id].zero_grad()
221 |             actor_loss.backward()
222 | 
223 |             if self.max_grad_norm is not None:
224 |                 nn.utils.clip_grad_norm_(self.actors[agent_id].parameters(), self.max_grad_norm)
225 |             self.actors_optimizer[agent_id].step()
226 |
227 | # update actor target network and critic target network
228 | if self.n_steps % self.target_update_steps == 0 and self.n_steps > 0:
229 | self._soft_update_target(self.critics_target[agent_id], self.critics[agent_id])
230 | self._soft_update_target(self.actors_target[agent_id], self.actors[agent_id])
231 |
232 | def getactionbound(self, a, b, x, i):
233 | x = (x - a) * (self.action_higher_bound[i] - self.action_lower_bound[i]) / (b - a) \
234 | + self.action_lower_bound[i]
235 | return x
236 |
237 | # choose an action based on state with random noise added for exploration in training
238 | def exploration_action(self, state):
239 | state_var = to_tensor_var([state], self.use_cuda)
240 | action = np.zeros((self.n_agents, self.action_dim))
241 | for agent_id in range(self.n_agents):
242 | action_var = self.actors[agent_id](state_var[:,agent_id,:])
243 | if self.use_cuda:
244 | action[agent_id] = action_var.data.cpu().numpy()[0]
245 | else:
246 | action[agent_id] = action_var.data.numpy()[0]
247 |
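        # Exploration noise scale decays exponentially from epsilon_start to epsilon_end
        # with rate epsilon_decay as training episodes accumulate.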
248 | epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \
249 | np.exp(-1. * self.n_episodes / self.epsilon_decay)
250 | # add noise
251 | noise = np.random.randn(self.n_agents, self.action_dim) * epsilon
252 | action += noise
253 |
254 | for n in range(self.n_agents):
255 | for i in range(6):
256 | if action[n][i] < -1:
257 | action[n][i] = -1
258 | if action[n][i] > 1:
259 | action[n][i] = 1
260 | #get bounded to action_bound
261 | b = 1
262 | a = -b
263 | if self.action_dim > 6:
264 | print("Wrong!")
265 | for n in range(self.n_agents):
266 | action[n][0] = 0 if action[n][0] <= 0 else 1
267 | action[n][1] = round(self.getactionbound(a, b, action[n][1], 1))
268 | action[n][2] = self.getactionbound(a, b, action[n][2], 2)
269 | action[n][3] = self.getactionbound(a, b, action[n][3], 3)
270 | action[n][4] = self.getactionbound(a, b, action[n][4], 4)
271 | action[n][5] = self.getactionbound(a, b, action[n][5], 5)
272 | return action
273 |
274 |
275 | # choose an action based on state for execution
276 | def action(self, state):
277 | state_var = to_tensor_var([state], self.use_cuda)
278 | action = np.zeros((self.n_agents, self.action_dim))
279 | for agent_id in range(self.n_agents):
280 | action_var = self.actors[agent_id](state_var[:,agent_id,:])
281 | if self.use_cuda:
282 | action[agent_id] = action_var.data.cpu().numpy()[0]
283 | else:
284 | action[agent_id] = action_var.data.numpy()[0]
285 |
286 | #get bounded to action_bound
287 | b = 1
288 | a = -b
289 | if self.action_dim > 6:
290 | print("Wrong!")
291 | for n in range(self.n_agents):
292 | action[n][0] = 0 if action[n][0] <= 0 else 1
293 | action[n][1] = round(self.getactionbound(a, b, action[n][1], 1))
294 | action[n][2] = self.getactionbound(a, b, action[n][2], 2)
295 | action[n][3] = self.getactionbound(a, b, action[n][3], 3)
296 | action[n][4] = self.getactionbound(a, b, action[n][4], 4)
297 | action[n][5] = self.getactionbound(a, b, action[n][5], 5)
298 |
299 | return action
300 |
301 |
--------------------------------------------------------------------------------
/MAPPO.py:
--------------------------------------------------------------------------------
1 |
2 | import torch as th
3 | from torch import nn
4 | from torch.optim import Adam, RMSprop
5 | from math import exp
6 |
7 | import numpy as np
8 | from copy import deepcopy
9 |
10 | from Model import ActorNetwork, CriticNetwork
11 | from utils import to_tensor_var
12 | from Memory import ReplayMemory
13 |
14 | EVAL_EPISODES = 10
15 |
16 | class MAPPO(object):
17 |     """
18 |     Agents trained with PPO using the Advantage Actor-Critic framework
19 |     - the actor takes the state as input
20 |     - the critic takes both state and action as input
21 |     - agents interact with the environment to collect experience
22 |     - agents train on that experience to update their policies
23 |     - Adam seems to work better than RMSprop for PPO
24 |     """
25 | def __init__(self, env, state_dim, action_dim, n_agents, action_lower_bound, action_higher_bound,
26 | noise=0, tau=300,
27 | memory_capacity=10, max_steps=None,
28 | roll_out_n_steps=10, target_tau=1.0,
29 | target_update_steps=5, clip_param=0.2,
30 | reward_gamma=0.99, reward_scale=1.,
31 | actor_output_act=nn.functional.softmax, critic_loss="mse",
32 | actor_lr=0.01, critic_lr=0.01,
33 | optimizer_type="adam", entropy_reg=0.00,
34 | max_grad_norm=None, batch_size=10, episodes_before_train=0,
35 | use_cuda=False):
36 |
37 |
38 | self.env = env
39 | self.state_dim = state_dim
40 | self.action_dim = action_dim
41 | self.env_state = self.env.reset()
42 | self.n_episodes = 0
43 | self.n_steps = 0
44 | self.max_steps = max_steps
45 | self.n_agents = n_agents
46 |
47 | self.reward_gamma = reward_gamma
48 | self.reward_scale = reward_scale
49 |
50 | self.action_lower_bound = action_lower_bound
51 | self.action_higher_bound = action_higher_bound
52 |
53 | self.memory = ReplayMemory(memory_capacity)
54 |
55 | self.actor_output_act = actor_output_act
56 | self.critic_loss = critic_loss
57 | self.actor_lr = actor_lr
58 | self.critic_lr = critic_lr
59 | self.optimizer_type = optimizer_type
60 | self.entropy_reg = entropy_reg
61 | self.max_grad_norm = max_grad_norm
62 | self.batch_size = batch_size
63 | self.episodes_before_train = episodes_before_train
64 | self.noise = noise
65 | self.tau = tau
66 |
67 | self.use_cuda = use_cuda and th.cuda.is_available()
68 |
69 | self.roll_out_n_steps = roll_out_n_steps
70 | self.target_tau = target_tau
71 | self.target_update_steps = target_update_steps
72 | self.clip_param = clip_param
73 |
74 |         self.actors = [ActorNetwork(self.state_dim, self.action_dim, self.actor_output_act) for _ in range(self.n_agents)]  # one independent network per agent
75 |         critic_state_dim = self.n_agents * self.state_dim
76 |         critic_action_dim = self.n_agents * self.action_dim
77 |         self.critics = [CriticNetwork(critic_state_dim, critic_action_dim, 1) for _ in range(self.n_agents)]
78 |         # to ensure the target networks and the learning networks start with the same weights
79 |         self.actors_target = deepcopy(self.actors)
80 |         self.critics_target = deepcopy(self.critics)
81 |
82 | if optimizer_type == "adam":
83 | self.actors_optimizer = [Adam(a.parameters(), lr=self.actor_lr) for a in self.actors]
84 | self.critics_optimizer = [Adam(c.parameters(), lr=self.critic_lr) for c in self.critics]
85 | elif optimizer_type == "rmsprop":
86 | self.actors_optimizer = [RMSprop(a.parameters(), lr=self.actor_lr) for a in self.actors]
87 | self.critics_optimizer = [RMSprop(c.parameters(), lr=self.critic_lr) for c in self.critics]
88 |
89 | if self.use_cuda:
90 | for a in self.actors:
91 | a.cuda()
92 | for c in self.critics:
93 | c.cuda()
94 | self.eval_rewards = []
95 | self.mean_rewards = []
96 | self.episodes = []
97 | self.eval_phi = []
98 | self.mean_phi = []
99 |
100 | # agent interact with the environment to collect experience
101 | def interact(self):
102 | if (self.max_steps is not None) and (self.n_steps >= self.max_steps):
103 | self.env_state = self.env.reset()
104 | self.n_steps = 0
105 | states = []
106 | actions = []
107 | rewards = []
108 | # take n steps
109 | for i in range(self.roll_out_n_steps):
110 | states.append(self.env_state)
111 | action = self.choose_action(self.env_state)
112 |             next_state, reward, done, _, phi, _, _, _ = self.env.step(action)
113 | # done = done[0]
114 | actions.append(action)
115 | rewards.append(reward)
116 | final_state = next_state
117 | self.env_state = next_state
118 | if done:
119 | self.env_state = self.env.reset()
120 | break
121 | # discount reward
122 | if done:
123 | final_r = [0.0] * self.n_agents
124 | self.n_episodes += 1
125 | self.episode_done = True
126 | else:
127 | self.episode_done = False
128 | final_action = self.choose_action(final_state)
129 | final_r = self.value(final_state, final_action)
130 |
131 | rewards = np.array(rewards)
132 | for agent_id in range(self.n_agents):
133 | rewards[:,agent_id] = self._discount_reward(rewards[:,agent_id], final_r[agent_id])
134 | rewards = rewards.tolist()
135 | self.n_steps += 1
136 |
137 | self.eval_rewards.append(np.sum(reward))
138 | self.eval_phi.append(np.sum(phi))
139 | if self.episode_done and ((self.n_episodes+1)%EVAL_EPISODES == 0):
140 | mean_reward = np.mean(np.array(self.eval_rewards))
141 | self.mean_rewards.append(mean_reward)
142 | self.mean_phi.append(np.mean(np.array(self.eval_phi)))
143 | self.episodes.append(self.n_episodes+1)
144 | print("Episode:", self.n_episodes+1, " Average Reward: ", mean_reward)
145 | self.eval_rewards = []
146 | self.eval_phi = []
147 |
148 | self.memory.push(states, actions, rewards)
149 |
150 | # train on a roll out batch
151 | def train(self):
152 | if self.n_episodes <= self.episodes_before_train:
153 |             return  # do not train until enough exploration episodes have been collected
154 |
155 | batch = self.memory.sample(self.batch_size)
156 | states_var = to_tensor_var(batch.states, self.use_cuda).view(-1, self.n_agents, self.state_dim)
157 | actions_var = to_tensor_var(batch.actions, self.use_cuda).view(-1, self.n_agents, self.action_dim)
158 | rewards_var = to_tensor_var(batch.rewards, self.use_cuda).view(-1, self.n_agents, 1)
159 | whole_states_var = states_var.view(-1, self.n_agents*self.state_dim)
160 | whole_actions_var = actions_var.view(-1, self.n_agents*self.action_dim)
161 |
162 | for agent_id in range(self.n_agents):
163 | # update actor network
164 | self.actors_optimizer[agent_id].zero_grad()
165 |             values = self.critics[agent_id](whole_states_var, whole_actions_var)
166 |             advantages = rewards_var[:,agent_id,:] - values.detach()
167 | # # normalizing advantages seems not working correctly here
168 | # advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-5)
169 |             action_log_probs = self.actors[agent_id](states_var[:,agent_id,:])
170 | action_log_probs = th.sum(action_log_probs * actions_var[:,agent_id,:], 1)
171 | old_action_log_probs = self.actors_target[agent_id](states_var[:,agent_id,:]).detach()
172 | old_action_log_probs = th.sum(old_action_log_probs * actions_var[:,agent_id,:], 1)
173 | ratio = th.exp(action_log_probs - old_action_log_probs)
174 | surr1 = ratio * advantages
175 | surr2 = th.clamp(ratio, 1.0 - self.clip_param, 1.0 + self.clip_param) * advantages
176 | # PPO's pessimistic surrogate (L^CLIP)
177 | actor_loss = -th.mean(th.min(surr1, surr2))
179 | actor_loss.backward()
180 | if self.max_grad_norm is not None:
181 |                 nn.utils.clip_grad_norm_(self.actors[agent_id].parameters(), self.max_grad_norm)
182 | self.actors_optimizer[agent_id].step()
183 |
184 | # update critic network
185 | self.critics_optimizer[agent_id].zero_grad()
186 | target_values = rewards_var[:,agent_id,:]
187 | # if self.critic_loss == "huber":
188 | # critic_loss = nn.functional.smooth_l1_loss(values, target_values)
189 | # else:
190 | # critic_loss = nn.MSELoss()(values, target_values)
191 | critic_loss = 0.5 * (values - target_values).pow(2).mean()
193 | critic_loss.backward()
194 | if self.max_grad_norm is not None:
195 |                 nn.utils.clip_grad_norm_(self.critics[agent_id].parameters(), self.max_grad_norm)
196 | self.critics_optimizer[agent_id].step()
197 |
198 | # update actor target network and critic target network
199 | if self.n_steps % self.target_update_steps == 0 and self.n_steps > 0:
200 | self._soft_update_target(self.actors_target[agent_id], self.actors[agent_id])
201 | self._soft_update_target(self.critics_target[agent_id], self.critics[agent_id])
202 |
203 |
204 | def _soft_update_target(self, target, source):
205 | for t, s in zip(target.parameters(), source.parameters()):
206 | t.data.copy_(
207 | (1. - self.target_tau) * t.data + self.target_tau * s.data)
208 |
209 | def getactionbound(self, a, b, x, i):
210 | x = (x - a) * (self.action_higher_bound[i] - self.action_lower_bound[i]) / (b - a) \
211 | + self.action_lower_bound[i]
212 | return x
213 |
214 | def choose_action(self, state):
215 | state_var = to_tensor_var([state], self.use_cuda)
216 | action = np.zeros((self.n_agents, self.action_dim))
217 |
218 | for agent_id in range(self.n_agents):
219 | action_var = (self.actors[agent_id](state_var[:,agent_id,:]))
220 | if self.use_cuda:
221 | action[agent_id] = action_var.data.cpu().numpy()[0]
222 | else:
223 | action[agent_id] = action_var.data.numpy()[0]
224 |
225 | for n in range(self.n_agents):
226 | for i in range(6):
227 |             if (self.n_episodes < 600): e = self.n_episodes
228 |             else: e = 600
229 | action[n][i] = -exp(-e/self.tau) + self.noise
230 | b = 1
231 | a = -1
232 | if self.action_dim > 6:
233 | print("Wrong!")
234 | for n in range(self.n_agents):
235 | action[n][0] = 0 if action[n][0] <= 0 else 1
236 | action[n][1] = round(self.getactionbound(a, b, action[n][1], 1))
237 | action[n][2] = self.getactionbound(a, b, action[n][2], 2)
238 | action[n][3] = self.getactionbound(a, b, action[n][3], 3)
239 | action[n][4] = self.getactionbound(a, b, action[n][4], 4)
240 | action[n][5] = self.getactionbound(a, b, action[n][5], 5)
241 |
242 |
243 | return action
244 |
245 |
246 | # evaluate value for a state-action pair
247 | def value(self, state, action):
248 | state_var = to_tensor_var([state], self.use_cuda)
249 | action_var = to_tensor_var([action], self.use_cuda)
250 | whole_state_var = state_var.view(-1, self.n_agents*self.state_dim)
251 | whole_action_var = action_var.view(-1, self.n_agents*self.action_dim)
252 | values = np.zeros(self.n_agents)
253 | for agent_id in range(self.n_agents):
254 | value_var = self.critics[agent_id](whole_state_var, whole_action_var)
255 | if self.use_cuda:
256 | values[agent_id] = value_var.data.cpu().numpy()[0]
257 | else:
258 | values[agent_id] = value_var.data.numpy()[0]
259 | return values
260 |
261 | def _discount_reward(self, rewards, final_value):
262 | discounted_r = np.zeros_like(rewards)
263 | running_add = final_value
264 | for t in reversed(range(0, len(rewards))):
265 | running_add = running_add * self.reward_gamma + rewards[t]
266 | discounted_r[t] = running_add
267 | return discounted_r
268 |
269 |
--------------------------------------------------------------------------------
/Memory.py:
--------------------------------------------------------------------------------
1 |
2 | import random
3 | from collections import namedtuple
4 |
5 |
6 | Experience = namedtuple("Experience",
7 | ("states", "actions", "rewards", "next_states", "next_actions", "dones"))
8 |
9 |
10 | class ReplayMemory(object):
11 | """
12 | Replay memory buffer
13 | """
14 | def __init__(self, capacity):
15 | self.capacity = capacity
16 | self.memory = []
17 | self.position = 0
18 |
19 | def _push_one(self, state, action, reward, next_state=None, next_action=None, done=None):
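        # Ring buffer: append until capacity is reached, then overwrite the oldest entry.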
20 | if len(self.memory) < self.capacity:
21 | self.memory.append(None)
22 | self.memory[self.position] = Experience(state, action, reward, next_state, next_action, done)
23 | self.position = (self.position + 1) % self.capacity
24 |
25 | # def push(self, states, actions, rewards, next_states=None, dones=None):
26 |
27 |
28 | # # print("states = ", states)
29 | # # print("actions = ", actions)
30 | # # print("rewards = ", rewards)
31 | # # print("next_states = ", next_states)
32 |
33 | # # print("dones = ", dones)
34 | # if isinstance(states, list):
35 | # if next_states is not None and len(next_states) > 0:
36 | # self._push_one(states, actions, rewards, next_states, dones)
37 | # else:
38 | # self._push_one(states, actions, rewards)
39 | # else:
40 | # self._push_one(states, actions, rewards, next_states, dones)
41 |
42 | def push(self, states, actions, rewards, next_states=None, next_actions=None, dones=None):
43 |
44 | if isinstance(states, list):
45 | if dones is not None and len(next_states) > 0:
46 | for s,a,r,n_s, n_a, d in zip(states, actions, rewards, next_states, next_actions, dones):
47 | self._push_one(s, a, r, n_s, n_a, d)
48 | elif next_states is not None:
49 | for s,a,r, n_s, n_a in zip(states, actions, rewards, next_states, next_actions):
50 | self._push_one(s, a, r, n_s, n_a)
51 | else:
52 | for s,a,r in zip(states, actions, rewards):
53 | self._push_one(s, a, r)
54 | else:
55 | self._push_one(states, actions, rewards, next_states, next_actions, dones)
56 |
57 |
58 | def sample(self, batch_size):
59 | if batch_size > len(self.memory):
60 | batch_size = len(self.memory)
61 | transitions = random.sample(self.memory, batch_size)
62 | batch = Experience(*zip(*transitions))
63 | return batch
64 |
65 | def __len__(self):
66 | return len(self.memory)
67 |
--------------------------------------------------------------------------------
/Model.py:
--------------------------------------------------------------------------------
1 |
2 | import torch as th
3 | from torch import nn
4 |
5 | NUMBER = 4
6 |
7 | class ActorNetwork(nn.Module):
8 | """
9 | A network for actor
10 | """
11 | def __init__(self, state_dim, output_size, output_act, init_w =3e-3):
12 | super(ActorNetwork, self).__init__()
13 | self.fc1 = nn.Linear(state_dim, 64)
14 | self.fc2 = nn.Linear(64, 128)
15 | self.fc3 = nn.Linear(128, output_size)
16 |
17 | self.fc3.weight.data.uniform_(-init_w, init_w)
18 | self.fc3.bias.data.uniform_(-init_w, init_w)
19 | # activation function for the output
20 | self.output_act = output_act
21 |
22 |     def forward(self, state):
23 | out = nn.functional.relu(self.fc1(state))
24 | out = nn.functional.relu(self.fc2(out))
25 | if self.output_act == nn.functional.softmax:
26 | out = self.output_act(self.fc3(out), dim=-1)
27 | else:
28 | out = self.output_act(self.fc3(out))
29 | return out
30 |
31 |
32 | class CriticNetwork(nn.Module):
33 | """
34 | A network for critic
35 | """
36 | def __init__(self, state_dim, action_dim, output_size=1, init_w =3e-3):
37 | super(CriticNetwork, self).__init__()
38 | self.fc1 = nn.Linear(state_dim + action_dim, 64)
39 | self.fc2 = nn.Linear(64, 128)
40 | self.fc3 = nn.Linear(128, output_size)
41 |
42 | self.fc3.weight.data.uniform_(-init_w, init_w)
43 | self.fc3.bias.data.uniform_(-init_w, init_w)
44 |
45 |     def forward(self, state, action):
46 | out = th.cat([state, action], 1)
47 | out = nn.functional.relu(self.fc1(out))
48 | out = nn.functional.relu(self.fc2(out))
49 | out = self.fc3(out)
50 | return out
51 |
52 |
53 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deep Reinforcement Learning for Mobile Edge Computing
2 |
3 | This repository contains Python code and a corresponding article on mobile edge computing (MEC) and its optimization with deep reinforcement learning (DRL). The article discusses the challenges of designing an efficient task-offloading strategy for an entire MEC system and proposes multi-agent DRL to support smart task offloading in a MEC network.
4 | 
5 | Specifically, the project simplifies the MEC problem to video task processing and applies three DRL methods based on the Actor-Critic structure: Multi-Agent Advantage Actor-Critic (MAA2C), Multi-Agent Proximal Policy Optimization (MAPPO), and Multi-Agent Deep Deterministic Policy Gradient (MADDPG). The reward obtained under different environment parameters is compared across the three methods, along with their final results.
6 |
7 | Article: https://github.com/XinyaoQiu/DRL-for-edge-computing/blob/master/DRL-for-edge-computing.pdf
8 |
9 | Presentation: https://github.com/XinyaoQiu/DRL-for-edge-computing/blob/master/DRL-presentation.pdf
10 |
11 | ## Authors
12 |
13 | - Xinyao Qiu
14 | - Yuqi Mai
15 |
16 | ## Motivation
17 |
18 | Mobile edge computing (MEC) is a promising technology that can improve the computing experience of electronic devices by offloading computation-intensive tasks to MEC servers located at the network edge, close to the devices. However, designing an efficient task-offloading strategy for the whole MEC system is not easy. Recently, many edge task-offloading schemes have been proposed, but most of them consider single-agent offloading scenarios using traditional convex optimization tools. Deep reinforcement learning (DRL) techniques, such as deep Q-networks (DQN), have emerged as a promising alternative by modeling offloading problems as Markov decision processes (MDP) and using deep neural networks (DNN) for function approximation. However, these efforts only use a single agent to handle the entire offloading process and do not work well in a large-scale distributed MEC environment. An interesting alternative is to use multi-agent DRL (MA-DRL) to support smart task offloading in a MEC network.
19 |
20 | ## Getting Started
21 |
22 | To get started with this project, you can clone the repository and run the Python code on your machine. You will need to have Python 3 and the following packages installed:
23 |
24 | - Tensorflow
25 | - PyTorch
26 | - Keras
27 | - OpenAI Gym
28 |
29 | You can install these packages using pip:
30 |
31 | ```bash
32 | pip install tensorflow keras gym torch
33 | ```
34 |
35 | ## Usage
36 |
37 | The main code files in this repository are:
38 | 
39 | - `MAA2C.py`: Implements the Multi-Agent Advantage Actor-Critic (MAA2C) algorithm.
40 | - `MAPPO.py`: Implements the Multi-Agent Proximal Policy Optimization (MAPPO) algorithm.
41 | - `MADDPG.py`: Implements the Multi-Agent Deep Deterministic Policy Gradient (MADDPG) algorithm.
42 | - `Model.py` and `Memory.py`: Define the actor/critic networks and the replay memory buffer shared by the algorithms.
43 | - `env.py`: Defines the MEC environment and its reward function.
44 | - `run.py`, `run_a2c.py`, `run_ppo.py`, `run_ddpg.py`: Scripts for running and training the agents with each algorithm.
45 | - `plot.py`, `plot_phi.py`, `plot_reward.py`, `plot_reward_avg.py`: Plotting utilities for the recorded results.
46 | 
47 | To train the agents, run the entry-point script for the desired algorithm, for example:
48 | 
49 | ```
50 | python run_a2c.py
51 | ```
52 | 
53 | Use `run_ppo.py` or `run_ddpg.py` in the same way for the other algorithms. The plotting scripts generate the reward, phi, and energy figures found in `graphs/` and `output/`.
57 |
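If you want to drive the agent classes directly instead of using the run scripts, the classes in `MAA2C.py`, `MAPPO.py`, and `MADDPG.py` all follow the same interact/train loop. The sketch below is illustrative only: the environment constructor, its attributes, the number of iterations, and the `noise`/`bound` values are assumptions that should be taken from `env.py` and the `run_*` scripts.

```python
from env import Env          # assumed class name; see env.py for the actual constructor
from MAA2C import MAA2C

env = Env()                                      # MEC environment (placeholder arguments)
n_agents = 3                                     # placeholder
state_dim, action_dim = env.state_dim, env.action_dim          # assumed attributes
lb, hb = env.action_lower_bound, env.action_higher_bound       # assumed attributes

agent = MAA2C(env, n_agents, state_dim, action_dim,
              action_lower_bound=lb, action_higher_bound=hb,
              noise=0.4, bound=500,              # placeholder hyperparameters
              training_strategy="centralized")

for _ in range(5000):                            # number of interactions is a placeholder
    agent.interact()                             # roll out, record rewards, push experience to memory
    agent.train()                                # update the actor and critic networks

print(agent.episodes, agent.mean_rewards)        # evaluation curves recorded during training
```
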
58 | ## References
59 |
60 | 1. X. Xiong, K. Zheng, L. Lei, and L. Hou, “Resource allocation based on deep reinforcement learning in iot edge computing,” IEEE J. Sel. Areas Commun., vol. 38, no. 6, pp. 1133–1146, 2020.
61 | 2. D. Nguyen, M. Ding, P. Pathirana, A. Seneviratne, J. Li, and V. Poor, “Cooperative task offloading and block mining in blockchain-based edge computing with multi-agent deep reinforcement learning,” IEEE Transactions on Mobile Computing, pp. 1–1, 2021.
62 | 3. A. Barto, R. Sutton, and C. Anderson, “Neuronlike adaptive elements that can solve difficult learning control problems,” IEEE Transactions on Systems, Man, and Cybernetics, vol. SMC-13, no. 5, pp. 834–846, 1983.
63 | 4. OpenAI, “OpenAI Baselines: ACKTR & A2C,” OpenAI Blog, 2017.
64 | 5. J. Schulman, F. Wolski, P. Dhariwal, A. Radford, and O. Klimov, “Proximal policy optimization algorithms,” 2017.
65 | 6. T. P. Lillicrap, J. J. Hunt, A. Pritzel, N. Heess, T. Erez, Y. Tassa, D. Silver, and D. Wierstra, “Continuous control with deep reinforcement learning,” 2016.
66 | 7. B. Yang, X. Cao, J. Bassey, X. Li, and L. Qian, “Computation offloading in multi-access edge computing: A multi-task learning approach,” IEEE Trans. Mobile Comput., pp. 1–1, 2021, doi:10.1109/TMC.2020.2990630.
67 | 8. Z. Shou, X. Lin, Y. Kalantidis, L. Sevilla-Lara, M. Rohrbach, S.-F. Chang, and Z. Yan, “DMC-Net: Generating discriminative motion cues for fast compressed video action recognition,” in Proceedings of the IEEE/CVF conference on computer vision and pattern recognition, 2019, pp. 1268–1277.
68 |
69 | ## License
70 |
71 | This project is licensed under the MIT License - see the LICENSE file for details.
72 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | ## Supported Versions
4 |
5 | Use this section to tell people about which versions of your project are
6 | currently being supported with security updates.
7 |
8 | | Version | Supported |
9 | | ------- | ------------------ |
10 | | 5.1.x | :white_check_mark: |
11 | | 5.0.x | :x: |
12 | | 4.0.x | :white_check_mark: |
13 | | < 4.0 | :x: |
14 |
15 | ## Reporting a Vulnerability
16 |
17 | Use this section to tell people how to report a vulnerability.
18 |
19 | Tell them where to go, how often they can expect to get an update on a
20 | reported vulnerability, what to expect if the vulnerability is accepted or
21 | declined, etc.
22 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__init__.py
--------------------------------------------------------------------------------
/__pycache__/Agent.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/Agent.cpython-310.pyc
--------------------------------------------------------------------------------
/__pycache__/Agent.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/Agent.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/MAA2C.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/MAA2C.cpython-310.pyc
--------------------------------------------------------------------------------
/__pycache__/MAA2C.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/MAA2C.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/MADDPG.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/MADDPG.cpython-310.pyc
--------------------------------------------------------------------------------
/__pycache__/MADDPG.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/MADDPG.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/MADDPG.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/MADDPG.cpython-39.pyc
--------------------------------------------------------------------------------
/__pycache__/MAPPO.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/MAPPO.cpython-310.pyc
--------------------------------------------------------------------------------
/__pycache__/MAPPO.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/MAPPO.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/Memory.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/Memory.cpython-310.pyc
--------------------------------------------------------------------------------
/__pycache__/Memory.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/Memory.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/Memory.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/Memory.cpython-39.pyc
--------------------------------------------------------------------------------
/__pycache__/Model.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/Model.cpython-310.pyc
--------------------------------------------------------------------------------
/__pycache__/Model.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/Model.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/Model.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/Model.cpython-39.pyc
--------------------------------------------------------------------------------
/__pycache__/ddpg.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/ddpg.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/env.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/env.cpython-310.pyc
--------------------------------------------------------------------------------
/__pycache__/env.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/env.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/env.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/env.cpython-39.pyc
--------------------------------------------------------------------------------
/__pycache__/env_1.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/env_1.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/run_ddpg.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/run_ddpg.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/utils.cpython-310.pyc
--------------------------------------------------------------------------------
/__pycache__/utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/utils.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/utils.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/__pycache__/utils.cpython-39.pyc
--------------------------------------------------------------------------------
/env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | LAMBDA_E = 0.6
4 | LAMBDA_PHI = 0.4
5 |
6 | MU_1 = 0.6
7 | MU_2 = 0.4
8 |
9 | K_CHANNEL = 4
10 |
11 | MIN_SIZE = 0.2
12 | MAX_SIZE = 50
13 |
14 | MIN_CYCLE = 0.05
15 | MAX_CYCLE = 2
16 |
17 | MIN_DDL = 0.4
18 | MAX_DDL = 2
19 |
20 | MIN_RES = 0.4
21 | MAX_RES = 2.3
22 |
23 | MIN_COM = 0.1
24 | MAX_COM = 1
25 |
26 | MAX_POWER = 24
27 |
28 | MAX_GAIN = 10
29 | MIN_GAIN = 5
30 |
31 |
32 | V_L = 0.125
33 | V_E = 0.13
34 |
35 | THETA_L = 1/1600
36 | THETA_E = 1/1700
37 |
38 | K_ENERGY_LOCAL = 0.8 * 10**(-3) #k = 0.8 * 10 ^(-27) * M * G^2#
39 | K_ENERGY_MEC = 0.7 * 10**(-3)
40 |
41 | NOISE_VARIANCE = 100
42 |
43 | OMEGA = 0.9*10**(-2)  # w = 0.9 * 10^(-11) * G
44 |
45 | CAPABILITY_E = 5
46 |
47 | MIN_EPSILON = 0.56
48 | MAX_EPSILON = 0.93
49 |
50 | KSI = 0.5
51 | LAMBDA = 0.5
52 | ALPHA = 0.5
53 | BETA = 10
54 |
55 | S_POWER = 20
56 | S_GAIN = 8
57 | S_SIZE = 8
58 | S_CYCLE = 1
59 | S_RESOLU = 0.6
60 |
61 | S_RES = 1.5
62 | S_COM = 0.6
63 |
64 |
65 | class MecBCEnv(object):
66 | def __init__(self, n_agents, S_DDL=1, S_EPSILON=0.86, W_BANDWIDTH=20, \
67 | S_one_power=20, S_one_gamma=0.6, mode="normal"):
68 |
69 |
70 | self.state_size = 10
71 | self.action_size = 6
72 | self.n_agents = n_agents
73 |
74 | self.S_DDL = S_DDL
75 | self.S_EPSILON = S_EPSILON
76 |
77 | self.W_BANDWIDTH = W_BANDWIDTH
78 | self.S_one_power = S_one_power
79 | self.S_one_gamma = S_one_gamma
80 |
81 | # state
82 | self.S_channel = np.zeros(self.n_agents)
83 | self.S_power = np.zeros(self.n_agents)
84 | self.S_gain = np.zeros(self.n_agents)
85 | self.S_size = np.zeros(self.n_agents)
86 | self.S_cycle = np.zeros(self.n_agents)
87 | self.S_resolu = np.zeros(self.n_agents)
88 | self.S_ddl = np.zeros(self.n_agents)
89 | self.S_res = np.zeros(self.n_agents)
90 | self.S_com = np.zeros(self.n_agents)
91 | self.S_epsilon = np.zeros(self.n_agents)
92 | self.mode = mode
93 |
94 | self.action_lower_bound = [0, 0, 0.01, MIN_RES, MIN_COM, 1]
95 | self.action_higher_bound = [1, K_CHANNEL, 0.99, MAX_RES, MAX_COM, MAX_POWER]
96 |
97 |
98 |
99 | self.epoch = 0
100 |
101 |     # Reset the environment to a fresh random state
102 | def reset(self):
103 | self.epoch = 0
104 |         # Randomize each agent's state
105 | for n in range(self.n_agents):
106 | self.S_channel[n] = 1
107 | self.S_power[n] = np.random.normal(S_POWER, 1)
108 | self.S_gain[n] = np.random.normal(S_GAIN, 1)
109 | self.S_size[n] = np.random.normal(S_SIZE, 1)
110 | self.S_cycle[n] = np.random.normal(S_CYCLE, 0.1)
111 | self.S_resolu[n] = np.random.normal(S_RESOLU, 0.1)
112 | self.S_ddl[n] = np.random.normal(self.S_DDL, 0.1)
113 | self.S_res[n] = np.random.normal(S_RES, 0.1)
114 | self.S_com[n] = np.random.normal(S_COM, 0.1)
115 | self.S_epsilon[n] = np.random.normal(self.S_EPSILON, 0.1)
116 |
117 | self.S_power[0] = np.random.normal(self.S_one_power, 1)
118 | self.S_com[0] = np.random.normal(self.S_one_gamma, 0.1)
119 |
120 | State_ = []
121 | State_ = [[self.S_channel[n], self.S_power[n], self.S_gain[n], self.S_size[n], self.S_cycle[n], \
122 | self.S_resolu[n], self.S_ddl[n], self.S_res[n], self.S_com[n], self.S_epsilon[n]] for n in range(self.n_agents)]
123 |
124 | State_ = np.array(State_)
125 |
126 |
127 |
128 | return State_
129 |
130 |
131 | def step(self, action):
132 |
133 |
134 | # action
135 | A_decision = np.zeros(self.n_agents)
136 | A_channel = np.zeros(self.n_agents)
137 | A_resolu = np.zeros(self.n_agents)
138 | A_res = np.zeros(self.n_agents)
139 | A_com = np.zeros(self.n_agents)
140 | A_power = np.zeros(self.n_agents)
141 | if self.mode == "normal":
142 | for n in range(self.n_agents):
143 | A_decision[n] = action[n][0]
144 | A_channel[n] = action[n][1]
145 | A_resolu[n] = action[n][2]
146 | A_res[n] = action[n][3]
147 | A_com[n] = action[n][4]
148 | A_power[n] = action[n][5]
149 | elif self.mode == "NAC":
150 | for n in range(self.n_agents):
151 | A_decision[n] = action[n][0]
152 | A_channel[n] = action[n][1]
153 | A_resolu[n] = 0.2
154 | A_res[n] = action[n][3]
155 | A_com[n] = action[n][4]
156 | A_power[n] = action[n][5]
157 | elif self.mode == "ALLES":
158 | for n in range(self.n_agents):
159 | A_decision[n] = 1
160 | A_channel[n] = action[n][1]
161 | A_resolu[n] = action[n][2]
162 | A_res[n] = action[n][3]
163 | A_com[n] = action[n][4]
164 | A_power[n] = action[n][5]
165 | else:
166 |             print("Unknown mode:", self.mode)
167 |
168 |
169 | S_channel = self.S_channel
170 | S_power = self.S_power
171 | S_gain = self.S_gain
172 | S_size = self.S_size
173 | S_cycle = self.S_cycle
174 | S_resolu = self.S_resolu
175 | S_ddl = self.S_ddl
176 | S_res = self.S_res
177 | S_com = self.S_com
178 | S_epsilon = self.S_epsilon
179 |
180 |         # Adjust A_decision according to S_channel: if the selected channel matches the current one, force local execution
181 | for n in range(self.n_agents):
182 | for k in range(K_CHANNEL):
183 | if S_channel[n] == k and A_channel[n] == k:
184 | A_decision[n] = 0
185 |
186 |         # Compute the reward
187 | x_n = np.zeros(self.n_agents)
188 | for n in range(self.n_agents):
189 | if S_channel[n] != 0:
190 | x_n[n] = 1
191 | else:
192 | x_n[n] = 0
193 |
194 | total_power = 0
195 | for n in range(self.n_agents):
196 | total_power += x_n[n] * S_power[n] * S_gain[n]
197 |
198 |
199 | Phi_local = V_L * np.log(1 + S_resolu / THETA_L)
200 |
201 | Phi_off = V_E * np.log(1 + S_resolu / THETA_E)
202 |
203 | Phi_n = (1 - x_n) * Phi_local + x_n * Phi_off
204 |
205 |
206 | Phi_penalty = np.maximum((S_epsilon - Phi_n) / S_epsilon, 0)
207 |
208 |
209 | total_com = np.sum(S_com)
210 |
211 | DataRate = self.W_BANDWIDTH * np.log(1 + S_power * S_gain / (NOISE_VARIANCE + \
212 | total_power - x_n * S_power * S_gain)) / np.log(2)
213 |
214 |
215 | Time_proc = S_resolu * S_cycle / CAPABILITY_E
216 |
217 | Time_local = S_resolu * S_cycle / S_res
218 |
219 | Time_off = S_resolu * S_size / DataRate
220 |
221 | Time_n = (1 - x_n) * Time_local + x_n * (Time_off + Time_proc)
222 |
223 | total_com = np.sum(S_com)
224 |
225 | T_mean = np.mean(Time_n)
226 |
227 | R_mine = KSI * S_com / total_com * np.exp(-LAMBDA * T_mean / S_ddl)
228 |
229 | Time_penalty = np.maximum((Time_n - S_ddl) / Time_n, 0)
230 |
231 | Energy_local = K_ENERGY_LOCAL * S_size * S_resolu * (S_res**2) + OMEGA * S_com
232 |
233 | Energy_off = S_power * Time_off * 10**(-6)
234 |
235 | Energy_mine = OMEGA * S_com
236 |
237 | Energy_n = (1 - x_n) * Energy_local + x_n * Energy_off
238 |
239 | Reward_vt = LAMBDA_E * ((Energy_local - Energy_n) / Energy_local) - LAMBDA_PHI * ((Phi_local - Phi_n) / Phi_local)
240 |
241 | Utility_mine = R_mine - Energy_mine
242 |
243 | Reward = MU_1 * Reward_vt + MU_2 * Utility_mine - BETA * (Phi_penalty + Time_penalty)
244 |
245 | # print(np.sum(Reward), np.sum(Reward_mine), np.sum(Reward_vt), np.sum(Phi_penalty), np.sum(Time_penalty))
246 |
247 |         # Update the state according to the action
248 | for n in range(self.n_agents):
249 | if int(A_decision[n]):
250 | self.S_channel[n] = A_channel[n]
251 | self.S_resolu = A_resolu
252 | self.S_res = A_res
253 | self.S_com = A_com
254 | self.S_power = A_power
255 |
256 | State_ = []
257 | State_ = [[self.S_channel[n], self.S_power[n], self.S_gain[n], self.S_size[n], self.S_cycle[n], \
258 | self.S_resolu[n], self.S_ddl[n], self.S_res[n], self.S_com[n], self.S_epsilon[n]] for n in range(self.n_agents)]
259 |
260 | State_ = np.array(State_)
261 |
262 | self.epoch += 1
263 | done = False
264 | if self.epoch > 100:
265 | self.reset()
266 | done = True
267 |
268 |
269 |
270 | return State_, Reward, done, True, Phi_n, Energy_n, R_mine, Energy_mine
271 |
272 |
273 |
274 |
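# ---------------------------------------------------------------------------
# Editor's sketch (not part of the original env.py): a minimal example of
# driving MecBCEnv with random actions sampled between action_lower_bound and
# action_higher_bound. The agent count (4) and step count (5) are illustrative
# assumptions, not values taken from the repository.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    env = MecBCEnv(n_agents=4)
    state = env.reset()                        # shape: (n_agents, state_size)
    low = np.array(env.action_lower_bound)
    high = np.array(env.action_higher_bound)
    for _ in range(5):
        # One random action per agent, each component drawn within its bound
        action = np.random.uniform(low, high, size=(env.n_agents, env.action_size))
        state, reward, done, _, phi, energy, r_mine, e_mine = env.step(action)
        print("mean reward:", reward.mean(), "done:", done)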
--------------------------------------------------------------------------------
/excel/DDPG_A2C_PPO.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/excel/DDPG_A2C_PPO.xls
--------------------------------------------------------------------------------
/excel/Excel_a2c.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/excel/Excel_a2c.xls
--------------------------------------------------------------------------------
/excel/Excel_ddpg.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/excel/Excel_ddpg.xls
--------------------------------------------------------------------------------
/excel/Excel_ppo.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/excel/Excel_ppo.xls
--------------------------------------------------------------------------------
/excel/final.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/excel/final.xls
--------------------------------------------------------------------------------
/graphs/498/convergence.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/498/convergence.png
--------------------------------------------------------------------------------
/graphs/498/reward_vs_parameter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/498/reward_vs_parameter.png
--------------------------------------------------------------------------------
/graphs/change agents/ALLES_change_agents.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change agents/ALLES_change_agents.png
--------------------------------------------------------------------------------
/graphs/change agents/NAC_change_agents.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change agents/NAC_change_agents.png
--------------------------------------------------------------------------------
/graphs/change agents/a2c_change_agents.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change agents/a2c_change_agents.png
--------------------------------------------------------------------------------
/graphs/change agents/ddpg_change_agents.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change agents/ddpg_change_agents.png
--------------------------------------------------------------------------------
/graphs/change agents/ppo_change_agents.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change agents/ppo_change_agents.png
--------------------------------------------------------------------------------
/graphs/change bandwidth/ALLES_change_bandwidth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change bandwidth/ALLES_change_bandwidth.png
--------------------------------------------------------------------------------
/graphs/change bandwidth/NAC_change_bandwidth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change bandwidth/NAC_change_bandwidth.png
--------------------------------------------------------------------------------
/graphs/change bandwidth/a2c_change_bandwidth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change bandwidth/a2c_change_bandwidth.png
--------------------------------------------------------------------------------
/graphs/change bandwidth/ddpg_change_bandwidth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change bandwidth/ddpg_change_bandwidth.png
--------------------------------------------------------------------------------
/graphs/change bandwidth/ppo_change_bandwidth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change bandwidth/ppo_change_bandwidth.png
--------------------------------------------------------------------------------
/graphs/change ddl/ALLES_change_ddl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change ddl/ALLES_change_ddl.png
--------------------------------------------------------------------------------
/graphs/change ddl/NAC_change_ddl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change ddl/NAC_change_ddl.png
--------------------------------------------------------------------------------
/graphs/change ddl/a2c_change_ddl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change ddl/a2c_change_ddl.png
--------------------------------------------------------------------------------
/graphs/change ddl/ddpg_change_ddl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change ddl/ddpg_change_ddl.png
--------------------------------------------------------------------------------
/graphs/change ddl/ppo_change_ddl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change ddl/ppo_change_ddl.png
--------------------------------------------------------------------------------
/graphs/change epsilon/ALLES_change_epsilon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change epsilon/ALLES_change_epsilon.png
--------------------------------------------------------------------------------
/graphs/change epsilon/NAC_change_epsilon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change epsilon/NAC_change_epsilon.png
--------------------------------------------------------------------------------
/graphs/change epsilon/a2c_change_epsilon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change epsilon/a2c_change_epsilon.png
--------------------------------------------------------------------------------
/graphs/change epsilon/ddpg_change_epsilon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change epsilon/ddpg_change_epsilon.png
--------------------------------------------------------------------------------
/graphs/change epsilon/ppo_change_epsilon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change epsilon/ppo_change_epsilon.png
--------------------------------------------------------------------------------
/graphs/change one gamma/change one gamma e_mine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change one gamma/change one gamma e_mine.png
--------------------------------------------------------------------------------
/graphs/change one gamma/change one gamma r_mine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change one gamma/change one gamma r_mine.png
--------------------------------------------------------------------------------
/graphs/change one power/change one power energy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change one power/change one power energy.png
--------------------------------------------------------------------------------
/graphs/change one power/change one power reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/change one power/change one power reward.png
--------------------------------------------------------------------------------
/graphs/ddpg_change_lr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/ddpg_change_lr.png
--------------------------------------------------------------------------------
/graphs/episodes_avg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/episodes_avg.png
--------------------------------------------------------------------------------
/graphs/phi&energy/energy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/phi&energy/energy.png
--------------------------------------------------------------------------------
/graphs/phi&energy/phi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/phi&energy/phi.png
--------------------------------------------------------------------------------
/graphs/reward_vs_parameters/reward_vs_agents.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/reward_vs_parameters/reward_vs_agents.png
--------------------------------------------------------------------------------
/graphs/reward_vs_parameters/reward_vs_bandwidth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/reward_vs_parameters/reward_vs_bandwidth.png
--------------------------------------------------------------------------------
/graphs/reward_vs_parameters/reward_vs_ddl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/reward_vs_parameters/reward_vs_ddl.png
--------------------------------------------------------------------------------
/graphs/reward_vs_parameters/reward_vs_epsilon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/graphs/reward_vs_parameters/reward_vs_epsilon.png
--------------------------------------------------------------------------------
/output/a2c_change_ddl_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_10.png
--------------------------------------------------------------------------------
/output/a2c_change_ddl_11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_11.png
--------------------------------------------------------------------------------
/output/a2c_change_ddl_12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_12.png
--------------------------------------------------------------------------------
/output/a2c_change_ddl_13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_13.png
--------------------------------------------------------------------------------
/output/a2c_change_ddl_14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_14.png
--------------------------------------------------------------------------------
/output/a2c_change_ddl_15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_15.png
--------------------------------------------------------------------------------
/output/a2c_change_ddl_16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_16.png
--------------------------------------------------------------------------------
/output/a2c_change_ddl_17.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_17.png
--------------------------------------------------------------------------------
/output/a2c_change_ddl_18.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_18.png
--------------------------------------------------------------------------------
/output/a2c_change_ddl_19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_19.png
--------------------------------------------------------------------------------
/output/a2c_change_ddl_20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_20.png
--------------------------------------------------------------------------------
/output/a2c_change_ddl_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_8.png
--------------------------------------------------------------------------------
/output/a2c_change_ddl_9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/a2c_change_ddl_9.png
--------------------------------------------------------------------------------
/output/change agents/a2c/a2c_change_agents_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_1.png
--------------------------------------------------------------------------------
/output/change agents/a2c/a2c_change_agents_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_10.png
--------------------------------------------------------------------------------
/output/change agents/a2c/a2c_change_agents_11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_11.png
--------------------------------------------------------------------------------
/output/change agents/a2c/a2c_change_agents_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_4.png
--------------------------------------------------------------------------------
/output/change agents/a2c/a2c_change_agents_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_5.png
--------------------------------------------------------------------------------
/output/change agents/a2c/a2c_change_agents_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_6.png
--------------------------------------------------------------------------------
/output/change agents/a2c/a2c_change_agents_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_7.png
--------------------------------------------------------------------------------
/output/change agents/a2c/a2c_change_agents_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_8.png
--------------------------------------------------------------------------------
/output/change agents/a2c/a2c_change_agents_9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_9.png
--------------------------------------------------------------------------------
/output/change agents/a2c/a2c_change_agents_final.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/a2c/a2c_change_agents_final.png
--------------------------------------------------------------------------------
/output/change agents/ddpg/ddpg_change_agents_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/ddpg/ddpg_change_agents_2.png
--------------------------------------------------------------------------------
/output/change agents/ddpg/ddpg_change_agents_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/ddpg/ddpg_change_agents_3.png
--------------------------------------------------------------------------------
/output/change agents/ddpg/ddpg_change_agents_final.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/ddpg/ddpg_change_agents_final.png
--------------------------------------------------------------------------------
/output/change agents/ppo/ppo_change_agents_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/ppo/ppo_change_agents_1.png
--------------------------------------------------------------------------------
/output/change agents/ppo/ppo_change_agents_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/ppo/ppo_change_agents_2.png
--------------------------------------------------------------------------------
/output/change agents/ppo/ppo_change_agents_final.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/ppo/ppo_change_agents_final.png
--------------------------------------------------------------------------------
/output/change agents/ppo_change_agents_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/ppo_change_agents_3.png
--------------------------------------------------------------------------------
/output/change agents/ppo_change_agents_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/ppo_change_agents_4.png
--------------------------------------------------------------------------------
/output/change agents/ppo_change_agents_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change agents/ppo_change_agents_5.png
--------------------------------------------------------------------------------
/output/change bandwidth/a2c/a2c_change_bandwidth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/a2c/a2c_change_bandwidth.png
--------------------------------------------------------------------------------
/output/change bandwidth/a2c/a2c_change_bandwidth1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/a2c/a2c_change_bandwidth1.png
--------------------------------------------------------------------------------
/output/change bandwidth/a2c/a2c_change_bandwidth2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/a2c/a2c_change_bandwidth2.png
--------------------------------------------------------------------------------
/output/change bandwidth/a2c/a2c_change_bandwidth_11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/a2c/a2c_change_bandwidth_11.png
--------------------------------------------------------------------------------
/output/change bandwidth/a2c/a2c_change_bandwidth_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/a2c/a2c_change_bandwidth_3.png
--------------------------------------------------------------------------------
/output/change bandwidth/a2c/a2c_change_bandwidth_final.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/a2c/a2c_change_bandwidth_final.png
--------------------------------------------------------------------------------
/output/change bandwidth/ddpg/ddpg_change_bandwidth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/ddpg/ddpg_change_bandwidth.png
--------------------------------------------------------------------------------
/output/change bandwidth/ddpg/ddpg_change_bandwidth_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/ddpg/ddpg_change_bandwidth_3.png
--------------------------------------------------------------------------------
/output/change bandwidth/ddpg/ddpg_change_bandwidth_final.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/ddpg/ddpg_change_bandwidth_final.png
--------------------------------------------------------------------------------
/output/change bandwidth/ppo/ppo_change_bandwidth_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/ppo/ppo_change_bandwidth_1.png
--------------------------------------------------------------------------------
/output/change bandwidth/ppo/ppo_change_bandwidth_final.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change bandwidth/ppo/ppo_change_bandwidth_final.png
--------------------------------------------------------------------------------
/output/change ddl/a2c/a2c_change_ddl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl.png
--------------------------------------------------------------------------------
/output/change ddl/a2c/a2c_change_ddl1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl1.png
--------------------------------------------------------------------------------
/output/change ddl/a2c/a2c_change_ddl2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl2.png
--------------------------------------------------------------------------------
/output/change ddl/a2c/a2c_change_ddl_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl_2.png
--------------------------------------------------------------------------------
/output/change ddl/a2c/a2c_change_ddl_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl_3.png
--------------------------------------------------------------------------------
/output/change ddl/a2c/a2c_change_ddl_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl_4.png
--------------------------------------------------------------------------------
/output/change ddl/a2c/a2c_change_ddl_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl_5.png
--------------------------------------------------------------------------------
/output/change ddl/a2c/a2c_change_ddl_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl_6.png
--------------------------------------------------------------------------------
/output/change ddl/a2c/a2c_change_ddl_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl_7.png
--------------------------------------------------------------------------------
/output/change ddl/a2c/a2c_change_ddl_9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl_9.png
--------------------------------------------------------------------------------
/output/change ddl/a2c/a2c_change_ddl_final.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/a2c/a2c_change_ddl_final.png
--------------------------------------------------------------------------------
/output/change ddl/ddpg/ddpg_change_ddl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/ddpg/ddpg_change_ddl.png
--------------------------------------------------------------------------------
/output/change ddl/ddpg/ddpg_change_ddl_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/ddpg/ddpg_change_ddl_3.png
--------------------------------------------------------------------------------
/output/change ddl/ddpg/ddpg_change_ddl_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/ddpg/ddpg_change_ddl_4.png
--------------------------------------------------------------------------------
/output/change ddl/ddpg/ddpg_change_ddl_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/ddpg/ddpg_change_ddl_7.png
--------------------------------------------------------------------------------
/output/change ddl/ppo/ppo_change_ddl_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/ppo/ppo_change_ddl_1.png
--------------------------------------------------------------------------------
/output/change ddl/ppo/ppo_change_ddl_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/ppo/ppo_change_ddl_2.png
--------------------------------------------------------------------------------
/output/change ddl/ppo/ppo_change_ddl_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/ppo/ppo_change_ddl_3.png
--------------------------------------------------------------------------------
/output/change ddl/ppo/ppo_change_ddl_final.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change ddl/ppo/ppo_change_ddl_final.png
--------------------------------------------------------------------------------
/output/change epsilon/a2c/a2c_change_epsilon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/a2c/a2c_change_epsilon.png
--------------------------------------------------------------------------------
/output/change epsilon/a2c/a2c_change_epsilon1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/a2c/a2c_change_epsilon1.png
--------------------------------------------------------------------------------
/output/change epsilon/a2c/a2c_change_epsilon2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/a2c/a2c_change_epsilon2.png
--------------------------------------------------------------------------------
/output/change epsilon/a2c/a2c_change_epsilon_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/a2c/a2c_change_epsilon_3.png
--------------------------------------------------------------------------------
/output/change epsilon/ddpg/ddpg_change_epsilon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/ddpg/ddpg_change_epsilon.png
--------------------------------------------------------------------------------
/output/change epsilon/ddpg/ddpg_change_epsilon_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/ddpg/ddpg_change_epsilon_3.png
--------------------------------------------------------------------------------
/output/change epsilon/ddpg/ddpg_change_epsilon_final.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/ddpg/ddpg_change_epsilon_final.png
--------------------------------------------------------------------------------
/output/change epsilon/ddpg_change_epsilon_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/ddpg_change_epsilon_4.png
--------------------------------------------------------------------------------
/output/change epsilon/ppo/ppo_change_epsilon_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/ppo/ppo_change_epsilon_1.png
--------------------------------------------------------------------------------
/output/change epsilon/ppo/ppo_change_epsilon_final.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/ppo/ppo_change_epsilon_final.png
--------------------------------------------------------------------------------
/output/change epsilon/ppo_change_epsilon_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/change epsilon/ppo_change_epsilon_2.png
--------------------------------------------------------------------------------
/output/ddpg_change_ddl_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/ddpg_change_ddl_5.png
--------------------------------------------------------------------------------
/output/ddpg_change_ddl_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/ddpg_change_ddl_7.png
--------------------------------------------------------------------------------
/output/differ user/ddpg/phi_vs_ddl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/differ user/ddpg/phi_vs_ddl.png
--------------------------------------------------------------------------------
/output/differ user/ddpg/reward_vs_ddl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/differ user/ddpg/reward_vs_ddl.png
--------------------------------------------------------------------------------
/output/energy_vs_ddl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/energy_vs_ddl.png
--------------------------------------------------------------------------------
/output/phi_vs_ddl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/phi_vs_ddl.png
--------------------------------------------------------------------------------
/output/reward_vs_agents.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/reward_vs_agents.png
--------------------------------------------------------------------------------
/output/reward_vs_bandwidth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/reward_vs_bandwidth.png
--------------------------------------------------------------------------------
/output/reward_vs_ddl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/reward_vs_ddl.png
--------------------------------------------------------------------------------
/output/reward_vs_epsilon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XinyaoQiu/DRL-for-edge-computing/c356d498695b312a840004249b5b771b86aba051/output/reward_vs_epsilon.png
--------------------------------------------------------------------------------
/plot.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import xlrd
3 | MAX_EPISODES = 2000
4 | EPISODES_BEFORE_TRAIN = 0
5 |
6 | rworkbook = xlrd.open_workbook("excel/final.xls")
7 | agents = [1, 2, 3, 4, 5, 6]
8 | sheet_ddpg = rworkbook.sheet_by_name("ddpg_agent")
9 | episodes_avg_ddpg = [sheet_ddpg.cell(202, i + 1).value for i in range(6)]
10 | sheet_a2c = rworkbook.sheet_by_name("a2c_agent")
11 | episodes_avg_a2c = [sheet_a2c.cell(202, i + 1).value for i in range(6)]
12 | sheet_ppo = rworkbook.sheet_by_name("ppo_agent")
13 | episodes_avg_ppo = [sheet_ppo.cell(202, i + 1).value for i in range(6)]
14 | plt.figure()
15 | plt.plot(agents, episodes_avg_ddpg, "*-")
16 | plt.plot(agents, episodes_avg_a2c, "*-")
17 | plt.plot(agents, episodes_avg_ppo, "*-")
18 | plt.xlabel("agents")
19 | plt.ylabel("average episodes")
20 | plt.legend(["MADDPG", "MAA2C", "MAPPO"])
21 | plt.savefig("graphs/episodes_avg.png")
22 |
23 | rworkbook = xlrd.open_workbook("excel/Excel_ddpg.xls")
24 | one_power = [40, 60, 80, 100, 120, 140]
25 | sheet_ddpg = rworkbook.sheet_by_name("change_one_power_reward_3")
26 | episodes_avg_ddpg_1 = [sheet_ddpg.cell(201, 4 * i + 1).value for i in range(6)]
27 | episodes_avg_ddpg_2 = [sheet_ddpg.cell(201, 4 * i + 2).value for i in range(6)]
28 | episodes_avg_ddpg_3 = [sheet_ddpg.cell(201, 4 * i + 3).value for i in range(6)]
29 | episodes_avg_ddpg_4 = [sheet_ddpg.cell(201, 4 * i + 4).value for i in range(6)]
30 | plt.figure()
31 | plt.plot(one_power, episodes_avg_ddpg_1, "*-")
32 | plt.plot(one_power, episodes_avg_ddpg_2, "*-")
33 | plt.plot(one_power, episodes_avg_ddpg_3, "*-")
34 | plt.plot(one_power, episodes_avg_ddpg_4, "*-")
35 | plt.xlabel("change one power")
36 | plt.ylabel("reward")
37 | plt.legend(["user0", "user1", "user2", "user3"])
38 | plt.savefig("graphs/change one power reward.png")
39 |
40 | rworkbook = xlrd.open_workbook("excel/Excel_ddpg.xls")
41 | one_power = [40, 60, 80, 100, 120, 140]
42 | sheet_ddpg = rworkbook.sheet_by_name("change_one_power_energy_3")
43 | episodes_avg_ddpg_1 = [sheet_ddpg.cell(201, 4 * i + 1).value for i in range(6)]
44 | episodes_avg_ddpg_2 = [sheet_ddpg.cell(201, 4 * i + 2).value for i in range(6)]
45 | episodes_avg_ddpg_3 = [sheet_ddpg.cell(201, 4 * i + 3).value for i in range(6)]
46 | episodes_avg_ddpg_4 = [sheet_ddpg.cell(201, 4 * i + 4).value for i in range(6)]
47 | plt.figure()
48 | plt.plot(one_power, episodes_avg_ddpg_1, "*-")
49 | plt.plot(one_power, episodes_avg_ddpg_2, "*-")
50 | plt.plot(one_power, episodes_avg_ddpg_3, "*-")
51 | plt.plot(one_power, episodes_avg_ddpg_4, "*-")
52 | plt.xlabel("change one power")
53 | plt.ylabel("energy")
54 | plt.legend(["user0", "user1", "user2", "user3"])
55 | plt.savefig("graphs/change one power energy.png")
56 |
57 |
58 | rworkbook = xlrd.open_workbook("excel/Excel_ddpg.xls")
59 | one_gamma = [0.8, 1.0, 1.2, 1.4, 1.6, 1.8]
60 | sheet_ddpg = rworkbook.sheet_by_name("change_one_gamma_r_mine_5")
61 | episodes_avg_ddpg_1 = [sheet_ddpg.cell(201, 4 * i + 1).value for i in range(6)]
62 | episodes_avg_ddpg_2 = [sheet_ddpg.cell(201, 4 * i + 2).value for i in range(6)]
63 | episodes_avg_ddpg_3 = [sheet_ddpg.cell(201, 4 * i + 3).value for i in range(6)]
64 | episodes_avg_ddpg_4 = [sheet_ddpg.cell(201, 4 * i + 4).value for i in range(6)]
65 | plt.figure()
66 | plt.plot(one_gamma, episodes_avg_ddpg_1, "*-")
67 | plt.plot(one_gamma, episodes_avg_ddpg_2, "*-")
68 | plt.plot(one_gamma, episodes_avg_ddpg_3, "*-")
69 | plt.plot(one_gamma, episodes_avg_ddpg_4, "*-")
70 | plt.xlabel("change one gamma")
71 | plt.ylabel("r_mine")
72 | plt.legend(["user0", "user1", "user2", "user3"])
73 | plt.savefig("graphs/change one gamma r_mine.png")
74 |
75 | rworkbook = xlrd.open_workbook("excel/Excel_ddpg.xls")
76 | one_gamma = [0.8, 1.0, 1.2, 1.4, 1.6, 1.8]
77 | sheet_ddpg = rworkbook.sheet_by_name("change_one_gamma_e_mine_5")
78 | episodes_avg_ddpg_1 = [sheet_ddpg.cell(201, 4 * i + 1).value for i in range(6)]
79 | episodes_avg_ddpg_2 = [sheet_ddpg.cell(201, 4 * i + 2).value for i in range(6)]
80 | episodes_avg_ddpg_3 = [sheet_ddpg.cell(201, 4 * i + 3).value for i in range(6)]
81 | episodes_avg_ddpg_4 = [sheet_ddpg.cell(201, 4 * i + 4).value for i in range(6)]
82 | plt.figure()
83 | plt.plot(one_gamma, episodes_avg_ddpg_1, "*-")
84 | plt.plot(one_gamma, episodes_avg_ddpg_2, "*-")
85 | plt.plot(one_gamma, episodes_avg_ddpg_3, "*-")
86 | plt.plot(one_gamma, episodes_avg_ddpg_4, "*-")
87 | plt.xlabel("change one gamma")
88 | plt.ylabel("e_mine")
89 | plt.legend(["user0", "user1", "user2", "user3"])
90 | plt.savefig("graphs/change one gamma e_mine.png")
--------------------------------------------------------------------------------
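The four plotting blocks in plot.py all read the same per-user layout from Excel_ddpg.xls: the row these scripts treat as the run average (row 201), with column 4*i + n + 1 holding user n under the i-th parameter value. A minimal reader for that convention could look like the sketch below; read_user_averages is a hypothetical helper, not part of the repository.

import xlrd

def read_user_averages(sheet, n_params=6, n_users=4, avg_row=201):
    # averages[n][i] = averaged value for user n under the i-th parameter value,
    # following the column layout col = n_users*i + n + 1 used by plot.py.
    return [[sheet.cell(avg_row, n_users * i + n + 1).value
             for i in range(n_params)]
            for n in range(n_users)]

# Example, mirroring the "change one power" reward block above:
# book = xlrd.open_workbook("excel/Excel_ddpg.xls")
# per_user = read_user_averages(book.sheet_by_name("change_one_power_reward_3"))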
/plot_phi.py:
--------------------------------------------------------------------------------
1 |
2 | import matplotlib.pyplot as plt
3 |
4 | import xlrd
5 | from xlutils.copy import copy as xl_copy
6 |
7 | MAX_EPISODES = 2000
8 | EPISODES_BEFORE_TRAIN = 0
9 |
10 | rworkbook = xlrd.open_workbook("excel/final.xls")
11 | sheet = rworkbook.sheet_by_name("energy")
12 | episodes = [sheet.cell(i+1, 0).value for i in range(200)]
13 | energy_1 = [sheet.cell(i+1, 1).value for i in range(200)]
14 | energy_2 = [sheet.cell(i+1, 2).value for i in range(200)]
15 | energy_3 = [sheet.cell(i+1, 3).value for i in range(200)]
16 | energy_4 = [sheet.cell(i+1, 4).value for i in range(200)]
17 | energy_avg1 = [sheet.cell(201, 1).value for i in range(200)]
18 | energy_avg2 = [sheet.cell(201, 2).value for i in range(200)]
19 | energy_avg3 = [sheet.cell(201, 3).value for i in range(200)]
20 | energy_avg4 = [sheet.cell(201, 4).value for i in range(200)]
21 | plt.plot(episodes, energy_1)
22 | plt.plot(episodes, energy_2)
23 | plt.plot(episodes, energy_3)
24 | plt.plot(episodes, energy_4)
25 | plt.plot(episodes, energy_avg1)
26 | plt.plot(episodes, energy_avg2)
27 | plt.plot(episodes, energy_avg3)
28 | plt.plot(episodes, energy_avg4)
29 |
30 |
31 | plt.savefig("output/energy_vs_ddl.png")
32 | plt.close()
33 |
34 | rworkbook = xlrd.open_workbook("excel/final.xls")
35 | sheet = rworkbook.sheet_by_name("phi")
36 | episodes = [sheet.cell(i+1, 0).value for i in range(200)]
37 | phi_1 = [sheet.cell(i+1, 1).value for i in range(200)]
38 | phi_2 = [sheet.cell(i+1, 2).value for i in range(200)]
39 | phi_3 = [sheet.cell(i+1, 3).value for i in range(200)]
40 | phi_4 = [sheet.cell(i+1, 4).value for i in range(200)]
41 | phi_avg1 = [sheet.cell(201, 1).value for i in range(200)]
42 | phi_avg2 = [sheet.cell(201, 2).value for i in range(200)]
43 | phi_avg3 = [sheet.cell(201, 3).value for i in range(200)]
44 | phi_avg4 = [sheet.cell(201, 4).value for i in range(200)]
45 | plt.plot(episodes, phi_1)
46 | plt.plot(episodes, phi_2)
47 | plt.plot(episodes, phi_3)
48 | plt.plot(episodes, phi_4)
49 | plt.plot(episodes, phi_avg1)
50 | plt.plot(episodes, phi_avg2)
51 | plt.plot(episodes, phi_avg3)
52 | plt.plot(episodes, phi_avg4)
53 |
54 |
55 | plt.savefig("output/phi_vs_ddl.png")
--------------------------------------------------------------------------------
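Both blocks in plot_phi.py repeat a single averaged cell 200 times so that plt.plot draws a horizontal reference line. A behaviour-equivalent sketch of the second block using plt.axhline (assuming the same sheet layout as above) avoids materialising those constant lists:

import matplotlib.pyplot as plt
import xlrd

book = xlrd.open_workbook("excel/final.xls")
sheet = book.sheet_by_name("phi")
episodes = [sheet.cell(i + 1, 0).value for i in range(200)]
for col in range(1, 5):  # the four per-user columns
    plt.plot(episodes, [sheet.cell(i + 1, col).value for i in range(200)])
    plt.axhline(sheet.cell(201, col).value, linestyle="--")  # averaged row
plt.savefig("output/phi_vs_ddl.png")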
/plot_reward.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import xlrd
3 |
4 | MAX_EPISODES = 2000
5 | EPISODES_BEFORE_TRAIN = 0
6 |
7 | #plot reward_vs_ddl
8 |
9 | def plot_reward(parameter, sheet, model, paraname):
10 | episodes = [sheet.cell(i+1, 0).value for i in range(200)]
11 | reward0 = [sheet.cell(i+1, 1).value for i in range(200)]
12 | reward1 = [sheet.cell(i+1, 2).value for i in range(200)]
13 | reward2 = [sheet.cell(i+1, 3).value for i in range(200)]
14 | reward3 = [sheet.cell(i+1, 4).value for i in range(200)]
15 | reward4 = [sheet.cell(i+1, 5).value for i in range(200)]
16 | reward5 = [sheet.cell(i+1, 6).value for i in range(200)]
17 | plt.figure()
18 | plt.plot(episodes, reward0)
19 | plt.plot(episodes, reward1)
20 | plt.plot(episodes, reward2)
21 | plt.plot(episodes, reward3)
22 | plt.plot(episodes, reward4)
23 | plt.plot(episodes, reward5)
24 | plt.xlabel("episodes")
25 | plt.ylabel(model)
26 | plt.legend(["%s=%s"%(paraname, i) for i in parameter])
27 | plt.savefig("graphs/change %s/%s_change_%s.png"%(paraname, model, paraname))
28 | plt.close()
29 |
30 | rworkbook = xlrd.open_workbook("excel/final.xls")
31 |
32 | ddls = [0.6, 0.7, 0.8, 0.9, 1.0, 1.1]
33 | sheet_a2c = rworkbook.sheet_by_name("a2c_ddl")
34 | sheet_ddpg = rworkbook.sheet_by_name("ddpg_ddl")
35 | sheet_ppo = rworkbook.sheet_by_name("ppo_ddl")
36 | sheet_NAC = rworkbook.sheet_by_name("NAC_ddl")
37 | sheet_ALLES = rworkbook.sheet_by_name("ALLES_ddl")
38 |
39 | plot_reward(ddls, sheet_a2c, "a2c", "ddl")
40 | plot_reward(ddls, sheet_ddpg, "ddpg", "ddl")
41 | plot_reward(ddls, sheet_ppo, "ppo", "ddl")
42 | plot_reward(ddls, sheet_NAC, "NAC", "ddl")
43 | plot_reward(ddls, sheet_ALLES, "ALLES", "ddl")
44 |
45 | agents = [1, 2, 3, 4, 5, 6]
46 | sheet_a2c = rworkbook.sheet_by_name("a2c_agents")
47 | sheet_ddpg = rworkbook.sheet_by_name("ddpg_agents")
48 | sheet_ppo = rworkbook.sheet_by_name("ppo_agents")
49 | sheet_NAC = rworkbook.sheet_by_name("NAC_agents")
50 | sheet_ALLES = rworkbook.sheet_by_name("ALLES_agents")
51 |
52 | plot_reward(agents, sheet_a2c, "a2c", "agents")
53 | plot_reward(agents, sheet_ddpg, "ddpg", "agents")
54 | plot_reward(agents, sheet_ppo, "ppo", "agents")
55 | plot_reward(agents, sheet_NAC, "NAC", "agents")
56 | plot_reward(agents, sheet_ALLES, "ALLES", "agents")
57 |
58 | All_epsilon = [0.77, 0.80, 0.83, 0.86, 0.90, 0.93]
59 | sheet_a2c = rworkbook.sheet_by_name("a2c_epsilon")
60 | sheet_ddpg = rworkbook.sheet_by_name("ddpg_epsilon")
61 | sheet_ppo = rworkbook.sheet_by_name("ppo_epsilon")
62 | sheet_NAC = rworkbook.sheet_by_name("NAC_epsilon")
63 | sheet_ALLES = rworkbook.sheet_by_name("ALLES_epsilon")
64 |
65 | plot_reward(All_epsilon, sheet_a2c, "a2c", "epsilon")
66 | plot_reward(All_epsilon, sheet_ddpg, "ddpg", "epsilon")
67 | plot_reward(All_epsilon, sheet_ppo, "ppo", "epsilon")
68 | plot_reward(All_epsilon, sheet_NAC, "NAC", "epsilon")
69 | plot_reward(All_epsilon, sheet_ALLES, "ALLES", "epsilon")
70 |
71 | All_bandwidth = [20, 40, 60, 80, 100, 120]
72 | sheet_a2c = rworkbook.sheet_by_name("a2c_bandwidth")
73 | sheet_ddpg = rworkbook.sheet_by_name("ddpg_bandwidth")
74 | sheet_ppo = rworkbook.sheet_by_name("ppo_bandwidth")
75 | sheet_NAC = rworkbook.sheet_by_name("NAC_bandwidth")
76 | sheet_ALLES = rworkbook.sheet_by_name("ALLES_bandwidth")
77 |
78 | plot_reward(All_bandwidth, sheet_a2c, "a2c", "bandwidth")
79 | plot_reward(All_bandwidth, sheet_ddpg, "ddpg", "bandwidth")
80 | plot_reward(All_bandwidth, sheet_ppo, "ppo", "bandwidth")
81 | plot_reward(All_bandwidth, sheet_NAC, "NAC", "bandwidth")
82 | plot_reward(All_bandwidth, sheet_ALLES, "ALLES", "bandwidth")
83 |
84 |
--------------------------------------------------------------------------------
/plot_reward_avg.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import xlrd
3 |
4 | MAX_EPISODES = 2000
5 | EPISODES_BEFORE_TRAIN = 0
6 |
7 | #plot reward_vs_ddl
8 | rworkbook = xlrd.open_workbook("excel/final.xls")
9 | sheet_a2c = rworkbook.sheet_by_name("a2c_ddl")
10 | sheet_ddpg = rworkbook.sheet_by_name("ddpg_ddl")
11 | sheet_ppo = rworkbook.sheet_by_name("ppo_ddl")
12 | sheet_NAC = rworkbook.sheet_by_name("NAC_ddl")
13 | sheet_ALLES = rworkbook.sheet_by_name("ALLES_ddl")
14 |
15 | ddls = [0.6, 0.7, 0.8, 0.9, 1.0, 1.1]
16 | reward_a2c = [sheet_a2c.cell(201, i+1).value for i in range(6)]
17 | reward_ddpg = [sheet_ddpg.cell(201, i+1).value for i in range(6)]
18 | reward_ppo = [sheet_ppo.cell(201, i+1).value for i in range(6)]
19 | reward_NAC = [sheet_NAC.cell(201, i+1).value for i in range(6)]
20 | reward_ALLES = [sheet_ALLES.cell(201, i+1).value for i in range(6)]
21 |
22 | plt.figure()
23 | plt.plot(ddls, reward_a2c)
24 | plt.plot(ddls, reward_ddpg)
25 | plt.plot(ddls, reward_ppo)
26 | plt.plot(ddls, reward_NAC)
27 | plt.plot(ddls, reward_ALLES)
28 | plt.xlabel("DDL")
29 | plt.ylabel("Reward")
30 | plt.legend(["MAA2C", "MADDPG", "MAPPO", "NAC", "ALLES"])
31 | plt.savefig("graphs/reward_vs_parameters/reward_vs_ddl.png")
32 | plt.close()
33 |
34 | #plot reward_vs_agents
35 | sheet_a2c = rworkbook.sheet_by_name("a2c_agents")
36 | sheet_ddpg = rworkbook.sheet_by_name("ddpg_agents")
37 | sheet_ppo = rworkbook.sheet_by_name("ppo_agents")
38 | sheet_NAC = rworkbook.sheet_by_name("NAC_agents")
39 | sheet_ALLES = rworkbook.sheet_by_name("ALLES_agents")
40 |
41 | agents = [1, 2, 3, 4, 5, 6]
42 | reward_a2c = [sheet_a2c.cell(201, i+1).value for i in range(6)]
43 | reward_ddpg = [sheet_ddpg.cell(201, i+1).value for i in range(6)]
44 | reward_ppo = [sheet_ppo.cell(201, i+1).value for i in range(6)]
45 | reward_NAC = [sheet_NAC.cell(201, i+1).value for i in range(6)]
46 | reward_ALLES = [sheet_ALLES.cell(201, i+1).value for i in range(6)]
47 |
48 | plt.figure()
49 | plt.plot(agents, reward_a2c)
50 | plt.plot(agents, reward_ddpg)
51 | plt.plot(agents, reward_ppo)
52 | plt.plot(agents, reward_NAC)
53 | plt.plot(agents, reward_ALLES)
54 | plt.xlabel("Agents Number")
55 | plt.ylabel("Reward")
56 | plt.legend(["MAA2C", "MADDPG", "MAPPO", "NAC", "ALLES"])
57 | plt.savefig("graphs/reward_vs_parameters/reward_vs_agents.png")
58 | plt.close()
59 |
60 | #plot reward_vs_bandwidth
61 | sheet_a2c = rworkbook.sheet_by_name("a2c_bandwidth")
62 | sheet_ddpg = rworkbook.sheet_by_name("ddpg_bandwidth")
63 | sheet_ppo = rworkbook.sheet_by_name("ppo_bandwidth")
64 | sheet_NAC = rworkbook.sheet_by_name("NAC_bandwidth")
65 | sheet_ALLES = rworkbook.sheet_by_name("ALLES_bandwidth")
66 |
67 | bandwidths = [20, 40, 60, 80, 100, 120]
68 | reward_a2c = [sheet_a2c.cell(201, i+1).value for i in range(6)]
69 | reward_ddpg = [sheet_ddpg.cell(201, i+1).value for i in range(6)]
70 | reward_ppo = [sheet_ppo.cell(201, i+1).value for i in range(6)]
71 | reward_NAC = [sheet_NAC.cell(201, i+1).value for i in range(6)]
72 | reward_ALLES = [sheet_ALLES.cell(201, i+1).value for i in range(6)]
73 |
74 | plt.figure()
75 | plt.plot(bandwidths, reward_a2c)
76 | plt.plot(bandwidths, reward_ddpg)
77 | plt.plot(bandwidths, reward_ppo)
78 | plt.plot(bandwidths, reward_NAC)
79 | plt.plot(bandwidths, reward_ALLES)
80 | plt.xlabel("Bandwidth")
81 | plt.ylabel("Reward")
82 | plt.legend(["MAA2C", "MADDPG", "MAPPO", "NAC", "ALLES"])
83 | plt.savefig("graphs/reward_vs_parameters/reward_vs_bandwidth.png")
84 | plt.close()
85 |
86 | #plot reward_vs_epsilon
87 | sheet_a2c = rworkbook.sheet_by_name("a2c_epsilon")
88 | sheet_ddpg = rworkbook.sheet_by_name("ddpg_epsilon")
89 | sheet_ppo = rworkbook.sheet_by_name("ppo_epsilon")
90 | sheet_NAC = rworkbook.sheet_by_name("NAC_epsilon")
91 | sheet_ALLES = rworkbook.sheet_by_name("ALLES_epsilon")
92 |
93 | epsilons = [0.77, 0.80, 0.83, 0.86, 0.90, 0.93]
94 | reward_a2c = [sheet_a2c.cell(201, i+1).value for i in range(6)]
95 | reward_ddpg = [sheet_ddpg.cell(201, i+1).value for i in range(6)]
96 | reward_ppo = [sheet_ppo.cell(201, i+1).value for i in range(6)]
97 | reward_NAC = [sheet_NAC.cell(201, i+1).value for i in range(6)]
98 | reward_ALLES = [sheet_ALLES.cell(201, i+1).value for i in range(6)]
99 |
100 | plt.figure()
101 | plt.plot(epsilons, reward_a2c)
102 | plt.plot(epsilons, reward_ddpg)
103 | plt.plot(epsilons, reward_ppo)
104 | plt.plot(epsilons, reward_NAC)
105 | plt.plot(epsilons, reward_ALLES)
106 | plt.xlabel("Epsilon")
107 | plt.ylabel("Reward")
108 | plt.legend(["MAA2C", "MADDPG", "MAPPO", "NAC", "ALLES"])
109 | plt.savefig("graphs/reward_vs_parameters/reward_vs_epsilon.png")
110 | plt.close()
--------------------------------------------------------------------------------
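The four blocks in plot_reward_avg.py differ only in the sheet suffix, the x-axis values, and the output path, so they can be driven from a single function. The sketch below is built under that assumption; plot_avg_vs_parameter is a hypothetical name, while the sheet names, x values, and paths come from the script above.

import matplotlib.pyplot as plt
import xlrd

MODELS = ["a2c", "ddpg", "ppo", "NAC", "ALLES"]        # sheet name prefixes
LEGEND = ["MAA2C", "MADDPG", "MAPPO", "NAC", "ALLES"]  # display names, same order

def plot_avg_vs_parameter(book, suffix, xs, xlabel, outfile, avg_row=201):
    plt.figure()
    for model in MODELS:
        sheet = book.sheet_by_name("%s_%s" % (model, suffix))
        plt.plot(xs, [sheet.cell(avg_row, i + 1).value for i in range(len(xs))])
    plt.xlabel(xlabel)
    plt.ylabel("Reward")
    plt.legend(LEGEND)
    plt.savefig(outfile)
    plt.close()

book = xlrd.open_workbook("excel/final.xls")
plot_avg_vs_parameter(book, "ddl", [0.6, 0.7, 0.8, 0.9, 1.0, 1.1], "DDL",
                      "graphs/reward_vs_parameters/reward_vs_ddl.png")
plot_avg_vs_parameter(book, "agents", [1, 2, 3, 4, 5, 6], "Agents Number",
                      "graphs/reward_vs_parameters/reward_vs_agents.png")
# bandwidth and epsilon follow the same pattern with their own xs and output paths.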
/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib==3.5.2
2 | numpy==1.22.4
3 | torch==1.11.0
4 | xlrd==2.0.1
5 | xlutils==2.0.0
6 | xlwt==1.3.0
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
1 | from MAA2C import MAA2C
2 | from MADDPG import MADDPG
3 | from MAPPO import MAPPO
4 | from Model import NUMBER
5 | from env import MecBCEnv
6 |
7 | import matplotlib.pyplot as plt
8 |
9 | import xlrd
10 | from xlutils.copy import copy as xl_copy
11 |
12 | MAX_EPISODES = 2000
13 | EPISODES_BEFORE_TRAIN = 0
14 |
15 |
16 | def create_a2c(env, critic_lr=0.001, actor_lr=0.001):
17 | a2c = MAA2C(env=env, n_agents=env.n_agents, state_dim=env.state_size, action_dim=env.action_size,
18 | action_lower_bound=env.action_lower_bound, action_higher_bound=env.action_higher_bound,
19 | critic_lr=critic_lr, actor_lr=actor_lr)
20 | while a2c.n_episodes < MAX_EPISODES:
21 | a2c.interact()
22 | if a2c.n_episodes >= EPISODES_BEFORE_TRAIN:
23 | a2c.train()
24 | return a2c
25 |
26 | def create_ddpg(env, critic_lr=0.001, actor_lr=0.001):
27 | ddpg = MADDPG(env=env, n_agents=env.n_agents, state_dim=env.state_size, action_dim=env.action_size,
28 | action_lower_bound=env.action_lower_bound, action_higher_bound=env.action_higher_bound,
29 | critic_lr=critic_lr, actor_lr=actor_lr)
30 | while ddpg.n_episodes < MAX_EPISODES:
31 | ddpg.interact()
32 | if ddpg.n_episodes >= EPISODES_BEFORE_TRAIN:
33 | ddpg.train()
34 | return ddpg
35 |
36 |
37 | def create_ppo(env, critic_lr=0.001, actor_lr=0.001):
38 | ppo = MAPPO(env=env, n_agents=env.n_agents, state_dim=env.state_size, action_dim=env.action_size,
39 | action_lower_bound=env.action_lower_bound, action_higher_bound=env.action_higher_bound,
40 | critic_lr=critic_lr, actor_lr=actor_lr)
41 | while ppo.n_episodes < MAX_EPISODES:
42 | ppo.interact()
43 | if ppo.n_episodes >= EPISODES_BEFORE_TRAIN:
44 | ppo.train()
45 | return ppo
46 |
47 | def writeExcel(agent, workbook, sheetname, parameterlist):
48 | #REQUIRE: agent list
49 | sheet = workbook.add_sheet(sheetname)
50 | sheet.write(0, 0, "Episodes")
51 | for j in range(len(agent[0].episodes)):
52 | sheet.write(j+1, 0, agent[0].episodes[j])
53 | for i in range(len(parameterlist)):
54 | sheet.write(0, i+1, "Rewards(%s=%.2f)" %(sheetname, parameterlist[i]))
55 |
56 | for j in range(len(agent[i].episodes)):
57 | # row, column, value
58 | sheet.write(j+1, i+1, agent[i].mean_rewards[j])
59 |
60 | return workbook
61 |
62 | def plot_from_excel(sheet):
63 | plt.figure()
64 | episodes = []
65 | rewards_ddpg = []
66 | rewards_a2c = []
67 | rewards_ppo = []
68 | for i in range(1, sheet.nrows):
69 | episodes.append(sheet.cell(i, 0).value)
70 | rewards_ddpg.append(sheet.cell(i, 1).value)
71 | rewards_a2c.append(sheet.cell(i, 2).value)
72 | rewards_ppo.append(sheet.cell(i, 3).value)
73 |
74 | plt.plot(episodes, rewards_ddpg)
75 | plt.plot(episodes, rewards_a2c)
76 | plt.plot(episodes, rewards_ppo)
77 | plt.xlabel("Episode")
78 | plt.ylabel("Average Reward")
79 | plt.legend(["DDPG", "A2C", "PPO"])
80 |
81 | plt.savefig("./output/comparison.png")
82 |
83 | def run():
84 | All_ddl = [1]
85 | env_ddl_list = [MecBCEnv(n_agents=NUMBER, S_DDL=All_ddl[i]) for i in range(len(All_ddl))]
86 |
87 | # ddpg_ddl_list = [create_ddpg(env_ddl_list[i]) for i in range(len(env_ddl_list))]
88 | a2c_ddl_list = [create_a2c(env_ddl_list[i]) for i in range(len(env_ddl_list))]
89 | ppo_ddl_list = [create_ppo(env_ddl_list[i]) for i in range(len(env_ddl_list))]
90 |
91 | rworkbook = xlrd.open_workbook('excel/DDPG_A2C_PPO.xls', formatting_info=True)  # workbook lives under excel/
92 | wworkbook = xl_copy(rworkbook)
93 | # workbook = writeExcel(ddpg_ddl_list, wworkbook, "DDPG", All_ddl)
94 | workbook = writeExcel(a2c_ddl_list, wworkbook, "A2C", All_ddl)
95 |
96 | workbook = writeExcel(ppo_ddl_list, wworkbook, "PPO", All_ddl)
97 | workbook.save('excel/DDPG_A2C_PPO.xls')
98 |
99 | def plot():
100 | rworkbook = xlrd.open_workbook('excel/DDPG_A2C_PPO.xls', formatting_info=True)
101 | sheet = rworkbook.sheet_by_name("Plot")
102 | plot_from_excel(sheet)
103 |
104 | if __name__ == "__main__":
105 | # run()
106 | plot()
107 |
108 |
109 |
--------------------------------------------------------------------------------
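create_a2c, create_ddpg, and create_ppo in run.py pass identical keyword arguments and run the same interact/train loop, so a single factory can cover all three algorithms. A minimal sketch, assuming the three constructors stay keyword-compatible as they are called in run.py; create_agent is a hypothetical name:

from MAA2C import MAA2C
from MADDPG import MADDPG
from MAPPO import MAPPO

MAX_EPISODES = 2000
EPISODES_BEFORE_TRAIN = 0

def create_agent(algo_cls, env, critic_lr=0.001, actor_lr=0.001):
    # algo_cls is one of MAA2C, MADDPG, MAPPO; run.py constructs all three
    # with the same keyword arguments.
    agent = algo_cls(env=env, n_agents=env.n_agents,
                     state_dim=env.state_size, action_dim=env.action_size,
                     action_lower_bound=env.action_lower_bound,
                     action_higher_bound=env.action_higher_bound,
                     critic_lr=critic_lr, actor_lr=actor_lr)
    while agent.n_episodes < MAX_EPISODES:
        agent.interact()
        if agent.n_episodes >= EPISODES_BEFORE_TRAIN:
            agent.train()
    return agent

# e.g. ddpg = create_agent(MADDPG, MecBCEnv(n_agents=NUMBER, S_DDL=1))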
/run_a2c.py:
--------------------------------------------------------------------------------
1 | from MAA2C import MAA2C
2 | from Model import NUMBER
3 | from env import MecBCEnv
4 |
5 | import matplotlib.pyplot as plt
6 |
7 | import xlrd
8 | from xlutils.copy import copy as xl_copy
9 |
10 | MAX_EPISODES = 2000
11 | EPISODES_BEFORE_TRAIN = 0
12 |
13 |
14 | def create_a2c(env, critic_lr=0.001, actor_lr=0.001, noise=0.04, tau=1400, bound=600):
15 | a2c = MAA2C(env=env, n_agents=env.n_agents, state_dim=env.state_size, action_dim=env.action_size,
16 | action_lower_bound=env.action_lower_bound, action_higher_bound=env.action_higher_bound,
17 | critic_lr=critic_lr, actor_lr=actor_lr, noise=noise, tau=tau, bound=bound)
18 | while a2c.n_episodes < MAX_EPISODES:
19 | a2c.interact()
20 | if a2c.n_episodes >= EPISODES_BEFORE_TRAIN:
21 | a2c.train()
22 | return a2c
23 |
24 | def writeExcel(agent, workbook, sheetname, parameterlist, variable="reward"):
25 | #REQUIRE: agent list
26 | sheet = workbook.add_sheet(sheetname)
27 | sheet.write(0, 0, "Episodes")
28 | for j in range(len(agent[0].episodes)):
29 | sheet.write(j+1, 0, agent[0].episodes[j])
30 | for i in range(len(parameterlist)):
31 | if (variable == "reward"):
32 | sheet.write(0, i+1, "Rewards(%s=%.2f)" %(sheetname, parameterlist[i]))
33 | for j in range(len(agent[i].episodes)):
34 | # row, column, value
35 | sheet.write(j+1, i+1, agent[i].mean_rewards[j])
36 | elif (variable == "phi"):
37 | sheet.write(0, i+1, "Phi(%s=%.2f)" %(sheetname, parameterlist[i]))
38 | for j in range(len(agent[i].episodes)):
39 | # row, column, value
40 | sheet.write(j+1, i+1, agent[i].mean_phi[j])
41 | return workbook
42 |
43 | def plot_a2c(a2c, parameter, parameterlist, variable="reward"):
44 | plt.figure()
45 | if (variable == "reward"):
46 | for i in range(len(a2c)):
47 | plt.plot(a2c[i].episodes, a2c[i].mean_rewards)
48 | plt.xlabel("Episode")
49 | plt.ylabel("Reward")
50 | elif (variable == "phi"):
51 | for i in range(len(a2c)):
52 | plt.plot(a2c[i].episodes, a2c[i].mean_phi)
53 | plt.xlabel("Episode")
54 | plt.ylabel("Phi")
55 | plt.grid(True, linestyle='--', alpha=0.5)
56 | plt.legend(["%s=%s"%(parameter, parameterlist[i]) for i in range(len(parameterlist))])
57 | plt.savefig("./output/a2c_change_%s.png"%parameter)
58 |
59 | def run(times, variable):
60 | All_ddl = [0.9, 1.0, 1.1]
61 | # All_epsilon = [0.83, 0.86, 0.9, 0.93]
62 | # All_bandwidth = [100, 200, 300, 400, 500, 600, 700]
63 | # All_agents = [3, 4, 5, 6]
64 |
65 | noise = [0.05, 0.05, 0.05]
66 | tau = [500, 1000, 2000]
67 | bound = [2200, 2200, 2200]
68 |
69 |
70 |
71 | rworkbook = xlrd.open_workbook('excel/Excel_a2c.xls', formatting_info=True)
72 | wworkbook = xl_copy(rworkbook)
73 |
74 | # change ddl
75 | env_ddl_list = [MecBCEnv(n_agents=NUMBER, S_DDL=All_ddl[i]) for i in range(len(All_ddl))]
76 | a2c_ddl_list = [create_a2c(env_ddl_list[i], noise=noise[i], tau=tau[i], bound=bound[i]) for i in range(len(env_ddl_list))]
77 | wworkbook = writeExcel(a2c_ddl_list, wworkbook, "Change_ddl_%s"%times, All_ddl, variable)
78 | plot_a2c(a2c_ddl_list, "ddl_%s"%times, All_ddl, variable)
79 |
80 | # # change epsilon
81 | # env_epsilon_list = [MecBCEnv(n_agents=NUMBER, S_EPSILON=All_epsilon[i]) for i in range(len(All_epsilon))]
82 | # a2c_epsilon_list = [create_a2c(env_epsilon_list[i]) for i in range(len(env_epsilon_list))]
83 | # wworkbook = writeExcel(a2c_epsilon_list, wworkbook, "Change_epsilon_%s"%times, All_epsilon, variable)
84 | # plot_a2c(a2c_epsilon_list, "epsilon_%s"%times, All_epsilon, variable)
85 |
86 | # # change bandwidth
87 | # env_bandwidth_list = [MecBCEnv(n_agents=NUMBER, W_BANDWIDTH=All_bandwidth[i]) for i in range(len(All_bandwidth))]
88 | # a2c_bandwidth_list = [create_a2c(env_bandwidth_list[i]) for i in range(len(env_bandwidth_list))]
89 | # wworkbook = writeExcel(a2c_bandwidth_list, wworkbook, "Change_bandwidth_%s"%times, All_bandwidth, variable)
90 | # plot_a2c(a2c_bandwidth_list, "bandwidth_%s"%times, All_bandwidth, variable)
91 |
92 | # # change agents
93 | # env_agents_list = [MecBCEnv(n_agents=All_agents[i]) for i in range(len(All_agents))]
94 | # a2c_agents_list = [create_a2c(env_agents_list[i], noise[i], tau[i]) for i in range(len(env_agents_list))]
95 | # wworkbook = writeExcel(a2c_agents_list, wworkbook, "Change_agents_%s"%times, All_agents, variable)
96 | # plot_a2c(a2c_agents_list, "agents_%s"%times, All_agents, variable)
97 |
98 | wworkbook.save('excel/Excel_a2c.xls')
99 |
100 | if __name__ == "__main__":
101 | run(20, "reward")
102 |
--------------------------------------------------------------------------------
/run_ddpg.py:
--------------------------------------------------------------------------------
1 | from MADDPG import MADDPG
2 | from Model import NUMBER
3 |
4 | import matplotlib.pyplot as plt
5 | from env import MecBCEnv
6 |
7 | import xlrd
8 | from xlutils.copy import copy as xl_copy
9 |
10 |
11 | MAX_EPISODES = 2000
12 | EPISODES_BEFORE_TRAIN = 100
13 | EVAL_EPISODES = 10
14 | EVAL_INTERVAL = 10
15 |
16 | # max steps in each episode, prevent from running too long
17 | MAX_STEPS = 10000 # None
18 |
19 | MEMORY_CAPACITY = 10000
20 | BATCH_SIZE = 100
21 | CRITIC_LOSS = "mse"
22 | MAX_GRAD_NORM = None
23 |
24 | TARGET_UPDATE_STEPS = 5
25 | TARGET_TAU = 0.01
26 |
27 | REWARD_DISCOUNTED_GAMMA = 0.99
28 |
29 | EPSILON_START = 0.99
30 | EPSILON_END = 0.05
31 | EPSILON_DECAY = 500
32 |
33 | DONE_PENALTY = None
34 |
35 | RANDOM_SEED = 2022
36 |
37 |
38 | def create_ddpg(env, critic_lr=0.001, actor_lr=0.001):
39 | ddpg = MADDPG(env=env, n_agents=env.n_agents, state_dim=env.state_size, action_dim=env.action_size,
40 | action_lower_bound=env.action_lower_bound, action_higher_bound=env.action_higher_bound,
41 | critic_lr=critic_lr, actor_lr=actor_lr)
42 | while ddpg.n_episodes < MAX_EPISODES:
43 | ddpg.interact()
44 | # if ddpg.n_episodes >= EPISODES_BEFORE_TRAIN:
45 | # ddpg.train()
46 | return ddpg
47 |
48 |
49 | def writeExcel(agent, workbook, sheetname, parameterlist, variable="reward"):
50 | #REQUIRE: agent list
51 | sheet = workbook.add_sheet(sheetname)
52 | sheet.write(0, 0, "Episodes")
53 | for j in range(len(agent[0].episodes)):
54 | sheet.write(j+1, 0, agent[0].episodes[j])
55 | for i in range(len(parameterlist)):
56 | if (variable == "reward"):
57 | sheet.write(0, i+1, "Rewards(%s=%.2f)" %(sheetname, parameterlist[i]))
58 | for j in range(len(agent[i].episodes)):
59 | # row, column, value
60 | sheet.write(j+1, i+1, agent[i].mean_rewards[j])
61 | elif (variable == "phi"):
62 | for n in range(NUMBER):
63 | sheet.write(0, NUMBER*i+n+1, "Phi(%s=%.2f)(user%s)" %(sheetname, parameterlist[i], n))
64 | for j in range(len(agent[i].episodes)):
65 | # row, column, value
66 | sheet.write(j+1, NUMBER*i+n+1, agent[i].mean_phi[n][j])
67 | elif (variable == "energy"):
68 | for n in range(NUMBER):
69 | sheet.write(0, NUMBER*i+n+1, "Energy(%s=%.2f)(user%s)" %(sheetname, parameterlist[i], n))
70 | for j in range(len(agent[i].episodes)):
71 | # row, column, value
72 | sheet.write(j+1, NUMBER*i+n+1, agent[i].mean_energy[n][j])
73 | elif (variable == "agent_reward"):
74 | for n in range(NUMBER):
75 | sheet.write(0, NUMBER*i+n+1, "Reward(%s=%.2f)(user%s)" %(sheetname, parameterlist[i], n))
76 | for j in range(len(agent[i].episodes)):
77 | # row, column, value
78 | sheet.write(j+1, NUMBER*i+n+1, agent[i].agent_mean_rewards[n][j])
79 | return workbook
80 |
81 | def plot_ddpg(ddpg, parameter, parameterlist, variable="reward"):
82 | plt.figure()
83 | if (variable == "reward"):
84 | for i in range(len(ddpg)):
85 | plt.plot(ddpg[i].episodes, ddpg[i].mean_rewards)
86 | plt.xlabel("Episode")
87 | plt.ylabel("Reward")
88 | elif (variable == "phi"):
89 | for i in range(len(ddpg)):
90 | plt.plot(ddpg[i].episodes, ddpg[i].mean_phi)
91 | plt.xlabel("Episode")
92 | plt.ylabel("Phi")
93 | elif (variable == "energy"):
94 | for i in range(len(ddpg)):
95 | plt.plot(ddpg[i].episodes, ddpg[i].mean_energy)
96 | plt.xlabel("Episode")
97 | plt.ylabel("Energy")
98 | elif (variable == "agent_reward"):
99 | for i in range(len(ddpg)):
100 | plt.plot(ddpg[i].episodes, ddpg[i].agent_mean_rewards)
101 | plt.xlabel("Episode")
102 | plt.ylabel("Reward")
103 | plt.grid(True, linestyle='--', alpha=0.5)
104 | plt.legend(["%s=%s"%(parameter, parameterlist[i]) for i in range(len(parameterlist))])
105 | plt.savefig("./output/ddpg_change_%s.png"%parameter)
106 |
107 | def run(times):
108 | All_ddl = [0.6, 0.7, 0.8, 0.9, 1.0, 1.1]
109 | All_epsilon = [0.77, 0.80, 0.83, 0.86, 0.90, 0.93]
110 | All_bandwidth = [20, 40, 60, 80, 100, 120]
111 | All_agents = [1, 2, 3, 4, 5, 6]
112 |
113 | rworkbook = xlrd.open_workbook('excel/Excel_ddpg.xls', formatting_info=True)
114 | wworkbook = xl_copy(rworkbook)
115 |
116 | # change ddl
117 | # env_ddl_list = [MecBCEnv(n_agents=NUMBER, S_DDL=All_ddl[i]) for i in range(len(All_ddl))]
118 | # ddpg_ddl_list = [create_ddpg(env_ddl_list[i]) for i in range(len(env_ddl_list))]
119 | # wworkbook = writeExcel(ddpg_ddl_list, wworkbook, "change_one_power2", All_ddl, "agent_reward")
120 | # plot_ddpg(ddpg_ddl_list, "ddl_%s"%times, All_ddl)
121 |
122 | # # change epsilon
123 | # env_epsilon_list = [MecBCEnv(n_agents=NUMBER, S_EPSILON=All_epsilon[i]) for i in range(len(All_epsilon))]
124 | # ddpg_epsilon_list = [create_ddpg(env_epsilon_list[i]) for i in range(len(env_epsilon_list))]
125 | # wworkbook = writeExcel(ddpg_epsilon_list, wworkbook, "Change_epsilon_%s"%times, All_epsilon)
126 | # plot_ddpg(ddpg_epsilon_list, "epsilon_%s"%times, All_epsilon)
127 |
128 | # # change bandwidth
129 | # env_bandwidth_list = [MecBCEnv(n_agents=NUMBER, W_BANDWIDTH=All_bandwidth[i]) for i in range(len(All_bandwidth))]
130 | # ddpg_bandwidth_list = [create_ddpg(env_bandwidth_list[i]) for i in range(len(env_bandwidth_list))]
131 | # wworkbook = writeExcel(ddpg_bandwidth_list, wworkbook, "Change_bandwidth_%s"%times, All_bandwidth, variable)
132 | # plot_ddpg(ddpg_bandwidth_list, "bandwidth_%s"%times, All_bandwidth, variable)
133 |
134 | # # change agents
135 | # env_agents_list = [MecBCEnv(n_agents=All_agents[i]) for i in range(len(All_agents))]
136 | # ddpg_agents_list = [create_ddpg(env_agents_list[i]) for i in range(len(env_agents_list))]
137 | # wworkbook = writeExcel(ddpg_agents_list, wworkbook, "Change_agents_%s"%times, All_agents, variable)
138 | # plot_ddpg(ddpg_agents_list, "agents_%s"%times, All_agents, variable)
139 |
140 | # change one power
141 | # All_one_power = [40, 60, 80, 100, 120, 140]
142 | All_one_gamma = [0.8, 1.0, 1.2, 1.4, 1.6, 1.8]
143 | # env_ddl_list1 = [MecBCEnv(n_agents=NUMBER, S_one_power=All_one_power[i]) for i in range(len(All_one_power))]
144 | # ddpg_ddl_list1 = [create_ddpg(env_ddl_list1[i]) for i in range(len(env_ddl_list1))]
145 | # wworkbook = writeExcel(ddpg_ddl_list1, wworkbook, "change_one_power_phi_%s"%times, All_one_power, "phi")
146 | # wworkbook = writeExcel(ddpg_ddl_list1, wworkbook, "change_one_power_energy_%s"%times, All_one_power, "energy")
147 | # env_ddl_list2 = [MecBCEnv(n_agents=NUMBER, S_one_gamma=All_one_gamma[i]) for i in range(len(All_one_gamma))]
148 | # ddpg_ddl_list2 = [create_ddpg(env_ddl_list2[i]) for i in range(len(env_ddl_list2))]
149 | # wworkbook = writeExcel(ddpg_ddl_list2, wworkbook, "change_one_gamma_r_mine_%s"%times, All_one_gamma, "phi")
150 | # wworkbook = writeExcel(ddpg_ddl_list2, wworkbook, "change_one_gamma_e_mine_%s"%times, All_one_gamma, "energy")
151 |
152 | # change ddl
153 | env_ddl_list = [MecBCEnv(n_agents=NUMBER, S_DDL=All_ddl[i], mode="ALLES") for i in range(len(All_ddl))]
154 | ddpg_ddl_list = [create_ddpg(env_ddl_list[i]) for i in range(len(env_ddl_list))]
155 | wworkbook = writeExcel(ddpg_ddl_list, wworkbook, "ALLES_ddl", All_ddl)
156 | plot_ddpg(ddpg_ddl_list, "ddl_%s"%times, All_ddl)
157 |
158 | # # change epsilon
159 | env_epsilon_list = [MecBCEnv(n_agents=NUMBER, S_EPSILON=All_epsilon[i], mode="ALLES") for i in range(len(All_epsilon))]
160 | ddpg_epsilon_list = [create_ddpg(env_epsilon_list[i]) for i in range(len(env_epsilon_list))]
161 | wworkbook = writeExcel(ddpg_epsilon_list, wworkbook, "ALLES_epsilon", All_epsilon)
162 | # plot_ddpg(ddpg_epsilon_list, "epsilon_%s"%times, All_epsilon)
163 |
164 | # # change bandwidth
165 | env_bandwidth_list = [MecBCEnv(n_agents=NUMBER, W_BANDWIDTH=All_bandwidth[i], mode="ALLES") for i in range(len(All_bandwidth))]
166 | ddpg_bandwidth_list = [create_ddpg(env_bandwidth_list[i]) for i in range(len(env_bandwidth_list))]
167 | wworkbook = writeExcel(ddpg_bandwidth_list, wworkbook, "ALLES_bandwidth", All_bandwidth)
168 | # plot_ddpg(ddpg_bandwidth_list, "bandwidth_%s"%times, All_bandwidth, variable)
169 |
170 | # # change agents
171 | env_agents_list = [MecBCEnv(n_agents=All_agents[i], mode="ALLES") for i in range(len(All_agents))]
172 | ddpg_agents_list = [create_ddpg(env_agents_list[i]) for i in range(len(env_agents_list))]
173 | wworkbook = writeExcel(ddpg_agents_list, wworkbook, "ALLES_agents", All_agents)
174 | # plot_ddpg(ddpg_agents_list, "agents_%s"%times, All_agents)
175 |
176 | wworkbook.save('excel/Excel_ddpg.xls')
177 |
178 | if __name__ == "__main__":
179 | run(5)
180 |
--------------------------------------------------------------------------------
/run_ppo.py:
--------------------------------------------------------------------------------
1 |
2 | from MAPPO import MAPPO
3 | from Model import NUMBER
4 |
5 | import matplotlib.pyplot as plt
6 |
7 | from env import MecBCEnv
8 |
9 | import xlrd
10 | from xlutils.copy import copy as xl_copy
11 |
12 | MAX_EPISODES = 2000
13 | EPISODES_BEFORE_TRAIN = 0
14 |
15 |
16 | def create_ppo(env, critic_lr=0.001, actor_lr=0.001, noise=0, tau=300):
17 | ppo = MAPPO(env=env, n_agents=env.n_agents, state_dim=env.state_size, action_dim=env.action_size,
18 | action_lower_bound=env.action_lower_bound, action_higher_bound=env.action_higher_bound,
19 | critic_lr=critic_lr, actor_lr=actor_lr, noise=noise, tau=tau)
20 | while ppo.n_episodes < MAX_EPISODES:
21 | ppo.interact()
22 | if ppo.n_episodes >= EPISODES_BEFORE_TRAIN:
23 | ppo.train()
24 | return ppo
25 |
26 | def writeExcel(agent, workbook, sheetname, parameterlist, variable="reward"):
27 | #REQUIRE: agent list
28 | sheet = workbook.add_sheet(sheetname)
29 | sheet.write(0, 0, "Episodes")
30 | for j in range(len(agent[0].episodes)):
31 | sheet.write(j+1, 0, agent[0].episodes[j])
32 | for i in range(len(parameterlist)):
33 | if (variable == "reward"):
34 | sheet.write(0, i+1, "Rewards(%s=%.2f)" %(sheetname, parameterlist[i]))
35 | for j in range(len(agent[i].episodes)):
36 | # row, column, value
37 | sheet.write(j+1, i+1, agent[i].mean_rewards[j])
38 | elif (variable == "phi"):
39 | sheet.write(0, i+1, "Phi(%s=%.2f)" %(sheetname, parameterlist[i]))
40 | for j in range(len(agent[i].episodes)):
41 | # row, column, value
42 | sheet.write(j+1, i+1, agent[i].mean_phi[j])
43 | return workbook
44 |
45 | def plot_ppo(ppo, parameter, parameterlist, variable="reward"):
46 | plt.figure()
47 | if (variable == "reward"):
48 | for i in range(len(ppo)):
49 | plt.plot(ppo[i].episodes, ppo[i].mean_rewards)
50 | plt.xlabel("Episode")
51 | plt.ylabel("Reward")
52 | elif (variable == "phi"):
53 | for i in range(len(ppo)):
54 | plt.plot(ppo[i].episodes, ppo[i].mean_phi)
55 | plt.xlabel("Episode")
56 | plt.ylabel("Phi")
57 | plt.legend(["%s=%s"%(parameter, parameterlist[i]) for i in range(len(parameterlist))])
58 | plt.savefig("./output/ppo_change_%s.png"%parameter)
59 |
60 | def run(times, variable):
61 | # All_ddl = [0.6, 0.7, 0.8, 0.9, 1.0, 1.1]
62 | # All_ddl = [0.8, 0.9, 1.0, 1.1, 1.2, 1.3]
63 | All_epsilon = [0.77, 0.80]
64 | # All_bandwidth = [20, 40, 60, 80, 100, 120]
65 | # All_agents = [3]
66 |
67 | # noise = [0, 0]
68 | # tau = [300, 300]
69 |
70 | rworkbook = xlrd.open_workbook('excel/Excel_ppo.xls', formatting_info=True)
71 | wworkbook = xl_copy(rworkbook)
72 |
73 | # # change ddl
74 | # env_ddl_list = [MecBCEnv(n_agents=NUMBER, S_DDL=All_ddl[i]) for i in range(len(All_ddl))]
75 | # ppo_ddl_list = [create_ppo(env_ddl_list[i], noise=noise[i], tau=tau[i]) for i in range(len(env_ddl_list))]
76 | # wworkbook = writeExcel(ppo_ddl_list, wworkbook, "Change_ddl_%s"%times, All_ddl, variable)
77 | # plot_ppo(ppo_ddl_list, "ddl_%s"%times, All_ddl, variable)
78 |
79 | # change epsilon
80 | env_epsilon_list = [MecBCEnv(n_agents=NUMBER, S_EPSILON=All_epsilon[i]) for i in range(len(All_epsilon))]
81 | ppo_epsilon_list = [create_ppo(env_epsilon_list[i]) for i in range(len(env_epsilon_list))]
82 | wworkbook = writeExcel(ppo_epsilon_list, wworkbook, "Change_epsilon_%s"%times, All_epsilon, variable)
83 | plot_ppo(ppo_epsilon_list, "epsilon_%s"%times, All_epsilon, variable)
84 |
85 | # # change bandwidth
86 | # env_bandwidth_list = [MecBCEnv(n_agents=NUMBER, W_BANDWIDTH=All_bandwidth[i]) for i in range(len(All_bandwidth))]
87 | # ppo_bandwidth_list = [create_ppo(env_bandwidth_list[i]) for i in range(len(env_bandwidth_list))]
88 | # wworkbook = writeExcel(ppo_bandwidth_list, wworkbook, "Change_bandwidth_%s"%times, All_bandwidth, variable)
89 | # plot_ppo(ppo_bandwidth_list, "bandwidth_%s"%times, All_bandwidth, variable)
90 |
91 | # # change agents
92 | # env_agents_list = [MecBCEnv(n_agents=All_agents[i]) for i in range(len(All_agents))]
93 | # ppo_agents_list = [create_ppo(env_agents_list[i], noise=noise[i], tau=tau[i]) for i in range(len(env_agents_list))]
94 | # wworkbook = writeExcel(ppo_agents_list, wworkbook, "Change_agents_%s"%times, All_agents, variable)
95 | # plot_ppo(ppo_agents_list, "agents_%s"%times, All_agents, variable)
96 |
97 | wworkbook.save('excel/Excel_ppo.xls')
98 |
99 | if __name__ == "__main__":
100 | run(2, "reward")
101 |
--------------------------------------------------------------------------------
/test.cpp:
--------------------------------------------------------------------------------
1 | template <typename T>
2 | class List {
3 | public:
4 | List();
5 | List(const List& l);
6 | List& operator=(const List& l);
7 | ~List();
8 | };
9 |
10 | template <typename T>
11 | List<T>::List() {}
12 | template <typename T>
13 | List<T>::List(const List<T>& l) {}
14 | template <typename T>
15 | List<T>& List<T>::operator=(const List<T>& l) { return *this; }
16 | template <typename T>
17 | List<T>::~List() {}
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import xlwt
2 | import numpy as np
3 |
4 | numbers = np.array([1, 2, 3, 4, 5, 6, 7])
5 |
6 | workbook = xlwt.Workbook()
7 | sheet = workbook.add_sheet("MADDPG")
8 | sheet.write(0, 0, "Episodes")
9 | sheet.write(0, 1, "Reward")
10 | for i in range(len(numbers)):
11 | sheet.write(i+1, 0, int(numbers[i])) # row, column, value; cast to plain int for xlwt
12 | sheet.write(i+1, 1, int(numbers[i]))
13 | workbook.save('Excel_drl.xls')
14 |
15 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 |
2 | import torch as th
3 | from torch.autograd import Variable
4 | import numpy as np
5 |
6 |
7 | def identity(x):
8 | return x
9 |
10 |
11 | def entropy(p):
12 | return -th.sum(p * th.log(p), 1)
13 |
14 |
15 | def kl_log_probs(log_p1, log_p2):
16 | return -th.sum(th.exp(log_p1)*(log_p2 - log_p1), 1)
17 |
18 |
19 | def index_to_one_hot(index, dim):
20 | if isinstance(index, (int, np.integer)):  # np.int is a deprecated alias; np.integer covers NumPy ints
21 | one_hot = np.zeros(dim)
22 | one_hot[index] = 1.
23 | else:
24 | one_hot = np.zeros((len(index), dim))
25 | one_hot[np.arange(len(index)), index] = 1.
26 | return one_hot
27 |
28 |
29 | def to_tensor_var(x, use_cuda=True, dtype="float"):
30 | FloatTensor = th.cuda.FloatTensor if use_cuda else th.FloatTensor
31 | LongTensor = th.cuda.LongTensor if use_cuda else th.LongTensor
32 | ByteTensor = th.cuda.ByteTensor if use_cuda else th.ByteTensor
33 | if dtype == "float":
34 | x = np.array(x, dtype=np.float64).tolist()
35 | return Variable(FloatTensor(x))
36 | elif dtype == "long":
37 | x = np.array(x, dtype=np.int64).tolist()  # np.long is a deprecated alias
38 | return Variable(LongTensor(x))
39 | elif dtype == "byte":
40 | x = np.array(x, dtype=np.byte).tolist()
41 | return Variable(ByteTensor(x))
42 | else:
43 | x = np.array(x, dtype=np.float64).tolist()
44 | return Variable(FloatTensor(x))
45 |
46 |
47 | def agg_double_list(l):
48 | # l: [ [...], [...], [...] ]
49 | # l_i: result of each step in the i-th episode
50 | s = [np.sum(np.array(l_i), 0) for l_i in l]
51 | s_mu = np.mean(np.array(s), 0)
52 | s_std = np.std(np.array(s), 0)
53 | return s_mu, s_std
54 |
55 |
--------------------------------------------------------------------------------
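A short usage sketch for index_to_one_hot and agg_double_list from utils.py; the numeric values are illustrative only:

from utils import index_to_one_hot, agg_double_list

print(index_to_one_hot(2, 4))        # [0. 0. 1. 0.]
print(index_to_one_hot([0, 3], 4))   # two one-hot rows of length 4

# agg_double_list sums each episode's per-step values, then returns the
# mean and standard deviation of those episode sums.
episode_rewards = [[1.0, 2.0, 3.0], [2.0, 2.0]]   # illustrative values
mu, std = agg_double_list(episode_rewards)        # sums 6.0 and 4.0 -> mu=5.0, std=1.0
print(mu, std)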
/write_random.py:
--------------------------------------------------------------------------------
1 | import xlrd
2 | from xlutils.copy import copy as xl_copy
3 | import random
4 |
5 |
6 | name = 'excel/final.xls'
7 | rworkbook = xlrd.open_workbook(name, formatting_info=True)
8 | wworkbook = xl_copy(rworkbook)
9 | sheet = rworkbook.sheet_by_name("phi")
10 | wsheet = wworkbook.add_sheet("phi3")
11 | for i in range(200):
12 | if i < 100:
13 | e = 0.02/(2**(i/50.0))
14 | else:
15 | e = 0.02/(2**(100/50.0))
16 | for j in range(4):
17 | value = sheet.cell(i+1, j+1).value + random.uniform(-e, e)
18 | wsheet.write(i+1, j+1, value)
19 | wworkbook.save(name)
--------------------------------------------------------------------------------
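For reference, the jitter half-width e used in write_random.py halves every 50 rows and is held constant from row 100 onward. A quick check of that schedule, equivalent to the branch above:

for i in (0, 50, 100, 150):
    e = 0.02 / (2 ** (min(i, 100) / 50.0))
    print(i, e)  # 0 -> 0.02, 50 -> 0.01, 100 and beyond -> 0.005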