├── .gitignore ├── __init__.py ├── experiment ├── __init__.py ├── dqn.py ├── env.py ├── env_test.py ├── main.py ├── mec.py ├── memory.py ├── model.py ├── task.py └── vehicle.py ├── experiment2 ├── __init__.py ├── dqn.py ├── env.py ├── env_test.py ├── main.py ├── mec.py ├── memory.py ├── model.py ├── task.py ├── test.py └── vehicle.py ├── experiment3 ├── __init__.py ├── dqn.py ├── env.py ├── env_test.py ├── main.py ├── mec.py ├── memory.py ├── model.py ├── task.py ├── test.py └── vehicle.py ├── experiment4 ├── __init__.py ├── dqn.py ├── env.py ├── env_test.py ├── main.py ├── mec.py ├── memory.py ├── model.py ├── task.py ├── test.py └── vehicle.py ├── experiment5 ├── __init__.py ├── env.py ├── env_test.py ├── main.py ├── mappo.py ├── mec.py ├── memory.py ├── model.py ├── task.py └── vehicle.py ├── experiment6 ├── __init__.py ├── env.py ├── env_test.py ├── maddpg.py ├── main.py ├── mec.py ├── memory.py ├── model.py ├── task.py ├── test.py └── vehicle.py ├── experiment7 ├── MyQueue.py ├── __init__.py ├── dqn.py ├── env.py ├── env_test.py ├── main.py ├── mappo.py ├── mec.py ├── mecEnv.py ├── memory.py ├── model.py ├── task.py ├── test.py └── vehicle.py └── test ├── test.py └── test2.py /.gitignore: -------------------------------------------------------------------------------- 1 | /result/ 2 | /experiment/runs 3 | /experiment/saves 4 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenyuhaoCYH/DRL/7d105abdf5a938e0a0c55671528eac256c936704/__init__.py -------------------------------------------------------------------------------- /experiment/__init__.py: -------------------------------------------------------------------------------- 1 | import ptan 2 | import numpy as np 3 | import torch 4 | from torch.distributions.categorical import Categorical 5 | from experiment.env import Env 6 | 7 | 8 | def test_net(nets, env: Env, count=10): 9 | rewards = 0.0 10 | steps = 0 11 | for _ in range(count): 12 | env.reset() 13 | while steps < 1000: 14 | action = [] 15 | with torch.no_grad(): 16 | for vehicle in env.vehicles: 17 | state = torch.tensor(vehicle.self_state) 18 | _, pro = nets[vehicle.id](state) 19 | act = Categorical.sample(pro) 20 | action.append(act.item()) 21 | _, _, reward, _ = env.step(action) 22 | rewards += reward 23 | steps += 1 24 | return rewards / count, steps / count 25 | 26 | # def calc_logprob(pro_v, actions_v): 27 | # p1 = - ((mu_v - actions_v) ** 2) / (2 * torch.exp(logstd_v).clamp(min=1e-3)) 28 | # p2 = - torch.log(torch.sqrt(2 * math.pi * torch.exp(logstd_v))) 29 | # return p1 + p2 30 | -------------------------------------------------------------------------------- /experiment/dqn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import time 4 | from collections import namedtuple 5 | 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | import torch.optim as optim 11 | from pylab import mpl 12 | import netron 13 | 14 | from env import Env 15 | from model import DQN 16 | 17 | # 设置显示中文字体 18 | mpl.rcParams["font.sans-serif"] = ["SimHei"] 19 | 20 | Experience = namedtuple('Transition', 21 | field_names=['cur_otherState', 'cur_TaskState', # 状态 22 | 'taskAction', 'aimAction', 'resourceAction', # 动作 23 | 'reward', # 奖励 24 | 'next_otherState', 'next_TaskState']) # Define a transition tuple 25 
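Each vehicle keeps its own replay buffer of these transitions: `play_step` below fills it after every environment step, and `calc_loss` later consumes sampled batches. A minimal sketch of one record, with toy values standing in for what `env.step()` actually returns (the state sizes here are placeholders, not the real dimensions produced by `Env`):

```python
# Toy transition; in the training loop the values come from env.step() inside play_step().
exp = Experience(
    cur_otherState=[0.0] * 40,            # flattened self/neighbor/MEC features (length is env-dependent)
    cur_TaskState=[[[0, 0, 0, 0]] * 10],  # 1 x MAX_TASK x 4 task-queue snapshot fed to the CNN branch
    taskAction=2,                         # index of the queued task to offload
    aimAction=0,                          # offload target: local, MEC, or one of the neighbors
    resourceAction=0.4,                   # fraction of computing resource requested
    reward=-0.1,
    next_otherState=[0.0] * 40,
    next_TaskState=[[[0, 0, 0, 0]] * 10],
)
# Fields unpack positionally, which is exactly what ExperienceBuffer.sample() relies on:
cur_other, cur_task, a_task, a_aim, a_res, r, nxt_other, nxt_task = exp
```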
| GAMMA = 0.99 26 | BATCH_SIZE = 64 27 | REPLAY_SIZE = 10000 28 | LEARNING_RATE = 1e-4 29 | SYNC_TARGET_FRAMES = 1000 30 | 31 | EPSILON_DECAY_LAST_FRAME = 150000 32 | EPSILON_START = 0.6 33 | EPSILON_FINAL = 0.01 34 | 35 | RESET = 10000 # 重置游戏次数 36 | 37 | MAX_TASK = 10 # 任务队列最大长度 38 | 39 | momentum = 0.005 40 | 41 | RESOURCE = [0.2, 0.4, 0.6, 0.8] 42 | 43 | 44 | @torch.no_grad() 45 | def play_step(env, epsilon, models, device="cpu"): 46 | vehicles = env.vehicles 47 | old_otherState = [] 48 | old_taskState = [] 49 | 50 | actionTask = [] 51 | actionAim = [] 52 | actionResource = [] 53 | # 贪心选择动作 54 | for i, model in enumerate(models): 55 | old_otherState.append(vehicles[i].self_state) 56 | old_taskState.append(vehicles[i].task_state) 57 | if np.random.random() < epsilon: 58 | # 随机动作 59 | actionTask.append(np.random.randint(0, 10)) 60 | actionAim.append(np.random.randint(0, 7)) # local+mec+neighbor 61 | actionResource.append(round(np.random.random(), 1)) 62 | else: 63 | state_v = torch.tensor([vehicles[i].self_state], dtype=torch.float32) 64 | taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 65 | taskAction, aimAction, resourceAction = model(state_v, taskState_v) 66 | 67 | taskAction = np.array(taskAction, dtype=np.float32).reshape(-1) 68 | aimAction = np.array(aimAction, dtype=np.float32).reshape(-1) 69 | resourceAction = np.array(resourceAction, dtype=np.float32).reshape(-1) 70 | 71 | actionAim.append(np.argmax(aimAction)) 72 | actionTask.append(np.argmax(taskAction)) 73 | actionResource.append(RESOURCE[np.argmax(resourceAction)]) 74 | # print("action:", action) 75 | _, _, _, otherState, _, taskState, Reward, reward = env.step(actionTask, actionAim, actionResource) 76 | # print("reward:", reward) 77 | 78 | # 加入各自的缓存池【当前其他状态、当前任务状态、目标动作、任务动作,下一其他状态、下一任务状态】 79 | for i, vehicle in enumerate(vehicles): 80 | exp = Experience(old_otherState[i], [old_taskState[i]], 81 | actionTask[i], actionAim[i], actionResource[i], 82 | reward[i], 83 | otherState[i], [taskState[i]]) 84 | vehicle.buffer.append(exp) 85 | return round(Reward, 2) # 返回总的平均奖励 86 | 87 | 88 | # 计算一个智能体的损失 89 | def calc_loss(batch, net: DQN, tgt_net: DQN, device="cpu"): 90 | cur_otherState, cur_TaskState, taskAction, aimAction, resourceAction, rewards, next_otherState, next_TaskState = batch # 91 | 92 | otherStates_v = torch.tensor(np.array(cur_otherState, copy=False), dtype=torch.float32).to(device) 93 | taskStates_v = torch.tensor(np.array(cur_TaskState, copy=False), dtype=torch.float32).to(device) 94 | # print("states_v:", states_v) # batch状态 95 | taskActions_v = torch.tensor(np.array(taskAction), dtype=torch.int64).to(device) 96 | aimActions_v = torch.tensor(np.array(aimAction), dtype=torch.int64).to(device) 97 | resourceAction_v = torch.tensor(np.array(resourceAction), dtype=torch.int64).to(device) 98 | # print("actions_v", actions_v) # batch动作 99 | rewards_v = torch.tensor(np.array(rewards), dtype=torch.float32).to(device) 100 | # print("rewards_v", rewards_v) # batch奖励 101 | next_otherStates_v = torch.tensor(np.array(next_otherState, copy=False), dtype=torch.float32).to(device) 102 | next_taskStates_v = torch.tensor(np.array(next_TaskState, copy=False), dtype=torch.float32).to(device) 103 | # print("next_states_v", next_states_v) # batch下一个状态 104 | 105 | # 计算当前网络q值 106 | taskActionValues, aimActionValues, resourceActionValues = net(otherStates_v, 107 | taskStates_v) # .gather(1, aimActions_v.unsqueeze(-1)).squeeze(-1) 108 | taskActionValues = taskActionValues.gather(1, 
taskActions_v.unsqueeze(-1)).squeeze(-1) 109 | aimActionValues = aimActionValues.gather(1, aimActions_v.unsqueeze(-1)).squeeze(-1) 110 | resourceActionValues = resourceActionValues.gather(1, resourceAction_v.unsqueeze(-1)).squeeze(-1) 111 | 112 | # 计算目标网络q值 113 | next_taskActionValues, next_aimActionValues, next_resourceActionValues = tgt_net(next_otherStates_v, 114 | next_taskStates_v) # .max(1)[0] # 得到最大的q值 115 | 116 | next_taskActionValues = next_taskActionValues.max(1)[0].detach() 117 | next_aimActionValues = next_aimActionValues.max(1)[0].detach() 118 | next_resourceActionValues = next_resourceActionValues.max(1)[0].detach() 119 | 120 | # 防止梯度流入用于计算下一状态q近似值得NN 121 | # next_states_values = next_aimActionValues.detach() 122 | # print("next_states_values", next_states_values) 123 | expected_aim_values = next_aimActionValues * GAMMA + rewards_v 124 | expected_task_values = next_taskActionValues * GAMMA + rewards_v 125 | expected_resource_values = next_resourceActionValues * GAMMA + rewards_v 126 | # print(" expected_state_values", expected_state_values) 127 | 128 | return nn.MSELoss()(taskActionValues, expected_task_values) + \ 129 | nn.MSELoss()(aimActionValues, expected_aim_values) + \ 130 | nn.MSELoss()(resourceActionValues, expected_resource_values) 131 | 132 | 133 | if __name__ == '__main__': 134 | env = Env() 135 | env.reset() 136 | 137 | frame_idx = 0 138 | # writer = SummaryWriter(comment="-" + env.__doc__) 139 | agents = env.vehicles 140 | models = [] 141 | tgt_models = [] 142 | optimizers = [] 143 | for agent in agents: 144 | # print(agent.get_location, agent.velocity) 145 | task_shape = np.array([agent.task_state]).shape 146 | # print(task_shape) 147 | model = DQN(len(agent.self_state), task_shape, MAX_TASK, len(agent.neighbor) + 2, len(RESOURCE)) 148 | models.append(model) 149 | optimer = optim.RMSprop(params=model.parameters(), lr=LEARNING_RATE, momentum=momentum) 150 | optimizers.append(optimer) 151 | for agent in agents: 152 | # print(agent.get_location, agent.velocity) 153 | task_shape = np.array([agent.task_state]).shape 154 | # print(task_shape) 155 | model = DQN(len(agent.self_state), task_shape, MAX_TASK, len(agent.neighbor) + 2, len(RESOURCE)) 156 | model.load_state_dict(models[agent.id].state_dict()) 157 | tgt_models.append(model) 158 | 159 | # 打印网络结构 160 | model = models[0] 161 | state_v = torch.tensor([env.vehicles[0].self_state], dtype=torch.float32) 162 | taskState_v = torch.tensor([[env.vehicles[0].task_state]], dtype=torch.float32) 163 | # 针对有网络模型,但还没有训练保存 .pth 文件的情况 164 | modelpath = "./netStruct/demo.onnx" # 定义模型结构保存的路径 165 | torch.onnx.export(model, (state_v, taskState_v), modelpath) # 导出并保存 166 | netron.start(modelpath) 167 | 168 | total_reward = [] 169 | recent_reward = [] 170 | loss_1 = [] 171 | reward_1 = [] 172 | 173 | epsilon = EPSILON_START 174 | eliposde = 150000 175 | while eliposde > 0: 176 | # 重置游戏 177 | if frame_idx % RESET == 0: 178 | print("游戏重置") 179 | env.reset() 180 | 181 | frame_idx += 1 182 | print("the {} steps".format(frame_idx)) 183 | epsilon = max(EPSILON_FINAL, EPSILON_START - frame_idx / EPSILON_DECAY_LAST_FRAME) 184 | reward = play_step(env, epsilon, models) 185 | total_reward.append(reward) 186 | print("current reward:", reward) 187 | print("current 100 times total rewards:", np.mean(total_reward[-100:])) 188 | recent_reward.append(np.mean(total_reward[-100:])) 189 | if np.mean(total_reward[-100:]) > 0.5: 190 | break 191 | 192 | for i, agent in enumerate(agents): 193 | # print("length of {} buffer".format(agent.id), 
len(agent.buffer)) 194 | if len(agent.buffer) < REPLAY_SIZE: # 缓冲池要足够大 195 | continue 196 | if frame_idx % SYNC_TARGET_FRAMES == 0: # 更新目标网络 197 | tgt_models[i].load_state_dict(models[i].state_dict()) 198 | optimizers[i].zero_grad() 199 | batch = agent.buffer.sample(BATCH_SIZE) 200 | loss_t = calc_loss(batch, models[i], tgt_models[i]) 201 | # print("loss:", loss_t) 202 | loss_t.backward() 203 | optimizers[i].step() 204 | if agent.id == 0: 205 | print("cur_loss:", loss_t.item()) 206 | loss_1.append(loss_t.item()) 207 | reward_1.append(env.reward[0]) 208 | eliposde -= 1 209 | 210 | cur_time = time.strftime("%Y-%m-%d-%H-%M", time.localtime(time.time())) 211 | # 创建文件夹 212 | os.makedirs("D:/pycharm/Project/VML/MyErion/experiment/result/" + cur_time) 213 | for i, vehicle in enumerate(env.vehicles): 214 | # 保存每个网络模型 215 | torch.save(models[i].state_dict(), 216 | "D:/pycharm/Project/VML/MyErion/experiment/result/" + cur_time + "/vehicle" + str(i) + ".pkl") 217 | 218 | plt.plot(range(len(recent_reward)), recent_reward) 219 | plt.title("奖励曲线") 220 | plt.show() 221 | 222 | plt.plot(range(len(loss_1)), loss_1) 223 | plt.title("损失曲线") 224 | plt.show() 225 | 226 | plt.plot(range(1000), reward_1[-1000:]) 227 | plt.title("车辆一奖励曲线") 228 | plt.show() 229 | -------------------------------------------------------------------------------- /experiment/env_test.py: -------------------------------------------------------------------------------- 1 | from env import Env 2 | import numpy as np 3 | 4 | if __name__ == '__main__': 5 | print() 6 | env = Env() 7 | env.reset() 8 | # 测试找最邻近的mec 9 | # for vehicle in env.vehicles: 10 | # print("vehicle{} location:".format(vehicle.id),vehicle.get_location) 11 | # for mec in env.MECs: 12 | # print("mec{} location:".format(mec.id),mec.get_location) 13 | # for vehicle in env.vehicles: 14 | # print(vehicle.mec_lest.get_location, end=" ") 15 | 16 | # 测试网络节点数 17 | task = np.array(env.taskState) 18 | print(task.size) 19 | print(task.shape) 20 | vehicles = env.vehicles 21 | # print(vehicles[0].actor1) 22 | # print(vehicles[0].target_actor1) 23 | # print(vehicles[0].state) 24 | # print(vehicles[0].get_state()) 25 | # print(len(vehicles[0].state)) 26 | # print(len(env.state)) 27 | 28 | # 测试更新邻居表 29 | # for vehicle in vehicles: 30 | # print(vehicle.get_location) 31 | # 32 | # print("-----------------------------------") 33 | # for vehicle in vehicles: 34 | # for i in vehicle.neighbor: 35 | # print(i.id, end=" ") 36 | # print() 37 | # 测试更新total——task 38 | # list = [vehicles[0],vehicles[1],vehicles[2],vehicles[3],vehicles[4]] 39 | # print(list) 40 | # for i in reversed(list): 41 | # if i.id >=2: 42 | # list.remove(i) 43 | # else: 44 | # break 45 | # print(list) 46 | # list=[[]]*5 47 | # print(list) 48 | for vehicle in vehicles: 49 | print("第{}车状态:{}".format(vehicle.id, vehicle.self_state)) 50 | print("该车邻居:") 51 | for i in vehicle.neighbor: 52 | print(i.id, end=" ") 53 | print() 54 | 55 | # 测试环境运行 56 | for i in range(1000): 57 | action1 = [] 58 | action2 = [] 59 | action3 = [] 60 | for j in range(20): 61 | action1.append(0) 62 | # action2.append(np.random.randint(0, 7)) 63 | action2.append(0) 64 | # action3.append(round(np.random.random(), 2)) 65 | action3.append(0.8) 66 | env.step(action1, action2, action3) 67 | # print("当前状态:", state) 68 | # print("下一状态:", next_state) 69 | # print("车状态:", vehicleState) 70 | # print("任务状态", taskState) 71 | # print("当前奖励:", reward) 72 | # print("每个奖励,", vehicleReward) 73 | # print("当前有{}任务没有传输完成".format(len(env.need_trans_task))) 74 | # print("average 
reward:", env.Reward) 75 | -------------------------------------------------------------------------------- /experiment/mec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | RANGE_MEC = 200 # MEC通信范围 4 | RESOURCE = 20000 # 可用资源 MHz 5 | 6 | 7 | # 边缘服务器 8 | class MEC: 9 | def __init__(self, id, loc_x, loc_y, resources=RESOURCE): 10 | self.loc_x = loc_x 11 | self.loc_y = loc_y 12 | self.loc = [self.loc_x, self.loc_y] 13 | self.id = id 14 | # 当前可用资源 MHz 15 | self.resources = resources 16 | self.state = [] 17 | # 通信范围 m 18 | self.range = RANGE_MEC 19 | # 当前接到需要处理的任务信息 20 | self.accept_task = [] 21 | # 接受任务的数量 22 | self.sum_needDeal_task = 0 23 | # 此时刻有多少动作选则我 多少任务正在传输给我 24 | self.len_action = 0 25 | # 当前时间 26 | self.cur_frame = 0 27 | # 当前状态 28 | self.get_state() 29 | 30 | @property 31 | def get_x(self): 32 | return self.loc_x 33 | 34 | @property 35 | def get_y(self): 36 | return self.loc_y 37 | 38 | @property 39 | def get_location(self): 40 | return self.loc 41 | 42 | """ 43 | 获得状态 44 | """ 45 | 46 | def get_state(self): 47 | """ 48 | :return:state 维度:1+2+2 3维[id,loc_x,loc_y,resources] 49 | """ 50 | self.state = [] 51 | self.state.extend(self.loc) 52 | self.state.append(self.resources) 53 | return self.state 54 | 55 | 56 | # 测试 57 | if __name__ == '__main__': 58 | mec = MEC(10, 10, 1) 59 | # vehicles = [] 60 | # for i in range(40): 61 | # vehicle = Vehicle(i, random.randint(1, 5), random.randint(1, 5), random.randint(0, 4)) 62 | # vehicle.creat_work() 63 | # vehicles.append(vehicle) 64 | # for i, vehicle in enumerate(vehicles): 65 | # print("v{}.get_state():{}".format(i, vehicle.get_state())) 66 | # print("mec.get_state():", mec.get_state(), mec.cur_frame) 67 | # mec.get_task([2] * 40, vehicles) 68 | # print("mec.received_task:", mec.received_task) 69 | # print("resources:", mec.resources) 70 | # mec.renew_resources(1) 71 | # print("after received_task:", mec.received_task) 72 | # print("after resources:", mec.resources) 73 | # print("renew_state", mec.renew_state(1, [1, 2, 2], vehicles), mec.cur_frame) 74 | print(mec.get_location) 75 | -------------------------------------------------------------------------------- /experiment/memory.py: -------------------------------------------------------------------------------- 1 | # 经验类型 2 | import collections 3 | from collections import namedtuple 4 | from random import sample 5 | import numpy as np 6 | 7 | Experience = namedtuple('Transition', 8 | field_names=['state', 'action', 'reward', 'next_state']) # Define a transition tuple 9 | 10 | 11 | class ReplayMemory(object): # Define a replay memory 12 | 13 | # 初始化缓冲池 14 | def __init__(self, capacity): 15 | # 最大容量 16 | self.capacity = capacity 17 | # 缓冲池经验 18 | self.memory = [] 19 | # ? 
20 | self.position = 0 21 | 22 | # 存入经验 23 | def push(self, *args): 24 | if len(self.memory) < self.capacity: 25 | self.memory.append(None) 26 | # 存入经验 27 | self.memory[self.position] = Experience(*args) 28 | # 记录最新经验所在位置 29 | self.position = (self.position + 1) % self.capacity 30 | 31 | # 采样 32 | def sample(self, batch_size): 33 | return sample(self.memory, batch_size) 34 | 35 | def __len__(self): 36 | return len(self.memory) 37 | 38 | 39 | class ExperienceBuffer: 40 | def __init__(self, capacity): 41 | self.maxLen = capacity 42 | self.buffer = collections.deque(maxlen=capacity) # 队列,先进先出 43 | 44 | def __len__(self): 45 | return len(self.buffer) 46 | 47 | def append(self, experience: Experience): 48 | self.buffer.append(experience) 49 | 50 | def sample(self, batch_size): 51 | indices = np.random.choice(len(self.buffer), batch_size, replace=False) 52 | cur_otherState, cur_TaskState, taskAction, aimAction, resourceAction, rewards, next_otherState, next_TaskState = zip( 53 | *[self.buffer[idx] for idx in indices]) 54 | # 转换成numpy 55 | return np.array(cur_otherState), np.array(cur_TaskState), \ 56 | np.array(taskAction), np.array(aimAction), np.array(resourceAction), \ 57 | np.array(rewards, dtype=np.float32), \ 58 | np.array(next_otherState), np.array(next_TaskState) 59 | 60 | # 清空 61 | def clear(self): 62 | self.buffer = collections.deque(maxlen=self.maxLen) 63 | -------------------------------------------------------------------------------- /experiment/model.py: -------------------------------------------------------------------------------- 1 | import ptan 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn import functional as F 6 | from torch.distributions.categorical import Categorical 7 | 8 | HID_SIZE = 64 9 | 10 | 11 | class ModelActor(nn.Module): 12 | def __init__(self, obs_dim, act_dim, task_dim): 13 | super(ModelActor, self).__init__() 14 | self.act_dim = act_dim 15 | 16 | self.mu = nn.Sequential( 17 | nn.Linear(obs_dim + HID_SIZE, HID_SIZE * 2), 18 | nn.ReLU(), 19 | nn.Linear(HID_SIZE * 2, HID_SIZE), 20 | nn.ReLU(), 21 | nn.Linear(HID_SIZE, act_dim * 2), 22 | ) 23 | self.cnn = CNNLayer(task_dim, HID_SIZE) 24 | 25 | def forward(self, x, task): 26 | cnn_out = self.cnn(task) 27 | x = torch.cat((x, cnn_out), -1) 28 | out = self.mu(x) 29 | action_out = out[:, :self.act_dim] 30 | task_out = out[:, self.act_dim:] 31 | action_pro = F.softmax(action_out, dim=-1) 32 | task_pro = F.softmax(task_out, dim=1) 33 | print(action_pro) 34 | print(task_pro) 35 | return action_pro, Categorical(action_pro), task_pro, Categorical(task_pro) 36 | 37 | 38 | class ModelCritic(nn.Module): 39 | def __init__(self, obs_size): 40 | super(ModelCritic, self).__init__() 41 | 42 | self.value = nn.Sequential( 43 | nn.Linear(obs_size, HID_SIZE), 44 | nn.ReLU(), 45 | nn.Linear(HID_SIZE, HID_SIZE), 46 | nn.ReLU(), 47 | nn.Linear(HID_SIZE, 1), 48 | ) 49 | 50 | def forward(self, x): 51 | return self.value(x) 52 | 53 | 54 | class ModelSACTwinQ(nn.Module): 55 | def __init__(self, obs_size, act_size): 56 | super(ModelSACTwinQ, self).__init__() 57 | 58 | self.q1 = nn.Sequential( 59 | nn.Linear(obs_size + act_size, HID_SIZE), 60 | nn.ReLU(), 61 | nn.Linear(HID_SIZE, HID_SIZE), 62 | nn.ReLU(), 63 | nn.Linear(HID_SIZE, 1), 64 | ) 65 | 66 | self.q2 = nn.Sequential( 67 | nn.Linear(obs_size + act_size, HID_SIZE), 68 | nn.ReLU(), 69 | nn.Linear(HID_SIZE, HID_SIZE), 70 | nn.ReLU(), 71 | nn.Linear(HID_SIZE, 1), 72 | ) 73 | 74 | def forward(self, obs, act): 75 | x = torch.cat([obs, act], dim=1) 76 | 
return self.q1(x), self.q2(x) 77 | 78 | 79 | class AgentDDPG(ptan.agent.BaseAgent): 80 | """ 81 | Agent implementing Orstein-Uhlenbeck exploration process 82 | """ 83 | 84 | def __init__(self, net, device="cpu", ou_enabled=True, 85 | ou_mu=0.0, ou_teta=0.15, ou_sigma=0.2, 86 | ou_epsilon=1.0): 87 | self.net = net 88 | self.device = device 89 | self.ou_enabled = ou_enabled 90 | self.ou_mu = ou_mu 91 | self.ou_teta = ou_teta 92 | self.ou_sigma = ou_sigma 93 | self.ou_epsilon = ou_epsilon 94 | 95 | def initial_state(self): 96 | return None 97 | 98 | def __call__(self, states, agent_states): 99 | states_v = ptan.agent.float32_preprocessor(states) 100 | states_v = states_v.to(self.device) 101 | mu_v = self.net(states_v) 102 | actions = mu_v.data.cpu().numpy() 103 | 104 | if self.ou_enabled and self.ou_epsilon > 0: 105 | new_a_states = [] 106 | for a_state, action in zip(agent_states, actions): 107 | if a_state is None: 108 | a_state = np.zeros( 109 | shape=action.shape, dtype=np.float32) 110 | a_state += self.ou_teta * (self.ou_mu - a_state) 111 | a_state += self.ou_sigma * np.random.normal( 112 | size=action.shape) 113 | 114 | action += self.ou_epsilon * a_state 115 | new_a_states.append(a_state) 116 | else: 117 | new_a_states = agent_states 118 | 119 | actions = np.clip(actions, -1, 1) 120 | return actions, new_a_states 121 | 122 | 123 | class DQN(nn.Module): 124 | def __init__(self, obs_dim, task_dim, taskAction_dim, aimAction_dim, resourceAction_dim): 125 | super(DQN, self).__init__() 126 | self.input_layer = nn.Linear(obs_dim + 32, 128) 127 | self.hidden1 = nn.Linear(128, 64) 128 | self.hidden2 = nn.Linear(64, 64) 129 | self.hidden3 = nn.Linear(64, 128) 130 | self.cnn = CNNLayer(task_dim, 32) 131 | self.output_layer1 = self.common(64, taskAction_dim) 132 | self.output_layer2 = self.common(64, aimAction_dim) 133 | self.output_layer3 = self.common(64, resourceAction_dim) 134 | 135 | def common(self, input_dim, action_dim): 136 | return nn.Sequential( 137 | nn.Linear(input_dim, 128), 138 | nn.ReLU(), 139 | self.hidden1, 140 | nn.ReLU(), 141 | self.hidden2, 142 | nn.ReLU(), 143 | nn.Linear(64, action_dim) 144 | ) 145 | 146 | def forward(self, x, task): 147 | """ 148 | 149 | :param x: batch_size*state_n 150 | :return: batch_size*actions_n 输出每个动作对应的q值 151 | """ 152 | # 任务卷积层 153 | cnn_out = self.cnn(task) 154 | x = torch.cat((x, cnn_out), -1) 155 | 156 | # 公共层 157 | x1 = F.relu(self.input_layer(x)) 158 | x2 = F.relu(self.hidden1(x1)) 159 | x3 = F.relu(self.hidden2(x2)) 160 | 161 | taskActionValue = self.output_layer1(x3) 162 | aimActionValue = self.output_layer2(x3) 163 | resourceActionValue = self.output_layer3(x3) 164 | 165 | return taskActionValue, aimActionValue, resourceActionValue 166 | 167 | 168 | class CNNLayer(nn.Module): 169 | def __init__(self, obs_shape, hidden_size, use_orthogonal=True, use_ReLU=True, kernel_size=3, stride=1): 170 | super(CNNLayer, self).__init__() 171 | 172 | active_func = [nn.Tanh(), nn.ReLU()][use_ReLU] 173 | init_method = [nn.init.xavier_uniform_, nn.init.orthogonal_][use_orthogonal] 174 | gain = nn.init.calculate_gain(['tanh', 'relu'][use_ReLU]) 175 | 176 | def init_(m): # 权重使用正交初始化,激活函数使用relu 177 | return init(m, init_method, lambda x: nn.init.constant_(x, 0), gain=gain) 178 | 179 | input_channel = obs_shape[0] 180 | input_width = obs_shape[1] 181 | input_height = obs_shape[2] 182 | 183 | self.cnn = nn.Sequential( 184 | init_(nn.Conv2d(in_channels=input_channel, 185 | out_channels=hidden_size // 2, 186 | kernel_size=kernel_size, 187 | stride=stride) 188 | ), 
189 | active_func, 190 | nn.Flatten(), 191 | init_(nn.Linear( 192 | hidden_size // 2 * (input_width - kernel_size + stride) * (input_height - kernel_size + stride), 193 | hidden_size) 194 | ), 195 | active_func, 196 | init_(nn.Linear(hidden_size, hidden_size)), active_func) 197 | 198 | def forward(self, x): 199 | x = x / 255.0 200 | x = self.cnn(x) 201 | 202 | return x 203 | 204 | 205 | def init(module, weight_init, bias_init, gain=1): 206 | weight_init(module.weight.data, gain=gain) 207 | bias_init(module.bias.data) 208 | return module 209 | -------------------------------------------------------------------------------- /experiment/task.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | np.random.seed(2) 4 | 5 | 6 | class Task: 7 | """ 8 | 定义任务类型 9 | """ 10 | 11 | def __init__(self, vehicle, createTime): 12 | # 产生任务的车辆 13 | self.vehicle = vehicle 14 | # 完成该任务所消耗的资源 15 | self.aim = None # 传送对象 16 | 17 | self.size = np.random.uniform(0.2, 1) # Mb 18 | self.cycle = np.random.randint(20, 50) # cycle/bit 19 | 20 | self.max_time = 50 # ms 最大容忍时间 21 | self.need_trans_size = self.size * np.power(2, 10) # Kb 还剩余多少未传输完成 22 | self.need_precess_cycle = self.cycle * self.size * 1000 # Mb * cycle/byte =M cycle 还剩余多少轮次未完成(10^6) 23 | 24 | self.rate = 0 # 当前速率 25 | 26 | self.compute_resource = 0 27 | self.hold_on_time = 0 28 | 29 | self.create_time = createTime # 任务产生时间 30 | self.pick_time = 0 # 被选择的时间(出队列时间) 31 | 32 | # 完成该任务所消耗的cup资源 33 | self.energy = 0 34 | self.trans_time = 0 # 传输所需要的时间(实际) 35 | self.precess_time = 0 # 任务处理所需要的时间(实际) 36 | -------------------------------------------------------------------------------- /experiment/vehicle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import random 3 | 4 | import numpy as np 5 | 6 | from memory import ExperienceBuffer 7 | from task import Task 8 | 9 | Dv = 50 # 车的最大通信范围 10 | Fv = 4000 # 车最大计算能力 MHZ 11 | alpha = 0.25 12 | MAX_TASK = 10 # 任务队列最大长度 13 | 14 | CAPACITY = 20000 # 缓冲池大小 15 | TASK_DISTRIBUTE = 4 # 可分的任务段数 16 | TASK_SOLT = 20 # 任务产生时隙 17 | 18 | np.random.seed(0) 19 | 20 | 21 | class Vehicle: 22 | # 位置:x,y 速度、方向:-1左,1右 23 | def __init__(self, id, loc_x, loc_y, direction, velocity=20): 24 | # 车的位置信息 25 | self.loc_x = loc_x 26 | self.loc_y = loc_y 27 | self.loc = [loc_x, loc_y] 28 | self.velocity = velocity # m/s 29 | self.direction = direction 30 | self.id = id 31 | # 功率和信道增益 32 | self.alpha = alpha 33 | # 通信范围 34 | self.range = Dv 35 | # 邻居表 36 | self.neighbor = [] 37 | # 最近的mec 38 | self.mec_lest = None 39 | # 当前时间 40 | self.cur_frame = 0 41 | # 接受的任务的列表 42 | self.accept_task = [] 43 | # 接受任务的数量 44 | self.sum_needDeal_task = 0 45 | # 此时刻有多少动作选则我 46 | self.len_action = 0 47 | # 当前可用资源 48 | self.resources = round((1 - np.random.randint(1, 5) / 10) * Fv, 2) # MHz 49 | # 表示当前是否有任务正在传输(0:没有,1:有) 50 | self.trans_task = 0 51 | # 当前处理的任务(用于计算奖励,不用于状态信息) 52 | self.cur_task = None 53 | # 任务队列 54 | self.total_task = [] 55 | # 任务队列的长度 56 | self.len_task = len(self.total_task) 57 | # 当前状态信息 58 | self.otherState = [] 59 | # 当前任务队列状态 60 | self.taskState = [] 61 | # 去除邻居的状态信息用于邻居车观察和全局critic的处理 62 | self.excludeNeighbor_state = [] 63 | # 缓冲池 64 | self.buffer = ExperienceBuffer(capacity=CAPACITY) 65 | # 总奖励 66 | self.reward = [] 67 | # 任务溢出的数量 68 | self.overflow = 0 69 | # 需等待时长 70 | self.hold_on = 0 71 | # 上一个任务产生的时间 72 | self.lastCreatWorkTime = 0 73 | 74 | self.create_work() 75 | 76 | # 获得位置 77 | @property 78 | def 
get_location(self): 79 | return self.loc 80 | 81 | # 设置位置 82 | def set_location(self, loc_x, loc_y): 83 | self.loc_x = loc_x 84 | self.loc_y = loc_y 85 | self.loc = [self.loc_x, self.loc_y] 86 | 87 | # 获得x 88 | @property 89 | def get_x(self): 90 | return self.loc_x 91 | 92 | # 获得y 93 | @property 94 | def get_y(self): 95 | return self.loc_y 96 | 97 | # 产生任务 传入当前时间 98 | def create_work(self): 99 | # 每隔一段时间进行一次任务产生 100 | if (self.cur_frame - self.lastCreatWorkTime) % TASK_SOLT == 0: 101 | # 每次有0.6的概率产生任务 102 | if random.random() < 0.6: 103 | if self.len_task < MAX_TASK: # 队列不满 104 | task = Task(self, self.cur_frame) 105 | self.lastCreatWorkTime = self.cur_frame 106 | self.total_task.append(task) 107 | self.len_task += 1 108 | print("第{}辆车产生了任务".format(self.id)) 109 | self.overflow = 0 110 | else: 111 | print("第{}辆车任务队列已满".format(self.id)) 112 | self.overflow += 1 113 | 114 | """ 115 | 获得状态 116 | """ 117 | 118 | def get_state(self): 119 | self.otherState = [] 120 | self.excludeNeighbor_state = [] 121 | self.taskState = [] 122 | 123 | # 位置信息 4 124 | self.otherState.extend(self.loc) 125 | self.otherState.append(self.velocity) 126 | self.otherState.append(self.direction) 127 | self.excludeNeighbor_state.extend(self.loc) 128 | self.excludeNeighbor_state.append(self.velocity) 129 | self.excludeNeighbor_state.append(self.direction) 130 | 131 | # 资源信息(可用资源) 132 | self.otherState.append(self.resources) 133 | self.excludeNeighbor_state.append(self.resources) 134 | 135 | # 当前是否有任务在传输 136 | self.excludeNeighbor_state.append(self.trans_task) 137 | self.otherState.append(self.trans_task) 138 | 139 | # 正在传输的任务信息 140 | # if self.trans_task is not None: 141 | # self.otherState.append(self.trans_task.need_trans_size) 142 | # self.excludeNeighbor_state.append(self.trans_task.need_trans_size) 143 | # else: 144 | # self.otherState.append(0) 145 | # self.excludeNeighbor_state.append(0) 146 | self.otherState.append(self.len_task) # 当前队列长度 147 | self.excludeNeighbor_state.append(self.len_task) 148 | 149 | # 邻居表 7*数量 150 | for neighbor in self.neighbor: 151 | self.otherState.extend(neighbor.position) # 位置 152 | self.otherState.append(neighbor.velocity) # 速度 153 | self.otherState.append(neighbor.direction) # 方向 154 | self.otherState.append(neighbor.resources) # 可用资源 155 | 156 | # 最近mec的状态 6 157 | if self.mec_lest is not None: 158 | self.otherState.extend(self.mec_lest.get_state()) 159 | 160 | # 任务状态信息 161 | for i in range(MAX_TASK): 162 | if i < self.len_task: 163 | task = self.total_task[i] 164 | self.taskState.append([task.create_time, task.need_trans_size, task.need_precess_cycle, task.max_time]) 165 | else: 166 | self.taskState.append([0, 0, 0, 0]) 167 | 168 | return self.excludeNeighbor_state 169 | -------------------------------------------------------------------------------- /experiment2/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 环境2 3 | (两个动作:选择任务和选择对象) 4 | 使用dqn训练模型 5 | 使用经典城市道路(使用不同数量车辆和邻居) 6 | """ 7 | -------------------------------------------------------------------------------- /experiment2/dqn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import time 4 | from collections import namedtuple 5 | 6 | import matplotlib 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | from pylab import mpl 13 | import netron 14 | 15 | from env import Env 16 | from model import DQN 17 | 18 
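The `DQN` imported above (defined in `model.py`) takes the flattened per-vehicle state plus the task-queue matrix, whose numpy shape is passed straight to the CNN branch as (channels, width, height). A quick shape-check sketch under assumed dimensions (the placeholder `obs_dim` and the 3-feature task rows mirror experiment2's `vehicle.py`; the real values come from `Env`):

```python
import torch

from model import DQN

MAX_TASK = 10
obs_dim = 40                    # placeholder for the flattened vehicle state length; the real value comes from Env
task_shape = (1, MAX_TASK, 3)   # experiment2 stores [need_trans_size, need_precess_cycle, max_time] per task

net = DQN(obs_dim, task_shape, taskAction_dim=MAX_TASK, aimAction_dim=7)  # 7 = local + MEC + 5 neighbors (assumed)

state_v = torch.zeros((1, obs_dim), dtype=torch.float32)        # batch of one flattened state
task_v = torch.zeros((1, 1, MAX_TASK, 3), dtype=torch.float32)  # batch x channel x MAX_TASK x 3
task_q, aim_q = net(state_v, task_v)
print(task_q.shape, aim_q.shape)  # torch.Size([1, 10]) torch.Size([1, 7]): one Q-value per discrete sub-action
```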
| # 设置显示中文字体 19 | mpl.rcParams["font.sans-serif"] = ["SimHei"] 20 | matplotlib.rcParams['axes.unicode_minus'] = False 21 | 22 | Experience = namedtuple('Transition', 23 | field_names=['cur_otherState', 'cur_TaskState', # 状态 24 | 'taskAction', 'aimAction', # 动作 25 | 'reward', # 奖励 26 | 'next_otherState', 'next_TaskState']) # Define a transition tuple 27 | GAMMA = 0.99 28 | BATCH_SIZE = 64 29 | REPLAY_SIZE = 10000 30 | LEARNING_RATE = 1e-4 31 | SYNC_TARGET_FRAMES = 1000 32 | 33 | EPSILON_DECAY_LAST_FRAME = 150000 34 | EPSILON_START = 0.6 35 | EPSILON_FINAL = 0.01 36 | 37 | RESET = 100000 # 重置游戏次数 38 | 39 | MAX_TASK = 10 # 任务队列最大长度 40 | 41 | momentum = 0.005 42 | 43 | RESOURCE = [0.2, 0.4, 0.6, 0.8] 44 | 45 | 46 | @torch.no_grad() 47 | def play_step(env, epsilon, models): 48 | vehicles = env.vehicles 49 | old_otherState = [] 50 | old_taskState = [] 51 | 52 | actionTask = [] 53 | actionAim = [] 54 | # 贪心选择动作 55 | for i, model in enumerate(models): 56 | old_otherState.append(vehicles[i].self_state) 57 | old_taskState.append(vehicles[i].task_state) 58 | if np.random.random() < epsilon: 59 | # 随机动作 60 | actionTask.append(np.random.randint(0, 10)) 61 | actionAim.append(np.random.randint(0, 7)) # local+mec+neighbor 62 | else: 63 | state_v = torch.tensor([vehicles[i].self_state], dtype=torch.float32) 64 | taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 65 | taskAction, aimAction = model(state_v, taskState_v) 66 | 67 | taskAction = np.array(taskAction, dtype=np.float32).reshape(-1) 68 | aimAction = np.array(aimAction, dtype=np.float32).reshape(-1) 69 | 70 | actionAim.append(np.argmax(aimAction)) 71 | actionTask.append(np.argmax(taskAction)) 72 | # print("action:", action) 73 | _, _, _, otherState, _, taskState, Reward, reward = env.step(actionTask, actionAim) 74 | # print("reward:", reward) 75 | 76 | # 加入各自的缓存池【当前其他状态、当前任务状态、目标动作、任务动作,下一其他状态、下一任务状态】 77 | for i, vehicle in enumerate(vehicles): 78 | exp = Experience(old_otherState[i], [old_taskState[i]], 79 | actionTask[i], actionAim[i], 80 | reward[i], 81 | otherState[i], [taskState[i]]) 82 | vehicle.buffer.append(exp) 83 | return round(Reward, 2) # 返回总的平均奖励 84 | 85 | 86 | # 计算一个智能体的损失 87 | def calc_loss(batch, net: DQN, tgt_net: DQN, device="cpu"): 88 | cur_otherState, cur_TaskState, taskAction, aimAction, rewards, next_otherState, next_TaskState = batch # 89 | 90 | otherStates_v = torch.tensor(np.array(cur_otherState, copy=False), dtype=torch.float32).to(device) 91 | taskStates_v = torch.tensor(np.array(cur_TaskState, copy=False), dtype=torch.float32).to(device) 92 | # print("states_v:", states_v) # batch状态 93 | taskActions_v = torch.tensor(np.array(taskAction), dtype=torch.int64).to(device) 94 | aimActions_v = torch.tensor(np.array(aimAction), dtype=torch.int64).to(device) 95 | # print("actions_v", actions_v) # batch动作 96 | rewards_v = torch.tensor(np.array(rewards), dtype=torch.float32).to(device) 97 | # print("rewards_v", rewards_v) # batch奖励 98 | next_otherStates_v = torch.tensor(np.array(next_otherState, copy=False), dtype=torch.float32).to(device) 99 | next_taskStates_v = torch.tensor(np.array(next_TaskState, copy=False), dtype=torch.float32).to(device) 100 | # print("next_states_v", next_states_v) # batch下一个状态 101 | 102 | # 计算当前网络q值 103 | taskActionValues, aimActionValues = net(otherStates_v, 104 | taskStates_v) # .gather(1, aimActions_v.unsqueeze(-1)).squeeze(-1) 105 | taskActionValues = taskActionValues.gather(1, taskActions_v.unsqueeze(-1)).squeeze(-1) 106 | aimActionValues = aimActionValues.gather(1, 
aimActions_v.unsqueeze(-1)).squeeze(-1) 107 | 108 | # 计算目标网络q值 109 | next_taskActionValues, next_aimActionValues = tgt_net(next_otherStates_v, 110 | next_taskStates_v) # .max(1)[0] # 得到最大的q值 111 | 112 | next_taskActionValues = next_taskActionValues.max(1)[0].detach() 113 | next_aimActionValues = next_aimActionValues.max(1)[0].detach() 114 | 115 | # 防止梯度流入用于计算下一状态q近似值得NN 116 | # next_states_values = next_aimActionValues.detach() 117 | # print("next_states_values", next_states_values) 118 | expected_aim_values = next_aimActionValues * GAMMA + rewards_v 119 | expected_task_values = next_taskActionValues * GAMMA + rewards_v 120 | # print(" expected_state_values", expected_state_values) 121 | 122 | return nn.MSELoss()(taskActionValues, expected_task_values), nn.MSELoss()(aimActionValues, expected_aim_values) 123 | 124 | 125 | if __name__ == '__main__': 126 | env = Env() 127 | env.reset() 128 | 129 | frame_idx = 0 130 | # writer = SummaryWriter(comment="-" + env.__doc__) 131 | agents = env.vehicles 132 | models = [] 133 | tgt_models = [] 134 | optimizers = [] 135 | for agent in agents: 136 | # print(agent.get_location, agent.velocity) 137 | task_shape = np.array([agent.task_state]).shape 138 | # print(task_shape) 139 | model = DQN(len(agent.self_state), task_shape, MAX_TASK, len(agent.neighbor) + 2) 140 | models.append(model) 141 | optimer = optim.RMSprop(params=model.parameters(), lr=LEARNING_RATE, momentum=momentum) 142 | optimizers.append(optimer) 143 | for agent in agents: 144 | # print(agent.get_location, agent.velocity) 145 | task_shape = np.array([agent.task_state]).shape 146 | # print(task_shape) 147 | model = DQN(len(agent.self_state), task_shape, MAX_TASK, len(agent.neighbor) + 2) 148 | model.load_state_dict(models[agent.id].state_dict()) 149 | tgt_models.append(model) 150 | 151 | # 打印网络结构 152 | # model = models[0] 153 | # state_v = torch.tensor([env.vehicles[0].otherState], dtype=torch.float32) 154 | # taskState_v = torch.tensor([[env.vehicles[0].taskState]], dtype=torch.float32) 155 | # # 针对有网络模型,但还没有训练保存 .pth 文件的情况 156 | # modelpath = "./netStruct/demo.onnx" # 定义模型结构保存的路径 157 | # torch.onnx.export(model, (state_v, taskState_v), modelpath) # 导出并保存 158 | # netron.start(modelpath) 159 | 160 | total_reward = [] 161 | recent_reward = [] 162 | loss_task_list = [] 163 | loss_aim_list = [] 164 | reward_1 = [] 165 | 166 | epsilon = EPSILON_START 167 | eliposde = 500000 168 | while eliposde > 0: 169 | # 重置游戏 170 | if frame_idx % RESET == 0: 171 | print("游戏重置") 172 | env.reset() 173 | agents = env.vehicles 174 | 175 | frame_idx += 1 176 | print("the {} steps".format(frame_idx)) 177 | epsilon = max(EPSILON_FINAL, EPSILON_START - frame_idx / EPSILON_DECAY_LAST_FRAME) 178 | reward = play_step(env, epsilon, models) 179 | total_reward.append(reward) 180 | print("current reward:", reward) 181 | print("current 100 times total rewards:", np.mean(total_reward[-100:])) 182 | recent_reward.append(np.mean(total_reward[-100:])) 183 | if np.mean(total_reward[-100:]) > 0.7: 184 | break 185 | 186 | for i, agent in enumerate(agents): 187 | # print("length of {} buffer".format(agent.id), len(agent.buffer)) 188 | if len(agent.buffer) < REPLAY_SIZE: # 缓冲池要足够大 189 | continue 190 | if frame_idx % SYNC_TARGET_FRAMES == 0: # 更新目标网络 191 | tgt_models[i].load_state_dict(models[i].state_dict()) 192 | optimizers[i].zero_grad() 193 | batch = agent.buffer.sample(BATCH_SIZE) 194 | loss_task, loss_aim = calc_loss(batch, models[i], tgt_models[i]) 195 | # print("loss:", loss_task, " ", loss_aim) 196 | # loss_t.backward() 197 | 
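# Backward over both heads at once: the gradients of loss_task and loss_aim accumulate into the shared DQN layers, equivalent to (loss_task + loss_aim).backward()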
torch.autograd.backward([loss_task, loss_aim]) 198 | optimizers[i].step() 199 | if agent.id == 0: 200 | print("cur_loss:", loss_task.item()) 201 | print("cur_aim_loss", loss_aim.item()) 202 | loss_task_list.append(loss_task.item()) 203 | loss_aim_list.append(loss_aim.item()) 204 | reward_1.append(env.reward[0]) 205 | eliposde -= 1 206 | 207 | cur_time = time.strftime("%Y-%m-%d-%H-%M", time.localtime(time.time())) 208 | # 创建文件夹 209 | os.makedirs("D:/pycharm/Project/VML/MyErion/experiment2/result/" + cur_time) 210 | for i, vehicle in enumerate(env.vehicles): 211 | # 保存每个网络模型 212 | torch.save(tgt_models[i].state_dict(), 213 | "D:/pycharm/Project/VML/MyErion/experiment2/result/" + cur_time + "/vehicle" + str(i) + ".pkl") 214 | 215 | plt.plot(range(len(recent_reward)), recent_reward) 216 | plt.title("奖励曲线") 217 | plt.show() 218 | 219 | plt.plot(range(len(loss_task_list)), loss_task_list) 220 | plt.title("任务选择损失曲线") 221 | plt.show() 222 | 223 | plt.plot(range(len(loss_aim_list)), loss_aim_list) 224 | plt.title("目标选择损失曲线") 225 | plt.show() 226 | 227 | plt.plot(range(1000), reward_1[-1000:]) 228 | plt.title("车辆一奖励曲线") 229 | plt.show() 230 | -------------------------------------------------------------------------------- /experiment2/env_test.py: -------------------------------------------------------------------------------- 1 | from env import Env 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | if __name__ == '__main__': 6 | print() 7 | env = Env() 8 | env.reset() 9 | # 测试找最邻近的mec 10 | # for vehicle in env.vehicles: 11 | # print("vehicle{} location:".format(vehicle.id),vehicle.get_location) 12 | # for mec in env.MECs: 13 | # print("mec{} location:".format(mec.id),mec.get_location) 14 | # for vehicle in env.vehicles: 15 | # print(vehicle.mec_lest.get_location, end=" ") 16 | 17 | # 测试网络节点数 18 | task = np.array(env.taskState) 19 | print(task.shape) 20 | vehicles = env.vehicles 21 | # print(vehicles[0].actor1) 22 | # print(vehicles[0].target_actor1) 23 | # print(vehicles[0].state) 24 | # print(vehicles[0].get_state()) 25 | # print(len(vehicles[0].state)) 26 | # print(len(env.state)) 27 | 28 | # 测试更新邻居表 29 | # for vehicle in vehicles: 30 | # print(vehicle.get_location) 31 | # 32 | # print("-----------------------------------") 33 | # for vehicle in vehicles: 34 | # for i in vehicle.neighbor: 35 | # print(i.id, end=" ") 36 | # print() 37 | # 测试更新total——task 38 | # list = [vehicles[0],vehicles[1],vehicles[2],vehicles[3],vehicles[4]] 39 | # print(list) 40 | # for i in reversed(list): 41 | # if i.id >=2: 42 | # list.remove(i) 43 | # else: 44 | # break 45 | # print(list) 46 | # list=[[]]*5 47 | # print(list) 48 | for vehicle in vehicles: 49 | print("第{}车状态:{}".format(vehicle.id, vehicle.self_state)) 50 | print("该车邻居:") 51 | for i in vehicle.neighbor: 52 | print(i.id, end=" ") 53 | print() 54 | 55 | # 测试环境运行 56 | x = [[] for i in range(20)] 57 | y = [[] for i in range(20)] 58 | for i in range(10000): 59 | for j in range(20): 60 | x[j].append(env.vehicles[j].position[0]) 61 | y[j].append(env.vehicles[j].position[1]) 62 | action1 = [] 63 | action2 = [] 64 | action3 = [] 65 | for j in range(20): 66 | action1.append(np.random.randint(0, 10)) 67 | # action1.append(0) 68 | action2.append(np.random.randint(0, 7)) 69 | # action2.append(0) 70 | # action3.append(round(np.random.random(), 2)) 71 | action3.append(0.8) 72 | other_state, task_state, vehicle_state, _, _, _, Reward, _ = env.step(action1, action2) 73 | print("第{}次平均奖励{}".format(i, Reward)) 74 | # print("当前状态:", state) 75 | # 
print("下一状态:", next_state) 76 | # print("车状态:", vehicleState) 77 | # print("任务状态", taskState) 78 | # print("当前奖励:", reward) 79 | # print("每个奖励,", vehicleReward) 80 | # print("当前有{}任务没有传输完成".format(len(env.need_trans_task))) 81 | # print("average reward:", env.Reward) 82 | plt.figure(figsize=(100, 100)) 83 | fix, ax = plt.subplots(5, 4) 84 | 85 | for i in range(5): 86 | for j in range(4): 87 | number = i * 4 + j 88 | ax[i, j].plot(x[number], y[number]) 89 | ax[i, j].set_title('vehicle {}'.format(number)) 90 | plt.show() 91 | -------------------------------------------------------------------------------- /experiment2/mec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | RANGE_MEC = 200 # MEC通信范围 4 | RESOURCE = 20000 # 可用资源 MHz 5 | 6 | 7 | # 边缘服务器 8 | class MEC: 9 | def __init__(self, position, resources=RESOURCE): 10 | self.loc_x = position[0] 11 | self.loc_y = position[1] 12 | self.loc = position 13 | # 当前可用资源 MHz 14 | self.resources = resources 15 | self.state = [] 16 | # 通信范围 m 17 | self.range = RANGE_MEC 18 | # 当前接到需要处理的任务信息 19 | self.accept_task = [] 20 | # 接受任务的数量 21 | self.sum_needDeal_task = 0 22 | # 此时刻有多少动作选则我 多少任务选择传输给我 23 | self.len_action = 0 24 | # 当前状态 25 | self.get_state() 26 | 27 | @property 28 | def get_x(self): 29 | return self.loc_x 30 | 31 | @property 32 | def get_y(self): 33 | return self.loc_y 34 | 35 | @property 36 | def get_location(self): 37 | return self.loc 38 | 39 | """ 40 | 获得状态 41 | """ 42 | 43 | def get_state(self): 44 | """ 45 | :return:state 维度:[loc_x,loc_y,sum_needDeal_task,resources] 46 | """ 47 | self.state = [] 48 | self.state.extend(self.loc) 49 | self.state.append(self.sum_needDeal_task) 50 | self.state.append(self.len_action) 51 | self.state.append(self.resources) 52 | return self.state 53 | -------------------------------------------------------------------------------- /experiment2/memory.py: -------------------------------------------------------------------------------- 1 | # 经验类型 2 | import collections 3 | from collections import namedtuple 4 | from random import sample 5 | import numpy as np 6 | 7 | Experience = namedtuple('Transition', 8 | field_names=['state', 'action', 'reward', 'next_state']) # Define a transition tuple 9 | 10 | 11 | class ReplayMemory(object): # Define a replay memory 12 | 13 | # 初始化缓冲池 14 | def __init__(self, capacity): 15 | # 最大容量 16 | self.capacity = capacity 17 | # 缓冲池经验 18 | self.memory = [] 19 | # ? 
20 | self.position = 0 21 | 22 | # 存入经验 23 | def push(self, *args): 24 | if len(self.memory) < self.capacity: 25 | self.memory.append(None) 26 | # 存入经验 27 | self.memory[self.position] = Experience(*args) 28 | # 记录最新经验所在位置 29 | self.position = (self.position + 1) % self.capacity 30 | 31 | # 采样 32 | def sample(self, batch_size): 33 | return sample(self.memory, batch_size) 34 | 35 | def __len__(self): 36 | return len(self.memory) 37 | 38 | 39 | class ExperienceBuffer: 40 | def __init__(self, capacity): 41 | self.maxLen = capacity 42 | self.buffer = collections.deque(maxlen=capacity) # 队列,先进先出 43 | 44 | def __len__(self): 45 | return len(self.buffer) 46 | 47 | def append(self, experience: Experience): 48 | self.buffer.append(experience) 49 | 50 | def sample(self, batch_size): 51 | indices = np.random.choice(len(self.buffer), batch_size, replace=False) 52 | cur_otherState, cur_TaskState, taskAction, aimAction, rewards, next_otherState, next_TaskState = zip( 53 | *[self.buffer[idx] for idx in indices]) 54 | # 转换成numpy 55 | return np.array(cur_otherState), np.array(cur_TaskState), \ 56 | np.array(taskAction), np.array(aimAction), \ 57 | np.array(rewards, dtype=np.float32), \ 58 | np.array(next_otherState), np.array(next_TaskState) 59 | 60 | # 清空 61 | def clear(self): 62 | self.buffer = collections.deque(maxlen=self.maxLen) 63 | -------------------------------------------------------------------------------- /experiment2/model.py: -------------------------------------------------------------------------------- 1 | import ptan 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn import functional as F 6 | from torch.distributions.categorical import Categorical 7 | 8 | HID_SIZE = 64 9 | 10 | 11 | class ModelActor(nn.Module): 12 | def __init__(self, obs_dim, act_dim, task_dim): 13 | super(ModelActor, self).__init__() 14 | self.act_dim = act_dim 15 | 16 | self.mu = nn.Sequential( 17 | nn.Linear(obs_dim + HID_SIZE, HID_SIZE * 2), 18 | nn.ReLU(), 19 | nn.Linear(HID_SIZE * 2, HID_SIZE), 20 | nn.ReLU(), 21 | nn.Linear(HID_SIZE, act_dim * 2), 22 | ) 23 | self.cnn = CNNLayer(task_dim, HID_SIZE) 24 | 25 | def forward(self, x, task): 26 | cnn_out = self.cnn(task) 27 | x = torch.cat((x, cnn_out), -1) 28 | out = self.mu(x) 29 | action_out = out[:, :self.act_dim] 30 | task_out = out[:, self.act_dim:] 31 | action_pro = F.softmax(action_out, dim=-1) 32 | task_pro = F.softmax(task_out, dim=1) 33 | print(action_pro) 34 | print(task_pro) 35 | return action_pro, Categorical(action_pro), task_pro, Categorical(task_pro) 36 | 37 | 38 | class ModelCritic(nn.Module): 39 | def __init__(self, obs_size): 40 | super(ModelCritic, self).__init__() 41 | 42 | self.value = nn.Sequential( 43 | nn.Linear(obs_size, HID_SIZE), 44 | nn.ReLU(), 45 | nn.Linear(HID_SIZE, HID_SIZE), 46 | nn.ReLU(), 47 | nn.Linear(HID_SIZE, 1), 48 | ) 49 | 50 | def forward(self, x): 51 | return self.value(x) 52 | 53 | 54 | class ModelSACTwinQ(nn.Module): 55 | def __init__(self, obs_size, act_size): 56 | super(ModelSACTwinQ, self).__init__() 57 | 58 | self.q1 = nn.Sequential( 59 | nn.Linear(obs_size + act_size, HID_SIZE), 60 | nn.ReLU(), 61 | nn.Linear(HID_SIZE, HID_SIZE), 62 | nn.ReLU(), 63 | nn.Linear(HID_SIZE, 1), 64 | ) 65 | 66 | self.q2 = nn.Sequential( 67 | nn.Linear(obs_size + act_size, HID_SIZE), 68 | nn.ReLU(), 69 | nn.Linear(HID_SIZE, HID_SIZE), 70 | nn.ReLU(), 71 | nn.Linear(HID_SIZE, 1), 72 | ) 73 | 74 | def forward(self, obs, act): 75 | x = torch.cat([obs, act], dim=1) 76 | return self.q1(x), self.q2(x) 77 | 78 | 79 
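`ModelActor` above packs two discrete heads into one output vector of length `act_dim * 2` and returns, for each head, both the softmax probabilities and a ready-made `Categorical` distribution. A small usage sketch with placeholder sizes (the real `obs_dim` and task shape come from `Env`; note that both heads share the same width `act_dim` in this implementation):

```python
import torch

obs_dim, act_dim = 40, 7    # placeholders: flattened state length and number of offload targets
task_shape = (1, 10, 3)     # channel x MAX_TASK x per-task features, as in experiment2

actor = ModelActor(obs_dim, act_dim, task_shape)
obs = torch.zeros((1, obs_dim))
task = torch.zeros((1, 1, 10, 3))

act_pro, act_dist, task_pro, task_dist = actor(obs, task)  # forward() also prints the two probability vectors
aim_action = act_dist.sample()                             # which target to offload to
task_action = task_dist.sample()                           # task-selection head (also act_dim wide here)
log_prob = act_dist.log_prob(aim_action) + task_dist.log_prob(task_action)
```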
| class AgentDDPG(ptan.agent.BaseAgent): 80 | """ 81 | Agent implementing Orstein-Uhlenbeck exploration process 82 | """ 83 | 84 | def __init__(self, net, device="cpu", ou_enabled=True, 85 | ou_mu=0.0, ou_teta=0.15, ou_sigma=0.2, 86 | ou_epsilon=1.0): 87 | self.net = net 88 | self.device = device 89 | self.ou_enabled = ou_enabled 90 | self.ou_mu = ou_mu 91 | self.ou_teta = ou_teta 92 | self.ou_sigma = ou_sigma 93 | self.ou_epsilon = ou_epsilon 94 | 95 | def initial_state(self): 96 | return None 97 | 98 | def __call__(self, states, agent_states): 99 | states_v = ptan.agent.float32_preprocessor(states) 100 | states_v = states_v.to(self.device) 101 | mu_v = self.net(states_v) 102 | actions = mu_v.data.cpu().numpy() 103 | 104 | if self.ou_enabled and self.ou_epsilon > 0: 105 | new_a_states = [] 106 | for a_state, action in zip(agent_states, actions): 107 | if a_state is None: 108 | a_state = np.zeros( 109 | shape=action.shape, dtype=np.float32) 110 | a_state += self.ou_teta * (self.ou_mu - a_state) 111 | a_state += self.ou_sigma * np.random.normal( 112 | size=action.shape) 113 | 114 | action += self.ou_epsilon * a_state 115 | new_a_states.append(a_state) 116 | else: 117 | new_a_states = agent_states 118 | 119 | actions = np.clip(actions, -1, 1) 120 | return actions, new_a_states 121 | 122 | 123 | class DQN(nn.Module): 124 | def __init__(self, obs_dim, task_dim, taskAction_dim, aimAction_dim): 125 | super(DQN, self).__init__() 126 | self.input_layer = nn.Linear(obs_dim + 32, 128) 127 | self.hidden1 = nn.Linear(128, 64) 128 | self.hidden2 = nn.Linear(64, 64) 129 | self.hidden3 = nn.Linear(64, 128) 130 | self.cnn = CNNLayer(task_dim, 32) 131 | self.output_layer1 = self.common(64, taskAction_dim) 132 | self.output_layer2 = self.common(64, aimAction_dim) 133 | 134 | def common(self, input_dim, action_dim): 135 | return nn.Sequential( 136 | nn.Linear(input_dim, 128), 137 | nn.ReLU(), 138 | self.hidden1, 139 | nn.ReLU(), 140 | self.hidden2, 141 | nn.ReLU(), 142 | nn.Linear(64, action_dim) 143 | ) 144 | 145 | def forward(self, x, task): 146 | """ 147 | 148 | :param x: batch_size*state_n 149 | :return: batch_size*actions_n 输出每个动作对应的q值 150 | """ 151 | # 任务卷积层 152 | cnn_out = self.cnn(task) 153 | x = torch.cat((x, cnn_out), -1) 154 | 155 | # 公共层 156 | x1 = F.relu(self.input_layer(x)) 157 | x2 = F.relu(self.hidden1(x1)) 158 | x3 = F.relu(self.hidden2(x2)) 159 | 160 | taskActionValue = self.output_layer1(x3) 161 | aimActionValue = self.output_layer2(x3) 162 | 163 | return taskActionValue, aimActionValue 164 | 165 | 166 | class CNNLayer(nn.Module): 167 | def __init__(self, obs_shape, hidden_size, use_orthogonal=True, use_ReLU=True, kernel_size=3, stride=1): 168 | super(CNNLayer, self).__init__() 169 | 170 | active_func = [nn.Tanh(), nn.ReLU()][use_ReLU] 171 | init_method = [nn.init.xavier_uniform_, nn.init.orthogonal_][use_orthogonal] 172 | gain = nn.init.calculate_gain(['tanh', 'relu'][use_ReLU]) 173 | 174 | def init_(m): # 权重使用正交初始化,激活函数使用relu 175 | return init(m, init_method, lambda x: nn.init.constant_(x, 0), gain=gain) 176 | 177 | input_channel = obs_shape[0] 178 | input_width = obs_shape[1] 179 | input_height = obs_shape[2] 180 | 181 | self.cnn = nn.Sequential( 182 | init_(nn.Conv2d(in_channels=input_channel, 183 | out_channels=hidden_size // 2, 184 | kernel_size=kernel_size, 185 | stride=stride) 186 | ), 187 | active_func, 188 | nn.Flatten(), 189 | init_(nn.Linear( 190 | hidden_size // 2 * (input_width - kernel_size + stride) * (input_height - kernel_size + stride), 191 | hidden_size) 192 | ), 193 | 
active_func, 194 | init_(nn.Linear(hidden_size, hidden_size)), active_func) 195 | 196 | def forward(self, x): 197 | x = x / 255.0 198 | x = self.cnn(x) 199 | 200 | return x 201 | 202 | 203 | def init(module, weight_init, bias_init, gain=1): 204 | weight_init(module.weight.data, gain=gain) 205 | bias_init(module.bias.data) 206 | return module 207 | -------------------------------------------------------------------------------- /experiment2/task.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | np.random.seed(2) 4 | 5 | 6 | class Task: 7 | """ 8 | 定义任务类型 9 | """ 10 | 11 | def __init__(self, vehicle, createTime): 12 | # 产生任务的车辆 13 | self.vehicle = vehicle 14 | # 完成该任务所消耗的资源 15 | self.aim = None # 传送对象 16 | 17 | self.size = np.random.uniform(0.2, 1) # Mb 18 | self.cycle = np.random.randint(20, 50) # cycle/bit 19 | 20 | self.max_time = 35 # ms 最大容忍时间 21 | self.need_trans_size = self.size * np.power(2, 10) # Kb 还剩余多少未传输完成 22 | self.need_precess_cycle = self.cycle * self.size * 1000 # Mb * cycle/byte =M cycle 还剩余多少轮次未完成(10^6) 23 | 24 | self.rate = 0 # 当前速率 25 | 26 | self.compute_resource = 0 27 | 28 | self.create_time = createTime # 任务产生时间 29 | self.pick_time = 0 # 被选择的时间(出队列时间) 30 | 31 | # 完成该任务所消耗的cup资源 32 | self.energy = 0 33 | self.trans_time = 0 # 传输所需要的时间(实际) 34 | self.precess_time = 0 # 任务处理所需要的时间(实际) 35 | -------------------------------------------------------------------------------- /experiment2/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | import model as model 5 | from env import Env 6 | import matplotlib.pyplot as plt 7 | 8 | if __name__ == '__main__': 9 | env = Env() 10 | env.reset() 11 | 12 | N = env.num_Vehicles 13 | vehicles = env.vehicles 14 | models = [] 15 | 16 | task_shape = np.array([vehicles[0].task_state]).shape 17 | for i in range(N): 18 | tgt_model = model.DQN(len(vehicles[0].self_state), task_shape, 10, len(vehicles[0].neighbor) + 2) 19 | tgt_model.load_state_dict(torch.load( 20 | "D:\pycharm\Project\VML\MyErion\experiment2\\result\\2022-11-04-00-54\\vehicle{}.pkl".format(i))) 21 | models.append(tgt_model) 22 | 23 | # state_v = torch.tensor([vehicles[i].otherState], dtype=torch.float32) 24 | # taskState_v = torch.tensor([[vehicles[i].taskState]], dtype=torch.float32) 25 | # taskAction, aimAction = models[0](state_v, taskState_v) 26 | 27 | vehicleReward = [] 28 | averageReward = [] 29 | for step in range(1000): 30 | action1 = [] 31 | action2 = [] 32 | 33 | for i in range(N): 34 | state_v = torch.tensor([vehicles[i].self_state], dtype=torch.float32) 35 | taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 36 | taskAction, aimAction = models[i](state_v, taskState_v) 37 | 38 | # taskAction = np.array(taskAction, dtype=np.float32).reshape(-1) 39 | # aimAction = np.array(aimAction, dtype=np.float32).reshape(-1) 40 | taskAction = taskAction.detach().numpy().reshape(-1) 41 | aimAction = aimAction.detach().numpy().reshape(-1) 42 | action1.append(np.argmax(taskAction)) 43 | action2.append(np.argmax(aimAction)) 44 | 45 | print(action1) 46 | print(action2) 47 | other_state, task_state, vehicle_state, _, _, _, Reward, reward = env.step(action1, action2) 48 | vehicleReward.append(reward[1]) 49 | averageReward.append(Reward) 50 | print("第{}次车辆平均奖励{}".format(step, Reward)) 51 | 52 | fig, aix = plt.subplots(2, 1) 53 | aix[0].plot(range(len(vehicleReward)), vehicleReward) 54 | 
aix[1].plot(range(len(averageReward)), averageReward) 55 | plt.show() 56 | -------------------------------------------------------------------------------- /experiment2/vehicle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import random 3 | 4 | import numpy as np 5 | 6 | from memory import ExperienceBuffer 7 | from task import Task 8 | 9 | Dv = 50 # 车的最大通信范围 10 | Fv = 4000 # 车最大计算能力 MHZ 11 | MAX_TASK = 10 # 任务队列最大长度 12 | 13 | CAPACITY = 20000 # 缓冲池大小 14 | TASK_SOLT = 10 # 任务产生时隙 15 | 16 | np.random.seed(0) 17 | 18 | direction_map = {"d": 1, "u": 2, "l": 3, "r": 4} 19 | 20 | 21 | class Vehicle: 22 | # 位置:x,y 速度、方向:-1左,1右 23 | def __init__(self, id, position, direction, velocity=20): 24 | self.id = id 25 | # 车的位置信息 26 | self.loc_x = position[0] 27 | self.loc_y = position[1] 28 | self.position = position 29 | self.velocity = velocity # m/s 30 | self.direction = direction 31 | # 通信范围 32 | self.range = Dv 33 | # 邻居表 34 | self.neighbor = [] 35 | # mec 36 | self.Mec = None 37 | # 当前时间 38 | self.cur_frame = 0 39 | # 接受的任务的列表 40 | self.accept_task = [] 41 | # 接受任务的数量 42 | self.sum_needDeal_task = 0 43 | # 此时刻有多少动作选则我 44 | self.len_action = 0 45 | # 当前可用资源 46 | self.resources = round((1 - np.random.randint(1, 5) / 10) * Fv, 2) # MHz 47 | # 表示当前是否有任务正在传输(0:没有,1:有) 48 | self.trans_task = 0 49 | # 当前处理的任务(用于计算奖励,不用于状态信息) 50 | self.cur_task = None 51 | # 任务队列 52 | self.total_task = [] 53 | # 任务队列的长度 54 | self.len_task = len(self.total_task) 55 | 56 | # 当前状态信息 57 | self.otherState = [] 58 | # 当前任务队列状态 59 | self.taskState = [] 60 | # 去除邻居的状态信息用于邻居车观察和全局critic的处理 61 | self.excludeNeighbor_state = [] 62 | # 缓冲池 63 | self.buffer = ExperienceBuffer(capacity=CAPACITY) 64 | # 总奖励 65 | self.reward = [] 66 | # 任务溢出的数量 67 | self.overflow = 0 68 | # 上一个任务产生的时间 69 | self.lastCreatWorkTime = 0 70 | 71 | self.create_work() 72 | 73 | # 获得位置 74 | @property 75 | def get_location(self): 76 | return self.position 77 | 78 | # 设置位置 79 | def set_location(self, loc_x, loc_y): 80 | self.loc_x = loc_x 81 | self.loc_y = loc_y 82 | self.position = [self.loc_x, self.loc_y] 83 | 84 | # 获得x 85 | @property 86 | def get_x(self): 87 | return self.loc_x 88 | 89 | # 获得y 90 | @property 91 | def get_y(self): 92 | return self.loc_y 93 | 94 | # 产生任务 传入当前时间 95 | def create_work(self): 96 | if self.id % 3 == 0: 97 | return 98 | # 每隔一段时间进行一次任务产生 99 | if (self.cur_frame - self.lastCreatWorkTime) % TASK_SOLT == 0: 100 | # 每次有0.6的概率产生任务 101 | if random.random() < 0.6: 102 | if self.len_task < MAX_TASK: # 队列不满 103 | task = Task(self, self.cur_frame) 104 | self.lastCreatWorkTime = self.cur_frame 105 | self.total_task.append(task) 106 | self.len_task += 1 107 | print("第{}辆车产生了任务".format(self.id)) 108 | self.overflow = 0 109 | else: 110 | print("第{}辆车任务队列已满".format(self.id)) 111 | self.overflow += 1 112 | 113 | """ 114 | 获得状态 115 | """ 116 | 117 | def get_state(self): 118 | self.otherState = [] 119 | self.excludeNeighbor_state = [] 120 | self.taskState = [] 121 | 122 | # 位置信息 4 123 | self.otherState.extend(self.position) 124 | self.otherState.append(self.velocity) 125 | self.otherState.append(direction_map.get(self.direction)) 126 | self.excludeNeighbor_state.extend(self.position) 127 | self.excludeNeighbor_state.append(self.velocity) 128 | self.excludeNeighbor_state.append(direction_map.get(self.direction)) 129 | 130 | # 资源信息(可用资源) 131 | self.otherState.append(self.resources) 132 | self.excludeNeighbor_state.append(self.resources) 133 | 134 | # 当前处理的任务量 135 | 
self.otherState.append(self.sum_needDeal_task) 136 | self.excludeNeighbor_state.append(self.sum_needDeal_task) 137 | # 当前接受传输的任务量 138 | self.otherState.append(self.len_action) 139 | self.excludeNeighbor_state.append(self.sum_needDeal_task) 140 | 141 | # 当前是否有任务在传输 142 | self.excludeNeighbor_state.append(self.trans_task) 143 | self.otherState.append(self.trans_task) 144 | 145 | # 正在传输的任务信息 146 | # if self.trans_task is not None: 147 | # self.otherState.append(self.trans_task.need_trans_size) 148 | # self.excludeNeighbor_state.append(self.trans_task.need_trans_size) 149 | # else: 150 | # self.otherState.append(0) 151 | # self.excludeNeighbor_state.append(0) 152 | 153 | # 当前队列长度 154 | self.otherState.append(self.len_task) 155 | self.excludeNeighbor_state.append(self.len_task) 156 | 157 | # 邻居表 7*数量 158 | for neighbor in self.neighbor: 159 | self.otherState.extend(neighbor.position) # 位置 160 | self.otherState.append(neighbor.velocity) # 速度 161 | self.otherState.append(direction_map.get(neighbor.direction)) # 方向 162 | self.otherState.append(neighbor.resources) # 可用资源 163 | self.otherState.append(neighbor.sum_needDeal_task) # 处理任务长度 164 | self.otherState.append(neighbor.len_action) # 当前正在传输任务数量 165 | 166 | self.otherState.extend(self.Mec.state) 167 | 168 | # 任务状态信息 169 | for i in range(MAX_TASK): 170 | if i < self.len_task: 171 | task = self.total_task[i] 172 | self.taskState.append([task.need_trans_size, task.need_precess_cycle, task.max_time]) 173 | else: 174 | self.taskState.append([0, 0, 0]) 175 | 176 | return self.excludeNeighbor_state 177 | -------------------------------------------------------------------------------- /experiment3/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 环境3 3 | (两个动作:选择任务和选择对象) 4 | 加入了mec和车在时隙内处理任务的上限(mec最多同时处理10个任务、车最多处理5个任务) 5 | 使用经典城市道路(使用不同数量车辆和邻居) 6 | """ 7 | import ptan 8 | import numpy as np 9 | import torch 10 | from torch.distributions.categorical import Categorical 11 | from env import Env 12 | 13 | 14 | def test_net(nets, env: Env, count=10): 15 | rewards = 0.0 16 | steps = 0 17 | for _ in range(count): 18 | env.reset() 19 | while steps < 1000: 20 | action = [] 21 | with torch.no_grad(): 22 | for vehicle in env.vehicles: 23 | state = torch.tensor(vehicle.self_state) 24 | _, pro = nets[vehicle.id](state) 25 | act = Categorical.sample(pro) 26 | action.append(act.item()) 27 | _, _, reward, _ = env.step(action) 28 | rewards += reward 29 | steps += 1 30 | return rewards / count, steps / count 31 | 32 | # def calc_logprob(pro_v, actions_v): 33 | # p1 = - ((mu_v - actions_v) ** 2) / (2 * torch.exp(logstd_v).clamp(min=1e-3)) 34 | # p2 = - torch.log(torch.sqrt(2 * math.pi * torch.exp(logstd_v))) 35 | # return p1 + p2 36 | -------------------------------------------------------------------------------- /experiment3/dqn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import time 4 | from collections import namedtuple 5 | 6 | import matplotlib 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | from pylab import mpl 13 | import netron 14 | 15 | from env import Env 16 | from model import DQN 17 | 18 | # 设置显示中文字体 19 | mpl.rcParams["font.sans-serif"] = ["SimHei"] 20 | matplotlib.rcParams['axes.unicode_minus'] = False 21 | 22 | Experience = namedtuple('Transition', 23 | field_names=['cur_otherState', 'cur_TaskState', # 状态 24 | 'taskAction', 
'aimAction', # 动作 25 | 'reward', # 奖励 26 | 'next_otherState', 'next_TaskState']) # Define a transition tuple 27 | GAMMA = 0.99 28 | BATCH_SIZE = 64 29 | REPLAY_SIZE = 10000 30 | LEARNING_RATE = 1e-4 31 | SYNC_TARGET_FRAMES = 1000 32 | 33 | EPSILON_DECAY_LAST_FRAME = 150000 34 | EPSILON_START = 0.6 35 | EPSILON_FINAL = 0.01 36 | 37 | RESET = 100000 # 重置游戏次数 38 | 39 | MAX_TASK = 10 # 任务队列最大长度 40 | 41 | momentum = 0.005 42 | 43 | RESOURCE = [0.2, 0.4, 0.6, 0.8] 44 | 45 | 46 | @torch.no_grad() 47 | def play_step(env, epsilon, models): 48 | vehicles = env.vehicles 49 | old_otherState = [] 50 | old_taskState = [] 51 | 52 | actionTask = [] 53 | actionAim = [] 54 | # 贪心选择动作 55 | for i, model in enumerate(models): 56 | old_otherState.append(vehicles[i].self_state) 57 | old_taskState.append(vehicles[i].task_state) 58 | if np.random.random() < epsilon: 59 | # 随机动作 60 | actionTask.append(np.random.randint(0, 10)) 61 | actionAim.append(np.random.randint(0, 7)) # local+mec+neighbor 62 | else: 63 | state_v = torch.tensor([vehicles[i].self_state], dtype=torch.float32) 64 | taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 65 | taskAction, aimAction = model(state_v, taskState_v) 66 | 67 | taskAction = np.array(taskAction, dtype=np.float32).reshape(-1) 68 | aimAction = np.array(aimAction, dtype=np.float32).reshape(-1) 69 | 70 | actionAim.append(np.argmax(aimAction)) 71 | actionTask.append(np.argmax(taskAction)) 72 | # print("action:", action) 73 | _, _, _, otherState, _, taskState, Reward, reward = env.step(actionTask, actionAim) 74 | # print("reward:", reward) 75 | 76 | # 加入各自的缓存池【当前其他状态、当前任务状态、目标动作、任务动作,下一其他状态、下一任务状态】 77 | for i, vehicle in enumerate(vehicles): 78 | exp = Experience(old_otherState[i], [old_taskState[i]], 79 | actionTask[i], actionAim[i], 80 | reward[i], 81 | otherState[i], [taskState[i]]) 82 | vehicle.buffer.append(exp) 83 | return round(Reward, 2) # 返回总的平均奖励 84 | 85 | 86 | # 计算一个智能体的损失 87 | def calc_loss(batch, net: DQN, tgt_net: DQN, device="cpu"): 88 | cur_otherState, cur_TaskState, taskAction, aimAction, rewards, next_otherState, next_TaskState = batch # 89 | 90 | otherStates_v = torch.tensor(np.array(cur_otherState, copy=False), dtype=torch.float32).to(device) 91 | taskStates_v = torch.tensor(np.array(cur_TaskState, copy=False), dtype=torch.float32).to(device) 92 | # print("states_v:", states_v) # batch状态 93 | taskActions_v = torch.tensor(np.array(taskAction), dtype=torch.int64).to(device) 94 | aimActions_v = torch.tensor(np.array(aimAction), dtype=torch.int64).to(device) 95 | # print("actions_v", actions_v) # batch动作 96 | rewards_v = torch.tensor(np.array(rewards), dtype=torch.float32).to(device) 97 | # print("rewards_v", rewards_v) # batch奖励 98 | next_otherStates_v = torch.tensor(np.array(next_otherState, copy=False), dtype=torch.float32).to(device) 99 | next_taskStates_v = torch.tensor(np.array(next_TaskState, copy=False), dtype=torch.float32).to(device) 100 | # print("next_states_v", next_states_v) # batch下一个状态 101 | 102 | # 计算当前网络q值 103 | taskActionValues, aimActionValues = net(otherStates_v, 104 | taskStates_v) # .gather(1, aimActions_v.unsqueeze(-1)).squeeze(-1) 105 | taskActionValues = taskActionValues.gather(1, taskActions_v.unsqueeze(-1)).squeeze(-1) 106 | aimActionValues = aimActionValues.gather(1, aimActions_v.unsqueeze(-1)).squeeze(-1) 107 | 108 | # 计算目标网络q值 109 | next_taskActionValues, next_aimActionValues = tgt_net(next_otherStates_v, 110 | next_taskStates_v) # .max(1)[0] # 得到最大的q值 111 | 112 | next_taskActionValues = 
next_taskActionValues.max(1)[0].detach() 113 | next_aimActionValues = next_aimActionValues.max(1)[0].detach() 114 | 115 | # 防止梯度流入用于计算下一状态q近似值得NN 116 | # next_states_values = next_aimActionValues.detach() 117 | # print("next_states_values", next_states_values) 118 | expected_aim_values = next_aimActionValues * GAMMA + rewards_v 119 | expected_task_values = next_taskActionValues * GAMMA + rewards_v 120 | # print(" expected_state_values", expected_state_values) 121 | 122 | return nn.MSELoss()(taskActionValues, expected_task_values), nn.MSELoss()(aimActionValues, expected_aim_values) 123 | 124 | 125 | if __name__ == '__main__': 126 | env = Env() 127 | env.reset() 128 | 129 | frame_idx = 0 130 | # writer = SummaryWriter(comment="-" + env.__doc__) 131 | agents = env.vehicles 132 | models = [] 133 | tgt_models = [] 134 | optimizers = [] 135 | for agent in agents: 136 | # print(agent.get_location, agent.velocity) 137 | task_shape = np.array([agent.task_state]).shape 138 | # print(task_shape) 139 | model = DQN(len(agent.self_state), task_shape, MAX_TASK, len(agent.neighbor) + 2) 140 | models.append(model) 141 | optimer = optim.RMSprop(params=model.parameters(), lr=LEARNING_RATE, momentum=momentum) 142 | optimizers.append(optimer) 143 | for agent in agents: 144 | # print(agent.get_location, agent.velocity) 145 | task_shape = np.array([agent.task_state]).shape 146 | # print(task_shape) 147 | model = DQN(len(agent.self_state), task_shape, MAX_TASK, len(agent.neighbor) + 2) 148 | model.load_state_dict(models[agent.id].state_dict()) 149 | tgt_models.append(model) 150 | 151 | # 打印网络结构 152 | model = models[0] 153 | state_v = torch.tensor([env.vehicles[0].otherState], dtype=torch.float32) 154 | taskState_v = torch.tensor([[env.vehicles[0].taskState]], dtype=torch.float32) 155 | # 针对有网络模型,但还没有训练保存 .pth 文件的情况 156 | modelpath = "./netStruct/demo.onnx" # 定义模型结构保存的路径 157 | torch.onnx.export(model, (state_v, taskState_v), modelpath) # 导出并保存 158 | netron.start(modelpath) 159 | 160 | total_reward = [] 161 | recent_reward = [] 162 | loss_task_list = [] 163 | loss_aim_list = [] 164 | reward_1 = [] 165 | 166 | epsilon = EPSILON_START 167 | eliposde = 500000 168 | while eliposde > 0: 169 | # 重置游戏 170 | # if frame_idx % RESET == 0: 171 | # print("游戏重置") 172 | # env.reset() 173 | # agents = env.vehicles 174 | 175 | frame_idx += 1 176 | print("the {} steps".format(frame_idx)) 177 | epsilon = max(EPSILON_FINAL, EPSILON_START - frame_idx / EPSILON_DECAY_LAST_FRAME) 178 | reward = play_step(env, epsilon, models) 179 | total_reward.append(reward) 180 | print("current reward:", reward) 181 | print("current 100 times total rewards:", np.mean(total_reward[-100:])) 182 | recent_reward.append(np.mean(total_reward[-100:])) 183 | if np.mean(total_reward[-100:]) > 0.7: 184 | break 185 | 186 | for i, agent in enumerate(agents): 187 | # print("length of {} buffer".format(agent.id), len(agent.buffer)) 188 | if len(agent.buffer) < REPLAY_SIZE: # 缓冲池要足够大 189 | continue 190 | if frame_idx % SYNC_TARGET_FRAMES == 0: # 更新目标网络 191 | tgt_models[i].load_state_dict(models[i].state_dict()) 192 | optimizers[i].zero_grad() 193 | batch = agent.buffer.sample(BATCH_SIZE) 194 | loss_task, loss_aim = calc_loss(batch, models[i], tgt_models[i]) 195 | # print("loss:", loss_task, " ", loss_aim) 196 | # loss_t.backward() 197 | torch.autograd.backward([loss_task, loss_aim]) 198 | optimizers[i].step() 199 | if agent.id == 0: 200 | # print("cur_loss:", loss_task.item()) 201 | # print("cur_aim_loss", loss_aim.item()) 202 | 
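                # Annotation (added note, not original code): calc_loss above builds a
                # standard DQN target for each head, e.g. for the aim head
                #     y = r + GAMMA * max_a' Q_tgt(s', a'),
                # and regresses Q(s, a) onto y with MSELoss; the task head is handled
                # the same way over its own action set. As a worked example, with
                # GAMMA = 0.99, r = 0.5 and max_a' Q_tgt(s', a') = 2.0, the target is
                # 0.5 + 0.99 * 2.0 = 2.48.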
loss_task_list.append(loss_task.item()) 203 | loss_aim_list.append(loss_aim.item()) 204 | reward_1.append(env.reward[0]) 205 | eliposde -= 1 206 | 207 | cur_time = time.strftime("%Y-%m-%d-%H-%M", time.localtime(time.time())) 208 | # 创建文件夹 209 | os.makedirs("D:/pycharm/Project/VML/MyErion/experiment3/result/" + cur_time) 210 | for i, vehicle in enumerate(env.vehicles): 211 | # 保存每个网络模型 212 | torch.save(tgt_models[i].state_dict(), 213 | "D:/pycharm/Project/VML/MyErion/experiment3/result/" + cur_time + "/vehicle" + str(i) + ".pkl") 214 | 215 | plt.plot(range(len(recent_reward)), recent_reward) 216 | plt.title("奖励曲线") 217 | plt.show() 218 | 219 | plt.plot(range(len(loss_task_list)), loss_task_list) 220 | plt.title("任务选择损失曲线") 221 | plt.show() 222 | 223 | plt.plot(range(len(loss_aim_list)), loss_aim_list) 224 | plt.title("目标选择损失曲线") 225 | plt.show() 226 | 227 | plt.plot(range(1000), reward_1[-1000:]) 228 | plt.title("车辆一奖励曲线") 229 | plt.show() 230 | -------------------------------------------------------------------------------- /experiment3/env_test.py: -------------------------------------------------------------------------------- 1 | from env import Env 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | if __name__ == '__main__': 6 | print() 7 | env = Env() 8 | env.reset() 9 | 10 | # 测试网络节点数 11 | task = np.array(env.taskState) 12 | print(task.shape) 13 | vehicles = env.vehicles 14 | 15 | for vehicle in vehicles: 16 | print("第{}车状态:{}".format(vehicle.id, vehicle.self_state)) 17 | print("该车邻居:") 18 | for i in vehicle.neighbor: 19 | print(i.id, end=" ") 20 | print() 21 | 22 | # 测试环境运行 23 | x = [[] for i in range(40)] 24 | y = [[] for i in range(40)] 25 | for i in range(1000): 26 | for j in range(40): 27 | x[j].append(env.vehicles[j].position[0]) 28 | y[j].append(env.vehicles[j].position[1]) 29 | action1 = [] 30 | action2 = [] 31 | action3 = [] 32 | for j in range(40): 33 | # action1.append(np.random.randint(0, 10)) 34 | action1.append(0) 35 | # action2.append(np.random.randint(0, 7)) 36 | action2.append(0) 37 | # action3.append(round(np.random.random(), 2)) 38 | action3.append(0.8) 39 | other_state, task_state, vehicle_state, _, _, _, Reward, _ = env.step(action1, action2) 40 | print("第{}次平均奖励{}".format(i, Reward)) 41 | # print("当前状态:", state) 42 | # print("下一状态:", next_state) 43 | # print("车状态:", vehicleState) 44 | # print("任务状态", taskState) 45 | # print("当前奖励:", reward) 46 | # print("每个奖励,", vehicleReward) 47 | # print("当前有{}任务没有传输完成".format(len(env.need_trans_task))) 48 | # print("average reward:", env.Reward) 49 | plt.figure(figsize=(100, 100)) 50 | fix, ax = plt.subplots(5, 4) 51 | 52 | for i in range(5): 53 | for j in range(4): 54 | number = i * 4 + j 55 | ax[i, j].plot(x[number], y[number]) 56 | ax[i, j].set_title('vehicle {}'.format(number)) 57 | plt.show() 58 | -------------------------------------------------------------------------------- /experiment3/mec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | RANGE_MEC = 200 # MEC通信范围 4 | RESOURCE = 20000 # 可用资源 MHz 5 | MAX_QUEUE = 10 6 | 7 | 8 | # 边缘服务器 9 | class MEC: 10 | def __init__(self, position, resources=RESOURCE, max_queue=MAX_QUEUE): 11 | self.loc_x = position[0] 12 | self.loc_y = position[1] 13 | self.loc = position 14 | # 当前可用资源 MHz 15 | self.resources = resources 16 | self.state = [] 17 | # 通信范围 m 18 | self.range = RANGE_MEC 19 | # 当前接到需要处理的任务信息(最多同时处理10个任务) 20 | self.accept_task = [] 21 | # 最多处理任务量 22 | self.max_task = 5 23 | # 接受任务的数量 24 
| self.sum_needDeal_task = 0 25 | # 此时刻有多少动作选则我 多少任务选择传输给我 26 | self.len_action = 0 27 | # 等待计算的任务队列(理解为挂起状态) 28 | self.task_queue = [] 29 | # 用于奖励计算的任务队列 30 | self.task_queue_for_reward = [] 31 | # 队列最长长度 32 | self.max_queue = max_queue 33 | # 当前状态 34 | self.get_state() 35 | 36 | @property 37 | def get_x(self): 38 | return self.loc_x 39 | 40 | @property 41 | def get_y(self): 42 | return self.loc_y 43 | 44 | @property 45 | def get_location(self): 46 | return self.loc 47 | 48 | """ 49 | 获得状态 50 | """ 51 | 52 | def get_state(self): 53 | """ 54 | :return:state 维度:[loc_x,loc_y,sum_needDeal_task,resources] 55 | """ 56 | self.state = [] 57 | self.state.extend(self.loc) 58 | self.state.append(self.sum_needDeal_task) 59 | self.state.append(self.len_action) 60 | self.state.append(self.resources) 61 | return self.state 62 | -------------------------------------------------------------------------------- /experiment3/memory.py: -------------------------------------------------------------------------------- 1 | # 经验类型 2 | import collections 3 | from collections import namedtuple 4 | from random import sample 5 | import numpy as np 6 | 7 | Experience = namedtuple('Transition', 8 | field_names=['state', 'action', 'reward', 'next_state']) # Define a transition tuple 9 | 10 | 11 | class ReplayMemory(object): # Define a replay memory 12 | 13 | # 初始化缓冲池 14 | def __init__(self, capacity): 15 | # 最大容量 16 | self.capacity = capacity 17 | # 缓冲池经验 18 | self.memory = [] 19 | # ? 20 | self.position = 0 21 | 22 | # 存入经验 23 | def push(self, *args): 24 | if len(self.memory) < self.capacity: 25 | self.memory.append(None) 26 | # 存入经验 27 | self.memory[self.position] = Experience(*args) 28 | # 记录最新经验所在位置 29 | self.position = (self.position + 1) % self.capacity 30 | 31 | # 采样 32 | def sample(self, batch_size): 33 | return sample(self.memory, batch_size) 34 | 35 | def __len__(self): 36 | return len(self.memory) 37 | 38 | 39 | class ExperienceBuffer: 40 | def __init__(self, capacity): 41 | self.maxLen = capacity 42 | self.buffer = collections.deque(maxlen=capacity) # 队列,先进先出 43 | 44 | def __len__(self): 45 | return len(self.buffer) 46 | 47 | def append(self, experience: Experience): 48 | self.buffer.append(experience) 49 | 50 | def sample(self, batch_size): 51 | indices = np.random.choice(len(self.buffer), batch_size, replace=False) 52 | cur_otherState, cur_TaskState, taskAction, aimAction, rewards, next_otherState, next_TaskState = zip( 53 | *[self.buffer[idx] for idx in indices]) 54 | # 转换成numpy 55 | return np.array(cur_otherState), np.array(cur_TaskState), \ 56 | np.array(taskAction), np.array(aimAction), \ 57 | np.array(rewards, dtype=np.float32), \ 58 | np.array(next_otherState), np.array(next_TaskState) 59 | 60 | # 清空 61 | def clear(self): 62 | self.buffer = collections.deque(maxlen=self.maxLen) 63 | -------------------------------------------------------------------------------- /experiment3/model.py: -------------------------------------------------------------------------------- 1 | import ptan 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn import functional as F 6 | from torch.distributions.categorical import Categorical 7 | 8 | HID_SIZE = 64 9 | 10 | 11 | class ModelActor(nn.Module): 12 | def __init__(self, obs_dim, act_dim, task_dim): 13 | super(ModelActor, self).__init__() 14 | self.act_dim = act_dim 15 | 16 | self.mu = nn.Sequential( 17 | nn.Linear(obs_dim + HID_SIZE, HID_SIZE * 2), 18 | nn.ReLU(), 19 | nn.Linear(HID_SIZE * 2, HID_SIZE), 20 | nn.ReLU(), 21 | 
nn.Linear(HID_SIZE, act_dim * 2), 22 | ) 23 | self.cnn = CNNLayer(task_dim, HID_SIZE) 24 | 25 | def forward(self, x, task): 26 | cnn_out = self.cnn(task) 27 | x = torch.cat((x, cnn_out), -1) 28 | out = self.mu(x) 29 | action_out = out[:, :self.act_dim] 30 | task_out = out[:, self.act_dim:] 31 | action_pro = F.softmax(action_out, dim=-1) 32 | task_pro = F.softmax(task_out, dim=1) 33 | print(action_pro) 34 | print(task_pro) 35 | return action_pro, Categorical(action_pro), task_pro, Categorical(task_pro) 36 | 37 | 38 | class ModelCritic(nn.Module): 39 | def __init__(self, obs_size): 40 | super(ModelCritic, self).__init__() 41 | 42 | self.value = nn.Sequential( 43 | nn.Linear(obs_size, HID_SIZE), 44 | nn.ReLU(), 45 | nn.Linear(HID_SIZE, HID_SIZE), 46 | nn.ReLU(), 47 | nn.Linear(HID_SIZE, 1), 48 | ) 49 | 50 | def forward(self, x): 51 | return self.value(x) 52 | 53 | 54 | class ModelSACTwinQ(nn.Module): 55 | def __init__(self, obs_size, act_size): 56 | super(ModelSACTwinQ, self).__init__() 57 | 58 | self.q1 = nn.Sequential( 59 | nn.Linear(obs_size + act_size, HID_SIZE), 60 | nn.ReLU(), 61 | nn.Linear(HID_SIZE, HID_SIZE), 62 | nn.ReLU(), 63 | nn.Linear(HID_SIZE, 1), 64 | ) 65 | 66 | self.q2 = nn.Sequential( 67 | nn.Linear(obs_size + act_size, HID_SIZE), 68 | nn.ReLU(), 69 | nn.Linear(HID_SIZE, HID_SIZE), 70 | nn.ReLU(), 71 | nn.Linear(HID_SIZE, 1), 72 | ) 73 | 74 | def forward(self, obs, act): 75 | x = torch.cat([obs, act], dim=1) 76 | return self.q1(x), self.q2(x) 77 | 78 | 79 | class AgentDDPG(ptan.agent.BaseAgent): 80 | """ 81 | Agent implementing Orstein-Uhlenbeck exploration process 82 | """ 83 | 84 | def __init__(self, net, device="cpu", ou_enabled=True, 85 | ou_mu=0.0, ou_teta=0.15, ou_sigma=0.2, 86 | ou_epsilon=1.0): 87 | self.net = net 88 | self.device = device 89 | self.ou_enabled = ou_enabled 90 | self.ou_mu = ou_mu 91 | self.ou_teta = ou_teta 92 | self.ou_sigma = ou_sigma 93 | self.ou_epsilon = ou_epsilon 94 | 95 | def initial_state(self): 96 | return None 97 | 98 | def __call__(self, states, agent_states): 99 | states_v = ptan.agent.float32_preprocessor(states) 100 | states_v = states_v.to(self.device) 101 | mu_v = self.net(states_v) 102 | actions = mu_v.data.cpu().numpy() 103 | 104 | if self.ou_enabled and self.ou_epsilon > 0: 105 | new_a_states = [] 106 | for a_state, action in zip(agent_states, actions): 107 | if a_state is None: 108 | a_state = np.zeros( 109 | shape=action.shape, dtype=np.float32) 110 | a_state += self.ou_teta * (self.ou_mu - a_state) 111 | a_state += self.ou_sigma * np.random.normal( 112 | size=action.shape) 113 | 114 | action += self.ou_epsilon * a_state 115 | new_a_states.append(a_state) 116 | else: 117 | new_a_states = agent_states 118 | 119 | actions = np.clip(actions, -1, 1) 120 | return actions, new_a_states 121 | 122 | 123 | class DQN(nn.Module): 124 | def __init__(self, obs_dim, task_dim, taskAction_dim, aimAction_dim): 125 | super(DQN, self).__init__() 126 | self.input_layer = nn.Linear(obs_dim + 32, 128) 127 | self.hidden1 = nn.Linear(128, 64) 128 | self.hidden2 = nn.Linear(64, 64) 129 | self.hidden3 = nn.Linear(64, 128) 130 | self.cnn = CNNLayer(task_dim, 32) 131 | self.output_layer1 = self.common(64, taskAction_dim) 132 | self.output_layer2 = self.common(64, aimAction_dim) 133 | 134 | def common(self, input_dim, action_dim): 135 | return nn.Sequential( 136 | nn.Linear(input_dim, 128), 137 | nn.ReLU(), 138 | self.hidden1, 139 | nn.ReLU(), 140 | self.hidden2, 141 | nn.ReLU(), 142 | nn.Linear(64, action_dim) 143 | ) 144 | 145 | def forward(self, x, 
task): 146 | """ 147 | 148 | :param x: batch_size*state_n 149 | :return: batch_size*actions_n 输出每个动作对应的q值 150 | """ 151 | # 任务卷积层 152 | cnn_out = self.cnn(task) 153 | x = torch.cat((x, cnn_out), -1) 154 | 155 | # 公共层 156 | x1 = F.relu(self.input_layer(x)) 157 | x2 = F.relu(self.hidden1(x1)) 158 | x3 = F.relu(self.hidden2(x2)) 159 | 160 | taskActionValue = self.output_layer1(x3) 161 | aimActionValue = self.output_layer2(x3) 162 | 163 | return taskActionValue, aimActionValue 164 | 165 | 166 | class CNNLayer(nn.Module): 167 | def __init__(self, obs_shape, hidden_size, use_orthogonal=True, use_ReLU=True, kernel_size=3, stride=1): 168 | super(CNNLayer, self).__init__() 169 | 170 | active_func = [nn.Tanh(), nn.ReLU()][use_ReLU] 171 | init_method = [nn.init.xavier_uniform_, nn.init.orthogonal_][use_orthogonal] 172 | gain = nn.init.calculate_gain(['tanh', 'relu'][use_ReLU]) 173 | 174 | def init_(m): # 权重使用正交初始化,激活函数使用relu 175 | return init(m, init_method, lambda x: nn.init.constant_(x, 0), gain=gain) 176 | 177 | input_channel = obs_shape[0] 178 | input_width = obs_shape[1] 179 | input_height = obs_shape[2] 180 | 181 | self.cnn = nn.Sequential( 182 | init_(nn.Conv2d(in_channels=input_channel, 183 | out_channels=hidden_size // 2, 184 | kernel_size=kernel_size, 185 | stride=stride) 186 | ), 187 | active_func, 188 | nn.Flatten(), 189 | init_(nn.Linear( 190 | hidden_size // 2 * (input_width - kernel_size + stride) * (input_height - kernel_size + stride), 191 | hidden_size) 192 | ), 193 | active_func, 194 | init_(nn.Linear(hidden_size, hidden_size)), active_func) 195 | 196 | def forward(self, x): 197 | x = x / 255.0 198 | x = self.cnn(x) 199 | 200 | return x 201 | 202 | 203 | def init(module, weight_init, bias_init, gain=1): 204 | weight_init(module.weight.data, gain=gain) 205 | bias_init(module.bias.data) 206 | return module 207 | -------------------------------------------------------------------------------- /experiment3/task.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | np.random.seed(2) 4 | 5 | 6 | class Task: 7 | """ 8 | 定义任务类型 9 | """ 10 | 11 | def __init__(self, vehicle=None, createTime=0): 12 | # 产生任务的车辆 13 | self.vehicle = vehicle 14 | # 完成该任务所消耗的资源 15 | self.aim = None # 传送对象 16 | 17 | self.max_time = 30 # ms 最大容忍时间 18 | self.size = np.random.uniform(0.2, 1) # Mb 19 | self.cycle = np.random.randint(20, 50) # cycle/bit 20 | self.need_trans_size = self.size * np.power(2, 10) # Kb 还剩余多少未传输完成 21 | self.need_precess_cycle = self.cycle * self.size * 1000 # Mb * cycle/byte =M cycle 还剩余多少轮次未完成(10^6) 22 | self.need_time = 0 # 需要计算时间 23 | self.hold_time = 0 # 任务在计算等待队列中得等待时间 24 | 25 | self.rate = 0 # 当前速率 26 | 27 | self.compute_resource = 0 28 | 29 | self.create_time = createTime # 任务产生时间 30 | self.pick_time = 0 # 被选择的时间(出队列时间) 31 | 32 | # 完成该任务所消耗的cup资源 33 | self.energy = 0 34 | self.trans_time = 0 # 传输所需要的时间(实际) 35 | self.precess_time = 0 # 任务处理所需要的时间(实际) 36 | -------------------------------------------------------------------------------- /experiment3/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | import matplotlib 5 | from pylab import mpl 6 | import model 7 | from env import Env 8 | import matplotlib.pyplot as plt 9 | 10 | # 设置显示中文字体 11 | mpl.rcParams["font.sans-serif"] = ["SimHei"] 12 | matplotlib.rcParams['axes.unicode_minus'] = False 13 | 14 | if __name__ == '__main__': 15 | env = Env() 16 | env.reset() 17 | 18 | N = env.num_Vehicles 19 
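    # Annotation (added note, not original code): evaluation below is purely
    # greedy -- each DQN head's Q-values are computed and np.argmax picks the
    # action; the torch equivalent would be int(torch.argmax(aimAction, dim=-1)).
    # The checkpoint paths below are absolute Windows paths; a more portable
    # (assumed) pattern would be os.path.join("result", run_dir,
    # "vehicle{}.pkl".format(i)).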
| vehicles = env.vehicles 20 | models = [] 21 | 22 | task_shape = np.array([vehicles[0].task_state]).shape 23 | for i in range(N): 24 | tgt_model = model.DQN(len(vehicles[0].self_state), task_shape, 10, len(vehicles[0].neighbor) + 2) 25 | tgt_model.load_state_dict(torch.load( 26 | "D:\pycharm\Project\VML\MyErion\experiment3\\result\\2022-11-07-18-40\\vehicle{}.pkl".format(i))) 27 | models.append(tgt_model) 28 | 29 | # state_v = torch.tensor([vehicles[i].otherState], dtype=torch.float32) 30 | # taskState_v = torch.tensor([[vehicles[i].taskState]], dtype=torch.float32) 31 | # taskAction, aimAction = models[0](state_v, taskState_v) 32 | 33 | vehicleReward = [] 34 | averageReward = [] 35 | for step in range(1000): 36 | action1 = [] 37 | action2 = [] 38 | 39 | for i in range(N): 40 | state_v = torch.tensor([vehicles[i].self_state], dtype=torch.float32) 41 | taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 42 | taskAction, aimAction = models[i](state_v, taskState_v) 43 | 44 | # taskAction = np.array(taskAction, dtype=np.float32).reshape(-1) 45 | # aimAction = np.array(aimAction, dtype=np.float32).reshape(-1) 46 | taskAction = taskAction.detach().numpy().reshape(-1) 47 | aimAction = aimAction.detach().numpy().reshape(-1) 48 | action1.append(np.argmax(taskAction)) 49 | action2.append(np.argmax(aimAction)) 50 | 51 | print(action1) 52 | print(action2) 53 | other_state, task_state, vehicle_state, _, _, _, Reward, reward = env.step(action1, action2) 54 | vehicleReward.append(reward[1]) 55 | averageReward.append(Reward) 56 | print("第{}次车辆平均奖励{}".format(step, Reward)) 57 | 58 | fig, aix = plt.subplots(2, 1) 59 | aix[0].plot(range(len(vehicleReward)), vehicleReward) 60 | aix[1].plot(range(len(averageReward)), averageReward) 61 | plt.show() 62 | -------------------------------------------------------------------------------- /experiment3/vehicle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import random 3 | 4 | import numpy as np 5 | 6 | from memory import ExperienceBuffer 7 | from task import Task 8 | 9 | Dv = 50 # 车的最大通信范围 10 | Fv = 4000 # 车最大计算能力 MHZ 11 | MAX_TASK = 10 # 任务队列最大长度 12 | 13 | CAPACITY = 20000 # 缓冲池大小 14 | TASK_SOLT = 10 # 任务产生时隙 15 | 16 | # 等待队列最长长度 17 | MAX_QUEUE = 10 18 | 19 | np.random.seed(0) 20 | 21 | direction_map = {"d": 1, "u": 2, "l": 3, "r": 4} 22 | 23 | 24 | class Vehicle: 25 | # 位置:x,y 速度、方向:-1左,1右 26 | def __init__(self, id, position, direction, velocity=20, max_queue=MAX_QUEUE): 27 | self.id = id 28 | # 车的位置信息 29 | self.loc_x = position[0] 30 | self.loc_y = position[1] 31 | self.position = position 32 | self.velocity = velocity # m/s 33 | self.direction = direction 34 | # 通信范围 35 | self.range = Dv 36 | # 邻居表 37 | self.neighbor = [] 38 | # mec 39 | self.Mec = None 40 | # 当前时间 41 | self.cur_frame = 0 42 | # 接受的任务的列表(最多同时处理5个任务) 43 | self.accept_task = [] 44 | # 最多处理任务量 45 | self.max_task = 3 46 | # 等待队列最长长度 47 | self.max_queue = max_queue 48 | # 等待计算的任务队列(理解为挂起状态) 49 | self.task_queue = [] 50 | # 用于奖励计算的任务队列 51 | self.task_queue_for_reward = [] 52 | # 接受任务的数量 53 | self.sum_needDeal_task = 0 54 | # 此时刻有多少动作选则我 55 | self.len_action = 0 56 | # 当前可用资源 57 | self.resources = round((1 - np.random.randint(1, 5) / 10) * Fv, 2) # MHz 58 | # 表示当前是否有任务正在传输(0:没有,1:有) 59 | self.trans_task = 0 60 | # 当前处理的任务(用于计算奖励,不用于状态信息) 61 | self.cur_task = None 62 | # 任务队列 63 | self.total_task = [] 64 | # 任务队列的长度 65 | self.len_task = len(self.total_task) 66 | 67 | # 当前状态信息 68 | self.otherState = [] 69 | # 
当前任务队列状态 70 | self.taskState = [] 71 | # 去除邻居的状态信息用于邻居车观察和全局critic的处理 72 | self.excludeNeighbor_state = [] 73 | # 缓冲池 74 | self.buffer = ExperienceBuffer(capacity=CAPACITY) 75 | # 总奖励 76 | self.reward = [] 77 | # 任务溢出的数量 78 | self.overflow = 0 79 | # 上一个任务产生的时间 80 | self.lastCreatWorkTime = 0 81 | 82 | # 产生任务 83 | self.create_work() 84 | 85 | # 获得位置 86 | @property 87 | def get_location(self): 88 | return self.position 89 | 90 | # 设置位置 91 | def set_location(self, loc_x, loc_y): 92 | self.loc_x = loc_x 93 | self.loc_y = loc_y 94 | self.position = [self.loc_x, self.loc_y] 95 | 96 | # 获得x 97 | @property 98 | def get_x(self): 99 | return self.loc_x 100 | 101 | # 获得y 102 | @property 103 | def get_y(self): 104 | return self.loc_y 105 | 106 | # 产生任务 传入当前时间 107 | def create_work(self): 108 | if self.id % 3 == 0: 109 | return 110 | # 每隔一段时间进行一次任务产生 111 | if (self.cur_frame - self.lastCreatWorkTime) % TASK_SOLT == 0: 112 | # 每次有0.6的概率产生任务 113 | if random.random() < 0.6: 114 | if self.len_task < MAX_TASK: # 队列不满 115 | task = Task(self, self.cur_frame) 116 | self.lastCreatWorkTime = self.cur_frame 117 | self.total_task.append(task) 118 | self.len_task += 1 119 | # print("第{}辆车产生了任务".format(self.id)) 120 | self.overflow = 0 121 | else: 122 | # print("第{}辆车任务队列已满".format(self.id)) 123 | self.overflow = 1 124 | 125 | """ 126 | 获得状态 127 | """ 128 | 129 | def get_state(self): 130 | self.otherState = [] 131 | self.excludeNeighbor_state = [] 132 | self.taskState = [] 133 | 134 | # 位置信息 4 135 | self.otherState.extend(self.position) 136 | self.otherState.append(self.velocity) 137 | self.otherState.append(direction_map.get(self.direction)) 138 | self.excludeNeighbor_state.extend(self.position) 139 | self.excludeNeighbor_state.append(self.velocity) 140 | self.excludeNeighbor_state.append(direction_map.get(self.direction)) 141 | 142 | # 资源信息(可用资源) 143 | self.otherState.append(self.resources) 144 | self.excludeNeighbor_state.append(self.resources) 145 | 146 | # 当前处理的任务量 147 | self.otherState.append(self.sum_needDeal_task) 148 | self.excludeNeighbor_state.append(self.sum_needDeal_task) 149 | # 当前接受传输的任务量 150 | self.otherState.append(self.len_action) 151 | self.excludeNeighbor_state.append(self.sum_needDeal_task) 152 | 153 | # 当前是否有任务在传输 154 | self.excludeNeighbor_state.append(self.trans_task) 155 | self.otherState.append(self.trans_task) 156 | 157 | # 正在传输的任务信息 158 | # if self.trans_task is not None: 159 | # self.otherState.append(self.trans_task.need_trans_size) 160 | # self.excludeNeighbor_state.append(self.trans_task.need_trans_size) 161 | # else: 162 | # self.otherState.append(0) 163 | # self.excludeNeighbor_state.append(0) 164 | 165 | # 当前队列长度 166 | self.otherState.append(self.len_task) 167 | self.excludeNeighbor_state.append(self.len_task) 168 | 169 | # 邻居表 7*数量 170 | for neighbor in self.neighbor: 171 | self.otherState.extend(neighbor.position) # 位置 172 | self.otherState.append(neighbor.velocity) # 速度 173 | self.otherState.append(direction_map.get(neighbor.direction)) # 方向 174 | self.otherState.append(neighbor.resources) # 可用资源 175 | self.otherState.append(neighbor.sum_needDeal_task) # 处理任务长度 176 | self.otherState.append(neighbor.len_action) # 当前正在传输任务数量 177 | 178 | self.otherState.extend(self.Mec.state) 179 | 180 | # 任务状态信息 181 | for i in range(MAX_TASK): 182 | if i < self.len_task: 183 | task = self.total_task[i] 184 | self.taskState.append([task.create_time, task.need_trans_size, task.need_precess_cycle, task.max_time]) 185 | else: 186 | self.taskState.append([0, 0, 0, 0]) 187 | 188 | return 
self.excludeNeighbor_state 189 | -------------------------------------------------------------------------------- /experiment4/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 环境4 3 | (两个动作:选择任务和选择对象) 4 | 加入了mec和车在时隙内处理任务的上限(mec最多同时处理10个任务、车最多处理5个任务) 5 | 使用经典城市道路(使用不同数量车辆和邻居) 6 | 为mec卸载和车辆卸载提供两种传输方式(即可同时像车辆和mec传输任务) 7 | """ 8 | import ptan 9 | import numpy as np 10 | import torch 11 | from torch.distributions.categorical import Categorical 12 | from env import Env 13 | 14 | 15 | def test_net(nets, env: Env, count=10): 16 | rewards = 0.0 17 | steps = 0 18 | for _ in range(count): 19 | env.reset() 20 | while steps < 1000: 21 | action = [] 22 | with torch.no_grad(): 23 | for vehicle in env.vehicles: 24 | state = torch.tensor(vehicle.self_state) 25 | _, pro = nets[vehicle.id](state) 26 | act = Categorical.sample(pro) 27 | action.append(act.item()) 28 | _, _, reward, _ = env.step(action) 29 | rewards += reward 30 | steps += 1 31 | return rewards / count, steps / count 32 | 33 | # def calc_logprob(pro_v, actions_v): 34 | # p1 = - ((mu_v - actions_v) ** 2) / (2 * torch.exp(logstd_v).clamp(min=1e-3)) 35 | # p2 = - torch.log(torch.sqrt(2 * math.pi * torch.exp(logstd_v))) 36 | # return p1 + p2 37 | -------------------------------------------------------------------------------- /experiment4/dqn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import time 4 | from collections import namedtuple 5 | 6 | import matplotlib 7 | import matplotlib.pyplot as plt 8 | import matplotlib.font_manager as fm 9 | from matplotlib.ticker import FuncFormatter 10 | import numpy as np 11 | import torch 12 | import torch.nn as nn 13 | import torch.optim as optim 14 | from pylab import mpl 15 | import netron 16 | from matplotlib import rcParams 17 | 18 | from env import Env 19 | from model import DQN 20 | 21 | np.random.seed(2) 22 | 23 | # 设置显示中文字体 24 | mpl.rcParams["font.sans-serif"] = ["SimHei"] 25 | matplotlib.rcParams['axes.unicode_minus'] = False 26 | os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" 27 | # 加载 Times New Roman 字体 28 | font_path = 'C:/Windows/Fonts/times.ttf' 29 | prop = fm.FontProperties(fname=font_path, size=10) 30 | # 设置全局字体为Times New Roman 31 | rcParams['font.family'] = 'Times New Roman' 32 | 33 | Experience = namedtuple('Transition', 34 | field_names=['cur_otherState', 'cur_TaskState', # 状态 35 | 'taskAction', 'aimAction', # 动作 36 | 'reward', # 奖励 37 | 'next_otherState', 'next_TaskState']) # Define a transition tuple 38 | GAMMA = 0.99 39 | BATCH_SIZE = 64 40 | REPLAY_SIZE = 10000 41 | LEARNING_RATE = 1e-4 42 | SYNC_TARGET_FRAMES = 100 # 更新目标网络频率 43 | 44 | EPSILON_DECAY_LAST_FRAME = 150000 45 | EPSILON_START = 0.6 46 | EPSILON_FINAL = 0.01 47 | EPSILON = 300000 48 | 49 | RESET = 100000 # 重置游戏次数 50 | 51 | MAX_TASK = 10 # 任务队列最大长度 52 | 53 | momentum = 0.005 54 | 55 | RESOURCE = [0.2, 0.4, 0.6, 0.8] 56 | 57 | 58 | @torch.no_grad() 59 | def play_step(env, epsilon, models): 60 | vehicles = env.vehicles 61 | old_otherState = [] 62 | old_taskState = [] 63 | 64 | actionTask = [] 65 | actionAim = [] 66 | # 贪心选择动作 67 | for i, model in enumerate(models): 68 | old_otherState.append(vehicles[i].self_state) 69 | old_taskState.append(vehicles[i].task_state) 70 | if np.random.random() < epsilon: 71 | # 随机动作 72 | actionTask.append(np.random.randint(0, 10)) 73 | actionAim.append(np.random.randint(0, 7)) # local+mec+neighbor 74 | else: 75 | state_v = 
torch.tensor([vehicles[i].self_state], dtype=torch.float32) 76 | taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 77 | taskAction, aimAction = model(state_v, taskState_v) 78 | 79 | taskAction = np.array(taskAction, dtype=np.float32).reshape(-1) 80 | aimAction = np.array(aimAction, dtype=np.float32).reshape(-1) 81 | 82 | actionAim.append(np.argmax(aimAction)) 83 | actionTask.append(np.argmax(taskAction)) 84 | # print("action:", action) 85 | _, _, _, otherState, _, taskState, Reward, reward = env.step(actionTask, actionAim) 86 | # print("reward:", reward) 87 | 88 | # 加入各自的缓存池【当前其他状态、当前任务状态、目标动作、任务动作,下一其他状态、下一任务状态】 89 | for i, vehicle in enumerate(vehicles): 90 | exp = Experience(old_otherState[i], [old_taskState[i]], 91 | actionTask[i], actionAim[i], 92 | reward[i], 93 | otherState[i], [taskState[i]]) 94 | vehicle.buffer.append(exp) 95 | return round(Reward, 2) # 返回总的平均奖励 96 | 97 | 98 | # 计算一个智能体的损失 99 | def calc_loss(batch, net: DQN, tgt_net: DQN, device="cpu"): 100 | cur_otherState, cur_TaskState, taskAction, aimAction, rewards, next_otherState, next_TaskState = batch # 101 | 102 | otherStates_v = torch.tensor(np.array(cur_otherState, copy=False), dtype=torch.float32).to(device) 103 | taskStates_v = torch.tensor(np.array(cur_TaskState, copy=False), dtype=torch.float32).to(device) 104 | # print("states_v:", states_v) # batch状态 105 | taskActions_v = torch.tensor(np.array(taskAction), dtype=torch.int64).to(device) 106 | aimActions_v = torch.tensor(np.array(aimAction), dtype=torch.int64).to(device) 107 | # print("actions_v", actions_v) # batch动作 108 | rewards_v = torch.tensor(np.array(rewards), dtype=torch.float32).to(device) 109 | # print("rewards_v", rewards_v) # batch奖励 110 | next_otherStates_v = torch.tensor(np.array(next_otherState, copy=False), dtype=torch.float32).to(device) 111 | next_taskStates_v = torch.tensor(np.array(next_TaskState, copy=False), dtype=torch.float32).to(device) 112 | # print("next_states_v", next_states_v) # batch下一个状态 113 | 114 | # 计算当前网络q值 115 | taskActionValues, aimActionValues = net(otherStates_v, 116 | taskStates_v) # .gather(1, aimActions_v.unsqueeze(-1)).squeeze(-1) 117 | taskActionValues = taskActionValues.gather(1, taskActions_v.unsqueeze(-1)).squeeze(-1) 118 | aimActionValues = aimActionValues.gather(1, aimActions_v.unsqueeze(-1)).squeeze(-1) 119 | 120 | # 计算目标网络q值 121 | next_taskActionValues, next_aimActionValues = tgt_net(next_otherStates_v, 122 | next_taskStates_v) # .max(1)[0] # 得到最大的q值 123 | 124 | next_taskActionValues = next_taskActionValues.max(1)[0].detach() 125 | next_aimActionValues = next_aimActionValues.max(1)[0].detach() 126 | 127 | # 防止梯度流入用于计算下一状态q近似值得NN 128 | # next_states_values = next_aimActionValues.detach() 129 | # print("next_states_values", next_states_values) 130 | expected_aim_values = next_aimActionValues * GAMMA + rewards_v 131 | expected_task_values = next_taskActionValues * GAMMA + rewards_v 132 | # print(" expected_state_values", expected_state_values) 133 | 134 | return nn.MSELoss()(taskActionValues, expected_task_values), nn.MSELoss()(aimActionValues, expected_aim_values) 135 | 136 | 137 | if __name__ == '__main__': 138 | env = Env() 139 | env.reset() 140 | 141 | frame_idx = 0 142 | # writer = SummaryWriter(comment="-" + env.__doc__) 143 | agents = env.vehicles 144 | models = [] 145 | tgt_models = [] 146 | optimizers = [] 147 | for agent in agents: 148 | # print(agent.get_location, agent.velocity) 149 | task_shape = np.array([agent.task_state]).shape 150 | # print(task_shape) 151 | model = 
DQN(len(agent.self_state), task_shape, MAX_TASK, len(agent.neighbor) + 2) 152 | models.append(model) 153 | optimer = optim.RMSprop(params=model.parameters(), lr=LEARNING_RATE, momentum=momentum) 154 | optimizers.append(optimer) 155 | for agent in agents: 156 | # print(agent.get_location, agent.velocity) 157 | task_shape = np.array([agent.task_state]).shape 158 | # print(task_shape) 159 | model = DQN(len(agent.self_state), task_shape, MAX_TASK, len(agent.neighbor) + 2) 160 | model.load_state_dict(models[agent.id].state_dict()) 161 | tgt_models.append(model) 162 | 163 | # 打印网络结构 164 | # model = models[0] 165 | # state_v = torch.tensor([env.vehicles[0].self_state], dtype=torch.float32) 166 | # taskState_v = torch.tensor([[env.vehicles[0].task_state]], dtype=torch.float32) 167 | # # 针对有网络模型,但还没有训练保存 .pth 文件的情况 168 | # modelpath = "./netStruct/demo.onnx" # 定义模型结构保存的路径 169 | # torch.onnx.export(model, (state_v, taskState_v), modelpath) # 导出并保存 170 | # netron.start(modelpath) 171 | 172 | total_reward = [] 173 | recent_reward = [] 174 | loss_task_list = [] 175 | loss_aim_list = [] 176 | reward_1 = [] 177 | 178 | epsilon = EPSILON_START 179 | eliposde = EPSILON 180 | while eliposde > 0: 181 | # 重置游戏 182 | # if frame_idx % RESET == 0: 183 | # print("游戏重置") 184 | # env.reset() 185 | # agents = env.vehicles 186 | 187 | frame_idx += 1 188 | print("the {} steps".format(frame_idx)) 189 | epsilon = max(EPSILON_FINAL, EPSILON_START - frame_idx / EPSILON_DECAY_LAST_FRAME) 190 | reward = play_step(env, epsilon, models) 191 | total_reward.append(reward) 192 | print("current reward:", reward) 193 | print("current 100 times total rewards:", np.mean(total_reward[-100:])) 194 | recent_reward.append(np.mean(total_reward[-100:])) 195 | if np.mean(total_reward[-100:]) > 0.7: 196 | break 197 | 198 | for i, agent in enumerate(agents): 199 | # print("length of {} buffer".format(agent.id), len(agent.buffer)) 200 | if len(agent.buffer) < REPLAY_SIZE: # 缓冲池要足够大 201 | continue 202 | if frame_idx % SYNC_TARGET_FRAMES == 0: # 更新目标网络 203 | tgt_models[i].load_state_dict(models[i].state_dict()) 204 | optimizers[i].zero_grad() 205 | batch = agent.buffer.sample(BATCH_SIZE) 206 | loss_task, loss_aim = calc_loss(batch, models[i], tgt_models[i]) 207 | # print("loss:", loss_task, " ", loss_aim) 208 | # loss_t.backward() 209 | torch.autograd.backward([loss_task, loss_aim]) 210 | optimizers[i].step() 211 | if agent.id == 0: 212 | # print("cur_loss:", loss_task.item()) 213 | # print("cur_aim_loss", loss_aim.item()) 214 | loss_task_list.append(loss_task.item()) 215 | loss_aim_list.append(loss_aim.item()) 216 | reward_1.append(env.reward[1]) 217 | eliposde -= 1 218 | 219 | # cur_time = time.strftime("%Y-%m-%d", time.localtime(time.time())) 220 | # # 创建文件夹 221 | # os.makedirs("D:/pycharm/Project/VML/MyErion/experiment4/result/" + cur_time) 222 | # for i, vehicle in enumerate(env.vehicles): 223 | # # 保存每个网络模型 224 | # torch.save(tgt_models[i].state_dict(), 225 | # "D:/pycharm/Project/VML/MyErion/experiment4/result/" + cur_time + "/vehicle" + str(i) + ".pkl") 226 | 227 | plt.plot(range(len(recent_reward)), recent_reward) 228 | # plt.title("奖励曲线") 229 | plt.ylabel("Average Reward", fontproperties=prop) 230 | plt.xlabel("Episode", fontproperties=prop) 231 | # 设置x轴和y轴的字体大小 232 | plt.tick_params(axis='both', which='major', labelsize=10) 233 | plt.tick_params(axis='both', which='minor', labelsize=10) 234 | # # 显示指数 235 | # # 创建数据 236 | # x = range(0, len(recent_reward) + 1, 50000) 237 | # # 设置x轴坐标为指数形式 238 | # plt.xscale('log') 239 | # 
plt.gca().xaxis.set_major_formatter(FuncFormatter(lambda x, _: '{:.0e}'.format(x) if x != 0 else '0')) 240 | # 241 | # # 设置x轴坐标显示范围 242 | # plt.xlim([1, 3e5]) 243 | # 244 | # # 设置x轴坐标显示标签 245 | # plt.xticks([1] + list(range(int(5e4), int(3e5) + 1, int(5e4)))) 246 | 247 | plt.show() 248 | 249 | # plt.plot(range(len(loss_task_list)), loss_task_list) 250 | # plt.title("任务选择损失曲线") 251 | # plt.show() 252 | # 253 | # plt.plot(range(len(loss_aim_list)), loss_aim_list) 254 | # plt.title("目标选择损失曲线") 255 | # plt.show() 256 | 257 | # plt.plot(range(100000), reward_1[-100000:]) 258 | # plt.title("车辆一奖励曲线") 259 | # plt.show() 260 | -------------------------------------------------------------------------------- /experiment4/env_test.py: -------------------------------------------------------------------------------- 1 | from env import Env 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | if __name__ == '__main__': 6 | print() 7 | env = Env() 8 | env.reset() 9 | 10 | # 测试网络节点数 11 | task = np.array(env.taskState) 12 | print(task.shape) 13 | vehicles = env.vehicles 14 | 15 | for vehicle in vehicles: 16 | print("第{}车状态:{}".format(vehicle.id, vehicle.self_state)) 17 | print("该车邻居:") 18 | for i in vehicle.neighbor: 19 | print(i.id, end=" ") 20 | print() 21 | 22 | # 测试环境运行 23 | reward = [] 24 | x = [[] for i in range(20)] 25 | y = [[] for i in range(20)] 26 | for i in range(1000): 27 | # for j in range(20): 28 | # x[j].append(env.vehicles[j].position[0]) 29 | # y[j].append(env.vehicles[j].position[1]) 30 | action1 = [] 31 | action2 = [] 32 | for j in range(40): 33 | # action1.append(np.random.randint(0, 10)) 34 | action1.append(0) 35 | # action2.append(np.random.randint(0, 7)) 36 | action2.append(1) 37 | other_state, task_state, vehicle_state, _, _, _, Reward, _ = env.step(action1, action2) 38 | reward.append(Reward) 39 | print("第{}次平均奖励{}".format(i, Reward)) 40 | # print("当前状态:", state) 41 | # print("下一状态:", next_state) 42 | # print("车状态:", vehicleState) 43 | # print("任务状态", taskState) 44 | # print("当前奖励:", reward) 45 | # print("每个奖励,", vehicleReward) 46 | # print("当前有{}任务没有传输完成".format(len(env.need_trans_task))) 47 | # print("average reward:", env.Reward) 48 | # plt.figure(figsize=(100, 100)) 49 | # fix, ax = plt.subplots(5, 4) 50 | # 51 | # for i in range(5): 52 | # for j in range(4): 53 | # number = i * 4 + j 54 | # ax[i, j].plot(x[number], y[number]) 55 | # ax[i, j].set_title('vehicle {}'.format(number)) 56 | plt.plot(range(len(reward)), reward) 57 | print(reward) 58 | plt.show() 59 | -------------------------------------------------------------------------------- /experiment4/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import argparse 3 | import os 4 | import time 5 | from collections import namedtuple 6 | 7 | import ptan 8 | import torch 9 | import torch.nn.functional as F 10 | import model 11 | from env import Env 12 | from mec import MEC 13 | from vehicle import Vehicle 14 | from memory import ReplayMemory 15 | from tensorboardX import SummaryWriter 16 | from torch.distributions.categorical import Categorical 17 | 18 | ENV_ID = "computing offloading" 19 | GAMMA = 0.99 20 | GAE_LAMBDA = 0.95 21 | 22 | TRAJECTORY_SIZE = 65 23 | LEARNING_RATE_ACTOR = 1e-5 24 | LEARNING_RATE_CRITIC = 1e-4 25 | 26 | PPO_EPS = 0.2 27 | PPO_EPOCHES = 10 28 | PPO_BATCH_SIZE = 64 29 | 30 | TEST_ITERS = 10000 31 | Experience = namedtuple('Transition', ('state', 'action', 'reward', 'next_state')) # Define a transition tuple 32 | 33 | 
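# Annotation (added note, not original code): calc_adv_ref below implements
# Generalized Advantage Estimation over a stored trajectory,
#     delta_t = r_t + GAMMA * V(s_{t+1}) - V(s_t)
#     A_t     = delta_t + GAMMA * GAE_LAMBDA * A_{t+1},
# with the critic reference value ref_t = A_t + V(s_t); the lists are built
# while iterating the trajectory in reverse and flipped back at the end.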
34 | # 将list装换成tensor存入缓冲池中 35 | def save_experience(state, action, reward, next_state, memory: ReplayMemory): 36 | reward = torch.tensor([reward]) 37 | action = torch.tensor([action]) 38 | state = torch.tensor(state) 39 | state = state.unsqueeze(0) 40 | next_state = torch.tensor(next_state) 41 | next_state = next_state.unsqueeze(0) 42 | memory.push(state, action, reward, next_state) 43 | 44 | 45 | def calc_adv_ref(trajectory, net_crt, states_v, device="cpu"): 46 | """ 47 | By trajectory calculate advantage and 1-step ref value 48 | :param trajectory: trajectory list 49 | :param net_crt: critic network 50 | :param states_v: states tensor 51 | :return: tuple with advantage numpy array and reference values 52 | """ 53 | values_v = net_crt(torch.tensor(states_v)) 54 | values = values_v.squeeze().data.cpu().numpy() 55 | # generalized advantage estimator: smoothed version of the advantage 56 | last_gae = 0.0 57 | result_adv = [] 58 | result_ref = [] 59 | for val, next_val, exp in zip(reversed(values[:-1]), 60 | reversed(values[1:]), 61 | reversed(trajectory[:-1])): 62 | delta = exp.vehicleReward + GAMMA * next_val - val 63 | last_gae = delta + GAMMA * GAE_LAMBDA * last_gae 64 | result_adv.append(last_gae) 65 | result_ref.append(last_gae + val) 66 | 67 | adv_v = torch.FloatTensor(list(reversed(result_adv))) 68 | ref_v = torch.FloatTensor(list(reversed(result_ref))) 69 | return adv_v.to(device), ref_v.to(device) 70 | 71 | 72 | # 将状态信息放入各自的缓冲池中 73 | def push(env, state, actions, next_state): 74 | for i, vehicle in enumerate(env.vehicles): 75 | if vehicle.task is not None: # 没有任务不算经验 76 | continue 77 | exp = Experience(state, actions[i], env.vehicleReward[i][-1], next_state) 78 | vehicle.buffer.append(exp) 79 | 80 | 81 | if __name__ == '__main__': 82 | task = MEC([10, 20]) 83 | vehicle = Vehicle(1, [10, 20], 'd') 84 | print(type(task) == MEC) 85 | print(type(task) == Vehicle) 86 | print(type(vehicle) == Vehicle) 87 | print(type(vehicle)) 88 | print(vehicle) 89 | -------------------------------------------------------------------------------- /experiment4/mec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | RANGE_MEC = 650 # MEC通信范围 /m 4 | RESOURCE = 10000 # 可用资源 MHz 5 | MAX_QUEUE = 10 6 | 7 | 8 | # 边缘服务器 9 | class MEC: 10 | def __init__(self, position, resources=RESOURCE, max_queue=MAX_QUEUE): 11 | self.loc_x = position[0] 12 | self.loc_y = position[1] 13 | self.loc = position 14 | # 当前可用资源 MHz 15 | self.resources = resources 16 | self.state = [] 17 | # 通信范围 m 18 | self.range = RANGE_MEC 19 | # 当前接到需要处理的任务信息(最多同时处理10个任务) 20 | self.accept_task = [] 21 | # 最多处理任务量 22 | self.max_task = 10 23 | # 接受任务的数量 24 | self.sum_needDeal_task = 0 25 | # 此时刻有多少动作选则我 多少任务选择传输给我 26 | self.len_action = 0 27 | # 等待计算的任务队列(理解为挂起状态) 28 | self.task_queue = [] 29 | # 用于奖励计算的任务队列 30 | self.task_queue_for_reward = [] 31 | # 队列最长长度 32 | self.max_queue = max_queue 33 | # 当前状态 34 | self.get_state() 35 | 36 | @property 37 | def get_x(self): 38 | return self.loc_x 39 | 40 | @property 41 | def get_y(self): 42 | return self.loc_y 43 | 44 | @property 45 | def get_location(self): 46 | return self.loc 47 | 48 | """ 49 | 获得状态 50 | """ 51 | 52 | def get_state(self): 53 | """ 54 | :return:state 维度:[loc_x,loc_y,sum_needDeal_task,resources] 55 | """ 56 | self.state = [] 57 | self.state.extend(self.loc) 58 | self.state.append(self.sum_needDeal_task) 59 | self.state.append(self.len_action) 60 | self.state.append(self.resources) 61 | return self.state 62 | 
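
# Usage sketch (added annotation, not original code; the position below is an
# assumed example value). Note the docstring above lists four state fields,
# but get_state() actually appends five:
# [loc_x, loc_y, sum_needDeal_task, len_action, resources].
if __name__ == '__main__':
    mec = MEC(position=[500, 500])
    print(len(mec.state), mec.state)  # -> 5 [500, 500, 0, 0, 10000]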
-------------------------------------------------------------------------------- /experiment4/memory.py: -------------------------------------------------------------------------------- 1 | # 经验类型 2 | import collections 3 | from collections import namedtuple 4 | from random import sample 5 | import numpy as np 6 | 7 | Experience = namedtuple('Transition', 8 | field_names=['state', 'action', 'reward', 'next_state']) # Define a transition tuple 9 | 10 | 11 | class ReplayMemory(object): # Define a replay memory 12 | 13 | # 初始化缓冲池 14 | def __init__(self, capacity): 15 | # 最大容量 16 | self.capacity = capacity 17 | # 缓冲池经验 18 | self.memory = [] 19 | # ? 20 | self.position = 0 21 | 22 | # 存入经验 23 | def push(self, *args): 24 | if len(self.memory) < self.capacity: 25 | self.memory.append(None) 26 | # 存入经验 27 | self.memory[self.position] = Experience(*args) 28 | # 记录最新经验所在位置 29 | self.position = (self.position + 1) % self.capacity 30 | 31 | # 采样 32 | def sample(self, batch_size): 33 | return sample(self.memory, batch_size) 34 | 35 | def __len__(self): 36 | return len(self.memory) 37 | 38 | 39 | class ExperienceBuffer: 40 | def __init__(self, capacity): 41 | self.maxLen = capacity 42 | self.buffer = collections.deque(maxlen=capacity) # 队列,先进先出 43 | 44 | def __len__(self): 45 | return len(self.buffer) 46 | 47 | def append(self, experience: Experience): 48 | self.buffer.append(experience) 49 | 50 | def sample(self, batch_size): 51 | indices = np.random.choice(len(self.buffer), batch_size, replace=False) 52 | cur_otherState, cur_TaskState, taskAction, aimAction, rewards, next_otherState, next_TaskState = zip( 53 | *[self.buffer[idx] for idx in indices]) 54 | # 转换成numpy 55 | return np.array(cur_otherState), np.array(cur_TaskState), \ 56 | np.array(taskAction), np.array(aimAction), \ 57 | np.array(rewards, dtype=np.float32), \ 58 | np.array(next_otherState), np.array(next_TaskState) 59 | 60 | # 清空 61 | def clear(self): 62 | self.buffer = collections.deque(maxlen=self.maxLen) 63 | -------------------------------------------------------------------------------- /experiment4/model.py: -------------------------------------------------------------------------------- 1 | import ptan 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn import functional as F 6 | from torch.distributions.categorical import Categorical 7 | 8 | HID_SIZE = 64 9 | 10 | 11 | class ModelActor(nn.Module): 12 | def __init__(self, obs_dim, act_dim, task_dim): 13 | super(ModelActor, self).__init__() 14 | self.act_dim = act_dim 15 | 16 | self.mu = nn.Sequential( 17 | nn.Linear(obs_dim + HID_SIZE, HID_SIZE * 2), 18 | nn.ReLU(), 19 | nn.Linear(HID_SIZE * 2, HID_SIZE), 20 | nn.ReLU(), 21 | nn.Linear(HID_SIZE, act_dim * 2), 22 | ) 23 | self.cnn = CNNLayer(task_dim, HID_SIZE) 24 | 25 | def forward(self, x, task): 26 | cnn_out = self.cnn(task) 27 | x = torch.cat((x, cnn_out), -1) 28 | out = self.mu(x) 29 | action_out = out[:, :self.act_dim] 30 | task_out = out[:, self.act_dim:] 31 | action_pro = F.softmax(action_out, dim=-1) 32 | task_pro = F.softmax(task_out, dim=1) 33 | print(action_pro) 34 | print(task_pro) 35 | return action_pro, Categorical(action_pro), task_pro, Categorical(task_pro) 36 | 37 | 38 | class ModelCritic(nn.Module): 39 | def __init__(self, obs_size): 40 | super(ModelCritic, self).__init__() 41 | 42 | self.value = nn.Sequential( 43 | nn.Linear(obs_size, HID_SIZE), 44 | nn.ReLU(), 45 | nn.Linear(HID_SIZE, HID_SIZE), 46 | nn.ReLU(), 47 | nn.Linear(HID_SIZE, 1), 48 | ) 49 | 50 | def forward(self, x): 51 
| return self.value(x) 52 | 53 | 54 | class ModelSACTwinQ(nn.Module): 55 | def __init__(self, obs_size, act_size): 56 | super(ModelSACTwinQ, self).__init__() 57 | 58 | self.q1 = nn.Sequential( 59 | nn.Linear(obs_size + act_size, HID_SIZE), 60 | nn.ReLU(), 61 | nn.Linear(HID_SIZE, HID_SIZE), 62 | nn.ReLU(), 63 | nn.Linear(HID_SIZE, 1), 64 | ) 65 | 66 | self.q2 = nn.Sequential( 67 | nn.Linear(obs_size + act_size, HID_SIZE), 68 | nn.ReLU(), 69 | nn.Linear(HID_SIZE, HID_SIZE), 70 | nn.ReLU(), 71 | nn.Linear(HID_SIZE, 1), 72 | ) 73 | 74 | def forward(self, obs, act): 75 | x = torch.cat([obs, act], dim=1) 76 | return self.q1(x), self.q2(x) 77 | 78 | 79 | class AgentDDPG(ptan.agent.BaseAgent): 80 | """ 81 | Agent implementing Orstein-Uhlenbeck exploration process 82 | """ 83 | 84 | def __init__(self, net, device="cpu", ou_enabled=True, 85 | ou_mu=0.0, ou_teta=0.15, ou_sigma=0.2, 86 | ou_epsilon=1.0): 87 | self.net = net 88 | self.device = device 89 | self.ou_enabled = ou_enabled 90 | self.ou_mu = ou_mu 91 | self.ou_teta = ou_teta 92 | self.ou_sigma = ou_sigma 93 | self.ou_epsilon = ou_epsilon 94 | 95 | def initial_state(self): 96 | return None 97 | 98 | def __call__(self, states, agent_states): 99 | states_v = ptan.agent.float32_preprocessor(states) 100 | states_v = states_v.to(self.device) 101 | mu_v = self.net(states_v) 102 | actions = mu_v.data.cpu().numpy() 103 | 104 | if self.ou_enabled and self.ou_epsilon > 0: 105 | new_a_states = [] 106 | for a_state, action in zip(agent_states, actions): 107 | if a_state is None: 108 | a_state = np.zeros( 109 | shape=action.shape, dtype=np.float32) 110 | a_state += self.ou_teta * (self.ou_mu - a_state) 111 | a_state += self.ou_sigma * np.random.normal( 112 | size=action.shape) 113 | 114 | action += self.ou_epsilon * a_state 115 | new_a_states.append(a_state) 116 | else: 117 | new_a_states = agent_states 118 | 119 | actions = np.clip(actions, -1, 1) 120 | return actions, new_a_states 121 | 122 | 123 | class DQN(nn.Module): 124 | def __init__(self, obs_dim, task_dim, taskAction_dim, aimAction_dim): 125 | super(DQN, self).__init__() 126 | self.input_layer = nn.Linear(obs_dim + 32, 128) 127 | self.hidden1 = nn.Linear(128, 64) 128 | self.hidden2 = nn.Linear(64, 64) 129 | self.hidden3 = nn.Linear(64, 128) 130 | self.cnn = CNNLayer(task_dim, 32) 131 | self.output_layer1 = self.common(64, taskAction_dim) 132 | self.output_layer2 = self.common(64, aimAction_dim) 133 | 134 | def common(self, input_dim, action_dim): 135 | return nn.Sequential( 136 | nn.Linear(input_dim, 128), 137 | nn.ReLU(), 138 | self.hidden1, 139 | nn.ReLU(), 140 | self.hidden2, 141 | nn.ReLU(), 142 | nn.Linear(64, action_dim) 143 | ) 144 | 145 | def forward(self, x, task): 146 | """ 147 | 148 | :param x: batch_size*state_n 149 | :return: batch_size*actions_n 输出每个动作对应的q值 150 | """ 151 | # 任务卷积层 152 | cnn_out = self.cnn(task) 153 | x = torch.cat((x, cnn_out), -1) 154 | 155 | # 公共层 156 | x1 = F.relu(self.input_layer(x)) 157 | x2 = F.relu(self.hidden1(x1)) 158 | x3 = F.relu(self.hidden2(x2)) 159 | 160 | taskActionValue = self.output_layer1(x3) 161 | aimActionValue = self.output_layer2(x3) 162 | 163 | return taskActionValue, aimActionValue 164 | 165 | 166 | class CNNLayer(nn.Module): 167 | def __init__(self, obs_shape, hidden_size, use_orthogonal=True, use_ReLU=True, kernel_size=3, stride=1): 168 | super(CNNLayer, self).__init__() 169 | 170 | active_func = [nn.Tanh(), nn.ReLU()][use_ReLU] 171 | init_method = [nn.init.xavier_uniform_, nn.init.orthogonal_][use_orthogonal] 172 | gain = 
nn.init.calculate_gain(['tanh', 'relu'][use_ReLU]) 173 | 174 | def init_(m): # 权重使用正交初始化,激活函数使用relu 175 | return init(m, init_method, lambda x: nn.init.constant_(x, 0), gain=gain) 176 | 177 | input_channel = obs_shape[0] 178 | input_width = obs_shape[1] 179 | input_height = obs_shape[2] 180 | 181 | self.cnn = nn.Sequential( 182 | init_(nn.Conv2d(in_channels=input_channel, 183 | out_channels=hidden_size // 2, 184 | kernel_size=kernel_size, 185 | stride=stride) 186 | ), 187 | active_func, 188 | nn.Flatten(), 189 | init_(nn.Linear( 190 | hidden_size // 2 * (input_width - kernel_size + stride) * (input_height - kernel_size + stride), 191 | hidden_size) 192 | ), 193 | active_func, 194 | init_(nn.Linear(hidden_size, hidden_size)), active_func) 195 | 196 | def forward(self, x): 197 | x = x / 255.0 198 | x = self.cnn(x) 199 | 200 | return x 201 | 202 | 203 | def init(module, weight_init, bias_init, gain=1): 204 | weight_init(module.weight.data, gain=gain) 205 | bias_init(module.bias.data) 206 | return module 207 | -------------------------------------------------------------------------------- /experiment4/task.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | np.random.seed(2) 4 | 5 | 6 | class Task: 7 | """ 8 | 定义任务类型 9 | """ 10 | 11 | def __init__(self, vehicle=None, createTime=0): 12 | # 产生任务的车辆 13 | self.vehicle = vehicle 14 | # 完成该任务所消耗的资源 15 | self.aim = None # 传送对象 16 | 17 | self.max_time = 50 # ms 最大容忍时间 18 | self.size = np.random.uniform(0.2, 1) # Mb 19 | self.cycle = np.random.randint(50, 100) # cycle/bit 20 | self.need_trans_size = self.size * np.power(2, 10) # Kb 还剩余多少未传输完成 21 | self.need_precess_cycle = self.cycle * self.size * 1000 # Mb * cycle/byte =M cycle 还剩余多少轮次未完成(10^6) 22 | self.need_time = 0 # 需要计算时间 23 | self.hold_time = 0 # 任务在计算等待队列中得等待时间 24 | 25 | self.rate = 0 # 当前速率 26 | 27 | self.compute_resource = 0 28 | 29 | self.create_time = createTime # 任务产生时间 30 | self.pick_time = 0 # 被选择的时间(出队列时间) 31 | 32 | # 完成该任务所消耗的cup资源 33 | self.energy = 0 34 | self.trans_time = 0 # 传输所需要的时间(实际) 35 | self.precess_time = 0 # 任务处理所需要的时间(实际) 36 | -------------------------------------------------------------------------------- /experiment4/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | import matplotlib 5 | from pylab import mpl 6 | import model 7 | from env import Env 8 | import matplotlib.pyplot as plt 9 | 10 | # 设置显示中文字体 11 | mpl.rcParams["font.sans-serif"] = ["SimHei"] 12 | matplotlib.rcParams['axes.unicode_minus'] = False 13 | np.random.seed(2) 14 | 15 | if __name__ == '__main__': 16 | env = Env() 17 | env.reset() 18 | 19 | N = env.num_Vehicles 20 | vehicles = env.vehicles 21 | models = [] 22 | 23 | task_shape = np.array([vehicles[0].task_state]).shape 24 | for i in range(N): 25 | tgt_model = model.DQN(len(vehicles[0].self_state), task_shape, 10, len(vehicles[0].neighbor) + 2) 26 | tgt_model.load_state_dict(torch.load( 27 | "D:\pycharm\Project\VML\MyErion\experiment4\\result\\2022-11-26\\vehicle{}.pkl".format(i))) 28 | models.append(tgt_model) 29 | 30 | # state_v = torch.tensor([vehicles[i].otherState], dtype=torch.float32) 31 | # taskState_v = torch.tensor([[vehicles[i].taskState]], dtype=torch.float32) 32 | # taskAction, aimAction = models[0](state_v, taskState_v) 33 | 34 | vehicleReward = [] 35 | averageReward = [] 36 | for step in range(1000): 37 | action1 = [] 38 | action2 = [] 39 | 40 | for i in range(N): 41 | state_v = 
torch.tensor([vehicles[i].self_state], dtype=torch.float32) 42 | taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 43 | taskAction, aimAction = models[i](state_v, taskState_v) 44 | 45 | # taskAction = np.array(taskAction, dtype=np.float32).reshape(-1) 46 | # aimAction = np.array(aimAction, dtype=np.float32).reshape(-1) 47 | taskAction = taskAction.detach().numpy().reshape(-1) 48 | aimAction = aimAction.detach().numpy().reshape(-1) 49 | action1.append(np.argmax(taskAction)) 50 | # action1.append(0) 51 | action2.append(np.argmax(aimAction)) 52 | 53 | print(action1) 54 | print(action2) 55 | other_state, task_state, vehicle_state, _, _, _, Reward, reward = env.step(action1, action2) 56 | vehicleReward.append(reward[5]) 57 | averageReward.append(Reward) 58 | print("第{}次车辆平均奖励{}".format(step, Reward)) 59 | 60 | fig, aix = plt.subplots(2, 1) 61 | aix[0].plot(range(len(vehicleReward)), vehicleReward) 62 | aix[1].plot(range(len(averageReward)), averageReward) 63 | plt.show() 64 | 65 | avg = [np.mean(sum_time) for i, sum_time in enumerate(env.avg) if i % 3 != 0] 66 | plt.bar(range(len(avg)), avg) 67 | plt.title("平均时延") 68 | plt.ylabel("时延/ms") 69 | plt.show() 70 | 71 | avg = [np.mean(energy) for i, energy in enumerate(env.avg_energy) if i % 3 != 0] 72 | plt.bar(range(len(avg)), avg) 73 | plt.title("平均能量消耗") 74 | plt.ylabel("能量/J") 75 | plt.show() 76 | -------------------------------------------------------------------------------- /experiment4/vehicle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from memory import ExperienceBuffer 6 | from task import Task 7 | 8 | Dv = 100 # 车的最大通信范围 9 | Fv = 4000 # 车最大计算能力 MHZ 10 | MAX_TASK = 10 # 任务队列最大长度 11 | 12 | CAPACITY = 10000 # 缓冲池大小 13 | TASK_SOLT = 10 # 任务产生时隙 14 | 15 | # 等待队列最长长度 16 | MAX_QUEUE = 10 17 | 18 | np.random.seed(2) 19 | 20 | direction_map = {"d": 1, "u": 2, "l": 3, "r": 4} 21 | 22 | 23 | class Vehicle: 24 | # 位置:x,y 速度、方向:-1左,1右 25 | def __init__(self, id, position, direction, velocity=20, max_queue=MAX_QUEUE): 26 | self.id = id 27 | # 车的位置信息 28 | self.loc_x = position[0] 29 | self.loc_y = position[1] 30 | self.position = position 31 | self.velocity = velocity # m/s 32 | self.direction = direction 33 | # 通信范围 34 | self.range = Dv 35 | # 邻居表 36 | self.neighbor = [] 37 | # mec 38 | self.Mec = None 39 | # 当前时间 40 | self.cur_frame = 0 41 | # 接受的任务的列表(最多同时处理5个任务) 42 | self.accept_task = [] 43 | # 最多处理任务量 44 | self.max_task = 3 45 | # 等待队列最长长度 46 | self.max_queue = max_queue 47 | # 等待计算的任务队列(理解为挂起状态) 48 | self.task_queue = [] 49 | # 用于奖励计算的任务队列 50 | self.task_queue_for_reward = [] 51 | # 接受任务的数量 52 | self.sum_needDeal_task = 0 53 | # 此时刻有多少动作选则我 54 | self.len_action = 0 55 | # 当前可用资源 56 | self.resources = round((1 - np.random.randint(1, 4) / 10) * Fv, 2) # MHz 57 | # 表示当前是否有任务正在传输给邻居车辆(0:没有,1:有) 58 | self.trans_task_for_vehicle = 0 59 | # 当前是否有任务正在传输给mec 60 | self.trans_task_for_mec = 0 61 | # 当前处理的任务(用于计算奖励,不用于状态信息) 62 | self.cur_task = None 63 | # 任务队列 64 | self.total_task = [] 65 | # 任务队列的长度 66 | self.len_task = len(self.total_task) 67 | 68 | # 当前状态信息 69 | self.self_state = [] 70 | # 当前任务队列状态 71 | self.task_state = [] 72 | # 去除邻居的状态信息用于邻居车观察和全局critic的处理 73 | self.excludeNeighbor_state = [] 74 | # 缓冲池 75 | self.buffer = ExperienceBuffer(capacity=CAPACITY) 76 | # 总奖励 77 | self.reward = [] 78 | # 任务溢出的数量 79 | self.overflow = 0 80 | # 上一个任务产生的时间 81 | self.lastCreatWorkTime = 0 82 | 83 | # 产生任务 84 | self.create_work() 85 | 
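# Illustrative sketch (editorial addition, not from the original vehicle.py):
# the constructor above fixes each vehicle's available capacity with
#     round((1 - np.random.randint(1, 4) / 10) * Fv, 2)
# and np.random.randint(1, 4) samples from {1, 2, 3}, so every vehicle starts
# with 70%, 80% or 90% of Fv, i.e. 2800.0, 3200.0 or 3600.0 MHz for Fv = 4000.
# A standalone check, assuming only numpy and the stand-in constant Fv_DEMO:
import numpy as np
Fv_DEMO = 4000  # hypothetical stand-in for the module-level Fv constant above
draws = {round((1 - np.random.randint(1, 4) / 10) * Fv_DEMO, 2) for _ in range(1000)}
print(sorted(float(d) for d in draws))  # expected: [2800.0, 3200.0, 3600.0]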
86 | # 获得位置 87 | @property 88 | def get_location(self): 89 | return self.position 90 | 91 | # 设置位置 92 | def set_location(self, loc_x, loc_y): 93 | self.loc_x = loc_x 94 | self.loc_y = loc_y 95 | self.position = [self.loc_x, self.loc_y] 96 | 97 | # 获得x 98 | @property 99 | def get_x(self): 100 | return self.loc_x 101 | 102 | # 获得y 103 | @property 104 | def get_y(self): 105 | return self.loc_y 106 | 107 | # 产生任务 传入当前时间 108 | def create_work(self): 109 | if self.id % 3 == 0: 110 | return 111 | # 每隔一段时间进行一次任务产生 112 | if (self.cur_frame - self.lastCreatWorkTime) % TASK_SOLT == 0: 113 | # # 每次有0.6的概率产生任务 114 | if np.random.random() < 0.6: 115 | if self.len_task < MAX_TASK: # 队列不满 116 | task = Task(self, self.cur_frame % 50) 117 | self.lastCreatWorkTime = self.cur_frame 118 | self.total_task.append(task) 119 | self.len_task += 1 120 | # print("第{}辆车产生了任务".format(self.id)) 121 | self.overflow = 0 122 | else: 123 | # print("第{}辆车任务队列已满".format(self.id)) 124 | self.overflow = 1 125 | 126 | """ 127 | 获得状态 128 | """ 129 | 130 | def get_state(self): 131 | self.self_state = [] 132 | self.excludeNeighbor_state = [] 133 | self.task_state = [] 134 | 135 | # 位置信息 4 136 | self.self_state.extend(self.position) 137 | self.self_state.append(self.velocity) 138 | self.self_state.append(direction_map.get(self.direction)) 139 | self.excludeNeighbor_state.extend(self.position) 140 | self.excludeNeighbor_state.append(self.velocity) 141 | self.excludeNeighbor_state.append(direction_map.get(self.direction)) 142 | 143 | # 资源信息(可用资源) 144 | self.self_state.append(self.resources) 145 | self.excludeNeighbor_state.append(self.resources) 146 | 147 | # 当前处理的任务量 148 | self.self_state.append(self.sum_needDeal_task) 149 | self.excludeNeighbor_state.append(self.sum_needDeal_task) 150 | # 当前接受传输的任务量 151 | self.self_state.append(self.len_action) 152 | self.excludeNeighbor_state.append(self.sum_needDeal_task) 153 | 154 | # 当前是否有任务在传输 155 | self.excludeNeighbor_state.append(self.trans_task_for_vehicle) 156 | self.excludeNeighbor_state.append(self.trans_task_for_mec) 157 | self.self_state.append(self.trans_task_for_vehicle) 158 | self.self_state.append(self.trans_task_for_mec) 159 | 160 | # 正在传输的任务信息 161 | # if self.trans_task is not None: 162 | # self.otherState.append(self.trans_task.need_trans_size) 163 | # self.excludeNeighbor_state.append(self.trans_task.need_trans_size) 164 | # else: 165 | # self.otherState.append(0) 166 | # self.excludeNeighbor_state.append(0) 167 | 168 | # 当前队列长度 169 | self.self_state.append(self.len_task) 170 | self.excludeNeighbor_state.append(self.len_task) 171 | 172 | # 邻居表 7*数量 173 | for neighbor in self.neighbor: 174 | self.self_state.extend(neighbor.position) # 位置 175 | self.self_state.append(neighbor.velocity) # 速度 176 | self.self_state.append(direction_map.get(neighbor.direction)) # 方向 177 | self.self_state.append(neighbor.resources) # 可用资源 178 | self.self_state.append(neighbor.sum_needDeal_task) # 处理任务长度 179 | self.self_state.append(neighbor.len_action) # 当前正在传输任务数量 180 | 181 | self.self_state.extend(self.Mec.state) 182 | 183 | # 任务状态信息 184 | for i in range(MAX_TASK): 185 | if i < self.len_task: 186 | task = self.total_task[i] 187 | self.task_state.append([task.create_time, task.need_trans_size, task.need_precess_cycle, task.max_time]) 188 | else: 189 | self.task_state.append([0, 0, 0, 0]) 190 | 191 | return self.excludeNeighbor_state 192 | -------------------------------------------------------------------------------- /experiment5/__init__.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | 环境5 3 | (两个动作:选择任务和选择对象) 4 | 加入了mec和车在时隙内处理任务的上限(mec最多同时处理10个任务、车最多处理5个任务) 5 | 使用经典城市道路(使用不同数量车辆和邻居) 6 | 为mec卸载和车辆卸载提供两种传输方式(即可同时像车辆和mec传输任务) 7 | 使用MAPPO训练网络 8 | """ 9 | import ptan 10 | import numpy as np 11 | import torch 12 | from torch.distributions.categorical import Categorical 13 | from env import Env 14 | 15 | 16 | def test_net(nets, env: Env, count=10): 17 | rewards = 0.0 18 | steps = 0 19 | for _ in range(count): 20 | env.reset() 21 | while steps < 1000: 22 | action = [] 23 | with torch.no_grad(): 24 | for vehicle in env.vehicles: 25 | state = torch.tensor(vehicle.self_state) 26 | _, pro = nets[vehicle.id](state) 27 | act = Categorical.sample(pro) 28 | action.append(act.item()) 29 | _, _, reward, _ = env.step(action) 30 | rewards += reward 31 | steps += 1 32 | return rewards / count, steps / count 33 | 34 | # def calc_logprob(pro_v, actions_v): 35 | # p1 = - ((mu_v - actions_v) ** 2) / (2 * torch.exp(logstd_v).clamp(min=1e-3)) 36 | # p2 = - torch.log(torch.sqrt(2 * math.pi * torch.exp(logstd_v))) 37 | # return p1 + p2 38 | -------------------------------------------------------------------------------- /experiment5/env_test.py: -------------------------------------------------------------------------------- 1 | from env import Env 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # if __name__ == '__main__': 6 | # print() 7 | # env = Env() 8 | # env.reset() 9 | # 10 | # # 测试网络节点数 11 | # task = np.array(env.taskState) 12 | # print(task.shape) 13 | # vehicles = env.vehicles 14 | # 15 | # for vehicle in vehicles: 16 | # print("第{}车状态:{}".format(vehicle.id, vehicle.self_state)) 17 | # print("该车邻居:") 18 | # for i in vehicle.neighbor: 19 | # print(i.id, end=" ") 20 | # print() 21 | # 22 | # # 测试环境运行 23 | # reward = [] 24 | # x = [[] for i in range(20)] 25 | # y = [[] for i in range(20)] 26 | # for i in range(1000): 27 | # # for j in range(20): 28 | # # x[j].append(env.vehicles[j].position[0]) 29 | # # y[j].append(env.vehicles[j].position[1]) 30 | # action1 = [] 31 | # action2 = [] 32 | # for j in range(40): 33 | # # action1.append(np.random.randint(0, 10)) 34 | # action1.append(0) 35 | # # action2.append(np.random.randint(0, 7)) 36 | # action2.append(1) 37 | # other_state, task_state, vehicle_state, _, _, _, Reward, _ = env.step(action1, action2) 38 | # reward.append(Reward) 39 | # print("第{}次平均奖励{}".format(i, Reward)) 40 | # # print("当前状态:", state) 41 | # # print("下一状态:", next_state) 42 | # # print("车状态:", vehicleState) 43 | # # print("任务状态", taskState) 44 | # # print("当前奖励:", reward) 45 | # # print("每个奖励,", vehicleReward) 46 | # # print("当前有{}任务没有传输完成".format(len(env.need_trans_task))) 47 | # # print("average reward:", env.Reward) 48 | # # plt.figure(figsize=(100, 100)) 49 | # # fix, ax = plt.subplots(5, 4) 50 | # # 51 | # # for i in range(5): 52 | # # for j in range(4): 53 | # # number = i * 4 + j 54 | # # ax[i, j].plot(x[number], y[number]) 55 | # # ax[i, j].set_title('vehicle {}'.format(number)) 56 | # plt.plot(range(len(reward)), reward) 57 | # print(reward) 58 | # plt.show() 59 | 60 | if __name__ == '__main__': 61 | print() 62 | env = Env() 63 | env.reset() 64 | 65 | # 测试网络节点数 66 | vehicles = env.vehicles 67 | 68 | for vehicle in vehicles: 69 | print("第{}车状态:{}".format(vehicle.id, vehicle.self_state)) 70 | print("该车邻居:") 71 | for i in vehicle.neighbor: 72 | print(i.id, end=" ") 73 | print() 74 | 75 | # 测试环境运行 76 | reward = [] 77 | x = [[] for i in range(20)] 78 | y = [[] 
for i in range(20)] 79 | for i in range(1000): 80 | # for j in range(20): 81 | # x[j].append(env.vehicles[j].position[0]) 82 | # y[j].append(env.vehicles[j].position[1]) 83 | action1 = [] 84 | action2 = [] 85 | for j in range(20): 86 | # action1.append(np.random.randint(0, 10)) 87 | action1.append(0) 88 | # action2.append(np.random.randint(0, 7)) 89 | action2.append(1) 90 | Reward, _ = env.step(action1, action2) 91 | reward.append(Reward) 92 | print("第{}次平均奖励{}".format(i, Reward)) 93 | -------------------------------------------------------------------------------- /experiment5/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import argparse 3 | import os 4 | import time 5 | from collections import namedtuple 6 | 7 | import ptan 8 | import torch 9 | import torch.nn.functional as F 10 | import model 11 | from env import Env 12 | from mec import MEC 13 | from vehicle import Vehicle 14 | from memory import ReplayMemory 15 | from tensorboardX import SummaryWriter 16 | from torch.distributions.categorical import Categorical 17 | 18 | ENV_ID = "computing offloading" 19 | GAMMA = 0.99 20 | GAE_LAMBDA = 0.95 21 | 22 | TRAJECTORY_SIZE = 65 23 | LEARNING_RATE_ACTOR = 1e-5 24 | LEARNING_RATE_CRITIC = 1e-4 25 | 26 | PPO_EPS = 0.2 27 | PPO_EPOCHES = 10 28 | PPO_BATCH_SIZE = 64 29 | 30 | TEST_ITERS = 10000 31 | Experience = namedtuple('Transition', ('state', 'action', 'reward', 'next_state')) # Define a transition tuple 32 | 33 | 34 | # 将list装换成tensor存入缓冲池中 35 | def save_experience(state, action, reward, next_state, memory: ReplayMemory): 36 | reward = torch.tensor([reward]) 37 | action = torch.tensor([action]) 38 | state = torch.tensor(state) 39 | state = state.unsqueeze(0) 40 | next_state = torch.tensor(next_state) 41 | next_state = next_state.unsqueeze(0) 42 | memory.push(state, action, reward, next_state) 43 | 44 | 45 | def calc_adv_ref(trajectory, net_crt, states_v, device="cpu"): 46 | """ 47 | By trajectory calculate advantage and 1-step ref value 48 | :param trajectory: trajectory list 49 | :param net_crt: critic network 50 | :param states_v: states tensor 51 | :return: tuple with advantage numpy array and reference values 52 | """ 53 | values_v = net_crt(torch.tensor(states_v)) 54 | values = values_v.squeeze().data.cpu().numpy() 55 | # generalized advantage estimator: smoothed version of the advantage 56 | last_gae = 0.0 57 | result_adv = [] 58 | result_ref = [] 59 | for val, next_val, exp in zip(reversed(values[:-1]), 60 | reversed(values[1:]), 61 | reversed(trajectory[:-1])): 62 | delta = exp.vehicleReward + GAMMA * next_val - val 63 | last_gae = delta + GAMMA * GAE_LAMBDA * last_gae 64 | result_adv.append(last_gae) 65 | result_ref.append(last_gae + val) 66 | 67 | adv_v = torch.FloatTensor(list(reversed(result_adv))) 68 | ref_v = torch.FloatTensor(list(reversed(result_ref))) 69 | return adv_v.to(device), ref_v.to(device) 70 | 71 | 72 | # 将状态信息放入各自的缓冲池中 73 | def push(env, state, actions, next_state): 74 | for i, vehicle in enumerate(env.vehicles): 75 | if vehicle.task is not None: # 没有任务不算经验 76 | continue 77 | exp = Experience(state, actions[i], env.vehicleReward[i][-1], next_state) 78 | vehicle.buffer.append(exp) 79 | 80 | 81 | if __name__ == '__main__': 82 | task = MEC([10, 20]) 83 | vehicle = Vehicle(1, [10, 20], 'd') 84 | print(type(task) == MEC) 85 | print(type(task) == Vehicle) 86 | print(type(vehicle) == Vehicle) 87 | print(type(vehicle)) 88 | print(vehicle) 89 | 
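The calc_adv_ref helper above walks the trajectory backwards to build generalized advantage estimates. Note that it reads exp.vehicleReward, while the Experience tuple defined in this file names that field reward, so the two names would have to be reconciled before the helper is actually used. Below is a minimal standalone sketch of the same backward recursion on a hand-made trajectory; the values and rewards arrays are hypothetical, and the snippet is an editorial illustration rather than code from the repository.

import numpy as np

GAMMA, GAE_LAMBDA = 0.99, 0.95
values = np.array([1.0, 0.8, 0.5, 0.2])   # critic estimates V(s_0)..V(s_3)
rewards = np.array([0.1, -0.2, 0.3])      # rewards for steps 0..2

last_gae, adv, ref = 0.0, [], []
for t in reversed(range(len(rewards))):                       # same order as calc_adv_ref
    delta = rewards[t] + GAMMA * values[t + 1] - values[t]    # 1-step TD error
    last_gae = delta + GAMMA * GAE_LAMBDA * last_gae          # smoothed advantage
    adv.append(last_gae)
    ref.append(last_gae + values[t])                          # critic target = A_t + V(s_t)

adv, ref = adv[::-1], ref[::-1]
print(adv)   # advantages fed to the actor loss
print(ref)   # reference values fed to the critic loss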
-------------------------------------------------------------------------------- /experiment5/mec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | RANGE_MEC = 500 # MEC通信范围 /m 4 | RESOURCE = 10000 # 可用资源 MHz 5 | MAX_QUEUE = 10 6 | 7 | 8 | # 边缘服务器 9 | class MEC: 10 | def __init__(self, position, resources=RESOURCE, max_queue=MAX_QUEUE): 11 | self.loc_x = position[0] 12 | self.loc_y = position[1] 13 | self.loc = position 14 | # 当前可用资源 MHz 15 | self.resources = resources 16 | self.state = [] 17 | # 通信范围 m 18 | self.range = RANGE_MEC 19 | # 当前接到需要处理的任务信息(最多同时处理10个任务) 20 | self.accept_task = [] 21 | # 最多处理任务量 22 | self.max_task = 10 23 | # 接受任务的数量 24 | self.sum_needDeal_task = 0 25 | # 此时刻有多少动作选则我 多少任务选择传输给我 26 | self.len_action = 0 27 | # 等待计算的任务队列(理解为挂起状态) 28 | self.task_queue = [] 29 | # 用于奖励计算的任务队列 30 | self.task_queue_for_reward = [] 31 | # 队列最长长度 32 | self.max_queue = max_queue 33 | # 当前状态 34 | self.get_state() 35 | 36 | @property 37 | def get_x(self): 38 | return self.loc_x 39 | 40 | @property 41 | def get_y(self): 42 | return self.loc_y 43 | 44 | @property 45 | def get_location(self): 46 | return self.loc 47 | 48 | """ 49 | 获得状态 50 | """ 51 | 52 | def get_state(self): 53 | """ 54 | :return:state 维度:[loc_x,loc_y,sum_needDeal_task,resources] 55 | """ 56 | self.state = [] 57 | self.state.extend(self.loc) 58 | self.state.append(self.sum_needDeal_task) 59 | self.state.append(self.len_action) 60 | self.state.append(self.resources) 61 | return self.state 62 | -------------------------------------------------------------------------------- /experiment5/memory.py: -------------------------------------------------------------------------------- 1 | # 经验类型 2 | import collections 3 | from collections import namedtuple 4 | from random import sample 5 | import numpy as np 6 | 7 | Experience = namedtuple('Transition', 8 | field_names=['cur_otherState', 'cur_TaskState', # 状态 9 | 'taskAction', 'aimAction', # 动作 10 | 'reward', # 奖励 11 | 'next_otherState', 'next_TaskState']) # Define a transition tuple 12 | 13 | 14 | class PPOMemory: 15 | def __init__(self, batch_size): 16 | self.self_state = [] 17 | self.neighbor_state = [] 18 | self.task_state = [] 19 | self.vehicles_state = [] 20 | self.task_probs = [] 21 | self.aim_probs = [] 22 | self.vals = [] 23 | self.action = [] 24 | self.rewards = [] 25 | self.batch_size = batch_size 26 | 27 | def sample(self): 28 | batch_step = np.arange(0, len(self.self_state), self.batch_size) 29 | indices = np.arange(len(self.self_state), dtype=np.int64) 30 | # np.random.shuffle(indices) 31 | batches = [indices[i:i + self.batch_size] for i in batch_step] 32 | return np.array(self.self_state), \ 33 | np.array(self.neighbor_state), \ 34 | np.array(self.task_state), \ 35 | np.array(self.vehicles_state), \ 36 | np.array(self.task_probs), \ 37 | np.array(self.aim_probs), \ 38 | np.array(self.vals), \ 39 | np.array(self.action), \ 40 | np.array(self.rewards), \ 41 | batches 42 | 43 | def push(self, self_state, neighbor_state, task_state, vehicles_state, 44 | task_action, aim_action, 45 | task_probs, aim_probs, 46 | vals, reward): 47 | self.self_state.append(self_state) 48 | self.neighbor_state.append(neighbor_state) 49 | self.task_state.append(task_state) 50 | self.vehicles_state.append(vehicles_state) 51 | self.action.append([task_action, aim_action]) 52 | self.task_probs.append(task_probs) 53 | self.aim_probs.append(aim_probs) 54 | self.vals.append(vals) 55 | self.rewards.append(reward) 56 | 57 | def 
clear(self): 58 | self.self_state = [] 59 | self.neighbor_state = [] 60 | self.task_state = [] 61 | self.vehicles_state = [] 62 | self.task_probs = [] 63 | self.aim_probs = [] 64 | self.vals = [] 65 | self.action = [] 66 | self.rewards = [] 67 | 68 | 69 | class ExperienceBuffer: 70 | def __init__(self, capacity): 71 | self.maxLen = capacity 72 | self.buffer = collections.deque(maxlen=capacity) # 队列,先进先出 73 | 74 | def __len__(self): 75 | return len(self.buffer) 76 | 77 | def append(self, experience: Experience): 78 | self.buffer.append(experience) 79 | 80 | def sample(self, batch_size): 81 | indices = np.random.choice(len(self.buffer), batch_size, replace=False) 82 | cur_otherState, cur_TaskState, taskAction, aimAction, rewards, next_otherState, next_TaskState = zip( 83 | *[self.buffer[idx] for idx in indices]) 84 | # 转换成numpy 85 | return np.array(cur_otherState), np.array(cur_TaskState), \ 86 | np.array(taskAction), np.array(aimAction), \ 87 | np.array(rewards, dtype=np.float32), \ 88 | np.array(next_otherState), np.array(next_TaskState) 89 | 90 | # 清空 91 | def clear(self): 92 | self.buffer = collections.deque(maxlen=self.maxLen) 93 | -------------------------------------------------------------------------------- /experiment5/model.py: -------------------------------------------------------------------------------- 1 | import ptan 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn import functional as F 6 | from torch.distributions.categorical import Categorical 7 | 8 | HID_SIZE = 64 9 | HID_SIZE_MIN = 32 10 | 11 | 12 | class ModelActor(nn.Module): 13 | def __init__(self, obs_dim, neighbor_dim, task_dim, task_aim_dim, act_aim_dim): 14 | super(ModelActor, self).__init__() 15 | 16 | self.cnn_task = CNNLayer(task_dim, HID_SIZE) 17 | self.cnn_neighbor = CNNLayer(neighbor_dim, HID_SIZE_MIN) 18 | self.same = nn.Sequential( 19 | nn.Linear(HID_SIZE + HID_SIZE_MIN + obs_dim, 2 * HID_SIZE), 20 | nn.ReLU(), 21 | nn.Linear(2 * HID_SIZE, HID_SIZE), 22 | nn.ReLU(), 23 | nn.Linear(HID_SIZE, 2 * HID_SIZE), 24 | nn.ReLU(), 25 | ) 26 | self.task = nn.Sequential( 27 | nn.Linear(2 * HID_SIZE, HID_SIZE), 28 | nn.ReLU(), 29 | nn.Linear(HID_SIZE, HID_SIZE_MIN), 30 | nn.ReLU(), 31 | nn.Linear(HID_SIZE_MIN, task_aim_dim), 32 | ) 33 | self.act = nn.Sequential( 34 | nn.Linear(2 * HID_SIZE, HID_SIZE), 35 | nn.ReLU(), 36 | nn.Linear(HID_SIZE, HID_SIZE_MIN), 37 | nn.ReLU(), 38 | nn.Linear(HID_SIZE_MIN, act_aim_dim), 39 | ) 40 | self.logstd_task = nn.Parameter(torch.zeros(task_aim_dim)) 41 | self.logstd_aim = nn.Parameter(torch.zeros(act_aim_dim)) 42 | 43 | def forward(self, obs, neighbor, task, is_train=True): 44 | task_out = self.cnn_task(task) 45 | neighbor_out = self.cnn_neighbor(neighbor) 46 | x = torch.cat((task_out, neighbor_out, obs), -1) 47 | same_out = self.same(x) 48 | act_out = self.act(same_out) 49 | task_out = self.task(same_out) 50 | if is_train: 51 | rnd_task = torch.tensor(np.random.normal(size=task_out.shape)) 52 | rnd_aim = torch.tensor(np.random.normal(size=act_out.shape)) 53 | task_out = task_out + torch.exp(self.logstd_task) * rnd_task 54 | act_out = act_out + torch.exp(self.logstd_aim) * rnd_aim 55 | 56 | act_out = F.gumbel_softmax(act_out) 57 | 58 | act_pro = F.softmax(act_out, dim=-1) 59 | task_pro = F.softmax(task_out, dim=-1) 60 | # print(act_pro) 61 | # print(torch.sum(act_pro)) 62 | # print(task_pro) 63 | # return act_pro, task_pro # 打印网络结构用 64 | return Categorical(task_pro), Categorical(act_pro) # 真实使用 65 | 66 | 67 | class ModelCritic(nn.Module): 68 | def 
__init__(self, obs_size, task_size, act_size): 69 | super(ModelCritic, self).__init__() 70 | 71 | self.cnn = CNNLayer(obs_size, HID_SIZE) 72 | 73 | self.task_cnn = CNNLayer(task_size, HID_SIZE) 74 | 75 | self.value = nn.Sequential( 76 | nn.Linear(HID_SIZE * 2 + act_size, HID_SIZE * 2), 77 | nn.ReLU(), 78 | nn.Linear(HID_SIZE * 2, HID_SIZE), 79 | nn.ReLU(), 80 | nn.Linear(HID_SIZE, HID_SIZE_MIN), 81 | nn.ReLU(), 82 | nn.Linear(HID_SIZE_MIN, 1), 83 | ) 84 | self.value1 = nn.Sequential( 85 | nn.Linear(HID_SIZE * 2 + act_size, HID_SIZE * 2), 86 | nn.ReLU(), 87 | nn.Linear(HID_SIZE * 2, HID_SIZE), 88 | nn.ReLU(), 89 | nn.Linear(HID_SIZE, HID_SIZE_MIN), 90 | nn.ReLU(), 91 | nn.Linear(HID_SIZE_MIN, 1), 92 | ) 93 | 94 | def forward(self, states_v, task_states_v, actions_v): 95 | cnn_out = self.cnn(states_v) 96 | task_out = self.task_cnn(task_states_v) 97 | 98 | v = torch.cat((actions_v, cnn_out, task_out), -1) 99 | task_value = self.value(v) 100 | # aim_value = self.value1(v) 101 | return task_value # , aim_value 102 | 103 | 104 | class CNNLayer(nn.Module): 105 | def __init__(self, obs_shape, hidden_size, use_orthogonal=True, use_ReLU=True, kernel_size=3, stride=1): 106 | super(CNNLayer, self).__init__() 107 | 108 | active_func = [nn.Tanh(), nn.ReLU()][use_ReLU] 109 | init_method = [nn.init.xavier_uniform_, nn.init.orthogonal_][use_orthogonal] 110 | gain = nn.init.calculate_gain(['tanh', 'relu'][use_ReLU]) 111 | 112 | def init_(m): # 权重使用正交初始化,激活函数使用relu 113 | return init(m, init_method, lambda x: nn.init.constant_(x, 0), gain=gain) 114 | 115 | in_channels = obs_shape[0] 116 | input_width = obs_shape[1] 117 | input_height = obs_shape[2] 118 | 119 | self.cnn = nn.Sequential( 120 | init_(nn.Conv2d( 121 | in_channels=in_channels, 122 | out_channels=hidden_size // 2, 123 | kernel_size=kernel_size, 124 | stride=stride) 125 | ), 126 | active_func, 127 | # nn.AvgPool2d( 128 | # kernel_size=kernel_size, 129 | # stride=stride), 130 | # active_func, 131 | # init_(nn.Conv2d( 132 | # in_channels=3, 133 | # out_channels=1, 134 | # kernel_size=kernel_size, 135 | # stride=stride) 136 | # ), 137 | # active_func, 138 | # nn.AvgPool2d( 139 | # kernel_size=kernel_size, 140 | # stride=stride), 141 | # active_func, 142 | nn.Flatten(), 143 | init_(nn.Linear( 144 | hidden_size // 2 * (input_width - kernel_size + stride) * (input_height - kernel_size + stride), 145 | hidden_size) 146 | ), 147 | active_func, 148 | init_(nn.Linear(hidden_size, hidden_size)), active_func) 149 | 150 | def forward(self, x): 151 | x = x / 255.0 152 | x = self.cnn(x) 153 | 154 | return x 155 | 156 | 157 | def init(module, weight_init, bias_init, gain=1): 158 | weight_init(module.weight.data, gain=gain) 159 | bias_init(module.bias.data) 160 | return module 161 | -------------------------------------------------------------------------------- /experiment5/task.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | np.random.seed(2) 4 | 5 | 6 | class Task: 7 | """ 8 | 定义任务类型 9 | """ 10 | 11 | def __init__(self, vehicle=None, createTime=0): 12 | # 产生任务的车辆 13 | self.vehicle = vehicle 14 | # 完成该任务所消耗的资源 15 | self.aim = None # 传送对象 16 | 17 | self.max_time = 50 # ms 最大容忍时间 18 | self.size = np.random.uniform(0.2, 1) # Mb 19 | self.cycle = np.random.randint(50, 100) # cycle/bit 20 | self.need_trans_size = self.size * np.power(2, 10) # Kb 还剩余多少未传输完成 21 | self.need_precess_cycle = self.cycle * self.size * 1000 # Mb * cycle/byte =M cycle 还剩余多少轮次未完成(10^6) 22 | self.need_time = 0 # 需要计算时间 23 | 
self.hold_time = 0 # 任务在计算等待队列中得等待时间 24 | 25 | self.rate = 0 # 当前速率 26 | 27 | self.compute_resource = 0 28 | 29 | self.create_time = createTime # 任务产生时间 30 | self.pick_time = 0 # 被选择的时间(出队列时间) 31 | 32 | # 完成该任务所消耗的cup资源 33 | self.energy = 0 34 | self.trans_time = 0 # 传输所需要的时间(实际) 35 | self.precess_time = 0 # 任务处理所需要的时间(实际) 36 | -------------------------------------------------------------------------------- /experiment5/vehicle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from memory import PPOMemory 6 | from task import Task 7 | 8 | Dv = 100 # 车的最大通信范围 9 | Fv = 2000 # 车最大计算能力 MHZ 10 | MAX_TASK = 10 # 任务队列最大长度 11 | 12 | CAPACITY = 2050 # 缓冲池大小 13 | TASK_SOLT = 10 # 任务产生时隙 14 | 15 | # 等待队列最长长度 16 | MAX_QUEUE = 10 17 | 18 | np.random.seed(2) 19 | 20 | direction_map = {"d": 1, "u": 2, "l": 3, "r": 4} 21 | 22 | 23 | class Vehicle: 24 | # 位置:x,y 速度、方向:-1左,1右 25 | def __init__(self, id, position, direction, velocity=20, max_queue=MAX_QUEUE): 26 | self.id = id 27 | # 车的位置信息 28 | self.loc_x = position[0] 29 | self.loc_y = position[1] 30 | self.position = position 31 | self.velocity = velocity # m/s 32 | self.direction = direction 33 | # 通信范围 34 | self.range = Dv 35 | # 邻居表 36 | self.neighbor = [] 37 | # mec 38 | self.Mec = None 39 | # 当前时间 40 | self.cur_frame = 0 41 | # 接受的任务的列表(最多同时处理5个任务) 42 | self.accept_task = [] 43 | # 最多处理任务量 44 | self.max_task = 3 45 | # 等待队列最长长度 46 | self.max_queue = max_queue 47 | # 等待计算的任务队列(理解为挂起状态) 48 | self.task_queue = [] 49 | # 用于奖励计算的任务队列 50 | self.task_queue_for_reward = [] 51 | # 接受任务的数量 52 | self.sum_needDeal_task = 0 53 | # 此时刻有多少动作选则我 54 | self.len_action = 0 55 | # 当前可用资源 56 | self.resources = round((1 - np.random.randint(1, 4) / 10) * Fv, 2) # MHz 57 | # 表示当前是否有任务正在传输给邻居车辆(0:没有,1:有) 58 | self.trans_task_for_vehicle = 0 59 | # 当前是否有任务正在传输给mec 60 | self.trans_task_for_mec = 0 61 | # 当前处理的任务(用于计算奖励,不用于状态信息) 62 | self.cur_task = None 63 | # 任务队列 64 | self.total_task = [] 65 | # 任务队列的长度 66 | self.len_task = len(self.total_task) 67 | 68 | # 自身状态信息 69 | self.self_state = [] 70 | # 当前任务队列状态 71 | self.task_state = [] 72 | # 邻居状态信息 73 | self.neighbor_state = [] 74 | # 缓冲池 75 | self.memory = PPOMemory(CAPACITY) 76 | # 总奖励 77 | self.reward = [] 78 | # 任务溢出的数量 79 | self.overflow = 0 80 | # 上一个任务产生的时间 81 | self.lastCreatWorkTime = 0 82 | 83 | # 产生任务 84 | self.create_work() 85 | 86 | # 获得位置 87 | @property 88 | def get_location(self): 89 | return self.position 90 | 91 | # 设置位置 92 | def set_location(self, loc_x, loc_y): 93 | self.loc_x = loc_x 94 | self.loc_y = loc_y 95 | self.position = [self.loc_x, self.loc_y] 96 | 97 | # 获得x 98 | @property 99 | def get_x(self): 100 | return self.loc_x 101 | 102 | # 获得y 103 | @property 104 | def get_y(self): 105 | return self.loc_y 106 | 107 | # 产生任务 传入当前时间 108 | def create_work(self): 109 | if self.id % 3 == 0: 110 | return 111 | # 每隔一段时间进行一次任务产生 112 | if (self.cur_frame - self.lastCreatWorkTime) % TASK_SOLT == 0: 113 | # # 每次有0.6的概率产生任务 114 | if np.random.random() < 0.6: 115 | if self.len_task < MAX_TASK: # 队列不满 116 | task = Task(self, self.cur_frame) 117 | self.lastCreatWorkTime = self.cur_frame 118 | self.total_task.append(task) 119 | self.len_task += 1 120 | # print("第{}辆车产生了任务".format(self.id)) 121 | self.overflow = 0 122 | else: 123 | # print("第{}辆车任务队列已满".format(self.id)) 124 | self.overflow = 1 125 | 126 | """ 127 | 获得状态 128 | """ 129 | 130 | def get_state(self): 131 | self.self_state = [] 132 | self.neighbor_state = [] 133 | 
self.task_state = [] 134 | 135 | # 位置信息 4 136 | self.self_state.extend(self.position) 137 | self.self_state.append(self.velocity) 138 | self.self_state.append(direction_map.get(self.direction)) 139 | 140 | # 资源信息(可用资源) 141 | self.self_state.append(self.resources) 142 | 143 | # 当前处理的任务量 144 | self.self_state.append(self.sum_needDeal_task) 145 | # 当前接受传输的任务量 146 | self.self_state.append(self.len_action) 147 | 148 | # 当前是否有任务在传输 149 | self.self_state.append(self.trans_task_for_vehicle) 150 | self.self_state.append(self.trans_task_for_mec) 151 | 152 | # 邻居表 7*数量 153 | for neighbor in self.neighbor: 154 | state = [] 155 | state.extend(neighbor.position) # 位置 156 | state.append(neighbor.velocity) # 速度 157 | state.append(direction_map.get(neighbor.direction)) # 方向 158 | state.append(neighbor.resources) # 可用资源 159 | state.append(neighbor.sum_needDeal_task) # 处理任务长度 160 | state.append(neighbor.len_action) # 当前正在接受传输任务数量 161 | self.neighbor_state.append(state) 162 | 163 | self.self_state.extend(self.Mec.state) 164 | 165 | # 任务状态信息 166 | for i in range(MAX_TASK): 167 | if i < self.len_task: 168 | task = self.total_task[i] 169 | self.task_state.append([task.create_time, task.need_trans_size, task.need_precess_cycle, task.max_time]) 170 | else: 171 | self.task_state.append([0, 0, 0, 0]) 172 | 173 | return self.self_state 174 | -------------------------------------------------------------------------------- /experiment6/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 环境6 3 | (两个动作:选择任务和选择对象) 4 | 加入了mec和车在时隙内处理任务的上限(mec最多同时处理10个任务、车最多处理5个任务) 5 | 使用经典城市道路(使用不同数量车辆和邻居) 6 | 为mec卸载和车辆卸载提供两种传输方式(即可同时像车辆和mec传输任务) 7 | """ 8 | import ptan 9 | import numpy as np 10 | import torch 11 | from torch.distributions.categorical import Categorical 12 | from env import Env 13 | 14 | 15 | def test_net(nets, env: Env, count=10): 16 | rewards = 0.0 17 | steps = 0 18 | for _ in range(count): 19 | env.reset() 20 | while steps < 1000: 21 | action = [] 22 | with torch.no_grad(): 23 | for vehicle in env.vehicles: 24 | state = torch.tensor(vehicle.self_state) 25 | _, pro = nets[vehicle.id](state) 26 | act = Categorical.sample(pro) 27 | action.append(act.item()) 28 | _, _, reward, _ = env.step(action) 29 | rewards += reward 30 | steps += 1 31 | return rewards / count, steps / count 32 | 33 | # def calc_logprob(pro_v, actions_v): 34 | # p1 = - ((mu_v - actions_v) ** 2) / (2 * torch.exp(logstd_v).clamp(min=1e-3)) 35 | # p2 = - torch.log(torch.sqrt(2 * math.pi * torch.exp(logstd_v))) 36 | # return p1 + p2 37 | -------------------------------------------------------------------------------- /experiment6/env_test.py: -------------------------------------------------------------------------------- 1 | from env import Env 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | if __name__ == '__main__': 6 | print() 7 | env = Env() 8 | env.reset() 9 | 10 | # 测试网络节点数 11 | vehicles = env.vehicles 12 | 13 | for vehicle in vehicles: 14 | print("第{}车状态:{}".format(vehicle.id, vehicle.self_state)) 15 | print("该车邻居:") 16 | for i in vehicle.neighbor: 17 | print(i.id, end=" ") 18 | print() 19 | 20 | # 测试环境运行 21 | reward = [] 22 | x = [[] for i in range(20)] 23 | y = [[] for i in range(20)] 24 | for i in range(1000): 25 | # for j in range(20): 26 | # x[j].append(env.vehicles[j].position[0]) 27 | # y[j].append(env.vehicles[j].position[1]) 28 | action1 = [] 29 | action2 = [] 30 | for j in range(20): 31 | # action1.append(np.random.randint(0, 10)) 32 | action1.append(0) 33 | # 
action2.append(np.random.randint(0, 7)) 34 | action2.append(1) 35 | Reward, _ = env.step(action1, action2) 36 | reward.append(Reward) 37 | print("第{}次平均奖励{}".format(i, Reward)) 38 | # print("当前状态:", state) 39 | # print("下一状态:", next_state) 40 | # print("车状态:", vehicleState) 41 | # print("任务状态", taskState) 42 | # print("当前奖励:", reward) 43 | # print("每个奖励,", vehicleReward) 44 | # print("当前有{}任务没有传输完成".format(len(env.need_trans_task))) 45 | # print("average reward:", env.Reward) 46 | # plt.figure(figsize=(100, 100)) 47 | # fix, ax = plt.subplots(5, 4) 48 | # 49 | # for i in range(5): 50 | # for j in range(4): 51 | # number = i * 4 + j 52 | # ax[i, j].plot(x[number], y[number]) 53 | # ax[i, j].set_title('vehicle {}'.format(number)) 54 | plt.plot(range(len(reward)), reward) 55 | print(reward) 56 | plt.show() 57 | -------------------------------------------------------------------------------- /experiment6/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import argparse 3 | import os 4 | import time 5 | from collections import namedtuple 6 | 7 | import ptan 8 | import torch 9 | import torch.nn.functional as F 10 | import model 11 | from env import Env 12 | from mec import MEC 13 | from vehicle import Vehicle 14 | from memory import ReplayMemory 15 | from tensorboardX import SummaryWriter 16 | from torch.distributions.categorical import Categorical 17 | 18 | ENV_ID = "computing offloading" 19 | GAMMA = 0.99 20 | GAE_LAMBDA = 0.95 21 | 22 | TRAJECTORY_SIZE = 65 23 | LEARNING_RATE_ACTOR = 1e-5 24 | LEARNING_RATE_CRITIC = 1e-4 25 | 26 | PPO_EPS = 0.2 27 | PPO_EPOCHES = 10 28 | PPO_BATCH_SIZE = 64 29 | 30 | TEST_ITERS = 10000 31 | Experience = namedtuple('Transition', ('state', 'action', 'reward', 'next_state')) # Define a transition tuple 32 | 33 | 34 | # 将list装换成tensor存入缓冲池中 35 | def save_experience(state, action, reward, next_state, memory: ReplayMemory): 36 | reward = torch.tensor([reward]) 37 | action = torch.tensor([action]) 38 | state = torch.tensor(state) 39 | state = state.unsqueeze(0) 40 | next_state = torch.tensor(next_state) 41 | next_state = next_state.unsqueeze(0) 42 | memory.push(state, action, reward, next_state) 43 | 44 | 45 | def calc_adv_ref(trajectory, net_crt, states_v, device="cpu"): 46 | """ 47 | By trajectory calculate advantage and 1-step ref value 48 | :param trajectory: trajectory list 49 | :param net_crt: critic network 50 | :param states_v: states tensor 51 | :return: tuple with advantage numpy array and reference values 52 | """ 53 | values_v = net_crt(torch.tensor(states_v)) 54 | values = values_v.squeeze().data.cpu().numpy() 55 | # generalized advantage estimator: smoothed version of the advantage 56 | last_gae = 0.0 57 | result_adv = [] 58 | result_ref = [] 59 | for val, next_val, exp in zip(reversed(values[:-1]), 60 | reversed(values[1:]), 61 | reversed(trajectory[:-1])): 62 | delta = exp.vehicleReward + GAMMA * next_val - val 63 | last_gae = delta + GAMMA * GAE_LAMBDA * last_gae 64 | result_adv.append(last_gae) 65 | result_ref.append(last_gae + val) 66 | 67 | adv_v = torch.FloatTensor(list(reversed(result_adv))) 68 | ref_v = torch.FloatTensor(list(reversed(result_ref))) 69 | return adv_v.to(device), ref_v.to(device) 70 | 71 | 72 | # 将状态信息放入各自的缓冲池中 73 | def push(env, state, actions, next_state): 74 | for i, vehicle in enumerate(env.vehicles): 75 | if vehicle.task is not None: # 没有任务不算经验 76 | continue 77 | exp = Experience(state, actions[i], env.vehicleReward[i][-1], next_state) 78 | 
vehicle.buffer.append(exp) 79 | 80 | 81 | if __name__ == '__main__': 82 | task = MEC([10, 20]) 83 | vehicle = Vehicle(1, [10, 20], 'd') 84 | print(type(task) == MEC) 85 | print(type(task) == Vehicle) 86 | print(type(vehicle) == Vehicle) 87 | print(type(vehicle)) 88 | print(vehicle) 89 | -------------------------------------------------------------------------------- /experiment6/mec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | RANGE_MEC = 650 # MEC通信范围 /m 4 | RESOURCE = 10000 # 可用资源 MHz 5 | MAX_QUEUE = 10 6 | 7 | 8 | # 边缘服务器 9 | class MEC: 10 | def __init__(self, position, resources=RESOURCE, max_queue=MAX_QUEUE): 11 | self.loc_x = position[0] 12 | self.loc_y = position[1] 13 | self.loc = position 14 | # 当前可用资源 MHz 15 | self.resources = resources 16 | self.state = [] 17 | # 通信范围 m 18 | self.range = RANGE_MEC 19 | # 当前接到需要处理的任务信息(最多同时处理10个任务) 20 | self.accept_task = [] 21 | # 最多处理任务量 22 | self.max_task = 10 23 | # 接受任务的数量 24 | self.sum_needDeal_task = 0 25 | # 此时刻有多少动作选则我 多少任务选择传输给我 26 | self.len_action = 0 27 | # 等待计算的任务队列(理解为挂起状态) 28 | self.task_queue = [] 29 | # 用于奖励计算的任务队列 30 | self.task_queue_for_reward = [] 31 | # 队列最长长度 32 | self.max_queue = max_queue 33 | # 当前状态 34 | self.get_state() 35 | 36 | @property 37 | def get_x(self): 38 | return self.loc_x 39 | 40 | @property 41 | def get_y(self): 42 | return self.loc_y 43 | 44 | @property 45 | def get_location(self): 46 | return self.loc 47 | 48 | """ 49 | 获得状态 50 | """ 51 | 52 | def get_state(self): 53 | """ 54 | :return:state 维度:[loc_x,loc_y,sum_needDeal_task,resources] 55 | """ 56 | self.state = [] 57 | self.state.extend(self.loc) 58 | self.state.append(self.sum_needDeal_task) 59 | self.state.append(self.len_action) 60 | self.state.append(self.resources) 61 | return self.state 62 | -------------------------------------------------------------------------------- /experiment6/memory.py: -------------------------------------------------------------------------------- 1 | # 经验类型 2 | import collections 3 | from collections import namedtuple 4 | import numpy as np 5 | 6 | Experience = namedtuple('Transition', 7 | field_names=['vehicle_state', 'neighbor_state', 'task_state', 'all_vehicle_state', 8 | 'task_action', 'aim_action', 'reward', 9 | 'next_vehicle_state', 'next_neighbor_state', 'next_task_state', 10 | 'next_all_vehicle_state']) # Define a transition tuple 11 | 12 | 13 | class ExperienceBuffer: 14 | def __init__(self, capacity): 15 | self.maxLen = capacity 16 | self.buffer = collections.deque(maxlen=capacity) # 队列,先进先出 17 | 18 | def __len__(self): 19 | return len(self.buffer) 20 | 21 | def append(self, experience: Experience): 22 | self.buffer.append(experience) 23 | 24 | def sample(self, batch_size): 25 | indices = np.random.choice(len(self.buffer), batch_size, replace=False) 26 | vehicle_state, neighbor_state, task_state, all_vehicle_state, \ 27 | task_action, aim_action, reward, \ 28 | next_vehicle_state, next_neighbor_state, next_task_state, next_all_vehicle_state = zip( 29 | *[self.buffer[idx] for idx in indices]) 30 | # 转换成numpy 31 | return np.array(vehicle_state), np.array(neighbor_state), \ 32 | np.array(task_state), np.array(all_vehicle_state), \ 33 | np.array(task_action), np.array(aim_action), \ 34 | np.array(reward, dtype=np.float32), \ 35 | np.array(next_vehicle_state), np.array(next_neighbor_state), \ 36 | np.array(next_task_state), np.array(next_all_vehicle_state) 37 | 38 | 39 | def clear(self): 40 | """ 41 | 清空 42 | """ 43 | self.buffer = 
collections.deque(maxlen=self.maxLen) 44 | -------------------------------------------------------------------------------- /experiment6/model.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | from torch.nn import functional as F 7 | from torch.distributions.categorical import Categorical 8 | 9 | HID_SIZE = 64 10 | HID_SIZE_MIN = 32 11 | 12 | 13 | class TargetNet: 14 | """ 15 | Wrapper around model which provides copy of it instead of trained weights 16 | """ 17 | 18 | def __init__(self, model): 19 | self.model = model 20 | self.target_model = copy.deepcopy(model) 21 | 22 | def sync(self): 23 | self.target_model.load_state_dict(self.model.state_dict()) 24 | 25 | # 软更新 26 | def alpha_sync(self, alpha): 27 | """ 28 | Blend params of target net with params from the model 29 | :param alpha: 30 | """ 31 | assert isinstance(alpha, float) 32 | assert 0.0 < alpha <= 1.0 33 | state = self.model.state_dict() 34 | tgt_state = self.target_model.state_dict() 35 | for k, v in state.items(): 36 | tgt_state[k] = tgt_state[k] * alpha + (1 - alpha) * v 37 | self.target_model.load_state_dict(tgt_state) 38 | 39 | 40 | class ModelActor(nn.Module): 41 | def __init__(self, obs_dim, neighbor_dim, task_dim, task_aim_dim, act_aim_dim): 42 | super(ModelActor, self).__init__() 43 | 44 | self.cnn_task = CNNLayer(task_dim, HID_SIZE) 45 | self.cnn_neighbor = CNNLayer(neighbor_dim, HID_SIZE_MIN) 46 | self.same = nn.Sequential( 47 | nn.Linear(HID_SIZE + HID_SIZE_MIN + obs_dim, 2 * HID_SIZE), 48 | nn.ReLU(), 49 | nn.Linear(2 * HID_SIZE, HID_SIZE), 50 | nn.ReLU(), 51 | nn.Linear(HID_SIZE, 2 * HID_SIZE), 52 | nn.ReLU(), 53 | ) 54 | self.task = nn.Sequential( 55 | nn.Linear(2 * HID_SIZE, HID_SIZE), 56 | nn.ReLU(), 57 | nn.Linear(HID_SIZE, HID_SIZE_MIN), 58 | nn.ReLU(), 59 | nn.Linear(HID_SIZE_MIN, task_aim_dim), 60 | ) 61 | self.act = nn.Sequential( 62 | nn.Linear(2 * HID_SIZE, HID_SIZE), 63 | nn.ReLU(), 64 | nn.Linear(HID_SIZE, HID_SIZE_MIN), 65 | nn.ReLU(), 66 | nn.Linear(HID_SIZE_MIN, act_aim_dim), 67 | ) 68 | 69 | def forward(self, obs, neighbor, task, train=True): 70 | task_out = self.cnn_task(task) 71 | neighbor_out = self.cnn_neighbor(neighbor) 72 | x = torch.cat((task_out, neighbor_out, obs), -1) 73 | same_out = self.same(x) 74 | act_out = self.act(same_out) 75 | task_out = self.task(same_out) 76 | 77 | # 训练完成之后无需添加噪音 78 | if train: 79 | # act_out += torch.tensor(np.random.normal(size=act_out.shape)) 80 | # task_out += torch.tensor(np.random.normal(size=task_out.shape)) 81 | act_out = F.gumbel_softmax(act_out, hard=True) 82 | task_out = F.gumbel_softmax(task_out, hard=True) 83 | # else: 84 | # task_out = F.softmax(task_out, dim=-1) 85 | # act_out = F.softmax(act_out, dim=-1) 86 | 87 | # act_pro = F.softmax(act_out, dim=-1) 88 | # task_pro = F.softmax(task_out, dim=-1) 89 | # print(act_pro) 90 | # print(torch.sum(act_pro)) 91 | # print(task_pro) 92 | # return act_pro, task_pro # 打印网络结构用 93 | # return Categorical(task_pro), Categorical(act_pro) # 真实使用 94 | return task_out, act_out 95 | 96 | 97 | class ModelCritic(nn.Module): 98 | def __init__(self, obs_size, task_size, task_action_size, aim_action_size): 99 | super(ModelCritic, self).__init__() 100 | 101 | self.cnn = CNNLayer(obs_size, HID_SIZE) 102 | 103 | self.task_cnn = CNNLayer(task_size, HID_SIZE) 104 | 105 | self.task_value = nn.Sequential( 106 | nn.Linear(HID_SIZE * 2 + task_action_size, HID_SIZE * 2), 107 | nn.ReLU(), 108 | 
nn.Linear(HID_SIZE * 2, HID_SIZE), 109 | nn.ReLU(), 110 | nn.Linear(HID_SIZE, HID_SIZE_MIN), 111 | nn.ReLU(), 112 | nn.Linear(HID_SIZE_MIN, 1), 113 | ) 114 | self.aim_value = nn.Sequential( 115 | nn.Linear(HID_SIZE * 2 + aim_action_size, HID_SIZE * 2), 116 | nn.ReLU(), 117 | nn.Linear(HID_SIZE * 2, HID_SIZE), 118 | nn.ReLU(), 119 | nn.Linear(HID_SIZE, HID_SIZE_MIN), 120 | nn.ReLU(), 121 | nn.Linear(HID_SIZE_MIN, 1), 122 | ) 123 | 124 | def forward(self, states_v, task_states_v, task_action_v, aim_action_v): 125 | cnn_out = self.cnn(states_v) 126 | task_out = self.task_cnn(task_states_v) 127 | 128 | v = torch.cat((cnn_out, task_out), -1) 129 | task_value = self.task_value(torch.cat((v, task_action_v), -1)) 130 | aim_value = self.aim_value(torch.cat((v, aim_action_v), -1)) 131 | return task_value, aim_value 132 | 133 | 134 | class CNNLayer(nn.Module): 135 | def __init__(self, obs_shape, hidden_size, use_orthogonal=True, use_ReLU=True, kernel_size=3, stride=1): 136 | super(CNNLayer, self).__init__() 137 | 138 | active_func = [nn.Tanh(), nn.ReLU()][use_ReLU] 139 | init_method = [nn.init.xavier_uniform_, nn.init.orthogonal_][use_orthogonal] 140 | gain = nn.init.calculate_gain(['tanh', 'relu'][use_ReLU]) 141 | 142 | def init_(m): # 权重使用正交初始化,激活函数使用relu 143 | return init(m, init_method, lambda x: nn.init.constant_(x, 0), gain=gain) 144 | 145 | in_channels = obs_shape[0] 146 | input_width = obs_shape[1] 147 | input_height = obs_shape[2] 148 | 149 | self.cnn = nn.Sequential( 150 | init_(nn.Conv2d( 151 | in_channels=in_channels, 152 | out_channels=hidden_size // 2, 153 | kernel_size=kernel_size, 154 | stride=stride) 155 | ), 156 | active_func, 157 | # nn.AvgPool2d( 158 | # kernel_size=kernel_size, 159 | # stride=stride), 160 | # active_func, 161 | # init_(nn.Conv2d( 162 | # in_channels=3, 163 | # out_channels=1, 164 | # kernel_size=kernel_size, 165 | # stride=stride) 166 | # ), 167 | # active_func, 168 | # nn.AvgPool2d( 169 | # kernel_size=kernel_size, 170 | # stride=stride), 171 | # active_func, 172 | nn.Flatten(), 173 | init_(nn.Linear( 174 | hidden_size // 2 * (input_width - kernel_size + stride) * (input_height - kernel_size + stride), 175 | hidden_size) 176 | ), 177 | active_func, 178 | init_(nn.Linear(hidden_size, hidden_size)), active_func) 179 | 180 | def forward(self, x): 181 | x = x / 255.0 182 | x = self.cnn(x) 183 | 184 | return x 185 | 186 | 187 | def init(module, weight_init, bias_init, gain=1): 188 | weight_init(module.weight.data, gain=gain) 189 | bias_init(module.bias.data) 190 | return module 191 | -------------------------------------------------------------------------------- /experiment6/task.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | np.random.seed(2) 4 | 5 | 6 | class Task: 7 | """ 8 | 定义任务类型 9 | """ 10 | 11 | def __init__(self, vehicle=None, createTime=0): 12 | # 产生任务的车辆 13 | self.vehicle = vehicle 14 | # 完成该任务所消耗的资源 15 | self.aim = None # 传送对象 16 | 17 | self.max_time = 50 # ms 最大容忍时间 18 | self.size = np.random.uniform(0.2, 1) # Mb 19 | self.cycle = np.random.randint(50, 100) # cycle/bit 20 | self.need_trans_size = self.size * np.power(2, 10) # Kb 还剩余多少未传输完成 21 | self.need_precess_cycle = self.cycle * self.size * 1000 # Mb * cycle/byte =M cycle 还剩余多少轮次未完成(10^6) 22 | self.need_time = 0 # 需要计算时间 23 | self.hold_time = 0 # 任务在计算等待队列中得等待时间 24 | 25 | self.rate = 0 # 当前速率 26 | 27 | self.compute_resource = 0 28 | 29 | self.create_time = createTime # 任务产生时间 30 | self.pick_time = 0 # 被选择的时间(出队列时间) 31 | 32 | # 
完成该任务所消耗的cup资源 33 | self.energy = 0 34 | self.trans_time = 0 # 传输所需要的时间(实际) 35 | self.precess_time = 0 # 任务处理所需要的时间(实际) 36 | -------------------------------------------------------------------------------- /experiment6/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | import matplotlib 5 | from pylab import mpl 6 | import model 7 | from env import Env 8 | import matplotlib.pyplot as plt 9 | 10 | # 设置显示中文字体 11 | mpl.rcParams["font.sans-serif"] = ["SimHei"] 12 | matplotlib.rcParams['axes.unicode_minus'] = False 13 | np.random.seed(2) 14 | 15 | if __name__ == '__main__': 16 | env = Env() 17 | env.reset() 18 | 19 | N = env.num_Vehicles 20 | vehicles = env.vehicles 21 | models = [] 22 | 23 | # 初始化网络 24 | TASK_DIM = 5 25 | AIM_DIM = len(vehicles[0].neighbor) + 2 26 | vehicle_shape = len(vehicles[0].self_state) 27 | neighbor_shape = np.array([vehicles[0].neighbor_state]).shape 28 | task_shape = np.array([vehicles[0].task_state]).shape 29 | for i in range(N): 30 | tgt_model = model.ModelActor(vehicle_shape, neighbor_shape, task_shape, TASK_DIM, AIM_DIM) 31 | tgt_model.load_state_dict(torch.load( 32 | "D:\pycharm\Project\VML\MyErion\experiment6\\result\\2022-12-09-05-11\\vehicle{}.pkl".format(i))) 33 | models.append(tgt_model) 34 | 35 | # state_v = torch.tensor([vehicles[i].otherState], dtype=torch.float32) 36 | # taskState_v = torch.tensor([[vehicles[i].taskState]], dtype=torch.float32) 37 | # taskAction, aimAction = models[0](state_v, taskState_v) 38 | 39 | vehicleReward = [] 40 | averageReward = [] 41 | for step in range(10000): 42 | action1 = [] 43 | action2 = [] 44 | 45 | for i in range(N): 46 | state_v = torch.tensor([vehicles[i].self_state], dtype=torch.float32) 47 | neighbor_state_v = torch.tensor([[vehicles[i].neighbor_state]], dtype=torch.float32) 48 | taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 49 | taskAction, aimAction = models[i](state_v, neighbor_state_v, taskState_v, False) 50 | 51 | # taskAction = np.array(taskAction, dtype=np.float32).reshape(-1) 52 | # aimAction = np.array(aimAction, dtype=np.float32).reshape(-1) 53 | taskAction = taskAction.detach().numpy().reshape(-1) 54 | aimAction = aimAction.detach().numpy().reshape(-1) 55 | action1.append(np.argmax(taskAction)) 56 | # action1.append(0) 57 | action2.append(np.argmax(aimAction)) 58 | 59 | print(action1) 60 | print(action2) 61 | Reward, reward = env.step(action1, action2) 62 | vehicleReward.append(reward[5]) 63 | averageReward.append(Reward) 64 | print("第{}次车辆平均奖励{}".format(step, Reward)) 65 | 66 | fig, aix = plt.subplots(2, 1) 67 | aix[0].plot(range(len(vehicleReward)), vehicleReward) 68 | aix[1].plot(range(len(averageReward)), averageReward) 69 | plt.show() 70 | -------------------------------------------------------------------------------- /experiment6/vehicle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from memory import ExperienceBuffer 6 | from task import Task 7 | 8 | Dv = 100 # 车的最大通信范围 9 | Fv = 4000 # 车最大计算能力 MHZ 10 | MAX_TASK = 10 # 任务队列最大长度 11 | 12 | CAPACITY = 100000 # 缓冲池大小 13 | TASK_SOLT = 10 # 任务产生时隙 14 | 15 | # 等待队列最长长度 16 | MAX_QUEUE = 10 17 | 18 | np.random.seed(2) 19 | 20 | direction_map = {"d": 1, "u": 2, "l": 3, "r": 4} 21 | 22 | 23 | class Vehicle: 24 | # 位置:x,y 速度、方向:-1左,1右 25 | def __init__(self, id, position, direction, velocity=20, max_queue=MAX_QUEUE): 26 | self.id = id 27 | # 
车的位置信息 28 | self.loc_x = position[0] 29 | self.loc_y = position[1] 30 | self.position = position 31 | self.velocity = velocity # m/s 32 | self.direction = direction 33 | # 通信范围 34 | self.range = Dv 35 | # 邻居表 36 | self.neighbor = [] 37 | # mec 38 | self.Mec = None 39 | # 当前时间 40 | self.cur_frame = 0 41 | # 接受的任务的列表(最多同时处理5个任务) 42 | self.accept_task = [] 43 | # 最多处理任务量 44 | self.max_task = 3 45 | # 等待队列最长长度 46 | self.max_queue = max_queue 47 | # 等待计算的任务队列(理解为挂起状态) 48 | self.task_queue = [] 49 | # 用于奖励计算的任务队列 50 | self.task_queue_for_reward = [] 51 | # 接受任务的数量 52 | self.sum_needDeal_task = 0 53 | # 此时刻有多少动作选则我 54 | self.len_action = 0 55 | # 当前可用资源 56 | self.resources = round((1 - np.random.randint(1, 4) / 10) * Fv, 2) # MHz 57 | # 表示当前是否有任务正在传输给邻居车辆(0:没有,1:有) 58 | self.trans_task_for_vehicle = 0 59 | # 当前是否有任务正在传输给mec 60 | self.trans_task_for_mec = 0 61 | # 当前处理的任务(用于计算奖励,不用于状态信息) 62 | self.cur_task = None 63 | # 任务队列 64 | self.total_task = [] 65 | # 任务队列的长度 66 | self.len_task = len(self.total_task) 67 | 68 | # 自身状态信息 69 | self.self_state = [] 70 | # 当前任务队列状态 71 | self.task_state = [] 72 | # 邻居状态信息 73 | self.neighbor_state = [] 74 | # 缓冲池 75 | self.buffer = ExperienceBuffer(CAPACITY) 76 | # 总奖励 77 | self.reward = [] 78 | # 任务溢出的数量 79 | self.overflow = 0 80 | # 上一个任务产生的时间 81 | self.lastCreatWorkTime = 0 82 | 83 | # 产生任务 84 | self.create_work() 85 | 86 | # 获得位置 87 | @property 88 | def get_location(self): 89 | return self.position 90 | 91 | # 设置位置 92 | def set_location(self, loc_x, loc_y): 93 | self.loc_x = loc_x 94 | self.loc_y = loc_y 95 | self.position = [self.loc_x, self.loc_y] 96 | 97 | # 获得x 98 | @property 99 | def get_x(self): 100 | return self.loc_x 101 | 102 | # 获得y 103 | @property 104 | def get_y(self): 105 | return self.loc_y 106 | 107 | # 产生任务 传入当前时间 108 | def create_work(self): 109 | if self.id % 3 == 0: 110 | return 111 | # 每隔一段时间进行一次任务产生 112 | if (self.cur_frame - self.lastCreatWorkTime) % TASK_SOLT == 0: 113 | # # 每次有0.6的概率产生任务 114 | if np.random.random() < 0.6: 115 | if self.len_task < MAX_TASK: # 队列不满 116 | task = Task(self, self.cur_frame) 117 | self.lastCreatWorkTime = self.cur_frame 118 | self.total_task.append(task) 119 | self.len_task += 1 120 | # print("第{}辆车产生了任务".format(self.id)) 121 | self.overflow = 0 122 | else: 123 | # print("第{}辆车任务队列已满".format(self.id)) 124 | self.overflow = 1 125 | 126 | """ 127 | 获得状态 128 | """ 129 | 130 | def get_state(self): 131 | self.self_state = [] 132 | self.neighbor_state = [] 133 | self.task_state = [] 134 | 135 | # 位置信息 4 136 | self.self_state.extend(self.position) 137 | self.self_state.append(self.velocity) 138 | self.self_state.append(direction_map.get(self.direction)) 139 | 140 | # 资源信息(可用资源) 141 | self.self_state.append(self.resources) 142 | 143 | # 当前处理的任务量 144 | self.self_state.append(self.sum_needDeal_task) 145 | # 当前接受传输的任务量 146 | self.self_state.append(self.len_action) 147 | 148 | # 当前是否有任务在传输 149 | self.self_state.append(self.trans_task_for_vehicle) 150 | self.self_state.append(self.trans_task_for_mec) 151 | 152 | # 邻居表 7*数量 153 | for neighbor in self.neighbor: 154 | state = [] 155 | state.extend(neighbor.position) # 位置 156 | state.append(neighbor.velocity) # 速度 157 | state.append(direction_map.get(neighbor.direction)) # 方向 158 | state.append(neighbor.resources) # 可用资源 159 | state.append(neighbor.sum_needDeal_task) # 处理任务长度 160 | state.append(neighbor.len_action) # 当前正在接受传输任务数量 161 | self.neighbor_state.append(state) 162 | 163 | self.self_state.extend(self.Mec.state) 164 | 165 | # 任务状态信息 166 | for i in range(MAX_TASK): 167 | 
if i < self.len_task: 168 | task = self.total_task[i] 169 | self.task_state.append([task.create_time, task.need_trans_size, task.need_precess_cycle, task.max_time]) 170 | else: 171 | self.task_state.append([0, 0, 0, 0]) 172 | 173 | return self.self_state 174 | -------------------------------------------------------------------------------- /experiment7/MyQueue.py: -------------------------------------------------------------------------------- 1 | class MyQueue: 2 | def __init__(self): 3 | self.items = [] 4 | 5 | def push(self, item): 6 | self.items.append(item) 7 | 8 | def pop(self): 9 | if self.is_empty(): 10 | return None 11 | return self.items.pop(0) 12 | 13 | def peek(self): 14 | return self.items[0] 15 | 16 | def getLast(self): 17 | if self.is_empty(): 18 | return None 19 | return self.items[len(self.items) - 1] 20 | 21 | def is_empty(self): 22 | return len(self.items) == 0 23 | 24 | def size(self): 25 | return len(self.items) 26 | -------------------------------------------------------------------------------- /experiment7/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 环境4 3 | (两个动作:选择任务和选择对象) 4 | 加入了mec和车在时隙内处理任务的上限(mec最多同时处理10个任务、车最多处理5个任务) 5 | 使用经典城市道路(使用不同数量车辆和邻居) 6 | 为mec卸载和车辆卸载提供两种传输方式(即可同时像车辆和mec传输任务) 7 | """ 8 | import ptan 9 | import numpy as np 10 | import torch 11 | from torch.distributions.categorical import Categorical 12 | from env import Env 13 | 14 | 15 | def test_net(nets, env: Env, count=10): 16 | rewards = 0.0 17 | steps = 0 18 | for _ in range(count): 19 | env.reset() 20 | while steps < 1000: 21 | action = [] 22 | with torch.no_grad(): 23 | for vehicle in env.vehicles: 24 | state = torch.tensor(vehicle.self_state) 25 | _, pro = nets[vehicle.id](state) 26 | act = Categorical.sample(pro) 27 | action.append(act.item()) 28 | _, _, reward, _ = env.step(action) 29 | rewards += reward 30 | steps += 1 31 | return rewards / count, steps / count 32 | 33 | # def calc_logprob(pro_v, actions_v): 34 | # p1 = - ((mu_v - actions_v) ** 2) / (2 * torch.exp(logstd_v).clamp(min=1e-3)) 35 | # p2 = - torch.log(torch.sqrt(2 * math.pi * torch.exp(logstd_v))) 36 | # return p1 + p2 37 | -------------------------------------------------------------------------------- /experiment7/dqn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import time 4 | from collections import namedtuple 5 | 6 | import matplotlib 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | from pylab import mpl 13 | import matplotlib.font_manager as fm 14 | import netron 15 | 16 | from env import Env 17 | from model import DQN, DQNCNN 18 | from test.test import C 19 | 20 | np.random.seed(2) 21 | 22 | # 设置显示中文字体 23 | mpl.rcParams["font.sans-serif"] = ["SimHei"] 24 | matplotlib.rcParams['axes.unicode_minus'] = False 25 | os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" 26 | # 加载 Times New Roman 字体 27 | font_path = 'C:/Windows/Fonts/times.ttf' 28 | prop = fm.FontProperties(fname=font_path, size=8) 29 | 30 | Experience = namedtuple('Transition', 31 | field_names=['cur_otherState', 'cur_TaskState', "cur_NeighborState", # 状态 32 | 'taskAction', 'aimAction', # 动作 33 | 'reward', # 奖励 34 | 'next_otherState', 'next_TaskState', 35 | 'next_NeighborState']) # Define a transition tuple 36 | GAMMA = 0.99 37 | BATCH_SIZE = 32 38 | REPLAY_SIZE = 100 39 | LEARNING_RATE = 1e-4 40 | SYNC_TARGET_FRAMES = 100 # 更新目标网络频率 41 
| 42 | EPSILON_DECAY_LAST_FRAME = 150000 43 | EPSILON_START = 0.8 44 | EPSILON_FINAL = 0.01 45 | EPSILON = 200000 46 | 47 | RESET = 1000 # 重置游戏次数 48 | 49 | MAX_TASK = 10 # 任务队列最大长度 50 | 51 | momentum = 0.005 52 | 53 | RESOURCE = [0.2, 0.4, 0.6, 0.8] 54 | 55 | 56 | @torch.no_grad() 57 | def play_step(env, epsilon, models): 58 | vehicles = env.vehicles 59 | old_otherState = [] 60 | old_taskState = [] 61 | old_neighborState = [] 62 | 63 | actionTask = [] 64 | actionAim = [] 65 | # 贪心选择动作 66 | for i, model in enumerate(models): 67 | old_otherState.append(vehicles[i].self_state) 68 | old_taskState.append(vehicles[i].task_state) 69 | old_neighborState.append(vehicles[i].neighbor_state) 70 | if np.random.random() < epsilon: 71 | # 随机动作 72 | actionTask.append(np.random.randint(0, 5)) 73 | actionAim.append(np.random.randint(0, 7)) # local+mec+neighbor 74 | else: 75 | state_v = torch.tensor([vehicles[i].self_state], dtype=torch.float32) 76 | taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 77 | neighborState_v = torch.tensor([[vehicles[i].neighbor_state]], dtype=torch.float32) 78 | taskAction, aimAction = model(state_v, taskState_v, neighborState_v) 79 | 80 | taskAction = np.array(taskAction, dtype=np.float32).reshape(-1) 81 | aimAction = np.array(aimAction, dtype=np.float32).reshape(-1) 82 | 83 | actionAim.append(np.argmax(aimAction)) 84 | actionTask.append(np.argmax(taskAction)) 85 | # print("action:", action) 86 | _, _, _, otherState, _, taskState, neighborState, Reward, reward = env.step(actionTask, actionAim) 87 | # print("reward:", reward) 88 | 89 | # 加入各自的缓存池【当前其他状态、当前任务状态、目标动作、任务动作,下一其他状态、下一任务状态】 90 | for i, vehicle in enumerate(vehicles): 91 | exp = Experience(old_otherState[i], [old_taskState[i]], [old_neighborState[i]], 92 | actionTask[i], actionAim[i], 93 | reward[i], 94 | otherState[i], [taskState[i]], [neighborState[i]]) 95 | vehicle.buffer.append(exp) 96 | return round(Reward, 2) # 返回总的平均奖励 97 | 98 | 99 | # 计算一个智能体的损失 100 | def calc_loss(batch, net: DQNCNN, tgt_net: DQNCNN, device="cpu"): 101 | c=C() 102 | cur_otherState, cur_TaskState, curNeighborState, taskAction, aimAction, rewards, next_otherState, next_TaskState, next_NeighborState = batch # 103 | 104 | otherStates_v = torch.tensor(np.array(cur_otherState, copy=False), dtype=torch.float32).to(device) 105 | taskStates_v = torch.tensor(np.array(cur_TaskState, copy=False), dtype=torch.float32).to(device) 106 | neighborStates_v = torch.tensor(np.array(curNeighborState, copy=False), dtype=torch.float32).to(device) 107 | # print("states_v:", states_v) # batch状态 108 | taskActions_v = torch.tensor(np.array(taskAction), dtype=torch.int64).to(device) 109 | aimActions_v = torch.tensor(np.array(aimAction), dtype=torch.int64).to(device) 110 | # print("actions_v", actions_v) # batch动作 111 | rewards_v = torch.tensor(np.array(rewards), dtype=torch.float32).to(device) 112 | # print("rewards_v", rewards_v) # batch奖励 113 | next_otherStates_v = torch.tensor(np.array(next_otherState, copy=False), dtype=torch.float32).to(device) 114 | next_taskStates_v = torch.tensor(np.array(next_TaskState, copy=False), dtype=torch.float32).to(device) 115 | next_NeighborState_v = torch.tensor(np.array(next_NeighborState, copy=False), dtype=torch.float32).to(device) 116 | # print("next_states_v", next_states_v) # batch下一个状态 117 | 118 | # 计算当前网络q值 119 | taskActionValues, aimActionValues = net(otherStates_v, 120 | taskStates_v, 121 | neighborStates_v) # .gather(1, aimActions_v.unsqueeze(-1)).squeeze(-1) 122 | taskActionValues = 
taskActionValues.gather(1, taskActions_v.unsqueeze(-1)).squeeze(-1) 123 | aimActionValues = aimActionValues.gather(1, aimActions_v.unsqueeze(-1)).squeeze(-1) 124 | 125 | # Q-values from the target network 126 | next_taskActionValues, next_aimActionValues = tgt_net(next_otherStates_v, 127 | next_taskStates_v, 128 | next_NeighborState_v) # .max(1)[0] # take the maximum Q-value 129 | 130 | next_taskActionValues = next_taskActionValues.max(1)[0].detach() 131 | next_aimActionValues = next_aimActionValues.max(1)[0].detach() 132 | 133 | # keep gradients from flowing into the network that approximates next-state Q-values 134 | # next_states_values = next_aimActionValues.detach() 135 | # print("next_states_values", next_states_values) 136 | expected_aim_values = next_aimActionValues * GAMMA + rewards_v 137 | expected_task_values = next_taskActionValues * GAMMA + rewards_v 138 | # print(" expected_state_values", expected_state_values) 139 | 140 | return nn.MSELoss()(taskActionValues, expected_task_values), nn.MSELoss()(aimActionValues, expected_aim_values) 141 | 142 | 143 | if __name__ == '__main__': 144 | env = Env() 145 | env.reset() 146 | 147 | frame_idx = 0 148 | # writer = SummaryWriter(comment="-" + env.__doc__) 149 | agents = env.vehicles 150 | models = [] 151 | tgt_models = [] 152 | optimizers = [] 153 | task_shape = np.array([agents[0].task_state]).shape 154 | neighbor_shape = np.array([agents[0].neighbor_state]).shape 155 | for agent in agents: 156 | # print(agent.get_location, agent.velocity) 157 | 158 | # print(task_shape) 159 | model = DQNCNN(len(agent.self_state), task_shape, neighbor_shape, MAX_TASK, len(agent.neighbor) + 2) 160 | models.append(model) 161 | optimizer = optim.RMSprop(params=model.parameters(), lr=LEARNING_RATE, momentum=momentum) 162 | optimizers.append(optimizer) 163 | for agent in agents: 164 | # print(agent.get_location, agent.velocity) 165 | # task_shape = np.array([agent.task_state]).shape 166 | # print(task_shape) 167 | model = DQNCNN(len(agent.self_state), task_shape, neighbor_shape, MAX_TASK, len(agent.neighbor) + 2) 168 | model.load_state_dict(models[agent.id].state_dict()) 169 | tgt_models.append(model) 170 | 171 | # Dump the network structure for visualization with netron 172 | # model = models[0] 173 | # state_v = torch.tensor([env.vehicles[0].self_state], dtype=torch.float32) 174 | # taskState_v = torch.tensor([[env.vehicles[0].task_state]], dtype=torch.float32) 175 | # neighbor_v = torch.tensor([[env.vehicles[0].neighbor_state]], dtype=torch.float32) 176 | # # for the case where the model exists but no trained .pth file has been saved yet 177 | # modelpath = "./netStruct/demo.onnx" # path where the exported model structure is saved 178 | # torch.onnx.export(model, (state_v, taskState_v, neighbor_v), modelpath) # export and save 179 | # netron.start(modelpath) 180 | 181 | total_reward = [] 182 | recent_reward = [] 183 | loss_task_list = [] 184 | loss_aim_list = [] 185 | reward_1 = [] 186 | 187 | epsilon = EPSILON_START 188 | episode = EPSILON 189 | while episode > 0: 190 | frame_idx += 1 191 | # reset the environment 192 | # if frame_idx % RESET == 0: 193 | # print("environment reset") 194 | # # memory = [] 195 | # # for vehicle in env.vehicles: 196 | # # memory.append(vehicle.buffer) 197 | # env.reset() 198 | # agents = env.vehicles 199 | # # for i, vehicle in enumerate(agents): 200 | # # vehicle.buffer = memory[i] 201 | print("step {}".format(frame_idx)) 202 | epsilon = max(EPSILON_FINAL, EPSILON_START - frame_idx / EPSILON_DECAY_LAST_FRAME) 203 | reward = play_step(env, epsilon, models) 204 | total_reward.append(reward) 205 | print("current reward:", reward) 206 | print("mean reward over the last 100 steps:", np.mean(total_reward[-100:])) 207 | recent_reward.append(np.mean(total_reward[-100:])) 208 | # if
np.mean(total_reward[-100:]) > 0.7: 209 | # break 210 | 211 | for i, agent in enumerate(agents): 212 | # print("length of {} buffer".format(agent.id), len(agent.buffer)) 213 | if len(agent.buffer) < REPLAY_SIZE: # the replay buffer has to fill up first 214 | continue 215 | if frame_idx % SYNC_TARGET_FRAMES == 0: # sync the target network 216 | tgt_models[i].load_state_dict(models[i].state_dict()) 217 | optimizers[i].zero_grad() 218 | batch = agent.buffer.sample(BATCH_SIZE) 219 | loss_task, loss_aim = calc_loss(batch, models[i], tgt_models[i]) 220 | if i == 2: 221 | print("loss:", loss_task, " ", loss_aim) 222 | # loss_t.backward() 223 | torch.autograd.backward([loss_task, loss_aim]) 224 | # total_loss = 0.6 * loss_aim + 0.4 * loss_task 225 | optimizers[i].step() 226 | episode -= 1 227 | if frame_idx % 10000 == 0 and frame_idx != 0: 228 | cur_time = time.strftime("%Y-%m-%d-%H", time.localtime(time.time())) + "-" + str(frame_idx) 229 | # create the output directory 230 | os.makedirs("D:/pycharm/Project/VML/MyErion/experiment7/result/" + cur_time) 231 | for i, vehicle in enumerate(env.vehicles): 232 | # save each vehicle's network 233 | torch.save(tgt_models[i].state_dict(), 234 | "D:/pycharm/Project/VML/MyErion/experiment7/result/" + cur_time + "/vehicle" + str( 235 | i) + ".pkl") 236 | 237 | plt.plot(range(len(recent_reward)), recent_reward) 238 | plt.ylabel("Average Reward", fontproperties=prop) 239 | plt.xlabel("Episode", fontproperties=prop) 240 | plt.show() 241 | -------------------------------------------------------------------------------- /experiment7/env_test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from experiment7 import model 4 | from env import Env 5 | import os 6 | import numpy as np 7 | from mecEnv import MecEnv 8 | import matplotlib.pyplot as plt 9 | 10 | os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" 11 | np.random.seed(2) 12 | if __name__ == '__main__': 13 | print() 14 | # env = Env() 15 | env = MecEnv() 16 | env.reset() 17 | 18 | # Check the task-state dimensions 19 | # task = np.array(env.taskState) 20 | # print(task.shape) 21 | vehicles = env.vehicles 22 | 23 | # for vehicle in vehicles: 24 | # print("vehicle {} state: {}".format(vehicle.id, vehicle.self_state)) 25 | # print("its neighbors:") 26 | # for i in vehicle.neighbor: 27 | # print(i.id, end=" ") 28 | # print() 29 | 30 | # Run the environment 31 | reward = [] 32 | models = [] 33 | 34 | # task_shape = np.array([vehicles[0].task_state]).shape 35 | # for i in range(env.num_Vehicles): 36 | # # load the trained model 37 | # tgt_model = model.DQN(len(vehicles[0].self_state), task_shape, 10, len(vehicles[0].neighbor) + 2) 38 | # tgt_model.load_state_dict( 39 | # torch.load("D:\\pycharm\\Project\\VML\\MyErion\\experiment7\\result\\2023-05-23\\vehicle{}.pkl".format(i))) 40 | # models.append(tgt_model) 41 | for step in range(100): 42 | # for j in range(20): 43 | # x[j].append(env.vehicles[j].position[0]) 44 | # y[j].append(env.vehicles[j].position[1]) 45 | action_task = [] 46 | action_aim = [] 47 | for i in range(env.num_Vehicles): 48 | # state_v = torch.tensor([vehicles[i].self_state], dtype=torch.float32) 49 | # taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 50 | # taskAction, aimAction = models[i](state_v, taskState_v) 51 | # 52 | # taskAction = taskAction.detach().numpy().reshape(-1) 53 | # aimAction = aimAction.detach().numpy().reshape(-1) 54 | # # ppo 55 | # action_task.append(np.argmax(taskAction)) 56 | # action_aim.append(np.argmax(aimAction)) 57 | 58 | # action_task.append(np.random.randint(0, 10)) 59 | action_task.append(0) 60 | # action_aim.append(np.random.randint(0, 7)) 61 |
action_aim.append(0) 62 | # action_aim.append(1) 63 | # other_state, task_state, vehicle_state, _, _, _, _, 64 | Reward, _ = env.step(action_task, action_aim) 65 | # reward.append(Reward) 66 | # print("step {} average reward {}".format(step, Reward)) 67 | # print("current state:", state) 68 | # print("next state:", next_state) 69 | # print("vehicle state:", vehicleState) 70 | # print("task state", taskState) 71 | # print("current reward:", reward) 72 | # print("per-vehicle rewards:", vehicleReward) 73 | # print("{} tasks have not finished transmitting".format(len(env.need_trans_task))) 74 | # print("average reward:", env.Reward) 75 | # plt.figure() 76 | # fix, ax = plt.subplots(5, 4) 77 | # 78 | # for i in range(5): 79 | # for j in range(4): 80 | # number = i * 4 + j 81 | # ax[i, j].plot(x[number], y[number]) 82 | # ax[i, j].set_title('vehicle {}'.format(number)) 83 | # plt.plot(range(len(reward)), reward) 84 | # plt.ylabel("Reward") 85 | # plt.show() 86 | 87 | plt.figure() 88 | avg = [np.mean(sum_time) for i, sum_time in enumerate(env.avg) if i % 4 != 0] 89 | plt.ylabel("sumTime") 90 | plt.bar(range(len(avg)), avg, color="blue") 91 | plt.show() 92 | 93 | plt.figure() 94 | avg = [np.mean(sum_time) for i, sum_time in enumerate(env.avg_reward) if i % 4 != 0] 95 | plt.ylabel("avg_reward") 96 | plt.plot(range(len(avg)), avg, color="blue") 97 | plt.show() 98 | 99 | plt.figure() 100 | avg = [np.mean(avg_energy) for i, avg_energy in enumerate(env.avg_energy) if i % 4 != 0] 101 | plt.ylabel("Energy") 102 | plt.bar(range(len(avg)), avg, color="blue") 103 | plt.show() 104 | # 105 | plt.figure() 106 | avg = [np.mean(sum_time) for i, sum_time in enumerate(env.avg_price) if i % 4 != 0] 107 | plt.ylabel("Price") 108 | plt.bar(range(len(avg)), avg, color="blue") 109 | plt.show() 110 | 111 | plt.figure() 112 | avg = [vehicle.success_task / vehicle.sum_create_task for i, vehicle in enumerate(env.vehicles) if i % 4 != 0] 113 | plt.ylabel("successRate") 114 | plt.bar(range(len(avg)), avg, color="blue") 115 | plt.show() 116 | 117 | # plt.figure() 118 | # plt.ylabel("transTime") 119 | # for i, time in enumerate(env.avg_trans_time): 120 | # if i % 3 != 0: 121 | # plt.plot(range(0, len(time)), time) 122 | # plt.show() 123 | -------------------------------------------------------------------------------- /experiment7/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import argparse 3 | import os 4 | import time 5 | from collections import namedtuple 6 | 7 | import ptan 8 | import torch 9 | import torch.nn.functional as F 10 | import model 11 | from env import Env 12 | from mec import MEC 13 | from vehicle import Vehicle 14 | from memory import ReplayMemory 15 | from tensorboardX import SummaryWriter 16 | from torch.distributions.categorical import Categorical 17 | 18 | ENV_ID = "computing offloading" 19 | GAMMA = 0.99 20 | GAE_LAMBDA = 0.95 21 | 22 | TRAJECTORY_SIZE = 65 23 | LEARNING_RATE_ACTOR = 1e-5 24 | LEARNING_RATE_CRITIC = 1e-4 25 | 26 | PPO_EPS = 0.2 27 | PPO_EPOCHES = 10 28 | PPO_BATCH_SIZE = 64 29 | 30 | TEST_ITERS = 10000 31 | Experience = namedtuple('Transition', ('state', 'action', 'reward', 'next_state')) # Define a transition tuple 32 | 33 | 34 | # Convert lists into tensors and store them in the replay buffer 35 | def save_experience(state, action, reward, next_state, memory: ReplayMemory): 36 | reward = torch.tensor([reward]) 37 | action = torch.tensor([action]) 38 | state = torch.tensor(state) 39 | state = state.unsqueeze(0) 40 | next_state = torch.tensor(next_state) 41 | next_state = next_state.unsqueeze(0) 42 | memory.push(state, action, reward, next_state)
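# --- Annotation (not part of the original main.py): the function below, calc_adv_ref, implements
# generalized advantage estimation (GAE). A hand-worked single-step example with this module's
# GAMMA = 0.99, critic values V(s0) = 1.0 and V(s1) = 0.5, and a reward of 0.2 for the transition:
#   delta     = 0.2 + 0.99 * 0.5 - 1.0 = -0.305
#   advantage = delta = -0.305 (only one transition, so the GAMMA * GAE_LAMBDA * last_gae term is zero)
#   reference = advantage + V(s0) = 0.695
# _gae_single_step is a hypothetical helper added only to illustrate that arithmetic; nothing in the
# project calls it.
def _gae_single_step(reward=0.2, value=1.0, next_value=0.5):
    # mirrors: delta = exp.vehicleReward + GAMMA * next_val - val
    delta = reward + GAMMA * next_value - value
    advantage = delta
    reference = advantage + value
    return advantage, reference  # approximately (-0.305, 0.695)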
43 | 44 | 45 | def calc_adv_ref(trajectory, net_crt, states_v, device="cpu"): 46 | """ 47 | Compute the advantage and the 1-step reference value from a trajectory 48 | :param trajectory: trajectory list 49 | :param net_crt: critic network 50 | :param states_v: states tensor 51 | :return: tuple with advantage numpy array and reference values 52 | """ 53 | values_v = net_crt(torch.tensor(states_v)) 54 | values = values_v.squeeze().data.cpu().numpy() 55 | # generalized advantage estimator: smoothed version of the advantage 56 | last_gae = 0.0 57 | result_adv = [] 58 | result_ref = [] 59 | for val, next_val, exp in zip(reversed(values[:-1]), 60 | reversed(values[1:]), 61 | reversed(trajectory[:-1])): 62 | delta = exp.vehicleReward + GAMMA * next_val - val 63 | last_gae = delta + GAMMA * GAE_LAMBDA * last_gae 64 | result_adv.append(last_gae) 65 | result_ref.append(last_gae + val) 66 | 67 | adv_v = torch.FloatTensor(list(reversed(result_adv))) 68 | ref_v = torch.FloatTensor(list(reversed(result_ref))) 69 | return adv_v.to(device), ref_v.to(device) 70 | 71 | 72 | # Store each vehicle's transition in its own replay buffer 73 | def push(env, state, actions, next_state): 74 | for i, vehicle in enumerate(env.vehicles): 75 | if vehicle.task is None: # a vehicle without a task contributes no experience 76 | continue 77 | exp = Experience(state, actions[i], env.vehicleReward[i][-1], next_state) 78 | vehicle.buffer.append(exp) 79 | 80 | 81 | if __name__ == '__main__': 82 | task = MEC([10, 20]) 83 | vehicle = Vehicle(1, [10, 20], 'd') 84 | print(type(task) == MEC) 85 | print(type(task) == Vehicle) 86 | print(type(vehicle) == Vehicle) 87 | print(type(vehicle)) 88 | print(vehicle) 89 | -------------------------------------------------------------------------------- /experiment7/mec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | RANGE_MEC = 1000 # MEC communication range /m 4 | RESOURCE = 10000 # available resources MHz 5 | MAX_QUEUE = 40 6 | 7 | 8 | # Edge server 9 | class MEC: 10 | def __init__(self, position, resources=RESOURCE, max_queue=MAX_QUEUE): 11 | self.loc_x = position[0] 12 | self.loc_y = position[1] 13 | self.loc = position 14 | # currently available resources, MHz 15 | self.resources = resources 16 | self.state = [] 17 | # communication range, m 18 | self.range = RANGE_MEC 19 | # tasks currently accepted for processing (at most 10 at the same time) 20 | self.accept_task = [] 21 | # maximum number of tasks processed concurrently 22 | self.max_task = 10 23 | # number of accepted tasks 24 | self.sum_needDeal_task = 0 25 | # how many actions picked me this slot / how many tasks chose to transmit to me 26 | # self.len_action = 0 27 | # queue of tasks waiting to be computed (think of them as suspended) 28 | self.task_queue = [] 29 | # task queue used for reward calculation 30 | self.task_queue_for_reward = [] 31 | # maximum queue length 32 | self.max_queue = max_queue 33 | # current state 34 | self.get_state() 35 | 36 | @property 37 | def get_x(self): 38 | return self.loc_x 39 | 40 | @property 41 | def get_y(self): 42 | return self.loc_y 43 | 44 | @property 45 | def get_location(self): 46 | return self.loc 47 | 48 | """ 49 | Get the state 50 | """ 51 | 52 | def get_state(self): 53 | """ 54 | :return: state, with layout [loc_x, loc_y, sum_needDeal_task, resources] 55 | """ 56 | self.state = [] 57 | self.state.extend(self.loc) 58 | self.state.append(self.sum_needDeal_task) 59 | # self.state.append(self.len_action) 60 | self.state.append(self.resources) 61 | return self.state 62 | -------------------------------------------------------------------------------- /experiment7/memory.py: -------------------------------------------------------------------------------- 1 | # Experience type 2 | import collections 3 | from collections import namedtuple 4 | from random import sample 5 | import numpy as np 6 | 7 | Experience = namedtuple('Transition', 8 | field_names=['cur_otherState',
'cur_TaskState', "cur_NeighborState", # states 9 | 'taskAction', 'aimAction', # actions 10 | 'reward', # reward 11 | 'next_otherState', 'next_TaskState', 12 | 'next_NeighborState']) # Define a transition tuple 13 | 14 | 15 | class PPOMemory: 16 | def __init__(self, batch_size): 17 | self.self_state = [] 18 | self.neighbor_state = [] 19 | self.task_state = [] 20 | self.vehicles_state = [] 21 | self.task_probs = [] 22 | self.aim_probs = [] 23 | self.vals = [] 24 | self.action = [] 25 | self.rewards = [] 26 | self.batch_size = batch_size 27 | 28 | def sample(self): 29 | batch_step = np.arange(0, len(self.self_state), self.batch_size) 30 | indices = np.arange(len(self.self_state), dtype=np.int64) 31 | # np.random.shuffle(indices) 32 | batches = [indices[i:i + self.batch_size] for i in batch_step] 33 | return np.array(self.self_state), \ 34 | np.array(self.neighbor_state), \ 35 | np.array(self.task_state), \ 36 | np.array(self.vehicles_state), \ 37 | np.array(self.task_probs), \ 38 | np.array(self.aim_probs), \ 39 | np.array(self.vals), \ 40 | np.array(self.action), \ 41 | np.array(self.rewards), \ 42 | batches 43 | 44 | def push(self, self_state, neighbor_state, task_state, vehicles_state, 45 | task_action, aim_action, 46 | task_probs, aim_probs, 47 | vals, reward): 48 | self.self_state.append(self_state) 49 | self.neighbor_state.append(neighbor_state) 50 | self.task_state.append(task_state) 51 | self.vehicles_state.append(vehicles_state) 52 | self.action.append([task_action, aim_action]) 53 | self.task_probs.append(task_probs) 54 | self.aim_probs.append(aim_probs) 55 | self.vals.append(vals) 56 | self.rewards.append(reward) 57 | 58 | def clear(self): 59 | self.self_state = [] 60 | self.neighbor_state = [] 61 | self.task_state = [] 62 | self.vehicles_state = [] 63 | self.task_probs = [] 64 | self.aim_probs = [] 65 | self.vals = [] 66 | self.action = [] 67 | self.rewards = [] 68 | 69 | 70 | class ReplayMemory(object): # Define a replay memory 71 | 72 | # initialize the replay buffer 73 | def __init__(self, capacity): 74 | # maximum capacity 75 | self.capacity = capacity 76 | # stored experiences 77 | self.memory = [] 78 | # index of the next slot to overwrite (circular buffer)
79 | self.position = 0 80 | 81 | # store an experience 82 | def push(self, *args): 83 | if len(self.memory) < self.capacity: 84 | self.memory.append(None) 85 | # store the experience 86 | self.memory[self.position] = Experience(*args) 87 | # record where the next experience will be written 88 | self.position = (self.position + 1) % self.capacity 89 | 90 | # sample a batch 91 | def sample(self, batch_size): 92 | return sample(self.memory, batch_size) 93 | 94 | def __len__(self): 95 | return len(self.memory) 96 | 97 | 98 | class ExperienceBuffer: 99 | def __init__(self, capacity): 100 | self.maxLen = capacity 101 | self.buffer = collections.deque(maxlen=capacity) # FIFO queue 102 | 103 | def __len__(self): 104 | return len(self.buffer) 105 | 106 | def append(self, experience: Experience): 107 | self.buffer.append(experience) 108 | 109 | def sample(self, batch_size): 110 | indices = np.random.choice(len(self.buffer), batch_size, replace=False) 111 | cur_otherState, cur_TaskState, cur_NeighborState, taskAction, aimAction, rewards, next_otherState, next_TaskState, next_NeighborState = zip( 112 | *[self.buffer[idx] for idx in indices]) 113 | # convert to numpy arrays 114 | return np.array(cur_otherState), np.array(cur_TaskState), np.array(cur_NeighborState), \ 115 | np.array(taskAction), np.array(aimAction), \ 116 | np.array(rewards, dtype=np.float32), \ 117 | np.array(next_otherState), np.array(next_TaskState), np.array(next_NeighborState) 118 | 119 | # clear the buffer 120 | def clear(self): 121 | self.buffer = collections.deque(maxlen=self.maxLen) 122 | -------------------------------------------------------------------------------- /experiment7/task.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | np.random.seed(2) 4 | 5 | 6 | class Task: 7 | """ 8 | Defines the task type 9 | """ 10 | 11 | def __init__(self, vehicle=None, createTime=0, flag=1): 12 | # the vehicle that generated the task 13 | self.vehicle = vehicle 14 | # resources consumed to complete this task 15 | self.aim = None # offloading target 16 | 17 | if flag == 1: 18 | # entertainment task 19 | self.max_time = 60 # np.random.randint(50, 70) # ms, maximum tolerable latency 20 | else: 21 | # safety task 22 | self.max_time = 50 # np.random.randint(40, 50) 23 | self.size = np.random.uniform(0.3, 0.5) # np.random.uniform(0.5, 1) # Mb 24 | self.cycle = 40 # np.random.randint(30, 50) # cycle/bit 25 | self.need_trans_size = self.size * np.power(2, 10) # Kb, data still left to transmit 26 | self.need_precess_cycle = self.cycle * self.size * 1000 # Mb * cycles/bit = M cycles, cycles still left to process (10^6) 27 | 28 | self.need_time = 0 # required computation time 29 | self.trans_time = 0 # required transmission time 30 | self.hold_time = 0 # time the task waits in the computation queue 31 | self.wait_time = 0 # time the task waits for transmission 32 | 33 | self.rate = 0 # current transmission rate 34 | self.compute_resource = 0 # allocated computing resources 35 | 36 | self.create_time = createTime # time the task was created 37 | self.pick_time = 0 # time the task was picked (dequeued) 38 | -------------------------------------------------------------------------------- /experiment7/vehicle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from MyQueue import MyQueue 6 | from memory import ExperienceBuffer, PPOMemory 7 | from task import Task 8 | 9 | Dv = 100 # maximum vehicle communication range 10 | Fv = 2000 # maximum vehicle computing capacity, MHz 11 | MAX_TASK = 10 # maximum task queue length 12 | 13 | CAPACITY = 10000 # replay buffer size 14 | TASK_SOLT = 10 # task generation time slot 15 | 16 | # maximum waiting queue length 17 | MAX_QUEUE = 10 18 | 19 | np.random.seed(2) 20 | 21 | direction_map = {"d": 1, "u": 2, "l": 3, "r": 4} 22 | 23 | 24 | class Vehicle: 25 | # position: x, y; velocity; direction: -1 left, 1 right 26 | def __init__(self, id, position, direction, velocity=20, max_queue=MAX_QUEUE): 27 | self.id = id 28 | # vehicle position info 29 | self.loc_x = position[0] 30 |
self.loc_y = position[1] 31 | self.position = position 32 | self.velocity = velocity # m/s 33 | self.direction = direction 34 | # communication range 35 | self.range = Dv 36 | # neighbor list 37 | self.neighbor = [] 38 | # mec 39 | self.Mec = None 40 | # current time 41 | self.cur_frame = 0 42 | # list of accepted tasks (at most 5 processed at the same time) 43 | self.accept_task = [] 44 | # queue of tasks waiting to be computed (think of them as suspended) 45 | self.task_queue = [] 46 | # task queue used for reward calculation 47 | self.task_queue_for_reward = [] 48 | # maximum number of tasks processed concurrently 49 | self.max_task = 5 50 | # maximum waiting queue length 51 | self.max_queue = max_queue 52 | # number of accepted tasks (both processing and waiting) 53 | self.sum_needDeal_task = 0 54 | # how many actions chose me as the offloading target this slot 55 | # self.len_action = 0 56 | # currently available resources 57 | self.resources = round((1 - np.random.randint(1, 3) / 10) * Fv, 2) # MHz 58 | # whether a task is currently being transmitted to a neighboring vehicle (0: no, 1: yes) 59 | self.trans_task_for_vehicle = 0 60 | self.queue_for_trans_vehicle = MyQueue() 61 | # whether a task is currently being transmitted to the MEC 62 | self.trans_task_for_mec = 0 63 | self.queue_for_trans_mec = MyQueue() 64 | # task currently being processed (used for reward calculation, not part of the state) 65 | self.cur_task = None 66 | # task queue 67 | self.total_task = [] 68 | # length of the task queue 69 | self.len_task = len(self.total_task) 70 | # offloading success rate 71 | self.success_rate = 0 72 | self.success_task = 0 73 | self.sum_create_task = 0 74 | 75 | # current state info 76 | self.self_state = [] 77 | # current task queue state 78 | self.task_state = [] 79 | # neighboring vehicles' states 80 | self.neighbor_state = [] 81 | # state without neighbor info, used for neighbor observations and the global critic 82 | self.excludeNeighbor_state = [] 83 | # replay buffer 84 | self.buffer = ExperienceBuffer(capacity=CAPACITY) 85 | # total reward 86 | self.reward = [] 87 | # number of overflowed tasks 88 | self.overflow = 0 89 | # time the previous task was generated 90 | self.lastCreatWorkTime = 0 91 | 92 | self.timeSolt = TASK_SOLT # * (id % 2 + 1) 93 | self.memory = PPOMemory(CAPACITY) 94 | # generate tasks 95 | self.create_work() 96 | 97 | # get the position 98 | @property 99 | def get_location(self): 100 | return self.position 101 | 102 | # set the position 103 | def set_location(self, loc_x, loc_y): 104 | self.loc_x = loc_x 105 | self.loc_y = loc_y 106 | self.position = [self.loc_x, self.loc_y] 107 | 108 | # get x 109 | @property 110 | def get_x(self): 111 | return self.loc_x 112 | 113 | # get y 114 | @property 115 | def get_y(self): 116 | return self.loc_y 117 | 118 | # Generate a task (driven by the current time) 119 | def create_work(self): 120 | if self.id % 3 == 0: 121 | return 122 | # generate a task once per task slot 123 | if (self.cur_frame - self.lastCreatWorkTime) % self.timeSolt == 0: 124 | # # generate a task with probability 0.6 each time 125 | # if np.random.random() < 0.8: 126 | if self.len_task < MAX_TASK: # queue not full 127 | if self.cur_frame % 3 == 0: 128 | task = Task(self, self.cur_frame % 1000, 2) 129 | else: 130 | task = Task(self, self.cur_frame % 1000) 131 | self.sum_create_task += 1 132 | self.lastCreatWorkTime = self.cur_frame 133 | self.total_task.append(task) 134 | self.len_task += 1 135 | self.overflow = 0 136 | else: 137 | self.overflow = 1 138 | # create a second task 139 | # if np.random.random() > 0.5: 140 | # if self.len_task < MAX_TASK: # queue not full 141 | # task = Task(self, self.cur_frame % 1000) 142 | # self.sum_create_task += 1 143 | # self.lastCreatWorkTime = self.cur_frame 144 | # self.total_task.append(task) 145 | # self.len_task += 1 146 | # # print("vehicle {} generated a task".format(self.id)) 147 | # self.overflow = 0 148 | 149 | """ 150 | Get the state 151 | """ 152 | 153 | def get_state(self): 154 | self.self_state = [] 155 | self.neighbor_state = [] 156 | self.excludeNeighbor_state = [] 157 | self.task_state = [] 158 | 159 | # position info: 4 values 160 | self.self_state.extend(self.position) 161 | self.self_state.append(self.velocity) 162 | self.self_state.append(direction_map.get(self.direction)) 163 | self.excludeNeighbor_state.extend(self.position) 164 |
self.excludeNeighbor_state.append(self.velocity) 165 | self.excludeNeighbor_state.append(direction_map.get(self.direction)) 166 | 167 | # resource info (available resources) 168 | self.self_state.append(self.resources) 169 | self.excludeNeighbor_state.append(self.resources) 170 | 171 | # number of tasks currently being handled 172 | self.self_state.append(self.sum_needDeal_task) 173 | self.excludeNeighbor_state.append(self.sum_needDeal_task) 174 | # number of tasks currently being received 175 | # self.self_state.append(self.len_action) 176 | # self.excludeNeighbor_state.append(self.len_action) 177 | 178 | # whether tasks are currently being transmitted 179 | self.excludeNeighbor_state.append(self.queue_for_trans_vehicle.size()) 180 | self.excludeNeighbor_state.append(self.queue_for_trans_mec.size()) 181 | self.self_state.append(self.queue_for_trans_vehicle.size()) 182 | self.self_state.append(self.queue_for_trans_mec.size()) 183 | 184 | # current number of queued tasks 185 | self.self_state.append(self.len_task) 186 | self.excludeNeighbor_state.append(self.len_task) 187 | 188 | # neighbor list: per-neighbor feature vector * number of neighbors 189 | for neighbor in self.neighbor: 190 | state = [] 191 | state.extend(neighbor.position) # position 192 | state.append(neighbor.velocity) # velocity 193 | state.append(direction_map.get(neighbor.direction)) # direction 194 | state.append(neighbor.resources) # available resources 195 | state.append(neighbor.sum_needDeal_task) # number of tasks being handled 196 | # self.self_state.append(neighbor.len_action) # number of tasks currently transmitting 197 | self.neighbor_state.append(state) 198 | 199 | self.self_state.extend(self.Mec.state) 200 | 201 | # task state info 202 | for i in range(MAX_TASK): 203 | if i < self.len_task: 204 | task = self.total_task[i] 205 | self.task_state.append([task.create_time, task.need_trans_size, task.need_precess_cycle, task.max_time]) 206 | else: 207 | self.task_state.append([0, 0, 0, 0]) 208 | 209 | return self.excludeNeighbor_state 210 | -------------------------------------------------------------------------------- /test/test.py: -------------------------------------------------------------------------------- 1 | class A: 2 | name = 'chen' 3 | def __init__(self, name): 4 | self.name = name 5 | 6 | def get_str(self): 7 | print("A.name" + self.name) 8 | 9 | 10 | 11 | class B: 12 | name = 'yu' 13 | def __init__(self, name): 14 | self.name = name 15 | 16 | def get_str(self): 17 | print("B.name" + self.name) 18 | 19 | class C(A, B): 20 | name = 'hao' 21 | def __init__(self, name='hao'): 22 | super().__init__(name) 23 | self.name = name 24 | 25 | 26 | 27 | def get_str(self): 28 | print("C.name" + self.name) 29 | 30 | if __name__ == '__main__': 31 | c = C() 32 | c.get_str() -------------------------------------------------------------------------------- /test/test2.py: -------------------------------------------------------------------------------- 1 | from numpy import array 2 | 3 | from test.test import C 4 | 5 | array([1, 2, 3]) --------------------------------------------------------------------------------
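# --- Annotation (not part of either test file above): a minimal sketch of what the multiple-inheritance
# experiment in test/test.py resolves to, assuming the classes are imported the same way test/test2.py
# imports C; both illustrative lines are kept as comments only.
#
#   from test.test import A, B, C
#   print(C.__mro__)   # method resolution order for class C(A, B): (C, A, B, object)
#   C().get_str()      # prints "C.namehao" -- C.get_str overrides the A and B versions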