├── .gitignore ├── __init__.py ├── experiment ├── __init__.py ├── dqn.py ├── env.py ├── env_test.py ├── main.py ├── mec.py ├── memory.py ├── model.py ├── task.py └── vehicle.py ├── experiment2 ├── __init__.py ├── dqn.py ├── env.py ├── env_test.py ├── main.py ├── mec.py ├── memory.py ├── model.py ├── task.py ├── test.py └── vehicle.py ├── experiment3 ├── __init__.py ├── dqn.py ├── env.py ├── env_test.py ├── main.py ├── mec.py ├── memory.py ├── model.py ├── task.py ├── test.py └── vehicle.py ├── experiment4 ├── __init__.py ├── dqn.py ├── env.py ├── env_test.py ├── main.py ├── mec.py ├── memory.py ├── model.py ├── task.py ├── test.py └── vehicle.py ├── experiment5 ├── __init__.py ├── env.py ├── env_test.py ├── main.py ├── mappo.py ├── mec.py ├── memory.py ├── model.py ├── task.py └── vehicle.py ├── experiment6 ├── __init__.py ├── env.py ├── env_test.py ├── maddpg.py ├── main.py ├── mec.py ├── memory.py ├── model.py ├── task.py ├── test.py └── vehicle.py ├── experiment7 ├── MyQueue.py ├── __init__.py ├── dqn.py ├── env.py ├── env_test.py ├── main.py ├── mappo.py ├── mec.py ├── mecEnv.py ├── memory.py ├── model.py ├── task.py ├── test.py └── vehicle.py └── test ├── test.py └── test2.py /.gitignore: -------------------------------------------------------------------------------- 1 | /result/ 2 | /experiment/runs 3 | /experiment/saves 4 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenyuhaoCYH/DRL/7d105abdf5a938e0a0c55671528eac256c936704/__init__.py -------------------------------------------------------------------------------- /experiment/__init__.py: -------------------------------------------------------------------------------- 1 | import ptan 2 | import numpy as np 3 | import torch 4 | from torch.distributions.categorical import Categorical 5 | from experiment.env import Env 6 | 7 | 8 | def test_net(nets, env: Env, count=10): 9 | rewards = 0.0 10 | steps = 0 11 | for _ in range(count): 12 | env.reset() 13 | while steps < 1000: 14 | action = [] 15 | with torch.no_grad(): 16 | for vehicle in env.vehicles: 17 | state = torch.tensor(vehicle.self_state) 18 | _, pro = nets[vehicle.id](state) 19 | act = Categorical.sample(pro) 20 | action.append(act.item()) 21 | _, _, reward, _ = env.step(action) 22 | rewards += reward 23 | steps += 1 24 | return rewards / count, steps / count 25 | 26 | # def calc_logprob(pro_v, actions_v): 27 | # p1 = - ((mu_v - actions_v) ** 2) / (2 * torch.exp(logstd_v).clamp(min=1e-3)) 28 | # p2 = - torch.log(torch.sqrt(2 * math.pi * torch.exp(logstd_v))) 29 | # return p1 + p2 30 | -------------------------------------------------------------------------------- /experiment/dqn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import time 4 | from collections import namedtuple 5 | 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | import torch.optim as optim 11 | from pylab import mpl 12 | import netron 13 | 14 | from env import Env 15 | from model import DQN 16 | 17 | # 设置显示中文字体 18 | mpl.rcParams["font.sans-serif"] = ["SimHei"] 19 | 20 | Experience = namedtuple('Transition', 21 | field_names=['cur_otherState', 'cur_TaskState', # 状态 22 | 'taskAction', 'aimAction', 'resourceAction', # 动作 23 | 'reward', # 奖励 24 | 'next_otherState', 'next_TaskState']) # Define a transition tuple 25 
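Each vehicle keeps its own replay buffer of these transitions: `play_step` below fills it after every environment step, and `calc_loss` later consumes sampled batches. A minimal sketch of one record, with toy values standing in for what `env.step()` actually returns (the state sizes here are placeholders, not the real dimensions produced by `Env`):

```python
# Toy transition; in the training loop the values come from env.step() inside play_step().
exp = Experience(
    cur_otherState=[0.0] * 40,            # flattened self/neighbor/MEC features (length is env-dependent)
    cur_TaskState=[[[0, 0, 0, 0]] * 10],  # 1 x MAX_TASK x 4 task-queue snapshot fed to the CNN branch
    taskAction=2,                         # index of the queued task to offload
    aimAction=0,                          # offload target: local, MEC, or one of the neighbors
    resourceAction=0.4,                   # fraction of computing resource requested
    reward=-0.1,
    next_otherState=[0.0] * 40,
    next_TaskState=[[[0, 0, 0, 0]] * 10],
)
# Fields unpack positionally, which is exactly what ExperienceBuffer.sample() relies on:
cur_other, cur_task, a_task, a_aim, a_res, r, nxt_other, nxt_task = exp
```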
| GAMMA = 0.99 26 | BATCH_SIZE = 64 27 | REPLAY_SIZE = 10000 28 | LEARNING_RATE = 1e-4 29 | SYNC_TARGET_FRAMES = 1000 30 | 31 | EPSILON_DECAY_LAST_FRAME = 150000 32 | EPSILON_START = 0.6 33 | EPSILON_FINAL = 0.01 34 | 35 | RESET = 10000 # 重置游戏次数 36 | 37 | MAX_TASK = 10 # 任务队列最大长度 38 | 39 | momentum = 0.005 40 | 41 | RESOURCE = [0.2, 0.4, 0.6, 0.8] 42 | 43 | 44 | @torch.no_grad() 45 | def play_step(env, epsilon, models, device="cpu"): 46 | vehicles = env.vehicles 47 | old_otherState = [] 48 | old_taskState = [] 49 | 50 | actionTask = [] 51 | actionAim = [] 52 | actionResource = [] 53 | # 贪心选择动作 54 | for i, model in enumerate(models): 55 | old_otherState.append(vehicles[i].self_state) 56 | old_taskState.append(vehicles[i].task_state) 57 | if np.random.random() < epsilon: 58 | # 随机动作 59 | actionTask.append(np.random.randint(0, 10)) 60 | actionAim.append(np.random.randint(0, 7)) # local+mec+neighbor 61 | actionResource.append(round(np.random.random(), 1)) 62 | else: 63 | state_v = torch.tensor([vehicles[i].self_state], dtype=torch.float32) 64 | taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 65 | taskAction, aimAction, resourceAction = model(state_v, taskState_v) 66 | 67 | taskAction = np.array(taskAction, dtype=np.float32).reshape(-1) 68 | aimAction = np.array(aimAction, dtype=np.float32).reshape(-1) 69 | resourceAction = np.array(resourceAction, dtype=np.float32).reshape(-1) 70 | 71 | actionAim.append(np.argmax(aimAction)) 72 | actionTask.append(np.argmax(taskAction)) 73 | actionResource.append(RESOURCE[np.argmax(resourceAction)]) 74 | # print("action:", action) 75 | _, _, _, otherState, _, taskState, Reward, reward = env.step(actionTask, actionAim, actionResource) 76 | # print("reward:", reward) 77 | 78 | # 加入各自的缓存池【当前其他状态、当前任务状态、目标动作、任务动作,下一其他状态、下一任务状态】 79 | for i, vehicle in enumerate(vehicles): 80 | exp = Experience(old_otherState[i], [old_taskState[i]], 81 | actionTask[i], actionAim[i], actionResource[i], 82 | reward[i], 83 | otherState[i], [taskState[i]]) 84 | vehicle.buffer.append(exp) 85 | return round(Reward, 2) # 返回总的平均奖励 86 | 87 | 88 | # 计算一个智能体的损失 89 | def calc_loss(batch, net: DQN, tgt_net: DQN, device="cpu"): 90 | cur_otherState, cur_TaskState, taskAction, aimAction, resourceAction, rewards, next_otherState, next_TaskState = batch # 91 | 92 | otherStates_v = torch.tensor(np.array(cur_otherState, copy=False), dtype=torch.float32).to(device) 93 | taskStates_v = torch.tensor(np.array(cur_TaskState, copy=False), dtype=torch.float32).to(device) 94 | # print("states_v:", states_v) # batch状态 95 | taskActions_v = torch.tensor(np.array(taskAction), dtype=torch.int64).to(device) 96 | aimActions_v = torch.tensor(np.array(aimAction), dtype=torch.int64).to(device) 97 | resourceAction_v = torch.tensor(np.array(resourceAction), dtype=torch.int64).to(device) 98 | # print("actions_v", actions_v) # batch动作 99 | rewards_v = torch.tensor(np.array(rewards), dtype=torch.float32).to(device) 100 | # print("rewards_v", rewards_v) # batch奖励 101 | next_otherStates_v = torch.tensor(np.array(next_otherState, copy=False), dtype=torch.float32).to(device) 102 | next_taskStates_v = torch.tensor(np.array(next_TaskState, copy=False), dtype=torch.float32).to(device) 103 | # print("next_states_v", next_states_v) # batch下一个状态 104 | 105 | # 计算当前网络q值 106 | taskActionValues, aimActionValues, resourceActionValues = net(otherStates_v, 107 | taskStates_v) # .gather(1, aimActions_v.unsqueeze(-1)).squeeze(-1) 108 | taskActionValues = taskActionValues.gather(1, 
taskActions_v.unsqueeze(-1)).squeeze(-1) 109 | aimActionValues = aimActionValues.gather(1, aimActions_v.unsqueeze(-1)).squeeze(-1) 110 | resourceActionValues = resourceActionValues.gather(1, resourceAction_v.unsqueeze(-1)).squeeze(-1) 111 | 112 | # 计算目标网络q值 113 | next_taskActionValues, next_aimActionValues, next_resourceActionValues = tgt_net(next_otherStates_v, 114 | next_taskStates_v) # .max(1)[0] # 得到最大的q值 115 | 116 | next_taskActionValues = next_taskActionValues.max(1)[0].detach() 117 | next_aimActionValues = next_aimActionValues.max(1)[0].detach() 118 | next_resourceActionValues = next_resourceActionValues.max(1)[0].detach() 119 | 120 | # 防止梯度流入用于计算下一状态q近似值得NN 121 | # next_states_values = next_aimActionValues.detach() 122 | # print("next_states_values", next_states_values) 123 | expected_aim_values = next_aimActionValues * GAMMA + rewards_v 124 | expected_task_values = next_taskActionValues * GAMMA + rewards_v 125 | expected_resource_values = next_resourceActionValues * GAMMA + rewards_v 126 | # print(" expected_state_values", expected_state_values) 127 | 128 | return nn.MSELoss()(taskActionValues, expected_task_values) + \ 129 | nn.MSELoss()(aimActionValues, expected_aim_values) + \ 130 | nn.MSELoss()(resourceActionValues, expected_resource_values) 131 | 132 | 133 | if __name__ == '__main__': 134 | env = Env() 135 | env.reset() 136 | 137 | frame_idx = 0 138 | # writer = SummaryWriter(comment="-" + env.__doc__) 139 | agents = env.vehicles 140 | models = [] 141 | tgt_models = [] 142 | optimizers = [] 143 | for agent in agents: 144 | # print(agent.get_location, agent.velocity) 145 | task_shape = np.array([agent.task_state]).shape 146 | # print(task_shape) 147 | model = DQN(len(agent.self_state), task_shape, MAX_TASK, len(agent.neighbor) + 2, len(RESOURCE)) 148 | models.append(model) 149 | optimer = optim.RMSprop(params=model.parameters(), lr=LEARNING_RATE, momentum=momentum) 150 | optimizers.append(optimer) 151 | for agent in agents: 152 | # print(agent.get_location, agent.velocity) 153 | task_shape = np.array([agent.task_state]).shape 154 | # print(task_shape) 155 | model = DQN(len(agent.self_state), task_shape, MAX_TASK, len(agent.neighbor) + 2, len(RESOURCE)) 156 | model.load_state_dict(models[agent.id].state_dict()) 157 | tgt_models.append(model) 158 | 159 | # 打印网络结构 160 | model = models[0] 161 | state_v = torch.tensor([env.vehicles[0].self_state], dtype=torch.float32) 162 | taskState_v = torch.tensor([[env.vehicles[0].task_state]], dtype=torch.float32) 163 | # 针对有网络模型,但还没有训练保存 .pth 文件的情况 164 | modelpath = "./netStruct/demo.onnx" # 定义模型结构保存的路径 165 | torch.onnx.export(model, (state_v, taskState_v), modelpath) # 导出并保存 166 | netron.start(modelpath) 167 | 168 | total_reward = [] 169 | recent_reward = [] 170 | loss_1 = [] 171 | reward_1 = [] 172 | 173 | epsilon = EPSILON_START 174 | eliposde = 150000 175 | while eliposde > 0: 176 | # 重置游戏 177 | if frame_idx % RESET == 0: 178 | print("游戏重置") 179 | env.reset() 180 | 181 | frame_idx += 1 182 | print("the {} steps".format(frame_idx)) 183 | epsilon = max(EPSILON_FINAL, EPSILON_START - frame_idx / EPSILON_DECAY_LAST_FRAME) 184 | reward = play_step(env, epsilon, models) 185 | total_reward.append(reward) 186 | print("current reward:", reward) 187 | print("current 100 times total rewards:", np.mean(total_reward[-100:])) 188 | recent_reward.append(np.mean(total_reward[-100:])) 189 | if np.mean(total_reward[-100:]) > 0.5: 190 | break 191 | 192 | for i, agent in enumerate(agents): 193 | # print("length of {} buffer".format(agent.id), 
len(agent.buffer)) 194 | if len(agent.buffer) < REPLAY_SIZE: # 缓冲池要足够大 195 | continue 196 | if frame_idx % SYNC_TARGET_FRAMES == 0: # 更新目标网络 197 | tgt_models[i].load_state_dict(models[i].state_dict()) 198 | optimizers[i].zero_grad() 199 | batch = agent.buffer.sample(BATCH_SIZE) 200 | loss_t = calc_loss(batch, models[i], tgt_models[i]) 201 | # print("loss:", loss_t) 202 | loss_t.backward() 203 | optimizers[i].step() 204 | if agent.id == 0: 205 | print("cur_loss:", loss_t.item()) 206 | loss_1.append(loss_t.item()) 207 | reward_1.append(env.reward[0]) 208 | eliposde -= 1 209 | 210 | cur_time = time.strftime("%Y-%m-%d-%H-%M", time.localtime(time.time())) 211 | # 创建文件夹 212 | os.makedirs("D:/pycharm/Project/VML/MyErion/experiment/result/" + cur_time) 213 | for i, vehicle in enumerate(env.vehicles): 214 | # 保存每个网络模型 215 | torch.save(models[i].state_dict(), 216 | "D:/pycharm/Project/VML/MyErion/experiment/result/" + cur_time + "/vehicle" + str(i) + ".pkl") 217 | 218 | plt.plot(range(len(recent_reward)), recent_reward) 219 | plt.title("奖励曲线") 220 | plt.show() 221 | 222 | plt.plot(range(len(loss_1)), loss_1) 223 | plt.title("损失曲线") 224 | plt.show() 225 | 226 | plt.plot(range(1000), reward_1[-1000:]) 227 | plt.title("车辆一奖励曲线") 228 | plt.show() 229 | -------------------------------------------------------------------------------- /experiment/env_test.py: -------------------------------------------------------------------------------- 1 | from env import Env 2 | import numpy as np 3 | 4 | if __name__ == '__main__': 5 | print() 6 | env = Env() 7 | env.reset() 8 | # 测试找最邻近的mec 9 | # for vehicle in env.vehicles: 10 | # print("vehicle{} location:".format(vehicle.id),vehicle.get_location) 11 | # for mec in env.MECs: 12 | # print("mec{} location:".format(mec.id),mec.get_location) 13 | # for vehicle in env.vehicles: 14 | # print(vehicle.mec_lest.get_location, end=" ") 15 | 16 | # 测试网络节点数 17 | task = np.array(env.taskState) 18 | print(task.size) 19 | print(task.shape) 20 | vehicles = env.vehicles 21 | # print(vehicles[0].actor1) 22 | # print(vehicles[0].target_actor1) 23 | # print(vehicles[0].state) 24 | # print(vehicles[0].get_state()) 25 | # print(len(vehicles[0].state)) 26 | # print(len(env.state)) 27 | 28 | # 测试更新邻居表 29 | # for vehicle in vehicles: 30 | # print(vehicle.get_location) 31 | # 32 | # print("-----------------------------------") 33 | # for vehicle in vehicles: 34 | # for i in vehicle.neighbor: 35 | # print(i.id, end=" ") 36 | # print() 37 | # 测试更新total——task 38 | # list = [vehicles[0],vehicles[1],vehicles[2],vehicles[3],vehicles[4]] 39 | # print(list) 40 | # for i in reversed(list): 41 | # if i.id >=2: 42 | # list.remove(i) 43 | # else: 44 | # break 45 | # print(list) 46 | # list=[[]]*5 47 | # print(list) 48 | for vehicle in vehicles: 49 | print("第{}车状态:{}".format(vehicle.id, vehicle.self_state)) 50 | print("该车邻居:") 51 | for i in vehicle.neighbor: 52 | print(i.id, end=" ") 53 | print() 54 | 55 | # 测试环境运行 56 | for i in range(1000): 57 | action1 = [] 58 | action2 = [] 59 | action3 = [] 60 | for j in range(20): 61 | action1.append(0) 62 | # action2.append(np.random.randint(0, 7)) 63 | action2.append(0) 64 | # action3.append(round(np.random.random(), 2)) 65 | action3.append(0.8) 66 | env.step(action1, action2, action3) 67 | # print("当前状态:", state) 68 | # print("下一状态:", next_state) 69 | # print("车状态:", vehicleState) 70 | # print("任务状态", taskState) 71 | # print("当前奖励:", reward) 72 | # print("每个奖励,", vehicleReward) 73 | # print("当前有{}任务没有传输完成".format(len(env.need_trans_task))) 74 | # print("average 
reward:", env.Reward) 75 | -------------------------------------------------------------------------------- /experiment/mec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | RANGE_MEC = 200 # MEC通信范围 4 | RESOURCE = 20000 # 可用资源 MHz 5 | 6 | 7 | # 边缘服务器 8 | class MEC: 9 | def __init__(self, id, loc_x, loc_y, resources=RESOURCE): 10 | self.loc_x = loc_x 11 | self.loc_y = loc_y 12 | self.loc = [self.loc_x, self.loc_y] 13 | self.id = id 14 | # 当前可用资源 MHz 15 | self.resources = resources 16 | self.state = [] 17 | # 通信范围 m 18 | self.range = RANGE_MEC 19 | # 当前接到需要处理的任务信息 20 | self.accept_task = [] 21 | # 接受任务的数量 22 | self.sum_needDeal_task = 0 23 | # 此时刻有多少动作选则我 多少任务正在传输给我 24 | self.len_action = 0 25 | # 当前时间 26 | self.cur_frame = 0 27 | # 当前状态 28 | self.get_state() 29 | 30 | @property 31 | def get_x(self): 32 | return self.loc_x 33 | 34 | @property 35 | def get_y(self): 36 | return self.loc_y 37 | 38 | @property 39 | def get_location(self): 40 | return self.loc 41 | 42 | """ 43 | 获得状态 44 | """ 45 | 46 | def get_state(self): 47 | """ 48 | :return:state 维度:1+2+2 3维[id,loc_x,loc_y,resources] 49 | """ 50 | self.state = [] 51 | self.state.extend(self.loc) 52 | self.state.append(self.resources) 53 | return self.state 54 | 55 | 56 | # 测试 57 | if __name__ == '__main__': 58 | mec = MEC(10, 10, 1) 59 | # vehicles = [] 60 | # for i in range(40): 61 | # vehicle = Vehicle(i, random.randint(1, 5), random.randint(1, 5), random.randint(0, 4)) 62 | # vehicle.creat_work() 63 | # vehicles.append(vehicle) 64 | # for i, vehicle in enumerate(vehicles): 65 | # print("v{}.get_state():{}".format(i, vehicle.get_state())) 66 | # print("mec.get_state():", mec.get_state(), mec.cur_frame) 67 | # mec.get_task([2] * 40, vehicles) 68 | # print("mec.received_task:", mec.received_task) 69 | # print("resources:", mec.resources) 70 | # mec.renew_resources(1) 71 | # print("after received_task:", mec.received_task) 72 | # print("after resources:", mec.resources) 73 | # print("renew_state", mec.renew_state(1, [1, 2, 2], vehicles), mec.cur_frame) 74 | print(mec.get_location) 75 | -------------------------------------------------------------------------------- /experiment/memory.py: -------------------------------------------------------------------------------- 1 | # 经验类型 2 | import collections 3 | from collections import namedtuple 4 | from random import sample 5 | import numpy as np 6 | 7 | Experience = namedtuple('Transition', 8 | field_names=['state', 'action', 'reward', 'next_state']) # Define a transition tuple 9 | 10 | 11 | class ReplayMemory(object): # Define a replay memory 12 | 13 | # 初始化缓冲池 14 | def __init__(self, capacity): 15 | # 最大容量 16 | self.capacity = capacity 17 | # 缓冲池经验 18 | self.memory = [] 19 | # ? 
20 | self.position = 0 21 | 22 | # 存入经验 23 | def push(self, *args): 24 | if len(self.memory) < self.capacity: 25 | self.memory.append(None) 26 | # 存入经验 27 | self.memory[self.position] = Experience(*args) 28 | # 记录最新经验所在位置 29 | self.position = (self.position + 1) % self.capacity 30 | 31 | # 采样 32 | def sample(self, batch_size): 33 | return sample(self.memory, batch_size) 34 | 35 | def __len__(self): 36 | return len(self.memory) 37 | 38 | 39 | class ExperienceBuffer: 40 | def __init__(self, capacity): 41 | self.maxLen = capacity 42 | self.buffer = collections.deque(maxlen=capacity) # 队列,先进先出 43 | 44 | def __len__(self): 45 | return len(self.buffer) 46 | 47 | def append(self, experience: Experience): 48 | self.buffer.append(experience) 49 | 50 | def sample(self, batch_size): 51 | indices = np.random.choice(len(self.buffer), batch_size, replace=False) 52 | cur_otherState, cur_TaskState, taskAction, aimAction, resourceAction, rewards, next_otherState, next_TaskState = zip( 53 | *[self.buffer[idx] for idx in indices]) 54 | # 转换成numpy 55 | return np.array(cur_otherState), np.array(cur_TaskState), \ 56 | np.array(taskAction), np.array(aimAction), np.array(resourceAction), \ 57 | np.array(rewards, dtype=np.float32), \ 58 | np.array(next_otherState), np.array(next_TaskState) 59 | 60 | # 清空 61 | def clear(self): 62 | self.buffer = collections.deque(maxlen=self.maxLen) 63 | -------------------------------------------------------------------------------- /experiment/model.py: -------------------------------------------------------------------------------- 1 | import ptan 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn import functional as F 6 | from torch.distributions.categorical import Categorical 7 | 8 | HID_SIZE = 64 9 | 10 | 11 | class ModelActor(nn.Module): 12 | def __init__(self, obs_dim, act_dim, task_dim): 13 | super(ModelActor, self).__init__() 14 | self.act_dim = act_dim 15 | 16 | self.mu = nn.Sequential( 17 | nn.Linear(obs_dim + HID_SIZE, HID_SIZE * 2), 18 | nn.ReLU(), 19 | nn.Linear(HID_SIZE * 2, HID_SIZE), 20 | nn.ReLU(), 21 | nn.Linear(HID_SIZE, act_dim * 2), 22 | ) 23 | self.cnn = CNNLayer(task_dim, HID_SIZE) 24 | 25 | def forward(self, x, task): 26 | cnn_out = self.cnn(task) 27 | x = torch.cat((x, cnn_out), -1) 28 | out = self.mu(x) 29 | action_out = out[:, :self.act_dim] 30 | task_out = out[:, self.act_dim:] 31 | action_pro = F.softmax(action_out, dim=-1) 32 | task_pro = F.softmax(task_out, dim=1) 33 | print(action_pro) 34 | print(task_pro) 35 | return action_pro, Categorical(action_pro), task_pro, Categorical(task_pro) 36 | 37 | 38 | class ModelCritic(nn.Module): 39 | def __init__(self, obs_size): 40 | super(ModelCritic, self).__init__() 41 | 42 | self.value = nn.Sequential( 43 | nn.Linear(obs_size, HID_SIZE), 44 | nn.ReLU(), 45 | nn.Linear(HID_SIZE, HID_SIZE), 46 | nn.ReLU(), 47 | nn.Linear(HID_SIZE, 1), 48 | ) 49 | 50 | def forward(self, x): 51 | return self.value(x) 52 | 53 | 54 | class ModelSACTwinQ(nn.Module): 55 | def __init__(self, obs_size, act_size): 56 | super(ModelSACTwinQ, self).__init__() 57 | 58 | self.q1 = nn.Sequential( 59 | nn.Linear(obs_size + act_size, HID_SIZE), 60 | nn.ReLU(), 61 | nn.Linear(HID_SIZE, HID_SIZE), 62 | nn.ReLU(), 63 | nn.Linear(HID_SIZE, 1), 64 | ) 65 | 66 | self.q2 = nn.Sequential( 67 | nn.Linear(obs_size + act_size, HID_SIZE), 68 | nn.ReLU(), 69 | nn.Linear(HID_SIZE, HID_SIZE), 70 | nn.ReLU(), 71 | nn.Linear(HID_SIZE, 1), 72 | ) 73 | 74 | def forward(self, obs, act): 75 | x = torch.cat([obs, act], dim=1) 76 | 
return self.q1(x), self.q2(x) 77 | 78 | 79 | class AgentDDPG(ptan.agent.BaseAgent): 80 | """ 81 | Agent implementing Orstein-Uhlenbeck exploration process 82 | """ 83 | 84 | def __init__(self, net, device="cpu", ou_enabled=True, 85 | ou_mu=0.0, ou_teta=0.15, ou_sigma=0.2, 86 | ou_epsilon=1.0): 87 | self.net = net 88 | self.device = device 89 | self.ou_enabled = ou_enabled 90 | self.ou_mu = ou_mu 91 | self.ou_teta = ou_teta 92 | self.ou_sigma = ou_sigma 93 | self.ou_epsilon = ou_epsilon 94 | 95 | def initial_state(self): 96 | return None 97 | 98 | def __call__(self, states, agent_states): 99 | states_v = ptan.agent.float32_preprocessor(states) 100 | states_v = states_v.to(self.device) 101 | mu_v = self.net(states_v) 102 | actions = mu_v.data.cpu().numpy() 103 | 104 | if self.ou_enabled and self.ou_epsilon > 0: 105 | new_a_states = [] 106 | for a_state, action in zip(agent_states, actions): 107 | if a_state is None: 108 | a_state = np.zeros( 109 | shape=action.shape, dtype=np.float32) 110 | a_state += self.ou_teta * (self.ou_mu - a_state) 111 | a_state += self.ou_sigma * np.random.normal( 112 | size=action.shape) 113 | 114 | action += self.ou_epsilon * a_state 115 | new_a_states.append(a_state) 116 | else: 117 | new_a_states = agent_states 118 | 119 | actions = np.clip(actions, -1, 1) 120 | return actions, new_a_states 121 | 122 | 123 | class DQN(nn.Module): 124 | def __init__(self, obs_dim, task_dim, taskAction_dim, aimAction_dim, resourceAction_dim): 125 | super(DQN, self).__init__() 126 | self.input_layer = nn.Linear(obs_dim + 32, 128) 127 | self.hidden1 = nn.Linear(128, 64) 128 | self.hidden2 = nn.Linear(64, 64) 129 | self.hidden3 = nn.Linear(64, 128) 130 | self.cnn = CNNLayer(task_dim, 32) 131 | self.output_layer1 = self.common(64, taskAction_dim) 132 | self.output_layer2 = self.common(64, aimAction_dim) 133 | self.output_layer3 = self.common(64, resourceAction_dim) 134 | 135 | def common(self, input_dim, action_dim): 136 | return nn.Sequential( 137 | nn.Linear(input_dim, 128), 138 | nn.ReLU(), 139 | self.hidden1, 140 | nn.ReLU(), 141 | self.hidden2, 142 | nn.ReLU(), 143 | nn.Linear(64, action_dim) 144 | ) 145 | 146 | def forward(self, x, task): 147 | """ 148 | 149 | :param x: batch_size*state_n 150 | :return: batch_size*actions_n 输出每个动作对应的q值 151 | """ 152 | # 任务卷积层 153 | cnn_out = self.cnn(task) 154 | x = torch.cat((x, cnn_out), -1) 155 | 156 | # 公共层 157 | x1 = F.relu(self.input_layer(x)) 158 | x2 = F.relu(self.hidden1(x1)) 159 | x3 = F.relu(self.hidden2(x2)) 160 | 161 | taskActionValue = self.output_layer1(x3) 162 | aimActionValue = self.output_layer2(x3) 163 | resourceActionValue = self.output_layer3(x3) 164 | 165 | return taskActionValue, aimActionValue, resourceActionValue 166 | 167 | 168 | class CNNLayer(nn.Module): 169 | def __init__(self, obs_shape, hidden_size, use_orthogonal=True, use_ReLU=True, kernel_size=3, stride=1): 170 | super(CNNLayer, self).__init__() 171 | 172 | active_func = [nn.Tanh(), nn.ReLU()][use_ReLU] 173 | init_method = [nn.init.xavier_uniform_, nn.init.orthogonal_][use_orthogonal] 174 | gain = nn.init.calculate_gain(['tanh', 'relu'][use_ReLU]) 175 | 176 | def init_(m): # 权重使用正交初始化,激活函数使用relu 177 | return init(m, init_method, lambda x: nn.init.constant_(x, 0), gain=gain) 178 | 179 | input_channel = obs_shape[0] 180 | input_width = obs_shape[1] 181 | input_height = obs_shape[2] 182 | 183 | self.cnn = nn.Sequential( 184 | init_(nn.Conv2d(in_channels=input_channel, 185 | out_channels=hidden_size // 2, 186 | kernel_size=kernel_size, 187 | stride=stride) 188 | ), 
189 | active_func, 190 | nn.Flatten(), 191 | init_(nn.Linear( 192 | hidden_size // 2 * (input_width - kernel_size + stride) * (input_height - kernel_size + stride), 193 | hidden_size) 194 | ), 195 | active_func, 196 | init_(nn.Linear(hidden_size, hidden_size)), active_func) 197 | 198 | def forward(self, x): 199 | x = x / 255.0 200 | x = self.cnn(x) 201 | 202 | return x 203 | 204 | 205 | def init(module, weight_init, bias_init, gain=1): 206 | weight_init(module.weight.data, gain=gain) 207 | bias_init(module.bias.data) 208 | return module 209 | -------------------------------------------------------------------------------- /experiment/task.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | np.random.seed(2) 4 | 5 | 6 | class Task: 7 | """ 8 | 定义任务类型 9 | """ 10 | 11 | def __init__(self, vehicle, createTime): 12 | # 产生任务的车辆 13 | self.vehicle = vehicle 14 | # 完成该任务所消耗的资源 15 | self.aim = None # 传送对象 16 | 17 | self.size = np.random.uniform(0.2, 1) # Mb 18 | self.cycle = np.random.randint(20, 50) # cycle/bit 19 | 20 | self.max_time = 50 # ms 最大容忍时间 21 | self.need_trans_size = self.size * np.power(2, 10) # Kb 还剩余多少未传输完成 22 | self.need_precess_cycle = self.cycle * self.size * 1000 # Mb * cycle/byte =M cycle 还剩余多少轮次未完成(10^6) 23 | 24 | self.rate = 0 # 当前速率 25 | 26 | self.compute_resource = 0 27 | self.hold_on_time = 0 28 | 29 | self.create_time = createTime # 任务产生时间 30 | self.pick_time = 0 # 被选择的时间(出队列时间) 31 | 32 | # 完成该任务所消耗的cup资源 33 | self.energy = 0 34 | self.trans_time = 0 # 传输所需要的时间(实际) 35 | self.precess_time = 0 # 任务处理所需要的时间(实际) 36 | -------------------------------------------------------------------------------- /experiment/vehicle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import random 3 | 4 | import numpy as np 5 | 6 | from memory import ExperienceBuffer 7 | from task import Task 8 | 9 | Dv = 50 # 车的最大通信范围 10 | Fv = 4000 # 车最大计算能力 MHZ 11 | alpha = 0.25 12 | MAX_TASK = 10 # 任务队列最大长度 13 | 14 | CAPACITY = 20000 # 缓冲池大小 15 | TASK_DISTRIBUTE = 4 # 可分的任务段数 16 | TASK_SOLT = 20 # 任务产生时隙 17 | 18 | np.random.seed(0) 19 | 20 | 21 | class Vehicle: 22 | # 位置:x,y 速度、方向:-1左,1右 23 | def __init__(self, id, loc_x, loc_y, direction, velocity=20): 24 | # 车的位置信息 25 | self.loc_x = loc_x 26 | self.loc_y = loc_y 27 | self.loc = [loc_x, loc_y] 28 | self.velocity = velocity # m/s 29 | self.direction = direction 30 | self.id = id 31 | # 功率和信道增益 32 | self.alpha = alpha 33 | # 通信范围 34 | self.range = Dv 35 | # 邻居表 36 | self.neighbor = [] 37 | # 最近的mec 38 | self.mec_lest = None 39 | # 当前时间 40 | self.cur_frame = 0 41 | # 接受的任务的列表 42 | self.accept_task = [] 43 | # 接受任务的数量 44 | self.sum_needDeal_task = 0 45 | # 此时刻有多少动作选则我 46 | self.len_action = 0 47 | # 当前可用资源 48 | self.resources = round((1 - np.random.randint(1, 5) / 10) * Fv, 2) # MHz 49 | # 表示当前是否有任务正在传输(0:没有,1:有) 50 | self.trans_task = 0 51 | # 当前处理的任务(用于计算奖励,不用于状态信息) 52 | self.cur_task = None 53 | # 任务队列 54 | self.total_task = [] 55 | # 任务队列的长度 56 | self.len_task = len(self.total_task) 57 | # 当前状态信息 58 | self.otherState = [] 59 | # 当前任务队列状态 60 | self.taskState = [] 61 | # 去除邻居的状态信息用于邻居车观察和全局critic的处理 62 | self.excludeNeighbor_state = [] 63 | # 缓冲池 64 | self.buffer = ExperienceBuffer(capacity=CAPACITY) 65 | # 总奖励 66 | self.reward = [] 67 | # 任务溢出的数量 68 | self.overflow = 0 69 | # 需等待时长 70 | self.hold_on = 0 71 | # 上一个任务产生的时间 72 | self.lastCreatWorkTime = 0 73 | 74 | self.create_work() 75 | 76 | # 获得位置 77 | @property 78 | def 
get_location(self): 79 | return self.loc 80 | 81 | # 设置位置 82 | def set_location(self, loc_x, loc_y): 83 | self.loc_x = loc_x 84 | self.loc_y = loc_y 85 | self.loc = [self.loc_x, self.loc_y] 86 | 87 | # 获得x 88 | @property 89 | def get_x(self): 90 | return self.loc_x 91 | 92 | # 获得y 93 | @property 94 | def get_y(self): 95 | return self.loc_y 96 | 97 | # 产生任务 传入当前时间 98 | def create_work(self): 99 | # 每隔一段时间进行一次任务产生 100 | if (self.cur_frame - self.lastCreatWorkTime) % TASK_SOLT == 0: 101 | # 每次有0.6的概率产生任务 102 | if random.random() < 0.6: 103 | if self.len_task < MAX_TASK: # 队列不满 104 | task = Task(self, self.cur_frame) 105 | self.lastCreatWorkTime = self.cur_frame 106 | self.total_task.append(task) 107 | self.len_task += 1 108 | print("第{}辆车产生了任务".format(self.id)) 109 | self.overflow = 0 110 | else: 111 | print("第{}辆车任务队列已满".format(self.id)) 112 | self.overflow += 1 113 | 114 | """ 115 | 获得状态 116 | """ 117 | 118 | def get_state(self): 119 | self.otherState = [] 120 | self.excludeNeighbor_state = [] 121 | self.taskState = [] 122 | 123 | # 位置信息 4 124 | self.otherState.extend(self.loc) 125 | self.otherState.append(self.velocity) 126 | self.otherState.append(self.direction) 127 | self.excludeNeighbor_state.extend(self.loc) 128 | self.excludeNeighbor_state.append(self.velocity) 129 | self.excludeNeighbor_state.append(self.direction) 130 | 131 | # 资源信息(可用资源) 132 | self.otherState.append(self.resources) 133 | self.excludeNeighbor_state.append(self.resources) 134 | 135 | # 当前是否有任务在传输 136 | self.excludeNeighbor_state.append(self.trans_task) 137 | self.otherState.append(self.trans_task) 138 | 139 | # 正在传输的任务信息 140 | # if self.trans_task is not None: 141 | # self.otherState.append(self.trans_task.need_trans_size) 142 | # self.excludeNeighbor_state.append(self.trans_task.need_trans_size) 143 | # else: 144 | # self.otherState.append(0) 145 | # self.excludeNeighbor_state.append(0) 146 | self.otherState.append(self.len_task) # 当前队列长度 147 | self.excludeNeighbor_state.append(self.len_task) 148 | 149 | # 邻居表 7*数量 150 | for neighbor in self.neighbor: 151 | self.otherState.extend(neighbor.position) # 位置 152 | self.otherState.append(neighbor.velocity) # 速度 153 | self.otherState.append(neighbor.direction) # 方向 154 | self.otherState.append(neighbor.resources) # 可用资源 155 | 156 | # 最近mec的状态 6 157 | if self.mec_lest is not None: 158 | self.otherState.extend(self.mec_lest.get_state()) 159 | 160 | # 任务状态信息 161 | for i in range(MAX_TASK): 162 | if i < self.len_task: 163 | task = self.total_task[i] 164 | self.taskState.append([task.create_time, task.need_trans_size, task.need_precess_cycle, task.max_time]) 165 | else: 166 | self.taskState.append([0, 0, 0, 0]) 167 | 168 | return self.excludeNeighbor_state 169 | -------------------------------------------------------------------------------- /experiment2/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 环境2 3 | (两个动作:选择任务和选择对象) 4 | 使用dqn训练模型 5 | 使用经典城市道路(使用不同数量车辆和邻居) 6 | """ 7 | -------------------------------------------------------------------------------- /experiment2/dqn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import time 4 | from collections import namedtuple 5 | 6 | import matplotlib 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | from pylab import mpl 13 | import netron 14 | 15 | from env import Env 16 | from model import DQN 17 | 18 
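The `DQN` imported above (defined in `model.py`) takes the flattened per-vehicle state plus the task-queue matrix, whose numpy shape is passed straight to the CNN branch as (channels, width, height). A quick shape-check sketch under assumed dimensions (the placeholder `obs_dim` and the 3-feature task rows mirror experiment2's `vehicle.py`; the real values come from `Env`):

```python
import torch

from model import DQN

MAX_TASK = 10
obs_dim = 40                    # placeholder for the flattened vehicle state length; the real value comes from Env
task_shape = (1, MAX_TASK, 3)   # experiment2 stores [need_trans_size, need_precess_cycle, max_time] per task

net = DQN(obs_dim, task_shape, taskAction_dim=MAX_TASK, aimAction_dim=7)  # 7 = local + MEC + 5 neighbors (assumed)

state_v = torch.zeros((1, obs_dim), dtype=torch.float32)        # batch of one flattened state
task_v = torch.zeros((1, 1, MAX_TASK, 3), dtype=torch.float32)  # batch x channel x MAX_TASK x 3
task_q, aim_q = net(state_v, task_v)
print(task_q.shape, aim_q.shape)  # torch.Size([1, 10]) torch.Size([1, 7]): one Q-value per discrete sub-action
```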
| # 设置显示中文字体 19 | mpl.rcParams["font.sans-serif"] = ["SimHei"] 20 | matplotlib.rcParams['axes.unicode_minus'] = False 21 | 22 | Experience = namedtuple('Transition', 23 | field_names=['cur_otherState', 'cur_TaskState', # 状态 24 | 'taskAction', 'aimAction', # 动作 25 | 'reward', # 奖励 26 | 'next_otherState', 'next_TaskState']) # Define a transition tuple 27 | GAMMA = 0.99 28 | BATCH_SIZE = 64 29 | REPLAY_SIZE = 10000 30 | LEARNING_RATE = 1e-4 31 | SYNC_TARGET_FRAMES = 1000 32 | 33 | EPSILON_DECAY_LAST_FRAME = 150000 34 | EPSILON_START = 0.6 35 | EPSILON_FINAL = 0.01 36 | 37 | RESET = 100000 # 重置游戏次数 38 | 39 | MAX_TASK = 10 # 任务队列最大长度 40 | 41 | momentum = 0.005 42 | 43 | RESOURCE = [0.2, 0.4, 0.6, 0.8] 44 | 45 | 46 | @torch.no_grad() 47 | def play_step(env, epsilon, models): 48 | vehicles = env.vehicles 49 | old_otherState = [] 50 | old_taskState = [] 51 | 52 | actionTask = [] 53 | actionAim = [] 54 | # 贪心选择动作 55 | for i, model in enumerate(models): 56 | old_otherState.append(vehicles[i].self_state) 57 | old_taskState.append(vehicles[i].task_state) 58 | if np.random.random() < epsilon: 59 | # 随机动作 60 | actionTask.append(np.random.randint(0, 10)) 61 | actionAim.append(np.random.randint(0, 7)) # local+mec+neighbor 62 | else: 63 | state_v = torch.tensor([vehicles[i].self_state], dtype=torch.float32) 64 | taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 65 | taskAction, aimAction = model(state_v, taskState_v) 66 | 67 | taskAction = np.array(taskAction, dtype=np.float32).reshape(-1) 68 | aimAction = np.array(aimAction, dtype=np.float32).reshape(-1) 69 | 70 | actionAim.append(np.argmax(aimAction)) 71 | actionTask.append(np.argmax(taskAction)) 72 | # print("action:", action) 73 | _, _, _, otherState, _, taskState, Reward, reward = env.step(actionTask, actionAim) 74 | # print("reward:", reward) 75 | 76 | # 加入各自的缓存池【当前其他状态、当前任务状态、目标动作、任务动作,下一其他状态、下一任务状态】 77 | for i, vehicle in enumerate(vehicles): 78 | exp = Experience(old_otherState[i], [old_taskState[i]], 79 | actionTask[i], actionAim[i], 80 | reward[i], 81 | otherState[i], [taskState[i]]) 82 | vehicle.buffer.append(exp) 83 | return round(Reward, 2) # 返回总的平均奖励 84 | 85 | 86 | # 计算一个智能体的损失 87 | def calc_loss(batch, net: DQN, tgt_net: DQN, device="cpu"): 88 | cur_otherState, cur_TaskState, taskAction, aimAction, rewards, next_otherState, next_TaskState = batch # 89 | 90 | otherStates_v = torch.tensor(np.array(cur_otherState, copy=False), dtype=torch.float32).to(device) 91 | taskStates_v = torch.tensor(np.array(cur_TaskState, copy=False), dtype=torch.float32).to(device) 92 | # print("states_v:", states_v) # batch状态 93 | taskActions_v = torch.tensor(np.array(taskAction), dtype=torch.int64).to(device) 94 | aimActions_v = torch.tensor(np.array(aimAction), dtype=torch.int64).to(device) 95 | # print("actions_v", actions_v) # batch动作 96 | rewards_v = torch.tensor(np.array(rewards), dtype=torch.float32).to(device) 97 | # print("rewards_v", rewards_v) # batch奖励 98 | next_otherStates_v = torch.tensor(np.array(next_otherState, copy=False), dtype=torch.float32).to(device) 99 | next_taskStates_v = torch.tensor(np.array(next_TaskState, copy=False), dtype=torch.float32).to(device) 100 | # print("next_states_v", next_states_v) # batch下一个状态 101 | 102 | # 计算当前网络q值 103 | taskActionValues, aimActionValues = net(otherStates_v, 104 | taskStates_v) # .gather(1, aimActions_v.unsqueeze(-1)).squeeze(-1) 105 | taskActionValues = taskActionValues.gather(1, taskActions_v.unsqueeze(-1)).squeeze(-1) 106 | aimActionValues = aimActionValues.gather(1, 
aimActions_v.unsqueeze(-1)).squeeze(-1) 107 | 108 | # 计算目标网络q值 109 | next_taskActionValues, next_aimActionValues = tgt_net(next_otherStates_v, 110 | next_taskStates_v) # .max(1)[0] # 得到最大的q值 111 | 112 | next_taskActionValues = next_taskActionValues.max(1)[0].detach() 113 | next_aimActionValues = next_aimActionValues.max(1)[0].detach() 114 | 115 | # 防止梯度流入用于计算下一状态q近似值得NN 116 | # next_states_values = next_aimActionValues.detach() 117 | # print("next_states_values", next_states_values) 118 | expected_aim_values = next_aimActionValues * GAMMA + rewards_v 119 | expected_task_values = next_taskActionValues * GAMMA + rewards_v 120 | # print(" expected_state_values", expected_state_values) 121 | 122 | return nn.MSELoss()(taskActionValues, expected_task_values), nn.MSELoss()(aimActionValues, expected_aim_values) 123 | 124 | 125 | if __name__ == '__main__': 126 | env = Env() 127 | env.reset() 128 | 129 | frame_idx = 0 130 | # writer = SummaryWriter(comment="-" + env.__doc__) 131 | agents = env.vehicles 132 | models = [] 133 | tgt_models = [] 134 | optimizers = [] 135 | for agent in agents: 136 | # print(agent.get_location, agent.velocity) 137 | task_shape = np.array([agent.task_state]).shape 138 | # print(task_shape) 139 | model = DQN(len(agent.self_state), task_shape, MAX_TASK, len(agent.neighbor) + 2) 140 | models.append(model) 141 | optimer = optim.RMSprop(params=model.parameters(), lr=LEARNING_RATE, momentum=momentum) 142 | optimizers.append(optimer) 143 | for agent in agents: 144 | # print(agent.get_location, agent.velocity) 145 | task_shape = np.array([agent.task_state]).shape 146 | # print(task_shape) 147 | model = DQN(len(agent.self_state), task_shape, MAX_TASK, len(agent.neighbor) + 2) 148 | model.load_state_dict(models[agent.id].state_dict()) 149 | tgt_models.append(model) 150 | 151 | # 打印网络结构 152 | # model = models[0] 153 | # state_v = torch.tensor([env.vehicles[0].otherState], dtype=torch.float32) 154 | # taskState_v = torch.tensor([[env.vehicles[0].taskState]], dtype=torch.float32) 155 | # # 针对有网络模型,但还没有训练保存 .pth 文件的情况 156 | # modelpath = "./netStruct/demo.onnx" # 定义模型结构保存的路径 157 | # torch.onnx.export(model, (state_v, taskState_v), modelpath) # 导出并保存 158 | # netron.start(modelpath) 159 | 160 | total_reward = [] 161 | recent_reward = [] 162 | loss_task_list = [] 163 | loss_aim_list = [] 164 | reward_1 = [] 165 | 166 | epsilon = EPSILON_START 167 | eliposde = 500000 168 | while eliposde > 0: 169 | # 重置游戏 170 | if frame_idx % RESET == 0: 171 | print("游戏重置") 172 | env.reset() 173 | agents = env.vehicles 174 | 175 | frame_idx += 1 176 | print("the {} steps".format(frame_idx)) 177 | epsilon = max(EPSILON_FINAL, EPSILON_START - frame_idx / EPSILON_DECAY_LAST_FRAME) 178 | reward = play_step(env, epsilon, models) 179 | total_reward.append(reward) 180 | print("current reward:", reward) 181 | print("current 100 times total rewards:", np.mean(total_reward[-100:])) 182 | recent_reward.append(np.mean(total_reward[-100:])) 183 | if np.mean(total_reward[-100:]) > 0.7: 184 | break 185 | 186 | for i, agent in enumerate(agents): 187 | # print("length of {} buffer".format(agent.id), len(agent.buffer)) 188 | if len(agent.buffer) < REPLAY_SIZE: # 缓冲池要足够大 189 | continue 190 | if frame_idx % SYNC_TARGET_FRAMES == 0: # 更新目标网络 191 | tgt_models[i].load_state_dict(models[i].state_dict()) 192 | optimizers[i].zero_grad() 193 | batch = agent.buffer.sample(BATCH_SIZE) 194 | loss_task, loss_aim = calc_loss(batch, models[i], tgt_models[i]) 195 | # print("loss:", loss_task, " ", loss_aim) 196 | # loss_t.backward() 197 | 
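# Backward over both heads at once: the gradients of loss_task and loss_aim accumulate into the shared DQN layers, equivalent to (loss_task + loss_aim).backward()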
torch.autograd.backward([loss_task, loss_aim]) 198 | optimizers[i].step() 199 | if agent.id == 0: 200 | print("cur_loss:", loss_task.item()) 201 | print("cur_aim_loss", loss_aim.item()) 202 | loss_task_list.append(loss_task.item()) 203 | loss_aim_list.append(loss_aim.item()) 204 | reward_1.append(env.reward[0]) 205 | eliposde -= 1 206 | 207 | cur_time = time.strftime("%Y-%m-%d-%H-%M", time.localtime(time.time())) 208 | # 创建文件夹 209 | os.makedirs("D:/pycharm/Project/VML/MyErion/experiment2/result/" + cur_time) 210 | for i, vehicle in enumerate(env.vehicles): 211 | # 保存每个网络模型 212 | torch.save(tgt_models[i].state_dict(), 213 | "D:/pycharm/Project/VML/MyErion/experiment2/result/" + cur_time + "/vehicle" + str(i) + ".pkl") 214 | 215 | plt.plot(range(len(recent_reward)), recent_reward) 216 | plt.title("奖励曲线") 217 | plt.show() 218 | 219 | plt.plot(range(len(loss_task_list)), loss_task_list) 220 | plt.title("任务选择损失曲线") 221 | plt.show() 222 | 223 | plt.plot(range(len(loss_aim_list)), loss_aim_list) 224 | plt.title("目标选择损失曲线") 225 | plt.show() 226 | 227 | plt.plot(range(1000), reward_1[-1000:]) 228 | plt.title("车辆一奖励曲线") 229 | plt.show() 230 | -------------------------------------------------------------------------------- /experiment2/env_test.py: -------------------------------------------------------------------------------- 1 | from env import Env 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | if __name__ == '__main__': 6 | print() 7 | env = Env() 8 | env.reset() 9 | # 测试找最邻近的mec 10 | # for vehicle in env.vehicles: 11 | # print("vehicle{} location:".format(vehicle.id),vehicle.get_location) 12 | # for mec in env.MECs: 13 | # print("mec{} location:".format(mec.id),mec.get_location) 14 | # for vehicle in env.vehicles: 15 | # print(vehicle.mec_lest.get_location, end=" ") 16 | 17 | # 测试网络节点数 18 | task = np.array(env.taskState) 19 | print(task.shape) 20 | vehicles = env.vehicles 21 | # print(vehicles[0].actor1) 22 | # print(vehicles[0].target_actor1) 23 | # print(vehicles[0].state) 24 | # print(vehicles[0].get_state()) 25 | # print(len(vehicles[0].state)) 26 | # print(len(env.state)) 27 | 28 | # 测试更新邻居表 29 | # for vehicle in vehicles: 30 | # print(vehicle.get_location) 31 | # 32 | # print("-----------------------------------") 33 | # for vehicle in vehicles: 34 | # for i in vehicle.neighbor: 35 | # print(i.id, end=" ") 36 | # print() 37 | # 测试更新total——task 38 | # list = [vehicles[0],vehicles[1],vehicles[2],vehicles[3],vehicles[4]] 39 | # print(list) 40 | # for i in reversed(list): 41 | # if i.id >=2: 42 | # list.remove(i) 43 | # else: 44 | # break 45 | # print(list) 46 | # list=[[]]*5 47 | # print(list) 48 | for vehicle in vehicles: 49 | print("第{}车状态:{}".format(vehicle.id, vehicle.self_state)) 50 | print("该车邻居:") 51 | for i in vehicle.neighbor: 52 | print(i.id, end=" ") 53 | print() 54 | 55 | # 测试环境运行 56 | x = [[] for i in range(20)] 57 | y = [[] for i in range(20)] 58 | for i in range(10000): 59 | for j in range(20): 60 | x[j].append(env.vehicles[j].position[0]) 61 | y[j].append(env.vehicles[j].position[1]) 62 | action1 = [] 63 | action2 = [] 64 | action3 = [] 65 | for j in range(20): 66 | action1.append(np.random.randint(0, 10)) 67 | # action1.append(0) 68 | action2.append(np.random.randint(0, 7)) 69 | # action2.append(0) 70 | # action3.append(round(np.random.random(), 2)) 71 | action3.append(0.8) 72 | other_state, task_state, vehicle_state, _, _, _, Reward, _ = env.step(action1, action2) 73 | print("第{}次平均奖励{}".format(i, Reward)) 74 | # print("当前状态:", state) 75 | # 
print("下一状态:", next_state) 76 | # print("车状态:", vehicleState) 77 | # print("任务状态", taskState) 78 | # print("当前奖励:", reward) 79 | # print("每个奖励,", vehicleReward) 80 | # print("当前有{}任务没有传输完成".format(len(env.need_trans_task))) 81 | # print("average reward:", env.Reward) 82 | plt.figure(figsize=(100, 100)) 83 | fix, ax = plt.subplots(5, 4) 84 | 85 | for i in range(5): 86 | for j in range(4): 87 | number = i * 4 + j 88 | ax[i, j].plot(x[number], y[number]) 89 | ax[i, j].set_title('vehicle {}'.format(number)) 90 | plt.show() 91 | -------------------------------------------------------------------------------- /experiment2/mec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | RANGE_MEC = 200 # MEC通信范围 4 | RESOURCE = 20000 # 可用资源 MHz 5 | 6 | 7 | # 边缘服务器 8 | class MEC: 9 | def __init__(self, position, resources=RESOURCE): 10 | self.loc_x = position[0] 11 | self.loc_y = position[1] 12 | self.loc = position 13 | # 当前可用资源 MHz 14 | self.resources = resources 15 | self.state = [] 16 | # 通信范围 m 17 | self.range = RANGE_MEC 18 | # 当前接到需要处理的任务信息 19 | self.accept_task = [] 20 | # 接受任务的数量 21 | self.sum_needDeal_task = 0 22 | # 此时刻有多少动作选则我 多少任务选择传输给我 23 | self.len_action = 0 24 | # 当前状态 25 | self.get_state() 26 | 27 | @property 28 | def get_x(self): 29 | return self.loc_x 30 | 31 | @property 32 | def get_y(self): 33 | return self.loc_y 34 | 35 | @property 36 | def get_location(self): 37 | return self.loc 38 | 39 | """ 40 | 获得状态 41 | """ 42 | 43 | def get_state(self): 44 | """ 45 | :return:state 维度:[loc_x,loc_y,sum_needDeal_task,resources] 46 | """ 47 | self.state = [] 48 | self.state.extend(self.loc) 49 | self.state.append(self.sum_needDeal_task) 50 | self.state.append(self.len_action) 51 | self.state.append(self.resources) 52 | return self.state 53 | -------------------------------------------------------------------------------- /experiment2/memory.py: -------------------------------------------------------------------------------- 1 | # 经验类型 2 | import collections 3 | from collections import namedtuple 4 | from random import sample 5 | import numpy as np 6 | 7 | Experience = namedtuple('Transition', 8 | field_names=['state', 'action', 'reward', 'next_state']) # Define a transition tuple 9 | 10 | 11 | class ReplayMemory(object): # Define a replay memory 12 | 13 | # 初始化缓冲池 14 | def __init__(self, capacity): 15 | # 最大容量 16 | self.capacity = capacity 17 | # 缓冲池经验 18 | self.memory = [] 19 | # ? 
20 | self.position = 0 21 | 22 | # 存入经验 23 | def push(self, *args): 24 | if len(self.memory) < self.capacity: 25 | self.memory.append(None) 26 | # 存入经验 27 | self.memory[self.position] = Experience(*args) 28 | # 记录最新经验所在位置 29 | self.position = (self.position + 1) % self.capacity 30 | 31 | # 采样 32 | def sample(self, batch_size): 33 | return sample(self.memory, batch_size) 34 | 35 | def __len__(self): 36 | return len(self.memory) 37 | 38 | 39 | class ExperienceBuffer: 40 | def __init__(self, capacity): 41 | self.maxLen = capacity 42 | self.buffer = collections.deque(maxlen=capacity) # 队列,先进先出 43 | 44 | def __len__(self): 45 | return len(self.buffer) 46 | 47 | def append(self, experience: Experience): 48 | self.buffer.append(experience) 49 | 50 | def sample(self, batch_size): 51 | indices = np.random.choice(len(self.buffer), batch_size, replace=False) 52 | cur_otherState, cur_TaskState, taskAction, aimAction, rewards, next_otherState, next_TaskState = zip( 53 | *[self.buffer[idx] for idx in indices]) 54 | # 转换成numpy 55 | return np.array(cur_otherState), np.array(cur_TaskState), \ 56 | np.array(taskAction), np.array(aimAction), \ 57 | np.array(rewards, dtype=np.float32), \ 58 | np.array(next_otherState), np.array(next_TaskState) 59 | 60 | # 清空 61 | def clear(self): 62 | self.buffer = collections.deque(maxlen=self.maxLen) 63 | -------------------------------------------------------------------------------- /experiment2/model.py: -------------------------------------------------------------------------------- 1 | import ptan 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn import functional as F 6 | from torch.distributions.categorical import Categorical 7 | 8 | HID_SIZE = 64 9 | 10 | 11 | class ModelActor(nn.Module): 12 | def __init__(self, obs_dim, act_dim, task_dim): 13 | super(ModelActor, self).__init__() 14 | self.act_dim = act_dim 15 | 16 | self.mu = nn.Sequential( 17 | nn.Linear(obs_dim + HID_SIZE, HID_SIZE * 2), 18 | nn.ReLU(), 19 | nn.Linear(HID_SIZE * 2, HID_SIZE), 20 | nn.ReLU(), 21 | nn.Linear(HID_SIZE, act_dim * 2), 22 | ) 23 | self.cnn = CNNLayer(task_dim, HID_SIZE) 24 | 25 | def forward(self, x, task): 26 | cnn_out = self.cnn(task) 27 | x = torch.cat((x, cnn_out), -1) 28 | out = self.mu(x) 29 | action_out = out[:, :self.act_dim] 30 | task_out = out[:, self.act_dim:] 31 | action_pro = F.softmax(action_out, dim=-1) 32 | task_pro = F.softmax(task_out, dim=1) 33 | print(action_pro) 34 | print(task_pro) 35 | return action_pro, Categorical(action_pro), task_pro, Categorical(task_pro) 36 | 37 | 38 | class ModelCritic(nn.Module): 39 | def __init__(self, obs_size): 40 | super(ModelCritic, self).__init__() 41 | 42 | self.value = nn.Sequential( 43 | nn.Linear(obs_size, HID_SIZE), 44 | nn.ReLU(), 45 | nn.Linear(HID_SIZE, HID_SIZE), 46 | nn.ReLU(), 47 | nn.Linear(HID_SIZE, 1), 48 | ) 49 | 50 | def forward(self, x): 51 | return self.value(x) 52 | 53 | 54 | class ModelSACTwinQ(nn.Module): 55 | def __init__(self, obs_size, act_size): 56 | super(ModelSACTwinQ, self).__init__() 57 | 58 | self.q1 = nn.Sequential( 59 | nn.Linear(obs_size + act_size, HID_SIZE), 60 | nn.ReLU(), 61 | nn.Linear(HID_SIZE, HID_SIZE), 62 | nn.ReLU(), 63 | nn.Linear(HID_SIZE, 1), 64 | ) 65 | 66 | self.q2 = nn.Sequential( 67 | nn.Linear(obs_size + act_size, HID_SIZE), 68 | nn.ReLU(), 69 | nn.Linear(HID_SIZE, HID_SIZE), 70 | nn.ReLU(), 71 | nn.Linear(HID_SIZE, 1), 72 | ) 73 | 74 | def forward(self, obs, act): 75 | x = torch.cat([obs, act], dim=1) 76 | return self.q1(x), self.q2(x) 77 | 78 | 79 
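`ModelActor` above packs two discrete heads into one output vector of length `act_dim * 2` and returns, for each head, both the softmax probabilities and a ready-made `Categorical` distribution. A small usage sketch with placeholder sizes (the real `obs_dim` and task shape come from `Env`; note that both heads share the same width `act_dim` in this implementation):

```python
import torch

obs_dim, act_dim = 40, 7    # placeholders: flattened state length and number of offload targets
task_shape = (1, 10, 3)     # channel x MAX_TASK x per-task features, as in experiment2

actor = ModelActor(obs_dim, act_dim, task_shape)
obs = torch.zeros((1, obs_dim))
task = torch.zeros((1, 1, 10, 3))

act_pro, act_dist, task_pro, task_dist = actor(obs, task)  # forward() also prints the two probability vectors
aim_action = act_dist.sample()                             # which target to offload to
task_action = task_dist.sample()                           # task-selection head (also act_dim wide here)
log_prob = act_dist.log_prob(aim_action) + task_dist.log_prob(task_action)
```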
| class AgentDDPG(ptan.agent.BaseAgent): 80 | """ 81 | Agent implementing Orstein-Uhlenbeck exploration process 82 | """ 83 | 84 | def __init__(self, net, device="cpu", ou_enabled=True, 85 | ou_mu=0.0, ou_teta=0.15, ou_sigma=0.2, 86 | ou_epsilon=1.0): 87 | self.net = net 88 | self.device = device 89 | self.ou_enabled = ou_enabled 90 | self.ou_mu = ou_mu 91 | self.ou_teta = ou_teta 92 | self.ou_sigma = ou_sigma 93 | self.ou_epsilon = ou_epsilon 94 | 95 | def initial_state(self): 96 | return None 97 | 98 | def __call__(self, states, agent_states): 99 | states_v = ptan.agent.float32_preprocessor(states) 100 | states_v = states_v.to(self.device) 101 | mu_v = self.net(states_v) 102 | actions = mu_v.data.cpu().numpy() 103 | 104 | if self.ou_enabled and self.ou_epsilon > 0: 105 | new_a_states = [] 106 | for a_state, action in zip(agent_states, actions): 107 | if a_state is None: 108 | a_state = np.zeros( 109 | shape=action.shape, dtype=np.float32) 110 | a_state += self.ou_teta * (self.ou_mu - a_state) 111 | a_state += self.ou_sigma * np.random.normal( 112 | size=action.shape) 113 | 114 | action += self.ou_epsilon * a_state 115 | new_a_states.append(a_state) 116 | else: 117 | new_a_states = agent_states 118 | 119 | actions = np.clip(actions, -1, 1) 120 | return actions, new_a_states 121 | 122 | 123 | class DQN(nn.Module): 124 | def __init__(self, obs_dim, task_dim, taskAction_dim, aimAction_dim): 125 | super(DQN, self).__init__() 126 | self.input_layer = nn.Linear(obs_dim + 32, 128) 127 | self.hidden1 = nn.Linear(128, 64) 128 | self.hidden2 = nn.Linear(64, 64) 129 | self.hidden3 = nn.Linear(64, 128) 130 | self.cnn = CNNLayer(task_dim, 32) 131 | self.output_layer1 = self.common(64, taskAction_dim) 132 | self.output_layer2 = self.common(64, aimAction_dim) 133 | 134 | def common(self, input_dim, action_dim): 135 | return nn.Sequential( 136 | nn.Linear(input_dim, 128), 137 | nn.ReLU(), 138 | self.hidden1, 139 | nn.ReLU(), 140 | self.hidden2, 141 | nn.ReLU(), 142 | nn.Linear(64, action_dim) 143 | ) 144 | 145 | def forward(self, x, task): 146 | """ 147 | 148 | :param x: batch_size*state_n 149 | :return: batch_size*actions_n 输出每个动作对应的q值 150 | """ 151 | # 任务卷积层 152 | cnn_out = self.cnn(task) 153 | x = torch.cat((x, cnn_out), -1) 154 | 155 | # 公共层 156 | x1 = F.relu(self.input_layer(x)) 157 | x2 = F.relu(self.hidden1(x1)) 158 | x3 = F.relu(self.hidden2(x2)) 159 | 160 | taskActionValue = self.output_layer1(x3) 161 | aimActionValue = self.output_layer2(x3) 162 | 163 | return taskActionValue, aimActionValue 164 | 165 | 166 | class CNNLayer(nn.Module): 167 | def __init__(self, obs_shape, hidden_size, use_orthogonal=True, use_ReLU=True, kernel_size=3, stride=1): 168 | super(CNNLayer, self).__init__() 169 | 170 | active_func = [nn.Tanh(), nn.ReLU()][use_ReLU] 171 | init_method = [nn.init.xavier_uniform_, nn.init.orthogonal_][use_orthogonal] 172 | gain = nn.init.calculate_gain(['tanh', 'relu'][use_ReLU]) 173 | 174 | def init_(m): # 权重使用正交初始化,激活函数使用relu 175 | return init(m, init_method, lambda x: nn.init.constant_(x, 0), gain=gain) 176 | 177 | input_channel = obs_shape[0] 178 | input_width = obs_shape[1] 179 | input_height = obs_shape[2] 180 | 181 | self.cnn = nn.Sequential( 182 | init_(nn.Conv2d(in_channels=input_channel, 183 | out_channels=hidden_size // 2, 184 | kernel_size=kernel_size, 185 | stride=stride) 186 | ), 187 | active_func, 188 | nn.Flatten(), 189 | init_(nn.Linear( 190 | hidden_size // 2 * (input_width - kernel_size + stride) * (input_height - kernel_size + stride), 191 | hidden_size) 192 | ), 193 | 
active_func, 194 | init_(nn.Linear(hidden_size, hidden_size)), active_func) 195 | 196 | def forward(self, x): 197 | x = x / 255.0 198 | x = self.cnn(x) 199 | 200 | return x 201 | 202 | 203 | def init(module, weight_init, bias_init, gain=1): 204 | weight_init(module.weight.data, gain=gain) 205 | bias_init(module.bias.data) 206 | return module 207 | -------------------------------------------------------------------------------- /experiment2/task.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | np.random.seed(2) 4 | 5 | 6 | class Task: 7 | """ 8 | 定义任务类型 9 | """ 10 | 11 | def __init__(self, vehicle, createTime): 12 | # 产生任务的车辆 13 | self.vehicle = vehicle 14 | # 完成该任务所消耗的资源 15 | self.aim = None # 传送对象 16 | 17 | self.size = np.random.uniform(0.2, 1) # Mb 18 | self.cycle = np.random.randint(20, 50) # cycle/bit 19 | 20 | self.max_time = 35 # ms 最大容忍时间 21 | self.need_trans_size = self.size * np.power(2, 10) # Kb 还剩余多少未传输完成 22 | self.need_precess_cycle = self.cycle * self.size * 1000 # Mb * cycle/byte =M cycle 还剩余多少轮次未完成(10^6) 23 | 24 | self.rate = 0 # 当前速率 25 | 26 | self.compute_resource = 0 27 | 28 | self.create_time = createTime # 任务产生时间 29 | self.pick_time = 0 # 被选择的时间(出队列时间) 30 | 31 | # 完成该任务所消耗的cup资源 32 | self.energy = 0 33 | self.trans_time = 0 # 传输所需要的时间(实际) 34 | self.precess_time = 0 # 任务处理所需要的时间(实际) 35 | -------------------------------------------------------------------------------- /experiment2/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | import model as model 5 | from env import Env 6 | import matplotlib.pyplot as plt 7 | 8 | if __name__ == '__main__': 9 | env = Env() 10 | env.reset() 11 | 12 | N = env.num_Vehicles 13 | vehicles = env.vehicles 14 | models = [] 15 | 16 | task_shape = np.array([vehicles[0].task_state]).shape 17 | for i in range(N): 18 | tgt_model = model.DQN(len(vehicles[0].self_state), task_shape, 10, len(vehicles[0].neighbor) + 2) 19 | tgt_model.load_state_dict(torch.load( 20 | "D:\pycharm\Project\VML\MyErion\experiment2\\result\\2022-11-04-00-54\\vehicle{}.pkl".format(i))) 21 | models.append(tgt_model) 22 | 23 | # state_v = torch.tensor([vehicles[i].otherState], dtype=torch.float32) 24 | # taskState_v = torch.tensor([[vehicles[i].taskState]], dtype=torch.float32) 25 | # taskAction, aimAction = models[0](state_v, taskState_v) 26 | 27 | vehicleReward = [] 28 | averageReward = [] 29 | for step in range(1000): 30 | action1 = [] 31 | action2 = [] 32 | 33 | for i in range(N): 34 | state_v = torch.tensor([vehicles[i].self_state], dtype=torch.float32) 35 | taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 36 | taskAction, aimAction = models[i](state_v, taskState_v) 37 | 38 | # taskAction = np.array(taskAction, dtype=np.float32).reshape(-1) 39 | # aimAction = np.array(aimAction, dtype=np.float32).reshape(-1) 40 | taskAction = taskAction.detach().numpy().reshape(-1) 41 | aimAction = aimAction.detach().numpy().reshape(-1) 42 | action1.append(np.argmax(taskAction)) 43 | action2.append(np.argmax(aimAction)) 44 | 45 | print(action1) 46 | print(action2) 47 | other_state, task_state, vehicle_state, _, _, _, Reward, reward = env.step(action1, action2) 48 | vehicleReward.append(reward[1]) 49 | averageReward.append(Reward) 50 | print("第{}次车辆平均奖励{}".format(step, Reward)) 51 | 52 | fig, aix = plt.subplots(2, 1) 53 | aix[0].plot(range(len(vehicleReward)), vehicleReward) 54 | 
aix[1].plot(range(len(averageReward)), averageReward) 55 | plt.show() 56 | -------------------------------------------------------------------------------- /experiment2/vehicle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import random 3 | 4 | import numpy as np 5 | 6 | from memory import ExperienceBuffer 7 | from task import Task 8 | 9 | Dv = 50 # 车的最大通信范围 10 | Fv = 4000 # 车最大计算能力 MHZ 11 | MAX_TASK = 10 # 任务队列最大长度 12 | 13 | CAPACITY = 20000 # 缓冲池大小 14 | TASK_SOLT = 10 # 任务产生时隙 15 | 16 | np.random.seed(0) 17 | 18 | direction_map = {"d": 1, "u": 2, "l": 3, "r": 4} 19 | 20 | 21 | class Vehicle: 22 | # 位置:x,y 速度、方向:-1左,1右 23 | def __init__(self, id, position, direction, velocity=20): 24 | self.id = id 25 | # 车的位置信息 26 | self.loc_x = position[0] 27 | self.loc_y = position[1] 28 | self.position = position 29 | self.velocity = velocity # m/s 30 | self.direction = direction 31 | # 通信范围 32 | self.range = Dv 33 | # 邻居表 34 | self.neighbor = [] 35 | # mec 36 | self.Mec = None 37 | # 当前时间 38 | self.cur_frame = 0 39 | # 接受的任务的列表 40 | self.accept_task = [] 41 | # 接受任务的数量 42 | self.sum_needDeal_task = 0 43 | # 此时刻有多少动作选则我 44 | self.len_action = 0 45 | # 当前可用资源 46 | self.resources = round((1 - np.random.randint(1, 5) / 10) * Fv, 2) # MHz 47 | # 表示当前是否有任务正在传输(0:没有,1:有) 48 | self.trans_task = 0 49 | # 当前处理的任务(用于计算奖励,不用于状态信息) 50 | self.cur_task = None 51 | # 任务队列 52 | self.total_task = [] 53 | # 任务队列的长度 54 | self.len_task = len(self.total_task) 55 | 56 | # 当前状态信息 57 | self.otherState = [] 58 | # 当前任务队列状态 59 | self.taskState = [] 60 | # 去除邻居的状态信息用于邻居车观察和全局critic的处理 61 | self.excludeNeighbor_state = [] 62 | # 缓冲池 63 | self.buffer = ExperienceBuffer(capacity=CAPACITY) 64 | # 总奖励 65 | self.reward = [] 66 | # 任务溢出的数量 67 | self.overflow = 0 68 | # 上一个任务产生的时间 69 | self.lastCreatWorkTime = 0 70 | 71 | self.create_work() 72 | 73 | # 获得位置 74 | @property 75 | def get_location(self): 76 | return self.position 77 | 78 | # 设置位置 79 | def set_location(self, loc_x, loc_y): 80 | self.loc_x = loc_x 81 | self.loc_y = loc_y 82 | self.position = [self.loc_x, self.loc_y] 83 | 84 | # 获得x 85 | @property 86 | def get_x(self): 87 | return self.loc_x 88 | 89 | # 获得y 90 | @property 91 | def get_y(self): 92 | return self.loc_y 93 | 94 | # 产生任务 传入当前时间 95 | def create_work(self): 96 | if self.id % 3 == 0: 97 | return 98 | # 每隔一段时间进行一次任务产生 99 | if (self.cur_frame - self.lastCreatWorkTime) % TASK_SOLT == 0: 100 | # 每次有0.6的概率产生任务 101 | if random.random() < 0.6: 102 | if self.len_task < MAX_TASK: # 队列不满 103 | task = Task(self, self.cur_frame) 104 | self.lastCreatWorkTime = self.cur_frame 105 | self.total_task.append(task) 106 | self.len_task += 1 107 | print("第{}辆车产生了任务".format(self.id)) 108 | self.overflow = 0 109 | else: 110 | print("第{}辆车任务队列已满".format(self.id)) 111 | self.overflow += 1 112 | 113 | """ 114 | 获得状态 115 | """ 116 | 117 | def get_state(self): 118 | self.otherState = [] 119 | self.excludeNeighbor_state = [] 120 | self.taskState = [] 121 | 122 | # 位置信息 4 123 | self.otherState.extend(self.position) 124 | self.otherState.append(self.velocity) 125 | self.otherState.append(direction_map.get(self.direction)) 126 | self.excludeNeighbor_state.extend(self.position) 127 | self.excludeNeighbor_state.append(self.velocity) 128 | self.excludeNeighbor_state.append(direction_map.get(self.direction)) 129 | 130 | # 资源信息(可用资源) 131 | self.otherState.append(self.resources) 132 | self.excludeNeighbor_state.append(self.resources) 133 | 134 | # 当前处理的任务量 135 | 
self.otherState.append(self.sum_needDeal_task) 136 | self.excludeNeighbor_state.append(self.sum_needDeal_task) 137 | # 当前接受传输的任务量 138 | self.otherState.append(self.len_action) 139 | self.excludeNeighbor_state.append(self.sum_needDeal_task) 140 | 141 | # 当前是否有任务在传输 142 | self.excludeNeighbor_state.append(self.trans_task) 143 | self.otherState.append(self.trans_task) 144 | 145 | # 正在传输的任务信息 146 | # if self.trans_task is not None: 147 | # self.otherState.append(self.trans_task.need_trans_size) 148 | # self.excludeNeighbor_state.append(self.trans_task.need_trans_size) 149 | # else: 150 | # self.otherState.append(0) 151 | # self.excludeNeighbor_state.append(0) 152 | 153 | # 当前队列长度 154 | self.otherState.append(self.len_task) 155 | self.excludeNeighbor_state.append(self.len_task) 156 | 157 | # 邻居表 7*数量 158 | for neighbor in self.neighbor: 159 | self.otherState.extend(neighbor.position) # 位置 160 | self.otherState.append(neighbor.velocity) # 速度 161 | self.otherState.append(direction_map.get(neighbor.direction)) # 方向 162 | self.otherState.append(neighbor.resources) # 可用资源 163 | self.otherState.append(neighbor.sum_needDeal_task) # 处理任务长度 164 | self.otherState.append(neighbor.len_action) # 当前正在传输任务数量 165 | 166 | self.otherState.extend(self.Mec.state) 167 | 168 | # 任务状态信息 169 | for i in range(MAX_TASK): 170 | if i < self.len_task: 171 | task = self.total_task[i] 172 | self.taskState.append([task.need_trans_size, task.need_precess_cycle, task.max_time]) 173 | else: 174 | self.taskState.append([0, 0, 0]) 175 | 176 | return self.excludeNeighbor_state 177 | -------------------------------------------------------------------------------- /experiment3/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 环境3 3 | (两个动作:选择任务和选择对象) 4 | 加入了mec和车在时隙内处理任务的上限(mec最多同时处理10个任务、车最多处理5个任务) 5 | 使用经典城市道路(使用不同数量车辆和邻居) 6 | """ 7 | import ptan 8 | import numpy as np 9 | import torch 10 | from torch.distributions.categorical import Categorical 11 | from env import Env 12 | 13 | 14 | def test_net(nets, env: Env, count=10): 15 | rewards = 0.0 16 | steps = 0 17 | for _ in range(count): 18 | env.reset() 19 | while steps < 1000: 20 | action = [] 21 | with torch.no_grad(): 22 | for vehicle in env.vehicles: 23 | state = torch.tensor(vehicle.self_state) 24 | _, pro = nets[vehicle.id](state) 25 | act = Categorical.sample(pro) 26 | action.append(act.item()) 27 | _, _, reward, _ = env.step(action) 28 | rewards += reward 29 | steps += 1 30 | return rewards / count, steps / count 31 | 32 | # def calc_logprob(pro_v, actions_v): 33 | # p1 = - ((mu_v - actions_v) ** 2) / (2 * torch.exp(logstd_v).clamp(min=1e-3)) 34 | # p2 = - torch.log(torch.sqrt(2 * math.pi * torch.exp(logstd_v))) 35 | # return p1 + p2 36 | -------------------------------------------------------------------------------- /experiment3/dqn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import time 4 | from collections import namedtuple 5 | 6 | import matplotlib 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | from pylab import mpl 13 | import netron 14 | 15 | from env import Env 16 | from model import DQN 17 | 18 | # 设置显示中文字体 19 | mpl.rcParams["font.sans-serif"] = ["SimHei"] 20 | matplotlib.rcParams['axes.unicode_minus'] = False 21 | 22 | Experience = namedtuple('Transition', 23 | field_names=['cur_otherState', 'cur_TaskState', # 状态 24 | 'taskAction', 
'aimAction', # 动作 25 | 'reward', # 奖励 26 | 'next_otherState', 'next_TaskState']) # Define a transition tuple 27 | GAMMA = 0.99 28 | BATCH_SIZE = 64 29 | REPLAY_SIZE = 10000 30 | LEARNING_RATE = 1e-4 31 | SYNC_TARGET_FRAMES = 1000 32 | 33 | EPSILON_DECAY_LAST_FRAME = 150000 34 | EPSILON_START = 0.6 35 | EPSILON_FINAL = 0.01 36 | 37 | RESET = 100000 # 重置游戏次数 38 | 39 | MAX_TASK = 10 # 任务队列最大长度 40 | 41 | momentum = 0.005 42 | 43 | RESOURCE = [0.2, 0.4, 0.6, 0.8] 44 | 45 | 46 | @torch.no_grad() 47 | def play_step(env, epsilon, models): 48 | vehicles = env.vehicles 49 | old_otherState = [] 50 | old_taskState = [] 51 | 52 | actionTask = [] 53 | actionAim = [] 54 | # 贪心选择动作 55 | for i, model in enumerate(models): 56 | old_otherState.append(vehicles[i].self_state) 57 | old_taskState.append(vehicles[i].task_state) 58 | if np.random.random() < epsilon: 59 | # 随机动作 60 | actionTask.append(np.random.randint(0, 10)) 61 | actionAim.append(np.random.randint(0, 7)) # local+mec+neighbor 62 | else: 63 | state_v = torch.tensor([vehicles[i].self_state], dtype=torch.float32) 64 | taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 65 | taskAction, aimAction = model(state_v, taskState_v) 66 | 67 | taskAction = np.array(taskAction, dtype=np.float32).reshape(-1) 68 | aimAction = np.array(aimAction, dtype=np.float32).reshape(-1) 69 | 70 | actionAim.append(np.argmax(aimAction)) 71 | actionTask.append(np.argmax(taskAction)) 72 | # print("action:", action) 73 | _, _, _, otherState, _, taskState, Reward, reward = env.step(actionTask, actionAim) 74 | # print("reward:", reward) 75 | 76 | # 加入各自的缓存池【当前其他状态、当前任务状态、目标动作、任务动作,下一其他状态、下一任务状态】 77 | for i, vehicle in enumerate(vehicles): 78 | exp = Experience(old_otherState[i], [old_taskState[i]], 79 | actionTask[i], actionAim[i], 80 | reward[i], 81 | otherState[i], [taskState[i]]) 82 | vehicle.buffer.append(exp) 83 | return round(Reward, 2) # 返回总的平均奖励 84 | 85 | 86 | # 计算一个智能体的损失 87 | def calc_loss(batch, net: DQN, tgt_net: DQN, device="cpu"): 88 | cur_otherState, cur_TaskState, taskAction, aimAction, rewards, next_otherState, next_TaskState = batch # 89 | 90 | otherStates_v = torch.tensor(np.array(cur_otherState, copy=False), dtype=torch.float32).to(device) 91 | taskStates_v = torch.tensor(np.array(cur_TaskState, copy=False), dtype=torch.float32).to(device) 92 | # print("states_v:", states_v) # batch状态 93 | taskActions_v = torch.tensor(np.array(taskAction), dtype=torch.int64).to(device) 94 | aimActions_v = torch.tensor(np.array(aimAction), dtype=torch.int64).to(device) 95 | # print("actions_v", actions_v) # batch动作 96 | rewards_v = torch.tensor(np.array(rewards), dtype=torch.float32).to(device) 97 | # print("rewards_v", rewards_v) # batch奖励 98 | next_otherStates_v = torch.tensor(np.array(next_otherState, copy=False), dtype=torch.float32).to(device) 99 | next_taskStates_v = torch.tensor(np.array(next_TaskState, copy=False), dtype=torch.float32).to(device) 100 | # print("next_states_v", next_states_v) # batch下一个状态 101 | 102 | # 计算当前网络q值 103 | taskActionValues, aimActionValues = net(otherStates_v, 104 | taskStates_v) # .gather(1, aimActions_v.unsqueeze(-1)).squeeze(-1) 105 | taskActionValues = taskActionValues.gather(1, taskActions_v.unsqueeze(-1)).squeeze(-1) 106 | aimActionValues = aimActionValues.gather(1, aimActions_v.unsqueeze(-1)).squeeze(-1) 107 | 108 | # 计算目标网络q值 109 | next_taskActionValues, next_aimActionValues = tgt_net(next_otherStates_v, 110 | next_taskStates_v) # .max(1)[0] # 得到最大的q值 111 | 112 | next_taskActionValues = 
next_taskActionValues.max(1)[0].detach() 113 | next_aimActionValues = next_aimActionValues.max(1)[0].detach() 114 | 115 | # 防止梯度流入用于计算下一状态q近似值得NN 116 | # next_states_values = next_aimActionValues.detach() 117 | # print("next_states_values", next_states_values) 118 | expected_aim_values = next_aimActionValues * GAMMA + rewards_v 119 | expected_task_values = next_taskActionValues * GAMMA + rewards_v 120 | # print(" expected_state_values", expected_state_values) 121 | 122 | return nn.MSELoss()(taskActionValues, expected_task_values), nn.MSELoss()(aimActionValues, expected_aim_values) 123 | 124 | 125 | if __name__ == '__main__': 126 | env = Env() 127 | env.reset() 128 | 129 | frame_idx = 0 130 | # writer = SummaryWriter(comment="-" + env.__doc__) 131 | agents = env.vehicles 132 | models = [] 133 | tgt_models = [] 134 | optimizers = [] 135 | for agent in agents: 136 | # print(agent.get_location, agent.velocity) 137 | task_shape = np.array([agent.task_state]).shape 138 | # print(task_shape) 139 | model = DQN(len(agent.self_state), task_shape, MAX_TASK, len(agent.neighbor) + 2) 140 | models.append(model) 141 | optimer = optim.RMSprop(params=model.parameters(), lr=LEARNING_RATE, momentum=momentum) 142 | optimizers.append(optimer) 143 | for agent in agents: 144 | # print(agent.get_location, agent.velocity) 145 | task_shape = np.array([agent.task_state]).shape 146 | # print(task_shape) 147 | model = DQN(len(agent.self_state), task_shape, MAX_TASK, len(agent.neighbor) + 2) 148 | model.load_state_dict(models[agent.id].state_dict()) 149 | tgt_models.append(model) 150 | 151 | # 打印网络结构 152 | model = models[0] 153 | state_v = torch.tensor([env.vehicles[0].otherState], dtype=torch.float32) 154 | taskState_v = torch.tensor([[env.vehicles[0].taskState]], dtype=torch.float32) 155 | # 针对有网络模型,但还没有训练保存 .pth 文件的情况 156 | modelpath = "./netStruct/demo.onnx" # 定义模型结构保存的路径 157 | torch.onnx.export(model, (state_v, taskState_v), modelpath) # 导出并保存 158 | netron.start(modelpath) 159 | 160 | total_reward = [] 161 | recent_reward = [] 162 | loss_task_list = [] 163 | loss_aim_list = [] 164 | reward_1 = [] 165 | 166 | epsilon = EPSILON_START 167 | eliposde = 500000 168 | while eliposde > 0: 169 | # 重置游戏 170 | # if frame_idx % RESET == 0: 171 | # print("游戏重置") 172 | # env.reset() 173 | # agents = env.vehicles 174 | 175 | frame_idx += 1 176 | print("the {} steps".format(frame_idx)) 177 | epsilon = max(EPSILON_FINAL, EPSILON_START - frame_idx / EPSILON_DECAY_LAST_FRAME) 178 | reward = play_step(env, epsilon, models) 179 | total_reward.append(reward) 180 | print("current reward:", reward) 181 | print("current 100 times total rewards:", np.mean(total_reward[-100:])) 182 | recent_reward.append(np.mean(total_reward[-100:])) 183 | if np.mean(total_reward[-100:]) > 0.7: 184 | break 185 | 186 | for i, agent in enumerate(agents): 187 | # print("length of {} buffer".format(agent.id), len(agent.buffer)) 188 | if len(agent.buffer) < REPLAY_SIZE: # 缓冲池要足够大 189 | continue 190 | if frame_idx % SYNC_TARGET_FRAMES == 0: # 更新目标网络 191 | tgt_models[i].load_state_dict(models[i].state_dict()) 192 | optimizers[i].zero_grad() 193 | batch = agent.buffer.sample(BATCH_SIZE) 194 | loss_task, loss_aim = calc_loss(batch, models[i], tgt_models[i]) 195 | # print("loss:", loss_task, " ", loss_aim) 196 | # loss_t.backward() 197 | torch.autograd.backward([loss_task, loss_aim]) 198 | optimizers[i].step() 199 | if agent.id == 0: 200 | # print("cur_loss:", loss_task.item()) 201 | # print("cur_aim_loss", loss_aim.item()) 202 | 
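                # Annotation (added note, not original code): calc_loss above builds a
                # standard DQN target for each head, e.g. for the aim head
                #     y = r + GAMMA * max_a' Q_tgt(s', a'),
                # and regresses Q(s, a) onto y with MSELoss; the task head is handled
                # the same way over its own action set. As a worked example, with
                # GAMMA = 0.99, r = 0.5 and max_a' Q_tgt(s', a') = 2.0, the target is
                # 0.5 + 0.99 * 2.0 = 2.48.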
loss_task_list.append(loss_task.item()) 203 | loss_aim_list.append(loss_aim.item()) 204 | reward_1.append(env.reward[0]) 205 | eliposde -= 1 206 | 207 | cur_time = time.strftime("%Y-%m-%d-%H-%M", time.localtime(time.time())) 208 | # 创建文件夹 209 | os.makedirs("D:/pycharm/Project/VML/MyErion/experiment3/result/" + cur_time) 210 | for i, vehicle in enumerate(env.vehicles): 211 | # 保存每个网络模型 212 | torch.save(tgt_models[i].state_dict(), 213 | "D:/pycharm/Project/VML/MyErion/experiment3/result/" + cur_time + "/vehicle" + str(i) + ".pkl") 214 | 215 | plt.plot(range(len(recent_reward)), recent_reward) 216 | plt.title("奖励曲线") 217 | plt.show() 218 | 219 | plt.plot(range(len(loss_task_list)), loss_task_list) 220 | plt.title("任务选择损失曲线") 221 | plt.show() 222 | 223 | plt.plot(range(len(loss_aim_list)), loss_aim_list) 224 | plt.title("目标选择损失曲线") 225 | plt.show() 226 | 227 | plt.plot(range(1000), reward_1[-1000:]) 228 | plt.title("车辆一奖励曲线") 229 | plt.show() 230 | -------------------------------------------------------------------------------- /experiment3/env_test.py: -------------------------------------------------------------------------------- 1 | from env import Env 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | if __name__ == '__main__': 6 | print() 7 | env = Env() 8 | env.reset() 9 | 10 | # 测试网络节点数 11 | task = np.array(env.taskState) 12 | print(task.shape) 13 | vehicles = env.vehicles 14 | 15 | for vehicle in vehicles: 16 | print("第{}车状态:{}".format(vehicle.id, vehicle.self_state)) 17 | print("该车邻居:") 18 | for i in vehicle.neighbor: 19 | print(i.id, end=" ") 20 | print() 21 | 22 | # 测试环境运行 23 | x = [[] for i in range(40)] 24 | y = [[] for i in range(40)] 25 | for i in range(1000): 26 | for j in range(40): 27 | x[j].append(env.vehicles[j].position[0]) 28 | y[j].append(env.vehicles[j].position[1]) 29 | action1 = [] 30 | action2 = [] 31 | action3 = [] 32 | for j in range(40): 33 | # action1.append(np.random.randint(0, 10)) 34 | action1.append(0) 35 | # action2.append(np.random.randint(0, 7)) 36 | action2.append(0) 37 | # action3.append(round(np.random.random(), 2)) 38 | action3.append(0.8) 39 | other_state, task_state, vehicle_state, _, _, _, Reward, _ = env.step(action1, action2) 40 | print("第{}次平均奖励{}".format(i, Reward)) 41 | # print("当前状态:", state) 42 | # print("下一状态:", next_state) 43 | # print("车状态:", vehicleState) 44 | # print("任务状态", taskState) 45 | # print("当前奖励:", reward) 46 | # print("每个奖励,", vehicleReward) 47 | # print("当前有{}任务没有传输完成".format(len(env.need_trans_task))) 48 | # print("average reward:", env.Reward) 49 | plt.figure(figsize=(100, 100)) 50 | fix, ax = plt.subplots(5, 4) 51 | 52 | for i in range(5): 53 | for j in range(4): 54 | number = i * 4 + j 55 | ax[i, j].plot(x[number], y[number]) 56 | ax[i, j].set_title('vehicle {}'.format(number)) 57 | plt.show() 58 | -------------------------------------------------------------------------------- /experiment3/mec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | RANGE_MEC = 200 # MEC通信范围 4 | RESOURCE = 20000 # 可用资源 MHz 5 | MAX_QUEUE = 10 6 | 7 | 8 | # 边缘服务器 9 | class MEC: 10 | def __init__(self, position, resources=RESOURCE, max_queue=MAX_QUEUE): 11 | self.loc_x = position[0] 12 | self.loc_y = position[1] 13 | self.loc = position 14 | # 当前可用资源 MHz 15 | self.resources = resources 16 | self.state = [] 17 | # 通信范围 m 18 | self.range = RANGE_MEC 19 | # 当前接到需要处理的任务信息(最多同时处理10个任务) 20 | self.accept_task = [] 21 | # 最多处理任务量 22 | self.max_task = 5 23 | # 接受任务的数量 24 
| self.sum_needDeal_task = 0 25 | # 此时刻有多少动作选则我 多少任务选择传输给我 26 | self.len_action = 0 27 | # 等待计算的任务队列(理解为挂起状态) 28 | self.task_queue = [] 29 | # 用于奖励计算的任务队列 30 | self.task_queue_for_reward = [] 31 | # 队列最长长度 32 | self.max_queue = max_queue 33 | # 当前状态 34 | self.get_state() 35 | 36 | @property 37 | def get_x(self): 38 | return self.loc_x 39 | 40 | @property 41 | def get_y(self): 42 | return self.loc_y 43 | 44 | @property 45 | def get_location(self): 46 | return self.loc 47 | 48 | """ 49 | 获得状态 50 | """ 51 | 52 | def get_state(self): 53 | """ 54 | :return:state 维度:[loc_x,loc_y,sum_needDeal_task,resources] 55 | """ 56 | self.state = [] 57 | self.state.extend(self.loc) 58 | self.state.append(self.sum_needDeal_task) 59 | self.state.append(self.len_action) 60 | self.state.append(self.resources) 61 | return self.state 62 | -------------------------------------------------------------------------------- /experiment3/memory.py: -------------------------------------------------------------------------------- 1 | # 经验类型 2 | import collections 3 | from collections import namedtuple 4 | from random import sample 5 | import numpy as np 6 | 7 | Experience = namedtuple('Transition', 8 | field_names=['state', 'action', 'reward', 'next_state']) # Define a transition tuple 9 | 10 | 11 | class ReplayMemory(object): # Define a replay memory 12 | 13 | # 初始化缓冲池 14 | def __init__(self, capacity): 15 | # 最大容量 16 | self.capacity = capacity 17 | # 缓冲池经验 18 | self.memory = [] 19 | # ? 20 | self.position = 0 21 | 22 | # 存入经验 23 | def push(self, *args): 24 | if len(self.memory) < self.capacity: 25 | self.memory.append(None) 26 | # 存入经验 27 | self.memory[self.position] = Experience(*args) 28 | # 记录最新经验所在位置 29 | self.position = (self.position + 1) % self.capacity 30 | 31 | # 采样 32 | def sample(self, batch_size): 33 | return sample(self.memory, batch_size) 34 | 35 | def __len__(self): 36 | return len(self.memory) 37 | 38 | 39 | class ExperienceBuffer: 40 | def __init__(self, capacity): 41 | self.maxLen = capacity 42 | self.buffer = collections.deque(maxlen=capacity) # 队列,先进先出 43 | 44 | def __len__(self): 45 | return len(self.buffer) 46 | 47 | def append(self, experience: Experience): 48 | self.buffer.append(experience) 49 | 50 | def sample(self, batch_size): 51 | indices = np.random.choice(len(self.buffer), batch_size, replace=False) 52 | cur_otherState, cur_TaskState, taskAction, aimAction, rewards, next_otherState, next_TaskState = zip( 53 | *[self.buffer[idx] for idx in indices]) 54 | # 转换成numpy 55 | return np.array(cur_otherState), np.array(cur_TaskState), \ 56 | np.array(taskAction), np.array(aimAction), \ 57 | np.array(rewards, dtype=np.float32), \ 58 | np.array(next_otherState), np.array(next_TaskState) 59 | 60 | # 清空 61 | def clear(self): 62 | self.buffer = collections.deque(maxlen=self.maxLen) 63 | -------------------------------------------------------------------------------- /experiment3/model.py: -------------------------------------------------------------------------------- 1 | import ptan 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn import functional as F 6 | from torch.distributions.categorical import Categorical 7 | 8 | HID_SIZE = 64 9 | 10 | 11 | class ModelActor(nn.Module): 12 | def __init__(self, obs_dim, act_dim, task_dim): 13 | super(ModelActor, self).__init__() 14 | self.act_dim = act_dim 15 | 16 | self.mu = nn.Sequential( 17 | nn.Linear(obs_dim + HID_SIZE, HID_SIZE * 2), 18 | nn.ReLU(), 19 | nn.Linear(HID_SIZE * 2, HID_SIZE), 20 | nn.ReLU(), 21 | 
nn.Linear(HID_SIZE, act_dim * 2), 22 | ) 23 | self.cnn = CNNLayer(task_dim, HID_SIZE) 24 | 25 | def forward(self, x, task): 26 | cnn_out = self.cnn(task) 27 | x = torch.cat((x, cnn_out), -1) 28 | out = self.mu(x) 29 | action_out = out[:, :self.act_dim] 30 | task_out = out[:, self.act_dim:] 31 | action_pro = F.softmax(action_out, dim=-1) 32 | task_pro = F.softmax(task_out, dim=1) 33 | print(action_pro) 34 | print(task_pro) 35 | return action_pro, Categorical(action_pro), task_pro, Categorical(task_pro) 36 | 37 | 38 | class ModelCritic(nn.Module): 39 | def __init__(self, obs_size): 40 | super(ModelCritic, self).__init__() 41 | 42 | self.value = nn.Sequential( 43 | nn.Linear(obs_size, HID_SIZE), 44 | nn.ReLU(), 45 | nn.Linear(HID_SIZE, HID_SIZE), 46 | nn.ReLU(), 47 | nn.Linear(HID_SIZE, 1), 48 | ) 49 | 50 | def forward(self, x): 51 | return self.value(x) 52 | 53 | 54 | class ModelSACTwinQ(nn.Module): 55 | def __init__(self, obs_size, act_size): 56 | super(ModelSACTwinQ, self).__init__() 57 | 58 | self.q1 = nn.Sequential( 59 | nn.Linear(obs_size + act_size, HID_SIZE), 60 | nn.ReLU(), 61 | nn.Linear(HID_SIZE, HID_SIZE), 62 | nn.ReLU(), 63 | nn.Linear(HID_SIZE, 1), 64 | ) 65 | 66 | self.q2 = nn.Sequential( 67 | nn.Linear(obs_size + act_size, HID_SIZE), 68 | nn.ReLU(), 69 | nn.Linear(HID_SIZE, HID_SIZE), 70 | nn.ReLU(), 71 | nn.Linear(HID_SIZE, 1), 72 | ) 73 | 74 | def forward(self, obs, act): 75 | x = torch.cat([obs, act], dim=1) 76 | return self.q1(x), self.q2(x) 77 | 78 | 79 | class AgentDDPG(ptan.agent.BaseAgent): 80 | """ 81 | Agent implementing Orstein-Uhlenbeck exploration process 82 | """ 83 | 84 | def __init__(self, net, device="cpu", ou_enabled=True, 85 | ou_mu=0.0, ou_teta=0.15, ou_sigma=0.2, 86 | ou_epsilon=1.0): 87 | self.net = net 88 | self.device = device 89 | self.ou_enabled = ou_enabled 90 | self.ou_mu = ou_mu 91 | self.ou_teta = ou_teta 92 | self.ou_sigma = ou_sigma 93 | self.ou_epsilon = ou_epsilon 94 | 95 | def initial_state(self): 96 | return None 97 | 98 | def __call__(self, states, agent_states): 99 | states_v = ptan.agent.float32_preprocessor(states) 100 | states_v = states_v.to(self.device) 101 | mu_v = self.net(states_v) 102 | actions = mu_v.data.cpu().numpy() 103 | 104 | if self.ou_enabled and self.ou_epsilon > 0: 105 | new_a_states = [] 106 | for a_state, action in zip(agent_states, actions): 107 | if a_state is None: 108 | a_state = np.zeros( 109 | shape=action.shape, dtype=np.float32) 110 | a_state += self.ou_teta * (self.ou_mu - a_state) 111 | a_state += self.ou_sigma * np.random.normal( 112 | size=action.shape) 113 | 114 | action += self.ou_epsilon * a_state 115 | new_a_states.append(a_state) 116 | else: 117 | new_a_states = agent_states 118 | 119 | actions = np.clip(actions, -1, 1) 120 | return actions, new_a_states 121 | 122 | 123 | class DQN(nn.Module): 124 | def __init__(self, obs_dim, task_dim, taskAction_dim, aimAction_dim): 125 | super(DQN, self).__init__() 126 | self.input_layer = nn.Linear(obs_dim + 32, 128) 127 | self.hidden1 = nn.Linear(128, 64) 128 | self.hidden2 = nn.Linear(64, 64) 129 | self.hidden3 = nn.Linear(64, 128) 130 | self.cnn = CNNLayer(task_dim, 32) 131 | self.output_layer1 = self.common(64, taskAction_dim) 132 | self.output_layer2 = self.common(64, aimAction_dim) 133 | 134 | def common(self, input_dim, action_dim): 135 | return nn.Sequential( 136 | nn.Linear(input_dim, 128), 137 | nn.ReLU(), 138 | self.hidden1, 139 | nn.ReLU(), 140 | self.hidden2, 141 | nn.ReLU(), 142 | nn.Linear(64, action_dim) 143 | ) 144 | 145 | def forward(self, x, 
task): 146 | """ 147 | 148 | :param x: batch_size*state_n 149 | :return: batch_size*actions_n 输出每个动作对应的q值 150 | """ 151 | # 任务卷积层 152 | cnn_out = self.cnn(task) 153 | x = torch.cat((x, cnn_out), -1) 154 | 155 | # 公共层 156 | x1 = F.relu(self.input_layer(x)) 157 | x2 = F.relu(self.hidden1(x1)) 158 | x3 = F.relu(self.hidden2(x2)) 159 | 160 | taskActionValue = self.output_layer1(x3) 161 | aimActionValue = self.output_layer2(x3) 162 | 163 | return taskActionValue, aimActionValue 164 | 165 | 166 | class CNNLayer(nn.Module): 167 | def __init__(self, obs_shape, hidden_size, use_orthogonal=True, use_ReLU=True, kernel_size=3, stride=1): 168 | super(CNNLayer, self).__init__() 169 | 170 | active_func = [nn.Tanh(), nn.ReLU()][use_ReLU] 171 | init_method = [nn.init.xavier_uniform_, nn.init.orthogonal_][use_orthogonal] 172 | gain = nn.init.calculate_gain(['tanh', 'relu'][use_ReLU]) 173 | 174 | def init_(m): # 权重使用正交初始化,激活函数使用relu 175 | return init(m, init_method, lambda x: nn.init.constant_(x, 0), gain=gain) 176 | 177 | input_channel = obs_shape[0] 178 | input_width = obs_shape[1] 179 | input_height = obs_shape[2] 180 | 181 | self.cnn = nn.Sequential( 182 | init_(nn.Conv2d(in_channels=input_channel, 183 | out_channels=hidden_size // 2, 184 | kernel_size=kernel_size, 185 | stride=stride) 186 | ), 187 | active_func, 188 | nn.Flatten(), 189 | init_(nn.Linear( 190 | hidden_size // 2 * (input_width - kernel_size + stride) * (input_height - kernel_size + stride), 191 | hidden_size) 192 | ), 193 | active_func, 194 | init_(nn.Linear(hidden_size, hidden_size)), active_func) 195 | 196 | def forward(self, x): 197 | x = x / 255.0 198 | x = self.cnn(x) 199 | 200 | return x 201 | 202 | 203 | def init(module, weight_init, bias_init, gain=1): 204 | weight_init(module.weight.data, gain=gain) 205 | bias_init(module.bias.data) 206 | return module 207 | -------------------------------------------------------------------------------- /experiment3/task.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | np.random.seed(2) 4 | 5 | 6 | class Task: 7 | """ 8 | 定义任务类型 9 | """ 10 | 11 | def __init__(self, vehicle=None, createTime=0): 12 | # 产生任务的车辆 13 | self.vehicle = vehicle 14 | # 完成该任务所消耗的资源 15 | self.aim = None # 传送对象 16 | 17 | self.max_time = 30 # ms 最大容忍时间 18 | self.size = np.random.uniform(0.2, 1) # Mb 19 | self.cycle = np.random.randint(20, 50) # cycle/bit 20 | self.need_trans_size = self.size * np.power(2, 10) # Kb 还剩余多少未传输完成 21 | self.need_precess_cycle = self.cycle * self.size * 1000 # Mb * cycle/byte =M cycle 还剩余多少轮次未完成(10^6) 22 | self.need_time = 0 # 需要计算时间 23 | self.hold_time = 0 # 任务在计算等待队列中得等待时间 24 | 25 | self.rate = 0 # 当前速率 26 | 27 | self.compute_resource = 0 28 | 29 | self.create_time = createTime # 任务产生时间 30 | self.pick_time = 0 # 被选择的时间(出队列时间) 31 | 32 | # 完成该任务所消耗的cup资源 33 | self.energy = 0 34 | self.trans_time = 0 # 传输所需要的时间(实际) 35 | self.precess_time = 0 # 任务处理所需要的时间(实际) 36 | -------------------------------------------------------------------------------- /experiment3/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | import matplotlib 5 | from pylab import mpl 6 | import model 7 | from env import Env 8 | import matplotlib.pyplot as plt 9 | 10 | # 设置显示中文字体 11 | mpl.rcParams["font.sans-serif"] = ["SimHei"] 12 | matplotlib.rcParams['axes.unicode_minus'] = False 13 | 14 | if __name__ == '__main__': 15 | env = Env() 16 | env.reset() 17 | 18 | N = env.num_Vehicles 19 
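    # Annotation (added note, not original code): evaluation below is purely
    # greedy -- each DQN head's Q-values are computed and np.argmax picks the
    # action; the torch equivalent would be int(torch.argmax(aimAction, dim=-1)).
    # The checkpoint paths below are absolute Windows paths; a more portable
    # (assumed) pattern would be os.path.join("result", run_dir,
    # "vehicle{}.pkl".format(i)).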
| vehicles = env.vehicles 20 | models = [] 21 | 22 | task_shape = np.array([vehicles[0].task_state]).shape 23 | for i in range(N): 24 | tgt_model = model.DQN(len(vehicles[0].self_state), task_shape, 10, len(vehicles[0].neighbor) + 2) 25 | tgt_model.load_state_dict(torch.load( 26 | "D:\pycharm\Project\VML\MyErion\experiment3\\result\\2022-11-07-18-40\\vehicle{}.pkl".format(i))) 27 | models.append(tgt_model) 28 | 29 | # state_v = torch.tensor([vehicles[i].otherState], dtype=torch.float32) 30 | # taskState_v = torch.tensor([[vehicles[i].taskState]], dtype=torch.float32) 31 | # taskAction, aimAction = models[0](state_v, taskState_v) 32 | 33 | vehicleReward = [] 34 | averageReward = [] 35 | for step in range(1000): 36 | action1 = [] 37 | action2 = [] 38 | 39 | for i in range(N): 40 | state_v = torch.tensor([vehicles[i].self_state], dtype=torch.float32) 41 | taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 42 | taskAction, aimAction = models[i](state_v, taskState_v) 43 | 44 | # taskAction = np.array(taskAction, dtype=np.float32).reshape(-1) 45 | # aimAction = np.array(aimAction, dtype=np.float32).reshape(-1) 46 | taskAction = taskAction.detach().numpy().reshape(-1) 47 | aimAction = aimAction.detach().numpy().reshape(-1) 48 | action1.append(np.argmax(taskAction)) 49 | action2.append(np.argmax(aimAction)) 50 | 51 | print(action1) 52 | print(action2) 53 | other_state, task_state, vehicle_state, _, _, _, Reward, reward = env.step(action1, action2) 54 | vehicleReward.append(reward[1]) 55 | averageReward.append(Reward) 56 | print("第{}次车辆平均奖励{}".format(step, Reward)) 57 | 58 | fig, aix = plt.subplots(2, 1) 59 | aix[0].plot(range(len(vehicleReward)), vehicleReward) 60 | aix[1].plot(range(len(averageReward)), averageReward) 61 | plt.show() 62 | -------------------------------------------------------------------------------- /experiment3/vehicle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import random 3 | 4 | import numpy as np 5 | 6 | from memory import ExperienceBuffer 7 | from task import Task 8 | 9 | Dv = 50 # 车的最大通信范围 10 | Fv = 4000 # 车最大计算能力 MHZ 11 | MAX_TASK = 10 # 任务队列最大长度 12 | 13 | CAPACITY = 20000 # 缓冲池大小 14 | TASK_SOLT = 10 # 任务产生时隙 15 | 16 | # 等待队列最长长度 17 | MAX_QUEUE = 10 18 | 19 | np.random.seed(0) 20 | 21 | direction_map = {"d": 1, "u": 2, "l": 3, "r": 4} 22 | 23 | 24 | class Vehicle: 25 | # 位置:x,y 速度、方向:-1左,1右 26 | def __init__(self, id, position, direction, velocity=20, max_queue=MAX_QUEUE): 27 | self.id = id 28 | # 车的位置信息 29 | self.loc_x = position[0] 30 | self.loc_y = position[1] 31 | self.position = position 32 | self.velocity = velocity # m/s 33 | self.direction = direction 34 | # 通信范围 35 | self.range = Dv 36 | # 邻居表 37 | self.neighbor = [] 38 | # mec 39 | self.Mec = None 40 | # 当前时间 41 | self.cur_frame = 0 42 | # 接受的任务的列表(最多同时处理5个任务) 43 | self.accept_task = [] 44 | # 最多处理任务量 45 | self.max_task = 3 46 | # 等待队列最长长度 47 | self.max_queue = max_queue 48 | # 等待计算的任务队列(理解为挂起状态) 49 | self.task_queue = [] 50 | # 用于奖励计算的任务队列 51 | self.task_queue_for_reward = [] 52 | # 接受任务的数量 53 | self.sum_needDeal_task = 0 54 | # 此时刻有多少动作选则我 55 | self.len_action = 0 56 | # 当前可用资源 57 | self.resources = round((1 - np.random.randint(1, 5) / 10) * Fv, 2) # MHz 58 | # 表示当前是否有任务正在传输(0:没有,1:有) 59 | self.trans_task = 0 60 | # 当前处理的任务(用于计算奖励,不用于状态信息) 61 | self.cur_task = None 62 | # 任务队列 63 | self.total_task = [] 64 | # 任务队列的长度 65 | self.len_task = len(self.total_task) 66 | 67 | # 当前状态信息 68 | self.otherState = [] 69 | # 
当前任务队列状态 70 | self.taskState = [] 71 | # 去除邻居的状态信息用于邻居车观察和全局critic的处理 72 | self.excludeNeighbor_state = [] 73 | # 缓冲池 74 | self.buffer = ExperienceBuffer(capacity=CAPACITY) 75 | # 总奖励 76 | self.reward = [] 77 | # 任务溢出的数量 78 | self.overflow = 0 79 | # 上一个任务产生的时间 80 | self.lastCreatWorkTime = 0 81 | 82 | # 产生任务 83 | self.create_work() 84 | 85 | # 获得位置 86 | @property 87 | def get_location(self): 88 | return self.position 89 | 90 | # 设置位置 91 | def set_location(self, loc_x, loc_y): 92 | self.loc_x = loc_x 93 | self.loc_y = loc_y 94 | self.position = [self.loc_x, self.loc_y] 95 | 96 | # 获得x 97 | @property 98 | def get_x(self): 99 | return self.loc_x 100 | 101 | # 获得y 102 | @property 103 | def get_y(self): 104 | return self.loc_y 105 | 106 | # 产生任务 传入当前时间 107 | def create_work(self): 108 | if self.id % 3 == 0: 109 | return 110 | # 每隔一段时间进行一次任务产生 111 | if (self.cur_frame - self.lastCreatWorkTime) % TASK_SOLT == 0: 112 | # 每次有0.6的概率产生任务 113 | if random.random() < 0.6: 114 | if self.len_task < MAX_TASK: # 队列不满 115 | task = Task(self, self.cur_frame) 116 | self.lastCreatWorkTime = self.cur_frame 117 | self.total_task.append(task) 118 | self.len_task += 1 119 | # print("第{}辆车产生了任务".format(self.id)) 120 | self.overflow = 0 121 | else: 122 | # print("第{}辆车任务队列已满".format(self.id)) 123 | self.overflow = 1 124 | 125 | """ 126 | 获得状态 127 | """ 128 | 129 | def get_state(self): 130 | self.otherState = [] 131 | self.excludeNeighbor_state = [] 132 | self.taskState = [] 133 | 134 | # 位置信息 4 135 | self.otherState.extend(self.position) 136 | self.otherState.append(self.velocity) 137 | self.otherState.append(direction_map.get(self.direction)) 138 | self.excludeNeighbor_state.extend(self.position) 139 | self.excludeNeighbor_state.append(self.velocity) 140 | self.excludeNeighbor_state.append(direction_map.get(self.direction)) 141 | 142 | # 资源信息(可用资源) 143 | self.otherState.append(self.resources) 144 | self.excludeNeighbor_state.append(self.resources) 145 | 146 | # 当前处理的任务量 147 | self.otherState.append(self.sum_needDeal_task) 148 | self.excludeNeighbor_state.append(self.sum_needDeal_task) 149 | # 当前接受传输的任务量 150 | self.otherState.append(self.len_action) 151 | self.excludeNeighbor_state.append(self.sum_needDeal_task) 152 | 153 | # 当前是否有任务在传输 154 | self.excludeNeighbor_state.append(self.trans_task) 155 | self.otherState.append(self.trans_task) 156 | 157 | # 正在传输的任务信息 158 | # if self.trans_task is not None: 159 | # self.otherState.append(self.trans_task.need_trans_size) 160 | # self.excludeNeighbor_state.append(self.trans_task.need_trans_size) 161 | # else: 162 | # self.otherState.append(0) 163 | # self.excludeNeighbor_state.append(0) 164 | 165 | # 当前队列长度 166 | self.otherState.append(self.len_task) 167 | self.excludeNeighbor_state.append(self.len_task) 168 | 169 | # 邻居表 7*数量 170 | for neighbor in self.neighbor: 171 | self.otherState.extend(neighbor.position) # 位置 172 | self.otherState.append(neighbor.velocity) # 速度 173 | self.otherState.append(direction_map.get(neighbor.direction)) # 方向 174 | self.otherState.append(neighbor.resources) # 可用资源 175 | self.otherState.append(neighbor.sum_needDeal_task) # 处理任务长度 176 | self.otherState.append(neighbor.len_action) # 当前正在传输任务数量 177 | 178 | self.otherState.extend(self.Mec.state) 179 | 180 | # 任务状态信息 181 | for i in range(MAX_TASK): 182 | if i < self.len_task: 183 | task = self.total_task[i] 184 | self.taskState.append([task.create_time, task.need_trans_size, task.need_precess_cycle, task.max_time]) 185 | else: 186 | self.taskState.append([0, 0, 0, 0]) 187 | 188 | return 
self.excludeNeighbor_state 189 | -------------------------------------------------------------------------------- /experiment4/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 环境4 3 | (两个动作:选择任务和选择对象) 4 | 加入了mec和车在时隙内处理任务的上限(mec最多同时处理10个任务、车最多处理5个任务) 5 | 使用经典城市道路(使用不同数量车辆和邻居) 6 | 为mec卸载和车辆卸载提供两种传输方式(即可同时像车辆和mec传输任务) 7 | """ 8 | import ptan 9 | import numpy as np 10 | import torch 11 | from torch.distributions.categorical import Categorical 12 | from env import Env 13 | 14 | 15 | def test_net(nets, env: Env, count=10): 16 | rewards = 0.0 17 | steps = 0 18 | for _ in range(count): 19 | env.reset() 20 | while steps < 1000: 21 | action = [] 22 | with torch.no_grad(): 23 | for vehicle in env.vehicles: 24 | state = torch.tensor(vehicle.self_state) 25 | _, pro = nets[vehicle.id](state) 26 | act = Categorical.sample(pro) 27 | action.append(act.item()) 28 | _, _, reward, _ = env.step(action) 29 | rewards += reward 30 | steps += 1 31 | return rewards / count, steps / count 32 | 33 | # def calc_logprob(pro_v, actions_v): 34 | # p1 = - ((mu_v - actions_v) ** 2) / (2 * torch.exp(logstd_v).clamp(min=1e-3)) 35 | # p2 = - torch.log(torch.sqrt(2 * math.pi * torch.exp(logstd_v))) 36 | # return p1 + p2 37 | -------------------------------------------------------------------------------- /experiment4/dqn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import time 4 | from collections import namedtuple 5 | 6 | import matplotlib 7 | import matplotlib.pyplot as plt 8 | import matplotlib.font_manager as fm 9 | from matplotlib.ticker import FuncFormatter 10 | import numpy as np 11 | import torch 12 | import torch.nn as nn 13 | import torch.optim as optim 14 | from pylab import mpl 15 | import netron 16 | from matplotlib import rcParams 17 | 18 | from env import Env 19 | from model import DQN 20 | 21 | np.random.seed(2) 22 | 23 | # 设置显示中文字体 24 | mpl.rcParams["font.sans-serif"] = ["SimHei"] 25 | matplotlib.rcParams['axes.unicode_minus'] = False 26 | os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" 27 | # 加载 Times New Roman 字体 28 | font_path = 'C:/Windows/Fonts/times.ttf' 29 | prop = fm.FontProperties(fname=font_path, size=10) 30 | # 设置全局字体为Times New Roman 31 | rcParams['font.family'] = 'Times New Roman' 32 | 33 | Experience = namedtuple('Transition', 34 | field_names=['cur_otherState', 'cur_TaskState', # 状态 35 | 'taskAction', 'aimAction', # 动作 36 | 'reward', # 奖励 37 | 'next_otherState', 'next_TaskState']) # Define a transition tuple 38 | GAMMA = 0.99 39 | BATCH_SIZE = 64 40 | REPLAY_SIZE = 10000 41 | LEARNING_RATE = 1e-4 42 | SYNC_TARGET_FRAMES = 100 # 更新目标网络频率 43 | 44 | EPSILON_DECAY_LAST_FRAME = 150000 45 | EPSILON_START = 0.6 46 | EPSILON_FINAL = 0.01 47 | EPSILON = 300000 48 | 49 | RESET = 100000 # 重置游戏次数 50 | 51 | MAX_TASK = 10 # 任务队列最大长度 52 | 53 | momentum = 0.005 54 | 55 | RESOURCE = [0.2, 0.4, 0.6, 0.8] 56 | 57 | 58 | @torch.no_grad() 59 | def play_step(env, epsilon, models): 60 | vehicles = env.vehicles 61 | old_otherState = [] 62 | old_taskState = [] 63 | 64 | actionTask = [] 65 | actionAim = [] 66 | # 贪心选择动作 67 | for i, model in enumerate(models): 68 | old_otherState.append(vehicles[i].self_state) 69 | old_taskState.append(vehicles[i].task_state) 70 | if np.random.random() < epsilon: 71 | # 随机动作 72 | actionTask.append(np.random.randint(0, 10)) 73 | actionAim.append(np.random.randint(0, 7)) # local+mec+neighbor 74 | else: 75 | state_v = 
torch.tensor([vehicles[i].self_state], dtype=torch.float32) 76 | taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 77 | taskAction, aimAction = model(state_v, taskState_v) 78 | 79 | taskAction = np.array(taskAction, dtype=np.float32).reshape(-1) 80 | aimAction = np.array(aimAction, dtype=np.float32).reshape(-1) 81 | 82 | actionAim.append(np.argmax(aimAction)) 83 | actionTask.append(np.argmax(taskAction)) 84 | # print("action:", action) 85 | _, _, _, otherState, _, taskState, Reward, reward = env.step(actionTask, actionAim) 86 | # print("reward:", reward) 87 | 88 | # 加入各自的缓存池【当前其他状态、当前任务状态、目标动作、任务动作,下一其他状态、下一任务状态】 89 | for i, vehicle in enumerate(vehicles): 90 | exp = Experience(old_otherState[i], [old_taskState[i]], 91 | actionTask[i], actionAim[i], 92 | reward[i], 93 | otherState[i], [taskState[i]]) 94 | vehicle.buffer.append(exp) 95 | return round(Reward, 2) # 返回总的平均奖励 96 | 97 | 98 | # 计算一个智能体的损失 99 | def calc_loss(batch, net: DQN, tgt_net: DQN, device="cpu"): 100 | cur_otherState, cur_TaskState, taskAction, aimAction, rewards, next_otherState, next_TaskState = batch # 101 | 102 | otherStates_v = torch.tensor(np.array(cur_otherState, copy=False), dtype=torch.float32).to(device) 103 | taskStates_v = torch.tensor(np.array(cur_TaskState, copy=False), dtype=torch.float32).to(device) 104 | # print("states_v:", states_v) # batch状态 105 | taskActions_v = torch.tensor(np.array(taskAction), dtype=torch.int64).to(device) 106 | aimActions_v = torch.tensor(np.array(aimAction), dtype=torch.int64).to(device) 107 | # print("actions_v", actions_v) # batch动作 108 | rewards_v = torch.tensor(np.array(rewards), dtype=torch.float32).to(device) 109 | # print("rewards_v", rewards_v) # batch奖励 110 | next_otherStates_v = torch.tensor(np.array(next_otherState, copy=False), dtype=torch.float32).to(device) 111 | next_taskStates_v = torch.tensor(np.array(next_TaskState, copy=False), dtype=torch.float32).to(device) 112 | # print("next_states_v", next_states_v) # batch下一个状态 113 | 114 | # 计算当前网络q值 115 | taskActionValues, aimActionValues = net(otherStates_v, 116 | taskStates_v) # .gather(1, aimActions_v.unsqueeze(-1)).squeeze(-1) 117 | taskActionValues = taskActionValues.gather(1, taskActions_v.unsqueeze(-1)).squeeze(-1) 118 | aimActionValues = aimActionValues.gather(1, aimActions_v.unsqueeze(-1)).squeeze(-1) 119 | 120 | # 计算目标网络q值 121 | next_taskActionValues, next_aimActionValues = tgt_net(next_otherStates_v, 122 | next_taskStates_v) # .max(1)[0] # 得到最大的q值 123 | 124 | next_taskActionValues = next_taskActionValues.max(1)[0].detach() 125 | next_aimActionValues = next_aimActionValues.max(1)[0].detach() 126 | 127 | # 防止梯度流入用于计算下一状态q近似值得NN 128 | # next_states_values = next_aimActionValues.detach() 129 | # print("next_states_values", next_states_values) 130 | expected_aim_values = next_aimActionValues * GAMMA + rewards_v 131 | expected_task_values = next_taskActionValues * GAMMA + rewards_v 132 | # print(" expected_state_values", expected_state_values) 133 | 134 | return nn.MSELoss()(taskActionValues, expected_task_values), nn.MSELoss()(aimActionValues, expected_aim_values) 135 | 136 | 137 | if __name__ == '__main__': 138 | env = Env() 139 | env.reset() 140 | 141 | frame_idx = 0 142 | # writer = SummaryWriter(comment="-" + env.__doc__) 143 | agents = env.vehicles 144 | models = [] 145 | tgt_models = [] 146 | optimizers = [] 147 | for agent in agents: 148 | # print(agent.get_location, agent.velocity) 149 | task_shape = np.array([agent.task_state]).shape 150 | # print(task_shape) 151 | model = 
DQN(len(agent.self_state), task_shape, MAX_TASK, len(agent.neighbor) + 2) 152 | models.append(model) 153 | optimer = optim.RMSprop(params=model.parameters(), lr=LEARNING_RATE, momentum=momentum) 154 | optimizers.append(optimer) 155 | for agent in agents: 156 | # print(agent.get_location, agent.velocity) 157 | task_shape = np.array([agent.task_state]).shape 158 | # print(task_shape) 159 | model = DQN(len(agent.self_state), task_shape, MAX_TASK, len(agent.neighbor) + 2) 160 | model.load_state_dict(models[agent.id].state_dict()) 161 | tgt_models.append(model) 162 | 163 | # 打印网络结构 164 | # model = models[0] 165 | # state_v = torch.tensor([env.vehicles[0].self_state], dtype=torch.float32) 166 | # taskState_v = torch.tensor([[env.vehicles[0].task_state]], dtype=torch.float32) 167 | # # 针对有网络模型,但还没有训练保存 .pth 文件的情况 168 | # modelpath = "./netStruct/demo.onnx" # 定义模型结构保存的路径 169 | # torch.onnx.export(model, (state_v, taskState_v), modelpath) # 导出并保存 170 | # netron.start(modelpath) 171 | 172 | total_reward = [] 173 | recent_reward = [] 174 | loss_task_list = [] 175 | loss_aim_list = [] 176 | reward_1 = [] 177 | 178 | epsilon = EPSILON_START 179 | eliposde = EPSILON 180 | while eliposde > 0: 181 | # 重置游戏 182 | # if frame_idx % RESET == 0: 183 | # print("游戏重置") 184 | # env.reset() 185 | # agents = env.vehicles 186 | 187 | frame_idx += 1 188 | print("the {} steps".format(frame_idx)) 189 | epsilon = max(EPSILON_FINAL, EPSILON_START - frame_idx / EPSILON_DECAY_LAST_FRAME) 190 | reward = play_step(env, epsilon, models) 191 | total_reward.append(reward) 192 | print("current reward:", reward) 193 | print("current 100 times total rewards:", np.mean(total_reward[-100:])) 194 | recent_reward.append(np.mean(total_reward[-100:])) 195 | if np.mean(total_reward[-100:]) > 0.7: 196 | break 197 | 198 | for i, agent in enumerate(agents): 199 | # print("length of {} buffer".format(agent.id), len(agent.buffer)) 200 | if len(agent.buffer) < REPLAY_SIZE: # 缓冲池要足够大 201 | continue 202 | if frame_idx % SYNC_TARGET_FRAMES == 0: # 更新目标网络 203 | tgt_models[i].load_state_dict(models[i].state_dict()) 204 | optimizers[i].zero_grad() 205 | batch = agent.buffer.sample(BATCH_SIZE) 206 | loss_task, loss_aim = calc_loss(batch, models[i], tgt_models[i]) 207 | # print("loss:", loss_task, " ", loss_aim) 208 | # loss_t.backward() 209 | torch.autograd.backward([loss_task, loss_aim]) 210 | optimizers[i].step() 211 | if agent.id == 0: 212 | # print("cur_loss:", loss_task.item()) 213 | # print("cur_aim_loss", loss_aim.item()) 214 | loss_task_list.append(loss_task.item()) 215 | loss_aim_list.append(loss_aim.item()) 216 | reward_1.append(env.reward[1]) 217 | eliposde -= 1 218 | 219 | # cur_time = time.strftime("%Y-%m-%d", time.localtime(time.time())) 220 | # # 创建文件夹 221 | # os.makedirs("D:/pycharm/Project/VML/MyErion/experiment4/result/" + cur_time) 222 | # for i, vehicle in enumerate(env.vehicles): 223 | # # 保存每个网络模型 224 | # torch.save(tgt_models[i].state_dict(), 225 | # "D:/pycharm/Project/VML/MyErion/experiment4/result/" + cur_time + "/vehicle" + str(i) + ".pkl") 226 | 227 | plt.plot(range(len(recent_reward)), recent_reward) 228 | # plt.title("奖励曲线") 229 | plt.ylabel("Average Reward", fontproperties=prop) 230 | plt.xlabel("Episode", fontproperties=prop) 231 | # 设置x轴和y轴的字体大小 232 | plt.tick_params(axis='both', which='major', labelsize=10) 233 | plt.tick_params(axis='both', which='minor', labelsize=10) 234 | # # 显示指数 235 | # # 创建数据 236 | # x = range(0, len(recent_reward) + 1, 50000) 237 | # # 设置x轴坐标为指数形式 238 | # plt.xscale('log') 239 | # 
plt.gca().xaxis.set_major_formatter(FuncFormatter(lambda x, _: '{:.0e}'.format(x) if x != 0 else '0')) 240 | # 241 | # # 设置x轴坐标显示范围 242 | # plt.xlim([1, 3e5]) 243 | # 244 | # # 设置x轴坐标显示标签 245 | # plt.xticks([1] + list(range(int(5e4), int(3e5) + 1, int(5e4)))) 246 | 247 | plt.show() 248 | 249 | # plt.plot(range(len(loss_task_list)), loss_task_list) 250 | # plt.title("任务选择损失曲线") 251 | # plt.show() 252 | # 253 | # plt.plot(range(len(loss_aim_list)), loss_aim_list) 254 | # plt.title("目标选择损失曲线") 255 | # plt.show() 256 | 257 | # plt.plot(range(100000), reward_1[-100000:]) 258 | # plt.title("车辆一奖励曲线") 259 | # plt.show() 260 | -------------------------------------------------------------------------------- /experiment4/env_test.py: -------------------------------------------------------------------------------- 1 | from env import Env 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | if __name__ == '__main__': 6 | print() 7 | env = Env() 8 | env.reset() 9 | 10 | # 测试网络节点数 11 | task = np.array(env.taskState) 12 | print(task.shape) 13 | vehicles = env.vehicles 14 | 15 | for vehicle in vehicles: 16 | print("第{}车状态:{}".format(vehicle.id, vehicle.self_state)) 17 | print("该车邻居:") 18 | for i in vehicle.neighbor: 19 | print(i.id, end=" ") 20 | print() 21 | 22 | # 测试环境运行 23 | reward = [] 24 | x = [[] for i in range(20)] 25 | y = [[] for i in range(20)] 26 | for i in range(1000): 27 | # for j in range(20): 28 | # x[j].append(env.vehicles[j].position[0]) 29 | # y[j].append(env.vehicles[j].position[1]) 30 | action1 = [] 31 | action2 = [] 32 | for j in range(40): 33 | # action1.append(np.random.randint(0, 10)) 34 | action1.append(0) 35 | # action2.append(np.random.randint(0, 7)) 36 | action2.append(1) 37 | other_state, task_state, vehicle_state, _, _, _, Reward, _ = env.step(action1, action2) 38 | reward.append(Reward) 39 | print("第{}次平均奖励{}".format(i, Reward)) 40 | # print("当前状态:", state) 41 | # print("下一状态:", next_state) 42 | # print("车状态:", vehicleState) 43 | # print("任务状态", taskState) 44 | # print("当前奖励:", reward) 45 | # print("每个奖励,", vehicleReward) 46 | # print("当前有{}任务没有传输完成".format(len(env.need_trans_task))) 47 | # print("average reward:", env.Reward) 48 | # plt.figure(figsize=(100, 100)) 49 | # fix, ax = plt.subplots(5, 4) 50 | # 51 | # for i in range(5): 52 | # for j in range(4): 53 | # number = i * 4 + j 54 | # ax[i, j].plot(x[number], y[number]) 55 | # ax[i, j].set_title('vehicle {}'.format(number)) 56 | plt.plot(range(len(reward)), reward) 57 | print(reward) 58 | plt.show() 59 | -------------------------------------------------------------------------------- /experiment4/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import argparse 3 | import os 4 | import time 5 | from collections import namedtuple 6 | 7 | import ptan 8 | import torch 9 | import torch.nn.functional as F 10 | import model 11 | from env import Env 12 | from mec import MEC 13 | from vehicle import Vehicle 14 | from memory import ReplayMemory 15 | from tensorboardX import SummaryWriter 16 | from torch.distributions.categorical import Categorical 17 | 18 | ENV_ID = "computing offloading" 19 | GAMMA = 0.99 20 | GAE_LAMBDA = 0.95 21 | 22 | TRAJECTORY_SIZE = 65 23 | LEARNING_RATE_ACTOR = 1e-5 24 | LEARNING_RATE_CRITIC = 1e-4 25 | 26 | PPO_EPS = 0.2 27 | PPO_EPOCHES = 10 28 | PPO_BATCH_SIZE = 64 29 | 30 | TEST_ITERS = 10000 31 | Experience = namedtuple('Transition', ('state', 'action', 'reward', 'next_state')) # Define a transition tuple 32 | 33 | 
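# Annotation (added note, not original code): calc_adv_ref below implements
# Generalized Advantage Estimation over a stored trajectory,
#     delta_t = r_t + GAMMA * V(s_{t+1}) - V(s_t)
#     A_t     = delta_t + GAMMA * GAE_LAMBDA * A_{t+1},
# with the critic reference value ref_t = A_t + V(s_t); the lists are built
# while iterating the trajectory in reverse and flipped back at the end.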
34 | # 将list装换成tensor存入缓冲池中 35 | def save_experience(state, action, reward, next_state, memory: ReplayMemory): 36 | reward = torch.tensor([reward]) 37 | action = torch.tensor([action]) 38 | state = torch.tensor(state) 39 | state = state.unsqueeze(0) 40 | next_state = torch.tensor(next_state) 41 | next_state = next_state.unsqueeze(0) 42 | memory.push(state, action, reward, next_state) 43 | 44 | 45 | def calc_adv_ref(trajectory, net_crt, states_v, device="cpu"): 46 | """ 47 | By trajectory calculate advantage and 1-step ref value 48 | :param trajectory: trajectory list 49 | :param net_crt: critic network 50 | :param states_v: states tensor 51 | :return: tuple with advantage numpy array and reference values 52 | """ 53 | values_v = net_crt(torch.tensor(states_v)) 54 | values = values_v.squeeze().data.cpu().numpy() 55 | # generalized advantage estimator: smoothed version of the advantage 56 | last_gae = 0.0 57 | result_adv = [] 58 | result_ref = [] 59 | for val, next_val, exp in zip(reversed(values[:-1]), 60 | reversed(values[1:]), 61 | reversed(trajectory[:-1])): 62 | delta = exp.vehicleReward + GAMMA * next_val - val 63 | last_gae = delta + GAMMA * GAE_LAMBDA * last_gae 64 | result_adv.append(last_gae) 65 | result_ref.append(last_gae + val) 66 | 67 | adv_v = torch.FloatTensor(list(reversed(result_adv))) 68 | ref_v = torch.FloatTensor(list(reversed(result_ref))) 69 | return adv_v.to(device), ref_v.to(device) 70 | 71 | 72 | # 将状态信息放入各自的缓冲池中 73 | def push(env, state, actions, next_state): 74 | for i, vehicle in enumerate(env.vehicles): 75 | if vehicle.task is not None: # 没有任务不算经验 76 | continue 77 | exp = Experience(state, actions[i], env.vehicleReward[i][-1], next_state) 78 | vehicle.buffer.append(exp) 79 | 80 | 81 | if __name__ == '__main__': 82 | task = MEC([10, 20]) 83 | vehicle = Vehicle(1, [10, 20], 'd') 84 | print(type(task) == MEC) 85 | print(type(task) == Vehicle) 86 | print(type(vehicle) == Vehicle) 87 | print(type(vehicle)) 88 | print(vehicle) 89 | -------------------------------------------------------------------------------- /experiment4/mec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | RANGE_MEC = 650 # MEC通信范围 /m 4 | RESOURCE = 10000 # 可用资源 MHz 5 | MAX_QUEUE = 10 6 | 7 | 8 | # 边缘服务器 9 | class MEC: 10 | def __init__(self, position, resources=RESOURCE, max_queue=MAX_QUEUE): 11 | self.loc_x = position[0] 12 | self.loc_y = position[1] 13 | self.loc = position 14 | # 当前可用资源 MHz 15 | self.resources = resources 16 | self.state = [] 17 | # 通信范围 m 18 | self.range = RANGE_MEC 19 | # 当前接到需要处理的任务信息(最多同时处理10个任务) 20 | self.accept_task = [] 21 | # 最多处理任务量 22 | self.max_task = 10 23 | # 接受任务的数量 24 | self.sum_needDeal_task = 0 25 | # 此时刻有多少动作选则我 多少任务选择传输给我 26 | self.len_action = 0 27 | # 等待计算的任务队列(理解为挂起状态) 28 | self.task_queue = [] 29 | # 用于奖励计算的任务队列 30 | self.task_queue_for_reward = [] 31 | # 队列最长长度 32 | self.max_queue = max_queue 33 | # 当前状态 34 | self.get_state() 35 | 36 | @property 37 | def get_x(self): 38 | return self.loc_x 39 | 40 | @property 41 | def get_y(self): 42 | return self.loc_y 43 | 44 | @property 45 | def get_location(self): 46 | return self.loc 47 | 48 | """ 49 | 获得状态 50 | """ 51 | 52 | def get_state(self): 53 | """ 54 | :return:state 维度:[loc_x,loc_y,sum_needDeal_task,resources] 55 | """ 56 | self.state = [] 57 | self.state.extend(self.loc) 58 | self.state.append(self.sum_needDeal_task) 59 | self.state.append(self.len_action) 60 | self.state.append(self.resources) 61 | return self.state 62 | 
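
# Usage sketch (added annotation, not original code; the position below is an
# assumed example value). Note the docstring above lists four state fields,
# but get_state() actually appends five:
# [loc_x, loc_y, sum_needDeal_task, len_action, resources].
if __name__ == '__main__':
    mec = MEC(position=[500, 500])
    print(len(mec.state), mec.state)  # -> 5 [500, 500, 0, 0, 10000]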
-------------------------------------------------------------------------------- /experiment4/memory.py: -------------------------------------------------------------------------------- 1 | # 经验类型 2 | import collections 3 | from collections import namedtuple 4 | from random import sample 5 | import numpy as np 6 | 7 | Experience = namedtuple('Transition', 8 | field_names=['state', 'action', 'reward', 'next_state']) # Define a transition tuple 9 | 10 | 11 | class ReplayMemory(object): # Define a replay memory 12 | 13 | # 初始化缓冲池 14 | def __init__(self, capacity): 15 | # 最大容量 16 | self.capacity = capacity 17 | # 缓冲池经验 18 | self.memory = [] 19 | # ? 20 | self.position = 0 21 | 22 | # 存入经验 23 | def push(self, *args): 24 | if len(self.memory) < self.capacity: 25 | self.memory.append(None) 26 | # 存入经验 27 | self.memory[self.position] = Experience(*args) 28 | # 记录最新经验所在位置 29 | self.position = (self.position + 1) % self.capacity 30 | 31 | # 采样 32 | def sample(self, batch_size): 33 | return sample(self.memory, batch_size) 34 | 35 | def __len__(self): 36 | return len(self.memory) 37 | 38 | 39 | class ExperienceBuffer: 40 | def __init__(self, capacity): 41 | self.maxLen = capacity 42 | self.buffer = collections.deque(maxlen=capacity) # 队列,先进先出 43 | 44 | def __len__(self): 45 | return len(self.buffer) 46 | 47 | def append(self, experience: Experience): 48 | self.buffer.append(experience) 49 | 50 | def sample(self, batch_size): 51 | indices = np.random.choice(len(self.buffer), batch_size, replace=False) 52 | cur_otherState, cur_TaskState, taskAction, aimAction, rewards, next_otherState, next_TaskState = zip( 53 | *[self.buffer[idx] for idx in indices]) 54 | # 转换成numpy 55 | return np.array(cur_otherState), np.array(cur_TaskState), \ 56 | np.array(taskAction), np.array(aimAction), \ 57 | np.array(rewards, dtype=np.float32), \ 58 | np.array(next_otherState), np.array(next_TaskState) 59 | 60 | # 清空 61 | def clear(self): 62 | self.buffer = collections.deque(maxlen=self.maxLen) 63 | -------------------------------------------------------------------------------- /experiment4/model.py: -------------------------------------------------------------------------------- 1 | import ptan 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn import functional as F 6 | from torch.distributions.categorical import Categorical 7 | 8 | HID_SIZE = 64 9 | 10 | 11 | class ModelActor(nn.Module): 12 | def __init__(self, obs_dim, act_dim, task_dim): 13 | super(ModelActor, self).__init__() 14 | self.act_dim = act_dim 15 | 16 | self.mu = nn.Sequential( 17 | nn.Linear(obs_dim + HID_SIZE, HID_SIZE * 2), 18 | nn.ReLU(), 19 | nn.Linear(HID_SIZE * 2, HID_SIZE), 20 | nn.ReLU(), 21 | nn.Linear(HID_SIZE, act_dim * 2), 22 | ) 23 | self.cnn = CNNLayer(task_dim, HID_SIZE) 24 | 25 | def forward(self, x, task): 26 | cnn_out = self.cnn(task) 27 | x = torch.cat((x, cnn_out), -1) 28 | out = self.mu(x) 29 | action_out = out[:, :self.act_dim] 30 | task_out = out[:, self.act_dim:] 31 | action_pro = F.softmax(action_out, dim=-1) 32 | task_pro = F.softmax(task_out, dim=1) 33 | print(action_pro) 34 | print(task_pro) 35 | return action_pro, Categorical(action_pro), task_pro, Categorical(task_pro) 36 | 37 | 38 | class ModelCritic(nn.Module): 39 | def __init__(self, obs_size): 40 | super(ModelCritic, self).__init__() 41 | 42 | self.value = nn.Sequential( 43 | nn.Linear(obs_size, HID_SIZE), 44 | nn.ReLU(), 45 | nn.Linear(HID_SIZE, HID_SIZE), 46 | nn.ReLU(), 47 | nn.Linear(HID_SIZE, 1), 48 | ) 49 | 50 | def forward(self, x): 51 
| return self.value(x) 52 | 53 | 54 | class ModelSACTwinQ(nn.Module): 55 | def __init__(self, obs_size, act_size): 56 | super(ModelSACTwinQ, self).__init__() 57 | 58 | self.q1 = nn.Sequential( 59 | nn.Linear(obs_size + act_size, HID_SIZE), 60 | nn.ReLU(), 61 | nn.Linear(HID_SIZE, HID_SIZE), 62 | nn.ReLU(), 63 | nn.Linear(HID_SIZE, 1), 64 | ) 65 | 66 | self.q2 = nn.Sequential( 67 | nn.Linear(obs_size + act_size, HID_SIZE), 68 | nn.ReLU(), 69 | nn.Linear(HID_SIZE, HID_SIZE), 70 | nn.ReLU(), 71 | nn.Linear(HID_SIZE, 1), 72 | ) 73 | 74 | def forward(self, obs, act): 75 | x = torch.cat([obs, act], dim=1) 76 | return self.q1(x), self.q2(x) 77 | 78 | 79 | class AgentDDPG(ptan.agent.BaseAgent): 80 | """ 81 | Agent implementing Orstein-Uhlenbeck exploration process 82 | """ 83 | 84 | def __init__(self, net, device="cpu", ou_enabled=True, 85 | ou_mu=0.0, ou_teta=0.15, ou_sigma=0.2, 86 | ou_epsilon=1.0): 87 | self.net = net 88 | self.device = device 89 | self.ou_enabled = ou_enabled 90 | self.ou_mu = ou_mu 91 | self.ou_teta = ou_teta 92 | self.ou_sigma = ou_sigma 93 | self.ou_epsilon = ou_epsilon 94 | 95 | def initial_state(self): 96 | return None 97 | 98 | def __call__(self, states, agent_states): 99 | states_v = ptan.agent.float32_preprocessor(states) 100 | states_v = states_v.to(self.device) 101 | mu_v = self.net(states_v) 102 | actions = mu_v.data.cpu().numpy() 103 | 104 | if self.ou_enabled and self.ou_epsilon > 0: 105 | new_a_states = [] 106 | for a_state, action in zip(agent_states, actions): 107 | if a_state is None: 108 | a_state = np.zeros( 109 | shape=action.shape, dtype=np.float32) 110 | a_state += self.ou_teta * (self.ou_mu - a_state) 111 | a_state += self.ou_sigma * np.random.normal( 112 | size=action.shape) 113 | 114 | action += self.ou_epsilon * a_state 115 | new_a_states.append(a_state) 116 | else: 117 | new_a_states = agent_states 118 | 119 | actions = np.clip(actions, -1, 1) 120 | return actions, new_a_states 121 | 122 | 123 | class DQN(nn.Module): 124 | def __init__(self, obs_dim, task_dim, taskAction_dim, aimAction_dim): 125 | super(DQN, self).__init__() 126 | self.input_layer = nn.Linear(obs_dim + 32, 128) 127 | self.hidden1 = nn.Linear(128, 64) 128 | self.hidden2 = nn.Linear(64, 64) 129 | self.hidden3 = nn.Linear(64, 128) 130 | self.cnn = CNNLayer(task_dim, 32) 131 | self.output_layer1 = self.common(64, taskAction_dim) 132 | self.output_layer2 = self.common(64, aimAction_dim) 133 | 134 | def common(self, input_dim, action_dim): 135 | return nn.Sequential( 136 | nn.Linear(input_dim, 128), 137 | nn.ReLU(), 138 | self.hidden1, 139 | nn.ReLU(), 140 | self.hidden2, 141 | nn.ReLU(), 142 | nn.Linear(64, action_dim) 143 | ) 144 | 145 | def forward(self, x, task): 146 | """ 147 | 148 | :param x: batch_size*state_n 149 | :return: batch_size*actions_n 输出每个动作对应的q值 150 | """ 151 | # 任务卷积层 152 | cnn_out = self.cnn(task) 153 | x = torch.cat((x, cnn_out), -1) 154 | 155 | # 公共层 156 | x1 = F.relu(self.input_layer(x)) 157 | x2 = F.relu(self.hidden1(x1)) 158 | x3 = F.relu(self.hidden2(x2)) 159 | 160 | taskActionValue = self.output_layer1(x3) 161 | aimActionValue = self.output_layer2(x3) 162 | 163 | return taskActionValue, aimActionValue 164 | 165 | 166 | class CNNLayer(nn.Module): 167 | def __init__(self, obs_shape, hidden_size, use_orthogonal=True, use_ReLU=True, kernel_size=3, stride=1): 168 | super(CNNLayer, self).__init__() 169 | 170 | active_func = [nn.Tanh(), nn.ReLU()][use_ReLU] 171 | init_method = [nn.init.xavier_uniform_, nn.init.orthogonal_][use_orthogonal] 172 | gain = 
nn.init.calculate_gain(['tanh', 'relu'][use_ReLU]) 173 | 174 | def init_(m): # 权重使用正交初始化,激活函数使用relu 175 | return init(m, init_method, lambda x: nn.init.constant_(x, 0), gain=gain) 176 | 177 | input_channel = obs_shape[0] 178 | input_width = obs_shape[1] 179 | input_height = obs_shape[2] 180 | 181 | self.cnn = nn.Sequential( 182 | init_(nn.Conv2d(in_channels=input_channel, 183 | out_channels=hidden_size // 2, 184 | kernel_size=kernel_size, 185 | stride=stride) 186 | ), 187 | active_func, 188 | nn.Flatten(), 189 | init_(nn.Linear( 190 | hidden_size // 2 * (input_width - kernel_size + stride) * (input_height - kernel_size + stride), 191 | hidden_size) 192 | ), 193 | active_func, 194 | init_(nn.Linear(hidden_size, hidden_size)), active_func) 195 | 196 | def forward(self, x): 197 | x = x / 255.0 198 | x = self.cnn(x) 199 | 200 | return x 201 | 202 | 203 | def init(module, weight_init, bias_init, gain=1): 204 | weight_init(module.weight.data, gain=gain) 205 | bias_init(module.bias.data) 206 | return module 207 | -------------------------------------------------------------------------------- /experiment4/task.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | np.random.seed(2) 4 | 5 | 6 | class Task: 7 | """ 8 | 定义任务类型 9 | """ 10 | 11 | def __init__(self, vehicle=None, createTime=0): 12 | # 产生任务的车辆 13 | self.vehicle = vehicle 14 | # 完成该任务所消耗的资源 15 | self.aim = None # 传送对象 16 | 17 | self.max_time = 50 # ms 最大容忍时间 18 | self.size = np.random.uniform(0.2, 1) # Mb 19 | self.cycle = np.random.randint(50, 100) # cycle/bit 20 | self.need_trans_size = self.size * np.power(2, 10) # Kb 还剩余多少未传输完成 21 | self.need_precess_cycle = self.cycle * self.size * 1000 # Mb * cycle/byte =M cycle 还剩余多少轮次未完成(10^6) 22 | self.need_time = 0 # 需要计算时间 23 | self.hold_time = 0 # 任务在计算等待队列中得等待时间 24 | 25 | self.rate = 0 # 当前速率 26 | 27 | self.compute_resource = 0 28 | 29 | self.create_time = createTime # 任务产生时间 30 | self.pick_time = 0 # 被选择的时间(出队列时间) 31 | 32 | # 完成该任务所消耗的cup资源 33 | self.energy = 0 34 | self.trans_time = 0 # 传输所需要的时间(实际) 35 | self.precess_time = 0 # 任务处理所需要的时间(实际) 36 | -------------------------------------------------------------------------------- /experiment4/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | import matplotlib 5 | from pylab import mpl 6 | import model 7 | from env import Env 8 | import matplotlib.pyplot as plt 9 | 10 | # 设置显示中文字体 11 | mpl.rcParams["font.sans-serif"] = ["SimHei"] 12 | matplotlib.rcParams['axes.unicode_minus'] = False 13 | np.random.seed(2) 14 | 15 | if __name__ == '__main__': 16 | env = Env() 17 | env.reset() 18 | 19 | N = env.num_Vehicles 20 | vehicles = env.vehicles 21 | models = [] 22 | 23 | task_shape = np.array([vehicles[0].task_state]).shape 24 | for i in range(N): 25 | tgt_model = model.DQN(len(vehicles[0].self_state), task_shape, 10, len(vehicles[0].neighbor) + 2) 26 | tgt_model.load_state_dict(torch.load( 27 | "D:\pycharm\Project\VML\MyErion\experiment4\\result\\2022-11-26\\vehicle{}.pkl".format(i))) 28 | models.append(tgt_model) 29 | 30 | # state_v = torch.tensor([vehicles[i].otherState], dtype=torch.float32) 31 | # taskState_v = torch.tensor([[vehicles[i].taskState]], dtype=torch.float32) 32 | # taskAction, aimAction = models[0](state_v, taskState_v) 33 | 34 | vehicleReward = [] 35 | averageReward = [] 36 | for step in range(1000): 37 | action1 = [] 38 | action2 = [] 39 | 40 | for i in range(N): 41 | state_v = 
torch.tensor([vehicles[i].self_state], dtype=torch.float32) 42 | taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 43 | taskAction, aimAction = models[i](state_v, taskState_v) 44 | 45 | # taskAction = np.array(taskAction, dtype=np.float32).reshape(-1) 46 | # aimAction = np.array(aimAction, dtype=np.float32).reshape(-1) 47 | taskAction = taskAction.detach().numpy().reshape(-1) 48 | aimAction = aimAction.detach().numpy().reshape(-1) 49 | action1.append(np.argmax(taskAction)) 50 | # action1.append(0) 51 | action2.append(np.argmax(aimAction)) 52 | 53 | print(action1) 54 | print(action2) 55 | other_state, task_state, vehicle_state, _, _, _, Reward, reward = env.step(action1, action2) 56 | vehicleReward.append(reward[5]) 57 | averageReward.append(Reward) 58 | print("第{}次车辆平均奖励{}".format(step, Reward)) 59 | 60 | fig, aix = plt.subplots(2, 1) 61 | aix[0].plot(range(len(vehicleReward)), vehicleReward) 62 | aix[1].plot(range(len(averageReward)), averageReward) 63 | plt.show() 64 | 65 | avg = [np.mean(sum_time) for i, sum_time in enumerate(env.avg) if i % 3 != 0] 66 | plt.bar(range(len(avg)), avg) 67 | plt.title("平均时延") 68 | plt.ylabel("时延/ms") 69 | plt.show() 70 | 71 | avg = [np.mean(energy) for i, energy in enumerate(env.avg_energy) if i % 3 != 0] 72 | plt.bar(range(len(avg)), avg) 73 | plt.title("平均能量消耗") 74 | plt.ylabel("能量/J") 75 | plt.show() 76 | -------------------------------------------------------------------------------- /experiment4/vehicle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from memory import ExperienceBuffer 6 | from task import Task 7 | 8 | Dv = 100 # 车的最大通信范围 9 | Fv = 4000 # 车最大计算能力 MHZ 10 | MAX_TASK = 10 # 任务队列最大长度 11 | 12 | CAPACITY = 10000 # 缓冲池大小 13 | TASK_SOLT = 10 # 任务产生时隙 14 | 15 | # 等待队列最长长度 16 | MAX_QUEUE = 10 17 | 18 | np.random.seed(2) 19 | 20 | direction_map = {"d": 1, "u": 2, "l": 3, "r": 4} 21 | 22 | 23 | class Vehicle: 24 | # 位置:x,y 速度、方向:-1左,1右 25 | def __init__(self, id, position, direction, velocity=20, max_queue=MAX_QUEUE): 26 | self.id = id 27 | # 车的位置信息 28 | self.loc_x = position[0] 29 | self.loc_y = position[1] 30 | self.position = position 31 | self.velocity = velocity # m/s 32 | self.direction = direction 33 | # 通信范围 34 | self.range = Dv 35 | # 邻居表 36 | self.neighbor = [] 37 | # mec 38 | self.Mec = None 39 | # 当前时间 40 | self.cur_frame = 0 41 | # 接受的任务的列表(最多同时处理5个任务) 42 | self.accept_task = [] 43 | # 最多处理任务量 44 | self.max_task = 3 45 | # 等待队列最长长度 46 | self.max_queue = max_queue 47 | # 等待计算的任务队列(理解为挂起状态) 48 | self.task_queue = [] 49 | # 用于奖励计算的任务队列 50 | self.task_queue_for_reward = [] 51 | # 接受任务的数量 52 | self.sum_needDeal_task = 0 53 | # 此时刻有多少动作选则我 54 | self.len_action = 0 55 | # 当前可用资源 56 | self.resources = round((1 - np.random.randint(1, 4) / 10) * Fv, 2) # MHz 57 | # 表示当前是否有任务正在传输给邻居车辆(0:没有,1:有) 58 | self.trans_task_for_vehicle = 0 59 | # 当前是否有任务正在传输给mec 60 | self.trans_task_for_mec = 0 61 | # 当前处理的任务(用于计算奖励,不用于状态信息) 62 | self.cur_task = None 63 | # 任务队列 64 | self.total_task = [] 65 | # 任务队列的长度 66 | self.len_task = len(self.total_task) 67 | 68 | # 当前状态信息 69 | self.self_state = [] 70 | # 当前任务队列状态 71 | self.task_state = [] 72 | # 去除邻居的状态信息用于邻居车观察和全局critic的处理 73 | self.excludeNeighbor_state = [] 74 | # 缓冲池 75 | self.buffer = ExperienceBuffer(capacity=CAPACITY) 76 | # 总奖励 77 | self.reward = [] 78 | # 任务溢出的数量 79 | self.overflow = 0 80 | # 上一个任务产生的时间 81 | self.lastCreatWorkTime = 0 82 | 83 | # 产生任务 84 | self.create_work() 85 | 
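# Illustrative sketch (editorial addition, not from the original vehicle.py):
# the constructor above fixes each vehicle's available capacity with
#     round((1 - np.random.randint(1, 4) / 10) * Fv, 2)
# and np.random.randint(1, 4) samples from {1, 2, 3}, so every vehicle starts
# with 70%, 80% or 90% of Fv, i.e. 2800.0, 3200.0 or 3600.0 MHz for Fv = 4000.
# A standalone check, assuming only numpy and the stand-in constant Fv_DEMO:
import numpy as np
Fv_DEMO = 4000  # hypothetical stand-in for the module-level Fv constant above
draws = {round((1 - np.random.randint(1, 4) / 10) * Fv_DEMO, 2) for _ in range(1000)}
print(sorted(float(d) for d in draws))  # expected: [2800.0, 3200.0, 3600.0]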
86 | # 获得位置 87 | @property 88 | def get_location(self): 89 | return self.position 90 | 91 | # 设置位置 92 | def set_location(self, loc_x, loc_y): 93 | self.loc_x = loc_x 94 | self.loc_y = loc_y 95 | self.position = [self.loc_x, self.loc_y] 96 | 97 | # 获得x 98 | @property 99 | def get_x(self): 100 | return self.loc_x 101 | 102 | # 获得y 103 | @property 104 | def get_y(self): 105 | return self.loc_y 106 | 107 | # 产生任务 传入当前时间 108 | def create_work(self): 109 | if self.id % 3 == 0: 110 | return 111 | # 每隔一段时间进行一次任务产生 112 | if (self.cur_frame - self.lastCreatWorkTime) % TASK_SOLT == 0: 113 | # # 每次有0.6的概率产生任务 114 | if np.random.random() < 0.6: 115 | if self.len_task < MAX_TASK: # 队列不满 116 | task = Task(self, self.cur_frame % 50) 117 | self.lastCreatWorkTime = self.cur_frame 118 | self.total_task.append(task) 119 | self.len_task += 1 120 | # print("第{}辆车产生了任务".format(self.id)) 121 | self.overflow = 0 122 | else: 123 | # print("第{}辆车任务队列已满".format(self.id)) 124 | self.overflow = 1 125 | 126 | """ 127 | 获得状态 128 | """ 129 | 130 | def get_state(self): 131 | self.self_state = [] 132 | self.excludeNeighbor_state = [] 133 | self.task_state = [] 134 | 135 | # 位置信息 4 136 | self.self_state.extend(self.position) 137 | self.self_state.append(self.velocity) 138 | self.self_state.append(direction_map.get(self.direction)) 139 | self.excludeNeighbor_state.extend(self.position) 140 | self.excludeNeighbor_state.append(self.velocity) 141 | self.excludeNeighbor_state.append(direction_map.get(self.direction)) 142 | 143 | # 资源信息(可用资源) 144 | self.self_state.append(self.resources) 145 | self.excludeNeighbor_state.append(self.resources) 146 | 147 | # 当前处理的任务量 148 | self.self_state.append(self.sum_needDeal_task) 149 | self.excludeNeighbor_state.append(self.sum_needDeal_task) 150 | # 当前接受传输的任务量 151 | self.self_state.append(self.len_action) 152 | self.excludeNeighbor_state.append(self.sum_needDeal_task) 153 | 154 | # 当前是否有任务在传输 155 | self.excludeNeighbor_state.append(self.trans_task_for_vehicle) 156 | self.excludeNeighbor_state.append(self.trans_task_for_mec) 157 | self.self_state.append(self.trans_task_for_vehicle) 158 | self.self_state.append(self.trans_task_for_mec) 159 | 160 | # 正在传输的任务信息 161 | # if self.trans_task is not None: 162 | # self.otherState.append(self.trans_task.need_trans_size) 163 | # self.excludeNeighbor_state.append(self.trans_task.need_trans_size) 164 | # else: 165 | # self.otherState.append(0) 166 | # self.excludeNeighbor_state.append(0) 167 | 168 | # 当前队列长度 169 | self.self_state.append(self.len_task) 170 | self.excludeNeighbor_state.append(self.len_task) 171 | 172 | # 邻居表 7*数量 173 | for neighbor in self.neighbor: 174 | self.self_state.extend(neighbor.position) # 位置 175 | self.self_state.append(neighbor.velocity) # 速度 176 | self.self_state.append(direction_map.get(neighbor.direction)) # 方向 177 | self.self_state.append(neighbor.resources) # 可用资源 178 | self.self_state.append(neighbor.sum_needDeal_task) # 处理任务长度 179 | self.self_state.append(neighbor.len_action) # 当前正在传输任务数量 180 | 181 | self.self_state.extend(self.Mec.state) 182 | 183 | # 任务状态信息 184 | for i in range(MAX_TASK): 185 | if i < self.len_task: 186 | task = self.total_task[i] 187 | self.task_state.append([task.create_time, task.need_trans_size, task.need_precess_cycle, task.max_time]) 188 | else: 189 | self.task_state.append([0, 0, 0, 0]) 190 | 191 | return self.excludeNeighbor_state 192 | -------------------------------------------------------------------------------- /experiment5/__init__.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | 环境5 3 | (两个动作:选择任务和选择对象) 4 | 加入了mec和车在时隙内处理任务的上限(mec最多同时处理10个任务、车最多处理5个任务) 5 | 使用经典城市道路(使用不同数量车辆和邻居) 6 | 为mec卸载和车辆卸载提供两种传输方式(即可同时像车辆和mec传输任务) 7 | 使用MAPPO训练网络 8 | """ 9 | import ptan 10 | import numpy as np 11 | import torch 12 | from torch.distributions.categorical import Categorical 13 | from env import Env 14 | 15 | 16 | def test_net(nets, env: Env, count=10): 17 | rewards = 0.0 18 | steps = 0 19 | for _ in range(count): 20 | env.reset() 21 | while steps < 1000: 22 | action = [] 23 | with torch.no_grad(): 24 | for vehicle in env.vehicles: 25 | state = torch.tensor(vehicle.self_state) 26 | _, pro = nets[vehicle.id](state) 27 | act = Categorical.sample(pro) 28 | action.append(act.item()) 29 | _, _, reward, _ = env.step(action) 30 | rewards += reward 31 | steps += 1 32 | return rewards / count, steps / count 33 | 34 | # def calc_logprob(pro_v, actions_v): 35 | # p1 = - ((mu_v - actions_v) ** 2) / (2 * torch.exp(logstd_v).clamp(min=1e-3)) 36 | # p2 = - torch.log(torch.sqrt(2 * math.pi * torch.exp(logstd_v))) 37 | # return p1 + p2 38 | -------------------------------------------------------------------------------- /experiment5/env_test.py: -------------------------------------------------------------------------------- 1 | from env import Env 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # if __name__ == '__main__': 6 | # print() 7 | # env = Env() 8 | # env.reset() 9 | # 10 | # # 测试网络节点数 11 | # task = np.array(env.taskState) 12 | # print(task.shape) 13 | # vehicles = env.vehicles 14 | # 15 | # for vehicle in vehicles: 16 | # print("第{}车状态:{}".format(vehicle.id, vehicle.self_state)) 17 | # print("该车邻居:") 18 | # for i in vehicle.neighbor: 19 | # print(i.id, end=" ") 20 | # print() 21 | # 22 | # # 测试环境运行 23 | # reward = [] 24 | # x = [[] for i in range(20)] 25 | # y = [[] for i in range(20)] 26 | # for i in range(1000): 27 | # # for j in range(20): 28 | # # x[j].append(env.vehicles[j].position[0]) 29 | # # y[j].append(env.vehicles[j].position[1]) 30 | # action1 = [] 31 | # action2 = [] 32 | # for j in range(40): 33 | # # action1.append(np.random.randint(0, 10)) 34 | # action1.append(0) 35 | # # action2.append(np.random.randint(0, 7)) 36 | # action2.append(1) 37 | # other_state, task_state, vehicle_state, _, _, _, Reward, _ = env.step(action1, action2) 38 | # reward.append(Reward) 39 | # print("第{}次平均奖励{}".format(i, Reward)) 40 | # # print("当前状态:", state) 41 | # # print("下一状态:", next_state) 42 | # # print("车状态:", vehicleState) 43 | # # print("任务状态", taskState) 44 | # # print("当前奖励:", reward) 45 | # # print("每个奖励,", vehicleReward) 46 | # # print("当前有{}任务没有传输完成".format(len(env.need_trans_task))) 47 | # # print("average reward:", env.Reward) 48 | # # plt.figure(figsize=(100, 100)) 49 | # # fix, ax = plt.subplots(5, 4) 50 | # # 51 | # # for i in range(5): 52 | # # for j in range(4): 53 | # # number = i * 4 + j 54 | # # ax[i, j].plot(x[number], y[number]) 55 | # # ax[i, j].set_title('vehicle {}'.format(number)) 56 | # plt.plot(range(len(reward)), reward) 57 | # print(reward) 58 | # plt.show() 59 | 60 | if __name__ == '__main__': 61 | print() 62 | env = Env() 63 | env.reset() 64 | 65 | # 测试网络节点数 66 | vehicles = env.vehicles 67 | 68 | for vehicle in vehicles: 69 | print("第{}车状态:{}".format(vehicle.id, vehicle.self_state)) 70 | print("该车邻居:") 71 | for i in vehicle.neighbor: 72 | print(i.id, end=" ") 73 | print() 74 | 75 | # 测试环境运行 76 | reward = [] 77 | x = [[] for i in range(20)] 78 | y = [[] 
for i in range(20)] 79 | for i in range(1000): 80 | # for j in range(20): 81 | # x[j].append(env.vehicles[j].position[0]) 82 | # y[j].append(env.vehicles[j].position[1]) 83 | action1 = [] 84 | action2 = [] 85 | for j in range(20): 86 | # action1.append(np.random.randint(0, 10)) 87 | action1.append(0) 88 | # action2.append(np.random.randint(0, 7)) 89 | action2.append(1) 90 | Reward, _ = env.step(action1, action2) 91 | reward.append(Reward) 92 | print("第{}次平均奖励{}".format(i, Reward)) 93 | -------------------------------------------------------------------------------- /experiment5/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import argparse 3 | import os 4 | import time 5 | from collections import namedtuple 6 | 7 | import ptan 8 | import torch 9 | import torch.nn.functional as F 10 | import model 11 | from env import Env 12 | from mec import MEC 13 | from vehicle import Vehicle 14 | from memory import ReplayMemory 15 | from tensorboardX import SummaryWriter 16 | from torch.distributions.categorical import Categorical 17 | 18 | ENV_ID = "computing offloading" 19 | GAMMA = 0.99 20 | GAE_LAMBDA = 0.95 21 | 22 | TRAJECTORY_SIZE = 65 23 | LEARNING_RATE_ACTOR = 1e-5 24 | LEARNING_RATE_CRITIC = 1e-4 25 | 26 | PPO_EPS = 0.2 27 | PPO_EPOCHES = 10 28 | PPO_BATCH_SIZE = 64 29 | 30 | TEST_ITERS = 10000 31 | Experience = namedtuple('Transition', ('state', 'action', 'reward', 'next_state')) # Define a transition tuple 32 | 33 | 34 | # 将list装换成tensor存入缓冲池中 35 | def save_experience(state, action, reward, next_state, memory: ReplayMemory): 36 | reward = torch.tensor([reward]) 37 | action = torch.tensor([action]) 38 | state = torch.tensor(state) 39 | state = state.unsqueeze(0) 40 | next_state = torch.tensor(next_state) 41 | next_state = next_state.unsqueeze(0) 42 | memory.push(state, action, reward, next_state) 43 | 44 | 45 | def calc_adv_ref(trajectory, net_crt, states_v, device="cpu"): 46 | """ 47 | By trajectory calculate advantage and 1-step ref value 48 | :param trajectory: trajectory list 49 | :param net_crt: critic network 50 | :param states_v: states tensor 51 | :return: tuple with advantage numpy array and reference values 52 | """ 53 | values_v = net_crt(torch.tensor(states_v)) 54 | values = values_v.squeeze().data.cpu().numpy() 55 | # generalized advantage estimator: smoothed version of the advantage 56 | last_gae = 0.0 57 | result_adv = [] 58 | result_ref = [] 59 | for val, next_val, exp in zip(reversed(values[:-1]), 60 | reversed(values[1:]), 61 | reversed(trajectory[:-1])): 62 | delta = exp.vehicleReward + GAMMA * next_val - val 63 | last_gae = delta + GAMMA * GAE_LAMBDA * last_gae 64 | result_adv.append(last_gae) 65 | result_ref.append(last_gae + val) 66 | 67 | adv_v = torch.FloatTensor(list(reversed(result_adv))) 68 | ref_v = torch.FloatTensor(list(reversed(result_ref))) 69 | return adv_v.to(device), ref_v.to(device) 70 | 71 | 72 | # 将状态信息放入各自的缓冲池中 73 | def push(env, state, actions, next_state): 74 | for i, vehicle in enumerate(env.vehicles): 75 | if vehicle.task is not None: # 没有任务不算经验 76 | continue 77 | exp = Experience(state, actions[i], env.vehicleReward[i][-1], next_state) 78 | vehicle.buffer.append(exp) 79 | 80 | 81 | if __name__ == '__main__': 82 | task = MEC([10, 20]) 83 | vehicle = Vehicle(1, [10, 20], 'd') 84 | print(type(task) == MEC) 85 | print(type(task) == Vehicle) 86 | print(type(vehicle) == Vehicle) 87 | print(type(vehicle)) 88 | print(vehicle) 89 | 
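The calc_adv_ref helper above walks the trajectory backwards to build generalized advantage estimates. Note that it reads exp.vehicleReward, while the Experience tuple defined in this file names that field reward, so the two names would have to be reconciled before the helper is actually used. Below is a minimal standalone sketch of the same backward recursion on a hand-made trajectory; the values and rewards arrays are hypothetical, and the snippet is an editorial illustration rather than code from the repository.

import numpy as np

GAMMA, GAE_LAMBDA = 0.99, 0.95
values = np.array([1.0, 0.8, 0.5, 0.2])   # critic estimates V(s_0)..V(s_3)
rewards = np.array([0.1, -0.2, 0.3])      # rewards for steps 0..2

last_gae, adv, ref = 0.0, [], []
for t in reversed(range(len(rewards))):                       # same order as calc_adv_ref
    delta = rewards[t] + GAMMA * values[t + 1] - values[t]    # 1-step TD error
    last_gae = delta + GAMMA * GAE_LAMBDA * last_gae          # smoothed advantage
    adv.append(last_gae)
    ref.append(last_gae + values[t])                          # critic target = A_t + V(s_t)

adv, ref = adv[::-1], ref[::-1]
print(adv)   # advantages fed to the actor loss
print(ref)   # reference values fed to the critic loss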
-------------------------------------------------------------------------------- /experiment5/mec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | RANGE_MEC = 500 # MEC通信范围 /m 4 | RESOURCE = 10000 # 可用资源 MHz 5 | MAX_QUEUE = 10 6 | 7 | 8 | # 边缘服务器 9 | class MEC: 10 | def __init__(self, position, resources=RESOURCE, max_queue=MAX_QUEUE): 11 | self.loc_x = position[0] 12 | self.loc_y = position[1] 13 | self.loc = position 14 | # 当前可用资源 MHz 15 | self.resources = resources 16 | self.state = [] 17 | # 通信范围 m 18 | self.range = RANGE_MEC 19 | # 当前接到需要处理的任务信息(最多同时处理10个任务) 20 | self.accept_task = [] 21 | # 最多处理任务量 22 | self.max_task = 10 23 | # 接受任务的数量 24 | self.sum_needDeal_task = 0 25 | # 此时刻有多少动作选则我 多少任务选择传输给我 26 | self.len_action = 0 27 | # 等待计算的任务队列(理解为挂起状态) 28 | self.task_queue = [] 29 | # 用于奖励计算的任务队列 30 | self.task_queue_for_reward = [] 31 | # 队列最长长度 32 | self.max_queue = max_queue 33 | # 当前状态 34 | self.get_state() 35 | 36 | @property 37 | def get_x(self): 38 | return self.loc_x 39 | 40 | @property 41 | def get_y(self): 42 | return self.loc_y 43 | 44 | @property 45 | def get_location(self): 46 | return self.loc 47 | 48 | """ 49 | 获得状态 50 | """ 51 | 52 | def get_state(self): 53 | """ 54 | :return:state 维度:[loc_x,loc_y,sum_needDeal_task,resources] 55 | """ 56 | self.state = [] 57 | self.state.extend(self.loc) 58 | self.state.append(self.sum_needDeal_task) 59 | self.state.append(self.len_action) 60 | self.state.append(self.resources) 61 | return self.state 62 | -------------------------------------------------------------------------------- /experiment5/memory.py: -------------------------------------------------------------------------------- 1 | # 经验类型 2 | import collections 3 | from collections import namedtuple 4 | from random import sample 5 | import numpy as np 6 | 7 | Experience = namedtuple('Transition', 8 | field_names=['cur_otherState', 'cur_TaskState', # 状态 9 | 'taskAction', 'aimAction', # 动作 10 | 'reward', # 奖励 11 | 'next_otherState', 'next_TaskState']) # Define a transition tuple 12 | 13 | 14 | class PPOMemory: 15 | def __init__(self, batch_size): 16 | self.self_state = [] 17 | self.neighbor_state = [] 18 | self.task_state = [] 19 | self.vehicles_state = [] 20 | self.task_probs = [] 21 | self.aim_probs = [] 22 | self.vals = [] 23 | self.action = [] 24 | self.rewards = [] 25 | self.batch_size = batch_size 26 | 27 | def sample(self): 28 | batch_step = np.arange(0, len(self.self_state), self.batch_size) 29 | indices = np.arange(len(self.self_state), dtype=np.int64) 30 | # np.random.shuffle(indices) 31 | batches = [indices[i:i + self.batch_size] for i in batch_step] 32 | return np.array(self.self_state), \ 33 | np.array(self.neighbor_state), \ 34 | np.array(self.task_state), \ 35 | np.array(self.vehicles_state), \ 36 | np.array(self.task_probs), \ 37 | np.array(self.aim_probs), \ 38 | np.array(self.vals), \ 39 | np.array(self.action), \ 40 | np.array(self.rewards), \ 41 | batches 42 | 43 | def push(self, self_state, neighbor_state, task_state, vehicles_state, 44 | task_action, aim_action, 45 | task_probs, aim_probs, 46 | vals, reward): 47 | self.self_state.append(self_state) 48 | self.neighbor_state.append(neighbor_state) 49 | self.task_state.append(task_state) 50 | self.vehicles_state.append(vehicles_state) 51 | self.action.append([task_action, aim_action]) 52 | self.task_probs.append(task_probs) 53 | self.aim_probs.append(aim_probs) 54 | self.vals.append(vals) 55 | self.rewards.append(reward) 56 | 57 | def 
clear(self): 58 | self.self_state = [] 59 | self.neighbor_state = [] 60 | self.task_state = [] 61 | self.vehicles_state = [] 62 | self.task_probs = [] 63 | self.aim_probs = [] 64 | self.vals = [] 65 | self.action = [] 66 | self.rewards = [] 67 | 68 | 69 | class ExperienceBuffer: 70 | def __init__(self, capacity): 71 | self.maxLen = capacity 72 | self.buffer = collections.deque(maxlen=capacity) # 队列,先进先出 73 | 74 | def __len__(self): 75 | return len(self.buffer) 76 | 77 | def append(self, experience: Experience): 78 | self.buffer.append(experience) 79 | 80 | def sample(self, batch_size): 81 | indices = np.random.choice(len(self.buffer), batch_size, replace=False) 82 | cur_otherState, cur_TaskState, taskAction, aimAction, rewards, next_otherState, next_TaskState = zip( 83 | *[self.buffer[idx] for idx in indices]) 84 | # 转换成numpy 85 | return np.array(cur_otherState), np.array(cur_TaskState), \ 86 | np.array(taskAction), np.array(aimAction), \ 87 | np.array(rewards, dtype=np.float32), \ 88 | np.array(next_otherState), np.array(next_TaskState) 89 | 90 | # 清空 91 | def clear(self): 92 | self.buffer = collections.deque(maxlen=self.maxLen) 93 | -------------------------------------------------------------------------------- /experiment5/model.py: -------------------------------------------------------------------------------- 1 | import ptan 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn import functional as F 6 | from torch.distributions.categorical import Categorical 7 | 8 | HID_SIZE = 64 9 | HID_SIZE_MIN = 32 10 | 11 | 12 | class ModelActor(nn.Module): 13 | def __init__(self, obs_dim, neighbor_dim, task_dim, task_aim_dim, act_aim_dim): 14 | super(ModelActor, self).__init__() 15 | 16 | self.cnn_task = CNNLayer(task_dim, HID_SIZE) 17 | self.cnn_neighbor = CNNLayer(neighbor_dim, HID_SIZE_MIN) 18 | self.same = nn.Sequential( 19 | nn.Linear(HID_SIZE + HID_SIZE_MIN + obs_dim, 2 * HID_SIZE), 20 | nn.ReLU(), 21 | nn.Linear(2 * HID_SIZE, HID_SIZE), 22 | nn.ReLU(), 23 | nn.Linear(HID_SIZE, 2 * HID_SIZE), 24 | nn.ReLU(), 25 | ) 26 | self.task = nn.Sequential( 27 | nn.Linear(2 * HID_SIZE, HID_SIZE), 28 | nn.ReLU(), 29 | nn.Linear(HID_SIZE, HID_SIZE_MIN), 30 | nn.ReLU(), 31 | nn.Linear(HID_SIZE_MIN, task_aim_dim), 32 | ) 33 | self.act = nn.Sequential( 34 | nn.Linear(2 * HID_SIZE, HID_SIZE), 35 | nn.ReLU(), 36 | nn.Linear(HID_SIZE, HID_SIZE_MIN), 37 | nn.ReLU(), 38 | nn.Linear(HID_SIZE_MIN, act_aim_dim), 39 | ) 40 | self.logstd_task = nn.Parameter(torch.zeros(task_aim_dim)) 41 | self.logstd_aim = nn.Parameter(torch.zeros(act_aim_dim)) 42 | 43 | def forward(self, obs, neighbor, task, is_train=True): 44 | task_out = self.cnn_task(task) 45 | neighbor_out = self.cnn_neighbor(neighbor) 46 | x = torch.cat((task_out, neighbor_out, obs), -1) 47 | same_out = self.same(x) 48 | act_out = self.act(same_out) 49 | task_out = self.task(same_out) 50 | if is_train: 51 | rnd_task = torch.tensor(np.random.normal(size=task_out.shape)) 52 | rnd_aim = torch.tensor(np.random.normal(size=act_out.shape)) 53 | task_out = task_out + torch.exp(self.logstd_task) * rnd_task 54 | act_out = act_out + torch.exp(self.logstd_aim) * rnd_aim 55 | 56 | act_out = F.gumbel_softmax(act_out) 57 | 58 | act_pro = F.softmax(act_out, dim=-1) 59 | task_pro = F.softmax(task_out, dim=-1) 60 | # print(act_pro) 61 | # print(torch.sum(act_pro)) 62 | # print(task_pro) 63 | # return act_pro, task_pro # 打印网络结构用 64 | return Categorical(task_pro), Categorical(act_pro) # 真实使用 65 | 66 | 67 | class ModelCritic(nn.Module): 68 | def 
__init__(self, obs_size, task_size, act_size): 69 | super(ModelCritic, self).__init__() 70 | 71 | self.cnn = CNNLayer(obs_size, HID_SIZE) 72 | 73 | self.task_cnn = CNNLayer(task_size, HID_SIZE) 74 | 75 | self.value = nn.Sequential( 76 | nn.Linear(HID_SIZE * 2 + act_size, HID_SIZE * 2), 77 | nn.ReLU(), 78 | nn.Linear(HID_SIZE * 2, HID_SIZE), 79 | nn.ReLU(), 80 | nn.Linear(HID_SIZE, HID_SIZE_MIN), 81 | nn.ReLU(), 82 | nn.Linear(HID_SIZE_MIN, 1), 83 | ) 84 | self.value1 = nn.Sequential( 85 | nn.Linear(HID_SIZE * 2 + act_size, HID_SIZE * 2), 86 | nn.ReLU(), 87 | nn.Linear(HID_SIZE * 2, HID_SIZE), 88 | nn.ReLU(), 89 | nn.Linear(HID_SIZE, HID_SIZE_MIN), 90 | nn.ReLU(), 91 | nn.Linear(HID_SIZE_MIN, 1), 92 | ) 93 | 94 | def forward(self, states_v, task_states_v, actions_v): 95 | cnn_out = self.cnn(states_v) 96 | task_out = self.task_cnn(task_states_v) 97 | 98 | v = torch.cat((actions_v, cnn_out, task_out), -1) 99 | task_value = self.value(v) 100 | # aim_value = self.value1(v) 101 | return task_value # , aim_value 102 | 103 | 104 | class CNNLayer(nn.Module): 105 | def __init__(self, obs_shape, hidden_size, use_orthogonal=True, use_ReLU=True, kernel_size=3, stride=1): 106 | super(CNNLayer, self).__init__() 107 | 108 | active_func = [nn.Tanh(), nn.ReLU()][use_ReLU] 109 | init_method = [nn.init.xavier_uniform_, nn.init.orthogonal_][use_orthogonal] 110 | gain = nn.init.calculate_gain(['tanh', 'relu'][use_ReLU]) 111 | 112 | def init_(m): # 权重使用正交初始化,激活函数使用relu 113 | return init(m, init_method, lambda x: nn.init.constant_(x, 0), gain=gain) 114 | 115 | in_channels = obs_shape[0] 116 | input_width = obs_shape[1] 117 | input_height = obs_shape[2] 118 | 119 | self.cnn = nn.Sequential( 120 | init_(nn.Conv2d( 121 | in_channels=in_channels, 122 | out_channels=hidden_size // 2, 123 | kernel_size=kernel_size, 124 | stride=stride) 125 | ), 126 | active_func, 127 | # nn.AvgPool2d( 128 | # kernel_size=kernel_size, 129 | # stride=stride), 130 | # active_func, 131 | # init_(nn.Conv2d( 132 | # in_channels=3, 133 | # out_channels=1, 134 | # kernel_size=kernel_size, 135 | # stride=stride) 136 | # ), 137 | # active_func, 138 | # nn.AvgPool2d( 139 | # kernel_size=kernel_size, 140 | # stride=stride), 141 | # active_func, 142 | nn.Flatten(), 143 | init_(nn.Linear( 144 | hidden_size // 2 * (input_width - kernel_size + stride) * (input_height - kernel_size + stride), 145 | hidden_size) 146 | ), 147 | active_func, 148 | init_(nn.Linear(hidden_size, hidden_size)), active_func) 149 | 150 | def forward(self, x): 151 | x = x / 255.0 152 | x = self.cnn(x) 153 | 154 | return x 155 | 156 | 157 | def init(module, weight_init, bias_init, gain=1): 158 | weight_init(module.weight.data, gain=gain) 159 | bias_init(module.bias.data) 160 | return module 161 | -------------------------------------------------------------------------------- /experiment5/task.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | np.random.seed(2) 4 | 5 | 6 | class Task: 7 | """ 8 | 定义任务类型 9 | """ 10 | 11 | def __init__(self, vehicle=None, createTime=0): 12 | # 产生任务的车辆 13 | self.vehicle = vehicle 14 | # 完成该任务所消耗的资源 15 | self.aim = None # 传送对象 16 | 17 | self.max_time = 50 # ms 最大容忍时间 18 | self.size = np.random.uniform(0.2, 1) # Mb 19 | self.cycle = np.random.randint(50, 100) # cycle/bit 20 | self.need_trans_size = self.size * np.power(2, 10) # Kb 还剩余多少未传输完成 21 | self.need_precess_cycle = self.cycle * self.size * 1000 # Mb * cycle/byte =M cycle 还剩余多少轮次未完成(10^6) 22 | self.need_time = 0 # 需要计算时间 23 | 
self.hold_time = 0 # 任务在计算等待队列中得等待时间 24 | 25 | self.rate = 0 # 当前速率 26 | 27 | self.compute_resource = 0 28 | 29 | self.create_time = createTime # 任务产生时间 30 | self.pick_time = 0 # 被选择的时间(出队列时间) 31 | 32 | # 完成该任务所消耗的cup资源 33 | self.energy = 0 34 | self.trans_time = 0 # 传输所需要的时间(实际) 35 | self.precess_time = 0 # 任务处理所需要的时间(实际) 36 | -------------------------------------------------------------------------------- /experiment5/vehicle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from memory import PPOMemory 6 | from task import Task 7 | 8 | Dv = 100 # 车的最大通信范围 9 | Fv = 2000 # 车最大计算能力 MHZ 10 | MAX_TASK = 10 # 任务队列最大长度 11 | 12 | CAPACITY = 2050 # 缓冲池大小 13 | TASK_SOLT = 10 # 任务产生时隙 14 | 15 | # 等待队列最长长度 16 | MAX_QUEUE = 10 17 | 18 | np.random.seed(2) 19 | 20 | direction_map = {"d": 1, "u": 2, "l": 3, "r": 4} 21 | 22 | 23 | class Vehicle: 24 | # 位置:x,y 速度、方向:-1左,1右 25 | def __init__(self, id, position, direction, velocity=20, max_queue=MAX_QUEUE): 26 | self.id = id 27 | # 车的位置信息 28 | self.loc_x = position[0] 29 | self.loc_y = position[1] 30 | self.position = position 31 | self.velocity = velocity # m/s 32 | self.direction = direction 33 | # 通信范围 34 | self.range = Dv 35 | # 邻居表 36 | self.neighbor = [] 37 | # mec 38 | self.Mec = None 39 | # 当前时间 40 | self.cur_frame = 0 41 | # 接受的任务的列表(最多同时处理5个任务) 42 | self.accept_task = [] 43 | # 最多处理任务量 44 | self.max_task = 3 45 | # 等待队列最长长度 46 | self.max_queue = max_queue 47 | # 等待计算的任务队列(理解为挂起状态) 48 | self.task_queue = [] 49 | # 用于奖励计算的任务队列 50 | self.task_queue_for_reward = [] 51 | # 接受任务的数量 52 | self.sum_needDeal_task = 0 53 | # 此时刻有多少动作选则我 54 | self.len_action = 0 55 | # 当前可用资源 56 | self.resources = round((1 - np.random.randint(1, 4) / 10) * Fv, 2) # MHz 57 | # 表示当前是否有任务正在传输给邻居车辆(0:没有,1:有) 58 | self.trans_task_for_vehicle = 0 59 | # 当前是否有任务正在传输给mec 60 | self.trans_task_for_mec = 0 61 | # 当前处理的任务(用于计算奖励,不用于状态信息) 62 | self.cur_task = None 63 | # 任务队列 64 | self.total_task = [] 65 | # 任务队列的长度 66 | self.len_task = len(self.total_task) 67 | 68 | # 自身状态信息 69 | self.self_state = [] 70 | # 当前任务队列状态 71 | self.task_state = [] 72 | # 邻居状态信息 73 | self.neighbor_state = [] 74 | # 缓冲池 75 | self.memory = PPOMemory(CAPACITY) 76 | # 总奖励 77 | self.reward = [] 78 | # 任务溢出的数量 79 | self.overflow = 0 80 | # 上一个任务产生的时间 81 | self.lastCreatWorkTime = 0 82 | 83 | # 产生任务 84 | self.create_work() 85 | 86 | # 获得位置 87 | @property 88 | def get_location(self): 89 | return self.position 90 | 91 | # 设置位置 92 | def set_location(self, loc_x, loc_y): 93 | self.loc_x = loc_x 94 | self.loc_y = loc_y 95 | self.position = [self.loc_x, self.loc_y] 96 | 97 | # 获得x 98 | @property 99 | def get_x(self): 100 | return self.loc_x 101 | 102 | # 获得y 103 | @property 104 | def get_y(self): 105 | return self.loc_y 106 | 107 | # 产生任务 传入当前时间 108 | def create_work(self): 109 | if self.id % 3 == 0: 110 | return 111 | # 每隔一段时间进行一次任务产生 112 | if (self.cur_frame - self.lastCreatWorkTime) % TASK_SOLT == 0: 113 | # # 每次有0.6的概率产生任务 114 | if np.random.random() < 0.6: 115 | if self.len_task < MAX_TASK: # 队列不满 116 | task = Task(self, self.cur_frame) 117 | self.lastCreatWorkTime = self.cur_frame 118 | self.total_task.append(task) 119 | self.len_task += 1 120 | # print("第{}辆车产生了任务".format(self.id)) 121 | self.overflow = 0 122 | else: 123 | # print("第{}辆车任务队列已满".format(self.id)) 124 | self.overflow = 1 125 | 126 | """ 127 | 获得状态 128 | """ 129 | 130 | def get_state(self): 131 | self.self_state = [] 132 | self.neighbor_state = [] 133 | 
self.task_state = [] 134 | 135 | # 位置信息 4 136 | self.self_state.extend(self.position) 137 | self.self_state.append(self.velocity) 138 | self.self_state.append(direction_map.get(self.direction)) 139 | 140 | # 资源信息(可用资源) 141 | self.self_state.append(self.resources) 142 | 143 | # 当前处理的任务量 144 | self.self_state.append(self.sum_needDeal_task) 145 | # 当前接受传输的任务量 146 | self.self_state.append(self.len_action) 147 | 148 | # 当前是否有任务在传输 149 | self.self_state.append(self.trans_task_for_vehicle) 150 | self.self_state.append(self.trans_task_for_mec) 151 | 152 | # 邻居表 7*数量 153 | for neighbor in self.neighbor: 154 | state = [] 155 | state.extend(neighbor.position) # 位置 156 | state.append(neighbor.velocity) # 速度 157 | state.append(direction_map.get(neighbor.direction)) # 方向 158 | state.append(neighbor.resources) # 可用资源 159 | state.append(neighbor.sum_needDeal_task) # 处理任务长度 160 | state.append(neighbor.len_action) # 当前正在接受传输任务数量 161 | self.neighbor_state.append(state) 162 | 163 | self.self_state.extend(self.Mec.state) 164 | 165 | # 任务状态信息 166 | for i in range(MAX_TASK): 167 | if i < self.len_task: 168 | task = self.total_task[i] 169 | self.task_state.append([task.create_time, task.need_trans_size, task.need_precess_cycle, task.max_time]) 170 | else: 171 | self.task_state.append([0, 0, 0, 0]) 172 | 173 | return self.self_state 174 | -------------------------------------------------------------------------------- /experiment6/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 环境6 3 | (两个动作:选择任务和选择对象) 4 | 加入了mec和车在时隙内处理任务的上限(mec最多同时处理10个任务、车最多处理5个任务) 5 | 使用经典城市道路(使用不同数量车辆和邻居) 6 | 为mec卸载和车辆卸载提供两种传输方式(即可同时像车辆和mec传输任务) 7 | """ 8 | import ptan 9 | import numpy as np 10 | import torch 11 | from torch.distributions.categorical import Categorical 12 | from env import Env 13 | 14 | 15 | def test_net(nets, env: Env, count=10): 16 | rewards = 0.0 17 | steps = 0 18 | for _ in range(count): 19 | env.reset() 20 | while steps < 1000: 21 | action = [] 22 | with torch.no_grad(): 23 | for vehicle in env.vehicles: 24 | state = torch.tensor(vehicle.self_state) 25 | _, pro = nets[vehicle.id](state) 26 | act = Categorical.sample(pro) 27 | action.append(act.item()) 28 | _, _, reward, _ = env.step(action) 29 | rewards += reward 30 | steps += 1 31 | return rewards / count, steps / count 32 | 33 | # def calc_logprob(pro_v, actions_v): 34 | # p1 = - ((mu_v - actions_v) ** 2) / (2 * torch.exp(logstd_v).clamp(min=1e-3)) 35 | # p2 = - torch.log(torch.sqrt(2 * math.pi * torch.exp(logstd_v))) 36 | # return p1 + p2 37 | -------------------------------------------------------------------------------- /experiment6/env_test.py: -------------------------------------------------------------------------------- 1 | from env import Env 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | if __name__ == '__main__': 6 | print() 7 | env = Env() 8 | env.reset() 9 | 10 | # 测试网络节点数 11 | vehicles = env.vehicles 12 | 13 | for vehicle in vehicles: 14 | print("第{}车状态:{}".format(vehicle.id, vehicle.self_state)) 15 | print("该车邻居:") 16 | for i in vehicle.neighbor: 17 | print(i.id, end=" ") 18 | print() 19 | 20 | # 测试环境运行 21 | reward = [] 22 | x = [[] for i in range(20)] 23 | y = [[] for i in range(20)] 24 | for i in range(1000): 25 | # for j in range(20): 26 | # x[j].append(env.vehicles[j].position[0]) 27 | # y[j].append(env.vehicles[j].position[1]) 28 | action1 = [] 29 | action2 = [] 30 | for j in range(20): 31 | # action1.append(np.random.randint(0, 10)) 32 | action1.append(0) 33 | # 
action2.append(np.random.randint(0, 7)) 34 | action2.append(1) 35 | Reward, _ = env.step(action1, action2) 36 | reward.append(Reward) 37 | print("第{}次平均奖励{}".format(i, Reward)) 38 | # print("当前状态:", state) 39 | # print("下一状态:", next_state) 40 | # print("车状态:", vehicleState) 41 | # print("任务状态", taskState) 42 | # print("当前奖励:", reward) 43 | # print("每个奖励,", vehicleReward) 44 | # print("当前有{}任务没有传输完成".format(len(env.need_trans_task))) 45 | # print("average reward:", env.Reward) 46 | # plt.figure(figsize=(100, 100)) 47 | # fix, ax = plt.subplots(5, 4) 48 | # 49 | # for i in range(5): 50 | # for j in range(4): 51 | # number = i * 4 + j 52 | # ax[i, j].plot(x[number], y[number]) 53 | # ax[i, j].set_title('vehicle {}'.format(number)) 54 | plt.plot(range(len(reward)), reward) 55 | print(reward) 56 | plt.show() 57 | -------------------------------------------------------------------------------- /experiment6/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import argparse 3 | import os 4 | import time 5 | from collections import namedtuple 6 | 7 | import ptan 8 | import torch 9 | import torch.nn.functional as F 10 | import model 11 | from env import Env 12 | from mec import MEC 13 | from vehicle import Vehicle 14 | from memory import ReplayMemory 15 | from tensorboardX import SummaryWriter 16 | from torch.distributions.categorical import Categorical 17 | 18 | ENV_ID = "computing offloading" 19 | GAMMA = 0.99 20 | GAE_LAMBDA = 0.95 21 | 22 | TRAJECTORY_SIZE = 65 23 | LEARNING_RATE_ACTOR = 1e-5 24 | LEARNING_RATE_CRITIC = 1e-4 25 | 26 | PPO_EPS = 0.2 27 | PPO_EPOCHES = 10 28 | PPO_BATCH_SIZE = 64 29 | 30 | TEST_ITERS = 10000 31 | Experience = namedtuple('Transition', ('state', 'action', 'reward', 'next_state')) # Define a transition tuple 32 | 33 | 34 | # 将list装换成tensor存入缓冲池中 35 | def save_experience(state, action, reward, next_state, memory: ReplayMemory): 36 | reward = torch.tensor([reward]) 37 | action = torch.tensor([action]) 38 | state = torch.tensor(state) 39 | state = state.unsqueeze(0) 40 | next_state = torch.tensor(next_state) 41 | next_state = next_state.unsqueeze(0) 42 | memory.push(state, action, reward, next_state) 43 | 44 | 45 | def calc_adv_ref(trajectory, net_crt, states_v, device="cpu"): 46 | """ 47 | By trajectory calculate advantage and 1-step ref value 48 | :param trajectory: trajectory list 49 | :param net_crt: critic network 50 | :param states_v: states tensor 51 | :return: tuple with advantage numpy array and reference values 52 | """ 53 | values_v = net_crt(torch.tensor(states_v)) 54 | values = values_v.squeeze().data.cpu().numpy() 55 | # generalized advantage estimator: smoothed version of the advantage 56 | last_gae = 0.0 57 | result_adv = [] 58 | result_ref = [] 59 | for val, next_val, exp in zip(reversed(values[:-1]), 60 | reversed(values[1:]), 61 | reversed(trajectory[:-1])): 62 | delta = exp.vehicleReward + GAMMA * next_val - val 63 | last_gae = delta + GAMMA * GAE_LAMBDA * last_gae 64 | result_adv.append(last_gae) 65 | result_ref.append(last_gae + val) 66 | 67 | adv_v = torch.FloatTensor(list(reversed(result_adv))) 68 | ref_v = torch.FloatTensor(list(reversed(result_ref))) 69 | return adv_v.to(device), ref_v.to(device) 70 | 71 | 72 | # 将状态信息放入各自的缓冲池中 73 | def push(env, state, actions, next_state): 74 | for i, vehicle in enumerate(env.vehicles): 75 | if vehicle.task is not None: # 没有任务不算经验 76 | continue 77 | exp = Experience(state, actions[i], env.vehicleReward[i][-1], next_state) 78 | 
vehicle.buffer.append(exp) 79 | 80 | 81 | if __name__ == '__main__': 82 | task = MEC([10, 20]) 83 | vehicle = Vehicle(1, [10, 20], 'd') 84 | print(type(task) == MEC) 85 | print(type(task) == Vehicle) 86 | print(type(vehicle) == Vehicle) 87 | print(type(vehicle)) 88 | print(vehicle) 89 | -------------------------------------------------------------------------------- /experiment6/mec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | RANGE_MEC = 650 # MEC通信范围 /m 4 | RESOURCE = 10000 # 可用资源 MHz 5 | MAX_QUEUE = 10 6 | 7 | 8 | # 边缘服务器 9 | class MEC: 10 | def __init__(self, position, resources=RESOURCE, max_queue=MAX_QUEUE): 11 | self.loc_x = position[0] 12 | self.loc_y = position[1] 13 | self.loc = position 14 | # 当前可用资源 MHz 15 | self.resources = resources 16 | self.state = [] 17 | # 通信范围 m 18 | self.range = RANGE_MEC 19 | # 当前接到需要处理的任务信息(最多同时处理10个任务) 20 | self.accept_task = [] 21 | # 最多处理任务量 22 | self.max_task = 10 23 | # 接受任务的数量 24 | self.sum_needDeal_task = 0 25 | # 此时刻有多少动作选则我 多少任务选择传输给我 26 | self.len_action = 0 27 | # 等待计算的任务队列(理解为挂起状态) 28 | self.task_queue = [] 29 | # 用于奖励计算的任务队列 30 | self.task_queue_for_reward = [] 31 | # 队列最长长度 32 | self.max_queue = max_queue 33 | # 当前状态 34 | self.get_state() 35 | 36 | @property 37 | def get_x(self): 38 | return self.loc_x 39 | 40 | @property 41 | def get_y(self): 42 | return self.loc_y 43 | 44 | @property 45 | def get_location(self): 46 | return self.loc 47 | 48 | """ 49 | 获得状态 50 | """ 51 | 52 | def get_state(self): 53 | """ 54 | :return:state 维度:[loc_x,loc_y,sum_needDeal_task,resources] 55 | """ 56 | self.state = [] 57 | self.state.extend(self.loc) 58 | self.state.append(self.sum_needDeal_task) 59 | self.state.append(self.len_action) 60 | self.state.append(self.resources) 61 | return self.state 62 | -------------------------------------------------------------------------------- /experiment6/memory.py: -------------------------------------------------------------------------------- 1 | # 经验类型 2 | import collections 3 | from collections import namedtuple 4 | import numpy as np 5 | 6 | Experience = namedtuple('Transition', 7 | field_names=['vehicle_state', 'neighbor_state', 'task_state', 'all_vehicle_state', 8 | 'task_action', 'aim_action', 'reward', 9 | 'next_vehicle_state', 'next_neighbor_state', 'next_task_state', 10 | 'next_all_vehicle_state']) # Define a transition tuple 11 | 12 | 13 | class ExperienceBuffer: 14 | def __init__(self, capacity): 15 | self.maxLen = capacity 16 | self.buffer = collections.deque(maxlen=capacity) # 队列,先进先出 17 | 18 | def __len__(self): 19 | return len(self.buffer) 20 | 21 | def append(self, experience: Experience): 22 | self.buffer.append(experience) 23 | 24 | def sample(self, batch_size): 25 | indices = np.random.choice(len(self.buffer), batch_size, replace=False) 26 | vehicle_state, neighbor_state, task_state, all_vehicle_state, \ 27 | task_action, aim_action, reward, \ 28 | next_vehicle_state, next_neighbor_state, next_task_state, next_all_vehicle_state = zip( 29 | *[self.buffer[idx] for idx in indices]) 30 | # 转换成numpy 31 | return np.array(vehicle_state), np.array(neighbor_state), \ 32 | np.array(task_state), np.array(all_vehicle_state), \ 33 | np.array(task_action), np.array(aim_action), \ 34 | np.array(reward, dtype=np.float32), \ 35 | np.array(next_vehicle_state), np.array(next_neighbor_state), \ 36 | np.array(next_task_state), np.array(next_all_vehicle_state) 37 | 38 | 39 | def clear(self): 40 | """ 41 | 清空 42 | """ 43 | self.buffer = 
collections.deque(maxlen=self.maxLen) 44 | -------------------------------------------------------------------------------- /experiment6/model.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | from torch.nn import functional as F 7 | from torch.distributions.categorical import Categorical 8 | 9 | HID_SIZE = 64 10 | HID_SIZE_MIN = 32 11 | 12 | 13 | class TargetNet: 14 | """ 15 | Wrapper around model which provides copy of it instead of trained weights 16 | """ 17 | 18 | def __init__(self, model): 19 | self.model = model 20 | self.target_model = copy.deepcopy(model) 21 | 22 | def sync(self): 23 | self.target_model.load_state_dict(self.model.state_dict()) 24 | 25 | # 软更新 26 | def alpha_sync(self, alpha): 27 | """ 28 | Blend params of target net with params from the model 29 | :param alpha: 30 | """ 31 | assert isinstance(alpha, float) 32 | assert 0.0 < alpha <= 1.0 33 | state = self.model.state_dict() 34 | tgt_state = self.target_model.state_dict() 35 | for k, v in state.items(): 36 | tgt_state[k] = tgt_state[k] * alpha + (1 - alpha) * v 37 | self.target_model.load_state_dict(tgt_state) 38 | 39 | 40 | class ModelActor(nn.Module): 41 | def __init__(self, obs_dim, neighbor_dim, task_dim, task_aim_dim, act_aim_dim): 42 | super(ModelActor, self).__init__() 43 | 44 | self.cnn_task = CNNLayer(task_dim, HID_SIZE) 45 | self.cnn_neighbor = CNNLayer(neighbor_dim, HID_SIZE_MIN) 46 | self.same = nn.Sequential( 47 | nn.Linear(HID_SIZE + HID_SIZE_MIN + obs_dim, 2 * HID_SIZE), 48 | nn.ReLU(), 49 | nn.Linear(2 * HID_SIZE, HID_SIZE), 50 | nn.ReLU(), 51 | nn.Linear(HID_SIZE, 2 * HID_SIZE), 52 | nn.ReLU(), 53 | ) 54 | self.task = nn.Sequential( 55 | nn.Linear(2 * HID_SIZE, HID_SIZE), 56 | nn.ReLU(), 57 | nn.Linear(HID_SIZE, HID_SIZE_MIN), 58 | nn.ReLU(), 59 | nn.Linear(HID_SIZE_MIN, task_aim_dim), 60 | ) 61 | self.act = nn.Sequential( 62 | nn.Linear(2 * HID_SIZE, HID_SIZE), 63 | nn.ReLU(), 64 | nn.Linear(HID_SIZE, HID_SIZE_MIN), 65 | nn.ReLU(), 66 | nn.Linear(HID_SIZE_MIN, act_aim_dim), 67 | ) 68 | 69 | def forward(self, obs, neighbor, task, train=True): 70 | task_out = self.cnn_task(task) 71 | neighbor_out = self.cnn_neighbor(neighbor) 72 | x = torch.cat((task_out, neighbor_out, obs), -1) 73 | same_out = self.same(x) 74 | act_out = self.act(same_out) 75 | task_out = self.task(same_out) 76 | 77 | # 训练完成之后无需添加噪音 78 | if train: 79 | # act_out += torch.tensor(np.random.normal(size=act_out.shape)) 80 | # task_out += torch.tensor(np.random.normal(size=task_out.shape)) 81 | act_out = F.gumbel_softmax(act_out, hard=True) 82 | task_out = F.gumbel_softmax(task_out, hard=True) 83 | # else: 84 | # task_out = F.softmax(task_out, dim=-1) 85 | # act_out = F.softmax(act_out, dim=-1) 86 | 87 | # act_pro = F.softmax(act_out, dim=-1) 88 | # task_pro = F.softmax(task_out, dim=-1) 89 | # print(act_pro) 90 | # print(torch.sum(act_pro)) 91 | # print(task_pro) 92 | # return act_pro, task_pro # 打印网络结构用 93 | # return Categorical(task_pro), Categorical(act_pro) # 真实使用 94 | return task_out, act_out 95 | 96 | 97 | class ModelCritic(nn.Module): 98 | def __init__(self, obs_size, task_size, task_action_size, aim_action_size): 99 | super(ModelCritic, self).__init__() 100 | 101 | self.cnn = CNNLayer(obs_size, HID_SIZE) 102 | 103 | self.task_cnn = CNNLayer(task_size, HID_SIZE) 104 | 105 | self.task_value = nn.Sequential( 106 | nn.Linear(HID_SIZE * 2 + task_action_size, HID_SIZE * 2), 107 | nn.ReLU(), 108 | 
nn.Linear(HID_SIZE * 2, HID_SIZE), 109 | nn.ReLU(), 110 | nn.Linear(HID_SIZE, HID_SIZE_MIN), 111 | nn.ReLU(), 112 | nn.Linear(HID_SIZE_MIN, 1), 113 | ) 114 | self.aim_value = nn.Sequential( 115 | nn.Linear(HID_SIZE * 2 + aim_action_size, HID_SIZE * 2), 116 | nn.ReLU(), 117 | nn.Linear(HID_SIZE * 2, HID_SIZE), 118 | nn.ReLU(), 119 | nn.Linear(HID_SIZE, HID_SIZE_MIN), 120 | nn.ReLU(), 121 | nn.Linear(HID_SIZE_MIN, 1), 122 | ) 123 | 124 | def forward(self, states_v, task_states_v, task_action_v, aim_action_v): 125 | cnn_out = self.cnn(states_v) 126 | task_out = self.task_cnn(task_states_v) 127 | 128 | v = torch.cat((cnn_out, task_out), -1) 129 | task_value = self.task_value(torch.cat((v, task_action_v), -1)) 130 | aim_value = self.aim_value(torch.cat((v, aim_action_v), -1)) 131 | return task_value, aim_value 132 | 133 | 134 | class CNNLayer(nn.Module): 135 | def __init__(self, obs_shape, hidden_size, use_orthogonal=True, use_ReLU=True, kernel_size=3, stride=1): 136 | super(CNNLayer, self).__init__() 137 | 138 | active_func = [nn.Tanh(), nn.ReLU()][use_ReLU] 139 | init_method = [nn.init.xavier_uniform_, nn.init.orthogonal_][use_orthogonal] 140 | gain = nn.init.calculate_gain(['tanh', 'relu'][use_ReLU]) 141 | 142 | def init_(m): # 权重使用正交初始化,激活函数使用relu 143 | return init(m, init_method, lambda x: nn.init.constant_(x, 0), gain=gain) 144 | 145 | in_channels = obs_shape[0] 146 | input_width = obs_shape[1] 147 | input_height = obs_shape[2] 148 | 149 | self.cnn = nn.Sequential( 150 | init_(nn.Conv2d( 151 | in_channels=in_channels, 152 | out_channels=hidden_size // 2, 153 | kernel_size=kernel_size, 154 | stride=stride) 155 | ), 156 | active_func, 157 | # nn.AvgPool2d( 158 | # kernel_size=kernel_size, 159 | # stride=stride), 160 | # active_func, 161 | # init_(nn.Conv2d( 162 | # in_channels=3, 163 | # out_channels=1, 164 | # kernel_size=kernel_size, 165 | # stride=stride) 166 | # ), 167 | # active_func, 168 | # nn.AvgPool2d( 169 | # kernel_size=kernel_size, 170 | # stride=stride), 171 | # active_func, 172 | nn.Flatten(), 173 | init_(nn.Linear( 174 | hidden_size // 2 * (input_width - kernel_size + stride) * (input_height - kernel_size + stride), 175 | hidden_size) 176 | ), 177 | active_func, 178 | init_(nn.Linear(hidden_size, hidden_size)), active_func) 179 | 180 | def forward(self, x): 181 | x = x / 255.0 182 | x = self.cnn(x) 183 | 184 | return x 185 | 186 | 187 | def init(module, weight_init, bias_init, gain=1): 188 | weight_init(module.weight.data, gain=gain) 189 | bias_init(module.bias.data) 190 | return module 191 | -------------------------------------------------------------------------------- /experiment6/task.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | np.random.seed(2) 4 | 5 | 6 | class Task: 7 | """ 8 | 定义任务类型 9 | """ 10 | 11 | def __init__(self, vehicle=None, createTime=0): 12 | # 产生任务的车辆 13 | self.vehicle = vehicle 14 | # 完成该任务所消耗的资源 15 | self.aim = None # 传送对象 16 | 17 | self.max_time = 50 # ms 最大容忍时间 18 | self.size = np.random.uniform(0.2, 1) # Mb 19 | self.cycle = np.random.randint(50, 100) # cycle/bit 20 | self.need_trans_size = self.size * np.power(2, 10) # Kb 还剩余多少未传输完成 21 | self.need_precess_cycle = self.cycle * self.size * 1000 # Mb * cycle/byte =M cycle 还剩余多少轮次未完成(10^6) 22 | self.need_time = 0 # 需要计算时间 23 | self.hold_time = 0 # 任务在计算等待队列中得等待时间 24 | 25 | self.rate = 0 # 当前速率 26 | 27 | self.compute_resource = 0 28 | 29 | self.create_time = createTime # 任务产生时间 30 | self.pick_time = 0 # 被选择的时间(出队列时间) 31 | 32 | # 
完成该任务所消耗的cup资源 33 | self.energy = 0 34 | self.trans_time = 0 # 传输所需要的时间(实际) 35 | self.precess_time = 0 # 任务处理所需要的时间(实际) 36 | -------------------------------------------------------------------------------- /experiment6/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | import matplotlib 5 | from pylab import mpl 6 | import model 7 | from env import Env 8 | import matplotlib.pyplot as plt 9 | 10 | # 设置显示中文字体 11 | mpl.rcParams["font.sans-serif"] = ["SimHei"] 12 | matplotlib.rcParams['axes.unicode_minus'] = False 13 | np.random.seed(2) 14 | 15 | if __name__ == '__main__': 16 | env = Env() 17 | env.reset() 18 | 19 | N = env.num_Vehicles 20 | vehicles = env.vehicles 21 | models = [] 22 | 23 | # 初始化网络 24 | TASK_DIM = 5 25 | AIM_DIM = len(vehicles[0].neighbor) + 2 26 | vehicle_shape = len(vehicles[0].self_state) 27 | neighbor_shape = np.array([vehicles[0].neighbor_state]).shape 28 | task_shape = np.array([vehicles[0].task_state]).shape 29 | for i in range(N): 30 | tgt_model = model.ModelActor(vehicle_shape, neighbor_shape, task_shape, TASK_DIM, AIM_DIM) 31 | tgt_model.load_state_dict(torch.load( 32 | "D:\pycharm\Project\VML\MyErion\experiment6\\result\\2022-12-09-05-11\\vehicle{}.pkl".format(i))) 33 | models.append(tgt_model) 34 | 35 | # state_v = torch.tensor([vehicles[i].otherState], dtype=torch.float32) 36 | # taskState_v = torch.tensor([[vehicles[i].taskState]], dtype=torch.float32) 37 | # taskAction, aimAction = models[0](state_v, taskState_v) 38 | 39 | vehicleReward = [] 40 | averageReward = [] 41 | for step in range(10000): 42 | action1 = [] 43 | action2 = [] 44 | 45 | for i in range(N): 46 | state_v = torch.tensor([vehicles[i].self_state], dtype=torch.float32) 47 | neighbor_state_v = torch.tensor([[vehicles[i].neighbor_state]], dtype=torch.float32) 48 | taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 49 | taskAction, aimAction = models[i](state_v, neighbor_state_v, taskState_v, False) 50 | 51 | # taskAction = np.array(taskAction, dtype=np.float32).reshape(-1) 52 | # aimAction = np.array(aimAction, dtype=np.float32).reshape(-1) 53 | taskAction = taskAction.detach().numpy().reshape(-1) 54 | aimAction = aimAction.detach().numpy().reshape(-1) 55 | action1.append(np.argmax(taskAction)) 56 | # action1.append(0) 57 | action2.append(np.argmax(aimAction)) 58 | 59 | print(action1) 60 | print(action2) 61 | Reward, reward = env.step(action1, action2) 62 | vehicleReward.append(reward[5]) 63 | averageReward.append(Reward) 64 | print("第{}次车辆平均奖励{}".format(step, Reward)) 65 | 66 | fig, aix = plt.subplots(2, 1) 67 | aix[0].plot(range(len(vehicleReward)), vehicleReward) 68 | aix[1].plot(range(len(averageReward)), averageReward) 69 | plt.show() 70 | -------------------------------------------------------------------------------- /experiment6/vehicle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from memory import ExperienceBuffer 6 | from task import Task 7 | 8 | Dv = 100 # 车的最大通信范围 9 | Fv = 4000 # 车最大计算能力 MHZ 10 | MAX_TASK = 10 # 任务队列最大长度 11 | 12 | CAPACITY = 100000 # 缓冲池大小 13 | TASK_SOLT = 10 # 任务产生时隙 14 | 15 | # 等待队列最长长度 16 | MAX_QUEUE = 10 17 | 18 | np.random.seed(2) 19 | 20 | direction_map = {"d": 1, "u": 2, "l": 3, "r": 4} 21 | 22 | 23 | class Vehicle: 24 | # 位置:x,y 速度、方向:-1左,1右 25 | def __init__(self, id, position, direction, velocity=20, max_queue=MAX_QUEUE): 26 | self.id = id 27 | # 
车的位置信息 28 | self.loc_x = position[0] 29 | self.loc_y = position[1] 30 | self.position = position 31 | self.velocity = velocity # m/s 32 | self.direction = direction 33 | # 通信范围 34 | self.range = Dv 35 | # 邻居表 36 | self.neighbor = [] 37 | # mec 38 | self.Mec = None 39 | # 当前时间 40 | self.cur_frame = 0 41 | # 接受的任务的列表(最多同时处理5个任务) 42 | self.accept_task = [] 43 | # 最多处理任务量 44 | self.max_task = 3 45 | # 等待队列最长长度 46 | self.max_queue = max_queue 47 | # 等待计算的任务队列(理解为挂起状态) 48 | self.task_queue = [] 49 | # 用于奖励计算的任务队列 50 | self.task_queue_for_reward = [] 51 | # 接受任务的数量 52 | self.sum_needDeal_task = 0 53 | # 此时刻有多少动作选则我 54 | self.len_action = 0 55 | # 当前可用资源 56 | self.resources = round((1 - np.random.randint(1, 4) / 10) * Fv, 2) # MHz 57 | # 表示当前是否有任务正在传输给邻居车辆(0:没有,1:有) 58 | self.trans_task_for_vehicle = 0 59 | # 当前是否有任务正在传输给mec 60 | self.trans_task_for_mec = 0 61 | # 当前处理的任务(用于计算奖励,不用于状态信息) 62 | self.cur_task = None 63 | # 任务队列 64 | self.total_task = [] 65 | # 任务队列的长度 66 | self.len_task = len(self.total_task) 67 | 68 | # 自身状态信息 69 | self.self_state = [] 70 | # 当前任务队列状态 71 | self.task_state = [] 72 | # 邻居状态信息 73 | self.neighbor_state = [] 74 | # 缓冲池 75 | self.buffer = ExperienceBuffer(CAPACITY) 76 | # 总奖励 77 | self.reward = [] 78 | # 任务溢出的数量 79 | self.overflow = 0 80 | # 上一个任务产生的时间 81 | self.lastCreatWorkTime = 0 82 | 83 | # 产生任务 84 | self.create_work() 85 | 86 | # 获得位置 87 | @property 88 | def get_location(self): 89 | return self.position 90 | 91 | # 设置位置 92 | def set_location(self, loc_x, loc_y): 93 | self.loc_x = loc_x 94 | self.loc_y = loc_y 95 | self.position = [self.loc_x, self.loc_y] 96 | 97 | # 获得x 98 | @property 99 | def get_x(self): 100 | return self.loc_x 101 | 102 | # 获得y 103 | @property 104 | def get_y(self): 105 | return self.loc_y 106 | 107 | # 产生任务 传入当前时间 108 | def create_work(self): 109 | if self.id % 3 == 0: 110 | return 111 | # 每隔一段时间进行一次任务产生 112 | if (self.cur_frame - self.lastCreatWorkTime) % TASK_SOLT == 0: 113 | # # 每次有0.6的概率产生任务 114 | if np.random.random() < 0.6: 115 | if self.len_task < MAX_TASK: # 队列不满 116 | task = Task(self, self.cur_frame) 117 | self.lastCreatWorkTime = self.cur_frame 118 | self.total_task.append(task) 119 | self.len_task += 1 120 | # print("第{}辆车产生了任务".format(self.id)) 121 | self.overflow = 0 122 | else: 123 | # print("第{}辆车任务队列已满".format(self.id)) 124 | self.overflow = 1 125 | 126 | """ 127 | 获得状态 128 | """ 129 | 130 | def get_state(self): 131 | self.self_state = [] 132 | self.neighbor_state = [] 133 | self.task_state = [] 134 | 135 | # 位置信息 4 136 | self.self_state.extend(self.position) 137 | self.self_state.append(self.velocity) 138 | self.self_state.append(direction_map.get(self.direction)) 139 | 140 | # 资源信息(可用资源) 141 | self.self_state.append(self.resources) 142 | 143 | # 当前处理的任务量 144 | self.self_state.append(self.sum_needDeal_task) 145 | # 当前接受传输的任务量 146 | self.self_state.append(self.len_action) 147 | 148 | # 当前是否有任务在传输 149 | self.self_state.append(self.trans_task_for_vehicle) 150 | self.self_state.append(self.trans_task_for_mec) 151 | 152 | # 邻居表 7*数量 153 | for neighbor in self.neighbor: 154 | state = [] 155 | state.extend(neighbor.position) # 位置 156 | state.append(neighbor.velocity) # 速度 157 | state.append(direction_map.get(neighbor.direction)) # 方向 158 | state.append(neighbor.resources) # 可用资源 159 | state.append(neighbor.sum_needDeal_task) # 处理任务长度 160 | state.append(neighbor.len_action) # 当前正在接受传输任务数量 161 | self.neighbor_state.append(state) 162 | 163 | self.self_state.extend(self.Mec.state) 164 | 165 | # 任务状态信息 166 | for i in range(MAX_TASK): 167 | 
if i < self.len_task: 168 | task = self.total_task[i] 169 | self.task_state.append([task.create_time, task.need_trans_size, task.need_precess_cycle, task.max_time]) 170 | else: 171 | self.task_state.append([0, 0, 0, 0]) 172 | 173 | return self.self_state 174 | -------------------------------------------------------------------------------- /experiment7/MyQueue.py: -------------------------------------------------------------------------------- 1 | class MyQueue: 2 | def __init__(self): 3 | self.items = [] 4 | 5 | def push(self, item): 6 | self.items.append(item) 7 | 8 | def pop(self): 9 | if self.is_empty(): 10 | return None 11 | return self.items.pop(0) 12 | 13 | def peek(self): 14 | return self.items[0] 15 | 16 | def getLast(self): 17 | if self.is_empty(): 18 | return None 19 | return self.items[len(self.items) - 1] 20 | 21 | def is_empty(self): 22 | return len(self.items) == 0 23 | 24 | def size(self): 25 | return len(self.items) 26 | -------------------------------------------------------------------------------- /experiment7/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 环境4 3 | (两个动作:选择任务和选择对象) 4 | 加入了mec和车在时隙内处理任务的上限(mec最多同时处理10个任务、车最多处理5个任务) 5 | 使用经典城市道路(使用不同数量车辆和邻居) 6 | 为mec卸载和车辆卸载提供两种传输方式(即可同时像车辆和mec传输任务) 7 | """ 8 | import ptan 9 | import numpy as np 10 | import torch 11 | from torch.distributions.categorical import Categorical 12 | from env import Env 13 | 14 | 15 | def test_net(nets, env: Env, count=10): 16 | rewards = 0.0 17 | steps = 0 18 | for _ in range(count): 19 | env.reset() 20 | while steps < 1000: 21 | action = [] 22 | with torch.no_grad(): 23 | for vehicle in env.vehicles: 24 | state = torch.tensor(vehicle.self_state) 25 | _, pro = nets[vehicle.id](state) 26 | act = Categorical.sample(pro) 27 | action.append(act.item()) 28 | _, _, reward, _ = env.step(action) 29 | rewards += reward 30 | steps += 1 31 | return rewards / count, steps / count 32 | 33 | # def calc_logprob(pro_v, actions_v): 34 | # p1 = - ((mu_v - actions_v) ** 2) / (2 * torch.exp(logstd_v).clamp(min=1e-3)) 35 | # p2 = - torch.log(torch.sqrt(2 * math.pi * torch.exp(logstd_v))) 36 | # return p1 + p2 37 | -------------------------------------------------------------------------------- /experiment7/dqn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import time 4 | from collections import namedtuple 5 | 6 | import matplotlib 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | from pylab import mpl 13 | import matplotlib.font_manager as fm 14 | import netron 15 | 16 | from env import Env 17 | from model import DQN, DQNCNN 18 | from test.test import C 19 | 20 | np.random.seed(2) 21 | 22 | # 设置显示中文字体 23 | mpl.rcParams["font.sans-serif"] = ["SimHei"] 24 | matplotlib.rcParams['axes.unicode_minus'] = False 25 | os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" 26 | # 加载 Times New Roman 字体 27 | font_path = 'C:/Windows/Fonts/times.ttf' 28 | prop = fm.FontProperties(fname=font_path, size=8) 29 | 30 | Experience = namedtuple('Transition', 31 | field_names=['cur_otherState', 'cur_TaskState', "cur_NeighborState", # 状态 32 | 'taskAction', 'aimAction', # 动作 33 | 'reward', # 奖励 34 | 'next_otherState', 'next_TaskState', 35 | 'next_NeighborState']) # Define a transition tuple 36 | GAMMA = 0.99 37 | BATCH_SIZE = 32 38 | REPLAY_SIZE = 100 39 | LEARNING_RATE = 1e-4 40 | SYNC_TARGET_FRAMES = 100 # 更新目标网络频率 41 
| 42 | EPSILON_DECAY_LAST_FRAME = 150000 43 | EPSILON_START = 0.8 44 | EPSILON_FINAL = 0.01 45 | EPSILON = 200000 46 | 47 | RESET = 1000 # 重置游戏次数 48 | 49 | MAX_TASK = 10 # 任务队列最大长度 50 | 51 | momentum = 0.005 52 | 53 | RESOURCE = [0.2, 0.4, 0.6, 0.8] 54 | 55 | 56 | @torch.no_grad() 57 | def play_step(env, epsilon, models): 58 | vehicles = env.vehicles 59 | old_otherState = [] 60 | old_taskState = [] 61 | old_neighborState = [] 62 | 63 | actionTask = [] 64 | actionAim = [] 65 | # 贪心选择动作 66 | for i, model in enumerate(models): 67 | old_otherState.append(vehicles[i].self_state) 68 | old_taskState.append(vehicles[i].task_state) 69 | old_neighborState.append(vehicles[i].neighbor_state) 70 | if np.random.random() < epsilon: 71 | # 随机动作 72 | actionTask.append(np.random.randint(0, 5)) 73 | actionAim.append(np.random.randint(0, 7)) # local+mec+neighbor 74 | else: 75 | state_v = torch.tensor([vehicles[i].self_state], dtype=torch.float32) 76 | taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 77 | neighborState_v = torch.tensor([[vehicles[i].neighbor_state]], dtype=torch.float32) 78 | taskAction, aimAction = model(state_v, taskState_v, neighborState_v) 79 | 80 | taskAction = np.array(taskAction, dtype=np.float32).reshape(-1) 81 | aimAction = np.array(aimAction, dtype=np.float32).reshape(-1) 82 | 83 | actionAim.append(np.argmax(aimAction)) 84 | actionTask.append(np.argmax(taskAction)) 85 | # print("action:", action) 86 | _, _, _, otherState, _, taskState, neighborState, Reward, reward = env.step(actionTask, actionAim) 87 | # print("reward:", reward) 88 | 89 | # 加入各自的缓存池【当前其他状态、当前任务状态、目标动作、任务动作,下一其他状态、下一任务状态】 90 | for i, vehicle in enumerate(vehicles): 91 | exp = Experience(old_otherState[i], [old_taskState[i]], [old_neighborState[i]], 92 | actionTask[i], actionAim[i], 93 | reward[i], 94 | otherState[i], [taskState[i]], [neighborState[i]]) 95 | vehicle.buffer.append(exp) 96 | return round(Reward, 2) # 返回总的平均奖励 97 | 98 | 99 | # 计算一个智能体的损失 100 | def calc_loss(batch, net: DQNCNN, tgt_net: DQNCNN, device="cpu"): 101 | c=C() 102 | cur_otherState, cur_TaskState, curNeighborState, taskAction, aimAction, rewards, next_otherState, next_TaskState, next_NeighborState = batch # 103 | 104 | otherStates_v = torch.tensor(np.array(cur_otherState, copy=False), dtype=torch.float32).to(device) 105 | taskStates_v = torch.tensor(np.array(cur_TaskState, copy=False), dtype=torch.float32).to(device) 106 | neighborStates_v = torch.tensor(np.array(curNeighborState, copy=False), dtype=torch.float32).to(device) 107 | # print("states_v:", states_v) # batch状态 108 | taskActions_v = torch.tensor(np.array(taskAction), dtype=torch.int64).to(device) 109 | aimActions_v = torch.tensor(np.array(aimAction), dtype=torch.int64).to(device) 110 | # print("actions_v", actions_v) # batch动作 111 | rewards_v = torch.tensor(np.array(rewards), dtype=torch.float32).to(device) 112 | # print("rewards_v", rewards_v) # batch奖励 113 | next_otherStates_v = torch.tensor(np.array(next_otherState, copy=False), dtype=torch.float32).to(device) 114 | next_taskStates_v = torch.tensor(np.array(next_TaskState, copy=False), dtype=torch.float32).to(device) 115 | next_NeighborState_v = torch.tensor(np.array(next_NeighborState, copy=False), dtype=torch.float32).to(device) 116 | # print("next_states_v", next_states_v) # batch下一个状态 117 | 118 | # 计算当前网络q值 119 | taskActionValues, aimActionValues = net(otherStates_v, 120 | taskStates_v, 121 | neighborStates_v) # .gather(1, aimActions_v.unsqueeze(-1)).squeeze(-1) 122 | taskActionValues = 
taskActionValues.gather(1, taskActions_v.unsqueeze(-1)).squeeze(-1) 123 | aimActionValues = aimActionValues.gather(1, aimActions_v.unsqueeze(-1)).squeeze(-1) 124 | 125 | # Q-values from the target network 126 | next_taskActionValues, next_aimActionValues = tgt_net(next_otherStates_v, 127 | next_taskStates_v, 128 | next_NeighborState_v) # .max(1)[0] # take the maximum Q-value 129 | 130 | next_taskActionValues = next_taskActionValues.max(1)[0].detach() 131 | next_aimActionValues = next_aimActionValues.max(1)[0].detach() 132 | 133 | # keep gradients from flowing into the network that approximates next-state Q-values 134 | # next_states_values = next_aimActionValues.detach() 135 | # print("next_states_values", next_states_values) 136 | expected_aim_values = next_aimActionValues * GAMMA + rewards_v 137 | expected_task_values = next_taskActionValues * GAMMA + rewards_v 138 | # print(" expected_state_values", expected_state_values) 139 | 140 | return nn.MSELoss()(taskActionValues, expected_task_values), nn.MSELoss()(aimActionValues, expected_aim_values) 141 | 142 | 143 | if __name__ == '__main__': 144 | env = Env() 145 | env.reset() 146 | 147 | frame_idx = 0 148 | # writer = SummaryWriter(comment="-" + env.__doc__) 149 | agents = env.vehicles 150 | models = [] 151 | tgt_models = [] 152 | optimizers = [] 153 | task_shape = np.array([agents[0].task_state]).shape 154 | neighbor_shape = np.array([agents[0].neighbor_state]).shape 155 | for agent in agents: 156 | # print(agent.get_location, agent.velocity) 157 | 158 | # print(task_shape) 159 | model = DQNCNN(len(agent.self_state), task_shape, neighbor_shape, MAX_TASK, len(agent.neighbor) + 2) 160 | models.append(model) 161 | optimizer = optim.RMSprop(params=model.parameters(), lr=LEARNING_RATE, momentum=momentum) 162 | optimizers.append(optimizer) 163 | for agent in agents: 164 | # print(agent.get_location, agent.velocity) 165 | # task_shape = np.array([agent.task_state]).shape 166 | # print(task_shape) 167 | model = DQNCNN(len(agent.self_state), task_shape, neighbor_shape, MAX_TASK, len(agent.neighbor) + 2) 168 | model.load_state_dict(models[agent.id].state_dict()) 169 | tgt_models.append(model) 170 | 171 | # Dump the network structure for visualization with netron 172 | # model = models[0] 173 | # state_v = torch.tensor([env.vehicles[0].self_state], dtype=torch.float32) 174 | # taskState_v = torch.tensor([[env.vehicles[0].task_state]], dtype=torch.float32) 175 | # neighbor_v = torch.tensor([[env.vehicles[0].neighbor_state]], dtype=torch.float32) 176 | # # for the case where the model exists but no trained .pth file has been saved yet 177 | # modelpath = "./netStruct/demo.onnx" # path where the exported model structure is saved 178 | # torch.onnx.export(model, (state_v, taskState_v, neighbor_v), modelpath) # export and save 179 | # netron.start(modelpath) 180 | 181 | total_reward = [] 182 | recent_reward = [] 183 | loss_task_list = [] 184 | loss_aim_list = [] 185 | reward_1 = [] 186 | 187 | epsilon = EPSILON_START 188 | episode = EPSILON 189 | while episode > 0: 190 | frame_idx += 1 191 | # reset the environment 192 | # if frame_idx % RESET == 0: 193 | # print("environment reset") 194 | # # memory = [] 195 | # # for vehicle in env.vehicles: 196 | # # memory.append(vehicle.buffer) 197 | # env.reset() 198 | # agents = env.vehicles 199 | # # for i, vehicle in enumerate(agents): 200 | # # vehicle.buffer = memory[i] 201 | print("step {}".format(frame_idx)) 202 | epsilon = max(EPSILON_FINAL, EPSILON_START - frame_idx / EPSILON_DECAY_LAST_FRAME) 203 | reward = play_step(env, epsilon, models) 204 | total_reward.append(reward) 205 | print("current reward:", reward) 206 | print("mean reward over the last 100 steps:", np.mean(total_reward[-100:])) 207 | recent_reward.append(np.mean(total_reward[-100:])) 208 | # if
np.mean(total_reward[-100:]) > 0.7: 209 | # break 210 | 211 | for i, agent in enumerate(agents): 212 | # print("length of {} buffer".format(agent.id), len(agent.buffer)) 213 | if len(agent.buffer) < REPLAY_SIZE: # the replay buffer has to fill up first 214 | continue 215 | if frame_idx % SYNC_TARGET_FRAMES == 0: # sync the target network 216 | tgt_models[i].load_state_dict(models[i].state_dict()) 217 | optimizers[i].zero_grad() 218 | batch = agent.buffer.sample(BATCH_SIZE) 219 | loss_task, loss_aim = calc_loss(batch, models[i], tgt_models[i]) 220 | if i == 2: 221 | print("loss:", loss_task, " ", loss_aim) 222 | # loss_t.backward() 223 | torch.autograd.backward([loss_task, loss_aim]) 224 | # total_loss = 0.6 * loss_aim + 0.4 * loss_task 225 | optimizers[i].step() 226 | episode -= 1 227 | if frame_idx % 10000 == 0 and frame_idx != 0: 228 | cur_time = time.strftime("%Y-%m-%d-%H", time.localtime(time.time())) + "-" + str(frame_idx) 229 | # create the output directory 230 | os.makedirs("D:/pycharm/Project/VML/MyErion/experiment7/result/" + cur_time) 231 | for i, vehicle in enumerate(env.vehicles): 232 | # save each vehicle's network 233 | torch.save(tgt_models[i].state_dict(), 234 | "D:/pycharm/Project/VML/MyErion/experiment7/result/" + cur_time + "/vehicle" + str( 235 | i) + ".pkl") 236 | 237 | plt.plot(range(len(recent_reward)), recent_reward) 238 | plt.ylabel("Average Reward", fontproperties=prop) 239 | plt.xlabel("Episode", fontproperties=prop) 240 | plt.show() 241 | -------------------------------------------------------------------------------- /experiment7/env_test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from experiment7 import model 4 | from env import Env 5 | import os 6 | import numpy as np 7 | from mecEnv import MecEnv 8 | import matplotlib.pyplot as plt 9 | 10 | os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" 11 | np.random.seed(2) 12 | if __name__ == '__main__': 13 | print() 14 | # env = Env() 15 | env = MecEnv() 16 | env.reset() 17 | 18 | # Check the task-state dimensions 19 | # task = np.array(env.taskState) 20 | # print(task.shape) 21 | vehicles = env.vehicles 22 | 23 | # for vehicle in vehicles: 24 | # print("vehicle {} state: {}".format(vehicle.id, vehicle.self_state)) 25 | # print("its neighbors:") 26 | # for i in vehicle.neighbor: 27 | # print(i.id, end=" ") 28 | # print() 29 | 30 | # Run the environment 31 | reward = [] 32 | models = [] 33 | 34 | # task_shape = np.array([vehicles[0].task_state]).shape 35 | # for i in range(env.num_Vehicles): 36 | # # load the trained model 37 | # tgt_model = model.DQN(len(vehicles[0].self_state), task_shape, 10, len(vehicles[0].neighbor) + 2) 38 | # tgt_model.load_state_dict( 39 | # torch.load("D:\\pycharm\\Project\\VML\\MyErion\\experiment7\\result\\2023-05-23\\vehicle{}.pkl".format(i))) 40 | # models.append(tgt_model) 41 | for step in range(100): 42 | # for j in range(20): 43 | # x[j].append(env.vehicles[j].position[0]) 44 | # y[j].append(env.vehicles[j].position[1]) 45 | action_task = [] 46 | action_aim = [] 47 | for i in range(env.num_Vehicles): 48 | # state_v = torch.tensor([vehicles[i].self_state], dtype=torch.float32) 49 | # taskState_v = torch.tensor([[vehicles[i].task_state]], dtype=torch.float32) 50 | # taskAction, aimAction = models[i](state_v, taskState_v) 51 | # 52 | # taskAction = taskAction.detach().numpy().reshape(-1) 53 | # aimAction = aimAction.detach().numpy().reshape(-1) 54 | # # ppo 55 | # action_task.append(np.argmax(taskAction)) 56 | # action_aim.append(np.argmax(aimAction)) 57 | 58 | # action_task.append(np.random.randint(0, 10)) 59 | action_task.append(0) 60 | # action_aim.append(np.random.randint(0, 7)) 61 |
action_aim.append(0) 62 | # action_aim.append(1) 63 | # other_state, task_state, vehicle_state, _, _, _, _, 64 | Reward, _ = env.step(action_task, action_aim) 65 | # reward.append(Reward) 66 | # print("step {} average reward {}".format(step, Reward)) 67 | # print("current state:", state) 68 | # print("next state:", next_state) 69 | # print("vehicle state:", vehicleState) 70 | # print("task state", taskState) 71 | # print("current reward:", reward) 72 | # print("per-vehicle rewards:", vehicleReward) 73 | # print("{} tasks have not finished transmitting".format(len(env.need_trans_task))) 74 | # print("average reward:", env.Reward) 75 | # plt.figure() 76 | # fix, ax = plt.subplots(5, 4) 77 | # 78 | # for i in range(5): 79 | # for j in range(4): 80 | # number = i * 4 + j 81 | # ax[i, j].plot(x[number], y[number]) 82 | # ax[i, j].set_title('vehicle {}'.format(number)) 83 | # plt.plot(range(len(reward)), reward) 84 | # plt.ylabel("Reward") 85 | # plt.show() 86 | 87 | plt.figure() 88 | avg = [np.mean(sum_time) for i, sum_time in enumerate(env.avg) if i % 4 != 0] 89 | plt.ylabel("sumTime") 90 | plt.bar(range(len(avg)), avg, color="blue") 91 | plt.show() 92 | 93 | plt.figure() 94 | avg = [np.mean(sum_time) for i, sum_time in enumerate(env.avg_reward) if i % 4 != 0] 95 | plt.ylabel("avg_reward") 96 | plt.plot(range(len(avg)), avg, color="blue") 97 | plt.show() 98 | 99 | plt.figure() 100 | avg = [np.mean(avg_energy) for i, avg_energy in enumerate(env.avg_energy) if i % 4 != 0] 101 | plt.ylabel("Energy") 102 | plt.bar(range(len(avg)), avg, color="blue") 103 | plt.show() 104 | # 105 | plt.figure() 106 | avg = [np.mean(sum_time) for i, sum_time in enumerate(env.avg_price) if i % 4 != 0] 107 | plt.ylabel("Price") 108 | plt.bar(range(len(avg)), avg, color="blue") 109 | plt.show() 110 | 111 | plt.figure() 112 | avg = [vehicle.success_task / vehicle.sum_create_task for i, vehicle in enumerate(env.vehicles) if i % 4 != 0] 113 | plt.ylabel("successRate") 114 | plt.bar(range(len(avg)), avg, color="blue") 115 | plt.show() 116 | 117 | # plt.figure() 118 | # plt.ylabel("transTime") 119 | # for i, time in enumerate(env.avg_trans_time): 120 | # if i % 3 != 0: 121 | # plt.plot(range(0, len(time)), time) 122 | # plt.show() 123 | -------------------------------------------------------------------------------- /experiment7/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import argparse 3 | import os 4 | import time 5 | from collections import namedtuple 6 | 7 | import ptan 8 | import torch 9 | import torch.nn.functional as F 10 | import model 11 | from env import Env 12 | from mec import MEC 13 | from vehicle import Vehicle 14 | from memory import ReplayMemory 15 | from tensorboardX import SummaryWriter 16 | from torch.distributions.categorical import Categorical 17 | 18 | ENV_ID = "computing offloading" 19 | GAMMA = 0.99 20 | GAE_LAMBDA = 0.95 21 | 22 | TRAJECTORY_SIZE = 65 23 | LEARNING_RATE_ACTOR = 1e-5 24 | LEARNING_RATE_CRITIC = 1e-4 25 | 26 | PPO_EPS = 0.2 27 | PPO_EPOCHES = 10 28 | PPO_BATCH_SIZE = 64 29 | 30 | TEST_ITERS = 10000 31 | Experience = namedtuple('Transition', ('state', 'action', 'reward', 'next_state')) # Define a transition tuple 32 | 33 | 34 | # Convert lists into tensors and store them in the replay buffer 35 | def save_experience(state, action, reward, next_state, memory: ReplayMemory): 36 | reward = torch.tensor([reward]) 37 | action = torch.tensor([action]) 38 | state = torch.tensor(state) 39 | state = state.unsqueeze(0) 40 | next_state = torch.tensor(next_state) 41 | next_state = next_state.unsqueeze(0) 42 | memory.push(state, action, reward, next_state)
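# --- Annotation (not part of the original main.py): the function below, calc_adv_ref, implements
# generalized advantage estimation (GAE). A hand-worked single-step example with this module's
# GAMMA = 0.99, critic values V(s0) = 1.0 and V(s1) = 0.5, and a reward of 0.2 for the transition:
#   delta     = 0.2 + 0.99 * 0.5 - 1.0 = -0.305
#   advantage = delta = -0.305 (only one transition, so the GAMMA * GAE_LAMBDA * last_gae term is zero)
#   reference = advantage + V(s0) = 0.695
# _gae_single_step is a hypothetical helper added only to illustrate that arithmetic; nothing in the
# project calls it.
def _gae_single_step(reward=0.2, value=1.0, next_value=0.5):
    # mirrors: delta = exp.vehicleReward + GAMMA * next_val - val
    delta = reward + GAMMA * next_value - value
    advantage = delta
    reference = advantage + value
    return advantage, reference  # approximately (-0.305, 0.695)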
43 | 44 | 45 | def calc_adv_ref(trajectory, net_crt, states_v, device="cpu"): 46 | """ 47 | Compute the advantage and the 1-step reference value from a trajectory 48 | :param trajectory: trajectory list 49 | :param net_crt: critic network 50 | :param states_v: states tensor 51 | :return: tuple with advantage numpy array and reference values 52 | """ 53 | values_v = net_crt(torch.tensor(states_v)) 54 | values = values_v.squeeze().data.cpu().numpy() 55 | # generalized advantage estimator: smoothed version of the advantage 56 | last_gae = 0.0 57 | result_adv = [] 58 | result_ref = [] 59 | for val, next_val, exp in zip(reversed(values[:-1]), 60 | reversed(values[1:]), 61 | reversed(trajectory[:-1])): 62 | delta = exp.vehicleReward + GAMMA * next_val - val 63 | last_gae = delta + GAMMA * GAE_LAMBDA * last_gae 64 | result_adv.append(last_gae) 65 | result_ref.append(last_gae + val) 66 | 67 | adv_v = torch.FloatTensor(list(reversed(result_adv))) 68 | ref_v = torch.FloatTensor(list(reversed(result_ref))) 69 | return adv_v.to(device), ref_v.to(device) 70 | 71 | 72 | # Store each vehicle's transition in its own replay buffer 73 | def push(env, state, actions, next_state): 74 | for i, vehicle in enumerate(env.vehicles): 75 | if vehicle.task is None: # a vehicle without a task contributes no experience 76 | continue 77 | exp = Experience(state, actions[i], env.vehicleReward[i][-1], next_state) 78 | vehicle.buffer.append(exp) 79 | 80 | 81 | if __name__ == '__main__': 82 | task = MEC([10, 20]) 83 | vehicle = Vehicle(1, [10, 20], 'd') 84 | print(type(task) == MEC) 85 | print(type(task) == Vehicle) 86 | print(type(vehicle) == Vehicle) 87 | print(type(vehicle)) 88 | print(vehicle) 89 | -------------------------------------------------------------------------------- /experiment7/mec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | RANGE_MEC = 1000 # MEC communication range /m 4 | RESOURCE = 10000 # available resources MHz 5 | MAX_QUEUE = 40 6 | 7 | 8 | # Edge server 9 | class MEC: 10 | def __init__(self, position, resources=RESOURCE, max_queue=MAX_QUEUE): 11 | self.loc_x = position[0] 12 | self.loc_y = position[1] 13 | self.loc = position 14 | # currently available resources, MHz 15 | self.resources = resources 16 | self.state = [] 17 | # communication range, m 18 | self.range = RANGE_MEC 19 | # tasks currently accepted for processing (at most 10 at the same time) 20 | self.accept_task = [] 21 | # maximum number of tasks processed concurrently 22 | self.max_task = 10 23 | # number of accepted tasks 24 | self.sum_needDeal_task = 0 25 | # how many actions picked me this slot / how many tasks chose to transmit to me 26 | # self.len_action = 0 27 | # queue of tasks waiting to be computed (think of them as suspended) 28 | self.task_queue = [] 29 | # task queue used for reward calculation 30 | self.task_queue_for_reward = [] 31 | # maximum queue length 32 | self.max_queue = max_queue 33 | # current state 34 | self.get_state() 35 | 36 | @property 37 | def get_x(self): 38 | return self.loc_x 39 | 40 | @property 41 | def get_y(self): 42 | return self.loc_y 43 | 44 | @property 45 | def get_location(self): 46 | return self.loc 47 | 48 | """ 49 | Get the state 50 | """ 51 | 52 | def get_state(self): 53 | """ 54 | :return: state, with layout [loc_x, loc_y, sum_needDeal_task, resources] 55 | """ 56 | self.state = [] 57 | self.state.extend(self.loc) 58 | self.state.append(self.sum_needDeal_task) 59 | # self.state.append(self.len_action) 60 | self.state.append(self.resources) 61 | return self.state 62 | -------------------------------------------------------------------------------- /experiment7/memory.py: -------------------------------------------------------------------------------- 1 | # Experience type 2 | import collections 3 | from collections import namedtuple 4 | from random import sample 5 | import numpy as np 6 | 7 | Experience = namedtuple('Transition', 8 | field_names=['cur_otherState',
'cur_TaskState', "cur_NeighborState", # states 9 | 'taskAction', 'aimAction', # actions 10 | 'reward', # reward 11 | 'next_otherState', 'next_TaskState', 12 | 'next_NeighborState']) # Define a transition tuple 13 | 14 | 15 | class PPOMemory: 16 | def __init__(self, batch_size): 17 | self.self_state = [] 18 | self.neighbor_state = [] 19 | self.task_state = [] 20 | self.vehicles_state = [] 21 | self.task_probs = [] 22 | self.aim_probs = [] 23 | self.vals = [] 24 | self.action = [] 25 | self.rewards = [] 26 | self.batch_size = batch_size 27 | 28 | def sample(self): 29 | batch_step = np.arange(0, len(self.self_state), self.batch_size) 30 | indices = np.arange(len(self.self_state), dtype=np.int64) 31 | # np.random.shuffle(indices) 32 | batches = [indices[i:i + self.batch_size] for i in batch_step] 33 | return np.array(self.self_state), \ 34 | np.array(self.neighbor_state), \ 35 | np.array(self.task_state), \ 36 | np.array(self.vehicles_state), \ 37 | np.array(self.task_probs), \ 38 | np.array(self.aim_probs), \ 39 | np.array(self.vals), \ 40 | np.array(self.action), \ 41 | np.array(self.rewards), \ 42 | batches 43 | 44 | def push(self, self_state, neighbor_state, task_state, vehicles_state, 45 | task_action, aim_action, 46 | task_probs, aim_probs, 47 | vals, reward): 48 | self.self_state.append(self_state) 49 | self.neighbor_state.append(neighbor_state) 50 | self.task_state.append(task_state) 51 | self.vehicles_state.append(vehicles_state) 52 | self.action.append([task_action, aim_action]) 53 | self.task_probs.append(task_probs) 54 | self.aim_probs.append(aim_probs) 55 | self.vals.append(vals) 56 | self.rewards.append(reward) 57 | 58 | def clear(self): 59 | self.self_state = [] 60 | self.neighbor_state = [] 61 | self.task_state = [] 62 | self.vehicles_state = [] 63 | self.task_probs = [] 64 | self.aim_probs = [] 65 | self.vals = [] 66 | self.action = [] 67 | self.rewards = [] 68 | 69 | 70 | class ReplayMemory(object): # Define a replay memory 71 | 72 | # initialize the replay buffer 73 | def __init__(self, capacity): 74 | # maximum capacity 75 | self.capacity = capacity 76 | # stored experiences 77 | self.memory = [] 78 | # index of the next slot to overwrite (circular buffer)
79 | self.position = 0 80 | 81 | # store an experience 82 | def push(self, *args): 83 | if len(self.memory) < self.capacity: 84 | self.memory.append(None) 85 | # store the experience 86 | self.memory[self.position] = Experience(*args) 87 | # record where the next experience will be written 88 | self.position = (self.position + 1) % self.capacity 89 | 90 | # sample a batch 91 | def sample(self, batch_size): 92 | return sample(self.memory, batch_size) 93 | 94 | def __len__(self): 95 | return len(self.memory) 96 | 97 | 98 | class ExperienceBuffer: 99 | def __init__(self, capacity): 100 | self.maxLen = capacity 101 | self.buffer = collections.deque(maxlen=capacity) # FIFO queue 102 | 103 | def __len__(self): 104 | return len(self.buffer) 105 | 106 | def append(self, experience: Experience): 107 | self.buffer.append(experience) 108 | 109 | def sample(self, batch_size): 110 | indices = np.random.choice(len(self.buffer), batch_size, replace=False) 111 | cur_otherState, cur_TaskState, cur_NeighborState, taskAction, aimAction, rewards, next_otherState, next_TaskState, next_NeighborState = zip( 112 | *[self.buffer[idx] for idx in indices]) 113 | # convert to numpy arrays 114 | return np.array(cur_otherState), np.array(cur_TaskState), np.array(cur_NeighborState), \ 115 | np.array(taskAction), np.array(aimAction), \ 116 | np.array(rewards, dtype=np.float32), \ 117 | np.array(next_otherState), np.array(next_TaskState), np.array(next_NeighborState) 118 | 119 | # clear the buffer 120 | def clear(self): 121 | self.buffer = collections.deque(maxlen=self.maxLen) 122 | -------------------------------------------------------------------------------- /experiment7/task.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | np.random.seed(2) 4 | 5 | 6 | class Task: 7 | """ 8 | Defines the task type 9 | """ 10 | 11 | def __init__(self, vehicle=None, createTime=0, flag=1): 12 | # the vehicle that generated the task 13 | self.vehicle = vehicle 14 | # resources consumed to complete this task 15 | self.aim = None # offloading target 16 | 17 | if flag == 1: 18 | # entertainment task 19 | self.max_time = 60 # np.random.randint(50, 70) # ms, maximum tolerable latency 20 | else: 21 | # safety task 22 | self.max_time = 50 # np.random.randint(40, 50) 23 | self.size = np.random.uniform(0.3, 0.5) # np.random.uniform(0.5, 1) # Mb 24 | self.cycle = 40 # np.random.randint(30, 50) # cycle/bit 25 | self.need_trans_size = self.size * np.power(2, 10) # Kb, data still left to transmit 26 | self.need_precess_cycle = self.cycle * self.size * 1000 # Mb * cycles/bit = M cycles, cycles still left to process (10^6) 27 | 28 | self.need_time = 0 # required computation time 29 | self.trans_time = 0 # required transmission time 30 | self.hold_time = 0 # time the task waits in the computation queue 31 | self.wait_time = 0 # time the task waits for transmission 32 | 33 | self.rate = 0 # current transmission rate 34 | self.compute_resource = 0 # allocated computing resources 35 | 36 | self.create_time = createTime # time the task was created 37 | self.pick_time = 0 # time the task was picked (dequeued) 38 | -------------------------------------------------------------------------------- /experiment7/vehicle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from MyQueue import MyQueue 6 | from memory import ExperienceBuffer, PPOMemory 7 | from task import Task 8 | 9 | Dv = 100 # maximum vehicle communication range 10 | Fv = 2000 # maximum vehicle computing capacity, MHz 11 | MAX_TASK = 10 # maximum task queue length 12 | 13 | CAPACITY = 10000 # replay buffer size 14 | TASK_SOLT = 10 # task generation time slot 15 | 16 | # maximum waiting queue length 17 | MAX_QUEUE = 10 18 | 19 | np.random.seed(2) 20 | 21 | direction_map = {"d": 1, "u": 2, "l": 3, "r": 4} 22 | 23 | 24 | class Vehicle: 25 | # position: x, y; velocity; direction: -1 left, 1 right 26 | def __init__(self, id, position, direction, velocity=20, max_queue=MAX_QUEUE): 27 | self.id = id 28 | # vehicle position info 29 | self.loc_x = position[0] 30 |
self.loc_y = position[1] 31 | self.position = position 32 | self.velocity = velocity # m/s 33 | self.direction = direction 34 | # communication range 35 | self.range = Dv 36 | # neighbor list 37 | self.neighbor = [] 38 | # mec 39 | self.Mec = None 40 | # current time 41 | self.cur_frame = 0 42 | # list of accepted tasks (at most 5 processed at the same time) 43 | self.accept_task = [] 44 | # queue of tasks waiting to be computed (think of them as suspended) 45 | self.task_queue = [] 46 | # task queue used for reward calculation 47 | self.task_queue_for_reward = [] 48 | # maximum number of tasks processed concurrently 49 | self.max_task = 5 50 | # maximum waiting queue length 51 | self.max_queue = max_queue 52 | # number of accepted tasks (both processing and waiting) 53 | self.sum_needDeal_task = 0 54 | # how many actions chose me as the offloading target this slot 55 | # self.len_action = 0 56 | # currently available resources 57 | self.resources = round((1 - np.random.randint(1, 3) / 10) * Fv, 2) # MHz 58 | # whether a task is currently being transmitted to a neighboring vehicle (0: no, 1: yes) 59 | self.trans_task_for_vehicle = 0 60 | self.queue_for_trans_vehicle = MyQueue() 61 | # whether a task is currently being transmitted to the MEC 62 | self.trans_task_for_mec = 0 63 | self.queue_for_trans_mec = MyQueue() 64 | # task currently being processed (used for reward calculation, not part of the state) 65 | self.cur_task = None 66 | # task queue 67 | self.total_task = [] 68 | # length of the task queue 69 | self.len_task = len(self.total_task) 70 | # offloading success rate 71 | self.success_rate = 0 72 | self.success_task = 0 73 | self.sum_create_task = 0 74 | 75 | # current state info 76 | self.self_state = [] 77 | # current task queue state 78 | self.task_state = [] 79 | # neighboring vehicles' states 80 | self.neighbor_state = [] 81 | # state without neighbor info, used for neighbor observations and the global critic 82 | self.excludeNeighbor_state = [] 83 | # replay buffer 84 | self.buffer = ExperienceBuffer(capacity=CAPACITY) 85 | # total reward 86 | self.reward = [] 87 | # number of overflowed tasks 88 | self.overflow = 0 89 | # time the previous task was generated 90 | self.lastCreatWorkTime = 0 91 | 92 | self.timeSolt = TASK_SOLT # * (id % 2 + 1) 93 | self.memory = PPOMemory(CAPACITY) 94 | # generate tasks 95 | self.create_work() 96 | 97 | # get the position 98 | @property 99 | def get_location(self): 100 | return self.position 101 | 102 | # set the position 103 | def set_location(self, loc_x, loc_y): 104 | self.loc_x = loc_x 105 | self.loc_y = loc_y 106 | self.position = [self.loc_x, self.loc_y] 107 | 108 | # get x 109 | @property 110 | def get_x(self): 111 | return self.loc_x 112 | 113 | # get y 114 | @property 115 | def get_y(self): 116 | return self.loc_y 117 | 118 | # Generate a task (driven by the current time) 119 | def create_work(self): 120 | if self.id % 3 == 0: 121 | return 122 | # generate a task once per task slot 123 | if (self.cur_frame - self.lastCreatWorkTime) % self.timeSolt == 0: 124 | # # generate a task with probability 0.6 each time 125 | # if np.random.random() < 0.8: 126 | if self.len_task < MAX_TASK: # queue not full 127 | if self.cur_frame % 3 == 0: 128 | task = Task(self, self.cur_frame % 1000, 2) 129 | else: 130 | task = Task(self, self.cur_frame % 1000) 131 | self.sum_create_task += 1 132 | self.lastCreatWorkTime = self.cur_frame 133 | self.total_task.append(task) 134 | self.len_task += 1 135 | self.overflow = 0 136 | else: 137 | self.overflow = 1 138 | # create a second task 139 | # if np.random.random() > 0.5: 140 | # if self.len_task < MAX_TASK: # queue not full 141 | # task = Task(self, self.cur_frame % 1000) 142 | # self.sum_create_task += 1 143 | # self.lastCreatWorkTime = self.cur_frame 144 | # self.total_task.append(task) 145 | # self.len_task += 1 146 | # # print("vehicle {} generated a task".format(self.id)) 147 | # self.overflow = 0 148 | 149 | """ 150 | Get the state 151 | """ 152 | 153 | def get_state(self): 154 | self.self_state = [] 155 | self.neighbor_state = [] 156 | self.excludeNeighbor_state = [] 157 | self.task_state = [] 158 | 159 | # position info: 4 values 160 | self.self_state.extend(self.position) 161 | self.self_state.append(self.velocity) 162 | self.self_state.append(direction_map.get(self.direction)) 163 | self.excludeNeighbor_state.extend(self.position) 164 |
self.excludeNeighbor_state.append(self.velocity) 165 | self.excludeNeighbor_state.append(direction_map.get(self.direction)) 166 | 167 | # resource info (available resources) 168 | self.self_state.append(self.resources) 169 | self.excludeNeighbor_state.append(self.resources) 170 | 171 | # number of tasks currently being handled 172 | self.self_state.append(self.sum_needDeal_task) 173 | self.excludeNeighbor_state.append(self.sum_needDeal_task) 174 | # number of tasks currently being received 175 | # self.self_state.append(self.len_action) 176 | # self.excludeNeighbor_state.append(self.len_action) 177 | 178 | # whether tasks are currently being transmitted 179 | self.excludeNeighbor_state.append(self.queue_for_trans_vehicle.size()) 180 | self.excludeNeighbor_state.append(self.queue_for_trans_mec.size()) 181 | self.self_state.append(self.queue_for_trans_vehicle.size()) 182 | self.self_state.append(self.queue_for_trans_mec.size()) 183 | 184 | # current number of queued tasks 185 | self.self_state.append(self.len_task) 186 | self.excludeNeighbor_state.append(self.len_task) 187 | 188 | # neighbor list: per-neighbor feature vector * number of neighbors 189 | for neighbor in self.neighbor: 190 | state = [] 191 | state.extend(neighbor.position) # position 192 | state.append(neighbor.velocity) # velocity 193 | state.append(direction_map.get(neighbor.direction)) # direction 194 | state.append(neighbor.resources) # available resources 195 | state.append(neighbor.sum_needDeal_task) # number of tasks being handled 196 | # self.self_state.append(neighbor.len_action) # number of tasks currently transmitting 197 | self.neighbor_state.append(state) 198 | 199 | self.self_state.extend(self.Mec.state) 200 | 201 | # task state info 202 | for i in range(MAX_TASK): 203 | if i < self.len_task: 204 | task = self.total_task[i] 205 | self.task_state.append([task.create_time, task.need_trans_size, task.need_precess_cycle, task.max_time]) 206 | else: 207 | self.task_state.append([0, 0, 0, 0]) 208 | 209 | return self.excludeNeighbor_state 210 | -------------------------------------------------------------------------------- /test/test.py: -------------------------------------------------------------------------------- 1 | class A: 2 | name = 'chen' 3 | def __init__(self, name): 4 | self.name = name 5 | 6 | def get_str(self): 7 | print("A.name" + self.name) 8 | 9 | 10 | 11 | class B: 12 | name = 'yu' 13 | def __init__(self, name): 14 | self.name = name 15 | 16 | def get_str(self): 17 | print("B.name" + self.name) 18 | 19 | class C(A, B): 20 | name = 'hao' 21 | def __init__(self, name='hao'): 22 | super().__init__(name) 23 | self.name = name 24 | 25 | 26 | 27 | def get_str(self): 28 | print("C.name" + self.name) 29 | 30 | if __name__ == '__main__': 31 | c = C() 32 | c.get_str() -------------------------------------------------------------------------------- /test/test2.py: -------------------------------------------------------------------------------- 1 | from numpy import array 2 | 3 | from test.test import C 4 | 5 | array([1, 2, 3]) --------------------------------------------------------------------------------
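# --- Annotation (not part of either test file above): a minimal sketch of what the multiple-inheritance
# experiment in test/test.py resolves to, assuming the classes are imported the same way test/test2.py
# imports C; both illustrative lines are kept as comments only.
#
#   from test.test import A, B, C
#   print(C.__mro__)   # method resolution order for class C(A, B): (C, A, B, object)
#   C().get_str()      # prints "C.namehao" -- C.get_str overrides the A and B versions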