├── 1126 NEW_DDPG ├── DDPG.ckpt.data-00000-of-00001 ├── DDPG.ckpt.index ├── DDPG.ckpt.meta └── checkpoint ├── China_city+HWFET.mat ├── Coupling training - total.py ├── README.md ├── UDDS+US06_2.mat ├── car-follow-5.24 ├── car-lc-net.net.xml ├── car-lc-rou.rou.xml ├── car-lc.settings.xml ├── car-lc.sumocfg ├── collision-output.xml └── lanechange-output.xml ├── structure_car_follow.py ├── structure_lane_change.py ├── sumo-923 ├── car-lc-net.net.xml ├── car-lc-rou.rou.xml ├── car-lc.settings.xml ├── car-lc.sumocfg ├── collision-output.xml └── lanechange-output.xml └── train_car_follow_step.py /1126 NEW_DDPG/DDPG.ckpt.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCII-Team/Double-layer-decision-making-model/HEAD/1126 NEW_DDPG/DDPG.ckpt.data-00000-of-00001 -------------------------------------------------------------------------------- /1126 NEW_DDPG/DDPG.ckpt.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCII-Team/Double-layer-decision-making-model/HEAD/1126 NEW_DDPG/DDPG.ckpt.index -------------------------------------------------------------------------------- /1126 NEW_DDPG/DDPG.ckpt.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCII-Team/Double-layer-decision-making-model/HEAD/1126 NEW_DDPG/DDPG.ckpt.meta -------------------------------------------------------------------------------- /1126 NEW_DDPG/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "DDPG.ckpt" 2 | all_model_checkpoint_paths: "DDPG.ckpt" 3 | -------------------------------------------------------------------------------- /China_city+HWFET.mat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/VCII-Team/Double-layer-decision-making-model/HEAD/China_city+HWFET.mat -------------------------------------------------------------------------------- /Coupling training - total.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import shutil 4 | import scipy.io as scio 5 | from structure_car_follow import Actor 6 | from structure_car_follow import Critic 7 | from structure_car_follow import DDPG_Memory 8 | import os 9 | os.environ["CUDA_VISIBLE_DEVICES"] = "/device:GPU:0" 10 | # CUDA_VISIBLE_DEVICES=2 11 | import time 12 | import traci 13 | import sumolib 14 | import sys 15 | import math 16 | import numpy as np 17 | import shutil 18 | import tensorflow.compat.v1 as tf 19 | import matplotlib.pyplot as plt 20 | import numpy 21 | import pandas as pd 22 | from structure_lane_change import DuelingDQNPrioritizedReplay 23 | 24 | 25 | 26 | ###读取工况数据作为训练车流环境 27 | data_path1 = 'D:\\Wechat\\file\\Data_Standard Driving Cycles\\China_city+HWFET.mat' 28 | data1 = scio.loadmat(data_path1) 29 | y1 = data1['speed_vector'][0] 30 | data_path2 = 'D:\\Wechat\\file\\Data_Standard Driving Cycles\\UDDS+US06_2.mat' 31 | data2 = scio.loadmat(data_path2) 32 | y2 = data2['speed_vector'][0] 33 | y=[] 34 | y = list(y1)+list(y2) 35 | base_mean_speed = sum(y)/len(y) 36 | np.random.seed(1) 37 | tf.set_random_seed(1) 38 | 39 | ###此处对下层跟驰模块的DDPG算法进行定义 40 | MAX_EPISODES = 0 41 | LR_A = 1e-3 # learning rate for actor 42 | last_LR_A = 8e-4 43 | LR_C = 1e-4 # learning rate for critic 44 | last_LR_C = 8e-5 45 | GAMMA = 0.9 # reward discount 46 | REPLACE_ITER_A = 3865 47 | REPLACE_ITER_C = 3000 48 | MEMORY_CAPACITY = 38650 49 | BATCH_SIZE = 256 50 | VAR_MIN = 0.1 51 | RENDER = False 52 | LOAD1 = True 53 | DISCRETE_ACTION = False 54 | STATE_DIM = 4 55 | ACTION_DIM = 1 56 | ACTION_BOUND = [-1,1] 57 | sess1 = tf.Session() 58 | # Create actor and critic. 
59 | actor = Actor(sess1, ACTION_DIM, ACTION_BOUND[1], LR_A,last_LR_A, REPLACE_ITER_A) 60 | critic = Critic(sess1, STATE_DIM, ACTION_DIM, LR_C,last_LR_C, GAMMA, REPLACE_ITER_C, actor.a, actor.a_) 61 | actor.add_grad_to_graph(critic.a_grads) 62 | M = DDPG_Memory(MEMORY_CAPACITY, dims=2 * STATE_DIM + ACTION_DIM + 1) 63 | saver1 = tf.train.Saver() 64 | # path1 = './10.7-9 NEW_DDPG' 65 | # path1 = './1118 NEW_DDPG' 66 | path1 ='./1126 NEW_DDPG' 67 | if LOAD1: 68 | saver1.restore(sess1, tf.train.latest_checkpoint(path1)) 69 | else: 70 | sess1.run(tf.global_variables_initializer()) 71 | 72 | 73 | 74 | ###对换道决策模块的DQN算法进行定义 75 | s_dim = 16 76 | DQN_s_dim = 15 77 | 78 | a_dim = 3 79 | DDPG_MEMORY_CAPACITY = 38650 80 | TARGET_REP_ITER = 300 81 | sess2 = tf.Session() 82 | E_GREEDY = 1 83 | E_INCREMENT = 0.00001 84 | GAMMA = 0.9 85 | LR = 0.0001 86 | BATCH_SIZE = 128 87 | HIDDEN = [600, 600, 600, 600] 88 | LOAD2 = False 89 | RENDER = True 90 | RL = DuelingDQNPrioritizedReplay( 91 | n_actions=a_dim, n_features=DQN_s_dim, learning_rate=LR, e_greedy=E_GREEDY, reward_decay=GAMMA, 92 | hidden=HIDDEN, batch_size=BATCH_SIZE, replace_target_iter=TARGET_REP_ITER, 93 | memory_size=MEMORY_CAPACITY, e_greedy_increment=E_INCREMENT,) 94 | saver = tf.train.Saver() 95 | saver2 = tf.train.Saver() 96 | path2 = './coupling train 917 2' 97 | if LOAD2: 98 | saver2.restore(sess2, tf.train.latest_checkpoint(path2)) 99 | else: 100 | sess2.run(tf.global_variables_initializer()) 101 | 102 | 103 | ###打开sumo的接口 104 | if 'SUMO_HOME' in os.environ: 105 | tools = os.path.join(os.environ['SUMO_HOME'], 'tools') 106 | sys.path.append(tools) 107 | else: 108 | sys.exit("please declare environment variable 'SUMO_HOME'") 109 | 110 | if_sumo_gui = True 111 | if not if_sumo_gui: 112 | sumoBinary = sumolib.checkBinary('sumo') 113 | else: 114 | sumoBinary = sumolib.checkBinary('sumo-gui') 115 | sumocfgfile = "D:\\Project_codes of pycharm\\2021.6.29\\sumo-923\\car-lc.sumocfg" 116 | traci.start([sumoBinary, "-c", 
sumocfgfile]) 117 | 118 | # var = 2. # control exploration 119 | var = 0.1 #此时的跟驰模型探索率 120 | total_step = 0 #总的训练步数,每一步记一次,用在跟驰部分记忆池 121 | total_ep_list = [] #总训练回合数生成的列表,可以用作回合数画图的横坐标 122 | total_lc_r_list = [] #每训练回合的奖励 123 | mean_speed_list = [] #每训练回合的平均速度 124 | total_fleet_mean_speed = [] #每训练回合的车流中位车速 125 | lanechange_learn_start = 0 #开始换道学习 126 | carfollowing_learn_start = 0 #开始跟驰学习 127 | for i in range(MAX_EPISODES): 128 | 129 | fleet_mean_speed = [] 130 | ep_reward = 0 131 | ep_reward_all = 0 132 | ep_step = 0 133 | 134 | v = [] 135 | all_r = [] 136 | rear_v_list = [] 137 | l_v_list = [] 138 | all_changelanetimes = [] 139 | # all_changelanetimes.append(0) 140 | lane_index0_position_x = [] 141 | lane_index0_id_list = [] 142 | lane_index1_position_x = [] 143 | lane_index1_id_list = [] 144 | lane_index2_position_x = [] 145 | lane_index2_id_list = [] 146 | lane_index0_front = [] 147 | lane_index1_front = [] 148 | lane_index2_front = [] 149 | danger_lc = [] 150 | all_cf_r = [] 151 | cf_collision_list = [] 152 | lane_index0_acc_list=[] 153 | lane_index1_acc_list=[] 154 | lane_index2_acc_list=[] 155 | ep_step_list=[] 156 | distance_headway_list = [] 157 | dis_safe_list = [] 158 | total_r = 0 159 | r0 = 0 # 换道惩罚初始化 160 | t = 0 161 | k = 1 162 | 163 | 164 | s = np.zeros(s_dim) 165 | s_ = np.zeros(s_dim) 166 | DQN_s = np.zeros(DQN_s_dim) 167 | DQN_s_ = np.zeros(DQN_s_dim) 168 | 169 | DDPG_s = np.zeros(4) 170 | DDPG_s_ = np.zeros(4) 171 | cf_s = np.zeros(4) 172 | ego_id = 0 # 初始时主车的ID是0####################################### 173 | horizontal_position_list=[] 174 | lengthwise_position_list=[] 175 | order_ego_id_list = [] 176 | r_danger_lc = 0 177 | r_high_frequency_lc = 1 178 | 179 | 180 | MAX_EP_STEPS = len(y)-60 181 | traci.load(["-c", "D:\\Project_codes of pycharm\\2021.6.29\\sumo-923\\car-lc.sumocfg"]) 182 | traci.simulationStep(50) 183 | for step in range(10): 184 | ID_list_all = traci.edge.getLastStepVehicleIDs("gneE0") 185 | for vehicle in ID_list_all: 186 | if 
int(vehicle)!=ego_id: 187 | traci.vehicle.setSpeed(vehicle, 5) # 初始速度设定 188 | traci.vehicle.setSpeedMode(vehicle, 12) 189 | traci.vehicle.setLaneChangeMode(vehicle, 512) # 换道机制设定 190 | traci.simulationStep(50 + step) 191 | 192 | origin_speed_ego_car_list = [] 193 | origin_speed_ego_car=traci.vehicle.getSpeed('%d' % ego_id) 194 | if i >=0: 195 | lanechange_learn_start = 1 196 | if i>=100: 197 | carfollowing_learn_start = 1 198 | state_space = [] 199 | lc_action_list = [] 200 | cf_action_list = [] 201 | start_time = time.time() 202 | already_t = 60 203 | for j in range(MAX_EP_STEPS): 204 | all_car_position_x = [] 205 | all_car_position_y = [] 206 | lane_index0_position_x = [] 207 | lane_index0_id_list = [] 208 | lane_index1_position_x = [] 209 | lane_index1_id_list = [] 210 | lane_index2_position_x = [] 211 | lane_index2_id_list = [] 212 | lane_index0_front = [] 213 | lane_index1_front = [] 214 | lane_index2_front = [] 215 | Distance_list = [] 216 | all_speed_list = [] 217 | changelane = 0 218 | m = j 219 | r0 = 0 220 | cf_danger = 0 221 | left_lanechange_dangerious = 0 222 | right_lanechange_dangerious = 0 223 | traci.simulationStep(j + already_t) 224 | ####这部分的代码作用是获取路网所有车辆,并按照行驶的位置进行前后排序 225 | ID_list_all = traci.edge.getLastStepVehicleIDs("gneE0") # 获取主路所有车辆的ID编号 226 | for x in range(len(ID_list_all)): # 长度应该为16 227 | Distance_list.append(traci.vehicle.getDistance(ID_list_all[x])) # 16个元素跑的里程 228 | Index = sorted(range(len(Distance_list)), key=lambda k: Distance_list[k], reverse=True) # 距离表降序排列的索引 229 | Index = np.array(Index) 230 | ID_list_all = list(map(int, ID_list_all)) # 字符串转化为数值型 231 | ID_list_order = np.array(ID_list_all)[Index] # 按照前后顺序进行车序排列 232 | ID_list_order_list = ID_list_order.tolist() #转换为list 233 | order_ego_id = ID_list_order_list.index(ego_id) #返回主车在车流的的序号 234 | ###排序完成之后对主车进行判断(是否主车已经超越所有的车辆成为第一辆车)和操作 235 | lane_index = traci.vehicle.getLaneIndex('%d' % ego_id) # 获得主车所在车道index 236 | position = traci.vehicle.getPosition('%d' % ego_id) # 获得主车坐标 
237 | position = [max(position[0], 0), max(position[1], -8)] 238 | if ego_id == ID_list_order[0] or ego_id == ID_list_order[1] or ego_id == ID_list_order[2]: # 如果主车成为了头车 239 | ego_id = ID_list_order[-1] # 主车的控制对象变成了最后一辆车 240 | 241 | # traci.vehicle.setLaneChangeMode('%d' % ego_id, 1621) # 让主车可以自由进行换道 242 | # traci.vehicle.setLaneChangeMode('%d' % ego_id, 256) # 避免碰撞 243 | # traci.vehicle.setLaneChangeMode('%d' % ego_id, 512) # 避免碰撞和安全间隙 244 | # traci.vehicle.setSpeedMode('%d' % ego_id, 31) 245 | ###获取状态值 246 | r_v = traci.vehicle.getSpeed('%d' % ego_id) # 获取主车车速,状态0 247 | s[0] = r_v 248 | s[1] = traci.vehicle.getLaneIndex('%d' % ego_id) # 获取车道数 249 | s[14] = traci.vehicle.couldChangeLane('%d' % ego_id, 1) # 左换道可行性 250 | s[15] = traci.vehicle.couldChangeLane('%d' % ego_id, -1) # 右换道可行性 251 | s[5] = 33 252 | s[6] = 0.01 253 | s[7] = 150 254 | s[8] = 33 255 | s[9] = 0.01 256 | s[10] = 150 257 | s[11] = 33 258 | s[12] = 0.01 259 | s[13] = 150 260 | r_distance_value = max(traci.vehicle.getDistance('%d' % ego_id), 0) # 获得主车行驶里程 261 | ######此部分代码用于每车道前后车排序 262 | for p in range(len(ID_list_order)): 263 | if traci.vehicle.getPosition('%d' % ID_list_order[p])[1] == -8: 264 | lane_index0_position_x.append(traci.vehicle.getPosition('%d' % ID_list_order[p])[0]) # 265 | lane_index0_id_list.append(ID_list_order[p]) 266 | if traci.vehicle.getPosition('%d' % ID_list_order[p])[1] == -4.8: 267 | lane_index1_position_x.append(traci.vehicle.getPosition('%d' % ID_list_order[p])[0]) 268 | lane_index1_id_list.append(ID_list_order[p]) 269 | if traci.vehicle.getPosition('%d' % ID_list_order[p])[1] == -1.6: 270 | lane_index2_position_x.append(traci.vehicle.getPosition('%d' % ID_list_order[p])[0]) 271 | lane_index2_id_list.append(ID_list_order[p]) 272 | ######此部分代码用于找出每一车道上的前车ID 273 | for p in range(len(lane_index0_position_x)): 274 | if lane_index0_position_x[p] <= position[0]+5: 275 | break 276 | lane_index0_front.append(lane_index0_id_list[p]) #int 277 | for p in 
range(len(lane_index1_position_x)): 278 | 279 | if lane_index1_position_x[p] <= position[0]+5: 280 | break 281 | lane_index1_front.append(lane_index1_id_list[p]) 282 | for p in range(len(lane_index2_position_x)): 283 | 284 | if lane_index2_position_x[p] <= position[0]+5: 285 | break 286 | lane_index2_front.append(lane_index2_id_list[p]) 287 | 288 | ###对车辆进行速度设定 289 | for w in range(len(ID_list_order)): 290 | # if w==0: 291 | # traci.vehicle.setSpeedMode('%d' % ID_list_order[w], 12) 292 | # traci.vehicle.setLaneChangeMode('%d' % ID_list_order[w], 512) # 换道机制设定 293 | # traci.vehicle.setSpeed('%d' % ID_list_order[w], y[ m + already_t - w]) 294 | # continue 295 | if ID_list_order[w]==ego_id:###对于主车 296 | traci.vehicle.setSpeedMode('%d' % ID_list_order[w], 12) 297 | if lanechange_learn_start==0: 298 | traci.vehicle.setLaneChangeMode('%d' % ego_id, 1621) # 让主车可以自由进行换道 299 | if lanechange_learn_start==1: 300 | traci.vehicle.setLaneChangeMode('%d' % ego_id, 512) # 301 | origin_speed_ego_car = y[ m + already_t - w] 302 | origin_speed_ego_car_list.append(origin_speed_ego_car) 303 | continue 304 | if ID_list_order[w]!=ego_id:###对于非主车 305 | # traci.vehicle.setSpeedMode('%d' % ID_list_order[w], 31) ###安全检查开启,但是仍要按照工况设定行驶 306 | # traci.vehicle.setSpeed('%d' % ID_list_order[w], y[m + 36 - w]) 307 | # traci.vehicle.setLaneChangeMode('%d' % ID_list_order[w], 512) # 换道机制设定 308 | traci.vehicle.setSpeedMode('%d' % ID_list_order[w], 31) 309 | traci.vehicle.setSpeed('%d' % ID_list_order[w], -1) 310 | if -2.5 <=(traci.vehicle.getPosition('%d' % ego_id)[0]-traci.vehicle.getPosition('%d' % ID_list_order[w])[0]-5)<=10: 311 | if (traci.vehicle.getLaneIndex('%d' % ego_id) - s[1])==1: 312 | left_lanechange_dangerious = 1 313 | continue 314 | if (traci.vehicle.getLaneIndex('%d' % ego_id) - s[1]) == -1: 315 | right_lanechange_dangerious = 1 316 | continue 317 | 318 | if lane_index0_front: 319 | if ID_list_order[w] in lane_index0_front: 320 | # traci.vehicle.setSpeedMode('%d' % ID_list_order[w], 
12) 321 | # traci.vehicle.setLaneChangeMode('%d' % ID_list_order[w], 1621) # 换道机制设定 322 | # traci.vehicle.setSpeed('%d' % ID_list_order[w], y[ m + already_t - 2*w]) 323 | traci.vehicle.setSpeedMode('%d' % ID_list_order[w], 31) 324 | traci.vehicle.setSpeed('%d' % ID_list_order[w], -1) 325 | if ID_list_order[w] == (lane_index0_front[0]): 326 | traci.vehicle.setSpeedMode('%d' % ID_list_order[w], 12) 327 | traci.vehicle.setLaneChangeMode('%d' % ID_list_order[w], 512) # 换道机制设定 328 | traci.vehicle.setSpeed('%d' % ID_list_order[w], y[ m + already_t - w]) 329 | continue 330 | if ID_list_order[w] == (lane_index0_front[-1]): 331 | lane_index0_v = traci.vehicle.getSpeed('%d' % lane_index0_front[-1]) 332 | # lane_index0_acc = y[ m + already_t - 2*w] - lane_index0_v 333 | lane_index0_acc = traci.vehicle.getAcceleration('%d' % lane_index0_front[-1]) 334 | lane_index0_s = traci.vehicle.getPosition('%d'% ID_list_order[w])[0]-traci.vehicle.getPosition('%d' % ego_id)[0]-5 335 | if lane_index0_s <= 150: 336 | s[5] = lane_index0_v 337 | s[6] = lane_index0_acc 338 | s[7] = lane_index0_s 339 | if lane_index0_s <=10 and s[0]==1: 340 | right_lanechange_dangerious = 1 341 | continue 342 | if lane_index1_front: 343 | if ID_list_order[w] in lane_index1_front: 344 | # traci.vehicle.setSpeedMode('%d' % ID_list_order[w], 12) 345 | # traci.vehicle.setLaneChangeMode('%d' % ID_list_order[w], 1621) # 换道机制设定 346 | # traci.vehicle.setSpeed('%d' % ID_list_order[w], y[ m + already_t - 2*w]) 347 | traci.vehicle.setSpeedMode('%d' % ID_list_order[w], 31) 348 | traci.vehicle.setSpeed('%d' % ID_list_order[w], -1) 349 | if ID_list_order[w] == (lane_index1_front[0]): 350 | traci.vehicle.setSpeedMode('%d' % ID_list_order[w], 12) 351 | traci.vehicle.setLaneChangeMode('%d' % ID_list_order[w], 512) # 换道机制设定 352 | traci.vehicle.setSpeed('%d' % ID_list_order[w], y[ m + already_t - w]) 353 | continue 354 | if ID_list_order[w] == (lane_index1_front[-1]): 355 | lane_index1_v = traci.vehicle.getSpeed('%d' % 
lane_index1_front[-1]) 356 | # lane_index1_acc = y[ m + already_t - w] - lane_index1_v 357 | lane_index1_acc = traci.vehicle.getAcceleration('%d' % lane_index1_front[-1]) 358 | lane_index1_s = traci.vehicle.getPosition('%d'% ID_list_order[w])[0]-traci.vehicle.getPosition('%d' % ego_id)[0]-5 359 | if lane_index1_s <= 150: 360 | s[8] = lane_index1_v 361 | s[9] = lane_index1_acc 362 | s[10] = lane_index1_s 363 | if lane_index1_s <=10: 364 | if s[0]==0: 365 | left_lanechange_dangerious = 1 366 | if s[0]==2: 367 | right_lanechange_dangerious = 1 368 | continue 369 | if lane_index2_front: 370 | if ID_list_order[w] in lane_index2_front: 371 | # traci.vehicle.setSpeedMode('%d' % ID_list_order[w], 12) 372 | # traci.vehicle.setLaneChangeMode('%d' % ID_list_order[w], 1621) # 换道机制设定 373 | # traci.vehicle.setSpeed('%d' % ID_list_order[w], y[ m + already_t - 2*w]) 374 | traci.vehicle.setSpeedMode('%d' % ID_list_order[w], 31) 375 | traci.vehicle.setSpeed('%d' % ID_list_order[w], -1) 376 | if ID_list_order[w] == (lane_index2_front[0]): 377 | traci.vehicle.setSpeedMode('%d' % ID_list_order[w], 12) 378 | traci.vehicle.setLaneChangeMode('%d' % ID_list_order[w], 512) # 换道机制设定 379 | traci.vehicle.setSpeed('%d' % ID_list_order[w], y[ m + already_t - w]) 380 | continue 381 | 382 | if ID_list_order[w] == (lane_index2_front[-1]): 383 | lane_index2_v = traci.vehicle.getSpeed('%d' % lane_index2_front[-1]) 384 | # lane_index2_acc = y[ m + already_t - w] - lane_index2_v 385 | lane_index2_acc = traci.vehicle.getAcceleration('%d' % lane_index2_front[-1]) 386 | lane_index2_s = traci.vehicle.getPosition('%d'% ID_list_order[w])[0]-traci.vehicle.getPosition('%d' % ego_id)[0]-5 387 | if lane_index2_s <= 150: 388 | s[11] = lane_index2_v 389 | s[12] = lane_index2_acc 390 | s[13] = lane_index2_s 391 | if lane_index2_s <=10 and s[0]==1: 392 | left_lanechange_dangerious = 1 393 | continue 394 | 395 | ###获得前车的ID和车距 396 | if s[1]==0: 397 | s[3] = s[6] 398 | s[4] = s[7] 399 | s[2] = s[5] 400 | if s[1] == 1: 
401 | s[3] = s[9] 402 | s[4] = s[10] 403 | s[2] = s[8] 404 | if s[1]==2: 405 | s[3] = s[12] 406 | s[4] = s[13] 407 | s[2] = s[11] 408 | 409 | 410 | ### 此部分用于计算奖励时间 411 | for p in range(len(ID_list_order)): 412 | all_speed_list.append(traci.vehicle.getSpeed('%d' % ID_list_order[p])) 413 | sorted(all_speed_list, reverse=False) ###速度列表升序 414 | alist = numpy.array(all_speed_list) ###转换为数组 415 | q1 = numpy.percentile(alist, 25) 416 | q2 = numpy.percentile(alist, 50) 417 | q3 = numpy.percentile(alist, 75) 418 | iqr = q3 - q1 419 | q_low = q1 - (1.5 * iqr) 420 | q_85 = numpy.percentile(alist, 85) # 85位车速 421 | v_max_limit = 33 422 | potential_reward_max = 33 423 | t0 = (q_85 - s[0]) / 3 # 计算加速到85位车速时间 424 | t1 = (33 - s[0]) / 3 425 | s1 = (s[0] + 33) * t1 * 0.5 426 | ##############车道0的优势函数计算 427 | ss1 = s[7] + t1 * s[5] - s1 # 加速阶段结束车间距 428 | if s[0] <= s[5]: # 主车车速小于前车 429 | if ss1 >= 0: # 加速阶段车距大于等于0 430 | t3 = (v_max_limit - s[5]) / 3 # 计算减速时间 431 | if t3 > 0: # 减速时间大于0 432 | s2 = (s[5] + v_max_limit) * t3 / 2 # 计算减速距离 433 | ss2 = s[7] + (t1 + t3) * s[5] - s1 - s2 # 计算完整加减速过程后车距 434 | if ss2 >= 0: # 如果车距大于等于0 435 | t2 = ss2 / (v_max_limit - s[5] + 0.01) # 计算持续高速时间 436 | reward_speed = ((t1 + t2 + t3) * s[5] + s[7]) / (t1 + t2 + t3) 437 | potential_reward = min(reward_speed, potential_reward_max) 438 | if ss2 < 0: # 如果车距小于0,证明没有完整的减速过程 439 | t2_1 = round((((s[7] / 3) + round(((round((s[5] - s[0]), 2) + 0.01) ** 2), 2) / 18) ** 0.5), 2) 440 | t1_1 = (s[5] - s[0]) / 3 + t2_1 441 | reward_speed = ((t1_1 + t2_1) * s[5] + s[7]) / (t1_1 + t2_1) 442 | potential_reward = min(reward_speed, potential_reward_max) 443 | # print(potential_reward,'line607') 444 | if t3 == 0: # 减速时间等于0,证明前车是最高车速,此时永远追不上 445 | potential_reward = potential_reward_max 446 | # print(potential_reward, 'line610') 447 | if ss1 < 0: # 主车还没加速到最高速度就追上前车 448 | t2_1 = round((((s[7] / 3) + round(((round((s[5] - s[0]), 2) + 0.01) ** 2), 2) / 18) ** 0.5), 2) 449 | t1_1 = (s[5] - s[0]) / 3 + t2_1 450 | reward_speed = 
((t1_1 + t2_1) * s[5] + s[7]) / (t1_1 + t2_1) 451 | potential_reward = min(reward_speed, potential_reward_max) 452 | if s[0] > s[5]: # 主车车速大于前车 453 | t_brake = (s[0] - s[5]) / 3 # 计算紧急刹车时间 454 | s_brake = (s[0] + s[5]) / 2 * t_brake # 紧急刹车距离 455 | ss_brake = s[5] * t_brake + s[7] - s_brake # 刹车结束车距 456 | if ss_brake > 0: # 如果紧急刹车车距大于0,说明主车此刻不需要紧急刹车,可以先加速再紧急刹车或匀速再刹车 457 | if s[0] == v_max_limit: # 此时只能匀速再刹车 458 | t_keep = ss_brake / (s[0] - s[5]) 459 | reward_speed = ((t_brake + t_keep) * s[5] + s[7]) / (t_brake + t_keep) 460 | potential_reward = min(reward_speed, potential_reward_max) 461 | if s[0] < v_max_limit: 462 | ss_last = s[5] * (t_brake + t1) + s[7] - s1 - s_brake # 计算车间间距能否完成加速过程和刹车过程 463 | if ss_last > 0: # 主车可以先加速再匀速再减速 464 | t_keep = ss_last / (v_max_limit - s[5]) # 计算持续高速时间 465 | reward_speed = ((t1 + t_keep + t_brake) * s[5] + s[7]) / (t1 + t_keep + t_brake) 466 | potential_reward = min(reward_speed, potential_reward_max) 467 | if ss_last <= 0: 468 | t_add = 2 * ((ss_brake / 3 + t_brake * t_brake) ** 0.5 - t_brake) 469 | reward_speed = ((t_brake + t_add) * s[5] + s[7]) / (t_brake + t_add) 470 | potential_reward = min(reward_speed, potential_reward_max) 471 | if ss_brake < 0: 472 | potential_reward = 0 473 | if ss_brake == 0: 474 | reward_speed = s_brake / t_brake 475 | potential_reward = min(reward_speed, potential_reward_max) 476 | potential_reward_0 = potential_reward 477 | #########车道1的优势函数计算 478 | ss1 = s[10] + t1 * s[8] - s1 # 加速阶段结束车间距 479 | if s[0] <= s[8]: # 主车车速小于前车 480 | if ss1 >= 0: # 加速阶段车距大于等于0 481 | t3 = (v_max_limit - s[8]) / 3 # 计算减速时间 482 | if t3 > 0: # 减速时间大于0 483 | s2 = (s[8] + v_max_limit) * t3 / 2 # 计算减速距离 484 | ss2 = s[10] + (t1 + t3) * s[8] - s1 - s2 # 计算完整加减速过程后车距 485 | if ss2 >= 0: # 如果车距大于等于0 486 | t2 = ss2 / (v_max_limit - s[8] + 0.01) # 计算持续高速时间 487 | reward_speed = ((t1 + t2 + t3) * s[8] + s[10]) / (t1 + t2 + t3) 488 | potential_reward = min(reward_speed, potential_reward_max) 489 | if ss2 < 0: # 如果车距小于0,证明没有完整的减速过程 490 | 
t2_1 = round((((s[10] / 3) + round(((round((s[8] - s[0]), 2) + 0.01) ** 2), 2) / 18) ** 0.5), 2) 491 | # t2_1 = round(((s[10] / 3) + round(((round((s[8] - s[0]), 2) + 0.01) ** 2), 2) / 18), 2) ** 0.5 492 | t1_1 = (s[8] - s[0]) / 3 + t2_1 493 | reward_speed = ((t1_1 + t2_1) * s[8] + s[10]) / (t1_1 + t2_1) 494 | potential_reward = min(reward_speed, potential_reward_max) 495 | # print(potential_reward,'line607') 496 | if t3 == 0: # 减速时间等于0,证明前车是最高车速,此时永远追不上 497 | potential_reward = potential_reward_max 498 | # print(potential_reward, 'line610') 499 | if ss1 < 0: # 主车还没加速到最高速度就追上前车 500 | t2_1 = round((((s[10] / 3) + round(((round((s[8] - s[0]), 2) + 0.01) ** 2), 2) / 18) ** 0.5), 2) 501 | # t2_1 = ((s[10] / 3) + (((s[8] - s[0]) ** 2) / 18)) ** 0.5 502 | t1_1 = (s[8] - s[0]) / 3 + t2_1 503 | reward_speed = ((t1_1 + t2_1) * s[8] + s[10]) / (t1_1 + t2_1) 504 | potential_reward = min(reward_speed, potential_reward_max) 505 | if s[0] > s[8]: # 主车车速大于前车 506 | t_brake = (s[0] - s[8]) / 3 # 计算紧急刹车时间 507 | s_brake = (s[0] + s[8]) / 2 * t_brake # 紧急刹车距离 508 | ss_brake = s[8] * t_brake + s[10] - s_brake # 刹车结束车距 509 | if ss_brake > 0: # 如果紧急刹车车距大于0,说明主车此刻不需要紧急刹车,可以先加速再紧急刹车或匀速再刹车 510 | if s[0] == v_max_limit: # 此时只能匀速再刹车 511 | t_keep = ss_brake / (s[0] - s[8]) 512 | reward_speed = ((t_brake + t_keep) * s[8] + s[10]) / (t_brake + t_keep) 513 | potential_reward = min(reward_speed, potential_reward_max) 514 | if s[0] < v_max_limit: 515 | ss_last = s[8] * (t_brake + t1) + s[10] - s1 - s_brake # 计算车间间距能否完成加速过程和刹车过程 516 | if ss_last > 0: # 主车可以先加速再匀速再减速 517 | t_keep = ss_last / (v_max_limit - s[8]) # 计算持续高速时间 518 | reward_speed = ((t1 + t_keep + t_brake) * s[8] + s[10]) / (t1 + t_keep + t_brake) 519 | potential_reward = min(reward_speed, potential_reward_max) 520 | if ss_last <= 0: 521 | t_add = 2 * ((ss_brake / 3 + t_brake * t_brake) ** 0.5 - t_brake) 522 | reward_speed = ((t_brake + t_add) * s[8] + s[10]) / (t_brake + t_add) 523 | potential_reward = min(reward_speed, 
potential_reward_max) 524 | if ss_brake < 0: 525 | potential_reward = 0 526 | if ss_brake == 0: 527 | reward_speed = s_brake / t_brake 528 | potential_reward = min(reward_speed, potential_reward_max) 529 | potential_reward_1 = potential_reward 530 | ################## 531 | ######车道2优势函数计算 532 | ss1 = s[13] + t1 * s[11] - s1 # 加速阶段结束车间距 533 | if s[0] <= s[11]: # 主车车速小于前车 534 | if ss1 >= 0: # 加速阶段车距大于等于0 535 | t3 = (v_max_limit - s[11]) / 3 # 计算减速时间 536 | if t3 > 0: # 减速时间大于0 537 | s2 = (s[11] + v_max_limit) * t3 / 2 # 计算减速距离 538 | ss2 = s[13] + (t1 + t3) * s[11] - s1 - s2 # 计算完整加减速过程后车距 539 | if ss2 >= 0: # 如果车距大于等于0 540 | t2 = ss2 / (v_max_limit - s[11] + 0.01) # 计算持续高速时间 541 | reward_speed = ((t1 + t2 + t3) * s[11] + s[13]) / (t1 + t2 + t3) 542 | potential_reward = min(reward_speed, potential_reward_max) 543 | if ss2 < 0: # 如果车距小于0,证明没有完整的减速过程 544 | t2_1 = round((((s[13] / 3) + round(((round((s[11] - s[0]), 2) + 0.01) ** 2), 2) / 18) ** 0.5), 545 | 2) 546 | # t2_1 = round(((s[13] / 3) + round(((round((s[11] - s[0]), 2) + 0.01) ** 2), 2) / 18), 2) ** 0.5 547 | t1_1 = (s[11] - s[0]) / 3 + t2_1 548 | reward_speed = ((t1_1 + t2_1) * s[11] + s[13]) / (t1_1 + t2_1) 549 | potential_reward = min(reward_speed, potential_reward_max) 550 | # print(potential_reward,'line607') 551 | if t3 == 0: # 减速时间等于0,证明前车是最高车速,此时永远追不上 552 | potential_reward = potential_reward_max 553 | # print(potential_reward, 'line610') 554 | if ss1 < 0: # 主车还没加速到最高速度就追上前车 555 | t2_1 = round((((s[13] / 3) + round(((round((s[11] - s[0]), 2) + 0.01) ** 2), 2) / 18) ** 0.5), 2) 556 | # t2_1 = ((s[13] / 3) + (((s[11] - s[0]) ** 2) / 18)) ** 0.5 557 | t1_1 = (s[13] - s[0]) / 3 + t2_1 558 | reward_speed = ((t1_1 + t2_1) * s[11] + s[13]) / (t1_1 + t2_1) 559 | potential_reward = min(reward_speed, potential_reward_max) 560 | if s[0] > s[11]: # 主车车速大于前车 561 | t_brake = (s[0] - s[13]) / 3 # 计算紧急刹车时间 562 | s_brake = (s[0] + s[13]) / 2 * t_brake # 紧急刹车距离 563 | ss_brake = s[11] * t_brake + s[13] - s_brake # 刹车结束车距 
564 | if ss_brake > 0: # 如果紧急刹车车距大于0,说明主车此刻不需要紧急刹车,可以先加速再紧急刹车或匀速再刹车 565 | if s[0] == v_max_limit: # 此时只能匀速再刹车 566 | t_keep = ss_brake / (s[0] - s[11]) 567 | reward_speed = ((t_brake + t_keep) * s[11] + s[13]) / (t_brake + t_keep) 568 | potential_reward = min(reward_speed, potential_reward_max) 569 | if s[0] < v_max_limit: 570 | ss_last = s[11] * (t_brake + t1) + s[13] - s1 - s_brake # 计算车间间距能否完成加速过程和刹车过程 571 | if ss_last > 0: # 主车可以先加速再匀速再减速 572 | t_keep = ss_last / (v_max_limit - s[11]) # 计算持续高速时间 573 | reward_speed = ((t1 + t_keep + t_brake) * s[11] + s[13]) / (t1 + t_keep + t_brake) 574 | potential_reward = min(reward_speed, potential_reward_max) 575 | if ss_last <= 0: 576 | t_add = 2 * ((ss_brake / 3 + t_brake * t_brake) ** 0.5 - t_brake) 577 | reward_speed = ((t_brake + t_add) * s[11] + s[13]) / (t_brake + t_add) 578 | potential_reward = min(reward_speed, potential_reward_max) 579 | if ss_brake < 0: 580 | potential_reward = 0 581 | if ss_brake == 0: 582 | reward_speed = s_brake / t_brake 583 | potential_reward = min(reward_speed, potential_reward_max) 584 | potential_reward_2 = potential_reward 585 | #########下部分代码为换道状态赋值 586 | DQN_s[0] = s[0] 587 | DQN_s[1] = traci.vehicle.getAcceleration('%d' % ego_id) 588 | DQN_s[2] = s[1] 589 | DQN_s[3] = s[5] 590 | DQN_s[4] = s[6] 591 | DQN_s[5] = s[7] 592 | DQN_s[6] = s[8] 593 | DQN_s[7] = s[9] 594 | DQN_s[8] = s[10] 595 | DQN_s[9] = s[11] 596 | DQN_s[10] = s[12] 597 | DQN_s[11] = s[13] 598 | if lane_index == 0: 599 | DQN_s[12] = potential_reward_0 * 1.1+0.001 600 | # DQN_s[13] = potential_reward_1 * s[14] 601 | DQN_s[13] = potential_reward_1 * (1-left_lanechange_dangerious) 602 | DQN_s[14] = 0 603 | if lane_index == 1: 604 | # DQN_s[12] = potential_reward_0 * s[15] 605 | DQN_s[12] = potential_reward_0 * (1-right_lanechange_dangerious) 606 | DQN_s[13] = potential_reward_1 * 1.1+0.001 607 | # DQN_s[14] = potential_reward_2 * s[14] 608 | DQN_s[14] = potential_reward_2 * (1-left_lanechange_dangerious) 609 | if lane_index == 
2: 610 | DQN_s[12] = 0 611 | # DQN_s[13] = potential_reward_1 * s[15] 612 | DQN_s[13] = potential_reward_1 * (1-right_lanechange_dangerious) 613 | DQN_s[14] = potential_reward_2 * 1.1+0.001 614 | 615 | 616 | 617 | 618 | 619 | 620 | ###选择车道 621 | a = RL.choose_action(DQN_s) 622 | changelane = 0 # 换道状态初始化 623 | dangerious_lc = 0 624 | r_danger = 0 625 | r0 = 0 # 换道惩罚初始化 626 | r_potential = 0 627 | #########开始执行换道动作 628 | if a == 0: ##去车道0 629 | ############对于车道0来说 630 | cf_s[0] = DQN_s[0] 631 | cf_s[1] = DQN_s[3] 632 | cf_s[2] = DQN_s[5] 633 | cf_s[3] = DQN_s[4] 634 | # print(cf_s[1]) 635 | a_lane0 = actor.choose_action(cf_s) 636 | # a_lane0 = np.clip(np.random.normal(a_lane0, var), *ACTION_BOUND) 637 | if cf_s[2] <= 150: 638 | a_lane0 = np.clip(np.random.normal(a_lane0, var), *ACTION_BOUND) 639 | else: 640 | a_lane0 = 1 641 | DDPG_action = a_lane0 642 | traci.vehicle.setSpeed('%d' % ego_id, min((DQN_s[0] + 3 * a_lane0),v_max_limit)) 643 | # traci.vehicle.setSpeedMode('%d' % ID_list_order[w], 31) 644 | # traci.vehicle.setSpeed('%d' % ID_list_order[w], -1) 645 | if carfollowing_learn_start==1: 646 | traci.vehicle.setSpeed('%d' % ego_id, min((DQN_s[0] + 3 * a_lane0), v_max_limit)) 647 | if DQN_s[12] == 0: 648 | r0 = 0 649 | r_danger = -100 650 | elif lane_index == 1: # 主车由车道1换道0 651 | r_potential = DQN_s[12] - DQN_s[13] 652 | r0 = -1 653 | if lanechange_learn_start==1: 654 | traci.vehicle.changeLane('%d' % ego_id, lane_index - 1, 1) # 右换道 655 | changelane = 1 656 | if a == 1: ###去车道1 657 | ############对于车道1来说 658 | cf_s[0] = DQN_s[0] 659 | cf_s[1] = DQN_s[6] 660 | cf_s[2] = DQN_s[8] 661 | cf_s[3] = DQN_s[7] 662 | # print(cf_s[1]) 663 | a_lane1 = actor.choose_action(cf_s) 664 | # a_lane1 = np.clip(np.random.normal(a_lane1, var), *ACTION_BOUND) 665 | if cf_s[2] <= 150: 666 | a_lane1 = np.clip(np.random.normal(a_lane1, var), *ACTION_BOUND) 667 | else: 668 | a_lane1 = 1 669 | DDPG_action = a_lane1 670 | traci.vehicle.setSpeed('%d' % ego_id, min((DQN_s[0] + 3 * 
a_lane1),v_max_limit)) 671 | # traci.vehicle.setSpeedMode('%d' % ID_list_order[w], 31) 672 | # traci.vehicle.setSpeed('%d' % ID_list_order[w], -1) 673 | if carfollowing_learn_start == 1: 674 | traci.vehicle.setSpeed('%d' % ego_id, min((DQN_s[0] + 3 * a_lane1), v_max_limit)) 675 | if DQN_s[13] == 0: 676 | r0 = 0 677 | r_danger = -100 678 | elif lane_index == 0: # 证明主车在车道0 679 | if lanechange_learn_start == 1: 680 | traci.vehicle.changeLane('%d' % ego_id, lane_index + 1, 1) # 左换道 681 | changelane = 1 682 | elif lane_index == 2: # 主车由车道2 683 | if lanechange_learn_start == 1: 684 | traci.vehicle.changeLane('%d' % ego_id, lane_index - 1, 1) # 右换道 685 | changelane = 1 686 | if a == 2: ###去车道2 687 | ############对于车道2来说 688 | cf_s[0] = DQN_s[0] 689 | cf_s[1] = DQN_s[9] 690 | cf_s[2] = DQN_s[11] 691 | cf_s[3] = DQN_s[10] 692 | # print(cf_s[1]) 693 | a_lane2 = actor.choose_action(cf_s) 694 | # a_lane2 = np.clip(np.random.normal(a_lane2, var), *ACTION_BOUND) 695 | if cf_s[2] <= 150: 696 | a_lane2 = np.clip(np.random.normal(a_lane2, var), *ACTION_BOUND) 697 | else: 698 | a_lane2 = 1 699 | DDPG_action = a_lane2 700 | traci.vehicle.setSpeed('%d' % ego_id, min((DQN_s[0] + 3 * a_lane2),v_max_limit)) 701 | # traci.vehicle.setSpeedMode('%d' % ID_list_order[w], 31) 702 | # traci.vehicle.setSpeed('%d' % ID_list_order[w], -1) 703 | if carfollowing_learn_start == 1: 704 | traci.vehicle.setSpeed('%d' % ego_id, min((DQN_s[0] + 3 * a_lane2), v_max_limit)) 705 | if DQN_s[14] == 0: 706 | r0 = 0 707 | r_danger = -100 708 | elif lane_index == 1: # 主车由车道1换道2 709 | if lanechange_learn_start == 1: 710 | traci.vehicle.changeLane('%d' % ego_id, lane_index + 1, 1) # 左换道 711 | changelane = 1 712 | if r_danger == -100: 713 | dangerious_lc = 1 714 | 715 | ######################仿真到下一步 716 | lc_action_list.append(a) 717 | cf_action_list.append(DDPG_action) 718 | state_space.append(s.tolist()) 719 | if len(lc_action_list)==3: 720 | del(lc_action_list[0]) 721 | if len(cf_action_list)==3: 722 | 
del(cf_action_list[0]) 723 | if len(state_space)<2: 724 | continue 725 | if len(state_space)==3: 726 | del(state_space[0]) 727 | 728 | s = np.array(state_space[-2]) 729 | s_ = np.array(state_space[-1]) 730 | a = lc_action_list[-2] 731 | DDPG_action = cf_action_list[-2] 732 | # print(s_-s,a,DDPG_action) 733 | 734 | 735 | DDPG_s[0] = s[0] 736 | DDPG_s[1] = s[2] 737 | DDPG_s[2] = s[4] 738 | DDPG_s[3] = s[3] 739 | DDPG_s_[0] = s_[0] 740 | DDPG_s_[1] = s_[2] 741 | DDPG_s_[2] = s_[4] 742 | DDPG_s_[3] = s_[3] 743 | 744 | collision = 0 745 | distance_headway = s_[4] 746 | if distance_headway > 0: 747 | # r_dis = 1.1*(150-distance_headway)/75 748 | r_dis = 1 749 | if distance_headway >= 150: 750 | r_dis = -30 751 | if distance_headway <= 0: 752 | r_dis = min(10 * distance_headway, -100) 753 | collision = 1 754 | # print(j,s[4],s[7],s[10],s[13]) 755 | 756 | cf_r_efficient = s_[0] / 33 757 | cf_r = 33 * cf_r_efficient * 0.6 + r_dis - 2.25 * (DDPG_s_[0] - DDPG_s[0])*(DDPG_s_[0] - DDPG_s[0]) 758 | all_changelanetimes.append(changelane) 759 | r_efficient = s_[0]-11.6 760 | # r = -30 * collision + 0.8*33 * r_efficient + (-1) * changelane + r_dis*collision -2.25 * (DDPG_s_[0] - DDPG_s[0])*(DDPG_s_[0] - DDPG_s[0]) 761 | r = -30 * collision + 1 * r_efficient + (-1) * changelane 762 | RL.store_transition(DQN_s, a, r, DQN_s_) 763 | if total_step > MEMORY_CAPACITY: 764 | RL.learn() 765 | 766 | # M.store_transition(DDPG_s, DDPG_action, cf_r, DDPG_s_) 767 | # if M.pointer > MEMORY_CAPACITY: 768 | # var = max([var * 0.9995, VAR_MIN]) # decay the action randomness 769 | # # var = var * 0.99995 770 | # b_M = M.sample(BATCH_SIZE) 771 | # b_s = b_M[:, :STATE_DIM] 772 | # b_a = b_M[:, STATE_DIM: STATE_DIM + ACTION_DIM] 773 | # b_r = b_M[:, -STATE_DIM - 1: -STATE_DIM] 774 | # b_s_ = b_M[:, -STATE_DIM:] 775 | # critic.learn(b_s, b_a, b_r, b_s_) 776 | # actor.learn(b_s) 777 | 778 | all_cf_r.append(cf_r) 779 | cf_collision_list.append(collision) 780 | DQN_s = DQN_s_ 781 | 782 | total_step += 1 783 
| ep_step += 1 784 | all_r.append(r) 785 | rear_v_list.append(s_[0]) 786 | l_v_list.append(s_[2]) 787 | 788 | danger_lc.append(dangerious_lc) 789 | # lane_index0_acc_list.append(a_lane0) 790 | # lane_index1_acc_list.append(a_lane1) 791 | # lane_index2_acc_list.append(a_lane2) 792 | ep_step_list.append(j) 793 | distance_headway_list.append(s_[4]) 794 | 795 | fleet_mean_speed.append(q2) 796 | horizontal_position_list.append(traci.vehicle.getPosition('%d'%ego_id)[1]) 797 | lengthwise_position_list.append(traci.vehicle.getPosition('%d'%ego_id)[0]) 798 | order_ego_id_list.append(order_ego_id) 799 | 800 | mean_speed = sum(rear_v_list) / j 801 | mean_speed_list.append(mean_speed) 802 | total_ep_list.append(i) 803 | total_lc_r_list.append(sum(all_r)) 804 | total_mean_speed =sum(fleet_mean_speed)/j 805 | total_fleet_mean_speed.append(sum(fleet_mean_speed)/j) 806 | origin_speed = sum(origin_speed_ego_car_list)/len(origin_speed_ego_car_list) 807 | end_time=time.time() 808 | time_running = end_time-start_time 809 | print('episode=%s' % i, 'steps=%s' % j, 'reward=%s' % (sum(all_r)/len(all_r)), 'lanechange-times=%s' % sum(all_changelanetimes), 810 | 'dangerious-times=%s' % sum(danger_lc), 'epsilon=%s' % RL.epsilon,'time_spend=%s' % time_running) 811 | # if i >= stable_episodes: 812 | print("car_following:", 'reward=%s' % (sum(all_cf_r)/len(all_cf_r)),'collision-times=%s' % sum(cf_collision_list),'explore=%s'% var,'mean_speed=%s' % mean_speed,'total_fleet_mean_speed=%s' % total_mean_speed,'origin_mean_speed=%s' % origin_speed) 813 | 814 | 815 | # if mmm==1 and jjj==1 and sum(cf_collision_list) == 0 and sum(all_changelanetimes) <= last_save_lc and mean_speed >= last_save_speed: 816 | # path4 = './coupling train 106' 817 | # if os.path.isdir(path4): shutil.rmtree(path4) 818 | # os.mkdir(path4) 819 | # ckpt_path = os.path.join(path4, 'DuelingDQNPrioritizedReplay.ckpt') 820 | # save_path = saver2.save(sess2, ckpt_path, write_meta_graph=False) 821 | # print("\nSave Model %s\n" % 
# save_path,'ep=%s'%i,'speed=%s'%mean_speed,'lc_time=%s'%sum(all_changelanetimes))


# NOTE(review): indentation is not recoverable from this copy of the file;
# the two assignments below are assumed to sit at module level, after the
# training loop — confirm against the original source.
RL.epsilon=1
var=0.1

import pandas as pd
# from construct import set_trajectory,set_lane,set_speed,read_acc


def _vehicle_rows(vehicle_id):
    """Return the rows of the module-level ``df`` whose 'Vehicle_ID' equals *vehicle_id*."""
    return df[df['Vehicle_ID'] == vehicle_id]


def output_useful_id(vehicle_list=None):
    """Return the vehicle IDs that have more than 29 rows in ``df``.

    Args:
        vehicle_list: iterable of vehicle IDs to screen. ``None`` (the
            default) is treated as an empty input.

    Returns:
        A list of the distinct IDs from *vehicle_list* whose row count in the
        module-level ``df`` exceeds 29, in order of first appearance in
        *vehicle_list*.
    """
    if vehicle_list is None:
        return []
    # Count rows per vehicle in one pass instead of filtering the whole frame
    # once for every ID.
    row_counts = df['Vehicle_ID'].value_counts().to_dict()
    use_list = []
    seen = set()
    for vehicle_id in vehicle_list:
        if vehicle_id in seen:
            continue
        seen.add(vehicle_id)
        if row_counts.get(vehicle_id, 0) > 29:
            use_list.append(vehicle_id)
    return use_list


def set_speed(time=None, vehicle_id=None):
    """Command the recorded speed of *vehicle_id* at row index *time*.

    While *time* indexes an existing row of the vehicle's data, that row's
    'v_Vel' is passed to ``traci.vehicle.setSpeed``. When *time* equals the
    number of rows (the first step past the recording) the speed is set to 33
    once. For any later *time* no command is issued.

    Args:
        time: row index into the vehicle's rows of ``df``.
        vehicle_id: numeric vehicle ID; formatted with ``'%d'`` for TraCI.
    """
    rows = _vehicle_rows(vehicle_id)
    row_count = rows.shape[0]
    if time < row_count:
        traci.vehicle.setSpeed('%d' % vehicle_id, rows['v_Vel'].iloc[time])
    elif time == row_count:
        traci.vehicle.setSpeed('%d' % vehicle_id, 33)


def read_acc(time=None, vehicle_id=None):
    """Return the acceleration to use for *vehicle_id* at row index *time*.

    Args:
        time: row index into the vehicle's rows of ``df``.
        vehicle_id: numeric vehicle ID; formatted with ``'%d'`` for TraCI.

    Returns:
        The recorded 'v_Acc' of that row while *time* is inside the recording;
        otherwise ``33 - current speed`` clamped to the range [-3, 3].

    Raises:
        KeyError: if ``df`` has no 'v_Acc' column.
    """
    # NOTE(review): the pd.read_csv call further down in this file does not
    # list 'v_Acc' in usecols, so the lookup below would raise KeyError with
    # that frame — confirm which columns are really loaded.
    rows = _vehicle_rows(vehicle_id)
    if time < rows.shape[0]:
        return rows['v_Acc'].iloc[time]
    return max(min(33-traci.vehicle.getSpeed('%d' % vehicle_id),3),-3)


def set_trajectory(vehicle_id=None):
    """Place *vehicle_id* on edge "gneE0" according to its first recorded row.

    Reads the module-level ``min_y`` and ``last_y`` in addition to ``df``.
    The x coordinate is the row's 'Local_Y' offset from ``min_y``, shifted by
    ``last_y[0]`` plus 5; the lane argument is ``abs(Lane_ID - 5)``.

    Args:
        vehicle_id: numeric vehicle ID; formatted with ``'%d'`` for TraCI.

    Raises:
        IndexError: if ``df`` holds no row for *vehicle_id*.
    """
    first_row = _vehicle_rows(vehicle_id).iloc[0]
    vehicle_y_distance = 1.6-(abs(first_row['Lane_ID']-2))*3.2
    vehicle_x_distance = (first_row['Local_Y']-min_y) + last_y[0]
    # NOTE(review): int() truncates the computed lateral offset toward zero
    # (a value near -4.8 becomes -4) — confirm this is intended.
    traci.vehicle.moveToXY('%d' % vehicle_id,"gneE0", abs(first_row['Lane_ID']-5), vehicle_x_distance+5, int(vehicle_y_distance),keepRoute=1)


def set_lane(time=None, vehicle_id=None):
    """Move *vehicle_id* to the lane recorded at row index *time*.

    While *time* is inside the recording, the target lane index is
    ``abs(Lane_ID - 5)``; a ``changeLane`` command (duration argument 1) is
    sent only when it differs from the vehicle's current lane index. When
    *time* equals the number of rows the speed is set to 33 instead. For any
    later *time* nothing is done.

    Args:
        time: row index into the vehicle's rows of ``df``.
        vehicle_id: numeric vehicle ID; formatted with ``'%d'`` for TraCI.
    """
    rows = _vehicle_rows(vehicle_id)
    row_count = rows.shape[0]
    if time < row_count:
        vehicle_lane = abs(rows['Lane_ID'].iloc[time]-5)
        vehicle_lane_before = traci.vehicle.getLaneIndex('%d' % vehicle_id)
        if vehicle_lane!=vehicle_lane_before:
            traci.vehicle.changeLane('%d' % vehicle_id,vehicle_lane,1)
    elif time == row_count:
        traci.vehicle.setSpeed('%d' % vehicle_id, 33)


MAX_EPISODES = 2
MAX_EP_STEPS = 30

for sceen in range(15):
    # ego_id = 213
    sceen_id = sceen +1
    # df = pd.read_csv('./data/sumo-'+'%d'% sceen_id + 'us-101-'+'%d'% sceen_id + '.csv',usecols=["Vehicle_ID","Global_Time","Global_Y","Local_Y","v_Class","Location","v_Vel","Lane_ID","Preceding","Following","Space_Headway","Time_Headway","Location"])
    df = pd.read_csv('D:\\Project_codes of pycharm\\2021.8.12\\data\\'+'sumo-'+'%d'%sceen_id+'\\'+'us-101-'+'%d'% sceen_id + '.csv',
                     usecols=["Vehicle_ID", "Global_Time", "Global_Y", "Local_Y", "v_Class", "Location", "v_Vel",
                              "Lane_ID", "Preceding", "Following", "Space_Headway", "Time_Headway", "Location"])
    vehicle_list = df['Vehicle_ID'].unique()  # array of the distinct vehicle IDs
    min_y = df['Local_Y'].min()
    for vehicle in vehicle_list:
        y1 = df[df['Vehicle_ID'] == vehicle]
        mean_speed = sum(y1["v_Vel"]) / y1.shape[0]
910 | # print(vehicle,mean_speed) 911 | y2 = y1.iloc[0] 912 | y3 = y2['Local_Y'] 913 | if y3 == min_y: 914 | last_vehicle = vehicle 915 | 916 | for ego_id in output_useful_id(vehicle_list): 917 | # if ego_id in wrong_collision_list: 918 | # break 919 | y1 = df[df['Vehicle_ID'] == ego_id] 920 | ego_mean_speed = sum(y1["v_Vel"]) / y1.shape[0] 921 | start_IDM_follow = 0 922 | 923 | for i in range(MAX_EPISODES): 924 | ep_reward = 0 925 | ep_reward_all = 0 926 | ep_step = 0 927 | car_15_speed = [] 928 | v = [] 929 | all_r = [] 930 | rear_v_list = [] 931 | l_v_list = [] 932 | all_changelanetimes = [] 933 | all_changelanetimes.append(0) 934 | lane_index0_position_x = [] 935 | lane_index0_id_list = [] 936 | lane_index1_position_x = [] 937 | lane_index1_id_list = [] 938 | lane_index2_position_x = [] 939 | lane_index2_id_list = [] 940 | lane_index0_front = [] 941 | lane_index1_front = [] 942 | lane_index2_front = [] 943 | # r_potential = [] 944 | danger_lc = [] 945 | all_cf_r = [] 946 | cf_collision_list = [] 947 | lane_index0_acc_list=[] 948 | lane_index1_acc_list=[] 949 | lane_index2_acc_list=[] 950 | ep_step_list=[] 951 | distance_headway_list = [] 952 | dis_safe_list = [] 953 | total_r = 0 954 | r0 = 0 # 换道惩罚初始化 955 | t = 0 956 | k = 1 957 | s = np.zeros(s_dim) 958 | s_ = np.zeros(s_dim) 959 | DQN_s = np.zeros(DQN_s_dim) 960 | DQN_s_ = np.zeros(DQN_s_dim) 961 | 962 | DDPG_s = np.zeros(4) 963 | DDPG_s_ = np.zeros(4) 964 | r_danger_lc = 0 965 | r_high_frequency_lc = 1 966 | potential_reward_2_list = [] 967 | horizontal_position_list = [] 968 | lengthwise_position_list = [] 969 | order_ego_id_list = [] 970 | 971 | # traci.load(["-c", "./data//car-lc.sumocfg"]) 972 | traci.load(["-c", 'D:\\Project_codes of pycharm\\2021.8.12\data\\'+'sumo-'+'%d'%sceen_id+'\\'+'car-lc.sumocfg']) 973 | # traci.simulationStep(33) 974 | for step in range(39): 975 | ID_list_all = traci.edge.getLastStepVehicleIDs("gneE0") 976 | for vehicle in vehicle_list: 977 | if vehicle in ID_list_all: 978 | 
traci.vehicle.setSpeedMode('%d' % vehicle, 12) 979 | traci.vehicle.setLaneChangeMode('%d' % vehicle, 512) # 换道机制设定 980 | # traci.vehicle.setSpeed('%d' % vehicle, 15) # 初始速度设定 981 | set_speed(time=1, vehicle_id=int(vehicle)) 982 | traci.simulationStep(1+step) 983 | # print(traci.vehicle.getSpeed('%d'%4)) 984 | for vehicle in vehicle_list: 985 | # traci.vehicle.setSpeedMode('%d' % vehicle, 12) 986 | # traci.vehicle.setLaneChangeMode('%d' % vehicle, 512) # 换道机制设定 987 | # # traci.vehicle.setSpeed('%d' % vehicle, 15) # 初始速度设定 988 | if vehicle == last_vehicle: 989 | last_y = traci.vehicle.getPosition('%d' % vehicle) 990 | for vehicle in vehicle_list: 991 | # traci.vehicle.setSpeed('%d' % vehicle, 10) # 初始速度设定 992 | # traci.vehicle.setSpeedMode('%d' % vehicle, 12) 993 | # traci.vehicle.setLaneChangeMode('%d' % vehicle, 512) # 换道机制设定 994 | set_speed(time=1, vehicle_id=int(vehicle)) 995 | set_trajectory(vehicle_id=int(vehicle)) 996 | # traci.vehicle.setSpeedMode('%d' % ego_id, 12) 997 | traci.simulationStep(39) 998 | # for vehicle in vehicle_list: 999 | # set_speed(time=1,vehicle_id=int(vehicle)) 1000 | # traci.simulationStep(40) 1001 | # print(vehicle_list) 1002 | # print(traci.vehicle.getSpeed('%d'%4)) 1003 | for j in range(MAX_EP_STEPS): 1004 | 1005 | all_car_position_x = [] 1006 | all_car_position_y = [] 1007 | lane_index0_position_x = [] 1008 | lane_index0_id_list = [] 1009 | lane_index1_position_x = [] 1010 | lane_index1_id_list = [] 1011 | lane_index2_position_x = [] 1012 | lane_index2_id_list = [] 1013 | lane_index0_front = [] 1014 | lane_index1_front = [] 1015 | lane_index2_front = [] 1016 | Distance_list = [] 1017 | all_speed_list = [] 1018 | cf_s = np.zeros(4) 1019 | 1020 | changelane = 0 1021 | m = j 1022 | r0 = 0 1023 | ####这部分的代码作用是获取路网所有车辆,并按照行驶的位置进行前后排序 1024 | ID_list_all = traci.edge.getLastStepVehicleIDs("gneE0") # 获取主路所有车辆的ID编号 1025 | for x in range(len(ID_list_all)): # 长度 1026 | Distance_list.append(traci.vehicle.getDistance(ID_list_all[x])) # 元素跑的里程 
1027 | Index = sorted(range(len(Distance_list)), key=lambda k: Distance_list[k], reverse=True) # 距离表降序排列的索引 1028 | Index = np.array(Index) 1029 | ID_list_all = list(map(int, ID_list_all)) # 字符串转化为数值型 1030 | ID_list_order = np.array(ID_list_all)[Index] # 按照前后顺序进行车序排列 1031 | 1032 | ###排序完成之后对主车进行判断(是否主车已经超越所有的车辆成为第一辆车)和操作 1033 | # if ego_id == ID_list_order[0]: # 如果主车成为了头车 1034 | # ego_id = ID_list_order[-1] # 主车的控制对象变成了最后一辆车 1035 | for vehicle in (vehicle_list): 1036 | if vehicle != ego_id: 1037 | 1038 | set_speed(time=j, vehicle_id=int(vehicle)) 1039 | set_lane(time=j, vehicle_id=int(vehicle)) 1040 | if start_IDM_follow ==1: 1041 | if traci.vehicle.getLaneIndex('%d' % vehicle) != traci.vehicle.getLaneIndex('%d' % ego_id): 1042 | set_speed(time=j, vehicle_id=int(vehicle)) 1043 | set_lane(time=j, vehicle_id=int(vehicle)) 1044 | # if traci.vehicle.getPosition('%d' % vehicle)[0] - traci.vehicle.getPosition('%d' % ego_id)[ 1045 | # 0] >= 5: 1046 | # traci.vehicle.setSpeedMode('%d' % vehicle, 12) 1047 | # set_speed(time=j, vehicle_id=int(vehicle)) 1048 | # set_lane(time=j, vehicle_id=int(vehicle)) 1049 | if 0 150: 1190 | s[2] = 33 1191 | s[3] = 0.01 1192 | s[4] = 150 1193 | else: # 如果前车不存在 1194 | or_gap = 150 # 前车距离150 1195 | l_v = 33 # 前车车速30 1196 | l_acc = 0.01 1197 | s[3] = l_acc 1198 | s[4] = or_gap # 前车车距赋值给换道状态3 1199 | s[2] = l_v 1200 | # print(s[6], s[9], s[12], s[3],ego_id,j) 1201 | # print(s[7], s[10], s[13],s[4], or_gap_1[-1]) 1202 | # 此部分用于计算奖励时间 1203 | for vehicle in vehicle_list: 1204 | # if p != ego_id: 1205 | # all_speed_list.append(traci.vehicle.getSpeed('%d' % p)) 1206 | all_speed_list.append(traci.vehicle.getSpeed('%d' % vehicle)) 1207 | # print(all_speed_list) 1208 | sorted(all_speed_list, reverse=False) ##列表升序 1209 | 1210 | alist = numpy.array(all_speed_list) 1211 | # print(alist) 1212 | q1 = numpy.percentile(alist, 25) 1213 | q2 = numpy.percentile(alist, 50) 1214 | q3 = numpy.percentile(alist, 75) 1215 | iqr = q3 - q1 1216 | q_low = q1 - (1.5 * iqr) 
1217 | q_85 = numpy.percentile(alist, 85) # 85位车速 1218 | 1219 | t0 = (q_85 - s[0]) / 3 # 计算加速到85位车速时间 1220 | v_max_limit = 33 1221 | potential_reward_max = 33 1222 | # if t0 > 0 and s[2]>q_85: #如果主车车速小于85位车速且前车大于85位车速 1223 | t1 = (v_max_limit - s[0]) / 3 # 加速到最高限速时间 1224 | s1 = (s[0] + v_max_limit) * t1 * 0.5 # 加速距离 1225 | ##############车道0的优势函数计算 1226 | ss1 = s[7] + t1 * s[5] - s1 # 加速阶段结束车间距 1227 | if s[0] <= s[5]: # 主车车速小于前车 1228 | if ss1 >= 0: # 加速阶段车距大于等于0 1229 | t3 = (v_max_limit - s[5]) / 3 # 计算减速时间 1230 | if t3 > 0: # 减速时间大于0 1231 | s2 = (s[5] + v_max_limit) * t3 / 2 # 计算减速距离 1232 | ss2 = s[7] + (t1 + t3) * s[5] - s1 - s2 # 计算完整加减速过程后车距 1233 | if ss2 >= 0: # 如果车距大于等于0 1234 | t2 = ss2 / (v_max_limit - s[5] + 0.01) # 计算持续高速时间 1235 | reward_speed = ((t1 + t2 + t3) * s[5] + s[7]) / (t1 + t2 + t3) 1236 | potential_reward = min(reward_speed, potential_reward_max) 1237 | if ss2 < 0: # 如果车距小于0,证明没有完整的减速过程 1238 | t2_1 = round( 1239 | (((s[7] / 3) + round(((round((s[5] - s[0]), 2) + 0.01) ** 2), 2) / 18) ** 0.5), 2) 1240 | t1_1 = (s[5] - s[0]) / 3 + t2_1 1241 | reward_speed = ((t1_1 + t2_1) * s[5] + s[7]) / (t1_1 + t2_1) 1242 | potential_reward = min(reward_speed, potential_reward_max) 1243 | # print(potential_reward,'line607') 1244 | if t3 == 0: # 减速时间等于0,证明前车是最高车速,此时永远追不上 1245 | potential_reward = potential_reward_max 1246 | # print(potential_reward, 'line610') 1247 | if ss1 < 0: # 主车还没加速到最高速度就追上前车 1248 | t2_1 = round((((s[7] / 3) + round(((round((s[5] - s[0]), 2) + 0.01) ** 2), 2) / 18) ** 0.5), 2) 1249 | # t2_1 = ((s[7] / 3) + (((s[5] - s[0]) ** 2) / 18)) ** 0.5 1250 | t1_1 = (s[5] - s[0]) / 3 + t2_1 1251 | reward_speed = ((t1_1 + t2_1) * s[5] + s[7]) / (t1_1 + t2_1) 1252 | potential_reward = min(reward_speed, potential_reward_max) 1253 | if s[0] > s[5]: # 主车车速大于前车 1254 | t_brake = (s[0] - s[5]) / 3 # 计算紧急刹车时间 1255 | s_brake = (s[0] + s[5]) / 2 * t_brake # 紧急刹车距离 1256 | ss_brake = s[5] * t_brake + s[7] - s_brake # 刹车结束车距 1257 | if ss_brake > 0: # 
如果紧急刹车车距大于0,说明主车此刻不需要紧急刹车,可以先加速再紧急刹车或匀速再刹车 1258 | if s[0] == v_max_limit: # 此时只能匀速再刹车 1259 | t_keep = ss_brake / (s[0] - s[5]) 1260 | reward_speed = ((t_brake + t_keep) * s[5] + s[7]) / (t_brake + t_keep) 1261 | potential_reward = min(reward_speed, potential_reward_max) 1262 | if s[0] < v_max_limit: 1263 | ss_last = s[5] * (t_brake + t1) + s[7] - s1 - s_brake # 计算车间间距能否完成加速过程和刹车过程 1264 | if ss_last > 0: # 主车可以先加速再匀速再减速 1265 | t_keep = ss_last / (v_max_limit - s[5]) # 计算持续高速时间 1266 | reward_speed = ((t1 + t_keep + t_brake) * s[5] + s[7]) / (t1 + t_keep + t_brake) 1267 | potential_reward = min(reward_speed, potential_reward_max) 1268 | if ss_last <= 0: 1269 | t_add = 2 * ((ss_brake / 3 + t_brake * t_brake) ** 0.5 - t_brake) 1270 | reward_speed = ((t_brake + t_add) * s[5] + s[7]) / (t_brake + t_add) 1271 | potential_reward = min(reward_speed, potential_reward_max) 1272 | if ss_brake < 0: 1273 | potential_reward = 0 1274 | if ss_brake == 0: 1275 | reward_speed = s_brake / t_brake 1276 | potential_reward = min(reward_speed, potential_reward_max) 1277 | potential_reward_0 = potential_reward 1278 | #########车道1的优势函数计算 1279 | ss1 = s[10] + t1 * s[8] - s1 # 加速阶段结束车间距 1280 | if s[0] <= s[8]: # 主车车速小于前车 1281 | if ss1 >= 0: # 加速阶段车距大于等于0 1282 | t3 = (v_max_limit - s[8]) / 3 # 计算减速时间 1283 | if t3 > 0: # 减速时间大于0 1284 | s2 = (s[8] + v_max_limit) * t3 / 2 # 计算减速距离 1285 | ss2 = s[10] + (t1 + t3) * s[8] - s1 - s2 # 计算完整加减速过程后车距 1286 | if ss2 >= 0: # 如果车距大于等于0 1287 | t2 = ss2 / (v_max_limit - s[8] + 0.01) # 计算持续高速时间 1288 | reward_speed = ((t1 + t2 + t3) * s[8] + s[10]) / (t1 + t2 + t3) 1289 | potential_reward = min(reward_speed, potential_reward_max) 1290 | if ss2 < 0: # 如果车距小于0,证明没有完整的减速过程 1291 | t2_1 = round( 1292 | (((s[10] / 3) + round(((round((s[8] - s[0]), 2) + 0.01) ** 2), 2) / 18) ** 0.5), 2) 1293 | # t2_1 = round(((s[10] / 3) + round(((round((s[8] - s[0]), 2) + 0.01) ** 2), 2) / 18), 2) ** 0.5 1294 | t1_1 = (s[8] - s[0]) / 3 + t2_1 1295 | reward_speed = ((t1_1 + t2_1) * s[8] 
+ s[10]) / (t1_1 + t2_1) 1296 | potential_reward = min(reward_speed, potential_reward_max) 1297 | # print(potential_reward,'line607') 1298 | if t3 == 0: # 减速时间等于0,证明前车是最高车速,此时永远追不上 1299 | potential_reward = potential_reward_max 1300 | # print(potential_reward, 'line610') 1301 | if ss1 < 0: # 主车还没加速到最高速度就追上前车 1302 | t2_1 = round((((s[10] / 3) + round(((round((s[8] - s[0]), 2) + 0.01) ** 2), 2) / 18) ** 0.5), 2) 1303 | # t2_1 = ((s[10] / 3) + (((s[8] - s[0]) ** 2) / 18)) ** 0.5 1304 | t1_1 = (s[8] - s[0]) / 3 + t2_1 1305 | reward_speed = ((t1_1 + t2_1) * s[8] + s[10]) / (t1_1 + t2_1) 1306 | potential_reward = min(reward_speed, potential_reward_max) 1307 | if s[0] > s[8]: # 主车车速大于前车 1308 | t_brake = (s[0] - s[8]) / 3 # 计算紧急刹车时间 1309 | s_brake = (s[0] + s[8]) / 2 * t_brake # 紧急刹车距离 1310 | ss_brake = s[8] * t_brake + s[10] - s_brake # 刹车结束车距 1311 | if ss_brake > 0: # 如果紧急刹车车距大于0,说明主车此刻不需要紧急刹车,可以先加速再紧急刹车或匀速再刹车 1312 | if s[0] == v_max_limit: # 此时只能匀速再刹车 1313 | t_keep = ss_brake / (s[0] - s[8]) 1314 | reward_speed = ((t_brake + t_keep) * s[8] + s[10]) / (t_brake + t_keep) 1315 | potential_reward = min(reward_speed, potential_reward_max) 1316 | if s[0] < v_max_limit: 1317 | ss_last = s[8] * (t_brake + t1) + s[10] - s1 - s_brake # 计算车间间距能否完成加速过程和刹车过程 1318 | if ss_last > 0: # 主车可以先加速再匀速再减速 1319 | t_keep = ss_last / (v_max_limit - s[8]) # 计算持续高速时间 1320 | reward_speed = ((t1 + t_keep + t_brake) * s[8] + s[10]) / (t1 + t_keep + t_brake) 1321 | potential_reward = min(reward_speed, potential_reward_max) 1322 | if ss_last <= 0: 1323 | t_add = 2 * ((ss_brake / 3 + t_brake * t_brake) ** 0.5 - t_brake) 1324 | reward_speed = ((t_brake + t_add) * s[8] + s[10]) / (t_brake + t_add) 1325 | potential_reward = min(reward_speed, potential_reward_max) 1326 | if ss_brake < 0: 1327 | potential_reward = 0 1328 | if ss_brake == 0: 1329 | reward_speed = s_brake / t_brake 1330 | potential_reward = min(reward_speed, potential_reward_max) 1331 | potential_reward_1 = potential_reward 1332 | 
################## 1333 | 1334 | ######车道2优势函数计算 1335 | ss1 = s[13] + t1 * s[11] - s1 # 加速阶段结束车间距 1336 | if s[0] <= s[11]: # 主车车速小于前车 1337 | if ss1 >= 0: # 加速阶段车距大于等于0 1338 | t3 = (v_max_limit - s[11]) / 3 # 计算减速时间 1339 | if t3 > 0: # 减速时间大于0 1340 | s2 = (s[11] + v_max_limit) * t3 / 2 # 计算减速距离 1341 | ss2 = s[13] + (t1 + t3) * s[11] - s1 - s2 # 计算完整加减速过程后车距 1342 | if ss2 >= 0: # 如果车距大于等于0 1343 | t2 = ss2 / (v_max_limit - s[11] + 0.01) # 计算持续高速时间 1344 | reward_speed = ((t1 + t2 + t3) * s[11] + s[13]) / (t1 + t2 + t3) 1345 | potential_reward = min(reward_speed, potential_reward_max) 1346 | if ss2 < 0: # 如果车距小于0,证明没有完整的减速过程 1347 | t2_1 = round( 1348 | (((s[13] / 3) + round(((round((s[11] - s[0]), 2) + 0.01) ** 2), 2) / 18) ** 0.5), 2) 1349 | # t2_1 = round(((s[13] / 3) + round(((round((s[11] - s[0]), 2) + 0.01) ** 2), 2) / 18), 2) ** 0.5 1350 | t1_1 = (s[11] - s[0]) / 3 + t2_1 1351 | reward_speed = ((t1_1 + t2_1) * s[11] + s[13]) / (t1_1 + t2_1) 1352 | potential_reward = min(reward_speed, potential_reward_max) 1353 | # print(potential_reward,'line607') 1354 | if t3 == 0: # 减速时间等于0,证明前车是最高车速,此时永远追不上 1355 | potential_reward = potential_reward_max 1356 | # print(potential_reward, 'line610') 1357 | if ss1 < 0: # 主车还没加速到最高速度就追上前车 1358 | t2_1 = round((((s[13] / 3) + round(((round((s[11] - s[0]), 2) + 0.01) ** 2), 2) / 18) ** 0.5), 1359 | 2) 1360 | # t2_1 = ((s[13] / 3) + (((s[11] - s[0]) ** 2) / 18)) ** 0.5 1361 | t1_1 = (s[13] - s[0]) / 3 + t2_1 1362 | reward_speed = ((t1_1 + t2_1) * s[11] + s[13]) / (t1_1 + t2_1) 1363 | potential_reward = min(reward_speed, potential_reward_max) 1364 | if s[0] > s[11]: # 主车车速大于前车 1365 | t_brake = (s[0] - s[13]) / 3 # 计算紧急刹车时间 1366 | s_brake = (s[0] + s[13]) / 2 * t_brake # 紧急刹车距离 1367 | ss_brake = s[11] * t_brake + s[13] - s_brake # 刹车结束车距 1368 | if ss_brake > 0: # 如果紧急刹车车距大于0,说明主车此刻不需要紧急刹车,可以先加速再紧急刹车或匀速再刹车 1369 | if s[0] == v_max_limit: # 此时只能匀速再刹车 1370 | t_keep = ss_brake / (s[0] - s[11]) 1371 | reward_speed = ((t_brake + t_keep) * s[11] 
+ s[13]) / (t_brake + t_keep) 1372 | potential_reward = min(reward_speed, potential_reward_max) 1373 | if s[0] < v_max_limit: 1374 | ss_last = s[11] * (t_brake + t1) + s[13] - s1 - s_brake # 计算车间间距能否完成加速过程和刹车过程 1375 | if ss_last > 0: # 主车可以先加速再匀速再减速 1376 | t_keep = ss_last / (v_max_limit - s[11]) # 计算持续高速时间 1377 | reward_speed = ((t1 + t_keep + t_brake) * s[11] + s[13]) / (t1 + t_keep + t_brake) 1378 | potential_reward = min(reward_speed, potential_reward_max) 1379 | if ss_last <= 0: 1380 | t_add = 2 * ((ss_brake / 3 + t_brake * t_brake) ** 0.5 - t_brake) 1381 | reward_speed = ((t_brake + t_add) * s[11] + s[13]) / (t_brake + t_add) 1382 | potential_reward = min(reward_speed, potential_reward_max) 1383 | if ss_brake < 0: 1384 | potential_reward = 0 1385 | if ss_brake == 0: 1386 | reward_speed = s_brake / t_brake 1387 | potential_reward = min(reward_speed, potential_reward_max) 1388 | potential_reward_2 = potential_reward 1389 | # potential_reward_2_list.append(potential_reward_2) 1390 | #########下部分代码为换道状态赋值 1391 | DQN_s[0] = s[0] 1392 | DQN_s[1] = traci.vehicle.getAcceleration('%d' % ego_id) 1393 | DQN_s[2] = s[1] 1394 | DQN_s[3] = s[5] 1395 | DQN_s[4] = s[6] 1396 | DQN_s[5] = s[7] 1397 | DQN_s[6] = s[8] 1398 | DQN_s[7] = s[9] 1399 | DQN_s[8] = s[10] 1400 | DQN_s[9] = s[11] 1401 | DQN_s[10] = s[12] 1402 | DQN_s[11] = s[13] 1403 | if lane_index == 0: 1404 | DQN_s[12] = potential_reward_0 * 1.1 + 0.001 1405 | DQN_s[13] = potential_reward_1 * s[14] 1406 | DQN_s[14] = 0 1407 | 1408 | if lane_index == 1: 1409 | DQN_s[12] = potential_reward_0 * s[15] 1410 | DQN_s[13] = potential_reward_1 * 1.1 + 0.001 1411 | DQN_s[14] = potential_reward_2 * s[14] 1412 | 1413 | if lane_index == 2: 1414 | DQN_s[12] = 0 1415 | DQN_s[13] = potential_reward_1 * s[15] 1416 | DQN_s[14] = potential_reward_2 * 1.1 + 0.001 1417 | 1418 | r0 = 0 # 换道惩罚初始化 1419 | changelane = 0 # 换道状态初始化 1420 | r_danger = 0 1421 | r_potential = 0 1422 | 1423 | ############对于车道0来说 1424 | cf_s[0] = DQN_s[0] 1425 | 
cf_s[1] = DQN_s[3] 1426 | cf_s[2] = DQN_s[5] 1427 | cf_s[3] = DQN_s[4] 1428 | 1429 | a_lane0 = actor.choose_action(cf_s) 1430 | 1431 | a_lane0 = np.clip(np.random.normal(a_lane0, var), *ACTION_BOUND) 1432 | ############对于车道1来说 1433 | cf_s[0] = DQN_s[0] 1434 | cf_s[1] = DQN_s[6] 1435 | cf_s[2] = DQN_s[8] 1436 | cf_s[3] = DQN_s[7] 1437 | 1438 | a_lane1 = actor.choose_action(cf_s) 1439 | 1440 | a_lane1 = np.clip(np.random.normal(a_lane1, var), *ACTION_BOUND) 1441 | ############对于车道2来说 1442 | cf_s[0] = DQN_s[0] 1443 | cf_s[1] = DQN_s[9] 1444 | cf_s[2] = DQN_s[11] 1445 | cf_s[3] = DQN_s[10] 1446 | 1447 | a_lane2 = actor.choose_action(cf_s) 1448 | 1449 | a_lane2 = np.clip(np.random.normal(a_lane2, var), *ACTION_BOUND) 1450 | # if i >= stable_episodes: 1451 | # DQN_s[1] = max(a_lane0,a_lane1,a_lane2) 1452 | # DQN_s[15] = all_changelanetimes[-1] 1453 | a = RL.choose_action(DQN_s) 1454 | dangerious_lc = 0 1455 | if a == 0: ##去车道0 1456 | # if i >= stable_episodes: 1457 | DDPG_action = a_lane0 1458 | traci.vehicle.setSpeed('%d' % ego_id, min((DQN_s[0] + 3 * a_lane0),v_max_limit)) 1459 | # traci.vehicle.setSpeed('%d' % ego_id,-1) 1460 | if DQN_s[12] == 0: 1461 | r0 = 0 1462 | r_danger = -100 1463 | elif lane_index == 0: # 证明主车就在车道0 1464 | r_potential = DQN_s[12] - DQN_s[13] 1465 | elif lane_index == 1: # 主车由车道1换道0 1466 | r_potential = DQN_s[12] - DQN_s[13] 1467 | r0 = -1 1468 | traci.vehicle.changeLane('%d' % ego_id, lane_index - 1, 1) # 右换道 1469 | changelane = 1 1470 | if a == 1: ###去车道1 1471 | # if i >= stable_episodes: 1472 | DDPG_action = a_lane1 1473 | traci.vehicle.setSpeed('%d' % ego_id, min((DQN_s[0] + 3 * a_lane1),v_max_limit)) 1474 | # traci.vehicle.setSpeed('%d' % ego_id, -1) 1475 | if DQN_s[13] == 0: 1476 | r0 = 0 1477 | r_danger = -100 1478 | elif lane_index == 0: # 证明主车在车道0 1479 | r_potential = DQN_s[13] - DQN_s[12] 1480 | r0 = -1 1481 | traci.vehicle.changeLane('%d' % ego_id, lane_index + 1, 1) # 左换道 1482 | changelane = 1 1483 | elif lane_index == 1: # 主车由车道1保持 
1484 | r_potential = min(DQN_s[13] - DQN_s[12], DQN_s[13] - DQN_s[14]) 1485 | elif lane_index == 2: # 主车由车道2 1486 | r_potential = DQN_s[13] - DQN_s[14] 1487 | r0 = -1 1488 | traci.vehicle.changeLane('%d' % ego_id, lane_index - 1, 1) # 右换道 1489 | changelane = 1 1490 | if a == 2: ###去车道2 1491 | # if i >= stable_episodes: 1492 | DDPG_action = a_lane2 1493 | traci.vehicle.setSpeed('%d' % ego_id, min((DQN_s[0] + 3 * a_lane2),v_max_limit)) 1494 | # traci.vehicle.setSpeed('%d' % ego_id, -1) 1495 | if DQN_s[14] == 0: 1496 | r0 = 0 1497 | r_danger = -100 1498 | elif lane_index == 2: # 证明主车就在车道2 1499 | r_potential = DQN_s[14] - DQN_s[13] 1500 | elif lane_index == 1: # 主车由车道1换道2 1501 | r_potential = DQN_s[14] - DQN_s[13] 1502 | r0 = -1 1503 | traci.vehicle.changeLane('%d' % ego_id, lane_index + 1, 1) # 左换道 1504 | changelane = 1 1505 | if r_danger == -100: 1506 | dangerious_lc = 1 1507 | # if dangerious_lc==1 and i>=MAX_EPISODES-200: 1508 | # if dangerious_lc == 1 : 1509 | # break 1510 | traci.simulationStep(j + 40) 1511 | 1512 | s_ = s 1513 | # s_[0] = update_speed 1514 | s_[0] = traci.vehicle.getSpeed('%d' % ego_id) # 主车车速更新 1515 | # print(s_[0]) 1516 | s_[0] = max(s_[0], 1) 1517 | s_[1] = traci.vehicle.getLaneIndex('%d' % ego_id) 1518 | ID_list_all = [] 1519 | ID_list_order = [] 1520 | Distance_list = [] 1521 | or_gap_1 = [] 1522 | lane_index0_position_x = [] 1523 | lane_index0_id_list = [] 1524 | lane_index1_position_x = [] 1525 | lane_index1_id_list = [] 1526 | lane_index2_position_x = [] 1527 | lane_index2_id_list = [] 1528 | lane_index0_front = [] 1529 | lane_index1_front = [] 1530 | lane_index2_front = [] 1531 | ID_list_all = traci.edge.getLastStepVehicleIDs("gneE0") # 获取主路所有车辆的ID编号 1532 | for x in range(len(ID_list_all)): # 长度应该为36 1533 | Distance_list.append(traci.vehicle.getDistance(ID_list_all[x])) # 36个元素跑的里程 1534 | Index = sorted(range(len(Distance_list)), key=lambda k: Distance_list[k], reverse=True) # 距离表降序排列的索引 1535 | Index = np.array(Index) 1536 | ID_list_all 
= list(map(int, ID_list_all)) # 字符串转化为数值型 1537 | ID_list_order = np.array(ID_list_all)[Index] # 按照前后顺序进行车序排列 1538 | ID_list_order_list = ID_list_order.tolist() 1539 | order_ego_id = ID_list_order_list.index(ego_id) 1540 | for p in range(len(ID_list_order)): 1541 | if traci.vehicle.getPosition('%d' % ID_list_order[p])[1] == -8: 1542 | lane_index0_position_x.append(traci.vehicle.getPosition('%d' % ID_list_order[p])[0]) 1543 | lane_index0_id_list.append(ID_list_order[p]) 1544 | if traci.vehicle.getPosition('%d' % ID_list_order[p])[1] == -4.8: 1545 | lane_index1_position_x.append(traci.vehicle.getPosition('%d' % ID_list_order[p])[0]) 1546 | lane_index1_id_list.append(ID_list_order[p]) 1547 | if traci.vehicle.getPosition('%d' % ID_list_order[p])[1] == -1.6: 1548 | lane_index2_position_x.append(traci.vehicle.getPosition('%d' % ID_list_order[p])[0]) 1549 | lane_index2_id_list.append(ID_list_order[p]) 1550 | lane_index = traci.vehicle.getLaneIndex('%d' % ego_id) # 获得主车所在车道index 1551 | position = traci.vehicle.getPosition('%d' % ego_id) # 获得主车坐标 1552 | # s[16] = (8-abs(position[1]))/3.2 # 获得主车所在车道的状态 1553 | position = [max(position[0], 0), max(position[1], -8)] 1554 | for p in range(len(lane_index0_position_x)): 1555 | if lane_index0_position_x[p] <= position[0]+5: 1556 | break 1557 | lane_index0_front.append(lane_index0_id_list[p]) 1558 | for p in range(len(lane_index1_position_x)): 1559 | 1560 | if lane_index1_position_x[p] <= position[0]+5: 1561 | break 1562 | lane_index1_front.append(lane_index1_id_list[p]) 1563 | for p in range(len(lane_index2_position_x)): 1564 | 1565 | if lane_index2_position_x[p] <= position[0]+5: 1566 | break 1567 | lane_index2_front.append(lane_index2_id_list[p]) 1568 | s_[5] = 33 1569 | s_[6] = 0.01 1570 | s_[7] = 150 1571 | s_[8] = 33 1572 | s_[9] = 0.01 1573 | s_[10] = 150 1574 | s_[11] = 33 1575 | s_[12] = 0.01 1576 | s_[13] = 150 1577 | for w in range(len(ID_list_all)): 1578 | if lane_index0_front: 1579 | if ID_list_order[w] == 
int(lane_index0_front[-1]): 1580 | lane_index0_v = traci.vehicle.getSpeed('%d' % lane_index0_front[-1]) 1581 | if df[df['Vehicle_ID'] == int(lane_index0_front[-1])].shape[0] 150: 1644 | s_[2] = 33 1645 | s_[3] = 0.01 1646 | s_[4] = 150 1647 | else: # 如果前车不存在 1648 | or_gap = 150 # 前车距离150 1649 | l_v = 33 # 前车车速30 1650 | l_acc = 0.01 1651 | s_[3] = l_acc 1652 | s_[4] = or_gap # 前车车距赋值给换道状态3 1653 | s_[2] = l_v 1654 | s_[14] = traci.vehicle.couldChangeLane('%d' % ego_id, 1) # 左换道可行性 1655 | s_[15] = traci.vehicle.couldChangeLane('%d' % ego_id, -1) # 右换道可行性 1656 | # s_[16] = a[1] # 横向平均速度 1657 | ######状态更新部分 1658 | for vehicle in vehicle_list: 1659 | # if p != ego_id: 1660 | # all_speed_list.append(traci.vehicle.getSpeed('%d' % p)) 1661 | all_speed_list.append(traci.vehicle.getSpeed('%d' % vehicle)) 1662 | # print(all_speed_list) 1663 | sorted(all_speed_list, reverse=False) ##列表升序 1664 | 1665 | alist = numpy.array(all_speed_list) 1666 | # print(alist) 1667 | q1 = numpy.percentile(alist, 25) 1668 | q2 = numpy.percentile(alist, 50) 1669 | q3 = numpy.percentile(alist, 75) 1670 | iqr = q3 - q1 1671 | q_low = q1 - (1.5 * iqr) 1672 | q_85 = numpy.percentile(alist, 85) # 85位车速 1673 | 1674 | t0 = (q_85 - s_[0]) / 3 # 计算加速到85位车速时间 1675 | # if t0 > 0 and s_[2]>q_85: #如果主车车速小于85位车速且前车大于85位车速 1676 | t1 = (33 - s_[0]) / 3 1677 | s1 = (s_[0] + 33) * t1 * 0.5 1678 | ##############车道0的优势函数计算 1679 | ss1 = s_[7] + t1 * s_[5] - s1 # 加速阶段结束车间距 1680 | if s_[0] <= s_[5]: # 主车车速小于前车 1681 | if ss1 >= 0: # 加速阶段车距大于等于0 1682 | t3 = (v_max_limit - s[5]) / 3 # 计算减速时间 1683 | if t3 > 0: # 减速时间大于0 1684 | s2 = (s_[5] + v_max_limit) * t3 / 2 # 计算减速距离 1685 | ss2 = s_[7] + (t1 + t3) * s_[5] - s1 - s2 # 计算完整加减速过程后车距 1686 | if ss2 >= 0: # 如果车距大于等于0 1687 | t2 = ss2 / (v_max_limit - s_[5] + 0.01) # 计算持续高速时间 1688 | reward_speed = ((t1 + t2 + t3) * s_[5] + s_[7]) / (t1 + t2 + t3) 1689 | potential_reward = min(reward_speed, potential_reward_max) 1690 | if ss2 < 0: # 如果车距小于0,证明没有完整的减速过程 1691 | # 
print(s_[0],s_[5],s_[7]) 1692 | t2_1 = (math.ceil(((math.ceil((s_[5] - s_[0]) + 0.01) ** 2) / 18 + (s_[7] / 3)))) ** 0.5 1693 | # t2_1 = round((((s[7] / 3) + round(((round((s[5] - s[0]), 2) + 0.01) ** 2), 2) / 18) ** 0.5),2) 1694 | # t2_1 = round(((s[7] / 3) + round(((round((s[5] - s[0]), 2) + 0.01) ** 2), 2) / 18), 2) ** 0.5 1695 | t1_1 = (s_[5] - s_[0]) / 3 + t2_1 1696 | reward_speed = ((t1_1 + t2_1) * s_[5] + s_[7]) / (t1_1 + t2_1) 1697 | potential_reward = min(reward_speed, potential_reward_max) 1698 | # print(potential_reward,'line607') 1699 | if t3 == 0: # 减速时间等于0,证明前车是最高车速,此时永远追不上 1700 | potential_reward = potential_reward_max 1701 | # print(potential_reward, 'line610') 1702 | if ss1 < 0: # 主车还没加速到最高速度就追上前车 1703 | # print(s_[0], s_[5], s_[7]) 1704 | t2_1 = (math.ceil(((math.ceil((s_[5] - s_[0]) + 0.01) ** 2) / 18 + (s_[7] / 3)))) ** 0.5 1705 | # t2_1 = round((((s[7] / 3) + round(((round((s[5] - s[0]), 2) + 0.01) ** 2), 2) / 18) ** 0.5),2) 1706 | # t2_1 = ((s[7] / 3) + (((s[5] - s[0]+0.01) ** 2) / 18)) ** 0.5 1707 | t1_1 = (s_[5] - s_[0]) / 3 + t2_1 1708 | reward_speed = ((t1_1 + t2_1) * s_[5] + s_[7]) / (t1_1 + t2_1) 1709 | potential_reward = min(reward_speed, potential_reward_max) 1710 | if s_[0] > s_[5]: # 主车车速大于前车 1711 | t_brake = (s_[0] - s_[5]) / 3 # 计算紧急刹车时间 1712 | s_brake = (s_[0] + s_[5]) / 2 * t_brake # 紧急刹车距离 1713 | ss_brake = s_[5] * t_brake + s_[7] - s_brake # 刹车结束车距 1714 | if ss_brake > 0: # 如果紧急刹车车距大于0,说明主车此刻不需要紧急刹车,可以先加速再紧急刹车或匀速再刹车 1715 | if s_[0] == v_max_limit: # 此时只能匀速再刹车 1716 | t_keep = ss_brake / (s_[0] - s_[5]) 1717 | reward_speed = ((t_brake + t_keep) * s_[5] + s_[7]) / (t_brake + t_keep) 1718 | potential_reward = min(reward_speed, potential_reward_max) 1719 | if s_[0] < v_max_limit: 1720 | ss_last = s_[5] * (t_brake + t1) + s_[7] - s1 - s_brake # 计算车间间距能否完成加速过程和刹车过程 1721 | if ss_last > 0: # 主车可以先加速再匀速再减速 1722 | t_keep = ss_last / (v_max_limit - s_[5]) # 计算持续高速时间 1723 | reward_speed = ((t1 + t_keep + t_brake) * s_[5] + s_[7]) / (t1 + 
t_keep + t_brake) 1724 | potential_reward = min(reward_speed, potential_reward_max) 1725 | if ss_last <= 0: 1726 | t_add = 2 * ((ss_brake / 3 + t_brake * t_brake) ** 0.5 - t_brake) 1727 | reward_speed = ((t_brake + t_add) * s_[5] + s_[7]) / (t_brake + t_add) 1728 | potential_reward = min(reward_speed, potential_reward_max) 1729 | if ss_brake < 0: 1730 | potential_reward = 0 1731 | if ss_brake == 0: 1732 | reward_speed = s_brake / t_brake 1733 | potential_reward = min(reward_speed, potential_reward_max) 1734 | potential_reward_0 = potential_reward 1735 | #########车道1的优势函数计算 1736 | ss1 = s_[10] + t1 * s_[8] - s1 # 加速阶段结束车间距 1737 | if s_[0] <= s_[8]: # 主车车速小于前车 1738 | if ss1 >= 0: # 加速阶段车距大于等于0 1739 | t3 = (v_max_limit - s_[8]) / 3 # 计算减速时间 1740 | if t3 > 0: # 减速时间大于0 1741 | s2 = (s_[8] + v_max_limit) * t3 / 2 # 计算减速距离 1742 | ss2 = s_[10] + (t1 + t3) * s_[8] - s1 - s2 # 计算完整加减速过程后车距 1743 | if ss2 >= 0: # 如果车距大于等于0 1744 | t2 = ss2 / (v_max_limit - s_[8] + 0.01) # 计算持续高速时间 1745 | reward_speed = ((t1 + t2 + t3) * s_[8] + s_[10]) / (t1 + t2 + t3) 1746 | potential_reward = min(reward_speed, potential_reward_max) 1747 | if ss2 < 0: # 如果车距小于0,证明没有完整的减速过程 1748 | t2_1 = (math.ceil( 1749 | ((math.ceil((s_[8] - s_[0]) + 0.01) ** 2) / 18 + (s_[10] / 3)))) ** 0.5 1750 | # t2_1 = round((((s[10] / 3) + round(((round((s[8] - s[0]), 2) + 0.01) ** 2), 2) / 18) ** 0.5), 2) 1751 | # t2_1 = round(((s[10] / 3) + round(((round((s[8] - s[0]), 2) + 0.01) ** 2), 2) / 18), 2) ** 0.5 1752 | t1_1 = (s_[8] - s_[0]) / 3 + t2_1 1753 | reward_speed = ((t1_1 + t2_1) * s_[8] + s_[10]) / (t1_1 + t2_1) 1754 | potential_reward = min(reward_speed, potential_reward_max) 1755 | # print(potential_reward,'line607') 1756 | if t3 == 0: # 减速时间等于0,证明前车是最高车速,此时永远追不上 1757 | potential_reward = potential_reward_max 1758 | # print(potential_reward, 'line610') 1759 | if ss1 < 0: # 主车还没加速到最高速度就追上前车 1760 | t2_1 = (math.ceil(((math.ceil((s_[8] - s_[0]) + 0.01) ** 2) / 18 + (s_[10] / 3)))) ** 0.5 1761 | # t2_1 = 
round((((s[10] / 3) + round(((round((s[8] - s[0]), 2) + 0.01) ** 2), 2) / 18) ** 0.5), 2) 1762 | # t2_1 = ((s[10] / 3) + (((s[8] - s[0]+0.01) ** 2) / 18)) ** 0.5 1763 | t1_1 = (s_[8] - s_[0]) / 3 + t2_1 1764 | reward_speed = ((t1_1 + t2_1) * s_[8] + s_[10]) / (t1_1 + t2_1) 1765 | potential_reward = min(reward_speed, potential_reward_max) 1766 | if s_[0] > s_[8]: # 主车车速大于前车 1767 | t_brake = (s_[0] - s_[8]) / 3 # 计算紧急刹车时间 1768 | s_brake = (s_[0] + s_[8]) / 2 * t_brake # 紧急刹车距离 1769 | ss_brake = s_[8] * t_brake + s_[10] - s_brake # 刹车结束车距 1770 | if ss_brake > 0: # 如果紧急刹车车距大于0,说明主车此刻不需要紧急刹车,可以先加速再紧急刹车或匀速再刹车 1771 | if s_[0] == v_max_limit: # 此时只能匀速再刹车 1772 | t_keep = ss_brake / (s_[0] - s_[8]) 1773 | reward_speed = ((t_brake + t_keep) * s_[8] + s_[10]) / (t_brake + t_keep) 1774 | potential_reward = min(reward_speed, potential_reward_max) 1775 | if s_[0] < v_max_limit: 1776 | ss_last = s_[8] * (t_brake + t1) + s_[10] - s1 - s_brake # 计算车间间距能否完成加速过程和刹车过程 1777 | if ss_last > 0: # 主车可以先加速再匀速再减速 1778 | t_keep = ss_last / (v_max_limit - s_[8]) # 计算持续高速时间 1779 | reward_speed = ((t1 + t_keep + t_brake) * s_[8] + s_[10]) / (t1 + t_keep + t_brake) 1780 | potential_reward = min(reward_speed, potential_reward_max) 1781 | if ss_last <= 0: 1782 | t_add = 2 * ((ss_brake / 3 + t_brake * t_brake) ** 0.5 - t_brake) 1783 | reward_speed = ((t_brake + t_add) * s_[8] + s_[10]) / (t_brake + t_add) 1784 | potential_reward = min(reward_speed, potential_reward_max) 1785 | if ss_brake < 0: 1786 | potential_reward = 0 1787 | if ss_brake == 0: 1788 | reward_speed = s_brake / t_brake 1789 | potential_reward = min(reward_speed, potential_reward_max) 1790 | potential_reward_1 = potential_reward 1791 | ################## 1792 | 1793 | ######车道2优势函数计算 1794 | ss1 = s_[13] + t1 * s_[11] - s1 # 加速阶段结束车间距 1795 | if s_[0] <= s_[11]: # 主车车速小于前车 1796 | if ss1 >= 0: # 加速阶段车距大于等于0 1797 | t3 = (v_max_limit - s_[11]) / 3 # 计算减速时间 1798 | if t3 > 0: # 减速时间大于0 1799 | s2 = (s_[11] + v_max_limit) * t3 / 2 # 计算减速距离 1800 
| ss2 = s_[13] + (t1 + t3) * s_[11] - s1 - s2 # 计算完整加减速过程后车距 1801 | if ss2 >= 0: # 如果车距大于等于0 1802 | t2 = ss2 / (v_max_limit - s_[11] + 0.01) # 计算持续高速时间 1803 | reward_speed = ((t1 + t2 + t3) * s_[11] + s_[13]) / (t1 + t2 + t3) 1804 | potential_reward = min(reward_speed, potential_reward_max) 1805 | if ss2 < 0: # 如果车距小于0,证明没有完整的减速过程 1806 | t2_1 = (math.ceil( 1807 | ((math.ceil((s_[11] - s_[0]) + 0.01) ** 2) / 18 + (s_[13] / 3)))) ** 0.5 1808 | # t2_1 = round((((s[13] / 3) + round(((round((s[11] - s[0]), 2) + 0.01) ** 2), 2) / 18) ** 0.5), 2) 1809 | # t2_1 = round(((s[13] / 3) + round(((round((s[11] - s[0]),2)+0.01) ** 2),2) / 18),2) ** 0.5 1810 | t1_1 = (s_[11] - s_[0]) / 3 + t2_1 1811 | reward_speed = ((t1_1 + t2_1) * s_[11] + s_[13]) / (t1_1 + t2_1) 1812 | potential_reward = min(reward_speed, potential_reward_max) 1813 | # print(potential_reward,'line607') 1814 | if t3 == 0: # 减速时间等于0,证明前车是最高车速,此时永远追不上 1815 | potential_reward = potential_reward_max 1816 | # print(potential_reward, 'line610') 1817 | if ss1 < 0: # 主车还没加速到最高速度就追上前车 1818 | t2_1 = (math.ceil(((math.ceil((s_[11] - s_[0]) + 0.01) ** 2) / 18 + (s_[13] / 3)))) ** 0.5 1819 | # t2_1 = round((((s[13] / 3) + round(((round((s[11] - s[0]), 2) + 0.01) ** 2), 2) / 18) ** 0.5), 2) 1820 | t1_1 = (s_[13] - s_[0]) / 3 + t2_1 1821 | reward_speed = ((t1_1 + t2_1) * s_[11] + s_[13]) / (t1_1 + t2_1) 1822 | potential_reward = min(reward_speed, potential_reward_max) 1823 | if s_[0] > s_[11]: # 主车车速大于前车 1824 | t_brake = (s_[0] - s_[13]) / 3 # 计算紧急刹车时间 1825 | s_brake = (s_[0] + s_[13]) / 2 * t_brake # 紧急刹车距离 1826 | ss_brake = s_[11] * t_brake + s_[13] - s_brake # 刹车结束车距 1827 | if ss_brake > 0: # 如果紧急刹车车距大于0,说明主车此刻不需要紧急刹车,可以先加速再紧急刹车或匀速再刹车 1828 | if s_[0] == v_max_limit: # 此时只能匀速再刹车 1829 | t_keep = ss_brake / (s_[0] - s_[11]) 1830 | reward_speed = ((t_brake + t_keep) * s_[11] + s_[13]) / (t_brake + t_keep) 1831 | potential_reward = min(reward_speed, potential_reward_max) 1832 | if s_[0] < v_max_limit: 1833 | ss_last = s_[11] * 
(t_brake + t1) + s_[13] - s1 - s_brake # 计算车间间距能否完成加速过程和刹车过程 1834 | if ss_last > 0: # 主车可以先加速再匀速再减速 1835 | t_keep = ss_last / (v_max_limit - s_[11]) # 计算持续高速时间 1836 | reward_speed = ((t1 + t_keep + t_brake) * s_[11] + s_[13]) / (t1 + t_keep + t_brake) 1837 | potential_reward = min(reward_speed, potential_reward_max) 1838 | if ss_last <= 0: 1839 | t_add = 2 * ((ss_brake / 3 + t_brake * t_brake) ** 0.5 - t_brake) 1840 | reward_speed = ((t_brake + t_add) * s_[11] + s_[13]) / (t_brake + t_add) 1841 | potential_reward = min(reward_speed, potential_reward_max) 1842 | if ss_brake < 0: 1843 | potential_reward = 0 1844 | if ss_brake == 0: 1845 | reward_speed = s_brake / t_brake 1846 | potential_reward = min(reward_speed, potential_reward_max) 1847 | potential_reward_2 = potential_reward 1848 | # potential_reward_2_list.append(potential_reward_2) 1849 | #########下部分代码为换道状态赋值 1850 | DQN_s_[0] = s_[0] 1851 | DQN_s_[1] = traci.vehicle.getAcceleration('%d' % ego_id) 1852 | DQN_s_[2] = s_[1] 1853 | DQN_s_[3] = s_[5] 1854 | DQN_s_[4] = s_[6] 1855 | DQN_s_[5] = s_[7] 1856 | DQN_s_[6] = s_[8] 1857 | DQN_s_[7] = s_[9] 1858 | DQN_s_[8] = s_[10] 1859 | DQN_s_[9] = s_[11] 1860 | DQN_s_[10] = s_[12] 1861 | DQN_s_[11] = s_[13] 1862 | if lane_index == 0: 1863 | DQN_s_[12] = potential_reward_0 * 1.1+0.001 1864 | DQN_s_[13] = potential_reward_1 * s_[14] 1865 | DQN_s_[14] = 0 1866 | 1867 | if lane_index == 1: 1868 | DQN_s_[12] = potential_reward_0 * s_[15] 1869 | DQN_s_[13] = potential_reward_1 * 1.1+0.001 1870 | DQN_s_[14] = potential_reward_2 * s_[14] 1871 | 1872 | if lane_index == 2: 1873 | DQN_s_[12] = 0 1874 | DQN_s_[13] = potential_reward_1 * s_[15] 1875 | DQN_s_[14] = potential_reward_2 * 1.1+0.001 1876 | 1877 | ####以下为奖励部分 1878 | 1879 | # r_efficient = math.log((s_[0] + 0.01), max((q_85, 1))) 1880 | # r_efficient = math.log((s_[0] + 0.01), 30) 1881 | # r = (r_potential+5*r_danger ) #可以使得安全换道 1882 | # if i >=stable_episodes: 1883 | 1884 | cf_danger = 0 1885 | collision = 0 1886 | 
distance_headway = s_[4] 1887 | if distance_headway > 0: 1888 | r_dis = 1 1889 | if distance_headway > 150: 1890 | r_dis = -3 1891 | if distance_headway <= 0: 1892 | r_dis = min(10 * distance_headway, -20) 1893 | collision = 1 1894 | cf_r_efficient = s_[0] / 25 1895 | # r = 3*r_efficient + r_dis 1896 | cf_r = 10 * cf_r_efficient + r_dis 1897 | DDPG_s[0] = s[0] 1898 | DDPG_s[1] = s[2] 1899 | DDPG_s[2] = s[4] 1900 | DDPG_s[3] = s[3] 1901 | DDPG_s_[0] = s_[0] 1902 | DDPG_s_[1] = s_[2] 1903 | DDPG_s_[2] = s_[4] 1904 | DDPG_s_[3] = s_[3] 1905 | all_cf_r.append(cf_r) 1906 | cf_collision_list.append(collision) 1907 | # all_changelanetimes.append(changelane) 1908 | # DQN_s_[15] = all_changelanetimes[-1] 1909 | r_efficient = s_[0] / 25 1910 | r = 5 * r_danger + 10*r_efficient + (-1)*changelane 1911 | # if i <500: 1912 | # r = r_danger + 10*r_potential 1913 | # if i >=500: 1914 | # r = r_danger + 10*r_efficient 1915 | # r = 5*r_danger + 10 * r_efficient - 2 1916 | # r = r_danger + 10 * r_efficient 1917 | # r = r_danger + r_efficient + 1 1918 | 1919 | # RL.store_transition(DQN_s, a, r, DQN_s_) 1920 | # if total_step > MEMORY_CAPACITY: 1921 | # RL.learn() 1922 | # M.store_transition(DDPG_s, DDPG_action, cf_r, DDPG_s_) 1923 | # if M.pointer > MEMORY_CAPACITY: 1924 | # var = max([var * 0.9995, VAR_MIN]) # decay the action randomness 1925 | # # var = var * 0.99995 1926 | # b_M = M.sample(BATCH_SIZE) 1927 | # b_s = b_M[:, :STATE_DIM] 1928 | # b_a = b_M[:, STATE_DIM: STATE_DIM + ACTION_DIM] 1929 | # b_r = b_M[:, -STATE_DIM - 1: -STATE_DIM] 1930 | # b_s_ = b_M[:, -STATE_DIM:] 1931 | # 1932 | # critic.learn(b_s, b_a, b_r, b_s_) 1933 | # actor.learn(b_s) 1934 | 1935 | s = s_ 1936 | DQN_s = DQN_s_ 1937 | 1938 | total_step += 1 1939 | ep_step += 1 1940 | all_r.append(r) 1941 | 1942 | rear_v_list.append(s_[0]) 1943 | l_v_list.append(s_[2]) 1944 | all_changelanetimes.append(changelane) 1945 | danger_lc.append(dangerious_lc) 1946 | lane_index0_acc_list.append(a_lane0) 1947 | 
lane_index1_acc_list.append(a_lane1) 1948 | lane_index2_acc_list.append(a_lane2) 1949 | # ep_step_list.append(j) 1950 | distance_headway_list.append(s_[4]) 1951 | horizontal_position_list.append(traci.vehicle.getPosition('%d' % ego_id)[1]) 1952 | lengthwise_position_list.append(traci.vehicle.getPosition('%d' % ego_id)[0]) 1953 | order_ego_id_list.append(order_ego_id) 1954 | # car_15_speed.append(traci.vehicle.getSpeed('15')) 1955 | mean_speed = sum(rear_v_list)/MAX_EP_STEPS 1956 | # car_15_mean_speed = sum(car_15_speed)/MAX_EP_STEPS 1957 | # print(car_15_mean_speed) 1958 | mean_speed_list.append(mean_speed) 1959 | total_ep_list.append(i) 1960 | total_lc_r_list.append(sum(all_r)) 1961 | 1962 | 1963 | 1964 | # print("car_following:", 'reward=%s' % sum(all_cf_r),'collision-times=%s' % sum(cf_collision_list),'explore=%s'% var,'mean_speed=%s' % mean_speed) 1965 | 1966 | # print('sceen=%s' % sceen_id, 'ego_id=%s' % ego_id, 'episode=%s' % i, 'reward=%s' % sum(all_r), 1967 | # 'lanechange-times=%s' % sum(all_changelanetimes), 1968 | # 'cf-dangerious-times=%s' % sum(cf_collision_list), 'mean_speed=%s' % mean_speed) 1969 | # 1970 | # if sum(cf_collision_list) == 0: 1971 | # 1972 | # test1 = pd.DataFrame({'ego_speed': rear_v_list, 'horizontal position_list': horizontal_position_list, 1973 | # 'lengthwise_position_list': lengthwise_position_list, 1974 | # 'order_ego_id_list': order_ego_id_list}) 1975 | # # test1.to_csv('./figure_result.csv') 1976 | # test1.to_csv('data\\data_sceen_new\\' +'%d' % sceen_id +'\\'+'%d' % sceen_id+'-ego_id' +'-'+'%d' % ego_id + '-' + '%d' % i + '.csv', mode='a', header=True) # 第一行用作列名称 1977 | if sum(cf_collision_list) != 0: 1978 | print('sceen=%s' % sceen_id, 'ego_id=%s' % ego_id,'cf-dangerious-times=%s' % sum(cf_collision_list),'Dangerious!!!Not save!!!') 1979 | start_IDM_follow = 1 1980 | # if sum(cf_collision_list) == 0 and mean_speed >= ego_mean_speed: 1981 | if sum(cf_collision_list) == 0: 1982 | print('sceen=%s' % sceen_id, 'ego_id=%s' % 
ego_id, 'ego_mean_speed=%s' % ego_mean_speed,'episode=%s' % i, 'reward=%s' % sum(all_r), 1983 | 'lanechange-times=%s' % sum(all_changelanetimes), 1984 | 'cf-dangerious-times=%s' % sum(cf_collision_list), 'mean_speed=%s' % mean_speed) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Double-layer-decision-making-model 2 | An Integrated Model for Autonomous Speed and Lane Change Decision-Making Based on Deep Reinforcement Learning 3 | 引用本文: 4 | Peng J, Zhang S, Zhou Y, et al. An Integrated Model for Autonomous Speed and Lane Change Decision-Making Based on Deep Reinforcement Learning[J]. IEEE Transactions on Intelligent Transportation Systems, 2022, 23(11): 21848-21860 5 | -------------------------------------------------------------------------------- /UDDS+US06_2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCII-Team/Double-layer-decision-making-model/HEAD/UDDS+US06_2.mat -------------------------------------------------------------------------------- /car-follow-5.24/car-lc-net.net.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /car-follow-5.24/car-lc-rou.rou.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /car-follow-5.24/car-lc.settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- 
class Actor(object):
    """DDPG actor: maps a state batch to a bounded continuous action.

    Two copies of the same network are built under the ``Actor`` variable
    scope: ``eval_net`` (trainable, fed from the module-level ``S``
    placeholder) and ``target_net`` (not trainable, fed from ``S_``).  The
    target copy is overwritten with the eval weights every
    ``t_replace_iter`` calls to :meth:`learn`.

    Args:
        sess: TensorFlow session used for every ``run`` call.
        action_dim: Number of action components produced by the network.
        action_bound: Factor the tanh output is multiplied by.
        learning_rate: Initial RMSProp learning rate.
        last_learning_rate: Floor the learning rate decays towards.
        t_replace_iter: Number of ``learn`` calls between target syncs.
    """

    def __init__(self, sess, action_dim, action_bound, learning_rate, last_learning_rate, t_replace_iter):
        self.sess = sess
        self.a_dim = action_dim
        self.action_bound = action_bound
        self.lr = learning_rate
        self.lr_min = last_learning_rate
        self.t_replace_iter = t_replace_iter
        self.t_replace_counter = 0

        with tf.variable_scope('Actor'):
            # input s, output a
            self.a = self._build_net(S, scope='eval_net', trainable=True)

            # input s_, output a, get a_ for critic
            self.a_ = self._build_net(S_, scope='target_net', trainable=False)

        self.e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Actor/eval_net')
        self.t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Actor/target_net')

        # Build the hard-copy ops once; creating them inside learn() added
        # new nodes to the graph on every target sync.
        self.replace_target_op = [tf.assign(t, e) for t, e in zip(self.t_params, self.e_params)]

    def _build_net(self, s, scope, trainable):
        """Build three 100-unit ReLU layers plus a tanh head scaled by ``action_bound``."""
        with tf.variable_scope(scope):
            init_w = tf.contrib.layers.xavier_initializer()
            init_b = tf.constant_initializer(0.001)
            net1 = tf.layers.dense(s, 100, activation=tf.nn.relu,
                                   kernel_initializer=init_w, bias_initializer=init_b, name='l1',
                                   trainable=trainable)
            net2 = tf.layers.dense(net1, 100, activation=tf.nn.relu,
                                   kernel_initializer=init_w, bias_initializer=init_b, name='l2',
                                   trainable=trainable)
            net = tf.layers.dense(net2, 100, activation=tf.nn.relu,
                                  kernel_initializer=init_w, bias_initializer=init_b, name='l3',
                                  trainable=trainable)
            with tf.variable_scope('a'):
                actions = tf.layers.dense(net, self.a_dim, activation=tf.nn.tanh, kernel_initializer=init_w,
                                          name='a', trainable=trainable)
                scaled_a = tf.multiply(actions, self.action_bound,
                                       name='scaled_a')  # Scale output to -action_bound to action_bound
        return scaled_a

    def learn(self, s):  # batch update
        """Run one policy-gradient step on state batch ``s`` and decay the learning rate."""
        # Feed the current (decayed) learning rate; previously the optimizer
        # kept the value captured at graph-construction time.
        self.sess.run(self.train_op, feed_dict={S: s, self.lr_ph: self.lr})
        if self.t_replace_counter % self.t_replace_iter == 0:
            self.sess.run(self.replace_target_op)
        self.t_replace_counter += 1
        self.lr = self.lr * 0.999995 if self.lr > self.lr_min else self.lr_min

    def choose_action(self, s):
        """Return the eval-net action for a single state vector ``s``."""
        s = s[np.newaxis, :]  # single state
        return self.sess.run(self.a, feed_dict={S: s})[0]  # single action

    def add_grad_to_graph(self, a_grads):
        """Create ``train_op`` from dQ/da supplied by the critic.

        Must be called once before :meth:`learn`.
        """
        with tf.variable_scope('policy_grads'):
            self.policy_grads = tf.gradients(ys=self.a, xs=self.e_params, grad_ys=a_grads)

        with tf.variable_scope('A_train'):
            # Scalar learning-rate input; the default keeps train_op runnable
            # even when the caller does not feed it.
            self.lr_ph = tf.placeholder_with_default(
                tf.constant(self.lr, dtype=tf.float32), shape=[], name='lr')
            opt = tf.train.RMSPropOptimizer(-self.lr_ph)  # (- learning rate) for ascent policy
            self.train_op = opt.apply_gradients(zip(self.policy_grads, self.e_params))
class Critic(object):
    """DDPG critic: estimates Q(s, a) and provides dQ/da for the actor.

    ``eval_net`` scores ``(S, a)`` and ``target_net`` scores ``(S_, a_)``,
    where ``a`` / ``a_`` are the tensors passed in by the caller (the
    inline comment in the original says ``a_`` comes from the actor's
    target net).  The loss is the mean squared difference between
    ``R + gamma * q_`` and ``q``.

    Args:
        sess: TensorFlow session used for every ``run`` call.
        state_dim: Width of the state vector.
        action_dim: Width of the action vector.
        learning_rate: Initial RMSProp learning rate.
        last_learning_rate: Floor the learning rate decays towards.
        gamma: Reward discount factor.
        t_replace_iter: Number of ``learn`` calls between target syncs.
        a: Action tensor fed to the eval net (overridden by ``feed_dict``
            in :meth:`learn`).
        a_: Action tensor fed to the target net.
    """

    def __init__(self, sess, state_dim, action_dim, learning_rate, last_learning_rate, gamma, t_replace_iter, a, a_):
        self.sess = sess
        self.s_dim = state_dim
        self.a_dim = action_dim
        self.lr = learning_rate
        self.lr_min = last_learning_rate
        self.gamma = gamma
        self.t_replace_iter = t_replace_iter
        self.t_replace_counter = 0
        self.loss_his = []
        with tf.variable_scope('Critic'):
            # Input (s, a), output q
            self.a = a
            self.q = self._build_net(S, self.a, 'eval_net', trainable=True)

            # Input (s_, a_), output q_ for q_target
            self.q_ = self._build_net(S_, a_, 'target_net',
                                      trainable=False)  # target_q is based on a_ from Actor's target_net

            self.e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic/eval_net')
            self.t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic/target_net')

        with tf.variable_scope('target_q'):
            self.target_q = R + self.gamma * self.q_

        with tf.variable_scope('TD_error'):
            self.loss = tf.reduce_mean(tf.squared_difference(self.target_q, self.q))

        with tf.variable_scope('C_train'):
            # Scalar learning-rate input so the decay applied in learn()
            # actually reaches the optimizer.
            self.lr_ph = tf.placeholder_with_default(
                tf.constant(self.lr, dtype=tf.float32), shape=[], name='lr')
            # Restrict the update to the critic's own eval weights.  Without
            # var_list the optimizer also updates every other trainable
            # variable the loss depends on, i.e. the network producing `a`.
            self.train_op = tf.train.RMSPropOptimizer(self.lr_ph).minimize(
                self.loss, var_list=self.e_params)

        with tf.variable_scope('a_grad'):
            self.a_grads = tf.gradients(self.q, a)[0]  # tensor of gradients of each sample (None, a_dim)

        # Build the hard-copy ops once instead of on every target sync.
        self.replace_target_op = [tf.assign(t, e) for t, e in zip(self.t_params, self.e_params)]

    def _build_net(self, s, a, scope, trainable):
        """Build the Q network: a joint state/action first layer, two dense layers, a scalar head."""
        with tf.variable_scope(scope):
            init_w = tf.contrib.layers.xavier_initializer()
            init_b = tf.constant_initializer(0.01)

            with tf.variable_scope('l1'):
                n_l1 = 100
                w1_s = tf.get_variable('w1_s', [self.s_dim, n_l1], initializer=init_w, trainable=trainable)
                w1_a = tf.get_variable('w1_a', [self.a_dim, n_l1], initializer=init_w, trainable=trainable)
                b1 = tf.get_variable('b1', [1, n_l1], initializer=init_b, trainable=trainable)
                net = tf.nn.relu6(tf.matmul(s, w1_s) + tf.matmul(a, w1_a) + b1)

            net = tf.layers.dense(net, 100, activation=tf.nn.relu,
                                  kernel_initializer=init_w, bias_initializer=init_b, name='l2',
                                  trainable=trainable)
            net2 = tf.layers.dense(net, 100, activation=tf.nn.relu,
                                   kernel_initializer=init_w, bias_initializer=init_b, name='l3',
                                   trainable=trainable)
            with tf.variable_scope('q'):
                q = tf.layers.dense(net2, 1, kernel_initializer=init_w, bias_initializer=init_b,
                                    trainable=trainable)  # Q(s,a)
        return q

    def learn(self, s, a, r, s_):
        """Run one TD step on a batch of transitions and decay the learning rate."""
        self.sess.run(self.train_op,
                      feed_dict={S: s, self.a: a, R: r, S_: s_, self.lr_ph: self.lr})
        if self.t_replace_counter % self.t_replace_iter == 0:
            self.sess.run(self.replace_target_op)
        self.t_replace_counter += 1
        self.lr = self.lr * 0.999995 if self.lr > self.lr_min else self.lr_min
class DDPG_Memory(object):
    """Fixed-size ring buffer of flattened ``(s, a, r, s_)`` transitions.

    Args:
        capacity: Number of rows kept; older rows are overwritten.
        dims: Width of one flattened transition row.
    """

    def __init__(self, capacity, dims):
        self.capacity = capacity
        self.data = np.zeros((capacity, dims))
        self.pointer = 0  # total number of transitions ever stored

    def store_transition(self, s, a, r, s_):
        """Flatten one transition and write it over the oldest slot."""
        transition = np.hstack((s, a, [r], s_))
        index = self.pointer % self.capacity  # replace the old memory with new memory
        self.data[index, :] = transition
        self.pointer += 1

    def sample(self, n):
        """Return ``n`` rows drawn uniformly with replacement.

        Raises:
            AssertionError: If fewer than ``capacity`` transitions have been
                stored.  Raised explicitly so the guard still fires under
                ``python -O``, where ``assert`` statements are removed.
        """
        if self.pointer < self.capacity:
            raise AssertionError('Memory has not been fulfilled')
        indices = np.random.choice(self.capacity, size=n)
        return self.data[indices, :]
class SumTree(object):
    """Array-backed binary sum tree used for proportional priority sampling.

    This SumTree code is modified version and the original code is from:
    https://github.com/jaara/AI-blog/blob/master/SumTree.py

    ``tree`` holds ``capacity - 1`` internal nodes followed by ``capacity``
    leaves; every internal node stores the sum of its two children, so
    ``tree[0]`` is the total priority.  ``data`` holds the payload that
    belongs to each leaf.
    """
    data_pointer = 0

    def __init__(self, capacity):
        self.capacity = capacity  # for all priority values
        self.tree = np.zeros(2 * capacity - 1)
        # [--------------Parent nodes-------------][-------leaves to recode priority-------]
        #             size: capacity - 1                       size: capacity
        self.data = np.zeros(capacity, dtype=object)  # for all transitions
        # [--------------data frame-------------]
        #             size: capacity

    def add_new_priority(self, p, data):
        """Store ``data`` with priority ``p`` in the next slot, wrapping when full."""
        leaf_idx = self.data_pointer + self.capacity - 1

        self.data[self.data_pointer] = data  # update data_frame
        self.update(leaf_idx, p)  # update tree_frame
        self.data_pointer += 1
        if self.data_pointer >= self.capacity:  # replace when exceed the capacity
            self.data_pointer = 0

    def update(self, tree_idx, p):
        """Set the priority at tree index ``tree_idx`` to ``p`` and refresh the sums above it."""
        change = p - self.tree[tree_idx]

        self.tree[tree_idx] = p
        self._propagate_change(tree_idx, change)

    def _propagate_change(self, tree_idx, change):
        """Add ``change`` to every ancestor of ``tree_idx``.

        Iterative, and a no-op when ``tree_idx`` is already the root; the
        recursive form never terminated for a tree with a single node.
        """
        while tree_idx != 0:
            tree_idx = (tree_idx - 1) // 2
            self.tree[tree_idx] += change

    def get_leaf(self, lower_bound):
        """Return ``[leaf_idx, priority, data]`` for the leaf covering ``lower_bound``."""
        leaf_idx = self._retrieve(lower_bound)  # search the max leaf priority based on the lower_bound
        data_idx = leaf_idx - self.capacity + 1
        return [leaf_idx, self.tree[leaf_idx], self.data[data_idx]]

    def _retrieve(self, lower_bound, parent_idx=0):
        """Descend from ``parent_idx`` to the leaf whose cumulative range contains ``lower_bound``.

        Tree structure and array storage:
        Tree index:
             0         -> storing priority sum
            / \\
          1     2
         / \\   / \\
        3   4 5   6    -> storing priority for transitions
        Array type for storing:
        [0,1,2,3,4,5,6]

        The earlier version chose a random child whenever both children had
        equal sums and passed ``lower_bound`` down unchanged, which skews
        the draw away from proportional sampling.  The descent is now
        deterministic for a given ``lower_bound``.
        """
        tree = self.tree
        n_nodes = len(tree)
        while True:
            left_child_idx = 2 * parent_idx + 1
            if left_child_idx >= n_nodes:  # end search when no more child
                return parent_idx
            right_child_idx = left_child_idx + 1
            # Never walk into a subtree with no priority mass: that guards
            # against landing on an unfilled leaf through rounding error.
            if lower_bound <= tree[left_child_idx] or tree[right_child_idx] <= 0:
                parent_idx = left_child_idx
            else:
                lower_bound = lower_bound - tree[left_child_idx]
                parent_idx = right_child_idx

    @property
    def root_priority(self):
        return self.tree[0]  # the root


class Memory(object):  # stored as ( s, a, r, s_ ) in SumTree
    """Prioritized replay buffer backed by :class:`SumTree`.

    This SumTree code is modified version and the original code is from:
    https://github.com/jaara/AI-blog/blob/master/Seaquest-DDQN-PER.py
    """
    epsilon = 0.001  # small amount to avoid zero priority
    alpha = 0.6  # [0~1] convert the importance of TD error to priority
    beta = 0.4  # importance-sampling, from initial value increasing to 1
    beta_increment_per_sampling = 1e-4  # annealing the bias
    abs_err_upper = 1  # for stability refer to paper

    def __init__(self, capacity):
        self.tree = SumTree(capacity)

    def store(self, error, transition):
        """Insert ``transition`` with a priority derived from ``error``."""
        p = self._get_priority(error)
        self.tree.add_new_priority(p, transition)

    def sample(self, n):
        """Draw ``n`` transitions, one from each equal slice of the total priority.

        Returns:
            ``(batch_idx, batch_memory, ISWeights)``: tree indices (a list),
            the stacked transitions, and an ``(n, 1)`` array of
            importance-sampling weights normalised by the largest possible
            weight.

        Raises:
            ValueError: If nothing has been stored yet.
        """
        total = self.tree.root_priority
        leaves = self.tree.tree[-self.tree.capacity:]
        # Only stored transitions have a positive priority.  Including the
        # empty leaves made min_prob zero, the normaliser infinite and every
        # weight zero until the buffer had filled completely.
        filled = leaves[leaves > 0]
        if filled.size == 0:
            raise ValueError('cannot sample from an empty memory')

        batch_idx, batch_memory, ISWeights = [], [], []
        segment = total / n
        self.beta = np.min([1, self.beta + self.beta_increment_per_sampling])  # max = 1

        min_prob = np.min(filled) / total
        maxiwi = np.power(self.tree.capacity * min_prob, -self.beta)  # for later normalizing ISWeights
        for i in range(n):
            a = segment * i
            b = segment * (i + 1)
            lower_bound = np.random.uniform(a, b)
            idx, p, data = self.tree.get_leaf(lower_bound)
            prob = p / total
            ISWeights.append(self.tree.capacity * prob)
            batch_idx.append(idx)
            batch_memory.append(data)

        ISWeights = np.vstack(ISWeights)
        ISWeights = np.power(ISWeights, -self.beta) / maxiwi  # normalize
        return batch_idx, np.vstack(batch_memory), ISWeights

    def update(self, idx, error):
        """Re-prioritise the leaf at tree index ``idx`` from a new ``error``."""
        p = self._get_priority(error)
        self.tree.update(idx, p)

    def _get_priority(self, error):
        """Map ``error`` to ``clip(error + epsilon, 0, abs_err_upper) ** alpha``."""
        # Build a new value rather than using ``+=`` so an ndarray passed by
        # the caller is not modified in place.
        shifted = error + self.epsilon  # avoid 0
        clipped_error = np.clip(shifted, 0, self.abs_err_upper)
        return np.power(clipped_error, self.alpha)
class DuelingDQNPrioritizedReplay:
    """Dueling double-DQN agent with prioritized replay for lane selection.

    The eval and target networks share one architecture: a stack of
    leaky-ReLU layers (sizes from ``hidden``) followed by separate Value and
    Advantage heads combined as ``Q = V + (A - mean(A))``.

    Args:
        n_actions: Number of discrete actions.
        n_features: Width of the observation vector.
        learning_rate: Adam learning rate.
        reward_decay: Discount factor.
        e_greedy: Upper bound of the greedy probability ``epsilon``.
        replace_target_iter: ``learn`` calls between target-network syncs.
        memory_size: Capacity of the prioritized replay memory.
        batch_size: Transitions sampled per ``learn`` call.
        e_greedy_increment: Per-step increase of ``epsilon``; when ``None``
            ``epsilon`` starts at ``e_greedy`` and stays there.
        hidden: Hidden layer widths.
        lr_min: Lower bound for the learning rate (stored only; the decay
            that would use it is commented out in ``learn``).
        output_graph: When true, write the graph to ``logs/``.
        sess: Existing session to use.  When ``None`` a new session is
            created and all global variables are initialised; a supplied
            session is used as-is.
    """
    np.random.seed(1)
    tf1.set_random_seed(1)

    def __init__(
            self,
            n_actions,
            n_features,
            learning_rate=0.00001,
            reward_decay=0.9,
            e_greedy=0.9,
            replace_target_iter=500,
            memory_size=10000,
            batch_size=32,
            e_greedy_increment=None,
            hidden=(400, 400, 400),
            lr_min=0.00001,
            output_graph=False,
            sess=None,
    ):
        self.n_actions = n_actions
        self.n_features = n_features
        self.lr = learning_rate
        self.gamma = reward_decay
        self.epsilon_max = e_greedy
        self.replace_target_iter = replace_target_iter
        self.memory_size = memory_size
        self.batch_size = batch_size
        # Copy into a private list so instances never share a default object.
        self.hidden = list(hidden)
        self.epsilon_increment = e_greedy_increment
        self.epsilon = 0.5 if e_greedy_increment is not None else self.epsilon_max
        # Honour the argument; it used to be overwritten by a hard-coded value.
        self.lr_min = lr_min
        self.learn_step_counter = 0
        self._build_net()
        self.memory = Memory(capacity=memory_size)

        if sess is None:
            self.sess = tf1.Session()
            self.sess.run(tf1.global_variables_initializer())
        else:
            self.sess = sess

        if output_graph:
            tf1.summary.FileWriter("logs/", self.sess.graph)

        self.cost_his = []

    def _build_net(self):
        """Create placeholders, the eval net with loss/train op, the target net and the sync op."""

        def build_layers(s, c_names, w_initializer, b_initializer):
            layer = s
            in_units = self.n_features
            for i, out_units in enumerate(self.hidden):
                with tf1.variable_scope('l%i' % i):
                    w = tf1.get_variable('w', [in_units, out_units], initializer=w_initializer,
                                         collections=c_names)
                    b = tf1.get_variable('b', [1, out_units], initializer=b_initializer, collections=c_names)
                    layer = tf1.nn.leaky_relu(tf1.matmul(layer, w) + b)
                in_units = out_units

            with tf1.variable_scope('Value'):
                w = tf1.get_variable('w', [self.hidden[-1], 1], initializer=w_initializer, collections=c_names)
                b = tf1.get_variable('b', [1, 1], initializer=b_initializer, collections=c_names)
                self.V = tf1.matmul(layer, w) + b

            with tf1.variable_scope('Advantage'):
                w = tf1.get_variable('w', [self.hidden[-1], self.n_actions], initializer=w_initializer,
                                     collections=c_names)
                b = tf1.get_variable('b', [1, self.n_actions], initializer=b_initializer, collections=c_names)
                self.A = tf1.matmul(layer, w) + b

            with tf1.variable_scope('Q'):
                out = self.V + (self.A - tf1.reduce_mean(self.A, axis=1, keep_dims=True))  # Q = V(s) + A(s,a)

            return out

        # ------------------ build evaluate_net ------------------
        self.s = tf1.placeholder(tf1.float32, [None, self.n_features], name='s')  # input
        self.q_target = tf1.placeholder(tf1.float32, [None, self.n_actions], name='Q_target')  # for calculating loss
        self.ISWeights = tf1.placeholder(tf1.float32, [None, 1], name='IS_weights')
        with tf1.variable_scope('eval_net'):
            c_names, w_initializer, b_initializer = \
                ['eval_net_params', tf1.GraphKeys.GLOBAL_VARIABLES], \
                tf1.random_normal_initializer(0., 0.01), tf1.constant_initializer(0.01)  # config of layers

            self.q_eval = build_layers(self.s, c_names, w_initializer, b_initializer)

        with tf1.variable_scope('loss'):
            self.abs_errors = tf1.abs(tf1.reduce_sum(self.q_target - self.q_eval, axis=1))  # for updating Sumtree
            self.loss = tf1.reduce_mean(self.ISWeights * tf1.squared_difference(self.q_target, self.q_eval))
        tf1.summary.scalar('loss', self.loss)

        with tf1.variable_scope('train'):
            self._train_op = tf1.train.AdamOptimizer(self.lr).minimize(self.loss)

        # ------------------ build target_net ------------------
        self.s_ = tf1.placeholder(tf1.float32, [None, self.n_features], name='s_')  # input
        with tf1.variable_scope('target_net'):
            c_names = ['target_net_params', tf1.GraphKeys.GLOBAL_VARIABLES]
            self.q_next = build_layers(self.s_, c_names, w_initializer, b_initializer)

        # Build the eval -> target copy ops once; creating them on every sync
        # kept adding nodes to the graph.
        self._replace_target_op = [
            tf1.assign(t, e)
            for t, e in zip(tf1.get_collection('target_net_params'),
                            tf1.get_collection('eval_net_params'))
        ]

    def store_transition(self, s, a, r, s_):
        """Store one transition using the largest current leaf priority as its error."""
        transition = np.hstack((s, [a, r], s_))
        max_p = np.max(self.memory.tree.tree[-self.memory.tree.capacity:])
        self.memory.store(max_p, transition)

    def choose_action(self, observation):
        """Pick an action epsilon-greedily, skipping masked actions.

        Action ``k`` (for ``k`` in 0..2) is masked when observation feature
        ``12 + k`` equals zero.  In the greedy branch a masked action's
        Q-value is pushed one below the running minimum so ``argmax`` cannot
        select it ahead of an unmasked action; in the exploratory branch it
        is removed from the candidate list.
        """
        observation = observation[np.newaxis, :]
        mask_columns = (12, 13, 14)
        if np.random.uniform() < self.epsilon:
            actions_value = self.sess.run(self.q_eval, feed_dict={self.s: observation})
            q_values = np.array(actions_value[0])
            floor = q_values.min()
            for act, col in enumerate(mask_columns):
                if observation[0][col] == 0:
                    floor = floor - 1
                    q_values[act] = floor
            action = np.argmax(q_values)
        else:
            actions_list = [act for act, col in enumerate(mask_columns) if observation[0][col] != 0]
            action = np.random.choice(actions_list)
        return action

    def _replace_target_params(self):
        """Copy every eval-net variable into the matching target-net variable."""
        self.sess.run(self._replace_target_op)

    def learn(self):
        """Sample a prioritized batch, take one double-DQN step and refresh priorities."""
        if self.learn_step_counter % self.replace_target_iter == 0:
            self._replace_target_params()

        tree_idx, batch_memory, ISWeights = self.memory.sample(self.batch_size)

        # double DQN
        q_next, q_eval4next = self.sess.run(
            [self.q_next, self.q_eval],
            feed_dict={self.s_: batch_memory[:, -self.n_features:],  # next observation
                       self.s: batch_memory[:, -self.n_features:]})  # next observation
        q_eval = self.sess.run(self.q_eval, {self.s: batch_memory[:, :self.n_features]})

        q_target = q_eval.copy()

        batch_index = np.arange(self.batch_size, dtype=np.int32)
        eval_act_index = batch_memory[:, self.n_features].astype(int)
        reward = batch_memory[:, self.n_features + 1]
        max_act4next = np.argmax(q_eval4next,
                                 axis=1)  # the action that brings the highest value is evaluated by q_eval
        selected_q_next = q_next[batch_index, max_act4next]  # Double DQN, select q_next depending on above actions

        q_target[batch_index, eval_act_index] = reward + self.gamma * selected_q_next

        _, abs_errors, self.cost = self.sess.run([self._train_op, self.abs_errors, self.loss],
                                                 feed_dict={self.s: batch_memory[:, :self.n_features],
                                                            self.q_target: q_target,
                                                            self.ISWeights: ISWeights})
        for idx, err in zip(tree_idx, abs_errors):  # update priority
            self.memory.update(idx, err)

        self.cost_his.append(self.cost)

        self.epsilon = self.epsilon + self.epsilon_increment if self.epsilon < self.epsilon_max else self.epsilon_max
        self.learn_step_counter += 1
125 | 126 | 127 | 128 | 150: 155 | DDPG_s[1]=33 156 | DDPG_s[2]=150 157 | DDPG_s[3]=0.01 158 | s = DDPG_s 159 | # s[0] = s[0]/33 160 | # s[1] = s[1] / 33 161 | # s[2] = s[2] / 150 162 | # s[3] = s[3] / 3 163 | a = actor.choose_action(s) 164 | a = np.clip(np.random.normal(a, var), *ACTION_BOUND) # add randomness to action selection for exploration 165 | DDPG_action = a 166 | update_speed = min(max(DDPG_s[0] + 3 * DDPG_action, 0.2),33) # 速度更新 167 | traci.vehicle.setSpeed("002", update_speed) # 主车车速更新 168 | traci.vehicle.setSpeed("001", y[m + 15]) # 虚拟前车车速设定 169 | 170 | traci.simulationStep(m + 15) 171 | DDPG_s_[0] = traci.vehicle.getSpeed("002") 172 | DDPG_s_[1] = traci.vehicle.getSpeed("001") 173 | DDPG_s_[2] = (traci.vehicle.getPosition("001")[0] - traci.vehicle.getPosition("002")[0] - 5) 174 | # DDPG_s_[2] = (traci.vehicle.getPosition("001")[0] - traci.vehicle.getPosition("002")[0] - 2.5) 175 | DDPG_s_[3] = y[m + 16] - traci.vehicle.getSpeed("001") 176 | if DDPG_s_[2]>150: 177 | DDPG_s_[1]=33 178 | DDPG_s_[2]=150 179 | DDPG_s_[3]=0.01 180 | distance_headway = DDPG_s_[2] 181 | # dis_safe = (0.36 * DDPG_s_[0] + (DDPG_s_[0] * DDPG_s_[0]) / 6 + 0.0001) 182 | dis_safe = (0.36 * DDPG_s_[0] + (DDPG_s_[0] * DDPG_s_[0]) * 0.1) 183 | if distance_headway > 0: 184 | # r_dis = 1 - min(abs(np.log(distance_headway) / np.log(2 * dis_safe)), 5) 185 | # r_dis = 1 186 | r_dis = 1.12*(150-distance_headway)/75 ####时距 187 | 188 | # r_dis = 3*(distance_headway/150-1)*(distance_headway/150-1) 189 | # if distance_headway < min(dis_safe,30): 190 | # # r_dis = -10 191 | # r_dis = 1.5 192 | if distance_headway >= 150: 193 | # r_dis = min(-30,150-distance_headway) 194 | r_dis = -30 195 | if distance_headway <= 0: 196 | # r_dis = min(10 * distance_headway,-20) 197 | r_dis = min(10 * distance_headway, -500) 198 | collision = 1 199 | # if i >=150: 200 | # break 201 | # if distance_headway == 0: 202 | # r_dis = -10 203 | # collision = 1 204 | r_efficient = DDPG_s_[0] / 33 205 | 206 | # r = 
3*r_efficient + r_dis 207 | # r = 10 * r_efficient + r_dis - 6*abs(DDPG_s_[0]-DDPG_s[0]) 208 | # r = 17.5 * r_efficient + r_dis - 2*abs(DDPG_s_[0]-DDPG_s[0]) 209 | # r = 16.5 * r_efficient + r_dis - 2 * abs(DDPG_s_[0] - DDPG_s[0]) 210 | 211 | # r = 33 * r_efficient * 0.6 + r_dis - 2.25 * (DDPG_s_[0] - DDPG_s[0])*(DDPG_s_[0] - DDPG_s[0]) 212 | r = 33 * r_efficient * 0.6 - 2.25 * (DDPG_s_[0] - DDPG_s[0]) * (DDPG_s_[0] - DDPG_s[0]) 213 | # r = 3*r_dis - 2.25 * (DDPG_s_[0] - DDPG_s[0]) * (DDPG_s_[0] - DDPG_s[0]) 214 | # r = 33 * r_efficient * 0.6 + r_dis - 4*abs(DDPG_s_[0] - DDPG_s[0]) 215 | jerk_value=abs(DDPG_s_[0]-DDPG_s[0]) 216 | 217 | # total_step += 1 218 | 219 | ep_step += 1 220 | # print(DDPG_s_) 221 | s = DDPG_s 222 | a = DDPG_action 223 | s_ = DDPG_s_ 224 | # s_[0] = s_[0] / 33 225 | # s_[1] = s_[1] / 33 226 | # s_[2] = s_[2] / 150 227 | # s_[3] = s_[3] / 3 228 | 229 | # M.store_transition(s, a, r, s_) 230 | # if M.pointer > MEMORY_CAPACITY: 231 | # var = max([var * 0.999995, VAR_MIN]) # decay the action randomness 232 | # # var = var * 0.99995 233 | # b_M = M.sample(BATCH_SIZE) 234 | # b_s = b_M[:, :STATE_DIM] 235 | # b_a = b_M[:, STATE_DIM: STATE_DIM + ACTION_DIM] 236 | # b_r = b_M[:, -STATE_DIM - 1: -STATE_DIM] 237 | # b_s_ = b_M[:, -STATE_DIM:] 238 | # 239 | # critic.learn(b_s, b_a, b_r, b_s_) 240 | # actor.learn(b_s) 241 | 242 | all_r.append(r) 243 | collision_list.append(collision) 244 | if collision==1: 245 | # print('episode=%s' % i,'step=%s' % j,'dangerious!')0 246 | traci.vehicle.moveToXY('002',"gneE0", traci.vehicle.getLaneIndex('002'), traci.vehicle.getPosition('001')[0]-22.5, traci.vehicle.getPosition('001')[1],keepRoute=1) 247 | # break 248 | # total_step += 1 249 | ep_step_list.append(ep_step) 250 | rear_v_list.append(DDPG_s_[0]) 251 | l_v_list.append(DDPG_s_[1]) 252 | distance_headway_list.append(DDPG_s_[2]) 253 | dis_safe_list.append(dis_safe) 254 | danger_time.append(danger) 255 | # collision_list.append(collision) 256 | 
all_jerk_value.append(jerk_value) 257 | ep_list.append(i) 258 | ep_collision_list.append(sum(collision_list)) 259 | ep_reward.append(sum(all_r)) 260 | mean_speed = sum(rear_v_list) / MAX_EP_STEPS 261 | ep_mean_speed.append(mean_speed) 262 | ep_base_mean_speed.append(base_mean_speed) 263 | mean_r = sum(all_r)/MAX_EP_STEPS 264 | mean_jerk=sum(all_jerk_value)/MAX_EP_STEPS 265 | mean_headway = sum(distance_headway_list)/len(distance_headway_list) 266 | print('episode=%s' % i, 'mean_reward=%s' % mean_r,'total_collision=%s' % sum(collision_list), 'mean_speed=%s' % mean_speed,'mean_jerk_value=%s' % mean_jerk, 'mean_headway=%s' % mean_headway,'A learn=%s' % actor.lr,'C learn=%s' % critic.lr) 267 | 268 | # if mean_speed>=11.62 and mean_jerk<=0.4 and sum(collision_list)==0 and (33 not in rear_v_list) and (33 not in l_v_list): 269 | if sum(collision_list) == 0 and mean_speed >= 11.6: 270 | kkk+=1 271 | if sum(collision_list) != 0: 272 | kkk=0 273 | if kkk>=3 and mean_speed >= 11.6 and max(l_v_list) <33: 274 | 275 | plt.plot(ep_step_list, rear_v_list, color='r', label='Ego_car') 276 | plt.plot(ep_step_list, l_v_list, color='b', label='Leader') 277 | plt.xlabel('Running time (s)', fontsize=20, color='black') 278 | plt.ylabel('Velocity (m/s)', fontsize=20, color='black') 279 | plt.legend() 280 | plt.show() 281 | plt.plot(ep_step_list,distance_headway_list) 282 | plt.show() 283 | test1=pd.DataFrame({'ep_step_list':ep_step_list,'rear_v_list':rear_v_list,'l_v_list':l_v_list,'distance_headway_list':distance_headway_list}) 284 | test1.to_csv('./figure106/2023-5-30-carfollow.csv') 285 | # plt.plot([i+1 for i in range(len(critic.loss_his))],critic.loss_his) 286 | # plt.show() 287 | # plt.plot(total_distance,RL.cost_his) 288 | # path = './1126 NEW_DDPG' 289 | # if os.path.isdir(path): shutil.rmtree(path) 290 | # os.mkdir(path) 291 | # ckpt_path = os.path.join(path, 'DDPG.ckpt') 292 | # save_path = saver.save(sess, ckpt_path, write_meta_graph=True) 293 | # print("\nSave Model %s\n" % 
save_path) 294 | ###record:100, 295 | # path = './10.7-10 NEW_DDPG' 296 | # if os.path.isdir(path): shutil.rmtree(path) 297 | # os.mkdir(path) 298 | # ckpt_path = os.path.join(path, 'DDPG.ckpt') 299 | # save_path = saver.save(sess, ckpt_path, write_meta_graph=False) 300 | # print("\nSave Model %s\n" % save_path) 301 | 302 | # break 303 | # if mean_speed >= 11.6 and mean_jerk <= 1 and sum(collision_list) == 0 and i>=80 and first_time ==1: 304 | # plt.plot(ep_step_list, rear_v_list, color='r', label='Ego_car') 305 | # plt.plot(ep_step_list, l_v_list, color='b', label='Leader') 306 | # plt.xlabel('Running time (s)', fontsize=20, color='black') 307 | # plt.ylabel('Velocity (m/s)', fontsize=20, color='black') 308 | # plt.legend() 309 | # plt.show() 310 | # plt.plot(ep_step_list,distance_headway_list) 311 | # plt.show() 312 | # print(i) 313 | # last_speed = mean_speed 314 | # last_jerk = mean_jerk 315 | # first_time = 0 316 | # if mean_speed>=last_speed and mean_jerk<=last_jerk and first_time == 0: 317 | # last_speed = mean_speed 318 | # last_jerk = mean_jerk 319 | # path = './10.7-2-1 NEW_DDPG' 320 | # if os.path.isdir(path): shutil.rmtree(path) 321 | # os.mkdir(path) 322 | # ckpt_path = os.path.join(path, 'DDPG.ckpt') 323 | # save_path = saver.save(sess, ckpt_path, write_meta_graph=False) 324 | # print("\nSave Model %s\n" % save_path) 325 | # break 326 | # if mean_speed>=11.6 and mean_jerk<=0.5 and sum(collision_list)==0 and i>=40: 327 | # break 328 | # if mean_speed >= 11.62 and mean_jerk <= 0.378 and sum(collision_list) == 0: 329 | # plt.plot(ep_step_list, rear_v_list, color='r', label='Ego_car') 330 | # plt.plot(ep_step_list, l_v_list, color='b', label='Leader') 331 | # plt.xlabel('Running time (s)', fontsize=20, color='black') 332 | # plt.ylabel('Velocity (m/s)', fontsize=20, color='black') 333 | # plt.legend() 334 | # plt.show() 335 | # path = './9.24_DDPG' 336 | # if os.path.isdir(path): shutil.rmtree(path) 337 | # os.mkdir(path) 338 | # ckpt_path = 
os.path.join(path, 'DDPG.ckpt') 339 | # save_path = saver.save(sess, ckpt_path, write_meta_graph=False) 340 | # print("\nSave Model %s\n" % save_path) 341 | 342 | traci.close() 343 | # if mean_speed>=11.6 and mean_jerk<=0.5 and sum(collision_list)==0 and i>=40: 344 | # plt.plot(ep_step_list, rear_v_list, color='r', label='Ego_car') 345 | # plt.plot(ep_step_list, l_v_list, color='b', label='Leader') 346 | # plt.xlabel('Running time (s)', fontsize=20, color='black') 347 | # plt.ylabel('Velocity (m/s)', fontsize=20, color='black') 348 | # plt.legend() 349 | # plt.show() 350 | # path = './1118 NEW_DDPG' 351 | # if os.path.isdir(path): shutil.rmtree(path) 352 | # os.mkdir(path) 353 | # ckpt_path = os.path.join(path, 'DDPG.ckpt') 354 | # save_path = saver.save(sess, ckpt_path, write_meta_graph=False) 355 | # print("\nSave Model %s\n" % save_path) 356 | 357 | # test1=pd.DataFrame(columns=column1,data=list1) 358 | # test1=pd.DataFrame({'ep_list':ep_list,'ep_collision_list':ep_collision_list,'mean_speed':ep_mean_speed,'ep_reward':ep_reward,'ep_base_mean_speed':ep_base_mean_speed,'ep_base_mean_speed':ep_base_mean_speed}) 359 | # test1.to_csv('./figure1.csv') 360 | # test1=pd.DataFrame({'ep_step_list':ep_step_list,'rear_v_list':rear_v_list,'l_v_list':l_v_list,'distance_headway_list':distance_headway_list}) 361 | # test1.to_csv('./figure106/1-DDPG-carfollow.csv') --------------------------------------------------------------------------------