├── Baseline_random.py ├── Environment.py ├── MR_vs_NV.png ├── README.md ├── S_V2V_link_VS_NV.png ├── agent.py ├── base.py ├── ddqn.png ├── dqn.png ├── main.py ├── replay_memory.py ├── requirement.txt └── utils.py /Baseline_random.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import numpy as np 3 | from Environment import * 4 | import matplotlib.pyplot as plt 5 | 6 | # This py file using the random algorithm. 7 | 8 | def main(): 9 | up_lanes = [3.5/2,3.5/2 + 3.5,250+3.5/2, 250+3.5+3.5/2, 500+3.5/2, 500+3.5+3.5/2] 10 | down_lanes = [250-3.5-3.5/2,250-3.5/2,500-3.5-3.5/2,500-3.5/2,750-3.5-3.5/2,750-3.5/2] 11 | left_lanes = [3.5/2,3.5/2 + 3.5,433+3.5/2, 433+3.5+3.5/2, 866+3.5/2, 866+3.5+3.5/2] 12 | right_lanes = [433-3.5-3.5/2,433-3.5/2,866-3.5-3.5/2,866-3.5/2,1299-3.5-3.5/2,1299-3.5/2] 13 | width = 750 14 | height = 1299 15 | n = 40 16 | Env = Environ(down_lanes,up_lanes,left_lanes,right_lanes, width, height) 17 | number_of_game = 50 18 | n_step = 100 19 | V2I_Rate_List = np.zeros([number_of_game, n_step]) 20 | Fail_Percent = np.zeros([number_of_game, n_step]) 21 | for game_idx in range(number_of_game): 22 | print (game_idx) 23 | Env.new_random_game(n) 24 | for i in range(n_step): 25 | #print(i) 26 | actions = np.random.randint(0,20,[n,3]) 27 | power_selection = np.zeros(actions.shape, dtype = 'int') 28 | actions = np.concatenate((actions[..., np.newaxis],power_selection[...,np.newaxis]), axis = 2) 29 | reward, percent = Env.act(actions) 30 | V2I_Rate_List[game_idx, i] = np.sum(reward) 31 | Fail_Percent[game_idx, i] = percent 32 | print(np.sum(reward)) 33 | print ('percentage here is ', percent) 34 | print ('The number of vehicles is ', n) 35 | print ('mean of V2I rate is that ', np.mean(V2I_Rate_List)) 36 | print ('mean of percent is ', np.mean(Fail_Percent[:,-1])) 37 | 38 | main() 39 | -------------------------------------------------------------------------------- /Environment.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | import time 4 | import random 5 | import math 6 | # This file is revised for more precise and concise expression. 
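Both channel classes below evolve shadow fading with an exponentially correlated (first-order) update driven by how far each vehicle has moved since the last step. A standalone sketch of that update rule, separate from `Environment.py`, with illustrative names (`d_corr` is the decorrelation distance, `sigma` the shadowing standard deviation; the defaults match the V2V values used here):

```python
import numpy as np

def update_shadowing(shadow_prev, delta_distance, d_corr=10.0, sigma=3.0):
    # Correlation decays exponentially with the distance travelled since the last update.
    rho = np.exp(-delta_distance / d_corr)
    noise = np.random.normal(0.0, sigma, size=np.shape(shadow_prev))
    # sqrt(1 - rho**2) keeps the stationary variance of the shadowing at sigma**2.
    return rho * shadow_prev + np.sqrt(1.0 - rho ** 2) * noise
```

Note that `V2Vchannels.update_shadow` applies the same rule with `delta_distance` taken as the sum of the distances moved by the two endpoints of a link.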
7 | class V2Vchannels: 8 | # Simulator of the V2V Channels 9 | def __init__(self, n_Veh, n_RB): 10 | self.t = 0 11 | self.h_bs = 1.5 12 | self.h_ms = 1.5 13 | self.fc = 2 14 | self.decorrelation_distance = 10 15 | self.shadow_std = 3 16 | self.n_Veh = n_Veh 17 | self.n_RB = n_RB 18 | self.update_shadow([]) 19 | def update_positions(self, positions): 20 | self.positions = positions 21 | def update_pathloss(self): 22 | self.PathLoss = np.zeros(shape=(len(self.positions),len(self.positions))) 23 | for i in range(len(self.positions)): 24 | for j in range(len(self.positions)): 25 | self.PathLoss[i][j] = self.get_path_loss(self.positions[i], self.positions[j]) 26 | def update_shadow(self, delta_distance_list): 27 | delta_distance = np.zeros((len(delta_distance_list), len(delta_distance_list))) 28 | for i in range(len(delta_distance)): 29 | for j in range(len(delta_distance)): 30 | delta_distance[i][j] = delta_distance_list[i] + delta_distance_list[j] 31 | if len(delta_distance_list) == 0: 32 | self.Shadow = np.random.normal(0,self.shadow_std, size=(self.n_Veh, self.n_Veh)) 33 | else: 34 | self.Shadow = np.exp(-1*(delta_distance/self.decorrelation_distance)) * self.Shadow +\ 35 | np.sqrt(1 - np.exp(-2*(delta_distance/self.decorrelation_distance))) * np.random.normal(0, self.shadow_std, size = (self.n_Veh, self.n_Veh)) 36 | def update_fast_fading(self): 37 | h = 1/np.sqrt(2) * (np.random.normal(size=(self.n_Veh, self.n_Veh, self.n_RB) ) + 1j * np.random.normal(size=(self.n_Veh, self.n_Veh, self.n_RB))) 38 | self.FastFading = 20 * np.log10(np.abs(h)) 39 | def get_path_loss(self, position_A, position_B): 40 | d1 = abs(position_A[0] - position_B[0]) 41 | d2 = abs(position_A[1] - position_B[1]) 42 | d = math.hypot(d1,d2)+0.001 43 | d_bp = 4 * (self.h_bs - 1) * (self.h_ms - 1) * self.fc * (10**9)/(3*10**8) 44 | def PL_Los(d): 45 | if d <= 3: 46 | return 22.7 * np.log10(3) + 41 + 20*np.log10(self.fc/5) 47 | else: 48 | if d < d_bp: 49 | return 22.7 * np.log10(d) + 41 + 20 * np.log10(self.fc/5) 50 | else: 51 | return 40.0 * np.log10(d) + 9.45 - 17.3 * np.log10(self.h_bs) - 17.3 * np.log10(self.h_ms) + 2.7 * np.log10(self.fc/5) 52 | def PL_NLos(d_a,d_b): 53 | n_j = max(2.8 - 0.0024*d_b, 1.84) 54 | return PL_Los(d_a) + 20 - 12.5*n_j + 10 * n_j * np.log10(d_b) + 3*np.log10(self.fc/5) 55 | if min(d1,d2) < 7: 56 | PL = PL_Los(d) 57 | self.ifLOS = True 58 | self.shadow_std = 3 59 | else: 60 | PL = min(PL_NLos(d1,d2), PL_NLos(d2,d1)) 61 | self.ifLOS = False 62 | self.shadow_std = 4 # if Non line of sight, the std is 4 63 | return PL 64 | 65 | class V2Ichannels: 66 | # Simulator of the V2I channels 67 | def __init__(self, n_Veh, n_RB): 68 | self.h_bs = 25 69 | self.h_ms = 1.5 70 | self.Decorrelation_distance = 50 71 | self.BS_position = [750/2, 1299/2] # Suppose the BS is in the center 72 | self.shadow_std = 8 73 | self.n_Veh = n_Veh 74 | self.n_RB = n_RB 75 | self.update_shadow([]) 76 | def update_positions(self, positions): 77 | self.positions = positions 78 | 79 | def update_pathloss(self): 80 | self.PathLoss = np.zeros(len(self.positions)) 81 | for i in range(len(self.positions)): 82 | d1 = abs(self.positions[i][0] - self.BS_position[0]) 83 | d2 = abs(self.positions[i][1] - self.BS_position[1]) 84 | distance = math.hypot(d1,d2) # change from meters to kilometers 85 | self.PathLoss[i] = 128.1 + 37.6*np.log10(math.sqrt(distance**2 + (self.h_bs-self.h_ms)**2)/1000) 86 | def update_shadow(self, delta_distance_list): 87 | if len(delta_distance_list) == 0: # initialization 88 | self.Shadow = np.random.normal(0, 
self.shadow_std, self.n_Veh) 89 | else: 90 | delta_distance = np.asarray(delta_distance_list) 91 | self.Shadow = np.exp(-1*(delta_distance/self.Decorrelation_distance))* self.Shadow +\ 92 | np.sqrt(1-np.exp(-2*(delta_distance/self.Decorrelation_distance)))*np.random.normal(0,self.shadow_std, self.n_Veh) 93 | def update_fast_fading(self): 94 | h = 1/np.sqrt(2) * (np.random.normal(size = (self.n_Veh, self.n_RB)) + 1j* np.random.normal(size = (self.n_Veh, self.n_RB))) 95 | self.FastFading = 20 * np.log10(np.abs(h)) 96 | 97 | class Vehicle: 98 | # Vehicle simulator: include all the information for a vehicle 99 | def __init__(self, start_position, start_direction, velocity): 100 | self.position = start_position 101 | self.direction = start_direction 102 | self.velocity = velocity 103 | self.neighbors = [] 104 | self.destinations = [] 105 | class Environ: 106 | # Enviroment Simulator: Provide states and rewards to agents. 107 | # Evolve to new state based on the actions taken by the vehicles. 108 | def __init__ (self, down_lane, up_lane, left_lane, right_lane, width, height): 109 | self.timestep = 0.01 110 | self.down_lanes = down_lane 111 | self.up_lanes = up_lane 112 | self.left_lanes = left_lane 113 | self.right_lanes = right_lane 114 | self.width = width 115 | self.height = height 116 | self.vehicles = [] 117 | self.demands = [] 118 | self.V2V_power_dB = 23 # dBm 119 | self.V2I_power_dB = 23 # dBm 120 | self.V2V_power_dB_List = [23, 10, 5] # the power levels 121 | #self.V2V_power = 10**(self.V2V_power_dB) 122 | #self.V2I_power = 10**(self.V2I_power_dB) 123 | self.sig2_dB = -114 124 | self.bsAntGain = 8 125 | self.bsNoiseFigure = 5 126 | self.vehAntGain = 3 127 | self.vehNoiseFigure = 9 128 | self.sig2 = 10**(self.sig2_dB/10) 129 | self.V2V_Shadowing = [] 130 | self.V2I_Shadowing = [] 131 | self.delta_distance = [] 132 | self.n_RB = 20 133 | self.n_Veh = 60 134 | self.V2Vchannels = V2Vchannels(self.n_Veh, self.n_RB) # number of vehicles 135 | self.V2Ichannels = V2Ichannels(self.n_Veh, self.n_RB) 136 | 137 | self.V2V_Interference_all = np.zeros((self.n_Veh, 3, self.n_RB)) + self.sig2 138 | self.n_step = 0 139 | def add_new_vehicles(self, start_position, start_direction, start_velocity): 140 | self.vehicles.append(Vehicle(start_position, start_direction, start_velocity)) 141 | 142 | def add_new_vehicles_by_number(self, n): 143 | for i in range(n): 144 | ind = np.random.randint(0,len(self.down_lanes)) 145 | start_position = [self.down_lanes[ind], random.randint(0,self.height)] 146 | start_direction = 'd' 147 | self.add_new_vehicles(start_position,start_direction,random.randint(10,15)) 148 | start_position = [self.up_lanes[ind], random.randint(0,self.height)] 149 | start_direction = 'u' 150 | self.add_new_vehicles(start_position,start_direction,random.randint(10,15)) 151 | start_position = [random.randint(0,self.width), self.left_lanes[ind]] 152 | start_direction = 'l' 153 | self.add_new_vehicles(start_position,start_direction,random.randint(10,15)) 154 | start_position = [random.randint(0,self.width), self.right_lanes[ind]] 155 | start_direction = 'r' 156 | self.add_new_vehicles(start_position,start_direction,random.randint(10,15)) 157 | self.V2V_Shadowing = np.random.normal(0, 3, [len(self.vehicles), len(self.vehicles)]) 158 | self.V2I_Shadowing = np.random.normal(0, 8, len(self.vehicles)) 159 | self.delta_distance = np.asarray([c.velocity for c in self.vehicles]) 160 | #self.renew_channel() 161 | def renew_positions(self): 162 | # ======================================================== 163 | 
# This function update the position of each vehicle 164 | # =========================================================== 165 | i = 0 166 | #for i in range(len(self.position)): 167 | while(i < len(self.vehicles)): 168 | #print ('start iteration ', i) 169 | #print(self.position, len(self.position), self.direction) 170 | delta_distance = self.vehicles[i].velocity * self.timestep 171 | change_direction = False 172 | if self.vehicles[i].direction == 'u': 173 | #print ('len of position', len(self.position), i) 174 | for j in range(len(self.left_lanes)): 175 | 176 | if (self.vehicles[i].position[1] <=self.left_lanes[j]) and ((self.vehicles[i].position[1] + delta_distance) >= self.left_lanes[j]): # came to an cross 177 | if (random.uniform(0,1) < 0.4): 178 | self.vehicles[i].position = [self.vehicles[i].position[0] - (delta_distance - (self.left_lanes[j] - self.vehicles[i].position[1])),self.left_lanes[j] ] 179 | self.vehicles[i].direction = 'l' 180 | change_direction = True 181 | break 182 | if change_direction == False : 183 | for j in range(len(self.right_lanes)): 184 | if (self.vehicles[i].position[1] <=self.right_lanes[j]) and ((self.vehicles[i].position[1] + delta_distance) >= self.right_lanes[j]): 185 | if (random.uniform(0,1) < 0.4): 186 | self.vehicles[i].position = [self.vehicles[i].position[0] + (delta_distance + (self.right_lanes[j] - self.vehicles[i].position[1])), self.right_lanes[j] ] 187 | self.vehicles[i].direction = 'r' 188 | change_direction = True 189 | break 190 | if change_direction == False: 191 | self.vehicles[i].position[1] += delta_distance 192 | if (self.vehicles[i].direction == 'd') and (change_direction == False): 193 | #print ('len of position', len(self.position), i) 194 | for j in range(len(self.left_lanes)): 195 | if (self.vehicles[i].position[1] >=self.left_lanes[j]) and ((self.vehicles[i].position[1] - delta_distance) <= self.left_lanes[j]): # came to an cross 196 | if (random.uniform(0,1) < 0.4): 197 | self.vehicles[i].position = [self.vehicles[i].position[0] - (delta_distance - ( self.vehicles[i].position[1]- self.left_lanes[j])), self.left_lanes[j] ] 198 | #print ('down with left', self.vehicles[i].position) 199 | self.vehicles[i].direction = 'l' 200 | change_direction = True 201 | break 202 | if change_direction == False : 203 | for j in range(len(self.right_lanes)): 204 | if (self.vehicles[i].position[1] >=self.right_lanes[j]) and (self.vehicles[i].position[1] - delta_distance <= self.right_lanes[j]): 205 | if (random.uniform(0,1) < 0.4): 206 | self.vehicles[i].position = [self.vehicles[i].position[0] + (delta_distance + ( self.vehicles[i].position[1]- self.right_lanes[j])),self.right_lanes[j] ] 207 | #print ('down with right', self.vehicles[i].position) 208 | self.vehicles[i].direction = 'r' 209 | change_direction = True 210 | break 211 | if change_direction == False: 212 | self.vehicles[i].position[1] -= delta_distance 213 | if (self.vehicles[i].direction == 'r') and (change_direction == False): 214 | #print ('len of position', len(self.position), i) 215 | for j in range(len(self.up_lanes)): 216 | if (self.vehicles[i].position[0] <= self.up_lanes[j]) and ((self.vehicles[i].position[0] + delta_distance) >= self.up_lanes[j]): # came to an cross 217 | if (random.uniform(0,1) < 0.4): 218 | self.vehicles[i].position = [self.up_lanes[j], self.vehicles[i].position[1] + (delta_distance - (self.up_lanes[j] - self.vehicles[i].position[0]))] 219 | change_direction = True 220 | self.vehicles[i].direction = 'u' 221 | break 222 | if change_direction == False : 223 | for 
j in range(len(self.down_lanes)): 224 | if (self.vehicles[i].position[0] <= self.down_lanes[j]) and ((self.vehicles[i].position[0] + delta_distance) >= self.down_lanes[j]): 225 | if (random.uniform(0,1) < 0.4): 226 | self.vehicles[i].position = [self.down_lanes[j], self.vehicles[i].position[1] - (delta_distance - (self.down_lanes[j] - self.vehicles[i].position[0]))] 227 | change_direction = True 228 | self.vehicles[i].direction = 'd' 229 | break 230 | if change_direction == False: 231 | self.vehicles[i].position[0] += delta_distance 232 | if (self.vehicles[i].direction == 'l') and (change_direction == False): 233 | for j in range(len(self.up_lanes)): 234 | 235 | if (self.vehicles[i].position[0] >= self.up_lanes[j]) and ((self.vehicles[i].position[0] - delta_distance) <= self.up_lanes[j]): # came to an cross 236 | if (random.uniform(0,1) < 0.4): 237 | self.vehicles[i].position = [self.up_lanes[j], self.vehicles[i].position[1] + (delta_distance - (self.vehicles[i].position[0] - self.up_lanes[j]))] 238 | change_direction = True 239 | self.vehicles[i].direction = 'u' 240 | break 241 | if change_direction == False : 242 | for j in range(len(self.down_lanes)): 243 | if (self.vehicles[i].position[0] >= self.down_lanes[j]) and ((self.vehicles[i].position[0] - delta_distance) <= self.down_lanes[j]): 244 | if (random.uniform(0,1) < 0.4): 245 | self.vehicles[i].position = [self.down_lanes[j], self.vehicles[i].position[1] - (delta_distance - (self.vehicles[i].position[0] - self.down_lanes[j]))] 246 | change_direction = True 247 | self.vehicles[i].direction = 'd' 248 | break 249 | if change_direction == False: 250 | self.vehicles[i].position[0] -= delta_distance 251 | # if it comes to an exit 252 | if (self.vehicles[i].position[0] < 0) or (self.vehicles[i].position[1] < 0) or (self.vehicles[i].position[0] > self.width) or (self.vehicles[i].position[1] > self.height): 253 | # delete 254 | # print ('delete ', self.position[i]) 255 | if (self.vehicles[i].direction == 'u'): 256 | self.vehicles[i].direction = 'r' 257 | self.vehicles[i].position = [self.vehicles[i].position[0], self.right_lanes[-1]] 258 | else: 259 | if (self.vehicles[i].direction == 'd'): 260 | self.vehicles[i].direction = 'l' 261 | self.vehicles[i].position = [self.vehicles[i].position[0], self.left_lanes[0]] 262 | else: 263 | if (self.vehicles[i].direction == 'l'): 264 | self.vehicles[i].direction = 'u' 265 | self.vehicles[i].position = [self.up_lanes[0],self.vehicles[i].position[1]] 266 | else: 267 | if (self.vehicles[i].direction == 'r'): 268 | self.vehicles[i].direction = 'd' 269 | self.vehicles[i].position = [self.down_lanes[-1],self.vehicles[i].position[1]] 270 | 271 | i += 1 272 | def test_channel(self): 273 | # =================================== 274 | # test the V2I and the V2V channel 275 | # =================================== 276 | self.n_step = 0 277 | self.vehicles = [] 278 | n_Veh = 60 279 | self.n_Veh = n_Veh 280 | self.add_new_vehicles_by_number(int(self.n_Veh/4)) 281 | step = 1000 282 | time_step = 0.1 # every 0.1s update 283 | for i in range(step): 284 | self.renew_positions() 285 | positions = [c.position for c in self.vehicles] 286 | self.update_large_fading(positions, time_step) 287 | self.update_small_fading() 288 | print("Time step: ", i) 289 | print(" ============== V2I ===========") 290 | print("Path Loss: ", self.V2Ichannels.PathLoss) 291 | print("Shadow:", self.V2Ichannels.Shadow) 292 | print("Fast Fading: ", self.V2Ichannels.FastFading) 293 | print(" ============== V2V ===========") 294 | print("Path Loss: ", 
self.V2Vchannels.PathLoss[0:3]) 295 | print("Shadow:", self.V2Vchannels.Shadow[0:3]) 296 | print("Fast Fading: ", self.V2Vchannels.FastFading[0:3]) 297 | 298 | def update_large_fading(self, positions, time_step): 299 | self.V2Ichannels.update_positions(positions) 300 | self.V2Vchannels.update_positions(positions) 301 | self.V2Ichannels.update_pathloss() 302 | self.V2Vchannels.update_pathloss() 303 | delta_distance = time_step * np.asarray([c.velocity for c in self.vehicles]) 304 | self.V2Ichannels.update_shadow(delta_distance) 305 | self.V2Vchannels.update_shadow(delta_distance) 306 | def update_small_fading(self): 307 | self.V2Ichannels.update_fast_fading() 308 | self.V2Vchannels.update_fast_fading() 309 | 310 | def renew_neighbor(self): 311 | # ========================================== 312 | # update the neighbors of each vehicle. 313 | # =========================================== 314 | for i in range(len(self.vehicles)): 315 | self.vehicles[i].neighbors = [] 316 | self.vehicles[i].actions = [] 317 | #print('action and neighbors delete', self.vehicles[i].actions, self.vehicles[i].neighbors) 318 | Distance = np.zeros((len(self.vehicles),len(self.vehicles))) 319 | z = np.array([[complex(c.position[0],c.position[1]) for c in self.vehicles]]) 320 | Distance = abs(z.T-z) 321 | for i in range(len(self.vehicles)): 322 | sort_idx = np.argsort(Distance[:,i]) 323 | for j in range(3): 324 | self.vehicles[i].neighbors.append(sort_idx[j+1]) 325 | destination = np.random.choice(sort_idx[1:int(len(sort_idx)/5)],3, replace = False) 326 | self.vehicles[i].destinations = destination 327 | def renew_channel(self): 328 | # =========================================================================== 329 | # This function updates all the channels including V2V and V2I channels 330 | # ============================================================================= 331 | positions = [c.position for c in self.vehicles] 332 | self.V2Ichannels.update_positions(positions) 333 | self.V2Vchannels.update_positions(positions) 334 | self.V2Ichannels.update_pathloss() 335 | self.V2Vchannels.update_pathloss() 336 | delta_distance = 0.002 * np.asarray([c.velocity for c in self.vehicles]) # time slot is 2 ms. 
337 | self.V2Ichannels.update_shadow(delta_distance) 338 | self.V2Vchannels.update_shadow(delta_distance) 339 | self.V2V_channels_abs = self.V2Vchannels.PathLoss + self.V2Vchannels.Shadow + 50 * np.identity( 340 | len(self.vehicles)) 341 | self.V2I_channels_abs = self.V2Ichannels.PathLoss + self.V2Ichannels.Shadow 342 | 343 | def renew_channels_fastfading(self): 344 | # ======================================================================= 345 | # This function updates all the channels including V2V and V2I channels 346 | # ========================================================================= 347 | self.renew_channel() 348 | self.V2Ichannels.update_fast_fading() 349 | self.V2Vchannels.update_fast_fading() 350 | V2V_channels_with_fastfading = np.repeat(self.V2V_channels_abs[:, :, np.newaxis], self.n_RB, axis=2) 351 | self.V2V_channels_with_fastfading = V2V_channels_with_fastfading - self.V2Vchannels.FastFading 352 | V2I_channels_with_fastfading = np.repeat(self.V2I_channels_abs[:, np.newaxis], self.n_RB, axis=1) 353 | self.V2I_channels_with_fastfading = V2I_channels_with_fastfading - self.V2Ichannels.FastFading 354 | #print("V2I channels", self.V2I_channels_with_fastfading) 355 | 356 | def Compute_Performance_Reward_fast_fading_with_power(self, actions_power): # revising based on the fast fading part 357 | actions = actions_power.copy()[:,:,0] # the channel_selection_part 358 | power_selection = actions_power.copy()[:,:,1] 359 | Rate = np.zeros(len(self.vehicles)) 360 | Interference = np.zeros(self.n_RB) # V2V signal interference to V2I links 361 | for i in range(len(self.vehicles)): 362 | for j in range(len(actions[i,:])): 363 | if not self.activate_links[i,j]: 364 | continue 365 | #print('power selection,', power_selection[i,j]) 366 | Interference[actions[i][j]] += 10**((self.V2V_power_dB_List[power_selection[i,j]] - self.V2I_channels_with_fastfading[i, actions[i,j]] + self.vehAntGain + self.bsAntGain - self.bsNoiseFigure)/10) # fast fading 367 | 368 | self.V2I_Interference = Interference + self.sig2 369 | V2V_Interference = np.zeros((len(self.vehicles), 3)) 370 | V2V_Signal = np.zeros((len(self.vehicles), 3)) 371 | 372 | # remove the effects of none active links 373 | #print('shapes', actions.shape, self.activate_links.shape) 374 | #print(not self.activate_links) 375 | actions[(np.logical_not(self.activate_links))] = -1 376 | #print('action are', actions) 377 | for i in range(self.n_RB): 378 | indexes = np.argwhere(actions == i) 379 | for j in range(len(indexes)): 380 | #receiver_j = self.vehicles[indexes[j,0]].neighbors[indexes[j,1]] 381 | receiver_j = self.vehicles[indexes[j,0]].destinations[indexes[j,1]] 382 | # compute the V2V signal links 383 | V2V_Signal[indexes[j, 0],indexes[j, 1]] = 10**((self.V2V_power_dB_List[power_selection[indexes[j, 0],indexes[j, 1]]] - self.V2V_channels_with_fastfading[indexes[j][0]][receiver_j][i] + 2*self.vehAntGain - self.vehNoiseFigure)/10) 384 | #V2V_Signal[indexes[j, 0],indexes[j, 1]] = 10**((self.V2V_power_dB_List[0] - self.V2V_channels_with_fastfading[indexes[j][0]][receiver_j][i])/10) 385 | if i < self.n_Veh: 386 | V2V_Interference[indexes[j,0],indexes[j,1]] += 10**((self.V2I_power_dB - self.V2V_channels_with_fastfading[i][receiver_j][i]+ 2*self.vehAntGain - self.vehNoiseFigure )/10) # V2I links interference to V2V links 387 | for k in range(j+1, len(indexes)): # computer the peer V2V links 388 | #receiver_k = self.vehicles[indexes[k][0]].neighbors[indexes[k][1]] 389 | receiver_k = self.vehicles[indexes[k][0]].destinations[indexes[k][1]] 390 | 
V2V_Interference[indexes[j,0],indexes[j,1]] += 10**((self.V2V_power_dB_List[power_selection[indexes[k,0],indexes[k,1]]] - self.V2V_channels_with_fastfading[indexes[k][0]][receiver_j][i]+ 2*self.vehAntGain - self.vehNoiseFigure)/10) 391 | V2V_Interference[indexes[k,0],indexes[k,1]] += 10**((self.V2V_power_dB_List[power_selection[indexes[j,0],indexes[j,1]]] - self.V2V_channels_with_fastfading[indexes[j][0]][receiver_k][i]+ 2*self.vehAntGain - self.vehNoiseFigure)/10) 392 | 393 | self.V2V_Interference = V2V_Interference + self.sig2 394 | V2V_Rate = np.zeros(self.activate_links.shape) 395 | V2V_Rate[self.activate_links] = np.log2(1 + np.divide(V2V_Signal[self.activate_links], self.V2V_Interference[self.activate_links])) 396 | 397 | #print("V2V Rate", V2V_Rate * self.update_time_test * 1500) 398 | #print ('V2V_Signal is ', np.log(np.mean(V2V_Signal[self.activate_links]))) 399 | V2I_Signals = self.V2I_power_dB-self.V2I_channels_abs[0:min(self.n_RB,self.n_Veh)] + self.vehAntGain + self.bsAntGain - self.bsNoiseFigure 400 | V2I_Rate = np.log2(1 + np.divide(10**(V2I_Signals/10), self.V2I_Interference[0:min(self.n_RB,self.n_Veh)])) 401 | 402 | 403 | # -- compute the latency constraits -- 404 | self.demand -= V2V_Rate * self.update_time_test * 1500 # decrease the demand 405 | self.test_time_count -= self.update_time_test # compute the time left for estimation 406 | self.individual_time_limit -= self.update_time_test # compute the time left for individual V2V transmission 407 | self.individual_time_interval -= self.update_time_test # compute the time interval left for next transmission 408 | 409 | # --- update the demand --- 410 | 411 | new_active = self.individual_time_interval <= 0 412 | self.activate_links[new_active] = True 413 | self.individual_time_interval[new_active] = np.random.exponential(0.02, self.individual_time_interval[new_active].shape ) + self.V2V_limit 414 | self.individual_time_limit[new_active] = self.V2V_limit 415 | self.demand[new_active] = self.demand_amount 416 | #print("demand is", self.demand) 417 | #print('mean rate of average V2V link is', np.mean(V2V_Rate[self.activate_links])) 418 | 419 | # -- update the statistics--- 420 | early_finish = np.multiply(self.demand <= 0, self.activate_links) 421 | unqulified = np.multiply(self.individual_time_limit <=0, self.activate_links) 422 | self.activate_links[np.add(early_finish, unqulified)] = False 423 | #print('number of activate links is', np.sum(self.activate_links)) 424 | self.success_transmission += np.sum(early_finish) 425 | self.failed_transmission += np.sum(unqulified) 426 | #if self.n_step % 1000 == 0 : 427 | # self.success_transmission = 0 428 | # self.failed_transmission = 0 429 | failed_percentage = self.failed_transmission/(self.failed_transmission + self.success_transmission + 0.0001) 430 | # print('Percentage of failed', np.sum(new_active), self.failed_transmission, self.failed_transmission + self.success_transmission , failed_percentage) 431 | return V2I_Rate, failed_percentage #failed_percentage 432 | 433 | 434 | def Compute_Performance_Reward_fast_fading_with_power_asyn(self, actions_power): # revising based on the fast fading part 435 | # =================================================== 436 | # --------- Used for Testing ------- 437 | # =================================================== 438 | actions = actions_power[:,:,0] # the channel_selection_part 439 | power_selection = actions_power[:,:,1] 440 | Interference = np.zeros(self.n_RB) # Calculate the interference from V2V to V2I 441 | for i in 
range(len(self.vehicles)): 442 | for j in range(len(actions[i,:])): 443 | if not self.activate_links[i,j]: 444 | continue 445 | Interference[actions[i][j]] += 10**((self.V2V_power_dB_List[power_selection[i,j]] - \ 446 | self.V2I_channels_with_fastfading[i, actions[i,j]] + \ 447 | self.vehAntGain + self.bsAntGain - self.bsNoiseFigure)/10) 448 | self.V2I_Interference = Interference + self.sig2 449 | V2V_Interference = np.zeros((len(self.vehicles), 3)) 450 | V2V_Signal = np.zeros((len(self.vehicles), 3)) 451 | Interfence_times = np.zeros((len(self.vehicles), 3)) 452 | actions[(np.logical_not(self.activate_links))] = -1 453 | for i in range(self.n_RB): 454 | indexes = np.argwhere(actions == i) 455 | for j in range(len(indexes)): 456 | #receiver_j = self.vehicles[indexes[j,0]].neighbors[indexes[j,1]] 457 | receiver_j = self.vehicles[indexes[j,0]].destinations[indexes[j,1]] 458 | V2V_Signal[indexes[j, 0],indexes[j, 1]] = 10**((self.V2V_power_dB_List[power_selection[indexes[j, 0],indexes[j, 1]]] -\ 459 | self.V2V_channels_with_fastfading[indexes[j][0]][receiver_j][i] + 2*self.vehAntGain - self.vehNoiseFigure)/10) 460 | #V2V_Signal[indexes[j, 0],indexes[j, 1]] = 10**((self.V2V_power_dB_List[0] - self.V2V_channels_with_fastfading[indexes[j][0]][receiver_j][i])/10) 461 | if i= 0): 602 | continue 603 | V2V_Interference[k, m, channel_selection[i,j]] += 10**((self.V2V_power_dB_List[power_selection[i,j]] -\ 604 | self.V2V_channels_with_fastfading[i][self.vehicles[k].destinations[m]][channel_selection[i,j]] + 2*self.vehAntGain - self.vehNoiseFigure)/10) 605 | 606 | self.V2V_Interference_all = 10 * np.log10(V2V_Interference) 607 | 608 | 609 | def renew_demand(self): 610 | # generate a new demand of a V2V 611 | self.demand = self.demand_amount*np.ones((self.n_RB,3)) 612 | self.time_limit = 10 613 | def act_for_training(self, actions, idx): 614 | # ============================================= 615 | # This function gives rewards for training 616 | # =========================================== 617 | rewards_list = np.zeros(self.n_RB) 618 | action_temp = actions.copy() 619 | self.activate_links = np.ones((self.n_Veh,3), dtype = 'bool') 620 | V2I_rewardlist, V2V_rewardlist, time_left = self.Compute_Performance_Reward_Batch(action_temp,idx) 621 | self.renew_positions() 622 | self.renew_channels_fastfading() 623 | self.Compute_Interference(actions) 624 | rewards_list = rewards_list.T.reshape([-1]) 625 | V2I_rewardlist = V2I_rewardlist.T.reshape([-1]) 626 | V2V_rewardlist = V2V_rewardlist.T.reshape([-1]) 627 | V2I_reward = (V2I_rewardlist[actions[idx[0],idx[1], 0]+ 20*actions[idx[0],idx[1], 1]] -\ 628 | np.min(V2I_rewardlist))/(np.max(V2I_rewardlist) -np.min(V2I_rewardlist) + 0.000001) 629 | V2V_reward = (V2V_rewardlist[actions[idx[0],idx[1], 0]+ 20*actions[idx[0],idx[1], 1]] -\ 630 | np.min(V2V_rewardlist))/(np.max(V2V_rewardlist) -np.min(V2V_rewardlist) + 0.000001) 631 | lambdda = 0.1 632 | #print ("Reward", V2I_reward, V2V_reward, time_left) 633 | t = lambdda * V2I_reward + (1-lambdda) * V2V_reward 634 | #print("time left", time_left) 635 | #return t 636 | return t - (self.V2V_limit - time_left)/self.V2V_limit 637 | 638 | def act_asyn(self, actions): 639 | self.n_step += 1 640 | if self.n_step % 10 == 0: 641 | self.renew_positions() 642 | self.renew_channels_fastfading() 643 | reward = self.Compute_Performance_Reward_fast_fading_with_power_asyn(actions) 644 | self.Compute_Interference(actions) 645 | return reward 646 | def act(self, actions): 647 | # simulate the next state after the action is given 648 | 
self.n_step += 1 649 | reward = self.Compute_Performance_Reward_fast_fading_with_power(actions) 650 | self.renew_positions() 651 | self.renew_channels_fastfading() 652 | self.Compute_Interference(actions) 653 | return reward 654 | 655 | def new_random_game(self, n_Veh = 0): 656 | # make a new game 657 | self.n_step = 0 658 | self.vehicles = [] 659 | if n_Veh > 0: 660 | self.n_Veh = n_Veh 661 | self.add_new_vehicles_by_number(int(self.n_Veh/4)) 662 | self.V2Vchannels = V2Vchannels(self.n_Veh, self.n_RB) # number of vehicles 663 | self.V2Ichannels = V2Ichannels(self.n_Veh, self.n_RB) 664 | self.renew_channels_fastfading() 665 | self.renew_neighbor() 666 | self.demand_amount = 30 667 | self.demand = self.demand_amount * np.ones((self.n_Veh,3)) 668 | self.test_time_count = 10 669 | self.V2V_limit = 0.1 # 100 ms tolerable V2V latency 670 | self.individual_time_limit = self.V2V_limit * np.ones((self.n_Veh,3)) 671 | self.individual_time_interval = np.random.exponential(0.05, (self.n_Veh,3)) 672 | self.UnsuccessfulLink = np.zeros((self.n_Veh,3)) 673 | self.success_transmission = 0 674 | self.failed_transmission = 0 675 | self.update_time_train = 0.01 # 10 ms update time for training 676 | self.update_time_test = 0.002 # 2 ms update time for testing 677 | self.update_time_asyn = 0.0002 # 0.2 ms to update one subset of the vehicles; for each vehicle, the update time is 2 ms 678 | self.activate_links = np.zeros((self.n_Veh,3), dtype='bool') 679 | 680 | if __name__ == "__main__": 681 | up_lanes = [3.5/2,3.5/2 + 3.5,250+3.5/2, 250+3.5+3.5/2, 500+3.5/2, 500+3.5+3.5/2] 682 | down_lanes = [250-3.5-3.5/2,250-3.5/2,500-3.5-3.5/2,500-3.5/2,750-3.5-3.5/2,750-3.5/2] 683 | left_lanes = [3.5/2,3.5/2 + 3.5,433+3.5/2, 433+3.5+3.5/2, 866+3.5/2, 866+3.5+3.5/2] 684 | right_lanes = [433-3.5-3.5/2,433-3.5/2,866-3.5-3.5/2,866-3.5/2,1299-3.5-3.5/2,1299-3.5/2] 685 | width = 750 686 | height = 1299 687 | Env = Environ(down_lanes,up_lanes,left_lanes,right_lanes, width, height) 688 | Env.test_channel() 689 | -------------------------------------------------------------------------------- /MR_vs_NV.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineer1999/Double-Deep-Q-Learning-for-Resource-Allocation/8e4d3973ec8fe86e8afdce7715ce631970a0d786/MR_vs_NV.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Reinforcement Learning based Resource Allocation for V2V Communications 2 | 3 | This repository contains an implementation of the `double deep-Q learning` reinforcement learning algorithm for the resource allocation problem in vehicle-to-vehicle (V2V) communication, based on the research paper ["Deep Reinforcement Learning based Resource Allocation for V2V Communications"](https://ieeexplore.ieee.org/document/8633948) by Hao Ye, Geoffrey Ye Li, and Biing-Hwang Fred Juang. 4 | The original code, which implements `deep-Q learning`, was developed by [IIT-lab, Paper-with-Code-of-Wireless-communication-Based-on-DL](https://github.com/haoyye/ResourceAllocationReinforcementLearning). 5 | 6 | I have made some modifications to the code so that the results of the research paper can be reproduced.
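The core change relative to the original deep-Q implementation is the training target computed in `q_learning_mini_batch` (agent.py): the online network selects the next action and the target network evaluates it, which reduces the over-estimation of Q-values. A minimal NumPy sketch of this double-DQN target, with illustrative array names (the discount of 0.5 matches the value set in `agent.py`):

```python
import numpy as np

def double_dqn_target(reward, q_online_next, q_target_next, discount=0.5):
    # Action selection uses the online network ...
    best_action = np.argmax(q_online_next, axis=1)
    # ... while action evaluation uses the target network.
    evaluated_q = q_target_next[np.arange(len(best_action)), best_action]
    return reward + discount * evaluated_q

# Toy batch of two transitions with three actions each
r = np.array([1.0, 0.5])
q_online = np.array([[0.2, 0.9, 0.1], [0.4, 0.3, 0.8]])
q_target = np.array([[0.3, 0.7, 0.2], [0.5, 0.1, 0.6]])
print(double_dqn_target(r, q_online, q_target))  # 1.0 + 0.5*0.7 = 1.35 and 0.5 + 0.5*0.6 = 0.8
```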
7 | 8 | # Installation and use 9 | 10 | ## Linux 11 | 12 | ### Fork the repository and open a terminal using `ctrl+alt+t` 13 | ``` 14 | cd 15 | 16 | pip3 install -r requirement.txt 17 | ``` 18 | 19 | ### After a successful installation, reopen the terminal and use the command below to run the program. 20 | 21 | ``` 22 | cd 23 | 24 | python3 agent.py 25 | ``` 26 | Running this code takes a considerable amount of time (about 36 hours on a 7th-gen i7). 27 | 28 | ## Tips and Tricks 29 | Use the command below to save the terminal output to a .txt file; this is helpful when debugging the code. 30 | 31 | ``` 32 | python3 agent.py 2>&1 | tee SomeFile.txt 33 | ``` 34 | 35 | Run the code using the above command. 36 | 37 | # Results reproduced using Deep-Q learning 38 | 39 | ## Sum Rate of V2I vs Number of Vehicles 40 | 41 | | ![](MR_vs_NV.png) | 42 | |:--:| 43 | | *Figure-1* | 44 | 45 | 46 | > The above figure shows the sum rate of the V2I links versus the number of vehicles. From the figure, we can infer that as the number of vehicles increases, the number of V2V links increases; as a result, the interference to the V2I links grows and the V2I capacity drops. 47 | 48 | 49 | ## Probability of Satisfied V2V links vs the number of vehicles 50 | 51 | | ![](S_V2V_link_VS_NV.png) | 52 | |:--:| 53 | | *Figure-2* | 54 | 55 | > The given figure shows the probability that the V2V links satisfy the latency constraint versus the number of vehicles. From the figure, we can infer that as the number of vehicles increases, the number of V2V links increases; as a result, it becomes more difficult to ensure that every vehicle satisfies the latency constraint. 56 | 57 | ## The Probability of power level selection with the remaining time for transmission 58 | 59 | | ![](dqn.png) | 60 | |:--:| 61 | | *Figure-3* | 62 | 63 | > The above figure shows the probability of the agent choosing each power level for different amounts of time left for transmission. In general, the probability of choosing the maximum power is low when there is abundant time for transmission, while the agent selects the maximum power with high probability to satisfy the V2V latency constraint when only a small amount of time is left. However, when only 10 ms are left, the probability of choosing the maximum power level suddenly drops to about 0.6, because the agent learns that even with the maximum power the latency constraint will be violated with high probability, and switching to a lower power yields more reward by reducing interference to the V2I and other V2V links. 64 | 65 | > Therefore, we can infer that the improvement of the deep reinforcement learning based approach comes from learning the implicit relationship between the state and the reward function. 66 | 67 | 68 | # Effect of Double Deep-Q Learning 69 | 70 | ## The Probability of power level selection with the remaining time for transmission 71 | 72 | | ![](ddqn.png) | 73 | |:--:| 74 | | *Figure-4* | 75 | 76 | 77 | > Figure-4 shows the probability of the agent choosing each power level for different amounts of time left for transmission when Double Deep-Q Learning is used. Compared with Figure-3, the probability of choosing the maximum power is lower when there is abundant time for transmission. Also, when only a small amount of time is left, the probability of selecting the maximum power to ensure the V2V latency constraint is higher.
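Figures 3 and 4 above are generated by logging, for every V2V decision during testing, the remaining transmission time together with the selected power level, and then turning per-level histograms into probabilities over the time axis (this is what `play()` in `agent.py` does). A condensed sketch of that post-processing with illustrative names; using a shared bin range of 0–0.1 s (the V2V latency budget) is an assumption that keeps the three curves directly comparable:

```python
import numpy as np

def power_selection_probability(time_left_by_level, bins=10, t_max=0.1):
    # time_left_by_level[k] holds the remaining-time samples at which power level k was chosen.
    counts = np.array([np.histogram(samples, bins=bins, range=(0.0, t_max))[0]
                       for samples in time_left_by_level], dtype=float)
    total = counts.sum(axis=0)
    return counts / np.maximum(total, 1e-9)  # probability of each power level per time bin
```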
78 | 79 | > Apart from this, when the agent has abundant time for transmission it will select low power transmission to reduce resource usage. 80 | -------------------------------------------------------------------------------- /S_V2V_link_VS_NV.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineer1999/Double-Deep-Q-Learning-for-Resource-Allocation/8e4d3973ec8fe86e8afdce7715ce631970a0d786/S_V2V_link_VS_NV.png -------------------------------------------------------------------------------- /agent.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | import time 4 | import random 5 | import numpy as np 6 | from Environment import * 7 | from base import BaseModel 8 | from replay_memory import ReplayMemory 9 | from utils import save_pkl, load_pkl 10 | import tensorflow as tf 11 | import matplotlib.pyplot as plt 12 | 13 | 14 | class Agent(BaseModel): 15 | def __init__(self, config, environment, sess): 16 | self.sess = sess 17 | self.weight_dir = 'weight' 18 | self.env = environment 19 | #self.history = History(self.config) 20 | model_dir = './Model/a.model' 21 | self.memory = ReplayMemory(model_dir) 22 | self.max_step = 100000 23 | self.RB_number = 20 24 | self.num_vehicle = len(self.env.vehicles) 25 | print('-------------------------------------------') 26 | print(self.num_vehicle) 27 | print('-------------------------------------------') 28 | self.action_all_with_power = np.zeros([self.num_vehicle, 3, 2],dtype = 'int32') # this is actions that taken by V2V links with power 29 | self.action_all_with_power_training = np.zeros([20, 3, 2],dtype = 'int32') # this is actions that taken by V2V links with power 30 | self.reward = [] 31 | self.learning_rate = 0.01 32 | self.learning_rate_minimum = 0.0001 33 | self.learning_rate_decay = 0.96 34 | self.learning_rate_decay_step = 500000 35 | self.target_q_update_step = 100 36 | self.discount = 0.5 37 | self.double_q = True 38 | print("------------") 39 | print(self.double_q) 40 | print("------------") 41 | self.build_dqn() 42 | self.V2V_number = 3 * len(self.env.vehicles) # every vehicle need to communicate with 3 neighbors 43 | self.training = True 44 | #self.actions_all = np.zeros([len(self.env.vehicles),3], dtype = 'int32') 45 | def merge_action(self, idx, action): 46 | self.action_all_with_power[idx[0], idx[1], 0] = action % self.RB_number 47 | self.action_all_with_power[idx[0], idx[1], 1] = int(np.floor(action/self.RB_number)) 48 | def get_state(self, idx): 49 | # =============== 50 | # Get State from the environment 51 | # ============= 52 | vehicle_number = len(self.env.vehicles) 53 | V2V_channel = (self.env.V2V_channels_with_fastfading[idx[0],self.env.vehicles[idx[0]].destinations[idx[1]],:] - 80)/60 54 | V2I_channel = (self.env.V2I_channels_with_fastfading[idx[0], :] - 80)/60 55 | V2V_interference = (-self.env.V2V_Interference_all[idx[0],idx[1],:] - 60)/60 56 | NeiSelection = np.zeros(self.RB_number) 57 | for i in range(3): 58 | for j in range(3): 59 | if self.training: 60 | NeiSelection[self.action_all_with_power_training[self.env.vehicles[idx[0]].neighbors[i], j, 0 ]] = 1 61 | else: 62 | NeiSelection[self.action_all_with_power[self.env.vehicles[idx[0]].neighbors[i], j, 0 ]] = 1 63 | 64 | for i in range(3): 65 | if i == idx[1]: 66 | continue 67 | if self.training: 68 | if self.action_all_with_power_training[idx[0],i,0] >= 0: 69 | 
NeiSelection[self.action_all_with_power_training[idx[0],i,0]] = 1 70 | else: 71 | if self.action_all_with_power[idx[0],i,0] >= 0: 72 | NeiSelection[self.action_all_with_power[idx[0],i,0]] = 1 73 | time_remaining = np.asarray([self.env.demand[idx[0],idx[1]] / self.env.demand_amount]) 74 | load_remaining = np.asarray([self.env.individual_time_limit[idx[0],idx[1]] / self.env.V2V_limit]) 75 | #print('shapes', time_remaining.shape,load_remaining.shape) 76 | return np.concatenate((V2I_channel, V2V_interference, V2V_channel, NeiSelection, time_remaining, load_remaining))#,time_remaining)) 77 | #return np.concatenate((V2I_channel, V2V_interference, V2V_channel, time_remaining, load_remaining))#,time_remaining)) 78 | def predict(self, s_t, step, test_ep = False): 79 | # ========================== 80 | # Select actions 81 | # ====================== 82 | ep = 1/(step/1000000 + 1) 83 | if random.random() < ep and test_ep == False: # epsion to balance the exporation and exploition 84 | action = np.random.randint(60) 85 | else: 86 | action = self.q_action.eval({self.s_t:[s_t]})[0] 87 | return action 88 | def observe(self, prestate, state, reward, action): 89 | # ----------- 90 | # Collect Data for Training 91 | # --------- 92 | self.memory.add(prestate, state, reward, action) # add the state and the action and the reward to the memory 93 | #print(self.step) 94 | if self.step > 0: 95 | if self.step % 50 == 0: 96 | #print('Training') 97 | self.q_learning_mini_batch() # training a mini batch 98 | self.save_weight_to_pkl() 99 | if self.step % self.target_q_update_step == self.target_q_update_step - 1: 100 | #print("Update Target Q network:") 101 | self.update_target_q_network() # ?? what is the meaning ?? 102 | def train(self): 103 | num_game, self.update_count, ep_reward = 0, 0, 0. 104 | total_reward, self.total_loss, self.total_q = 0.,0.,0. 105 | max_avg_ep_reward = 0 106 | ep_reward, actions = [], [] 107 | mean_big = 0 108 | number_big = 0 109 | mean_not_big = 0 110 | number_not_big = 0 111 | self.env.new_random_game(20) 112 | for self.step in (range(0, 10000)): # need more configuration 113 | if self.step == 0: # initialize set some varibles 114 | num_game, self.update_count,ep_reward = 0, 0, 0. 115 | total_reward, self.total_loss, self.total_q = 0., 0., 0. 
116 | ep_reward, actions = [], [] 117 | 118 | # prediction 119 | # action = self.predict(self.history.get()) 120 | if (self.step % 2000 == 1): 121 | self.env.new_random_game(20) 122 | print(self.step) 123 | state_old = self.get_state([0,0]) 124 | #print("state", state_old) 125 | self.training = True 126 | for k in range(1): 127 | for i in range(len(self.env.vehicles)): 128 | for j in range(3): 129 | state_old = self.get_state([i,j]) 130 | action = self.predict(state_old, self.step) 131 | #self.merge_action([i,j], action) 132 | self.action_all_with_power_training[i, j, 0] = action % self.RB_number 133 | self.action_all_with_power_training[i, j, 1] = int(np.floor(action/self.RB_number)) 134 | reward_train = self.env.act_for_training(self.action_all_with_power_training, [i,j]) 135 | state_new = self.get_state([i,j]) 136 | self.observe(state_old, state_new, reward_train, action) 137 | if (self.step % 2000 == 0) and (self.step > 0): 138 | # testing 139 | self.training = False 140 | number_of_game = 10 141 | if (self.step % 10000 == 0) and (self.step > 0): 142 | number_of_game = 50 143 | if (self.step == 38000): 144 | number_of_game = 100 145 | V2I_Rate_list = np.zeros(number_of_game) 146 | Fail_percent_list = np.zeros(number_of_game) 147 | for game_idx in range(number_of_game): 148 | self.env.new_random_game(self.num_vehicle) 149 | test_sample = 200 150 | Rate_list = [] 151 | print('test game idx:', game_idx) 152 | for k in range(test_sample): 153 | action_temp = self.action_all_with_power.copy() 154 | for i in range(len(self.env.vehicles)): 155 | self.action_all_with_power[i,:,0] = -1 156 | sorted_idx = np.argsort(self.env.individual_time_limit[i,:]) 157 | for j in sorted_idx: 158 | state_old = self.get_state([i,j]) 159 | action = self.predict(state_old, self.step, True) 160 | self.merge_action([i,j], action) 161 | if i % (len(self.env.vehicles)/10) == 1: 162 | action_temp = self.action_all_with_power.copy() 163 | reward, percent = self.env.act_asyn(action_temp) #self.action_all) 164 | Rate_list.append(np.sum(reward)) 165 | #print("actions", self.action_all_with_power) 166 | V2I_Rate_list[game_idx] = np.mean(np.asarray(Rate_list)) 167 | Fail_percent_list[game_idx] = percent 168 | #print("action is", self.action_all_with_power) 169 | print('failure probability is, ', percent) 170 | #print('action is that', action_temp[0,:]) 171 | #print("OUT") 172 | self.save_weight_to_pkl() 173 | print ('The number of vehicle is ', len(self.env.vehicles)) 174 | print ('Mean of the V2I rate is that ', np.mean(V2I_Rate_list)) 175 | print('Mean of Fail percent is that ', np.mean(Fail_percent_list)) 176 | #print('Test Reward is ', np.mean(test_result)) 177 | 178 | 179 | 180 | 181 | def q_learning_mini_batch(self): 182 | 183 | # Training the DQN model 184 | # ------ 185 | #s_t, action,reward, s_t_plus_1, terminal = self.memory.sample() 186 | s_t, s_t_plus_1, action, reward = self.memory.sample() 187 | #print() 188 | #print('samples:', s_t[0:10], s_t_plus_1[0:10], action[0:10], reward[0:10]) 189 | t = time.time() 190 | if self.double_q: #double Q learning 191 | pred_action = self.q_action.eval({self.s_t: s_t_plus_1}) 192 | q_t_plus_1_with_pred_action = self.target_q_with_idx.eval({self.target_s_t: s_t_plus_1, self.target_q_idx: [[idx, pred_a] for idx, pred_a in enumerate(pred_action)]}) 193 | target_q_t = self.discount * q_t_plus_1_with_pred_action + reward 194 | else: 195 | q_t_plus_1 = self.target_q.eval({self.target_s_t: s_t_plus_1}) 196 | max_q_t_plus_1 = np.max(q_t_plus_1, axis=1) 197 | target_q_t = 
self.discount * max_q_t_plus_1 +reward 198 | _, q_t, loss,w = self.sess.run([self.optim, self.q, self.loss, self.w], {self.target_q_t: target_q_t, self.action:action, self.s_t:s_t, self.learning_rate_step: self.step}) # training the network 199 | 200 | print('loss is ', loss) 201 | self.total_loss += loss 202 | self.total_q += q_t.mean() 203 | self.update_count += 1 204 | 205 | 206 | def build_dqn(self): 207 | # --- Building the DQN ------- 208 | self.w = {} 209 | self.t_w = {} 210 | 211 | initializer = tf. truncated_normal_initializer(0, 0.02) 212 | activation_fn = tf.nn.relu 213 | n_hidden_1 = 500 214 | n_hidden_2 = 250 215 | n_hidden_3 = 120 216 | n_input = 82 217 | n_output = 60 218 | def encoder(x): 219 | weights = { 220 | 'encoder_h1': tf.Variable(tf.truncated_normal([n_input, n_hidden_1],stddev=0.1)), 221 | 'encoder_h2': tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2],stddev=0.1)), 222 | 'encoder_h3': tf.Variable(tf.truncated_normal([n_hidden_2, n_hidden_3],stddev=0.1)), 223 | 'encoder_h4': tf.Variable(tf.truncated_normal([n_hidden_3, n_output],stddev=0.1)), 224 | 'encoder_b1': tf.Variable(tf.truncated_normal([n_hidden_1],stddev=0.1)), 225 | 'encoder_b2': tf.Variable(tf.truncated_normal([n_hidden_2],stddev=0.1)), 226 | 'encoder_b3': tf.Variable(tf.truncated_normal([n_hidden_3],stddev=0.1)), 227 | 'encoder_b4': tf.Variable(tf.truncated_normal([n_output],stddev=0.1)), 228 | 229 | } 230 | layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['encoder_h1']), weights['encoder_b1'])) 231 | layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['encoder_h2']), weights['encoder_b2'])) 232 | layer_3 = tf.nn.relu(tf.add(tf.matmul(layer_2, weights['encoder_h3']), weights['encoder_b3'])) 233 | layer_4 = tf.nn.relu(tf.add(tf.matmul(layer_3, weights['encoder_h4']), weights['encoder_b4'])) 234 | return layer_4, weights 235 | with tf.variable_scope('prediction'): 236 | self.s_t = tf.placeholder('float32',[None, n_input]) 237 | self.q, self.w = encoder(self.s_t) 238 | self.q_action = tf.argmax(self.q, dimension = 1) 239 | with tf.variable_scope('target'): 240 | self.target_s_t = tf.placeholder('float32', [None, n_input]) 241 | self.target_q, self.target_w = encoder(self.target_s_t) 242 | self.target_q_idx = tf.placeholder('int32', [None,None], 'output_idx') 243 | self.target_q_with_idx = tf.gather_nd(self.target_q, self.target_q_idx) 244 | with tf.variable_scope('pred_to_target'): 245 | self.t_w_input = {} 246 | self.t_w_assign_op = {} 247 | for name in self.w.keys(): 248 | print('name in self w keys', name) 249 | self.t_w_input[name] = tf.placeholder('float32', self.target_w[name].get_shape().as_list(),name = name) 250 | self.t_w_assign_op[name] = self.target_w[name].assign(self.t_w_input[name]) 251 | 252 | def clipped_error(x): 253 | try: 254 | return tf.select(tf.abs(x) < 1.0, 0.5 * tf.square(x), tf.abs(x) - 0.5) 255 | except: 256 | return tf.where(tf.abs(x) < 1.0, 0.5 * tf.square(x), tf.abs(x) - 0.5) 257 | 258 | with tf.variable_scope('optimizer'): 259 | self.target_q_t = tf.placeholder('float32', None, name='target_q_t') 260 | self.action = tf.placeholder('int32',None, name = 'action') 261 | action_one_hot = tf.one_hot(self.action, n_output, 1.0, 0.0, name='action_one_hot') 262 | q_acted = tf.reduce_sum(self.q * action_one_hot, reduction_indices = 1, name='q_acted') 263 | self.delta = self.target_q_t - q_acted 264 | self.global_step = tf.Variable(0, trainable=False) 265 | self.loss = tf.reduce_mean(tf.square(self.delta), name = 'loss') 266 | self.learning_rate_step = tf.placeholder('int64', 
None, name='learning_rate_step') 267 | self.learning_rate_op = tf.maximum(self.learning_rate_minimum, tf.train.exponential_decay(self.learning_rate, self.learning_rate_step, self.learning_rate_decay_step, self.learning_rate_decay, staircase=True)) 268 | self.optim = tf.train.RMSPropOptimizer(self.learning_rate_op, momentum=0.95, epsilon=0.01).minimize(self.loss) 269 | 270 | tf.initialize_all_variables().run() 271 | self.update_target_q_network() 272 | 273 | 274 | 275 | def update_target_q_network(self): 276 | for name in self.w.keys(): 277 | self.t_w_assign_op[name].eval({self.t_w_input[name]: self.w[name].eval()}) 278 | 279 | def save_weight_to_pkl(self): 280 | if not os.path.exists(self.weight_dir): 281 | os.makedirs(self.weight_dir) 282 | for name in self.w.keys(): 283 | save_pkl(self.w[name].eval(), os.path.join(self.weight_dir,"%s.pkl" % name)) 284 | def load_weight_from_pkl(self): 285 | with tf.variable_scope('load_pred_from_pkl'): 286 | self.w_input = {} 287 | self.w_assign_op = {} 288 | for name in self.w.keys(): 289 | self.w_input[name] = tf.placeholder('float32') 290 | self.w_assign_op[name] = self.w[name].assign(self.w_input[name]) 291 | for name in self.w.keys(): 292 | self.w_assign_op[name].eval({self.w_input[name]:load_pkl(os.path.join(self.weight_dir, "%s.pkl" % name))}) 293 | self.update_target_q_network() 294 | 295 | def play(self, n_step = 100, n_episode = 100, test_ep = None, render = False): 296 | number_of_game = 100 297 | V2I_Rate_list = np.zeros(number_of_game) 298 | Fail_percent_list = np.zeros(number_of_game) 299 | self.load_weight_from_pkl() 300 | self.training = False 301 | 302 | 303 | for game_idx in range(number_of_game): 304 | self.env.new_random_game(self.num_vehicle) 305 | test_sample = 200 306 | Rate_list = [] 307 | print('test game idx:', game_idx) 308 | print('The number of vehicle is ', len(self.env.vehicles)) 309 | time_left_list = [] 310 | power_select_list_0 = [] 311 | power_select_list_1 = [] 312 | power_select_list_2 = [] 313 | 314 | for k in range(test_sample): 315 | #print(k) 316 | action_temp = self.action_all_with_power.copy() 317 | for i in range(len(self.env.vehicles)): 318 | self.action_all_with_power[i, :, 0] = -1 319 | sorted_idx = np.argsort(self.env.individual_time_limit[i, :]) 320 | for j in sorted_idx: 321 | state_old = self.get_state([i, j]) 322 | time_left_list.append(state_old[-1]) 323 | action = self.predict(state_old, 0, True) 324 | 325 | if state_old[-1] <=0: 326 | continue 327 | power_selection = int(np.floor(action/self.RB_number)) 328 | if power_selection == 0: 329 | power_select_list_0.append(state_old[-1]) 330 | 331 | if power_selection == 1: 332 | power_select_list_1.append(state_old[-1]) 333 | if power_selection == 2: 334 | power_select_list_2.append(state_old[-1]) 335 | 336 | self.merge_action([i, j], action) 337 | if i % (len(self.env.vehicles) / 10) == 1: 338 | action_temp = self.action_all_with_power.copy() 339 | reward, percent = self.env.act_asyn(action_temp) # self.action_all) 340 | Rate_list.append(np.sum(reward)) 341 | # print("actions", self.action_all_with_power) 342 | 343 | number_0, bin_edges = np.histogram(power_select_list_0, bins = 10) 344 | 345 | number_1, bin_edges = np.histogram(power_select_list_1, bins = 10) 346 | 347 | number_2, bin_edges = np.histogram(power_select_list_2, bins = 10) 348 | 349 | 350 | p_0 = number_0 / (number_0 + number_1 + number_2) 351 | p_1 = number_1 / (number_0 + number_1 + number_2) 352 | p_2 = number_2 / (number_0 + number_1 + number_2) 353 | plt.figure() 354 | 
plt.plot(bin_edges[:-1]*0.1 + 0.01, p_0, 'b*-', label='Power Level 23 dB') 355 | plt.plot(bin_edges[:-1]*0.1 + 0.01, p_1, 'rs-', label='Power Level 10 dB') 356 | plt.plot(bin_edges[:-1]*0.1 + 0.01, p_2, 'go-', label='Power Level 5 dB') 357 | plt.xlim([0,0.12]) 358 | plt.xlabel("Time left for V2V transmission (s)") 359 | plt.ylabel("Probability of power selection") 360 | plt.legend() 361 | plt.grid() 362 | plt.savefig() 363 | #plt.show() 364 | 365 | V2I_Rate_list[game_idx] = np.mean(np.asarray(Rate_list)) 366 | Fail_percent_list[game_idx] = percent 367 | 368 | print('Mean of the V2I rate is that ', np.mean(V2I_Rate_list[0:game_idx] )) 369 | print('Mean of Fail percent is that ',percent, np.mean(Fail_percent_list[0:game_idx])) 370 | # print('action is that', action_temp[0,:]) 371 | 372 | print('The number of vehicle is ', len(self.env.vehicles)) 373 | print('Mean of the V2I rate is that ', np.mean(V2I_Rate_list)) 374 | print('Mean of Fail percent is that ', np.mean(Fail_percent_list)) 375 | # print('Test Reward is ', np.mean(test_result)) 376 | 377 | 378 | def main(_): 379 | 380 | up_lanes = [3.5/2,3.5/2 + 3.5,250+3.5/2, 250+3.5+3.5/2, 500+3.5/2, 500+3.5+3.5/2] 381 | down_lanes = [250-3.5-3.5/2,250-3.5/2,500-3.5-3.5/2,500-3.5/2,750-3.5-3.5/2,750-3.5/2] 382 | left_lanes = [3.5/2,3.5/2 + 3.5,433+3.5/2, 433+3.5+3.5/2, 866+3.5/2, 866+3.5+3.5/2] 383 | right_lanes = [433-3.5-3.5/2,433-3.5/2,866-3.5-3.5/2,866-3.5/2,1299-3.5-3.5/2,1299-3.5/2] 384 | width = 750 385 | height = 1299 386 | Env = Environ(down_lanes,up_lanes,left_lanes,right_lanes, width, height) 387 | Env.new_random_game() 388 | ''' 389 | gpu_options = tf.GPUOptions( 390 | per_process_gpu_memory_fraction=calc_gpu_fraction(FLAGS.gpu_fraction)) 391 | config = tf.ConfigProto() 392 | config.gpu_options.allow_growth = True 393 | ''' 394 | with tf.Session(config=tf.ConfigProto()) as sess: 395 | config = [] 396 | agent = Agent(config, Env, sess) 397 | #agent.play() 398 | agent.train() 399 | agent.play() 400 | 401 | if __name__ == '__main__': 402 | tf.app.run() 403 | 404 | 405 | 406 | 407 | 408 | -------------------------------------------------------------------------------- /base.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | 4 | 5 | class BaseModel(object): 6 | 7 | def __init__(self, config): 8 | self._saver = None 9 | self.config = config 10 | 11 | try: 12 | self._attr = config.__dict__['__flags'] 13 | except: 14 | self._attr = class_var(config) 15 | 16 | self.config = config 17 | for attr in self._attrs: 18 | name = attr if not attr.startswith('_') else attr[1:] 19 | setattr(self, name, getattr(self.config, attr)) 20 | 21 | 22 | 23 | def save_model(self, step=None): 24 | print(" [*] Saving checkpoints...") 25 | model_name = type(self).__name__ 26 | 27 | if not os.path.exists(self.checkpoint_dir): 28 | os.makedirs(self.checkpoint_dir) 29 | self.saver.save(self.sess, self.checkpoint_dir, global_step=step) 30 | 31 | def load_model(self): 32 | print(" [*] Loading checkpoints...") 33 | 34 | ckpt = tf.train.get_checkpoint_state(self.checkpoint_dir) 35 | if ckpt and ckpt.model_checkpoint_path: 36 | ckpt_name = os.path.basename(ckpt.model_checkpoint_path) 37 | fname = os.path.join(self.checkpoint_dir, ckpt_name) 38 | self.saver.restore(self.sess, fname) 39 | print(" [*] Load SUCCESS: %s" % fname) 40 | return True 41 | else: 42 | print(" [!] 
Load FAILED: %s" % self.checkpoint_dir) 43 | return False 44 | 45 | @property 46 | def checkpoint_dir(self): 47 | return os.path.join('checkpoints', self.model_dir) 48 | 49 | @property 50 | def model_dir(self): 51 | model_dir = self.config.env_name 52 | for k, v in self._attrs.items(): 53 | if not k.startswith('_') and k not in ['display']: 54 | model_dir += "/%s-%s" % (k, ",".join([str(i) for i in v]) 55 | if type(v) == list else v) 56 | return model_dir + '/' 57 | 58 | @property 59 | def saver(self): 60 | if self._saver == None: 61 | self._saver = tf.train.Saver(max_to_keep=10) 62 | return self._saver 63 | -------------------------------------------------------------------------------- /ddqn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineer1999/Double-Deep-Q-Learning-for-Resource-Allocation/8e4d3973ec8fe86e8afdce7715ce631970a0d786/ddqn.png -------------------------------------------------------------------------------- /dqn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineer1999/Double-Deep-Q-Learning-for-Resource-Allocation/8e4d3973ec8fe86e8afdce7715ce631970a0d786/dqn.png -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import random 3 | import tensorflow as tf 4 | from agent import Agent 5 | from Environment import * 6 | flags = tf.app.flags 7 | 8 | # Model 9 | flags.DEFINE_string('model', 'm1', 'Type of model') 10 | flags.DEFINE_boolean('dueling', False, 'Whether to use dueling deep q-network') 11 | flags.DEFINE_boolean('double_q', False, 'Whether to use double q-learning') 12 | 13 | # Environment 14 | flags.DEFINE_string('env_name', 'Breakout-v0', 'The name of gym environment to use') 15 | flags.DEFINE_integer('action_repeat', 4, 'The number of action to be repeated') 16 | 17 | # Etc 18 | flags.DEFINE_boolean('use_gpu', True, 'Whether to use gpu or not') 19 | flags.DEFINE_string('gpu_fraction', '1/1', 'idx / # of gpu fraction e.g. 
1/3, 2/3, 3/3') 20 | flags.DEFINE_boolean('display', False, 'Whether to do display the game screen or not') 21 | flags.DEFINE_boolean('is_train', True, 'Whether to do training or testing') 22 | flags.DEFINE_integer('random_seed', 123, 'Value of random seed') 23 | 24 | FLAGS = flags.FLAGS 25 | 26 | # Set random seed 27 | tf.set_random_seed(FLAGS.random_seed) 28 | random.seed(FLAGS.random_seed) 29 | 30 | if FLAGS.gpu_fraction == '': 31 | raise ValueError("--gpu_fraction should be defined") 32 | 33 | def calc_gpu_fraction(fraction_string): 34 | idx, num = fraction_string.split('/') 35 | idx, num = float(idx), float(num) 36 | 37 | fraction = 1 / (num - idx + 1) 38 | print(" [*] GPU : %.4f" % fraction) 39 | return fraction 40 | 41 | def main(_): 42 | 43 | up_lanes = [3.5/2,3.5/2 + 3.5,250+3.5/2, 250+3.5+3.5/2, 500+3.5/2, 500+3.5+3.5/2] 44 | down_lanes = [250-3.5-3.5/2,250-3.5/2,500-3.5-3.5/2,500-3.5/2,750-3.5-3.5/2,750-3.5/2] 45 | left_lanes = [3.5/2,3.5/2 + 3.5,433+3.5/2, 433+3.5+3.5/2, 866+3.5/2, 866+3.5+3.5/2] 46 | right_lanes = [433-3.5-3.5/2,433-3.5/2,866-3.5-3.5/2,866-3.5/2,1299-3.5-3.5/2,1299-3.5/2] 47 | width = 750 48 | height = 1299 49 | Env = Environ(down_lanes,up_lanes,left_lanes,right_lanes, width, height) 50 | Env.new_random_game() 51 | gpu_options = tf.GPUOptions( 52 | per_process_gpu_memory_fraction=calc_gpu_fraction(FLAGS.gpu_fraction)) 53 | config = tf.ConfigProto() 54 | config.gpu_options.allow_growth = True 55 | 56 | with tf.Session(config=config) as sess: 57 | config = [] 58 | agent = Agent(config, Env, sess) 59 | #agent.play() 60 | agent.train() 61 | 62 | #agent.play() 63 | 64 | if __name__ == '__main__': 65 | tf.app.run() 66 | -------------------------------------------------------------------------------- /replay_memory.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import logging 4 | import numpy as np 5 | #from utils import save_npy, load_npy 6 | 7 | class ReplayMemory: 8 | def __init__(self, model_dir): 9 | self.model_dir = model_dir 10 | self.memory_size = 1000000 11 | self.actions = np.empty(self.memory_size, dtype = np.uint8) 12 | self.rewards = np.empty(self.memory_size, dtype = np.float64) 13 | self.prestate = np.empty((self.memory_size, 82), dtype = np.float16) 14 | self.poststate = np.empty((self.memory_size, 82), dtype = np.float16) 15 | self.batch_size = 2000 16 | self.count = 0 17 | self.current = 0 18 | 19 | 20 | def add(self, prestate, poststate, reward, action): 21 | self.actions[self.current] = action 22 | self.rewards[self.current] = reward 23 | self.prestate[self.current] = prestate 24 | self.poststate[self.current] = poststate 25 | self.count = max(self.count, self.current + 1) 26 | self.current = (self.current + 1) % self.memory_size 27 | 28 | 29 | 30 | def sample(self): 31 | indexes = [] 32 | while len(indexes) < self.batch_size: 33 | index = random.randint(0, self.count - 1) 34 | indexes.append(index) 35 | prestate = self.prestate[indexes] 36 | poststate = self.poststate[indexes] 37 | actions = self.actions[indexes] 38 | rewards = self.rewards[indexes] 39 | return prestate, poststate, actions, rewards 40 | 41 | -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | os 2 | time 3 | random 4 | numpy==1.13.1 5 | math 6 | matplotlib 7 | logging 8 | _pickle 9 | -------------------------------------------------------------------------------- /utils.py: 
-------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | import _pickle as cPickle 4 | def save_pkl(obj, path): 5 | with open(path, 'wb') as f: 6 | cPickle.dump(obj, f) 7 | print(" [*] save %s" % path) 8 | def load_pkl(path): 9 | with open(path, 'rb') as f: 10 | obj = cPickle.load(f) 11 | print(" [*] load %s" % path) 12 | return obj --------------------------------------------------------------------------------
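For reference, a quick round-trip with the two helpers in `utils.py` (the file name and array shape below are only examples; the 82×500 shape matches the first encoder layer in `agent.py`):

```python
import numpy as np
from utils import save_pkl, load_pkl

weights = {'encoder_h1': np.zeros((82, 500))}
save_pkl(weights, 'example_weights.pkl')      # prints " [*] save example_weights.pkl"
restored = load_pkl('example_weights.pkl')    # prints " [*] load example_weights.pkl"
assert np.array_equal(weights['encoder_h1'], restored['encoder_h1'])
```

`agent.py` uses the same helpers in `save_weight_to_pkl`/`load_weight_from_pkl` to checkpoint each network weight individually under the `weight/` directory.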